legion-data 1.6.18 → 1.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/Gemfile +1 -0
- data/legion-data.gemspec +2 -2
- data/lib/legion/data/archival/policy.rb +7 -1
- data/lib/legion/data/archival.rb +27 -4
- data/lib/legion/data/archiver.rb +103 -51
- data/lib/legion/data/audit_record.rb +8 -5
- data/lib/legion/data/connection.rb +88 -17
- data/lib/legion/data/encryption/key_provider.rb +9 -2
- data/lib/legion/data/encryption/sequel_plugin.rb +126 -12
- data/lib/legion/data/event_store.rb +29 -10
- data/lib/legion/data/extract/handlers/base.rb +7 -1
- data/lib/legion/data/extract/handlers/csv.rb +1 -0
- data/lib/legion/data/extract/handlers/docx.rb +3 -1
- data/lib/legion/data/extract/handlers/html.rb +3 -1
- data/lib/legion/data/extract/handlers/json.rb +1 -0
- data/lib/legion/data/extract/handlers/jsonl.rb +1 -0
- data/lib/legion/data/extract/handlers/markdown.rb +1 -0
- data/lib/legion/data/extract/handlers/pdf.rb +3 -1
- data/lib/legion/data/extract/handlers/pptx.rb +3 -1
- data/lib/legion/data/extract/handlers/text.rb +1 -0
- data/lib/legion/data/extract/handlers/vtt.rb +1 -0
- data/lib/legion/data/extract/handlers/xlsx.rb +3 -1
- data/lib/legion/data/extract.rb +7 -0
- data/lib/legion/data/helper.rb +16 -6
- data/lib/legion/data/local.rb +62 -5
- data/lib/legion/data/migration.rb +6 -1
- data/lib/legion/data/migrations/044_expand_memory_traces.rb +4 -1
- data/lib/legion/data/model.rb +8 -4
- data/lib/legion/data/models/audit_log.rb +5 -1
- data/lib/legion/data/models/audit_record.rb +5 -1
- data/lib/legion/data/models/function.rb +5 -1
- data/lib/legion/data/models/node.rb +6 -2
- data/lib/legion/data/partition_manager.rb +15 -19
- data/lib/legion/data/retention.rb +31 -2
- data/lib/legion/data/rls.rb +8 -2
- data/lib/legion/data/settings.rb +5 -1
- data/lib/legion/data/spool.rb +69 -6
- data/lib/legion/data/storage_tiers.rb +16 -3
- data/lib/legion/data/vector.rb +9 -5
- data/lib/legion/data/version.rb +1 -1
- data/lib/legion/data.rb +39 -12
- metadata +5 -5
data/lib/legion/data/spool.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
require 'json'
|
|
4
5
|
require 'fileutils'
|
|
5
6
|
require 'securerandom'
|
|
@@ -31,6 +32,8 @@ module Legion
|
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
class ScopedSpool
|
|
35
|
+
include Legion::Logging::Helper
|
|
36
|
+
|
|
34
37
|
def initialize(extension_module, spool_root)
|
|
35
38
|
@extension_dir = File.join(spool_root, Spool.send(:extension_path, extension_module))
|
|
36
39
|
end
|
|
@@ -40,25 +43,43 @@ module Legion
|
|
|
40
43
|
FileUtils.mkdir_p(dir)
|
|
41
44
|
filename = "#{Time.now.strftime('%s%9N')}-#{SecureRandom.uuid}.json"
|
|
42
45
|
path = File.join(dir, filename)
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
temp_path = temp_path_for(dir, filename)
|
|
47
|
+
File.binwrite(temp_path, ::JSON.generate(payload))
|
|
48
|
+
File.rename(temp_path, path)
|
|
49
|
+
log.info "Spool write: #{sub_namespace} -> #{filename}"
|
|
45
50
|
path
|
|
51
|
+
rescue StandardError => e
|
|
52
|
+
File.delete(temp_path) if defined?(temp_path) && temp_path && File.exist?(temp_path)
|
|
53
|
+
handle_exception(e, level: :error, handled: false, operation: :spool_write, sub_namespace: sub_namespace)
|
|
54
|
+
raise
|
|
46
55
|
end
|
|
47
56
|
|
|
48
57
|
def read(sub_namespace)
|
|
49
|
-
sorted_files(sub_namespace).
|
|
58
|
+
sorted_files(sub_namespace).each_with_object([]) do |path, events|
|
|
59
|
+
event = load_event_file(path, sub_namespace)
|
|
60
|
+
events << event if event
|
|
61
|
+
end
|
|
62
|
+
rescue StandardError => e
|
|
63
|
+
handle_exception(e, level: :error, handled: false, operation: :spool_read, sub_namespace: sub_namespace)
|
|
64
|
+
raise
|
|
50
65
|
end
|
|
51
66
|
|
|
52
67
|
def flush(sub_namespace)
|
|
53
68
|
count = 0
|
|
69
|
+
path = nil
|
|
54
70
|
sorted_files(sub_namespace).each do |path|
|
|
55
|
-
event =
|
|
71
|
+
event = load_event_file(path, sub_namespace)
|
|
72
|
+
next unless event
|
|
73
|
+
|
|
56
74
|
yield event
|
|
57
75
|
File.delete(path)
|
|
58
76
|
count += 1
|
|
59
77
|
end
|
|
60
|
-
|
|
78
|
+
log.info "Spool drained #{count} item(s) from #{sub_namespace}" if count.positive?
|
|
61
79
|
count
|
|
80
|
+
rescue StandardError => e
|
|
81
|
+
handle_exception(e, level: :error, handled: false, operation: :spool_flush, sub_namespace: sub_namespace, path: path)
|
|
82
|
+
raise
|
|
62
83
|
end
|
|
63
84
|
|
|
64
85
|
def count(sub_namespace)
|
|
@@ -70,6 +91,10 @@ module Legion
|
|
|
70
91
|
return unless Dir.exist?(dir)
|
|
71
92
|
|
|
72
93
|
Dir[File.join(dir, '*.json')].each { |f| File.delete(f) }
|
|
94
|
+
log.info "Spool cleared #{sub_namespace}"
|
|
95
|
+
rescue StandardError => e
|
|
96
|
+
handle_exception(e, level: :error, handled: false, operation: :spool_clear, sub_namespace: sub_namespace)
|
|
97
|
+
raise
|
|
73
98
|
end
|
|
74
99
|
|
|
75
100
|
private
|
|
@@ -82,7 +107,45 @@ module Legion
|
|
|
82
107
|
dir = sub_dir(sub_namespace)
|
|
83
108
|
return [] unless Dir.exist?(dir)
|
|
84
109
|
|
|
85
|
-
Dir
|
|
110
|
+
Dir.glob(File.join(dir, '*.json'), sort: true)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def load_event_file(path, sub_namespace)
|
|
114
|
+
::JSON.parse(File.binread(path), symbolize_names: true)
|
|
115
|
+
rescue Errno::ENOENT
|
|
116
|
+
nil
|
|
117
|
+
rescue ::JSON::ParserError, EOFError, ArgumentError => e
|
|
118
|
+
quarantine_corrupt_file(path, sub_namespace, e)
|
|
119
|
+
nil
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def quarantine_corrupt_file(path, sub_namespace, error)
|
|
123
|
+
return unless File.exist?(path)
|
|
124
|
+
|
|
125
|
+
quarantine_dir = File.join(sub_dir(sub_namespace), 'quarantine')
|
|
126
|
+
FileUtils.mkdir_p(quarantine_dir)
|
|
127
|
+
quarantine_path = unique_quarantine_path(quarantine_dir, File.basename(path))
|
|
128
|
+
File.rename(path, quarantine_path)
|
|
129
|
+
handle_exception(
|
|
130
|
+
error,
|
|
131
|
+
level: :warn,
|
|
132
|
+
handled: true,
|
|
133
|
+
operation: :spool_quarantine,
|
|
134
|
+
sub_namespace: sub_namespace,
|
|
135
|
+
path: path,
|
|
136
|
+
quarantine_path: quarantine_path
|
|
137
|
+
)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def unique_quarantine_path(quarantine_dir, basename)
|
|
141
|
+
path = File.join(quarantine_dir, "#{basename}.corrupt")
|
|
142
|
+
return path unless File.exist?(path)
|
|
143
|
+
|
|
144
|
+
File.join(quarantine_dir, "#{basename}.#{SecureRandom.uuid}.corrupt")
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def temp_path_for(dir, filename)
|
|
148
|
+
File.join(dir, ".#{filename}.tmp-#{SecureRandom.uuid}")
|
|
86
149
|
end
|
|
87
150
|
end
|
|
88
151
|
end
|
data/lib/legion/data/storage_tiers.rb
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module Data
|
|
5
7
|
module StorageTiers
|
|
6
8
|
TIERS = { hot: 0, warm: 1, cold: 2 }.freeze
|
|
7
9
|
|
|
8
10
|
class << self
|
|
11
|
+
include Legion::Logging::Helper
|
|
12
|
+
|
|
9
13
|
def archive_to_warm(table:, age_days: 90, batch_size: 1000)
|
|
10
14
|
return { archived: 0, reason: 'no_connection' } unless Legion::Data.connection
|
|
11
15
|
return { archived: 0, reason: 'no_archive_table' } unless Legion::Data.connection.table_exists?(:data_archive)
|
|
@@ -28,8 +32,11 @@ module Legion
|
|
|
28
32
|
Legion::Data.connection[table].where(id: ids).delete
|
|
29
33
|
end
|
|
30
34
|
|
|
31
|
-
|
|
35
|
+
log.info "Archived #{records.size} row(s) from #{table} to warm tier"
|
|
32
36
|
{ archived: records.size, table: table.to_s }
|
|
37
|
+
rescue StandardError => e
|
|
38
|
+
handle_exception(e, level: :error, handled: false, operation: :archive_to_warm, table: table, age_days: age_days, batch_size: batch_size)
|
|
39
|
+
raise
|
|
33
40
|
end
|
|
34
41
|
|
|
35
42
|
def export_to_cold(age_days: 365, batch_size: 5000)
|
|
@@ -44,14 +51,20 @@ module Legion
|
|
|
44
51
|
|
|
45
52
|
ids = records.map { |r| r[:id] }
|
|
46
53
|
Legion::Data.connection[:data_archive].where(id: ids).update(tier: TIERS[:cold])
|
|
47
|
-
|
|
54
|
+
log.info "Exported #{records.size} row(s) to cold tier"
|
|
48
55
|
{ exported: records.size, data: records }
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
handle_exception(e, level: :error, handled: false, operation: :export_to_cold, age_days: age_days, batch_size: batch_size)
|
|
58
|
+
raise
|
|
49
59
|
end
|
|
50
60
|
|
|
51
61
|
def stats
|
|
52
62
|
return {} unless Legion::Data.connection&.table_exists?(:data_archive)
|
|
53
63
|
|
|
54
64
|
{ warm: count_tier(:warm), cold: count_tier(:cold) }
|
|
65
|
+
rescue StandardError => e
|
|
66
|
+
handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_stats)
|
|
67
|
+
{}
|
|
55
68
|
end
|
|
56
69
|
|
|
57
70
|
private
|
|
@@ -59,7 +72,7 @@ module Legion
|
|
|
59
72
|
def count_tier(tier)
|
|
60
73
|
Legion::Data.connection[:data_archive].where(tier: TIERS[tier]).count
|
|
61
74
|
rescue StandardError => e
|
|
62
|
-
|
|
75
|
+
handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_count, tier: tier)
|
|
63
76
|
0
|
|
64
77
|
end
|
|
65
78
|
end
|
data/lib/legion/data/vector.rb
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module Data
|
|
5
7
|
module Vector
|
|
6
8
|
class << self
|
|
9
|
+
include Legion::Logging::Helper
|
|
10
|
+
|
|
7
11
|
def available?
|
|
8
12
|
return false unless Legion::Data.connection
|
|
9
13
|
return false unless Legion::Data.connection.adapter_scheme == :postgres
|
|
10
14
|
|
|
11
15
|
Legion::Data.connection.fetch("SELECT 1 FROM pg_extension WHERE extname = 'vector'").any?
|
|
12
16
|
rescue StandardError => e
|
|
13
|
-
|
|
17
|
+
handle_exception(e, level: :warn, handled: true, operation: :vector_available?)
|
|
14
18
|
false
|
|
15
19
|
end
|
|
16
20
|
|
|
@@ -18,17 +22,17 @@ module Legion
|
|
|
18
22
|
return false unless Legion::Data.connection&.adapter_scheme == :postgres
|
|
19
23
|
|
|
20
24
|
Legion::Data.connection.run('CREATE EXTENSION IF NOT EXISTS vector')
|
|
21
|
-
|
|
25
|
+
log.info 'pgvector extension enabled'
|
|
22
26
|
true
|
|
23
27
|
rescue StandardError => e
|
|
24
|
-
|
|
28
|
+
handle_exception(e, level: :warn, handled: true, operation: :ensure_vector_extension)
|
|
25
29
|
false
|
|
26
30
|
end
|
|
27
31
|
|
|
28
32
|
def cosine_search(table:, column:, query_vector:, limit: 10, min_similarity: 0.0)
|
|
29
33
|
return [] unless available?
|
|
30
34
|
|
|
31
|
-
|
|
35
|
+
log.debug "Vector cosine_search: table=#{table} column=#{column} limit=#{limit}"
|
|
32
36
|
vec_literal = vector_literal(query_vector)
|
|
33
37
|
ds = Legion::Data.connection[table]
|
|
34
38
|
.select_all
|
|
@@ -43,7 +47,7 @@ module Legion
|
|
|
43
47
|
def l2_search(table:, column:, query_vector:, limit: 10)
|
|
44
48
|
return [] unless available?
|
|
45
49
|
|
|
46
|
-
|
|
50
|
+
log.debug "Vector l2_search: table=#{table} column=#{column} limit=#{limit}"
|
|
47
51
|
vec_literal = vector_literal(query_vector)
|
|
48
52
|
Legion::Data.connection[table]
|
|
49
53
|
.select_all
|
data/lib/legion/data/version.rb
CHANGED
data/lib/legion/data.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
require 'legion/data/version'
|
|
4
5
|
require 'legion/data/settings'
|
|
5
6
|
require 'sequel'
|
|
@@ -16,16 +17,38 @@ require_relative 'data/rls'
|
|
|
16
17
|
require_relative 'data/extract'
|
|
17
18
|
require_relative 'data/audit_record'
|
|
18
19
|
|
|
20
|
+
unless Legion::Logging::Helper.method_defined?(:handle_exception)
|
|
21
|
+
module Legion
|
|
22
|
+
module Logging
|
|
23
|
+
module Helper
|
|
24
|
+
def handle_exception(exception, task_id: nil, level: :error, handled: true, **opts)
|
|
25
|
+
context = opts.map { |key, value| "#{key}=#{value.inspect}" }.join(' ')
|
|
26
|
+
message = "#{exception.class}: #{exception.message}"
|
|
27
|
+
message = "#{message} task_id=#{task_id}" if task_id
|
|
28
|
+
message = "#{message} handled=#{handled}"
|
|
29
|
+
message = "#{message} #{context}" unless context.empty?
|
|
30
|
+
warn("[#{level}] #{message}")
|
|
31
|
+
rescue StandardError => e
|
|
32
|
+
warn("handle_exception fallback failed: #{e.class}: #{e.message}")
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
19
39
|
module Legion
|
|
20
40
|
module Data
|
|
21
41
|
class << self
|
|
42
|
+
include Legion::Logging::Helper
|
|
43
|
+
|
|
22
44
|
def setup
|
|
45
|
+
log.info 'Legion::Data setup starting'
|
|
23
46
|
connection_setup
|
|
24
47
|
migrate
|
|
25
48
|
load_models
|
|
26
49
|
setup_cache
|
|
27
50
|
setup_local
|
|
28
|
-
|
|
51
|
+
log.info 'Legion::Data setup complete'
|
|
29
52
|
end
|
|
30
53
|
|
|
31
54
|
def connection_setup
|
|
@@ -59,7 +82,8 @@ module Legion
|
|
|
59
82
|
|
|
60
83
|
def connected?
|
|
61
84
|
Legion::Settings[:data][:connected] == true
|
|
62
|
-
rescue StandardError
|
|
85
|
+
rescue StandardError => e
|
|
86
|
+
handle_exception(e, level: :debug, handled: true, operation: :connected?)
|
|
63
87
|
false
|
|
64
88
|
end
|
|
65
89
|
|
|
@@ -75,7 +99,8 @@ module Legion
|
|
|
75
99
|
@write_privileges[table_name] = connection
|
|
76
100
|
.fetch("SELECT has_table_privilege(current_user, ?, 'INSERT') AS can", table_name.to_s)
|
|
77
101
|
.first[:can] == true
|
|
78
|
-
rescue StandardError
|
|
102
|
+
rescue StandardError => e
|
|
103
|
+
handle_exception(e, level: :warn, handled: true, operation: :can_write?, table: table_name)
|
|
79
104
|
@write_privileges[table_name] = false if @write_privileges
|
|
80
105
|
false
|
|
81
106
|
end
|
|
@@ -92,7 +117,8 @@ module Legion
|
|
|
92
117
|
@read_privileges[table_name] = connection
|
|
93
118
|
.fetch("SELECT has_table_privilege(current_user, ?, 'SELECT') AS can", table_name.to_s)
|
|
94
119
|
.first[:can] == true
|
|
95
|
-
rescue StandardError
|
|
120
|
+
rescue StandardError => e
|
|
121
|
+
handle_exception(e, level: :warn, handled: true, operation: :can_read?, table: table_name)
|
|
96
122
|
@read_privileges[table_name] = false if @read_privileges
|
|
97
123
|
false
|
|
98
124
|
end
|
|
@@ -111,17 +137,18 @@ module Legion
|
|
|
111
137
|
def setup_static_cache
|
|
112
138
|
[Model::Extension, Model::Runner, Model::Function].each do |model|
|
|
113
139
|
model.plugin :static_cache
|
|
114
|
-
|
|
140
|
+
log.debug("StaticCache enabled for #{model}")
|
|
115
141
|
rescue StandardError => e
|
|
116
|
-
|
|
142
|
+
handle_exception(e, level: :warn, operation: :setup_static_cache, model: model.to_s)
|
|
117
143
|
end
|
|
118
|
-
|
|
144
|
+
log.info 'Legion::Data static cache loaded'
|
|
119
145
|
end
|
|
120
146
|
|
|
121
147
|
def reload_static_cache
|
|
122
148
|
[Model::Extension, Model::Runner, Model::Function].each do |model|
|
|
123
149
|
model.load_cache if model.respond_to?(:load_cache)
|
|
124
150
|
end
|
|
151
|
+
log.info 'Legion::Data static cache reloaded'
|
|
125
152
|
end
|
|
126
153
|
|
|
127
154
|
def setup_external_cache
|
|
@@ -132,17 +159,17 @@ module Legion
|
|
|
132
159
|
Model::Setting => ttl
|
|
133
160
|
}.each do |model, model_ttl|
|
|
134
161
|
model.plugin :caching, ::Legion::Cache, ttl: model_ttl
|
|
135
|
-
|
|
162
|
+
log.debug("Caching enabled for #{model} (ttl: #{model_ttl})")
|
|
136
163
|
rescue StandardError => e
|
|
137
|
-
|
|
164
|
+
handle_exception(e, level: :warn, operation: :setup_external_cache, model: model.to_s, ttl: model_ttl)
|
|
138
165
|
end
|
|
139
|
-
|
|
166
|
+
log.info 'Legion::Data external cache connected'
|
|
140
167
|
end
|
|
141
168
|
|
|
142
169
|
def shutdown
|
|
143
170
|
Legion::Data::Local.shutdown if defined?(Legion::Data::Local) && Legion::Data::Local.connected?
|
|
144
171
|
Legion::Data::Connection.shutdown
|
|
145
|
-
|
|
172
|
+
log.info 'Legion::Data shutdown complete'
|
|
146
173
|
end
|
|
147
174
|
|
|
148
175
|
private
|
|
@@ -152,7 +179,7 @@ module Legion
|
|
|
152
179
|
|
|
153
180
|
Legion::Data::Local.setup
|
|
154
181
|
rescue StandardError => e
|
|
155
|
-
|
|
182
|
+
handle_exception(e, level: :warn, operation: :setup_local)
|
|
156
183
|
end
|
|
157
184
|
end
|
|
158
185
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-data
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.18
|
|
4
|
+
version: 1.6.20
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -29,28 +29,28 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: 1.
|
|
32
|
+
version: 1.5.0
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: 1.
|
|
39
|
+
version: 1.5.0
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: legion-settings
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
43
43
|
requirements:
|
|
44
44
|
- - ">="
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: 1.3.
|
|
46
|
+
version: 1.3.26
|
|
47
47
|
type: :runtime
|
|
48
48
|
prerelease: false
|
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
50
|
requirements:
|
|
51
51
|
- - ">="
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: 1.3.
|
|
53
|
+
version: 1.3.26
|
|
54
54
|
- !ruby/object:Gem::Dependency
|
|
55
55
|
name: sequel
|
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|