legion-data 1.6.18 → 1.6.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/Gemfile +1 -0
  4. data/legion-data.gemspec +2 -2
  5. data/lib/legion/data/archival/policy.rb +7 -1
  6. data/lib/legion/data/archival.rb +27 -4
  7. data/lib/legion/data/archiver.rb +103 -51
  8. data/lib/legion/data/audit_record.rb +8 -5
  9. data/lib/legion/data/connection.rb +88 -17
  10. data/lib/legion/data/encryption/key_provider.rb +9 -2
  11. data/lib/legion/data/encryption/sequel_plugin.rb +126 -12
  12. data/lib/legion/data/event_store.rb +29 -10
  13. data/lib/legion/data/extract/handlers/base.rb +7 -1
  14. data/lib/legion/data/extract/handlers/csv.rb +1 -0
  15. data/lib/legion/data/extract/handlers/docx.rb +3 -1
  16. data/lib/legion/data/extract/handlers/html.rb +3 -1
  17. data/lib/legion/data/extract/handlers/json.rb +1 -0
  18. data/lib/legion/data/extract/handlers/jsonl.rb +1 -0
  19. data/lib/legion/data/extract/handlers/markdown.rb +1 -0
  20. data/lib/legion/data/extract/handlers/pdf.rb +3 -1
  21. data/lib/legion/data/extract/handlers/pptx.rb +3 -1
  22. data/lib/legion/data/extract/handlers/text.rb +1 -0
  23. data/lib/legion/data/extract/handlers/vtt.rb +1 -0
  24. data/lib/legion/data/extract/handlers/xlsx.rb +3 -1
  25. data/lib/legion/data/extract.rb +7 -0
  26. data/lib/legion/data/helper.rb +16 -6
  27. data/lib/legion/data/local.rb +62 -5
  28. data/lib/legion/data/migration.rb +6 -1
  29. data/lib/legion/data/migrations/044_expand_memory_traces.rb +4 -1
  30. data/lib/legion/data/model.rb +8 -4
  31. data/lib/legion/data/models/audit_log.rb +5 -1
  32. data/lib/legion/data/models/audit_record.rb +5 -1
  33. data/lib/legion/data/models/function.rb +5 -1
  34. data/lib/legion/data/models/node.rb +6 -2
  35. data/lib/legion/data/partition_manager.rb +15 -19
  36. data/lib/legion/data/retention.rb +31 -2
  37. data/lib/legion/data/rls.rb +8 -2
  38. data/lib/legion/data/settings.rb +5 -1
  39. data/lib/legion/data/spool.rb +69 -6
  40. data/lib/legion/data/storage_tiers.rb +16 -3
  41. data/lib/legion/data/vector.rb +9 -5
  42. data/lib/legion/data/version.rb +1 -1
  43. data/lib/legion/data.rb +39 -12
  44. metadata +5 -5
data/lib/legion/data/spool.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'json'
4
5
  require 'fileutils'
5
6
  require 'securerandom'
@@ -31,6 +32,8 @@ module Legion
31
32
  end
32
33
 
33
34
  class ScopedSpool
35
+ include Legion::Logging::Helper
36
+
34
37
  def initialize(extension_module, spool_root)
35
38
  @extension_dir = File.join(spool_root, Spool.send(:extension_path, extension_module))
36
39
  end
@@ -40,25 +43,43 @@ module Legion
40
43
  FileUtils.mkdir_p(dir)
41
44
  filename = "#{Time.now.strftime('%s%9N')}-#{SecureRandom.uuid}.json"
42
45
  path = File.join(dir, filename)
43
- File.write(path, ::JSON.generate(payload))
44
- Legion::Logging.debug "Spool write: #{sub_namespace} -> #{filename}" if defined?(Legion::Logging)
46
+ temp_path = temp_path_for(dir, filename)
47
+ File.binwrite(temp_path, ::JSON.generate(payload))
48
+ File.rename(temp_path, path)
49
+ log.info "Spool write: #{sub_namespace} -> #{filename}"
45
50
  path
51
+ rescue StandardError => e
52
+ File.delete(temp_path) if defined?(temp_path) && temp_path && File.exist?(temp_path)
53
+ handle_exception(e, level: :error, handled: false, operation: :spool_write, sub_namespace: sub_namespace)
54
+ raise
46
55
  end
47
56
 
48
57
  def read(sub_namespace)
49
- sorted_files(sub_namespace).map { |f| ::JSON.parse(File.read(f), symbolize_names: true) }
58
+ sorted_files(sub_namespace).each_with_object([]) do |path, events|
59
+ event = load_event_file(path, sub_namespace)
60
+ events << event if event
61
+ end
62
+ rescue StandardError => e
63
+ handle_exception(e, level: :error, handled: false, operation: :spool_read, sub_namespace: sub_namespace)
64
+ raise
50
65
  end
51
66
 
52
67
  def flush(sub_namespace)
53
68
  count = 0
69
+ path = nil
54
70
  sorted_files(sub_namespace).each do |path|
55
- event = ::JSON.parse(File.read(path), symbolize_names: true)
71
+ event = load_event_file(path, sub_namespace)
72
+ next unless event
73
+
56
74
  yield event
57
75
  File.delete(path)
58
76
  count += 1
59
77
  end
60
- Legion::Logging.info "Spool drained #{count} item(s) from #{sub_namespace}" if defined?(Legion::Logging) && count.positive?
78
+ log.info "Spool drained #{count} item(s) from #{sub_namespace}" if count.positive?
61
79
  count
80
+ rescue StandardError => e
81
+ handle_exception(e, level: :error, handled: false, operation: :spool_flush, sub_namespace: sub_namespace, path: path)
82
+ raise
62
83
  end
63
84
 
64
85
  def count(sub_namespace)
@@ -70,6 +91,10 @@ module Legion
70
91
  return unless Dir.exist?(dir)
71
92
 
72
93
  Dir[File.join(dir, '*.json')].each { |f| File.delete(f) }
94
+ log.info "Spool cleared #{sub_namespace}"
95
+ rescue StandardError => e
96
+ handle_exception(e, level: :error, handled: false, operation: :spool_clear, sub_namespace: sub_namespace)
97
+ raise
73
98
  end
74
99
 
75
100
  private
@@ -82,7 +107,45 @@ module Legion
82
107
  dir = sub_dir(sub_namespace)
83
108
  return [] unless Dir.exist?(dir)
84
109
 
85
- Dir[File.join(dir, '*.json')]
110
+ Dir.glob(File.join(dir, '*.json'), sort: true)
111
+ end
112
+
113
+ def load_event_file(path, sub_namespace)
114
+ ::JSON.parse(File.binread(path), symbolize_names: true)
115
+ rescue Errno::ENOENT
116
+ nil
117
+ rescue ::JSON::ParserError, EOFError, ArgumentError => e
118
+ quarantine_corrupt_file(path, sub_namespace, e)
119
+ nil
120
+ end
121
+
122
+ def quarantine_corrupt_file(path, sub_namespace, error)
123
+ return unless File.exist?(path)
124
+
125
+ quarantine_dir = File.join(sub_dir(sub_namespace), 'quarantine')
126
+ FileUtils.mkdir_p(quarantine_dir)
127
+ quarantine_path = unique_quarantine_path(quarantine_dir, File.basename(path))
128
+ File.rename(path, quarantine_path)
129
+ handle_exception(
130
+ error,
131
+ level: :warn,
132
+ handled: true,
133
+ operation: :spool_quarantine,
134
+ sub_namespace: sub_namespace,
135
+ path: path,
136
+ quarantine_path: quarantine_path
137
+ )
138
+ end
139
+
140
+ def unique_quarantine_path(quarantine_dir, basename)
141
+ path = File.join(quarantine_dir, "#{basename}.corrupt")
142
+ return path unless File.exist?(path)
143
+
144
+ File.join(quarantine_dir, "#{basename}.#{SecureRandom.uuid}.corrupt")
145
+ end
146
+
147
+ def temp_path_for(dir, filename)
148
+ File.join(dir, ".#{filename}.tmp-#{SecureRandom.uuid}")
86
149
  end
87
150
  end
88
151
  end
data/lib/legion/data/storage_tiers.rb CHANGED
@@ -1,11 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
4
+
3
5
  module Legion
4
6
  module Data
5
7
  module StorageTiers
6
8
  TIERS = { hot: 0, warm: 1, cold: 2 }.freeze
7
9
 
8
10
  class << self
11
+ include Legion::Logging::Helper
12
+
9
13
  def archive_to_warm(table:, age_days: 90, batch_size: 1000)
10
14
  return { archived: 0, reason: 'no_connection' } unless Legion::Data.connection
11
15
  return { archived: 0, reason: 'no_archive_table' } unless Legion::Data.connection.table_exists?(:data_archive)
@@ -28,8 +32,11 @@ module Legion
28
32
  Legion::Data.connection[table].where(id: ids).delete
29
33
  end
30
34
 
31
- Legion::Logging.info "Archived #{records.size} row(s) from #{table} to warm tier" if defined?(Legion::Logging)
35
+ log.info "Archived #{records.size} row(s) from #{table} to warm tier"
32
36
  { archived: records.size, table: table.to_s }
37
+ rescue StandardError => e
38
+ handle_exception(e, level: :error, handled: false, operation: :archive_to_warm, table: table, age_days: age_days, batch_size: batch_size)
39
+ raise
33
40
  end
34
41
 
35
42
  def export_to_cold(age_days: 365, batch_size: 5000)
@@ -44,14 +51,20 @@ module Legion
44
51
 
45
52
  ids = records.map { |r| r[:id] }
46
53
  Legion::Data.connection[:data_archive].where(id: ids).update(tier: TIERS[:cold])
47
- Legion::Logging.info "Exported #{records.size} row(s) to cold tier" if defined?(Legion::Logging)
54
+ log.info "Exported #{records.size} row(s) to cold tier"
48
55
  { exported: records.size, data: records }
56
+ rescue StandardError => e
57
+ handle_exception(e, level: :error, handled: false, operation: :export_to_cold, age_days: age_days, batch_size: batch_size)
58
+ raise
49
59
  end
50
60
 
51
61
  def stats
52
62
  return {} unless Legion::Data.connection&.table_exists?(:data_archive)
53
63
 
54
64
  { warm: count_tier(:warm), cold: count_tier(:cold) }
65
+ rescue StandardError => e
66
+ handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_stats)
67
+ {}
55
68
  end
56
69
 
57
70
  private
@@ -59,7 +72,7 @@ module Legion
59
72
  def count_tier(tier)
60
73
  Legion::Data.connection[:data_archive].where(tier: TIERS[tier]).count
61
74
  rescue StandardError => e
62
- Legion::Logging.debug("StorageTiers#count_tier failed for #{tier}: #{e.message}") if defined?(Legion::Logging)
75
+ handle_exception(e, level: :warn, handled: true, operation: :storage_tiers_count, tier: tier)
63
76
  0
64
77
  end
65
78
  end
data/lib/legion/data/vector.rb CHANGED
@@ -1,16 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
4
+
3
5
  module Legion
4
6
  module Data
5
7
  module Vector
6
8
  class << self
9
+ include Legion::Logging::Helper
10
+
7
11
  def available?
8
12
  return false unless Legion::Data.connection
9
13
  return false unless Legion::Data.connection.adapter_scheme == :postgres
10
14
 
11
15
  Legion::Data.connection.fetch("SELECT 1 FROM pg_extension WHERE extname = 'vector'").any?
12
16
  rescue StandardError => e
13
- Legion::Logging.debug("Vector#available? check failed: #{e.message}") if defined?(Legion::Logging)
17
+ handle_exception(e, level: :warn, handled: true, operation: :vector_available?)
14
18
  false
15
19
  end
16
20
 
@@ -18,17 +22,17 @@ module Legion
18
22
  return false unless Legion::Data.connection&.adapter_scheme == :postgres
19
23
 
20
24
  Legion::Data.connection.run('CREATE EXTENSION IF NOT EXISTS vector')
21
- Legion::Logging.info 'pgvector extension enabled' if defined?(Legion::Logging)
25
+ log.info 'pgvector extension enabled'
22
26
  true
23
27
  rescue StandardError => e
24
- Legion::Logging.warn("pgvector extension creation failed: #{e.message}") if defined?(Legion::Logging)
28
+ handle_exception(e, level: :warn, handled: true, operation: :ensure_vector_extension)
25
29
  false
26
30
  end
27
31
 
28
32
  def cosine_search(table:, column:, query_vector:, limit: 10, min_similarity: 0.0)
29
33
  return [] unless available?
30
34
 
31
- Legion::Logging.debug "Vector cosine_search: table=#{table} column=#{column} limit=#{limit}" if defined?(Legion::Logging)
35
+ log.debug "Vector cosine_search: table=#{table} column=#{column} limit=#{limit}"
32
36
  vec_literal = vector_literal(query_vector)
33
37
  ds = Legion::Data.connection[table]
34
38
  .select_all
@@ -43,7 +47,7 @@ module Legion
43
47
  def l2_search(table:, column:, query_vector:, limit: 10)
44
48
  return [] unless available?
45
49
 
46
- Legion::Logging.debug "Vector l2_search: table=#{table} column=#{column} limit=#{limit}" if defined?(Legion::Logging)
50
+ log.debug "Vector l2_search: table=#{table} column=#{column} limit=#{limit}"
47
51
  vec_literal = vector_literal(query_vector)
48
52
  Legion::Data.connection[table]
49
53
  .select_all
data/lib/legion/data/version.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.6.18'
5
+ VERSION = '1.6.20'
6
6
  end
7
7
  end
data/lib/legion/data.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'legion/data/version'
4
5
  require 'legion/data/settings'
5
6
  require 'sequel'
@@ -16,16 +17,38 @@ require_relative 'data/rls'
16
17
  require_relative 'data/extract'
17
18
  require_relative 'data/audit_record'
18
19
 
20
+ unless Legion::Logging::Helper.method_defined?(:handle_exception)
21
+ module Legion
22
+ module Logging
23
+ module Helper
24
+ def handle_exception(exception, task_id: nil, level: :error, handled: true, **opts)
25
+ context = opts.map { |key, value| "#{key}=#{value.inspect}" }.join(' ')
26
+ message = "#{exception.class}: #{exception.message}"
27
+ message = "#{message} task_id=#{task_id}" if task_id
28
+ message = "#{message} handled=#{handled}"
29
+ message = "#{message} #{context}" unless context.empty?
30
+ warn("[#{level}] #{message}")
31
+ rescue StandardError => e
32
+ warn("handle_exception fallback failed: #{e.class}: #{e.message}")
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+
19
39
  module Legion
20
40
  module Data
21
41
  class << self
42
+ include Legion::Logging::Helper
43
+
22
44
  def setup
45
+ log.info 'Legion::Data setup starting'
23
46
  connection_setup
24
47
  migrate
25
48
  load_models
26
49
  setup_cache
27
50
  setup_local
28
- Legion::Logging.info 'Legion::Data setup complete' if defined?(Legion::Logging)
51
+ log.info 'Legion::Data setup complete'
29
52
  end
30
53
 
31
54
  def connection_setup
@@ -59,7 +82,8 @@ module Legion
59
82
 
60
83
  def connected?
61
84
  Legion::Settings[:data][:connected] == true
62
- rescue StandardError
85
+ rescue StandardError => e
86
+ handle_exception(e, level: :debug, handled: true, operation: :connected?)
63
87
  false
64
88
  end
65
89
 
@@ -75,7 +99,8 @@ module Legion
75
99
  @write_privileges[table_name] = connection
76
100
  .fetch("SELECT has_table_privilege(current_user, ?, 'INSERT') AS can", table_name.to_s)
77
101
  .first[:can] == true
78
- rescue StandardError
102
+ rescue StandardError => e
103
+ handle_exception(e, level: :warn, handled: true, operation: :can_write?, table: table_name)
79
104
  @write_privileges[table_name] = false if @write_privileges
80
105
  false
81
106
  end
@@ -92,7 +117,8 @@ module Legion
92
117
  @read_privileges[table_name] = connection
93
118
  .fetch("SELECT has_table_privilege(current_user, ?, 'SELECT') AS can", table_name.to_s)
94
119
  .first[:can] == true
95
- rescue StandardError
120
+ rescue StandardError => e
121
+ handle_exception(e, level: :warn, handled: true, operation: :can_read?, table: table_name)
96
122
  @read_privileges[table_name] = false if @read_privileges
97
123
  false
98
124
  end
@@ -111,17 +137,18 @@ module Legion
111
137
  def setup_static_cache
112
138
  [Model::Extension, Model::Runner, Model::Function].each do |model|
113
139
  model.plugin :static_cache
114
- Legion::Logging.debug("StaticCache enabled for #{model}") if defined?(Legion::Logging)
140
+ log.debug("StaticCache enabled for #{model}")
115
141
  rescue StandardError => e
116
- Legion::Logging.warn("StaticCache failed for #{model}: #{e.message}") if defined?(Legion::Logging)
142
+ handle_exception(e, level: :warn, operation: :setup_static_cache, model: model.to_s)
117
143
  end
118
- Legion::Logging.info 'Legion::Data static cache loaded' if defined?(Legion::Logging)
144
+ log.info 'Legion::Data static cache loaded'
119
145
  end
120
146
 
121
147
  def reload_static_cache
122
148
  [Model::Extension, Model::Runner, Model::Function].each do |model|
123
149
  model.load_cache if model.respond_to?(:load_cache)
124
150
  end
151
+ log.info 'Legion::Data static cache reloaded'
125
152
  end
126
153
 
127
154
  def setup_external_cache
@@ -132,17 +159,17 @@ module Legion
132
159
  Model::Setting => ttl
133
160
  }.each do |model, model_ttl|
134
161
  model.plugin :caching, ::Legion::Cache, ttl: model_ttl
135
- Legion::Logging.debug("Caching enabled for #{model} (ttl: #{model_ttl})") if defined?(Legion::Logging)
162
+ log.debug("Caching enabled for #{model} (ttl: #{model_ttl})")
136
163
  rescue StandardError => e
137
- Legion::Logging.warn("Caching failed for #{model}: #{e.message}") if defined?(Legion::Logging)
164
+ handle_exception(e, level: :warn, operation: :setup_external_cache, model: model.to_s, ttl: model_ttl)
138
165
  end
139
- Legion::Logging.info 'Legion::Data external cache connected' if defined?(Legion::Logging)
166
+ log.info 'Legion::Data external cache connected'
140
167
  end
141
168
 
142
169
  def shutdown
143
170
  Legion::Data::Local.shutdown if defined?(Legion::Data::Local) && Legion::Data::Local.connected?
144
171
  Legion::Data::Connection.shutdown
145
- Legion::Logging.info 'Legion::Data shutdown complete' if defined?(Legion::Logging)
172
+ log.info 'Legion::Data shutdown complete'
146
173
  end
147
174
 
148
175
  private
@@ -152,7 +179,7 @@ module Legion
152
179
 
153
180
  Legion::Data::Local.setup
154
181
  rescue StandardError => e
155
- Legion::Logging.warn "Legion::Data::Local failed to setup: #{e.message}" if defined?(Legion::Logging)
182
+ handle_exception(e, level: :warn, operation: :setup_local)
156
183
  end
157
184
  end
158
185
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.18
4
+ version: 1.6.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -29,28 +29,28 @@ dependencies:
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 1.2.8
32
+ version: 1.5.0
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: 1.2.8
39
+ version: 1.5.0
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: legion-settings
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 1.3.12
46
+ version: 1.3.26
47
47
  type: :runtime
48
48
  prerelease: false
49
49
  version_requirements: !ruby/object:Gem::Requirement
50
50
  requirements:
51
51
  - - ">="
52
52
  - !ruby/object:Gem::Version
53
- version: 1.3.12
53
+ version: 1.3.26
54
54
  - !ruby/object:Gem::Dependency
55
55
  name: sequel
56
56
  requirement: !ruby/object:Gem::Requirement