legion-data 1.6.18 → 1.6.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/Gemfile +1 -0
  4. data/legion-data.gemspec +2 -2
  5. data/lib/legion/data/archival/policy.rb +7 -1
  6. data/lib/legion/data/archival.rb +27 -4
  7. data/lib/legion/data/archiver.rb +103 -51
  8. data/lib/legion/data/audit_record.rb +8 -5
  9. data/lib/legion/data/connection.rb +88 -17
  10. data/lib/legion/data/encryption/key_provider.rb +9 -2
  11. data/lib/legion/data/encryption/sequel_plugin.rb +126 -12
  12. data/lib/legion/data/event_store.rb +29 -10
  13. data/lib/legion/data/extract/handlers/base.rb +7 -1
  14. data/lib/legion/data/extract/handlers/csv.rb +1 -0
  15. data/lib/legion/data/extract/handlers/docx.rb +3 -1
  16. data/lib/legion/data/extract/handlers/html.rb +3 -1
  17. data/lib/legion/data/extract/handlers/json.rb +1 -0
  18. data/lib/legion/data/extract/handlers/jsonl.rb +1 -0
  19. data/lib/legion/data/extract/handlers/markdown.rb +1 -0
  20. data/lib/legion/data/extract/handlers/pdf.rb +3 -1
  21. data/lib/legion/data/extract/handlers/pptx.rb +3 -1
  22. data/lib/legion/data/extract/handlers/text.rb +1 -0
  23. data/lib/legion/data/extract/handlers/vtt.rb +1 -0
  24. data/lib/legion/data/extract/handlers/xlsx.rb +3 -1
  25. data/lib/legion/data/extract.rb +7 -0
  26. data/lib/legion/data/helper.rb +16 -6
  27. data/lib/legion/data/local.rb +62 -5
  28. data/lib/legion/data/migration.rb +6 -1
  29. data/lib/legion/data/migrations/044_expand_memory_traces.rb +4 -1
  30. data/lib/legion/data/model.rb +8 -4
  31. data/lib/legion/data/models/audit_log.rb +5 -1
  32. data/lib/legion/data/models/audit_record.rb +5 -1
  33. data/lib/legion/data/models/function.rb +5 -1
  34. data/lib/legion/data/models/node.rb +6 -2
  35. data/lib/legion/data/partition_manager.rb +15 -19
  36. data/lib/legion/data/retention.rb +31 -2
  37. data/lib/legion/data/rls.rb +8 -2
  38. data/lib/legion/data/settings.rb +5 -1
  39. data/lib/legion/data/spool.rb +69 -6
  40. data/lib/legion/data/storage_tiers.rb +16 -3
  41. data/lib/legion/data/vector.rb +9 -5
  42. data/lib/legion/data/version.rb +1 -1
  43. data/lib/legion/data.rb +39 -12
  44. metadata +5 -5
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
4
+
3
5
  require 'fileutils'
4
6
  require 'sequel'
5
7
 
@@ -28,6 +30,8 @@ module Legion
28
30
  # Prefixes warn-level messages with [slow-query] since Sequel uses warn
29
31
  # for queries exceeding log_warn_duration.
30
32
  class SlowQueryLogger
33
+ attr_reader :tagged
34
+
31
35
  def initialize(tagged_logger)
32
36
  @tagged = tagged_logger
33
37
  end
@@ -49,9 +53,52 @@ module Legion
49
53
  end
50
54
  end
51
55
 
56
+ class SegmentedTaggedLogger
57
+ attr_reader :segments
58
+
59
+ def initialize(segments:, logger: nil)
60
+ @segments = segments
61
+ @logger = logger || Legion::Logging
62
+ end
63
+
64
+ def warn(message)
65
+ with_segments { dispatch(:warn, message) }
66
+ end
67
+
68
+ def info(message)
69
+ with_segments { dispatch(:info, message) }
70
+ end
71
+
72
+ def debug(message)
73
+ with_segments { dispatch(:debug, message) }
74
+ end
75
+
76
+ def error(message)
77
+ with_segments { dispatch(:error, message) }
78
+ end
79
+
80
+ private
81
+
82
+ def dispatch(level, message)
83
+ return unless @logger.respond_to?(level)
84
+
85
+ @logger.public_send(level, message)
86
+ end
87
+
88
+ def with_segments
89
+ previous = Thread.current[:legion_log_segments]
90
+ Thread.current[:legion_log_segments] = @segments
91
+ yield
92
+ ensure
93
+ Thread.current[:legion_log_segments] = previous
94
+ end
95
+ end
96
+
52
97
  # File-based query logger that writes all SQL to a dedicated log file.
53
98
  # Isolated from the main Legion::Logging domain.
54
99
  class QueryFileLogger
100
+ include Legion::Logging::Helper
101
+
55
102
  attr_reader :path
56
103
 
57
104
  def initialize(path)
@@ -90,12 +137,15 @@ module Legion
90
137
  @mutex.synchronize do
91
138
  @file.puts "[#{Time.now.strftime('%Y-%m-%d %H:%M:%S.%L')}] #{level} #{message}"
92
139
  end
93
- rescue IOError
140
+ rescue IOError => e
141
+ handle_exception(e, level: :warn, handled: true, operation: :query_file_write, path: @path)
94
142
  nil
95
143
  end
96
144
  end
97
145
 
98
146
  class << self
147
+ include Legion::Logging::Helper
148
+
99
149
  attr_accessor :sequel
100
150
 
101
151
  def adapter
@@ -104,26 +154,23 @@ module Legion
104
154
 
105
155
  def setup
106
156
  opts = sequel_opts
157
+ log.info("Legion::Data::Connection setup adapter=#{adapter}")
107
158
  @sequel = if adapter == :sqlite
108
159
  ::Sequel.connect(opts.merge(adapter: :sqlite, database: sqlite_path))
109
160
  else
110
161
  begin
111
- ::Sequel.connect(opts.merge(adapter: adapter, **creds_builder))
162
+ ::Sequel.connect(connection_opts_for(adapter: adapter, opts: opts))
112
163
  rescue StandardError => e
113
164
  raise unless dev_fallback?
114
165
 
115
- if defined?(Legion::Logging)
116
- Legion::Logging.warn(
117
- "Shared DB unreachable (#{e.message}), dev_mode fallback to SQLite"
118
- )
119
- end
166
+ handle_exception(e, level: :warn, handled: true, operation: :shared_connect, fallback: :sqlite)
120
167
  @adapter = :sqlite
121
168
  sqlite_opts = sequel_opts
122
169
  ::Sequel.connect(sqlite_opts.merge(adapter: :sqlite, database: sqlite_path))
123
170
  end
124
171
  end
125
172
  Legion::Settings[:data][:connected] = true
126
- log_connection_info if defined?(Legion::Logging)
173
+ log_connection_info
127
174
  configure_extensions
128
175
  connect_with_replicas
129
176
  end
@@ -140,6 +187,7 @@ module Legion
140
187
  database: database_stats
141
188
  }
142
189
  rescue StandardError => e
190
+ handle_exception(e, level: :warn, handled: true, operation: :data_connection_stats, adapter: adapter)
143
191
  { connected: (data[:connected] if data.is_a?(Hash)), adapter: adapter, error: e.message }
144
192
  end
145
193
 
@@ -171,7 +219,8 @@ module Legion
171
219
  end
172
220
 
173
221
  stats.compact
174
- rescue StandardError
222
+ rescue StandardError => e
223
+ handle_exception(e, level: :warn, handled: true, operation: :data_pool_stats, adapter: adapter)
175
224
  {}
176
225
  end
177
226
 
@@ -180,7 +229,7 @@ module Legion
180
229
  @query_file_logger&.close
181
230
  @query_file_logger = nil
182
231
  Legion::Settings[:data][:connected] = false
183
- Legion::Logging.info 'Legion::Data connection closed' if defined?(Legion::Logging)
232
+ log.info 'Legion::Data connection closed'
184
233
  end
185
234
 
186
235
  def connect_with_replicas
@@ -202,7 +251,7 @@ module Legion
202
251
  end
203
252
 
204
253
  @replica_servers = replica_list.each_with_index.map { |_, idx| :"read_#{idx}" }
205
- Legion::Logging.debug "Registered #{@replica_servers.size} read replica(s)" if defined?(Legion::Logging)
254
+ log.debug "Registered #{@replica_servers.size} read replica(s)"
206
255
  end
207
256
 
208
257
  def read_server
@@ -258,20 +307,20 @@ module Legion
258
307
 
259
308
  Legion::Settings[:data][:tls] || {}
260
309
  rescue StandardError => e
261
- Legion::Logging.debug("Connection#data_tls_settings failed: #{e.message}") if defined?(Legion::Logging)
310
+ handle_exception(e, level: :warn, handled: true, operation: :data_tls_settings)
262
311
  {}
263
312
  end
264
313
 
265
314
  def log_connection_info
266
315
  if adapter == :sqlite
267
- Legion::Logging.info "Connected to SQLite at #{sqlite_path}"
316
+ log.info "Connected to SQLite at #{sqlite_path}"
268
317
  else
269
318
  actual = Legion::Settings[:data][:creds] || {}
270
319
  user = actual[:user] || actual[:username] || 'unknown'
271
320
  host = actual[:host] || '127.0.0.1'
272
321
  port = actual[:port]
273
322
  db = actual[:database] || actual[:db]
274
- Legion::Logging.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
323
+ log.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
275
324
  end
276
325
  end
277
326
 
@@ -284,6 +333,12 @@ module Legion
284
333
  Legion::Settings[:data][:creds][:database] || 'legionio.db'
285
334
  end
286
335
 
336
+ def connection_opts_for(adapter:, opts:)
337
+ connection_opts = opts.merge(adapter: adapter, **creds_builder)
338
+ connection_opts[:preconnect] = false if adapter != :sqlite && dev_fallback?
339
+ connection_opts
340
+ end
341
+
287
342
  def sequel_opts
288
343
  data = Legion::Settings[:data]
289
344
  opts = {}
@@ -356,6 +411,7 @@ module Legion
356
411
  else {}
357
412
  end
358
413
  rescue StandardError => e
414
+ handle_exception(e, level: :warn, handled: true, operation: :data_database_stats, adapter: adapter)
359
415
  { error: e.message }
360
416
  end
361
417
 
@@ -366,7 +422,8 @@ module Legion
366
422
  cache_size busy_timeout].each do |pragma|
367
423
  val = begin
368
424
  db.fetch("PRAGMA #{pragma}").single_value
369
- rescue StandardError
425
+ rescue StandardError => e
426
+ handle_exception(e, level: :warn, handled: true, operation: :sqlite_stats_pragma, pragma: pragma)
370
427
  nil
371
428
  end
372
429
  stats[pragma.to_sym] = val unless val.nil?
@@ -457,12 +514,26 @@ module Legion
457
514
  @sequel.pool.connection_expiration_timeout = data[:connection_expiration_timeout] || 14_400
458
515
  end
459
516
  rescue StandardError => e
460
- Legion::Logging.warn "Failed to load connection extensions: #{e.message}" if defined?(Legion::Logging)
517
+ handle_exception(e, level: :warn, handled: true, operation: :configure_extensions, adapter: adapter)
461
518
  end
462
519
 
463
520
  def build_data_logger
464
- tagged = Legion::Logging::Logger.new(lex: 'data')
521
+ tagged = if defined?(Legion::Logging::TaggedLogger) && respond_to?(:tagged_logger_settings, true)
522
+ Legion::Logging::TaggedLogger.new(
523
+ segments: %w[data connection],
524
+ **send(:tagged_logger_settings)
525
+ )
526
+ else
527
+ SegmentedTaggedLogger.new(segments: %w[data connection])
528
+ end
465
529
  SlowQueryLogger.new(tagged)
530
+ rescue StandardError => e
531
+ if respond_to?(:handle_exception, true)
532
+ handle_exception(e, level: :warn, handled: true, operation: :build_data_logger)
533
+ else
534
+ log.warn("build_data_logger failed: #{e.class}: #{e.message}")
535
+ end
536
+ SlowQueryLogger.new(SegmentedTaggedLogger.new(segments: %w[data connection], logger: log))
466
537
  end
467
538
  end
468
539
  end
@@ -1,11 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'openssl'
4
5
 
5
6
  module Legion
6
7
  module Data
7
8
  module Encryption
8
9
  class KeyProvider
10
+ include Legion::Logging::Helper
11
+
9
12
  def initialize(mode: :auto)
10
13
  @mode = mode
11
14
  @key_cache = {}
@@ -18,20 +21,24 @@ module Legion
18
21
 
19
22
  def clear_cache!
20
23
  @key_cache.clear
24
+ log.debug 'Cleared encryption key cache'
21
25
  end
22
26
 
23
27
  private
24
28
 
25
29
  def derive_key(tenant_id)
26
30
  if tenant_id && crypt_available?
27
- Legion::Logging.debug "Deriving Vault key for tenant #{tenant_id}" if defined?(Legion::Logging)
31
+ log.debug "Deriving Vault key for tenant #{tenant_id}"
28
32
  Legion::Crypt::PartitionKeys.derive(tenant_id: tenant_id)
29
33
  elsif crypt_available?
30
34
  Legion::Crypt.default_encryption_key
31
35
  else
32
- Legion::Logging.warn 'Legion::Crypt unavailable, falling back to dev encryption key' if defined?(Legion::Logging)
36
+ log.warn 'Legion::Crypt unavailable, falling back to dev encryption key'
33
37
  local_key
34
38
  end
39
+ rescue StandardError => e
40
+ handle_exception(e, level: :error, handled: false, operation: :derive_key, tenant_id: tenant_id)
41
+ raise
35
42
  end
36
43
 
37
44
  def crypt_available?
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require_relative 'cipher'
4
5
  require_relative 'key_provider'
5
6
 
@@ -7,6 +8,33 @@ module Legion
7
8
  module Data
8
9
  module Encryption
9
10
  module SequelPlugin
11
+ extend Legion::Logging::Helper
12
+
13
+ class << self
14
+ def aad_for(table_name:, primary_key:, column:)
15
+ "#{table_name}:#{primary_key || 0}:#{column}"
16
+ end
17
+
18
+ def decrypt_value(blob:, key:, table_name:, primary_key:, column:)
19
+ errors = []
20
+
21
+ aad_candidates(primary_key).each do |aad_primary_key|
22
+ aad = aad_for(table_name: table_name, primary_key: aad_primary_key, column: column)
23
+ return Legion::Data::Encryption::Cipher.decrypt(blob, key: key, aad: aad)
24
+ rescue OpenSSL::Cipher::CipherError, ArgumentError => e
25
+ errors << e
26
+ end
27
+
28
+ raise errors.last if errors.any?
29
+ end
30
+
31
+ private
32
+
33
+ def aad_candidates(primary_key)
34
+ [primary_key, 0].compact.uniq
35
+ end
36
+ end
37
+
10
38
  module ClassMethods
11
39
  def encrypted_columns
12
40
  @encrypted_columns ||= {}
@@ -20,28 +48,42 @@ module Legion
20
48
  raw = super()
21
49
  return nil if raw.nil?
22
50
 
23
- provider = self.class.encryption_key_provider
24
- tenant = col_scope == :tenant ? self[:tenant_id] : nil
25
- key = provider.key_for(tenant_id: tenant)
26
- aad = "#{self.class.table_name}:#{pk}:#{name}"
27
51
  begin
28
- Legion::Data::Encryption::Cipher.decrypt(raw.b, key: key, aad: aad)
52
+ decrypt_encrypted_column(name, raw, key_scope: col_scope)
29
53
  rescue StandardError => e
30
- Legion::Logging.warn "Decrypt failed for #{self.class.table_name}##{pk} column #{name}: #{e.message}" if defined?(Legion::Logging)
54
+ Legion::Data::Encryption::SequelPlugin.handle_exception(
55
+ e,
56
+ level: :warn,
57
+ handled: false,
58
+ operation: :decrypt_column,
59
+ table: self.class.table_name,
60
+ primary_key: pk,
61
+ column: name
62
+ )
31
63
  raise
32
64
  end
33
65
  end
34
66
 
35
67
  define_method(:"#{name}=") do |value|
36
68
  if value.nil?
69
+ clear_pending_encrypted_column(name)
37
70
  super(nil)
38
71
  else
39
- provider = self.class.encryption_key_provider
40
- tenant = col_scope == :tenant ? self[:tenant_id] : nil
41
- key = provider.key_for(tenant_id: tenant)
42
- aad = "#{self.class.table_name}:#{pk || 0}:#{name}"
43
- encrypted = Legion::Data::Encryption::Cipher.encrypt(value.to_s, key: key, aad: aad)
44
- super(Sequel.blob(encrypted))
72
+ begin
73
+ remember_pending_encrypted_column(name, value, key_scope: col_scope) if new?
74
+ super(encrypt_encrypted_column(name, value, key_scope: col_scope, primary_key: pk || 0))
75
+ rescue StandardError => e
76
+ Legion::Data::Encryption::SequelPlugin.handle_exception(
77
+ e,
78
+ level: :error,
79
+ handled: false,
80
+ operation: :encrypt_column,
81
+ table: self.class.table_name,
82
+ primary_key: pk,
83
+ column: name
84
+ )
85
+ raise
86
+ end
45
87
  end
46
88
  end
47
89
  end
@@ -52,6 +94,78 @@ module Legion
52
94
  end
53
95
 
54
96
  module InstanceMethods
97
+ def after_create
98
+ super
99
+ reencrypt_pending_encrypted_columns
100
+ end
101
+
102
+ private
103
+
104
+ def decrypt_encrypted_column(column, raw, key_scope:)
105
+ provider = self.class.encryption_key_provider
106
+ tenant = key_scope == :tenant ? self[:tenant_id] : nil
107
+ key = provider.key_for(tenant_id: tenant)
108
+
109
+ Legion::Data::Encryption::SequelPlugin.decrypt_value(
110
+ blob: raw.b,
111
+ key: key,
112
+ table_name: self.class.table_name,
113
+ primary_key: pk,
114
+ column: column
115
+ )
116
+ end
117
+
118
+ def encrypt_encrypted_column(column, value, key_scope:, primary_key:)
119
+ provider = self.class.encryption_key_provider
120
+ tenant = key_scope == :tenant ? self[:tenant_id] : nil
121
+ key = provider.key_for(tenant_id: tenant)
122
+ aad = Legion::Data::Encryption::SequelPlugin.aad_for(
123
+ table_name: self.class.table_name,
124
+ primary_key: primary_key,
125
+ column: column
126
+ )
127
+ encrypted = Legion::Data::Encryption::Cipher.encrypt(value.to_s, key: key, aad: aad)
128
+ Sequel.blob(encrypted)
129
+ end
130
+
131
+ def pending_encrypted_columns
132
+ @pending_encrypted_columns ||= {}
133
+ end
134
+
135
+ def remember_pending_encrypted_column(column, value, key_scope:)
136
+ pending_encrypted_columns[column] = { key_scope: key_scope, value: value.to_s }
137
+ end
138
+
139
+ def clear_pending_encrypted_column(column)
140
+ pending_encrypted_columns.delete(column) if defined?(@pending_encrypted_columns)
141
+ end
142
+
143
+ def reencrypt_pending_encrypted_columns
144
+ return if pending_encrypted_columns.empty?
145
+
146
+ encrypted_values = pending_encrypted_columns.each_with_object({}) do |(column, config), updates|
147
+ updates[column] = encrypt_encrypted_column(
148
+ column,
149
+ config[:value],
150
+ key_scope: config[:key_scope],
151
+ primary_key: pk
152
+ )
153
+ end
154
+
155
+ self.class.where(pk_hash).update(encrypted_values)
156
+ encrypted_values.each { |column, encrypted| values[column] = encrypted }
157
+ pending_encrypted_columns.clear
158
+ rescue StandardError => e
159
+ Legion::Data::Encryption::SequelPlugin.handle_exception(
160
+ e,
161
+ level: :error,
162
+ handled: false,
163
+ operation: :reencrypt_pending_columns,
164
+ table: self.class.table_name,
165
+ primary_key: pk
166
+ )
167
+ raise
168
+ end
55
169
  end
56
170
  end
57
171
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'digest'
4
5
 
5
6
  module Legion
@@ -14,6 +15,8 @@ module Legion
14
15
  ].freeze
15
16
 
16
17
  class << self
18
+ include Legion::Logging::Helper
19
+
17
20
  def append(stream:, type:, data: {}, metadata: {})
18
21
  return { error: 'db unavailable' } unless db_ready?
19
22
 
@@ -29,7 +32,7 @@ module Legion
29
32
 
30
33
  data_json = Legion::JSON.dump(data)
31
34
  metadata_json = Legion::JSON.dump(metadata)
32
- event_hash = compute_hash(stream, seq, type, data_json, prev_hash)
35
+ event_hash = compute_hash(stream, seq, type, data_json, metadata_json, prev_hash)
33
36
 
34
37
  conn[:governance_events].insert(
35
38
  stream_id: stream,
@@ -42,7 +45,7 @@ module Legion
42
45
  created_at: Time.now
43
46
  )
44
47
 
45
- Legion::Logging.debug "EventStore append: stream=#{stream} type=#{type} seq=#{seq}" if defined?(Legion::Logging)
48
+ log.debug "EventStore append: stream=#{stream} type=#{type} seq=#{seq}"
46
49
  { stream: stream, sequence: seq, hash: event_hash }
47
50
  end
48
51
  end
@@ -72,27 +75,43 @@ module Legion
72
75
  .all
73
76
 
74
77
  prev_hash = '0' * 64
78
+ legacy_hashes = 0
75
79
  events.each do |e|
76
- expected = compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], prev_hash)
77
- unless e[:event_hash] == expected
78
- Legion::Logging.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}" if defined?(Legion::Logging)
80
+ expected = compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], e[:metadata_json], prev_hash)
81
+ legacy_expected = legacy_compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], prev_hash)
82
+
83
+ unless [expected, legacy_expected].include?(e[:event_hash])
84
+ log.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}"
79
85
  return { valid: false, broken_at: e[:sequence_number] }
80
86
  end
81
87
  unless e[:previous_hash] == prev_hash
82
- Legion::Logging.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}" if defined?(Legion::Logging)
88
+ log.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}"
83
89
  return { valid: false, broken_at: e[:sequence_number] }
84
90
  end
85
91
 
92
+ legacy_hashes += 1 if e[:event_hash] == legacy_expected && e[:event_hash] != expected
86
93
  prev_hash = e[:event_hash]
87
94
  end
88
95
 
89
- { valid: true, length: events.size }
96
+ result = { valid: true, length: events.size }
97
+ result[:legacy_hashes] = legacy_hashes if legacy_hashes.positive?
98
+ result
90
99
  end
91
100
 
92
101
  private
93
102
 
94
- def compute_hash(stream, seq, type, data_json, prev_hash)
95
- Digest::SHA256.hexdigest("#{stream}:#{seq}:#{type}:#{data_json}:#{prev_hash}")
103
+ def compute_hash(stream, seq, type, data_json, metadata_json, prev_hash)
104
+ Digest::SHA256.hexdigest(
105
+ "#{stream}:#{seq}:#{type}:#{normalized_json(data_json)}:#{normalized_json(metadata_json)}:#{prev_hash}"
106
+ )
107
+ end
108
+
109
+ def legacy_compute_hash(stream, seq, type, data_json, prev_hash)
110
+ Digest::SHA256.hexdigest("#{stream}:#{seq}:#{type}:#{normalized_json(data_json)}:#{prev_hash}")
111
+ end
112
+
113
+ def normalized_json(json)
114
+ json || '{}'
96
115
  end
97
116
 
98
117
  def deserialize(event)
@@ -111,7 +130,7 @@ module Legion
111
130
  def db_ready?
112
131
  defined?(Legion::Data) && Legion::Data.connection&.table_exists?(:governance_events)
113
132
  rescue StandardError => e
114
- Legion::Logging.debug("EventStore#db_ready? check failed: #{e.message}") if defined?(Legion::Logging)
133
+ handle_exception(e, level: :warn, handled: true, operation: :event_store_db_ready?)
115
134
  false
116
135
  end
117
136
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
4
+
3
5
  module Legion
4
6
  module Data
5
7
  module Extract
@@ -8,6 +10,8 @@ module Legion
8
10
  @registry = {}.freeze
9
11
 
10
12
  class << self
13
+ include Legion::Logging::Helper
14
+
11
15
  attr_reader :registry
12
16
 
13
17
  def inherited(subclass)
@@ -22,6 +26,7 @@ module Legion
22
26
  end
23
27
 
24
28
  def register(handler_class)
29
+ log.debug "Registered extract handler type=#{handler_class.type} class=#{handler_class.name}"
25
30
  @registry = @registry.merge(handler_class.type => handler_class).freeze
26
31
  end
27
32
 
@@ -47,7 +52,8 @@ module Legion
47
52
 
48
53
  require gem_name
49
54
  true
50
- rescue LoadError
55
+ rescue LoadError => e
56
+ handle_exception(e, level: :debug, handled: true, operation: :extract_handler_available, handler: name, gem: gem_name)
51
57
  false
52
58
  end
53
59
  end
@@ -17,6 +17,7 @@ module Legion
17
17
  text = table.map { |row| row.to_h.map { |k, v| "#{k}: #{v}" }.join(', ') }.join("\n")
18
18
  { text: text, metadata: { rows: table.size, columns: table.headers.size, headers: table.headers } }
19
19
  rescue StandardError => e
20
+ handle_exception(e, level: :warn, handled: true, operation: :extract_csv)
20
21
  { text: nil, error: e.message }
21
22
  end
22
23
  end
@@ -16,9 +16,11 @@ module Legion
16
16
  paragraphs = doc.paragraphs.map(&:text).reject(&:empty?)
17
17
  text = paragraphs.join("\n\n")
18
18
  { text: text, metadata: { paragraphs: paragraphs.size } }
19
- rescue LoadError
19
+ rescue LoadError => e
20
+ handle_exception(e, level: :warn, handled: true, operation: :extract_docx, gem: gem_name)
20
21
  { text: nil, error: :gem_not_installed, gem: gem_name }
21
22
  rescue StandardError => e
23
+ handle_exception(e, level: :warn, handled: true, operation: :extract_docx)
22
24
  { text: nil, error: e.message }
23
25
  end
24
26
  end
@@ -21,9 +21,11 @@ module Legion
21
21
  title = doc.at_css('title')&.text&.strip
22
22
  text = doc.text.gsub(/\s+/, ' ').strip
23
23
  { text: text, metadata: { title: title } }
24
- rescue LoadError
24
+ rescue LoadError => e
25
+ handle_exception(e, level: :warn, handled: true, operation: :extract_html, gem: gem_name)
25
26
  { text: nil, error: :gem_not_installed, gem: gem_name }
26
27
  rescue StandardError => e
28
+ handle_exception(e, level: :warn, handled: true, operation: :extract_html)
27
29
  { text: nil, error: e.message }
28
30
  end
29
31
  end
@@ -17,6 +17,7 @@ module Legion
17
17
  text = ::JSON.pretty_generate(parsed)
18
18
  { text: text, metadata: { keys: parsed.is_a?(Hash) ? parsed.keys : nil } }
19
19
  rescue StandardError => e
20
+ handle_exception(e, level: :warn, handled: true, operation: :extract_json)
20
21
  { text: nil, error: e.message }
21
22
  end
22
23
  end
@@ -17,6 +17,7 @@ module Legion
17
17
  text = lines.map { |l| l.is_a?(Hash) ? ::JSON.pretty_generate(l) : l }.join("\n---\n")
18
18
  { text: text, metadata: { lines: lines.size } }
19
19
  rescue StandardError => e
20
+ handle_exception(e, level: :warn, handled: true, operation: :extract_jsonl)
20
21
  { text: nil, error: e.message }
21
22
  end
22
23
  end
@@ -15,6 +15,7 @@ module Legion
15
15
  text = content.sub(/\A---\n.*?\n---\n/m, '')
16
16
  { text: text.strip, metadata: { bytes: content.bytesize, has_frontmatter: content != text } }
17
17
  rescue StandardError => e
18
+ handle_exception(e, level: :warn, handled: true, operation: :extract_markdown)
18
19
  { text: nil, error: e.message }
19
20
  end
20
21
  end
@@ -15,9 +15,11 @@ module Legion
15
15
  reader = ::PDF::Reader.new(source)
16
16
  text = reader.pages.map(&:text).join("\n\n")
17
17
  { text: text, metadata: { pages: reader.page_count, title: reader.info[:Title] } }
18
- rescue LoadError
18
+ rescue LoadError => e
19
+ handle_exception(e, level: :warn, handled: true, operation: :extract_pdf, gem: gem_name)
19
20
  { text: nil, error: :gem_not_installed, gem: gem_name }
20
21
  rescue StandardError => e
22
+ handle_exception(e, level: :warn, handled: true, operation: :extract_pdf)
21
23
  { text: nil, error: e.message }
22
24
  end
23
25
  end
@@ -24,9 +24,11 @@ module Legion
24
24
  end
25
25
  text = slides.each_with_index.map { |s, i| "Slide #{i + 1}: #{s}" }.join("\n\n")
26
26
  { text: text, metadata: { slides: slides.size } }
27
- rescue LoadError
27
+ rescue LoadError => e
28
+ handle_exception(e, level: :warn, handled: true, operation: :extract_pptx, gem: gem_name)
28
29
  { text: nil, error: :gem_not_installed, gem: 'rubyzip' }
29
30
  rescue StandardError => e
31
+ handle_exception(e, level: :warn, handled: true, operation: :extract_pptx)
30
32
  { text: nil, error: e.message }
31
33
  end
32
34
  end
@@ -13,6 +13,7 @@ module Legion
13
13
  content = source.respond_to?(:read) ? source.read : File.read(source.to_s)
14
14
  { text: content, metadata: { bytes: content.bytesize } }
15
15
  rescue StandardError => e
16
+ handle_exception(e, level: :warn, handled: true, operation: :extract_text)
16
17
  { text: nil, error: e.message }
17
18
  end
18
19
  end