legion-data 1.6.18 → 1.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/Gemfile +1 -0
- data/legion-data.gemspec +2 -2
- data/lib/legion/data/archival/policy.rb +7 -1
- data/lib/legion/data/archival.rb +27 -4
- data/lib/legion/data/archiver.rb +103 -51
- data/lib/legion/data/audit_record.rb +8 -5
- data/lib/legion/data/connection.rb +88 -17
- data/lib/legion/data/encryption/key_provider.rb +9 -2
- data/lib/legion/data/encryption/sequel_plugin.rb +126 -12
- data/lib/legion/data/event_store.rb +29 -10
- data/lib/legion/data/extract/handlers/base.rb +7 -1
- data/lib/legion/data/extract/handlers/csv.rb +1 -0
- data/lib/legion/data/extract/handlers/docx.rb +3 -1
- data/lib/legion/data/extract/handlers/html.rb +3 -1
- data/lib/legion/data/extract/handlers/json.rb +1 -0
- data/lib/legion/data/extract/handlers/jsonl.rb +1 -0
- data/lib/legion/data/extract/handlers/markdown.rb +1 -0
- data/lib/legion/data/extract/handlers/pdf.rb +3 -1
- data/lib/legion/data/extract/handlers/pptx.rb +3 -1
- data/lib/legion/data/extract/handlers/text.rb +1 -0
- data/lib/legion/data/extract/handlers/vtt.rb +1 -0
- data/lib/legion/data/extract/handlers/xlsx.rb +3 -1
- data/lib/legion/data/extract.rb +7 -0
- data/lib/legion/data/helper.rb +16 -6
- data/lib/legion/data/local.rb +62 -5
- data/lib/legion/data/migration.rb +6 -1
- data/lib/legion/data/migrations/044_expand_memory_traces.rb +4 -1
- data/lib/legion/data/model.rb +8 -4
- data/lib/legion/data/models/audit_log.rb +5 -1
- data/lib/legion/data/models/audit_record.rb +5 -1
- data/lib/legion/data/models/function.rb +5 -1
- data/lib/legion/data/models/node.rb +6 -2
- data/lib/legion/data/partition_manager.rb +15 -19
- data/lib/legion/data/retention.rb +31 -2
- data/lib/legion/data/rls.rb +8 -2
- data/lib/legion/data/settings.rb +5 -1
- data/lib/legion/data/spool.rb +69 -6
- data/lib/legion/data/storage_tiers.rb +16 -3
- data/lib/legion/data/vector.rb +9 -5
- data/lib/legion/data/version.rb +1 -1
- data/lib/legion/data.rb +39 -12
- metadata +5 -5
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
3
5
|
require 'fileutils'
|
|
4
6
|
require 'sequel'
|
|
5
7
|
|
|
@@ -28,6 +30,8 @@ module Legion
|
|
|
28
30
|
# Prefixes warn-level messages with [slow-query] since Sequel uses warn
|
|
29
31
|
# for queries exceeding log_warn_duration.
|
|
30
32
|
class SlowQueryLogger
|
|
33
|
+
attr_reader :tagged
|
|
34
|
+
|
|
31
35
|
def initialize(tagged_logger)
|
|
32
36
|
@tagged = tagged_logger
|
|
33
37
|
end
|
|
@@ -49,9 +53,52 @@ module Legion
|
|
|
49
53
|
end
|
|
50
54
|
end
|
|
51
55
|
|
|
56
|
+
class SegmentedTaggedLogger
|
|
57
|
+
attr_reader :segments
|
|
58
|
+
|
|
59
|
+
def initialize(segments:, logger: nil)
|
|
60
|
+
@segments = segments
|
|
61
|
+
@logger = logger || Legion::Logging
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def warn(message)
|
|
65
|
+
with_segments { dispatch(:warn, message) }
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def info(message)
|
|
69
|
+
with_segments { dispatch(:info, message) }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def debug(message)
|
|
73
|
+
with_segments { dispatch(:debug, message) }
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def error(message)
|
|
77
|
+
with_segments { dispatch(:error, message) }
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
def dispatch(level, message)
|
|
83
|
+
return unless @logger.respond_to?(level)
|
|
84
|
+
|
|
85
|
+
@logger.public_send(level, message)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def with_segments
|
|
89
|
+
previous = Thread.current[:legion_log_segments]
|
|
90
|
+
Thread.current[:legion_log_segments] = @segments
|
|
91
|
+
yield
|
|
92
|
+
ensure
|
|
93
|
+
Thread.current[:legion_log_segments] = previous
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
52
97
|
# File-based query logger that writes all SQL to a dedicated log file.
|
|
53
98
|
# Isolated from the main Legion::Logging domain.
|
|
54
99
|
class QueryFileLogger
|
|
100
|
+
include Legion::Logging::Helper
|
|
101
|
+
|
|
55
102
|
attr_reader :path
|
|
56
103
|
|
|
57
104
|
def initialize(path)
|
|
@@ -90,12 +137,15 @@ module Legion
|
|
|
90
137
|
@mutex.synchronize do
|
|
91
138
|
@file.puts "[#{Time.now.strftime('%Y-%m-%d %H:%M:%S.%L')}] #{level} #{message}"
|
|
92
139
|
end
|
|
93
|
-
rescue IOError
|
|
140
|
+
rescue IOError => e
|
|
141
|
+
handle_exception(e, level: :warn, handled: true, operation: :query_file_write, path: @path)
|
|
94
142
|
nil
|
|
95
143
|
end
|
|
96
144
|
end
|
|
97
145
|
|
|
98
146
|
class << self
|
|
147
|
+
include Legion::Logging::Helper
|
|
148
|
+
|
|
99
149
|
attr_accessor :sequel
|
|
100
150
|
|
|
101
151
|
def adapter
|
|
@@ -104,26 +154,23 @@ module Legion
|
|
|
104
154
|
|
|
105
155
|
def setup
|
|
106
156
|
opts = sequel_opts
|
|
157
|
+
log.info("Legion::Data::Connection setup adapter=#{adapter}")
|
|
107
158
|
@sequel = if adapter == :sqlite
|
|
108
159
|
::Sequel.connect(opts.merge(adapter: :sqlite, database: sqlite_path))
|
|
109
160
|
else
|
|
110
161
|
begin
|
|
111
|
-
::Sequel.connect(
|
|
162
|
+
::Sequel.connect(connection_opts_for(adapter: adapter, opts: opts))
|
|
112
163
|
rescue StandardError => e
|
|
113
164
|
raise unless dev_fallback?
|
|
114
165
|
|
|
115
|
-
|
|
116
|
-
Legion::Logging.warn(
|
|
117
|
-
"Shared DB unreachable (#{e.message}), dev_mode fallback to SQLite"
|
|
118
|
-
)
|
|
119
|
-
end
|
|
166
|
+
handle_exception(e, level: :warn, handled: true, operation: :shared_connect, fallback: :sqlite)
|
|
120
167
|
@adapter = :sqlite
|
|
121
168
|
sqlite_opts = sequel_opts
|
|
122
169
|
::Sequel.connect(sqlite_opts.merge(adapter: :sqlite, database: sqlite_path))
|
|
123
170
|
end
|
|
124
171
|
end
|
|
125
172
|
Legion::Settings[:data][:connected] = true
|
|
126
|
-
log_connection_info
|
|
173
|
+
log_connection_info
|
|
127
174
|
configure_extensions
|
|
128
175
|
connect_with_replicas
|
|
129
176
|
end
|
|
@@ -140,6 +187,7 @@ module Legion
|
|
|
140
187
|
database: database_stats
|
|
141
188
|
}
|
|
142
189
|
rescue StandardError => e
|
|
190
|
+
handle_exception(e, level: :warn, handled: true, operation: :data_connection_stats, adapter: adapter)
|
|
143
191
|
{ connected: (data[:connected] if data.is_a?(Hash)), adapter: adapter, error: e.message }
|
|
144
192
|
end
|
|
145
193
|
|
|
@@ -171,7 +219,8 @@ module Legion
|
|
|
171
219
|
end
|
|
172
220
|
|
|
173
221
|
stats.compact
|
|
174
|
-
rescue StandardError
|
|
222
|
+
rescue StandardError => e
|
|
223
|
+
handle_exception(e, level: :warn, handled: true, operation: :data_pool_stats, adapter: adapter)
|
|
175
224
|
{}
|
|
176
225
|
end
|
|
177
226
|
|
|
@@ -180,7 +229,7 @@ module Legion
|
|
|
180
229
|
@query_file_logger&.close
|
|
181
230
|
@query_file_logger = nil
|
|
182
231
|
Legion::Settings[:data][:connected] = false
|
|
183
|
-
|
|
232
|
+
log.info 'Legion::Data connection closed'
|
|
184
233
|
end
|
|
185
234
|
|
|
186
235
|
def connect_with_replicas
|
|
@@ -202,7 +251,7 @@ module Legion
|
|
|
202
251
|
end
|
|
203
252
|
|
|
204
253
|
@replica_servers = replica_list.each_with_index.map { |_, idx| :"read_#{idx}" }
|
|
205
|
-
|
|
254
|
+
log.debug "Registered #{@replica_servers.size} read replica(s)"
|
|
206
255
|
end
|
|
207
256
|
|
|
208
257
|
def read_server
|
|
@@ -258,20 +307,20 @@ module Legion
|
|
|
258
307
|
|
|
259
308
|
Legion::Settings[:data][:tls] || {}
|
|
260
309
|
rescue StandardError => e
|
|
261
|
-
|
|
310
|
+
handle_exception(e, level: :warn, handled: true, operation: :data_tls_settings)
|
|
262
311
|
{}
|
|
263
312
|
end
|
|
264
313
|
|
|
265
314
|
def log_connection_info
|
|
266
315
|
if adapter == :sqlite
|
|
267
|
-
|
|
316
|
+
log.info "Connected to SQLite at #{sqlite_path}"
|
|
268
317
|
else
|
|
269
318
|
actual = Legion::Settings[:data][:creds] || {}
|
|
270
319
|
user = actual[:user] || actual[:username] || 'unknown'
|
|
271
320
|
host = actual[:host] || '127.0.0.1'
|
|
272
321
|
port = actual[:port]
|
|
273
322
|
db = actual[:database] || actual[:db]
|
|
274
|
-
|
|
323
|
+
log.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
|
|
275
324
|
end
|
|
276
325
|
end
|
|
277
326
|
|
|
@@ -284,6 +333,12 @@ module Legion
|
|
|
284
333
|
Legion::Settings[:data][:creds][:database] || 'legionio.db'
|
|
285
334
|
end
|
|
286
335
|
|
|
336
|
+
def connection_opts_for(adapter:, opts:)
|
|
337
|
+
connection_opts = opts.merge(adapter: adapter, **creds_builder)
|
|
338
|
+
connection_opts[:preconnect] = false if adapter != :sqlite && dev_fallback?
|
|
339
|
+
connection_opts
|
|
340
|
+
end
|
|
341
|
+
|
|
287
342
|
def sequel_opts
|
|
288
343
|
data = Legion::Settings[:data]
|
|
289
344
|
opts = {}
|
|
@@ -356,6 +411,7 @@ module Legion
|
|
|
356
411
|
else {}
|
|
357
412
|
end
|
|
358
413
|
rescue StandardError => e
|
|
414
|
+
handle_exception(e, level: :warn, handled: true, operation: :data_database_stats, adapter: adapter)
|
|
359
415
|
{ error: e.message }
|
|
360
416
|
end
|
|
361
417
|
|
|
@@ -366,7 +422,8 @@ module Legion
|
|
|
366
422
|
cache_size busy_timeout].each do |pragma|
|
|
367
423
|
val = begin
|
|
368
424
|
db.fetch("PRAGMA #{pragma}").single_value
|
|
369
|
-
rescue StandardError
|
|
425
|
+
rescue StandardError => e
|
|
426
|
+
handle_exception(e, level: :warn, handled: true, operation: :sqlite_stats_pragma, pragma: pragma)
|
|
370
427
|
nil
|
|
371
428
|
end
|
|
372
429
|
stats[pragma.to_sym] = val unless val.nil?
|
|
@@ -457,12 +514,26 @@ module Legion
|
|
|
457
514
|
@sequel.pool.connection_expiration_timeout = data[:connection_expiration_timeout] || 14_400
|
|
458
515
|
end
|
|
459
516
|
rescue StandardError => e
|
|
460
|
-
|
|
517
|
+
handle_exception(e, level: :warn, handled: true, operation: :configure_extensions, adapter: adapter)
|
|
461
518
|
end
|
|
462
519
|
|
|
463
520
|
def build_data_logger
|
|
464
|
-
tagged = Legion::Logging::
|
|
521
|
+
tagged = if defined?(Legion::Logging::TaggedLogger) && respond_to?(:tagged_logger_settings, true)
|
|
522
|
+
Legion::Logging::TaggedLogger.new(
|
|
523
|
+
segments: %w[data connection],
|
|
524
|
+
**send(:tagged_logger_settings)
|
|
525
|
+
)
|
|
526
|
+
else
|
|
527
|
+
SegmentedTaggedLogger.new(segments: %w[data connection])
|
|
528
|
+
end
|
|
465
529
|
SlowQueryLogger.new(tagged)
|
|
530
|
+
rescue StandardError => e
|
|
531
|
+
if respond_to?(:handle_exception, true)
|
|
532
|
+
handle_exception(e, level: :warn, handled: true, operation: :build_data_logger)
|
|
533
|
+
else
|
|
534
|
+
log.warn("build_data_logger failed: #{e.class}: #{e.message}")
|
|
535
|
+
end
|
|
536
|
+
SlowQueryLogger.new(SegmentedTaggedLogger.new(segments: %w[data connection], logger: log))
|
|
466
537
|
end
|
|
467
538
|
end
|
|
468
539
|
end
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
require 'openssl'
|
|
4
5
|
|
|
5
6
|
module Legion
|
|
6
7
|
module Data
|
|
7
8
|
module Encryption
|
|
8
9
|
class KeyProvider
|
|
10
|
+
include Legion::Logging::Helper
|
|
11
|
+
|
|
9
12
|
def initialize(mode: :auto)
|
|
10
13
|
@mode = mode
|
|
11
14
|
@key_cache = {}
|
|
@@ -18,20 +21,24 @@ module Legion
|
|
|
18
21
|
|
|
19
22
|
def clear_cache!
|
|
20
23
|
@key_cache.clear
|
|
24
|
+
log.debug 'Cleared encryption key cache'
|
|
21
25
|
end
|
|
22
26
|
|
|
23
27
|
private
|
|
24
28
|
|
|
25
29
|
def derive_key(tenant_id)
|
|
26
30
|
if tenant_id && crypt_available?
|
|
27
|
-
|
|
31
|
+
log.debug "Deriving Vault key for tenant #{tenant_id}"
|
|
28
32
|
Legion::Crypt::PartitionKeys.derive(tenant_id: tenant_id)
|
|
29
33
|
elsif crypt_available?
|
|
30
34
|
Legion::Crypt.default_encryption_key
|
|
31
35
|
else
|
|
32
|
-
|
|
36
|
+
log.warn 'Legion::Crypt unavailable, falling back to dev encryption key'
|
|
33
37
|
local_key
|
|
34
38
|
end
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
handle_exception(e, level: :error, handled: false, operation: :derive_key, tenant_id: tenant_id)
|
|
41
|
+
raise
|
|
35
42
|
end
|
|
36
43
|
|
|
37
44
|
def crypt_available?
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
require_relative 'cipher'
|
|
4
5
|
require_relative 'key_provider'
|
|
5
6
|
|
|
@@ -7,6 +8,33 @@ module Legion
|
|
|
7
8
|
module Data
|
|
8
9
|
module Encryption
|
|
9
10
|
module SequelPlugin
|
|
11
|
+
extend Legion::Logging::Helper
|
|
12
|
+
|
|
13
|
+
class << self
|
|
14
|
+
def aad_for(table_name:, primary_key:, column:)
|
|
15
|
+
"#{table_name}:#{primary_key || 0}:#{column}"
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def decrypt_value(blob:, key:, table_name:, primary_key:, column:)
|
|
19
|
+
errors = []
|
|
20
|
+
|
|
21
|
+
aad_candidates(primary_key).each do |aad_primary_key|
|
|
22
|
+
aad = aad_for(table_name: table_name, primary_key: aad_primary_key, column: column)
|
|
23
|
+
return Legion::Data::Encryption::Cipher.decrypt(blob, key: key, aad: aad)
|
|
24
|
+
rescue OpenSSL::Cipher::CipherError, ArgumentError => e
|
|
25
|
+
errors << e
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
raise errors.last if errors.any?
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
def aad_candidates(primary_key)
|
|
34
|
+
[primary_key, 0].compact.uniq
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
10
38
|
module ClassMethods
|
|
11
39
|
def encrypted_columns
|
|
12
40
|
@encrypted_columns ||= {}
|
|
@@ -20,28 +48,42 @@ module Legion
|
|
|
20
48
|
raw = super()
|
|
21
49
|
return nil if raw.nil?
|
|
22
50
|
|
|
23
|
-
provider = self.class.encryption_key_provider
|
|
24
|
-
tenant = col_scope == :tenant ? self[:tenant_id] : nil
|
|
25
|
-
key = provider.key_for(tenant_id: tenant)
|
|
26
|
-
aad = "#{self.class.table_name}:#{pk}:#{name}"
|
|
27
51
|
begin
|
|
28
|
-
|
|
52
|
+
decrypt_encrypted_column(name, raw, key_scope: col_scope)
|
|
29
53
|
rescue StandardError => e
|
|
30
|
-
Legion::
|
|
54
|
+
Legion::Data::Encryption::SequelPlugin.handle_exception(
|
|
55
|
+
e,
|
|
56
|
+
level: :warn,
|
|
57
|
+
handled: false,
|
|
58
|
+
operation: :decrypt_column,
|
|
59
|
+
table: self.class.table_name,
|
|
60
|
+
primary_key: pk,
|
|
61
|
+
column: name
|
|
62
|
+
)
|
|
31
63
|
raise
|
|
32
64
|
end
|
|
33
65
|
end
|
|
34
66
|
|
|
35
67
|
define_method(:"#{name}=") do |value|
|
|
36
68
|
if value.nil?
|
|
69
|
+
clear_pending_encrypted_column(name)
|
|
37
70
|
super(nil)
|
|
38
71
|
else
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
72
|
+
begin
|
|
73
|
+
remember_pending_encrypted_column(name, value, key_scope: col_scope) if new?
|
|
74
|
+
super(encrypt_encrypted_column(name, value, key_scope: col_scope, primary_key: pk || 0))
|
|
75
|
+
rescue StandardError => e
|
|
76
|
+
Legion::Data::Encryption::SequelPlugin.handle_exception(
|
|
77
|
+
e,
|
|
78
|
+
level: :error,
|
|
79
|
+
handled: false,
|
|
80
|
+
operation: :encrypt_column,
|
|
81
|
+
table: self.class.table_name,
|
|
82
|
+
primary_key: pk,
|
|
83
|
+
column: name
|
|
84
|
+
)
|
|
85
|
+
raise
|
|
86
|
+
end
|
|
45
87
|
end
|
|
46
88
|
end
|
|
47
89
|
end
|
|
@@ -52,6 +94,78 @@ module Legion
|
|
|
52
94
|
end
|
|
53
95
|
|
|
54
96
|
module InstanceMethods
|
|
97
|
+
def after_create
|
|
98
|
+
super
|
|
99
|
+
reencrypt_pending_encrypted_columns
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
def decrypt_encrypted_column(column, raw, key_scope:)
|
|
105
|
+
provider = self.class.encryption_key_provider
|
|
106
|
+
tenant = key_scope == :tenant ? self[:tenant_id] : nil
|
|
107
|
+
key = provider.key_for(tenant_id: tenant)
|
|
108
|
+
|
|
109
|
+
Legion::Data::Encryption::SequelPlugin.decrypt_value(
|
|
110
|
+
blob: raw.b,
|
|
111
|
+
key: key,
|
|
112
|
+
table_name: self.class.table_name,
|
|
113
|
+
primary_key: pk,
|
|
114
|
+
column: column
|
|
115
|
+
)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def encrypt_encrypted_column(column, value, key_scope:, primary_key:)
|
|
119
|
+
provider = self.class.encryption_key_provider
|
|
120
|
+
tenant = key_scope == :tenant ? self[:tenant_id] : nil
|
|
121
|
+
key = provider.key_for(tenant_id: tenant)
|
|
122
|
+
aad = Legion::Data::Encryption::SequelPlugin.aad_for(
|
|
123
|
+
table_name: self.class.table_name,
|
|
124
|
+
primary_key: primary_key,
|
|
125
|
+
column: column
|
|
126
|
+
)
|
|
127
|
+
encrypted = Legion::Data::Encryption::Cipher.encrypt(value.to_s, key: key, aad: aad)
|
|
128
|
+
Sequel.blob(encrypted)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def pending_encrypted_columns
|
|
132
|
+
@pending_encrypted_columns ||= {}
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def remember_pending_encrypted_column(column, value, key_scope:)
|
|
136
|
+
pending_encrypted_columns[column] = { key_scope: key_scope, value: value.to_s }
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def clear_pending_encrypted_column(column)
|
|
140
|
+
pending_encrypted_columns.delete(column) if defined?(@pending_encrypted_columns)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def reencrypt_pending_encrypted_columns
|
|
144
|
+
return if pending_encrypted_columns.empty?
|
|
145
|
+
|
|
146
|
+
encrypted_values = pending_encrypted_columns.each_with_object({}) do |(column, config), updates|
|
|
147
|
+
updates[column] = encrypt_encrypted_column(
|
|
148
|
+
column,
|
|
149
|
+
config[:value],
|
|
150
|
+
key_scope: config[:key_scope],
|
|
151
|
+
primary_key: pk
|
|
152
|
+
)
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
self.class.where(pk_hash).update(encrypted_values)
|
|
156
|
+
encrypted_values.each { |column, encrypted| values[column] = encrypted }
|
|
157
|
+
pending_encrypted_columns.clear
|
|
158
|
+
rescue StandardError => e
|
|
159
|
+
Legion::Data::Encryption::SequelPlugin.handle_exception(
|
|
160
|
+
e,
|
|
161
|
+
level: :error,
|
|
162
|
+
handled: false,
|
|
163
|
+
operation: :reencrypt_pending_columns,
|
|
164
|
+
table: self.class.table_name,
|
|
165
|
+
primary_key: pk
|
|
166
|
+
)
|
|
167
|
+
raise
|
|
168
|
+
end
|
|
55
169
|
end
|
|
56
170
|
end
|
|
57
171
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
require 'digest'
|
|
4
5
|
|
|
5
6
|
module Legion
|
|
@@ -14,6 +15,8 @@ module Legion
|
|
|
14
15
|
].freeze
|
|
15
16
|
|
|
16
17
|
class << self
|
|
18
|
+
include Legion::Logging::Helper
|
|
19
|
+
|
|
17
20
|
def append(stream:, type:, data: {}, metadata: {})
|
|
18
21
|
return { error: 'db unavailable' } unless db_ready?
|
|
19
22
|
|
|
@@ -29,7 +32,7 @@ module Legion
|
|
|
29
32
|
|
|
30
33
|
data_json = Legion::JSON.dump(data)
|
|
31
34
|
metadata_json = Legion::JSON.dump(metadata)
|
|
32
|
-
event_hash = compute_hash(stream, seq, type, data_json, prev_hash)
|
|
35
|
+
event_hash = compute_hash(stream, seq, type, data_json, metadata_json, prev_hash)
|
|
33
36
|
|
|
34
37
|
conn[:governance_events].insert(
|
|
35
38
|
stream_id: stream,
|
|
@@ -42,7 +45,7 @@ module Legion
|
|
|
42
45
|
created_at: Time.now
|
|
43
46
|
)
|
|
44
47
|
|
|
45
|
-
|
|
48
|
+
log.debug "EventStore append: stream=#{stream} type=#{type} seq=#{seq}"
|
|
46
49
|
{ stream: stream, sequence: seq, hash: event_hash }
|
|
47
50
|
end
|
|
48
51
|
end
|
|
@@ -72,27 +75,43 @@ module Legion
|
|
|
72
75
|
.all
|
|
73
76
|
|
|
74
77
|
prev_hash = '0' * 64
|
|
78
|
+
legacy_hashes = 0
|
|
75
79
|
events.each do |e|
|
|
76
|
-
expected = compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], prev_hash)
|
|
77
|
-
|
|
78
|
-
|
|
80
|
+
expected = compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], e[:metadata_json], prev_hash)
|
|
81
|
+
legacy_expected = legacy_compute_hash(stream, e[:sequence_number], e[:event_type], e[:data_json], prev_hash)
|
|
82
|
+
|
|
83
|
+
unless [expected, legacy_expected].include?(e[:event_hash])
|
|
84
|
+
log.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}"
|
|
79
85
|
return { valid: false, broken_at: e[:sequence_number] }
|
|
80
86
|
end
|
|
81
87
|
unless e[:previous_hash] == prev_hash
|
|
82
|
-
|
|
88
|
+
log.warn "EventStore chain broken: stream=#{stream} seq=#{e[:sequence_number]}"
|
|
83
89
|
return { valid: false, broken_at: e[:sequence_number] }
|
|
84
90
|
end
|
|
85
91
|
|
|
92
|
+
legacy_hashes += 1 if e[:event_hash] == legacy_expected && e[:event_hash] != expected
|
|
86
93
|
prev_hash = e[:event_hash]
|
|
87
94
|
end
|
|
88
95
|
|
|
89
|
-
{ valid: true, length: events.size }
|
|
96
|
+
result = { valid: true, length: events.size }
|
|
97
|
+
result[:legacy_hashes] = legacy_hashes if legacy_hashes.positive?
|
|
98
|
+
result
|
|
90
99
|
end
|
|
91
100
|
|
|
92
101
|
private
|
|
93
102
|
|
|
94
|
-
def compute_hash(stream, seq, type, data_json, prev_hash)
|
|
95
|
-
Digest::SHA256.hexdigest(
|
|
103
|
+
def compute_hash(stream, seq, type, data_json, metadata_json, prev_hash)
|
|
104
|
+
Digest::SHA256.hexdigest(
|
|
105
|
+
"#{stream}:#{seq}:#{type}:#{normalized_json(data_json)}:#{normalized_json(metadata_json)}:#{prev_hash}"
|
|
106
|
+
)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def legacy_compute_hash(stream, seq, type, data_json, prev_hash)
|
|
110
|
+
Digest::SHA256.hexdigest("#{stream}:#{seq}:#{type}:#{normalized_json(data_json)}:#{prev_hash}")
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def normalized_json(json)
|
|
114
|
+
json || '{}'
|
|
96
115
|
end
|
|
97
116
|
|
|
98
117
|
def deserialize(event)
|
|
@@ -111,7 +130,7 @@ module Legion
|
|
|
111
130
|
def db_ready?
|
|
112
131
|
defined?(Legion::Data) && Legion::Data.connection&.table_exists?(:governance_events)
|
|
113
132
|
rescue StandardError => e
|
|
114
|
-
|
|
133
|
+
handle_exception(e, level: :warn, handled: true, operation: :event_store_db_ready?)
|
|
115
134
|
false
|
|
116
135
|
end
|
|
117
136
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module Data
|
|
5
7
|
module Extract
|
|
@@ -8,6 +10,8 @@ module Legion
|
|
|
8
10
|
@registry = {}.freeze
|
|
9
11
|
|
|
10
12
|
class << self
|
|
13
|
+
include Legion::Logging::Helper
|
|
14
|
+
|
|
11
15
|
attr_reader :registry
|
|
12
16
|
|
|
13
17
|
def inherited(subclass)
|
|
@@ -22,6 +26,7 @@ module Legion
|
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
def register(handler_class)
|
|
29
|
+
log.debug "Registered extract handler type=#{handler_class.type} class=#{handler_class.name}"
|
|
25
30
|
@registry = @registry.merge(handler_class.type => handler_class).freeze
|
|
26
31
|
end
|
|
27
32
|
|
|
@@ -47,7 +52,8 @@ module Legion
|
|
|
47
52
|
|
|
48
53
|
require gem_name
|
|
49
54
|
true
|
|
50
|
-
rescue LoadError
|
|
55
|
+
rescue LoadError => e
|
|
56
|
+
handle_exception(e, level: :debug, handled: true, operation: :extract_handler_available, handler: name, gem: gem_name)
|
|
51
57
|
false
|
|
52
58
|
end
|
|
53
59
|
end
|
|
@@ -17,6 +17,7 @@ module Legion
|
|
|
17
17
|
text = table.map { |row| row.to_h.map { |k, v| "#{k}: #{v}" }.join(', ') }.join("\n")
|
|
18
18
|
{ text: text, metadata: { rows: table.size, columns: table.headers.size, headers: table.headers } }
|
|
19
19
|
rescue StandardError => e
|
|
20
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_csv)
|
|
20
21
|
{ text: nil, error: e.message }
|
|
21
22
|
end
|
|
22
23
|
end
|
|
@@ -16,9 +16,11 @@ module Legion
|
|
|
16
16
|
paragraphs = doc.paragraphs.map(&:text).reject(&:empty?)
|
|
17
17
|
text = paragraphs.join("\n\n")
|
|
18
18
|
{ text: text, metadata: { paragraphs: paragraphs.size } }
|
|
19
|
-
rescue LoadError
|
|
19
|
+
rescue LoadError => e
|
|
20
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_docx, gem: gem_name)
|
|
20
21
|
{ text: nil, error: :gem_not_installed, gem: gem_name }
|
|
21
22
|
rescue StandardError => e
|
|
23
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_docx)
|
|
22
24
|
{ text: nil, error: e.message }
|
|
23
25
|
end
|
|
24
26
|
end
|
|
@@ -21,9 +21,11 @@ module Legion
|
|
|
21
21
|
title = doc.at_css('title')&.text&.strip
|
|
22
22
|
text = doc.text.gsub(/\s+/, ' ').strip
|
|
23
23
|
{ text: text, metadata: { title: title } }
|
|
24
|
-
rescue LoadError
|
|
24
|
+
rescue LoadError => e
|
|
25
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_html, gem: gem_name)
|
|
25
26
|
{ text: nil, error: :gem_not_installed, gem: gem_name }
|
|
26
27
|
rescue StandardError => e
|
|
28
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_html)
|
|
27
29
|
{ text: nil, error: e.message }
|
|
28
30
|
end
|
|
29
31
|
end
|
|
@@ -17,6 +17,7 @@ module Legion
|
|
|
17
17
|
text = ::JSON.pretty_generate(parsed)
|
|
18
18
|
{ text: text, metadata: { keys: parsed.is_a?(Hash) ? parsed.keys : nil } }
|
|
19
19
|
rescue StandardError => e
|
|
20
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_json)
|
|
20
21
|
{ text: nil, error: e.message }
|
|
21
22
|
end
|
|
22
23
|
end
|
|
@@ -17,6 +17,7 @@ module Legion
|
|
|
17
17
|
text = lines.map { |l| l.is_a?(Hash) ? ::JSON.pretty_generate(l) : l }.join("\n---\n")
|
|
18
18
|
{ text: text, metadata: { lines: lines.size } }
|
|
19
19
|
rescue StandardError => e
|
|
20
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_jsonl)
|
|
20
21
|
{ text: nil, error: e.message }
|
|
21
22
|
end
|
|
22
23
|
end
|
|
@@ -15,6 +15,7 @@ module Legion
|
|
|
15
15
|
text = content.sub(/\A---\n.*?\n---\n/m, '')
|
|
16
16
|
{ text: text.strip, metadata: { bytes: content.bytesize, has_frontmatter: content != text } }
|
|
17
17
|
rescue StandardError => e
|
|
18
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_markdown)
|
|
18
19
|
{ text: nil, error: e.message }
|
|
19
20
|
end
|
|
20
21
|
end
|
|
@@ -15,9 +15,11 @@ module Legion
|
|
|
15
15
|
reader = ::PDF::Reader.new(source)
|
|
16
16
|
text = reader.pages.map(&:text).join("\n\n")
|
|
17
17
|
{ text: text, metadata: { pages: reader.page_count, title: reader.info[:Title] } }
|
|
18
|
-
rescue LoadError
|
|
18
|
+
rescue LoadError => e
|
|
19
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_pdf, gem: gem_name)
|
|
19
20
|
{ text: nil, error: :gem_not_installed, gem: gem_name }
|
|
20
21
|
rescue StandardError => e
|
|
22
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_pdf)
|
|
21
23
|
{ text: nil, error: e.message }
|
|
22
24
|
end
|
|
23
25
|
end
|
|
@@ -24,9 +24,11 @@ module Legion
|
|
|
24
24
|
end
|
|
25
25
|
text = slides.each_with_index.map { |s, i| "Slide #{i + 1}: #{s}" }.join("\n\n")
|
|
26
26
|
{ text: text, metadata: { slides: slides.size } }
|
|
27
|
-
rescue LoadError
|
|
27
|
+
rescue LoadError => e
|
|
28
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_pptx, gem: gem_name)
|
|
28
29
|
{ text: nil, error: :gem_not_installed, gem: 'rubyzip' }
|
|
29
30
|
rescue StandardError => e
|
|
31
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_pptx)
|
|
30
32
|
{ text: nil, error: e.message }
|
|
31
33
|
end
|
|
32
34
|
end
|
|
@@ -13,6 +13,7 @@ module Legion
|
|
|
13
13
|
content = source.respond_to?(:read) ? source.read : File.read(source.to_s)
|
|
14
14
|
{ text: content, metadata: { bytes: content.bytesize } }
|
|
15
15
|
rescue StandardError => e
|
|
16
|
+
handle_exception(e, level: :warn, handled: true, operation: :extract_text)
|
|
16
17
|
{ text: nil, error: e.message }
|
|
17
18
|
end
|
|
18
19
|
end
|