legion-data 1.6.18 → 1.6.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Gemfile +1 -0
  4. data/legion-data.gemspec +2 -2
  5. data/lib/legion/data/archival/policy.rb +7 -1
  6. data/lib/legion/data/archival.rb +27 -4
  7. data/lib/legion/data/archiver.rb +103 -51
  8. data/lib/legion/data/audit_record.rb +8 -5
  9. data/lib/legion/data/connection.rb +88 -17
  10. data/lib/legion/data/encryption/key_provider.rb +9 -2
  11. data/lib/legion/data/encryption/sequel_plugin.rb +126 -12
  12. data/lib/legion/data/event_store.rb +29 -10
  13. data/lib/legion/data/extract/handlers/base.rb +7 -1
  14. data/lib/legion/data/extract/handlers/csv.rb +1 -0
  15. data/lib/legion/data/extract/handlers/docx.rb +3 -1
  16. data/lib/legion/data/extract/handlers/html.rb +3 -1
  17. data/lib/legion/data/extract/handlers/json.rb +1 -0
  18. data/lib/legion/data/extract/handlers/jsonl.rb +1 -0
  19. data/lib/legion/data/extract/handlers/markdown.rb +1 -0
  20. data/lib/legion/data/extract/handlers/pdf.rb +3 -1
  21. data/lib/legion/data/extract/handlers/pptx.rb +3 -1
  22. data/lib/legion/data/extract/handlers/text.rb +1 -0
  23. data/lib/legion/data/extract/handlers/vtt.rb +1 -0
  24. data/lib/legion/data/extract/handlers/xlsx.rb +3 -1
  25. data/lib/legion/data/extract.rb +7 -0
  26. data/lib/legion/data/helper.rb +16 -6
  27. data/lib/legion/data/local.rb +53 -5
  28. data/lib/legion/data/migration.rb +6 -1
  29. data/lib/legion/data/migrations/044_expand_memory_traces.rb +4 -1
  30. data/lib/legion/data/model.rb +8 -4
  31. data/lib/legion/data/models/audit_log.rb +5 -1
  32. data/lib/legion/data/models/audit_record.rb +5 -1
  33. data/lib/legion/data/models/function.rb +5 -1
  34. data/lib/legion/data/models/node.rb +6 -2
  35. data/lib/legion/data/partition_manager.rb +15 -19
  36. data/lib/legion/data/retention.rb +31 -2
  37. data/lib/legion/data/rls.rb +8 -2
  38. data/lib/legion/data/settings.rb +5 -1
  39. data/lib/legion/data/spool.rb +69 -6
  40. data/lib/legion/data/storage_tiers.rb +16 -3
  41. data/lib/legion/data/vector.rb +9 -5
  42. data/lib/legion/data/version.rb +1 -1
  43. data/lib/legion/data.rb +39 -12
  44. metadata +5 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e4eef1be3fc0e69e96629e2ffb38904d83215045af3b1b9d6ae411a511b4d429
4
- data.tar.gz: f8591ff1b36e7d4c29506b16d6ce1e221093c84e6afba3663af721bda4eacb9f
3
+ metadata.gz: 65eb28ef61c57c2d1248fe3def497d54253c6f694bb7b5b237b635b382c93487
4
+ data.tar.gz: 3da968d08c194c396029a0a1ccbf81a02afe390257bfca01aac8bc0d89c2169d
5
5
  SHA512:
6
- metadata.gz: d002c25b1355b3da5770277e486a566967513d1502a09f3b5c19f8e905b3973fb9f0d94856f20082b79168a6035e1d0e71c51a610667c23661a3c06bc60dd1bf
7
- data.tar.gz: fbeb431cc34bd8920d77ded479a7beb579bcdb06f500f6174dbcdaef4f6aa13c726a144b6cba8bd4b581fedc4d9129f45d7883263f55f0ae90bf690baf816ea9
6
+ metadata.gz: fc7ebee421fa890537dd79fbf23280b8952b3d4145c553965678f7822a1229cc56c38ea812e36ec16b9d88f2d8af7de315507d51a5269c0f45dc43b31d136338
7
+ data.tar.gz: 3a7f266c4a70f307b86e000d41c52a36ee4ad773071c99fce4d75d47d4ef349fa97497b0bc437df0f6b479f34a3994c9aceb753e58f44f966b31ecf2078d7aa4
data/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [1.6.19] - 2026-04-02
6
+
7
+ ### Changed
8
+ - Logging uplift across non-API `lib/` modules to use `Legion::Logging::Helper` and `log.*` instead of direct `Legion::Logging.*` calls
9
+ - Removed direct `log_info` / `log_warn` wrapper usage in partition management and aligned logging with helper-backed tagged loggers
10
+ - Added broader info-level operational logs for archival, retention, spool, extract, storage-tier, and partition workflows
11
+
12
+ ### Fixed
13
+ - Added `handle_exception(...)` coverage to rescue paths across non-API data modules so failures are logged consistently without changing existing fallback behavior
14
+ - Added compatibility fallback for `handle_exception` when older `legion-logging` releases are present in the runtime
15
+ - Included `metadata_json` in EventStore integrity hashes for new events while preserving verification compatibility for legacy rows
16
+ - Fixed encrypted Sequel columns to re-encrypt newly-created rows with their persisted primary key and maintain legacy read compatibility
17
+ - Hardened spool persistence with atomic writes, deterministic replay ordering, and corrupt-file quarantine during read/flush
18
+ - Updated partition manager specs to assert against helper-backed logger behavior
19
+
5
20
  ## [1.6.18] - 2026-03-30
6
21
 
7
22
  ### Added
data/Gemfile CHANGED
@@ -3,6 +3,7 @@
3
3
  source 'https://rubygems.org'
4
4
 
5
5
  gemspec
6
+
6
7
  group :test do
7
8
  gem 'rake'
8
9
  gem 'rspec'
data/legion-data.gemspec CHANGED
@@ -27,8 +27,8 @@ Gem::Specification.new do |spec|
27
27
  }
28
28
 
29
29
  spec.add_dependency 'csv', '>= 3.2'
30
- spec.add_dependency 'legion-logging', '>= 1.2.8'
31
- spec.add_dependency 'legion-settings', '>= 1.3.12'
30
+ spec.add_dependency 'legion-logging', '>= 1.5.0'
31
+ spec.add_dependency 'legion-settings', '>= 1.3.26'
32
32
  spec.add_dependency 'sequel', '>= 5.70'
33
33
  spec.add_dependency 'sqlite3', '>= 2.0'
34
34
  end
data/lib/legion/data/archival/policy.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
4
+
3
5
  module Legion
4
6
  module Data
5
7
  module Archival
@@ -37,6 +39,10 @@ module Legion
37
39
  Time.now - (cold_after_days * 86_400)
38
40
  end
39
41
 
42
+ class << self
43
+ include Legion::Logging::Helper
44
+ end
45
+
40
46
  def self.from_settings
41
47
  return new unless defined?(Legion::Settings)
42
48
 
@@ -46,7 +52,7 @@ module Legion
46
52
 
47
53
  new(**archival.slice(:warm_after_days, :cold_after_days, :batch_size, :tables))
48
54
  rescue StandardError => e
49
- Legion::Logging.warn("Policy.from_settings failed: #{e.message}") if defined?(Legion::Logging)
55
+ handle_exception(e, level: :warn, handled: true, operation: :policy_from_settings)
50
56
  new
51
57
  end
52
58
  end
data/lib/legion/data/archival.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require_relative 'archival/policy'
4
5
 
5
6
  module Legion
@@ -11,21 +12,28 @@ module Legion
11
12
  }.freeze
12
13
 
13
14
  class << self
15
+ include Legion::Logging::Helper
16
+
14
17
  def archive!(policy: Policy.new, dry_run: false)
18
+ log.info "Archival run started dry_run=#{dry_run} tables=#{policy.tables.size}"
15
19
  results = {}
16
20
  policy.tables.each do |table_name|
17
21
  table = table_name.to_sym
18
22
  archive_table = ARCHIVE_TABLE_MAP[table]
19
23
  next unless archive_table && db_ready?(table) && db_ready?(archive_table)
20
24
 
21
- Legion::Logging.info "Archiving #{table} -> #{archive_table} (cutoff: #{policy.warm_cutoff}, dry_run: #{dry_run})" if defined?(Legion::Logging)
25
+ log.info "Archiving #{table} -> #{archive_table} (cutoff: #{policy.warm_cutoff}, dry_run: #{dry_run})"
22
26
  count = archive_table!(
23
27
  source: table, destination: archive_table,
24
28
  cutoff: policy.warm_cutoff, batch_size: policy.batch_size, dry_run: dry_run
25
29
  )
26
30
  results[table] = count
27
31
  end
32
+ log.info "Archival run completed tables=#{results.keys.join(',')}" unless results.empty?
28
33
  results
34
+ rescue StandardError => e
35
+ handle_exception(e, level: :error, handled: false, operation: :archive!, dry_run: dry_run)
36
+ raise
29
37
  end
30
38
 
31
39
  def restore(table:, ids:)
@@ -46,8 +54,11 @@ module Legion
46
54
  end
47
55
  conn[archive_table].where(original_id: ids).delete
48
56
  end
49
- Legion::Logging.info "Restored #{restored} row(s) from #{archive_table} -> #{source_table}" if defined?(Legion::Logging)
57
+ log.info "Restored #{restored} row(s) from #{archive_table} -> #{source_table}"
50
58
  restored
59
+ rescue StandardError => e
60
+ handle_exception(e, level: :error, handled: false, operation: :restore, table: source_table, ids: Array(ids))
61
+ raise
51
62
  end
52
63
 
53
64
  def search(table:, where: {})
@@ -55,10 +66,14 @@ module Legion
55
66
  archive_table = ARCHIVE_TABLE_MAP[source_table]
56
67
  return [] unless db_ready?(source_table)
57
68
 
69
+ log.info "Archival search table=#{source_table} where_keys=#{where.keys.join(',')}"
58
70
  conn = Legion::Data.connection
59
71
  hot = conn[source_table].where(where).all
60
72
  warm = db_ready?(archive_table) ? conn[archive_table].where(where).all : []
61
73
  hot + warm
74
+ rescue StandardError => e
75
+ handle_exception(e, level: :error, handled: false, operation: :search, table: source_table, where_keys: where.keys)
76
+ raise
62
77
  end
63
78
 
64
79
  def archive_completed_tasks(days_old: 90, batch_size: 1000)
@@ -92,11 +107,15 @@ module Legion
92
107
  end
93
108
  end
94
109
 
95
- Legion::Logging.info "archive_completed_tasks: archived #{count} tasks (cutoff: #{cutoff.iso8601})" if defined?(Legion::Logging)
110
+ log.info "archive_completed_tasks: archived #{count} tasks (cutoff: #{cutoff.iso8601})"
96
111
  { archived: count, cutoff: cutoff.iso8601 }
112
+ rescue StandardError => e
113
+ handle_exception(e, level: :error, handled: false, operation: :archive_completed_tasks, days_old: days_old, batch_size: batch_size)
114
+ raise
97
115
  end
98
116
 
99
117
  def run_scheduled_archival
118
+ log.info 'Running scheduled archival'
100
119
  results = {}
101
120
  results[:tasks] = archive_completed_tasks
102
121
 
@@ -107,7 +126,11 @@ module Legion
107
126
  )
108
127
  end
109
128
 
129
+ log.info "Scheduled archival completed keys=#{results.keys.join(',')}"
110
130
  results
131
+ rescue StandardError => e
132
+ handle_exception(e, level: :error, handled: false, operation: :run_scheduled_archival)
133
+ raise
111
134
  end
112
135
 
113
136
  private
@@ -135,7 +158,7 @@ module Legion
135
158
  def db_ready?(table)
136
159
  defined?(Legion::Data) && Legion::Data.connection&.table_exists?(table)
137
160
  rescue StandardError => e
138
- Legion::Logging.debug("Archival#db_ready? check failed for #{table}: #{e.message}") if defined?(Legion::Logging)
161
+ handle_exception(e, level: :warn, handled: true, operation: :archival_db_ready, table: table)
139
162
  false
140
163
  end
141
164
  end
data/lib/legion/data/archiver.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'digest'
4
5
  require 'fileutils'
5
6
  require 'json'
@@ -13,62 +14,41 @@ module Legion
13
14
  class UploadError < StandardError; end
14
15
 
15
16
  class << self
17
+ include Legion::Logging::Helper
18
+
16
19
  def archive_table(table:, retention_days: 90, batch_size: 1000, storage_backend: nil)
17
20
  return { skipped: true, reason: 'not_postgres' } unless postgres?
18
21
 
19
- Legion::Logging.info "Archiving table #{table} (retention: #{retention_days}d)" if defined?(Legion::Logging)
22
+ log.info "Archiving table #{table} (retention: #{retention_days}d)"
20
23
 
21
24
  conn = Legion::Data.connection
22
25
  cutoff = Time.now - (retention_days * 86_400)
23
- now = Time.now.utc
24
-
25
- batches = 0
26
- total_rows = 0
27
- paths = []
28
- batch_n = 0
29
-
30
- loop do
31
- batch_n += 1
32
- rows = conn[table].where { created_at < cutoff }.limit(batch_size).all
33
- break if rows.empty?
34
-
35
- ids = rows.map { |r| r[:id] }
36
- jsonl = serialize_rows(rows)
37
- compressed = gzip_compress(jsonl)
38
- checksum = Digest::SHA256.hexdigest(compressed)
39
- batch_id = SecureRandom.uuid
40
-
41
- path = upload_batch(
42
- data: compressed,
43
- table: table.to_s,
44
- year: now.year,
45
- month: now.month,
46
- batch_n: batch_n,
47
- backend: storage_backend
48
- )
49
-
50
- conn.transaction do
51
- conn[:archive_manifest].insert(
52
- batch_id: batch_id,
53
- source_table: table.to_s,
54
- row_count: rows.size,
55
- checksum: checksum,
56
- storage_path: path,
57
- archived_at: now
58
- )
59
- conn[table].where(id: ids).delete
60
- end
61
-
62
- batches += 1
63
- total_rows += rows.size
64
- paths << path
65
- end
66
-
67
- Legion::Logging.info "Archived #{total_rows} rows from #{table} in #{batches} batch(es)" if defined?(Legion::Logging)
68
- { batches: batches, total_rows: total_rows, paths: paths }
26
+ archive_results = archive_batches(
27
+ conn: conn,
28
+ table: table,
29
+ cutoff: cutoff,
30
+ batch_size: batch_size,
31
+ storage_backend: storage_backend
32
+ )
33
+
34
+ log.info "Archived #{archive_results[:total_rows]} rows from #{table} in #{archive_results[:batches]} batch(es)"
35
+ archive_results
36
+ rescue StandardError => e
37
+ handle_exception(
38
+ e,
39
+ level: :error,
40
+ handled: false,
41
+ operation: :archive_table,
42
+ table: table,
43
+ retention_days: retention_days,
44
+ batch_size: batch_size,
45
+ storage_backend: storage_backend
46
+ )
47
+ raise
69
48
  end
70
49
 
71
50
  def upload_batch(data:, table:, year:, month:, batch_n:, backend:)
51
+ log.info "Archiver storing batch table=#{table} backend=#{backend || :tmpdir} year=#{year} month=#{month} batch=#{batch_n}"
72
52
  case backend
73
53
  when :s3
74
54
  upload_s3(data: data, table: table, year: year, month: month, batch_n: batch_n)
@@ -111,6 +91,72 @@ module Legion
111
91
  rows.map { |row| json_dump(row) }.join("\n")
112
92
  end
113
93
 
94
+ def archive_batches(conn:, table:, cutoff:, batch_size:, storage_backend:)
95
+ now = Time.now.utc
96
+ batches = 0
97
+ total_rows = 0
98
+ paths = []
99
+
100
+ loop do
101
+ batch_result = archive_batch(
102
+ conn: conn,
103
+ table: table,
104
+ cutoff: cutoff,
105
+ batch_size: batch_size,
106
+ batch_n: batches + 1,
107
+ now: now,
108
+ storage_backend: storage_backend
109
+ )
110
+ break unless batch_result
111
+
112
+ batches += 1
113
+ total_rows += batch_result[:row_count]
114
+ paths << batch_result[:path]
115
+ end
116
+
117
+ { batches: batches, total_rows: total_rows, paths: paths }
118
+ end
119
+
120
+ def archive_batch(conn:, table:, cutoff:, batch_size:, batch_n:, now:, storage_backend:)
121
+ rows = conn[table].where { created_at < cutoff }.limit(batch_size).all
122
+ return if rows.empty?
123
+
124
+ compressed = gzip_compress(serialize_rows(rows))
125
+ path = upload_batch(
126
+ data: compressed,
127
+ table: table.to_s,
128
+ year: now.year,
129
+ month: now.month,
130
+ batch_n: batch_n,
131
+ backend: storage_backend
132
+ )
133
+
134
+ record_archived_batch(
135
+ conn: conn,
136
+ table: table,
137
+ rows: rows,
138
+ compressed: compressed,
139
+ path: path,
140
+ now: now
141
+ )
142
+
143
+ { row_count: rows.size, path: path }
144
+ end
145
+
146
+ def record_archived_batch(conn:, table:, rows:, compressed:, path:, now:)
147
+ conn.transaction do
148
+ conn[:archive_manifest].insert(
149
+ batch_id: SecureRandom.uuid,
150
+ source_table: table.to_s,
151
+ row_count: rows.size,
152
+ checksum: Digest::SHA256.hexdigest(compressed),
153
+ storage_path: path,
154
+ archived_at: now
155
+ )
156
+ conn[table].where(id: rows.map { |row| row[:id] }).delete
157
+ end
158
+ end
159
+
114
160
  def json_dump(obj)
115
161
  if defined?(Legion::JSON)
116
162
  Legion::JSON.dump(obj)
@@ -133,11 +179,13 @@ module Legion
133
179
 
134
180
  key = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz"
135
181
  Legion::Extensions::S3::Runners::Put.run(key: key, body: data)
182
+ log.info "Archiver uploaded batch to s3 key=#{key}"
136
183
  "s3://#{key}"
137
- rescue UploadError
184
+ rescue UploadError => e
185
+ handle_exception(e, level: :error, handled: false, operation: :upload_s3, table: table, year: year, month: month, batch_n: batch_n)
138
186
  raise
139
187
  rescue StandardError => e
140
- Legion::Logging.warn "S3 upload failed: #{e.message}" if defined?(Legion::Logging)
188
+ handle_exception(e, level: :error, handled: true, operation: :upload_s3, table: table, year: year, month: month, batch_n: batch_n)
141
189
  raise UploadError, "S3 upload failed: #{e.message}"
142
190
  end
143
191
 
@@ -148,11 +196,13 @@ module Legion
148
196
 
149
197
  blob_name = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz"
150
198
  Legion::Extensions::AzureStorage::Runners::Upload.run(blob_name: blob_name, data: data)
199
+ log.info "Archiver uploaded batch to azure blob=#{blob_name}"
151
200
  "azure://#{blob_name}"
152
- rescue UploadError
201
+ rescue UploadError => e
202
+ handle_exception(e, level: :error, handled: false, operation: :upload_azure, table: table, year: year, month: month, batch_n: batch_n)
153
203
  raise
154
204
  rescue StandardError => e
155
- Legion::Logging.warn "Azure upload failed: #{e.message}" if defined?(Legion::Logging)
205
+ handle_exception(e, level: :error, handled: false, operation: :upload_azure, table: table, year: year, month: month, batch_n: batch_n)
156
206
  raise UploadError, "Azure upload failed: #{e.message}"
157
207
  end
158
208
 
@@ -161,8 +211,10 @@ module Legion
161
211
  FileUtils.mkdir_p(dir)
162
212
  path = File.join(dir, "batch_#{batch_n}.jsonl.gz")
163
213
  File.binwrite(path, data)
214
+ log.info "Archiver stored batch locally path=#{path}"
164
215
  "file://#{path}"
165
216
  rescue StandardError => e
217
+ handle_exception(e, level: :error, handled: true, operation: :upload_tmpdir, table: table, year: year, month: month, batch_n: batch_n)
166
218
  raise UploadError, "Tmpdir upload failed: #{e.message}"
167
219
  end
168
220
  end
data/lib/legion/data/audit_record.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging/helper'
3
4
  require 'digest'
4
5
 
5
6
  module Legion
@@ -8,6 +9,8 @@ module Legion
8
9
  GENESIS_HASH = ('0' * 64).freeze
9
10
 
10
11
  class << self
12
+ include Legion::Logging::Helper
13
+
11
14
  # Append a new record to the named chain. Returns the persisted record hash
12
15
  # on success, or an error hash when the database is unavailable.
13
16
  #
@@ -38,7 +41,7 @@ module Legion
38
41
  created_at: ts
39
42
  )
40
43
 
41
- Legion::Logging.debug "AuditRecord append: chain=#{chain_id} type=#{content_type} id=#{id}" if defined?(Legion::Logging)
44
+ log.debug "AuditRecord append: chain=#{chain_id} type=#{content_type} id=#{id}"
42
45
  { id: id, chain_id: chain_id, chain_hash: ch, parent_hash: parent_hash }
43
46
  end
44
47
  end
@@ -59,13 +62,13 @@ module Legion
59
62
  prev_hash = GENESIS_HASH
60
63
  records.each do |r|
61
64
  unless r[:parent_hash] == prev_hash
62
- Legion::Logging.warn "AuditRecord chain broken: chain=#{chain_id} id=#{r[:id]}" if defined?(Legion::Logging)
65
+ log.warn "AuditRecord chain broken: chain=#{chain_id} id=#{r[:id]}"
63
66
  return { valid: false, broken_at: r[:id], reason: :parent_mismatch }
64
67
  end
65
68
 
66
69
  expected = compute_chain_hash(prev_hash, r[:content_hash], r[:created_at], r[:content_type])
67
70
  unless r[:chain_hash] == expected
68
- Legion::Logging.warn "AuditRecord hash mismatch: chain=#{chain_id} id=#{r[:id]}" if defined?(Legion::Logging)
71
+ log.warn "AuditRecord hash mismatch: chain=#{chain_id} id=#{r[:id]}"
69
72
  return { valid: false, broken_at: r[:id], reason: :hash_mismatch }
70
73
  end
71
74
 
@@ -142,7 +145,7 @@ module Legion
142
145
 
143
146
  Legion::Crypt.sign(chain_hash)
144
147
  rescue StandardError => e
145
- Legion::Logging.warn "AuditRecord signing failed: #{e.message}" if defined?(Legion::Logging)
148
+ handle_exception(e, level: :warn, handled: true, operation: :sign_record)
146
149
  nil
147
150
  end
148
151
 
@@ -163,7 +166,7 @@ module Legion
163
166
  def db_ready?
164
167
  defined?(Legion::Data) && Legion::Data.connection&.table_exists?(:audit_records)
165
168
  rescue StandardError => e
166
- Legion::Logging.debug "AuditRecord#db_ready? check failed: #{e.message}" if defined?(Legion::Logging)
169
+ handle_exception(e, level: :warn, handled: true, operation: :audit_record_db_ready?)
167
170
  false
168
171
  end
169
172
  end