legion-data 1.4.9 → 1.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ed390611755acbac0c9701c31ee783d322f0ade227aaf13b2c9c74802db412e
4
- data.tar.gz: d2c0a758edca0c1a2add30ca3e86a3fa949fe7bd1f79d71cfb48f7f6fb97e77a
3
+ metadata.gz: b4189d8ca05d1a51425ae559b6467e5be87f33b6189c21fd48501abc0102e7d8
4
+ data.tar.gz: 3aae8b800f4026cdff398d068111dc96f95c6ff465929ceb9634af514b46310f
5
5
  SHA512:
6
- metadata.gz: 3e6f7e9fe8bfb5d699b51409bf7feee684014fe90639e4ecc0cfd620d8ffff6c0004611ef1ef442406abdf003f410cc30f45d5187855c75634237ace91bc5b2f
7
- data.tar.gz: fbca2c2acf2862fa51018c42b200c4056d2a4cf6108620a5991c878ce279b2df5945439b7397740dfef3f20ad4d56f0f41d6f7ac5bd99fc1480eecb0825200a1
6
+ metadata.gz: 937412b3403fbcfb47c27d2f4a82ea8d0c8e02f7eec0f0f0540cbdae67ea79cd390e4cd5d721ab80d47197adcd73cce5b92997ea5414bed1c053d03bd076fd11
7
+ data.tar.gz: 11a1b127dedd48eb11d3b0479d1c3055a5183b32619dfcd17f842b42aa39d8bec327bc0a6e99fab805e256c1c24c7e4b23a98c5a81f625a82d425c6f0bc5a7b2
data/.rubocop.yml CHANGED
@@ -49,3 +49,9 @@ Style/FrozenStringLiteralComment:
49
49
 
50
50
  Naming/FileName:
51
51
  Enabled: false
52
+
53
+ Naming/VariableNumber:
54
+ Enabled: false
55
+
56
+ Metrics/ParameterLists:
57
+ Max: 8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Legion::Data Changelog
2
2
 
3
+ ## v1.4.11
4
+
5
+ ### Added
6
+ - Read replica support: `read_replica_url` and `replicas` settings, `Connection.connect_with_replicas` via Sequel `server_block` extension, `read_server` and `replica_servers` class methods for read/write splitting
7
+ - `PartitionManager`: PostgreSQL range partitioning helper — `ensure_partitions`, `drop_old_partitions`, `list_partitions` for monthly table partitioning
8
+ - `Archiver`: cold storage archival pipeline — batch export to JSONL+gzip, SHA-256 manifest, pluggable upload backends (S3, Azure, local tmpdir)
9
+ - Migration 034: `archive_manifest` table (PostgreSQL only) for tracking archived batches
10
+ - Archival settings: `retention_days`, `batch_size`, `storage_backend` defaults
11
+ - 58 new specs (257 total, 0 failures)
12
+
13
+ ## v1.4.10
14
+
15
+ ### Added
16
+ - TLS support for PostgreSQL connections: `sslmode`, `sslrootcert`, `sslcert`, `sslkey`
17
+ - TLS support for MySQL connections: `ssl_mode`, `sslca`, `sslcert`, `sslkey`
18
+ - `Connection.merge_tls_creds` resolves TLS config via `Legion::Crypt::TLS.resolve`
19
+ - SQLite connections skip TLS entirely (local file, no network)
20
+
3
21
  ## v1.4.8
4
22
 
5
23
  ### Fixed
@@ -0,0 +1,166 @@
1
# frozen_string_literal: true

require 'digest'
require 'fileutils'
require 'json'
require 'securerandom'
require 'stringio' # required explicitly: Zlib does not load StringIO, used by gzip_compress
require 'tmpdir'
require 'zlib'

module Legion
  module Data
    # Cold-storage archival pipeline. Exports rows older than a retention
    # window as gzip-compressed JSONL batches, uploads each batch to a
    # pluggable backend (S3, Azure, or local tmpdir fallback), records a
    # SHA-256 checksum per batch in the archive_manifest table, then deletes
    # the archived source rows in the same transaction. PostgreSQL only.
    module Archiver
      # Raised when a storage backend is unavailable or an upload fails.
      class UploadError < StandardError; end

      class << self
        # Archives rows older than +retention_days+ from +table+ in batches.
        #
        # @param table [Symbol] source table; rows must expose :id and :created_at
        # @param retention_days [Integer] rows with created_at older than this are archived
        # @param batch_size [Integer] maximum rows exported per batch
        # @param storage_backend [Symbol, nil] :s3, :azure, or nil (local tmpdir)
        # @return [Hash] { batches:, total_rows:, paths: } on success,
        #   or { skipped: true, reason: 'not_postgres' } on other adapters
        # @raise [UploadError] when the configured backend rejects an upload
        def archive_table(table:, retention_days: 90, batch_size: 1000, storage_backend: nil)
          return { skipped: true, reason: 'not_postgres' } unless postgres?

          conn = Legion::Data.connection
          cutoff = Time.now - (retention_days * 86_400)
          now = Time.now.utc

          batches = 0
          total_rows = 0
          paths = []
          batch_n = 0

          loop do
            batch_n += 1
            rows = conn[table].where { created_at < cutoff }.limit(batch_size).all
            break if rows.empty?

            ids = rows.map { |r| r[:id] }
            jsonl = serialize_rows(rows)
            compressed = gzip_compress(jsonl)
            checksum = Digest::SHA256.hexdigest(compressed)
            batch_id = SecureRandom.uuid

            # Upload happens before the delete: a failed upload raises and
            # leaves the source rows untouched.
            path = upload_batch(
              data: compressed,
              table: table.to_s,
              year: now.year,
              month: now.month,
              batch_n: batch_n,
              backend: storage_backend
            )

            # Manifest insert and source-row delete commit atomically so a
            # crash cannot delete rows that were never recorded.
            conn.transaction do
              conn[:archive_manifest].insert(
                batch_id: batch_id,
                source_table: table.to_s,
                row_count: rows.size,
                checksum: checksum,
                storage_path: path,
                archived_at: now
              )
              conn[table].where(id: ids).delete
            end

            batches += 1
            total_rows += rows.size
            paths << path
          end

          { batches: batches, total_rows: total_rows, paths: paths }
        end

        # Routes one compressed batch to the chosen backend.
        # @return [String] storage URI recorded in the manifest
        #   ("s3://...", "azure://...", or "file://...")
        def upload_batch(data:, table:, year:, month:, batch_n:, backend:)
          case backend
          when :s3
            upload_s3(data: data, table: table, year: year, month: month, batch_n: batch_n)
          when :azure
            upload_azure(data: data, table: table, year: year, month: month, batch_n: batch_n)
          else
            # Unknown/nil backends fall back to the local tmpdir writer.
            upload_tmpdir(data: data, table: table, year: year, month: month, batch_n: batch_n)
          end
        end

        # Per-table archive statistics aggregated from archive_manifest.
        # @return [Hash{String=>Hash}] source_table => { batches:, total_rows:,
        #   earliest:, latest: }; {} when not PostgreSQL or table missing
        def manifest_stats
          return {} unless postgres?
          return {} unless Legion::Data.connection.table_exists?(:archive_manifest)

          Legion::Data.connection[:archive_manifest]
                      .group_and_count(:source_table)
                      .select_append(
                        Sequel.function(:sum, :row_count).as(:total_rows),
                        Sequel.function(:min, :archived_at).as(:earliest),
                        Sequel.function(:max, :archived_at).as(:latest)
                      )
                      .all
                      .to_h do |row|
                        [row[:source_table], {
                          batches: row[:count],
                          total_rows: row[:total_rows].to_i,
                          earliest: row[:earliest],
                          latest: row[:latest]
                        }]
                      end
        end

        private

        def postgres?
          Legion::Data::Connection.adapter == :postgres
        end

        # One JSON object per line, no trailing newline (JSONL).
        def serialize_rows(rows)
          rows.map { |row| json_dump(row) }.join("\n")
        end

        # Prefers Legion::JSON when loaded; stdlib JSON otherwise.
        def json_dump(obj)
          if defined?(Legion::JSON)
            Legion::JSON.dump(obj)
          else
            ::JSON.generate(obj)
          end
        end

        # Gzip-compresses +data+ fully in memory and returns the binary string.
        def gzip_compress(data)
          output = StringIO.new
          output.binmode
          gz = Zlib::GzipWriter.new(output)
          gz.write(data)
          gz.close # close (not finish) also flushes the gzip trailer into output
          output.string
        end

        def upload_s3(data:, table:, year:, month:, batch_n:)
          raise UploadError, 'S3 backend not available: Legion::Extensions::S3::Runners::Put not defined' unless defined?(Legion::Extensions::S3::Runners::Put)

          key = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz"
          Legion::Extensions::S3::Runners::Put.run(key: key, body: data)
          "s3://#{key}"
        rescue UploadError
          raise
        rescue StandardError => e
          raise UploadError, "S3 upload failed: #{e.message}"
        end

        def upload_azure(data:, table:, year:, month:, batch_n:)
          unless defined?(Legion::Extensions::AzureStorage::Runners::Upload)
            raise UploadError, 'Azure backend not available: Legion::Extensions::AzureStorage::Runners::Upload not defined'
          end

          blob_name = "legion-archive/#{table}/#{year}/#{month}/batch_#{batch_n}.jsonl.gz"
          Legion::Extensions::AzureStorage::Runners::Upload.run(blob_name: blob_name, data: data)
          "azure://#{blob_name}"
        rescue UploadError
          raise
        rescue StandardError => e
          raise UploadError, "Azure upload failed: #{e.message}"
        end

        # Local fallback: writes the batch under Dir.tmpdir/legion-archive/...
        def upload_tmpdir(data:, table:, year:, month:, batch_n:)
          dir = File.join(Dir.tmpdir, 'legion-archive', table.to_s, year.to_s, month.to_s)
          FileUtils.mkdir_p(dir)
          path = File.join(dir, "batch_#{batch_n}.jsonl.gz")
          File.binwrite(path, data)
          "file://#{path}"
        rescue StandardError => e
          raise UploadError, "Tmpdir upload failed: #{e.message}"
        end
      end
    end
  end
end
@@ -34,6 +34,7 @@ module Legion
34
34
  end
35
35
  Legion::Settings[:data][:connected] = true
36
36
  configure_logging
37
+ connect_with_replicas
37
38
  end
38
39
 
39
40
  def shutdown
@@ -41,10 +42,67 @@ module Legion
41
42
  Legion::Settings[:data][:connected] = false
42
43
  end
43
44
 
45
# Registers configured read replicas on the Sequel connection via the
# server_block extension. No-op unless the adapter is PostgreSQL and at
# least one replica URL is configured (:read_replica_url and/or :replicas).
# Populates @replica_servers with the registered :read_N symbols.
def connect_with_replicas
  return unless adapter == :postgres

  primary_replica = Legion::Settings[:data][:read_replica_url]
  urls = Array(Legion::Settings[:data][:replicas]).dup
  urls.unshift(primary_replica) unless primary_replica.nil? || primary_replica.empty?
  urls = urls.uniq.compact

  return if urls.empty?

  @sequel.extension(:server_block)

  registered = []
  urls.each_with_index do |url, idx|
    name = :"read_#{idx}"
    @sequel.add_servers(name => url)
    registered << name
  end

  @replica_servers = registered
end
65
+
66
# Sequel server symbol to use for read queries: the first registered
# replica when any exist, otherwise the primary (:default).
def read_server
  replicas = @replica_servers
  if replicas && !replicas.empty?
    :read_0
  else
    :default
  end
end
71
+
72
# Registered replica server symbols; empty array when none configured.
def replica_servers
  Array(@replica_servers)
end
75
+
76
# Merges resolved TLS options into the connection credentials hash.
# SQLite is skipped entirely (local file, no network); when
# Legion::Crypt::TLS is unavailable or TLS resolves as disabled, +creds+
# passes through untouched. Mutates and returns +creds+.
def merge_tls_creds(creds, adapter:, port:)
  return creds if adapter == :sqlite || !defined?(Legion::Crypt::TLS)

  tls = Legion::Crypt::TLS.resolve(data_tls_settings, port: port)
  return creds unless tls[:enabled]

  skip_verify = tls[:verify] == :none

  if adapter == :postgres
    creds[:sslmode] = skip_verify ? 'require' : 'verify-full'
    creds[:sslrootcert] = tls[:ca] if tls[:ca]
    creds[:sslcert] = tls[:cert] if tls[:cert]
    creds[:sslkey] = tls[:key] if tls[:key]
  elsif adapter == :mysql2
    creds[:ssl_mode] = skip_verify ? 'required' : 'verify_identity'
    creds[:sslca] = tls[:ca] if tls[:ca]
    creds[:sslcert] = tls[:cert] if tls[:cert]
    creds[:sslkey] = tls[:key] if tls[:key]
  end

  creds
end
98
+
44
99
  def creds_builder(final_creds = {})
45
100
  final_creds.merge! Legion::Data::Settings.creds(adapter)
46
101
  final_creds.merge! Legion::Settings[:data][:creds] if Legion::Settings[:data][:creds].is_a? Hash
47
102
 
103
+ port = final_creds[:port]
104
+ merge_tls_creds(final_creds, adapter: adapter, port: port)
105
+
48
106
  return final_creds if Legion::Settings[:vault].nil?
49
107
 
50
108
  if Legion::Settings[:vault][:connected] && ::Vault.sys.mounts.key?(:database)
@@ -58,6 +116,14 @@ module Legion
58
116
 
59
117
  private
60
118
 
119
# The :tls section of the data settings; {} when Legion::Settings is not
# loaded, the section is absent, or lookup raises.
def data_tls_settings
  section = defined?(Legion::Settings) ? Legion::Settings[:data][:tls] : nil
  section || {}
rescue StandardError
  {}
end
126
+
61
127
  def dev_fallback?
62
128
  data_settings = Legion::Settings[:data]
63
129
  data_settings[:dev_mode] == true && data_settings[:dev_fallback] != false
@@ -0,0 +1,28 @@
1
# frozen_string_literal: true

# Migration 034: adds the archive_manifest bookkeeping table used by
# Legion::Data::Archiver to track archived batches. PostgreSQL only —
# other adapters are skipped in both directions.
Sequel.migration do
  up do
    next unless adapter_scheme == :postgres
    next if table_exists?(:archive_manifest)

    create_table(:archive_manifest) do
      primary_key :id
      String :batch_id, null: false, unique: true
      String :source_table, null: false
      Integer :row_count, null: false
      String :checksum, null: false
      String :storage_path, null: false
      DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP
      column :metadata, :jsonb

      index :source_table
      index :archived_at
    end
  end

  down do
    next unless adapter_scheme == :postgres

    drop_table(:archive_manifest) if table_exists?(:archive_manifest)
  end
end
@@ -0,0 +1,160 @@
1
# frozen_string_literal: true

require 'date' # Date is stdlib but not autoloaded; needed for Date.today/Date.new

module Legion
  module Data
    # Helpers for PostgreSQL monthly range partitioning: creates upcoming
    # partitions, drops expired ones, and lists a table's partitions with
    # their bounds. All public methods degrade gracefully (marker hash or
    # empty array) on non-PostgreSQL adapters or on errors.
    module PartitionManager
      # Marker returned by the public API when the adapter is not PostgreSQL.
      NOT_POSTGRES = { skipped: true, reason: 'not_postgres' }.freeze

      class << self
        # Creates monthly partitions covering the current month and the next
        # +months_ahead - 1+ months. Idempotent via CREATE TABLE IF NOT EXISTS.
        #
        # @param table [Symbol, String] parent partitioned table (internal
        #   callers only — the name is interpolated into DDL, not safe for
        #   untrusted input)
        # @param months_ahead [Integer] number of months to ensure, starting now
        # @return [Hash] { created: [...], existing: [...] } (+ :error on failure)
        def ensure_partitions(table:, months_ahead: 3)
          return NOT_POSTGRES unless postgres?

          created = []
          existing = []
          base = Date.today

          months_ahead.times do |i|
            target = advance_months(base, i)
            partition = partition_name(table, target)
            from_str = target.strftime('%Y-%m-%d')
            to_str = advance_months(target, 1).strftime('%Y-%m-%d')

            ddl = "CREATE TABLE IF NOT EXISTS #{partition} " \
                  "PARTITION OF #{table} " \
                  "FOR VALUES FROM ('#{from_str}') TO ('#{to_str}')"

            # IF NOT EXISTS gives no signal, so creation is detected by
            # comparing the partition count before and after the DDL.
            before_count = partition_names_for(table).size
            Legion::Data.connection.run(ddl)
            after_count = partition_names_for(table).size

            if after_count > before_count
              log_info("Created partition #{partition}") if logging?
              created << partition
            else
              existing << partition
            end
          end

          { created: created, existing: existing }
        rescue StandardError => e
          log_warn("ensure_partitions failed for #{table}: #{e.message}") if logging?
          { created: [], existing: [], error: e.message }
        end

        # Drops partitions whose month is older than +retention_months+ ago.
        # Partitions whose names do not match the y%Ym%m suffix are ignored.
        # @return [Hash] { dropped: [...], retained: [...] } (+ :error on failure)
        def drop_old_partitions(table:, retention_months: 24)
          return NOT_POSTGRES unless postgres?

          cutoff = advance_months(Date.today, -retention_months)
          dropped = []
          retained = []

          partition_names_for(table).each do |part|
            part_date = parse_partition_date(part)
            next unless part_date

            if part_date < cutoff
              Legion::Data.connection.run("DROP TABLE #{part}")
              log_info("Dropped partition #{part}") if logging?
              dropped << part
            else
              retained << part
            end
          end

          { dropped: dropped, retained: retained }
        rescue StandardError => e
          log_warn("drop_old_partitions failed for #{table}: #{e.message}") if logging?
          { dropped: [], retained: [], error: e.message }
        end

        # Lists partitions of +table+ with their range bounds.
        # @return [Array<Hash>] [{ name:, from:, to: }, ...]; [] on error or
        #   non-PostgreSQL adapters
        def list_partitions(table:)
          return NOT_POSTGRES unless postgres?

          sql = <<~SQL
            SELECT c.relname AS name,
                   pg_get_expr(c.relpartbound, c.oid) AS bound
            FROM pg_inherits i
            JOIN pg_class p ON p.oid = i.inhparent
            JOIN pg_class c ON c.oid = i.inhrelid
            WHERE p.relname = '#{table}'
            ORDER BY c.relname
          SQL

          Legion::Data.connection.fetch(sql).map do |row|
            from_val, to_val = parse_bound(row[:bound])
            { name: row[:name], from: from_val, to: to_val }
          end
        rescue StandardError => e
          log_warn("list_partitions failed for #{table}: #{e.message}") if logging?
          []
        end

        private

        def postgres?
          Legion::Data::Connection.adapter == :postgres
        end

        def logging?
          defined?(Legion::Logging)
        end

        def log_info(msg)
          Legion::Logging.info(msg)
        end

        def log_warn(msg)
          Legion::Logging.warn(msg)
        end

        # e.g. partition_name(:events, Date.new(2024, 3, 1)) => "events_y2024m03"
        def partition_name(table, date)
          "#{table}_y#{date.strftime('%Y')}m#{date.strftime('%m')}"
        end

        # First day of the month +months+ months from +date+ (negative steps
        # backwards). Date#>> handles year rollover in both directions.
        def advance_months(date, months)
          shifted = date >> months
          Date.new(shifted.year, shifted.month, 1)
        end

        # Child partition names of +table+ from the system catalogs; [] on error.
        def partition_names_for(table)
          sql = <<~SQL
            SELECT c.relname AS name
            FROM pg_inherits i
            JOIN pg_class p ON p.oid = i.inhparent
            JOIN pg_class c ON c.oid = i.inhrelid
            WHERE p.relname = '#{table}'
          SQL

          Legion::Data.connection.fetch(sql).map { |row| row[:name] }
        rescue StandardError
          []
        end

        # Extracts the month a partition covers from its y%Ym%m name suffix;
        # nil when the name does not follow that convention.
        def parse_partition_date(partition_name)
          match = partition_name.match(/y(\d{4})m(\d{2})$/)
          return nil unless match

          Date.new(match[1].to_i, match[2].to_i, 1)
        end

        # Pulls the FROM/TO literals out of a pg_get_expr bound expression,
        # e.g. "FOR VALUES FROM ('2024-01-01') TO ('2024-02-01')".
        def parse_bound(expr)
          return [nil, nil] unless expr

          matches = expr.scan(/'([^']+)'/)
          from_val = matches[0]&.first
          to_val = matches[1]&.first
          [from_val, to_val]
        end
      end
    end
  end
end
@@ -35,7 +35,10 @@ module Legion
35
35
  local: local,
36
36
  dev_mode: false,
37
37
  dev_fallback: true,
38
- connect_on_start: true
38
+ connect_on_start: true,
39
+ read_replica_url: nil,
40
+ replicas: [],
41
+ archival: archival
39
42
  }
40
43
  end
41
44
 
@@ -79,6 +82,14 @@ module Legion
79
82
  CREDS.fetch(adapter, CREDS[:sqlite]).dup
80
83
  end
81
84
 
85
# Default archival configuration consumed by Legion::Data::Archiver:
# 90-day retention, 1000-row batches, tmpdir storage (nil backend).
def self.archival
  { retention_days: 90, batch_size: 1000, storage_backend: nil }
end
92
+
82
93
  def self.cache
83
94
  {
84
95
  connected: false,
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.4.9'
5
+ VERSION = '1.4.11'
6
6
  end
7
7
  end
data/lib/legion/data.rb CHANGED
@@ -9,6 +9,8 @@ require 'legion/data/model'
9
9
  require 'legion/data/migration'
10
10
  require_relative 'data/local'
11
11
  require_relative 'data/spool'
12
+ require_relative 'data/partition_manager'
13
+ require_relative 'data/archiver'
12
14
 
13
15
  module Legion
14
16
  module Data
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.9
4
+ version: 1.4.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -88,6 +88,7 @@ files:
88
88
  - lib/legion/data.rb
89
89
  - lib/legion/data/archival.rb
90
90
  - lib/legion/data/archival/policy.rb
91
+ - lib/legion/data/archiver.rb
91
92
  - lib/legion/data/connection.rb
92
93
  - lib/legion/data/encryption/cipher.rb
93
94
  - lib/legion/data/encryption/key_provider.rb
@@ -129,6 +130,7 @@ files:
129
130
  - lib/legion/data/migrations/031_add_task_depth.rb
130
131
  - lib/legion/data/migrations/032_add_task_cancelled_at.rb
131
132
  - lib/legion/data/migrations/033_add_task_delay.rb
133
+ - lib/legion/data/migrations/034_add_archive_manifest.rb
132
134
  - lib/legion/data/model.rb
133
135
  - lib/legion/data/models/apollo_access_log.rb
134
136
  - lib/legion/data/models/apollo_entry.rb
@@ -147,6 +149,7 @@ files:
147
149
  - lib/legion/data/models/setting.rb
148
150
  - lib/legion/data/models/task.rb
149
151
  - lib/legion/data/models/task_log.rb
152
+ - lib/legion/data/partition_manager.rb
150
153
  - lib/legion/data/retention.rb
151
154
  - lib/legion/data/settings.rb
152
155
  - lib/legion/data/spool.rb