legion-data 1.3.7 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d10f18bacc47360ad1829628bffc116ecbeea74b0b49359f1cee7fba0a7e9d44
4
- data.tar.gz: e13f4acfa8992e3a00129322fef794b80877f723e0c59210016f64fee57c5cae
3
+ metadata.gz: eaf37e68acae5015ef50b01e6a5c7a4806e81b80f5cf74f3c71b72e6799f0bd4
4
+ data.tar.gz: 81b55083c6e8d59403b51a838935a4146b1d3f39eca8316dfd82ee4d2f6a0b05
5
5
  SHA512:
6
- metadata.gz: 0e61649208294598ecdf19ffca469cba3db7b77d31e0e0a065e4c82e47949e4ea87aa291c5fae18970318e0c27ac1dfcd7eb53143bd7b5acc279eb84c0f2b231
7
- data.tar.gz: 7692efc8d123ad178c3f3a66531723518e0fac2cb5999d01833bfa472ac8eb5bee06289dd193886fe66c369f3d49ebff689e68267eff4ab030ce746377a6c9a3
6
+ metadata.gz: 4a24d0a86df7d44669ff38d34667c4f0c63036f877d1453df2e1418670b8a8c59ad803f9c245f5895094465d994253e1b4eced1007f0724cd31f53c4ae804d37
7
+ data.tar.gz: 830b7fc8203eb8a339a5ad84d334e56dabf0e6e02084be99a3b27a3067b104d7b5ca973437f3f8577050983c0126a302903624bdcf6a2916e267a1e6a8558754
data/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # Legion::Data Changelog
2
2
 
3
+ ## v1.4.2
4
+
5
+ ### Fixed
6
+ - Migration 015: use `create_table?` instead of `create_table` for idempotent RBAC table creation
7
+
8
+ ## v1.4.1
9
+
10
+ ### Added
11
+ - Migration 025: tenants table (tenant_id, name, status, quotas, token limits)
12
+
13
+ ## v1.4.0
14
+
15
+ ### Added
16
+ - `Legion::Data::Vector`: reusable pgvector helpers (available?, cosine_search, l2_search, ensure_extension!)
17
+ - `Legion::Data::StorageTiers`: hot/warm/cold archival lifecycle (archive_to_warm, export_to_cold, stats)
18
+ - Migration 022: memory_traces table with optional pgvector embedding column (1536-dim, HNSW index)
19
+ - Migration 023: data_archive table for generic storage tier archival
20
+ - Migration 024: tenant_id partition columns on tasks, digital_workers, audit_log, memory_traces
21
+
22
+ ## v1.3.8
23
+
24
+ ### Added
25
+ - `Legion::Data::Archival`: hot/warm/cold archival pipeline for tasks and metering records
26
+ - `Legion::Data::Archival::Policy`: configurable retention policies (warm_after_days, cold_after_days, batch_size)
27
+ - Archive, restore, and cross-table search operations with dry-run support
28
+ - Migration 021: archive tables for tasks and metering_records
29
+
3
30
  ## v1.3.7
4
31
 
5
32
  ### Added
@@ -0,0 +1,46 @@
1
# frozen_string_literal: true

module Legion
  module Data
    module Archival
      # Value object describing archival retention behaviour: how old a row
      # must be before it moves to the warm tier, how old before cold, how
      # many rows to process per batch, and which tables participate.
      class Policy
        # Fallback configuration used for any option the caller omits.
        DEFAULTS = {
          warm_after_days: 7,
          cold_after_days: 90,
          batch_size: 1000,
          tables: %w[tasks metering_records].freeze
        }.freeze

        attr_reader :warm_after_days, :cold_after_days, :batch_size, :tables

        # Builds a policy, layering the supplied options over DEFAULTS.
        def initialize(**opts)
          merged = DEFAULTS.merge(opts)
          @warm_after_days = merged[:warm_after_days]
          @cold_after_days = merged[:cold_after_days]
          @batch_size = merged[:batch_size]
          @tables = merged[:tables]
        end

        # Time before which rows become eligible for the warm tier.
        def warm_cutoff
          cutoff_before(warm_after_days)
        end

        # Time before which rows become eligible for the cold tier.
        def cold_cutoff
          cutoff_before(cold_after_days)
        end

        # Builds a policy from Legion::Settings[:data][:archival] when that
        # configuration is present and well-formed; falls back to a default
        # policy on any missing/malformed settings or unexpected error.
        def self.from_settings
          return new unless defined?(Legion::Settings)

          data_settings = Legion::Settings[:data]
          archival_cfg = data_settings.is_a?(Hash) ? data_settings[:archival] : nil
          return new unless archival_cfg.is_a?(Hash)

          known_opts = archival_cfg.slice(:warm_after_days, :cold_after_days, :batch_size, :tables)
          new(**known_opts)
        rescue StandardError
          new
        end

        private

        # Shared day-based cutoff calculation (86_400 seconds per day).
        def cutoff_before(days)
          Time.now - (days * 86_400)
        end
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
# frozen_string_literal: true

require_relative 'archival/policy'

module Legion
  module Data
    # Hot/warm archival pipeline: moves aged rows from live tables into their
    # *_archive counterparts, and supports restore and cross-tier search.
    module Archival
      # Live table => archive table mapping; only these tables are archivable.
      ARCHIVE_TABLE_MAP = {
        tasks: :tasks_archive,
        metering_records: :metering_records_archive
      }.freeze

      class << self
        # Archives rows older than the policy's warm cutoff for every table in
        # the policy. Returns a Hash of table => archived-row count. Tables
        # whose live or archive table is unavailable are skipped silently.
        def archive!(policy: Policy.new, dry_run: false)
          results = {}
          policy.tables.each do |table_name|
            table = table_name.to_sym
            archive_table = ARCHIVE_TABLE_MAP[table]
            next unless archive_table && db_ready?(table) && db_ready?(archive_table)

            results[table] = archive_table!(
              source: table, destination: archive_table,
              cutoff: policy.warm_cutoff, batch_size: policy.batch_size, dry_run: dry_run
            )
          end
          results
        end

        # Moves archived rows identified by their original ids back into the
        # live table, restoring original id/created_at/updated_at, and removes
        # them from the archive. Returns the number of rows restored.
        def restore(table:, ids:)
          source_table = table.to_sym
          archive_table = ARCHIVE_TABLE_MAP[source_table]
          return 0 unless archive_table && db_ready?(archive_table)

          conn = Legion::Data.connection
          restored = 0
          conn.transaction do
            conn[archive_table].where(original_id: ids).each do |row|
              restore_row = row.except(:id, :archived_at, :original_id, :original_created_at, :original_updated_at)
              restore_row[:id] = row[:original_id]
              restore_row[:created_at] = row[:original_created_at]
              restore_row[:updated_at] = row[:original_updated_at]
              conn[source_table].insert(restore_row)
              restored += 1
            end
            conn[archive_table].where(original_id: ids).delete
          end
          restored
        end

        # Queries the live ("hot") table and, when present, its archive
        # ("warm") table, concatenating the matches. Returns [] when the live
        # table is unavailable.
        def search(table:, where: {})
          source_table = table.to_sym
          archive_table = ARCHIVE_TABLE_MAP[source_table]
          return [] unless db_ready?(source_table)

          conn = Legion::Data.connection
          hot = conn[source_table].where(where).all
          warm = db_ready?(archive_table) ? conn[archive_table].where(where).all : []
          hot + warm
        end

        private

        # Copies up to batch_size rows older than cutoff into destination and
        # deletes them from source. The candidate rows are materialized ONCE
        # so the rows counted, copied, and deleted are guaranteed to be the
        # same set: the previous count / iterate / `WHERE id IN (<LIMITed
        # subquery>)` approach raced with concurrent writers, and MySQL
        # rejects LIMIT inside IN/ALL/ANY subqueries. Returns the number of
        # rows archived (for dry runs, the number currently eligible).
        def archive_table!(source:, destination:, cutoff:, batch_size:, dry_run:)
          conn = Legion::Data.connection
          candidates = conn[source].where { created_at < cutoff }.limit(batch_size)
          return candidates.count if dry_run

          rows = candidates.all
          return 0 if rows.empty?

          conn.transaction do
            rows.each do |row|
              archive_row = row.dup
              archive_row[:original_id] = archive_row.delete(:id)
              archive_row[:original_created_at] = archive_row.delete(:created_at)
              archive_row[:original_updated_at] = archive_row.delete(:updated_at)
              archive_row[:archived_at] = Time.now
              conn[destination].insert(archive_row)
            end
            # Delete exactly the ids we just copied, not a re-evaluated query.
            conn[source].where(id: rows.map { |row| row[:id] }).delete
          end
          rows.size
        end

        # True when a usable connection exists and the given table is present;
        # any connection/introspection error counts as "not ready".
        def db_ready?(table)
          defined?(Legion::Data) && Legion::Data.connection&.table_exists?(table)
        rescue StandardError
          false
        end
      end
    end
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Sequel.migration do
4
4
  up do
5
- create_table(:rbac_role_assignments) do
5
+ create_table?(:rbac_role_assignments) do
6
6
  primary_key :id
7
7
  String :principal_type, null: false, size: 10
8
8
  String :principal_id, null: false, size: 255
@@ -16,7 +16,7 @@ Sequel.migration do
16
16
  index :team
17
17
  end
18
18
 
19
- create_table(:rbac_runner_grants) do
19
+ create_table?(:rbac_runner_grants) do
20
20
  primary_key :id
21
21
  String :team, null: false, size: 255
22
22
  String :runner_pattern, null: false, size: 500
@@ -27,7 +27,7 @@ Sequel.migration do
27
27
  index :team
28
28
  end
29
29
 
30
- create_table(:rbac_cross_team_grants) do
30
+ create_table?(:rbac_cross_team_grants) do
31
31
  primary_key :id
32
32
  String :source_team, null: false, size: 255
33
33
  String :target_team, null: false, size: 255
@@ -0,0 +1,57 @@
1
# frozen_string_literal: true

# Migration 021: archive tables backing Legion::Data::Archival.
# Archived rows keep their original id/timestamps in original_* columns and
# gain an archived_at marker.
Sequel.migration do
  up do
    # create_table? is a no-op when the table already exists, keeping this
    # migration idempotent (same pattern as the migration 015 fix).
    create_table?(:tasks_archive) do
      primary_key :id
      Integer :original_id, null: false
      String :function_name
      String :status
      String :runner_class
      column :args, :text
      column :result, :text
      String :queue
      Integer :relationship_id
      String :chain_id
      DateTime :original_created_at
      DateTime :original_updated_at
      DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP
      index :original_id
      index :chain_id
      index :archived_at
    end

    create_table?(:metering_records_archive) do
      primary_key :id
      Integer :original_id, null: false
      String :worker_id
      String :event_type
      String :extension
      String :runner_function
      String :status
      Integer :tokens_in
      Integer :tokens_out
      Float :cost_usd
      Integer :wall_clock_ms
      Integer :cpu_time_ms
      Integer :external_api_calls
      String :model
      String :tenant_id
      DateTime :original_created_at
      DateTime :archived_at, null: false, default: Sequel::CURRENT_TIMESTAMP
      index :original_id
      index :worker_id
      index :tenant_id
      index :archived_at
    end
  end

  down do
    # Reverse of creation order; drop_table? tolerates already-missing tables.
    drop_table?(:metering_records_archive)
    drop_table?(:tasks_archive)
  end
end
@@ -0,0 +1,29 @@
1
# frozen_string_literal: true

# Migration 022: memory_traces table for agent memory, with an optional
# pgvector embedding column (1536 dims, HNSW cosine index) on PostgreSQL.
Sequel.migration do
  up do
    # create_table? keeps the migration idempotent (same pattern as the
    # migration 015 fix).
    create_table?(:memory_traces) do
      primary_key :id
      String :agent_id, null: false, size: 64, index: true
      String :trace_type, null: false, size: 32
      String :content, text: true, null: false
      Float :significance, default: 0.5
      Float :confidence, default: 1.0
      String :associations, text: true
      String :metadata, text: true
      DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
      DateTime :accessed_at
      DateTime :decayed_at
      index %i[agent_id trace_type]
    end

    next unless adapter_scheme == :postgres

    # The embedding column is documented as optional: only add it when the
    # pgvector extension is actually installed — otherwise the
    # `ADD COLUMN ... vector(1536)` statement would abort the migration on
    # PostgreSQL servers without pgvector.
    next unless fetch("SELECT 1 FROM pg_extension WHERE extname = 'vector'").any?

    run 'ALTER TABLE memory_traces ADD COLUMN IF NOT EXISTS embedding vector(1536)'
    run 'CREATE INDEX IF NOT EXISTS idx_memory_traces_embedding ON memory_traces USING hnsw (embedding vector_cosine_ops)'
  end

  down do
    drop_table?(:memory_traces)
  end
end
@@ -0,0 +1,20 @@
1
# frozen_string_literal: true

# Migration 023: generic archive table used by Legion::Data::StorageTiers.
# Archived rows are stored as serialized blobs tagged with their source
# table/id and a numeric storage tier.
Sequel.migration do
  up do
    # create_table? keeps the migration idempotent (same pattern as the
    # migration 015 fix).
    create_table?(:data_archive) do
      primary_key :id
      String :source_table, null: false, size: 64, index: true
      Integer :source_id, null: false
      String :data, text: true, null: false
      Integer :tier, default: 1
      DateTime :archived_at, default: Sequel::CURRENT_TIMESTAMP
      index %i[source_table source_id]
      index :tier
    end
  end

  down do
    drop_table?(:data_archive)
  end
end
@@ -0,0 +1,27 @@
1
# frozen_string_literal: true

# Migration 024: adds a nullable, indexed tenant_id column to the core
# tables so rows can be partitioned per tenant. Both directions are
# defensive: tables that are missing, or that already have (up) / lack
# (down) the column, are skipped.
Sequel.migration do
  partitioned_tables = %i[tasks digital_workers audit_log memory_traces]

  up do
    partitioned_tables.each do |table|
      next unless table_exists?(table)

      already_partitioned = schema(table).any? { |column_name, _opts| column_name == :tenant_id }
      next if already_partitioned

      alter_table(table) do
        add_column :tenant_id, String, size: 64
        add_index :tenant_id
      end
    end
  end

  down do
    partitioned_tables.each do |table|
      next unless table_exists?(table)

      has_tenant_column = schema(table).any? { |column_name, _opts| column_name == :tenant_id }
      next unless has_tenant_column

      alter_table(table) do
        drop_index :tenant_id
        drop_column :tenant_id
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
# frozen_string_literal: true

# Migration 025: tenants registry with per-tenant status, worker/queue
# quotas, and token limits.
Sequel.migration do
  up do
    # create_table? keeps the migration idempotent (same pattern as the
    # migration 015 fix).
    create_table?(:tenants) do
      primary_key :id
      String :tenant_id, null: false, unique: true, size: 100
      String :name, size: 255
      String :status, default: 'active', size: 20
      Integer :max_workers, default: 10
      Integer :max_queue_depth, default: 10_000
      Float :monthly_token_limit
      Float :daily_token_limit
      DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
      DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
      index :status
    end
  end

  down do
    drop_table?(:tenants)
  end
end
@@ -0,0 +1,65 @@
1
# frozen_string_literal: true

module Legion
  module Data
    # Hot/warm/cold archival lifecycle built on the generic data_archive
    # table (migration 023). Rows are serialized to JSON when leaving the hot
    # tier; "export to cold" re-tags warm rows and returns them to the caller
    # for external storage.
    module StorageTiers
      # Numeric tier tags stored in data_archive.tier.
      TIERS = { hot: 0, warm: 1, cold: 2 }.freeze

      class << self
        # Moves up to batch_size rows older than age_days from +table+ into
        # data_archive (warm tier), deleting them from the source. Returns a
        # result Hash; every failure mode returns archived: 0 with a reason
        # instead of raising.
        def archive_to_warm(table:, age_days: 90, batch_size: 1000)
          conn = Legion::Data.connection
          return { archived: 0, reason: 'no_connection' } unless conn
          return { archived: 0, reason: 'no_archive_table' } unless conn.table_exists?(:data_archive)
          # A missing source table previously raised out of Sequel; fail soft
          # like the other guards instead.
          return { archived: 0, reason: 'no_source_table' } unless conn.table_exists?(table)

          cutoff = Time.now - (age_days * 86_400)
          records = conn[table].where { created_at < cutoff }.limit(batch_size).all
          return { archived: 0 } if records.empty?

          conn.transaction do
            records.each do |record|
              conn[:data_archive].insert(
                source_table: table.to_s, source_id: record[:id],
                data: Legion::JSON.dump(record),
                tier: TIERS[:warm],
                archived_at: Time.now.utc
              )
            end

            # Delete exactly the rows we serialized above.
            ids = records.map { |r| r[:id] }
            conn[table].where(id: ids).delete
          end

          { archived: records.size, table: table.to_s }
        end

        # Re-tags warm rows older than age_days as cold and returns them so
        # the caller can persist them to external (cold) storage.
        def export_to_cold(age_days: 365, batch_size: 5000)
          conn = Legion::Data.connection
          return { exported: 0 } unless conn&.table_exists?(:data_archive)

          cutoff = Time.now - (age_days * 86_400)
          records = conn[:data_archive]
                    .where(tier: TIERS[:warm])
                    .where { archived_at < cutoff }
                    .limit(batch_size).all
          return { exported: 0 } if records.empty?

          ids = records.map { |r| r[:id] }
          conn[:data_archive].where(id: ids).update(tier: TIERS[:cold])
          { exported: records.size, data: records }
        end

        # Row counts for the retained tiers; {} when data_archive is absent.
        def stats
          return {} unless Legion::Data.connection&.table_exists?(:data_archive)

          { warm: count_tier(:warm), cold: count_tier(:cold) }
        end

        private

        # Count of archived rows in the given tier; 0 on any query failure.
        def count_tier(tier)
          Legion::Data.connection[:data_archive].where(tier: TIERS[tier]).count
        rescue StandardError
          0
        end
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
# frozen_string_literal: true

module Legion
  module Data
    # Reusable pgvector helpers: extension management plus cosine / L2
    # nearest-neighbour search over a vector column.
    module Vector
      # Permitted shape for SQL identifiers we interpolate into raw SQL
      # (optionally schema/table qualified). The query vector itself is bound
      # as a parameter, but the column name is string-interpolated into
      # Sequel.lit, so it must be validated to prevent SQL injection.
      IDENTIFIER = /\A[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*\z/.freeze

      class << self
        # True when connected to PostgreSQL with the pgvector extension
        # installed; false on any error.
        def available?
          return false unless Legion::Data.connection
          return false unless Legion::Data.connection.adapter_scheme == :postgres

          Legion::Data.connection.fetch("SELECT 1 FROM pg_extension WHERE extname = 'vector'").any?
        rescue StandardError
          false
        end

        # Installs the pgvector extension (PostgreSQL only). Returns true on
        # success, false otherwise (logging a warning when logging is loaded).
        def ensure_extension!
          return false unless Legion::Data.connection&.adapter_scheme == :postgres

          Legion::Data.connection.run('CREATE EXTENSION IF NOT EXISTS vector')
          true
        rescue StandardError => e
          Legion::Logging.warn("pgvector extension creation failed: #{e.message}") if defined?(Legion::Logging)
          false
        end

        # Rows from +table+ ordered by cosine similarity to query_vector
        # (most similar first), each augmented with a :similarity value.
        # min_similarity > 0 filters out weaker matches. Returns [] when
        # pgvector is unavailable. Raises ArgumentError for unsafe column
        # names.
        def cosine_search(table:, column:, query_vector:, limit: 10, min_similarity: 0.0)
          column = validate_identifier!(column)
          return [] unless available?

          vec_literal = vector_literal(query_vector)
          ds = Legion::Data.connection[table]
               .select_all
               .select_append(Sequel.lit("1 - (#{column} <=> ?)", vec_literal).as(:similarity))
               .order(Sequel.lit("#{column} <=> ?", vec_literal))
               .limit(limit)

          ds = ds.where(Sequel.lit("1 - (#{column} <=> ?) >= ?", vec_literal, min_similarity)) if min_similarity.positive?
          ds.all
        end

        # Rows from +table+ ordered by L2 (Euclidean) distance to
        # query_vector (closest first), each augmented with :distance.
        # Returns [] when pgvector is unavailable. Raises ArgumentError for
        # unsafe column names.
        def l2_search(table:, column:, query_vector:, limit: 10)
          column = validate_identifier!(column)
          return [] unless available?

          vec_literal = vector_literal(query_vector)
          Legion::Data.connection[table]
                      .select_all
                      .select_append(Sequel.lit("#{column} <-> ?", vec_literal).as(:distance))
                      .order(Sequel.lit("#{column} <-> ?", vec_literal))
                      .limit(limit)
                      .all
        end

        private

        # Returns name as a String after confirming it is a safe SQL
        # identifier; raises ArgumentError otherwise. Required because the
        # column name is interpolated into raw SQL fragments above.
        def validate_identifier!(name)
          text = name.to_s
          raise ArgumentError, "invalid SQL identifier: #{text.inspect}" unless text.match?(IDENTIFIER)

          text
        end

        # pgvector input literal, e.g. "[0.1,0.2]"; always bound as a query
        # parameter, never interpolated.
        def vector_literal(query_vector)
          "[#{query_vector.join(',')}]"
        end
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.3.7'
5
+ VERSION = '1.4.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.7
4
+ version: 1.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -86,6 +86,8 @@ files:
86
86
  - exe/legionio_migrate
87
87
  - legion-data.gemspec
88
88
  - lib/legion/data.rb
89
+ - lib/legion/data/archival.rb
90
+ - lib/legion/data/archival/policy.rb
89
91
  - lib/legion/data/connection.rb
90
92
  - lib/legion/data/encryption/cipher.rb
91
93
  - lib/legion/data/encryption/key_provider.rb
@@ -114,6 +116,11 @@ files:
114
116
  - lib/legion/data/migrations/018_add_governance_events.rb
115
117
  - lib/legion/data/migrations/019_add_audit_hash_chain.rb
116
118
  - lib/legion/data/migrations/020_add_webhooks.rb
119
+ - lib/legion/data/migrations/021_add_archive_tables.rb
120
+ - lib/legion/data/migrations/022_add_memory_traces.rb
121
+ - lib/legion/data/migrations/023_add_data_archive.rb
122
+ - lib/legion/data/migrations/024_add_tenant_partition_columns.rb
123
+ - lib/legion/data/migrations/025_add_tenants_table.rb
117
124
  - lib/legion/data/model.rb
118
125
  - lib/legion/data/models/apollo_access_log.rb
119
126
  - lib/legion/data/models/apollo_entry.rb
@@ -133,6 +140,8 @@ files:
133
140
  - lib/legion/data/models/task.rb
134
141
  - lib/legion/data/models/task_log.rb
135
142
  - lib/legion/data/settings.rb
143
+ - lib/legion/data/storage_tiers.rb
144
+ - lib/legion/data/vector.rb
136
145
  - lib/legion/data/version.rb
137
146
  - sonar-project.properties
138
147
  homepage: https://github.com/LegionIO/legion-data