legion-data 1.6.1 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '02869f702b7e4ca994edd3fd2b503afa5cca4d450741ab99ed85277315c90ceb'
4
- data.tar.gz: 83b8136412ae23993b823e9548f8cf341f66c5f8e37f4002a326ec60e9a8c375
3
+ metadata.gz: '048cfb8d6ead50ef6ddc554765251d61121dc0f6e99491b9675e410f086cd2fe'
4
+ data.tar.gz: 58cced043937061ce27fd3c111d5b0b5c5fd3416ab622a3a7700f1ce553628a4
5
5
  SHA512:
6
- metadata.gz: fbe190c50347a42a28b53627c8a66cf951168c606454672a9f1219cb21373cba8ce0f6da7c557c360c16127ba7cacf317cad2a47b296c72f495581e2253e2128
7
- data.tar.gz: 90e3ca22009bff85eccfa9a8803a5fac9563389518b4ae26145561e1d7600b80d432bd8015677c3b64d35f38e9ed42338f82acfa9a549e490201db53342e0a65
6
+ metadata.gz: 9ba6d97c8cee58a5f41cd5ecc5c2229e75d4880368c43ec44d2f3b969ff144c81c7d6f49fd91d84819b1d3665eab37a7f7ff08f22c0e579109a96ace83d24262
7
+ data.tar.gz: ba69290675fbf53d9185dcdbc62e46e344b6aad3594c7dc4fd76fdf48af386a2b97d5b1e7fe70606b1c91dacee32e5a4117d6cc5ccf34a184fac89f4ce7b2e26
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Legion::Data Changelog
2
2
 
3
+ ## [1.6.2] - 2026-03-25
4
+
5
+ ### Changed
6
+ - Migration 041: Resize all pgvector columns from `vector(1536)` to `vector(1024)` for cross-provider embedding compatibility (apollo_entries.embedding, functions.embedding_vector, memory_traces.embedding). Drops and recreates HNSW cosine indexes.
7
+
3
8
  ## [1.6.1] - 2026-03-25
4
9
 
5
10
  ### Fixed
@@ -61,6 +61,55 @@ module Legion
61
61
  hot + warm
62
62
  end
63
63
 
64
# Moves one batch of finished tasks from +tasks+ into +tasks_archive+.
#
# A task qualifies when its status is "completed" or "failed" and its
# +created+ timestamp is earlier than +days_old+ days before now. At most
# +batch_size+ tasks are moved per call. Nothing is done (and zero is
# reported) when there is no connection or either table is missing.
#
# @param days_old [Integer] minimum age, in days, of a task to archive
# @param batch_size [Integer] upper bound on tasks moved by this call
# @return [Hash] +{ archived: Integer, cutoff: String }+ where +cutoff+ is
#   the ISO 8601 form of the age threshold that was applied
def archive_completed_tasks(days_old: 90, batch_size: 1000)
  conn = Legion::Data.connection
  cutoff = Time.now - (days_old * 86_400)
  cutoff_stamp = cutoff.iso8601

  return { archived: 0, cutoff: cutoff_stamp } unless conn&.table_exists?(:tasks) && conn.table_exists?(:tasks_archive)

  archived = 0
  conn.transaction do
    # Load the batch exactly once. A LIMITed dataset without an ORDER is not
    # guaranteed to yield the same rows on every evaluation, so counting,
    # iterating and deleting through the same lazy dataset could archive one
    # set of rows and delete another.
    batch = conn[:tasks]
            .where(status: %w[completed failed])
            .where(Sequel.lit('created < ?', cutoff))
            .limit(batch_size)
            .all

    unless batch.empty?
      # archive_reason is only written when the archive table has that column.
      tag_reason = conn.schema(:tasks_archive).any? { |column, _| column == :archive_reason }
      archive = conn[:tasks_archive]

      batch.each do |task|
        entry = {
          original_id: task[:id],
          status: task[:status],
          relationship_id: task[:relationship_id],
          original_created_at: task[:created],
          original_updated_at: task[:updated],
          archived_at: Time.now
        }
        entry[:archive_reason] = 'completed_task_archival' if tag_reason
        archive.insert(entry)
      end

      # Delete precisely the rows that were just copied.
      conn[:tasks].where(id: batch.map { |task| task[:id] }).delete
      archived = batch.size
    end
  end

  Legion::Logging.info "archive_completed_tasks: archived #{archived} tasks (cutoff: #{cutoff_stamp})" if defined?(Legion::Logging)
  { archived: archived, cutoff: cutoff_stamp }
end
98
+
99
# Runs the periodic archival jobs and collects their results.
#
# Completed-task archival always runs. Metering records are archived through
# Legion::Data::Retention only when a connection is available and the
# +metering_records+ table exists.
#
# @return [Hash] +:tasks+ is always present; +:metering+ only when the
#   metering archival ran
def run_scheduled_archival
  summary = { tasks: archive_completed_tasks }

  db = Legion::Data.connection
  return summary unless db&.table_exists?(:metering_records)

  summary[:metering] = Legion::Data::Retention.archive_old_records(
    table: :metering_records, date_column: :recorded_at
  )
  summary
end
112
+
64
113
  private
65
114
 
66
115
  def archive_table!(source:, destination:, cutoff:, batch_size:, dry_run:)
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  # Each entry is [table, embedding column, HNSW index name].
  vector_columns = [
    [:apollo_entries, 'embedding', 'idx_apollo_entries_embedding'],
    [:functions, 'embedding_vector', 'idx_functions_embedding'],
    [:memory_traces, 'embedding', 'idx_memory_traces_embedding']
  ].freeze

  # Retypes every listed column to vector(<dimensions>) on tables that exist.
  # The cosine HNSW index is dropped first and recreated afterwards.
  resize_vectors = lambda do |db, dimensions|
    vector_columns.each do |table, column, index|
      next unless db.table_exists?(table)

      db.run "DROP INDEX IF EXISTS #{index}"
      db.run "ALTER TABLE #{table} ALTER COLUMN #{column} TYPE vector(#{dimensions})"
      db.run "CREATE INDEX #{index} ON #{table} USING hnsw (#{column} vector_cosine_ops)"
    end
  end

  up do
    next unless adapter_scheme == :postgres

    # Resize embedding columns from 1536 to 1024 for cross-provider compatibility
    # (Bedrock Titan v2, OpenAI with dimensions:, Ollama mxbai-embed-large all support 1024)
    # Knowledge store is empty so no data re-embedding needed.
    resize_vectors.call(self, 1024)
  end

  down do
    next unless adapter_scheme == :postgres

    # Restore the previous 1536-dimension column type.
    resize_vectors.call(self, 1536)
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  # Registry tables that receive a nullable, indexed tenant_id column.
  registry_tables = %i[extensions functions runners nodes settings value_metrics].freeze

  up do
    registry_tables.each do |table|
      # Skip tables that are absent or already carry the column.
      next if !table_exists?(table) || schema(table).map(&:first).include?(:tenant_id)

      index_name = :"idx_#{table}_tenant_id"
      alter_table(table) do
        add_column :tenant_id, String, size: 64
        add_index :tenant_id, name: index_name
      end
    end
  end

  down do
    registry_tables.each do |table|
      # Only touch tables that exist and still have the column.
      next unless table_exists?(table) && schema(table).map(&:first).include?(:tenant_id)

      index_name = :"idx_#{table}_tenant_id"
      alter_table(table) do
        drop_index :tenant_id, name: index_name
        drop_column :tenant_id
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  # Tables considered for row-level security.
  rls_tables = %i[
    tasks digital_workers audit_log memory_traces extensions
    functions runners nodes settings value_metrics
  ].freeze

  up do
    next unless adapter_scheme == :postgres

    rls_tables.each do |table|
      # A policy is only created for existing tables that have a tenant_id column.
      next unless table_exists?(table) && schema(table).map(&:first).include?(:tenant_id)

      policy = "tenant_isolation_#{table}"

      run "ALTER TABLE #{table} ENABLE ROW LEVEL SECURITY"
      # CREATE POLICY is wrapped in a DO block that first checks pg_policies,
      # so an already-present policy is left alone.
      run <<~SQL
        DO $$ BEGIN
        IF NOT EXISTS (
        SELECT 1 FROM pg_policies WHERE tablename = '#{table}' AND policyname = '#{policy}'
        ) THEN
        CREATE POLICY #{policy} ON #{table}
        USING (tenant_id = current_setting('app.current_tenant', true));
        END IF;
        END $$;
      SQL
    end
  end

  down do
    next unless adapter_scheme == :postgres

    rls_tables.each do |table|
      next unless table_exists?(table)

      run "DROP POLICY IF EXISTS tenant_isolation_#{table} ON #{table}"
      run "ALTER TABLE #{table} DISABLE ROW LEVEL SECURITY"
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  up do
    # `next`, not `return`: this block runs as a proc after the file has
    # finished loading, where `return` raises LocalJumpError.
    next unless table_exists?(:memory_traces)

    existing = schema(:memory_traces).map(&:first)

    # Add each column only when it is missing.
    alter_table(:memory_traces) do
      add_column :trace_id, String, size: 36 unless existing.include?(:trace_id)
      add_column :strength, Float, default: 0.5 unless existing.include?(:strength)
      add_column :peak_strength, Float, default: 0.5 unless existing.include?(:peak_strength)
      add_column :base_decay_rate, Float, default: 0.05 unless existing.include?(:base_decay_rate)
      add_column :emotional_valence, Float, default: 0.0 unless existing.include?(:emotional_valence)
      add_column :emotional_intensity, Float, default: 0.0 unless existing.include?(:emotional_intensity)
      add_column :domain_tags, :text unless existing.include?(:domain_tags)
      add_column :origin, String, size: 50 unless existing.include?(:origin)
      add_column :source_agent_id, String, size: 255 unless existing.include?(:source_agent_id)
      add_column :storage_tier, String, size: 10, default: 'warm' unless existing.include?(:storage_tier)
      add_column :last_reinforced, DateTime unless existing.include?(:last_reinforced)
      add_column :last_decayed, DateTime unless existing.include?(:last_decayed)
      add_column :reinforcement_count, Integer, default: 0 unless existing.include?(:reinforcement_count)
      add_column :unresolved, TrueClass, default: false unless existing.include?(:unresolved)
      add_column :consolidation_candidate, TrueClass, default: false unless existing.include?(:consolidation_candidate)
      add_column :parent_trace_id, String, size: 36 unless existing.include?(:parent_trace_id)
      add_column :encryption_key_id, String, size: 255 unless existing.include?(:encryption_key_id)
      add_column :partition_id, String, size: 255 unless existing.include?(:partition_id)
    end

    # Look the index names up on the same implicit receiver that serves
    # `schema` and `table_exists?`. Best effort: if the lookup fails, treat
    # the table as having no known indexes.
    index_names = begin
      indexes(:memory_traces).keys
    rescue StandardError
      []
    end

    # The unique trace_id index is tied to the column having just been added.
    add_index :memory_traces, :trace_id, unique: true, name: :idx_memory_traces_trace_id unless existing.include?(:trace_id)

    add_index :memory_traces, :storage_tier, name: :idx_memory_traces_storage_tier unless index_names.include?(:idx_memory_traces_storage_tier)
    add_index :memory_traces, :partition_id, name: :idx_memory_traces_partition_id unless index_names.include?(:idx_memory_traces_partition_id)
    add_index :memory_traces, %i[partition_id trace_type], name: :idx_memory_traces_partition_type unless index_names.include?(:idx_memory_traces_partition_type)
    add_index :memory_traces, :unresolved, name: :idx_memory_traces_unresolved unless index_names.include?(:idx_memory_traces_unresolved)
  end

  down do
    # `next` for the same reason as in `up`.
    next unless table_exists?(:memory_traces)

    existing = schema(:memory_traces).map(&:first)

    # Drop only the columns that are currently present, one statement each.
    %i[trace_id strength peak_strength base_decay_rate emotional_valence emotional_intensity
       domain_tags origin source_agent_id storage_tier last_reinforced last_decayed
       reinforcement_count unresolved consolidation_candidate parent_trace_id
       encryption_key_id partition_id].each do |col|
      alter_table(:memory_traces) { drop_column col } if existing.include?(col)
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  up do
    # `next`, not `return`: this block runs as a proc after the file has
    # finished loading, so `return` would raise LocalJumpError exactly when
    # the table already exists and the migration should be a no-op.
    next if table_exists?(:memory_associations)

    # One row per pair of trace ids, with a co-activation counter and a
    # linked flag; the pair is unique.
    create_table(:memory_associations) do
      primary_key :id
      String :trace_id_a, size: 36, null: false
      String :trace_id_b, size: 36, null: false
      Integer :coactivation_count, default: 1, null: false
      TrueClass :linked, default: false, null: false
      String :tenant_id, size: 64
      DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
      DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP

      unique %i[trace_id_a trace_id_b]
      index :trace_id_a
      index :trace_id_b
      index :tenant_id
    end
  end

  down do
    drop_table?(:memory_associations)
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
Sequel.migration do
  up do
    # `next`, not `return`: this block runs as a proc after the file has
    # finished loading, so `return` would raise LocalJumpError exactly when
    # the table already exists and the migration should be a no-op.
    next if table_exists?(:metering_hourly_rollup)

    # One row per (worker, provider, model, hour) holding token, call, cost
    # and latency totals.
    create_table(:metering_hourly_rollup) do
      primary_key :id
      String :worker_id, size: 36, null: false
      String :provider, size: 100, null: false
      String :model_id, size: 255, null: false
      DateTime :hour, null: false
      Integer :total_input_tokens, default: 0, null: false
      Integer :total_output_tokens, default: 0, null: false
      Integer :total_thinking_tokens, default: 0, null: false
      Integer :total_calls, default: 0, null: false
      Float :total_cost_usd, default: 0.0, null: false
      Float :avg_latency_ms, default: 0.0, null: false
      String :tenant_id, size: 64
      DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP

      unique %i[worker_id provider model_id hour], name: :idx_rollup_unique_hour
      index :hour
      index :tenant_id
      index %i[worker_id hour]
    end
  end

  down do
    drop_table?(:metering_hourly_rollup)
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Data
    # Helpers that set, read and clear the +app.current_tenant+ session
    # setting on the Legion::Data connection. Every helper is a no-op unless
    # the data layer reports itself connected and the adapter is PostgreSQL.
    module Rls
      # Tables listed for row-level security.
      RLS_TABLES = %i[
        tasks digital_workers audit_log memory_traces extensions
        functions runners nodes settings value_metrics
      ].freeze

      module_function

      # @return [Boolean] true only when settings report a live connection and
      #   the adapter scheme is :postgres; any error while checking counts as false
      def rls_enabled?
        return false unless Legion::Settings[:data][:connected]

        Legion::Data.connection.adapter_scheme == :postgres
      rescue StandardError
        false
      end

      # Sets +app.current_tenant+ to the string form of +tenant_id+.
      #
      # @param tenant_id [#to_s] tenant identifier
      # @return [nil]
      def assign_tenant(tenant_id)
        return unless rls_enabled?

        Legion::Data.connection.run(
          Sequel.lit('SET app.current_tenant = ?', tenant_id.to_s)
        )
      end

      # Reads the current value of +app.current_tenant+.
      #
      # @return [String, nil] the tenant id; nil when RLS is disabled, the
      #   lookup raises a database error, or the value is blank
      def current_tenant
        return nil unless rls_enabled?

        value = Legion::Data.connection.fetch('SHOW app.current_tenant').first&.values&.first
        # A blank value means "no tenant" (presumably what the server reports
        # after a RESET of this setting). Returning nil lets with_tenant
        # restore via RESET instead of assigning an empty tenant.
        value.to_s.empty? ? nil : value
      rescue Sequel::DatabaseError
        nil
      end

      # Clears +app.current_tenant+.
      #
      # @return [nil]
      def reset_tenant
        return unless rls_enabled?

        Legion::Data.connection.run('RESET app.current_tenant')
      end

      # Runs the block with +app.current_tenant+ set to +tenant_id+, then
      # restores the previous value (or resets it if there was none), even
      # when the block raises.
      #
      # @param tenant_id [#to_s] tenant identifier to apply during the block
      # @yield the work to perform under that tenant
      # @return [Object] the block's return value
      def with_tenant(tenant_id)
        return yield unless rls_enabled?

        # SET is per database session. Hold one pooled connection for the
        # whole block so the SET, the yielded queries and the restore all use
        # the same session, and the setting is restored before the connection
        # goes back to the pool.
        Legion::Data.connection.synchronize do
          previous = current_tenant
          begin
            assign_tenant(tenant_id)
            yield
          ensure
            previous ? assign_tenant(previous) : reset_tenant
          end
        end
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.6.1'
5
+ VERSION = '1.6.3'
6
6
  end
7
7
  end
data/lib/legion/data.rb CHANGED
@@ -12,6 +12,7 @@ require_relative 'data/spool'
12
12
  require_relative 'data/partition_manager'
13
13
  require_relative 'data/archiver'
14
14
  require_relative 'data/helper'
15
+ require_relative 'data/rls'
15
16
 
16
17
  module Legion
17
18
  module Data
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -141,6 +141,12 @@ files:
141
141
  - lib/legion/data/migrations/038_add_conversations.rb
142
142
  - lib/legion/data/migrations/039_add_audit_archive_manifest.rb
143
143
  - lib/legion/data/migrations/040_add_slow_query_indexes.rb
144
+ - lib/legion/data/migrations/041_resize_vector_columns.rb
145
+ - lib/legion/data/migrations/042_add_tenant_to_registry_tables.rb
146
+ - lib/legion/data/migrations/043_add_rls_placeholder.rb
147
+ - lib/legion/data/migrations/044_expand_memory_traces.rb
148
+ - lib/legion/data/migrations/045_add_memory_associations.rb
149
+ - lib/legion/data/migrations/046_add_metering_hourly_rollup.rb
144
150
  - lib/legion/data/model.rb
145
151
  - lib/legion/data/models/apollo_access_log.rb
146
152
  - lib/legion/data/models/apollo_entry.rb
@@ -161,6 +167,7 @@ files:
161
167
  - lib/legion/data/models/task_log.rb
162
168
  - lib/legion/data/partition_manager.rb
163
169
  - lib/legion/data/retention.rb
170
+ - lib/legion/data/rls.rb
164
171
  - lib/legion/data/settings.rb
165
172
  - lib/legion/data/spool.rb
166
173
  - lib/legion/data/storage_tiers.rb