legion-data 1.8.5 → 1.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +7 -0
- data/CLAUDE.md +34 -5
- data/README.md +40 -7
- data/lib/legion/data/migrations/100_create_apollo_entries_sqlite.rb +82 -0
- data/lib/legion/data/migrations/101_add_apollo_identity_and_access_scope.rb +35 -0
- data/lib/legion/data/migrations/102_add_apollo_access_scope_and_identity_indexes.rb +15 -0
- data/lib/legion/data/version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: edd18502b1d95a3863975b1144fa9b53cbc1a51d6306f20d2e02cb6b316d488c
|
|
4
|
+
data.tar.gz: eae1da697b4296e99898852da936a7247791e39e11e2d70fa0d64403e02145d6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: da91b8a4a865c22dc85853f725eddaacbae069b4550ceebb38cf44cc07ec57c8c29fe0bdabccd0b542a677893af2c6622ac12ae43c8fcb395ae955627729e4f8
|
|
7
|
+
data.tar.gz: ff3630c3f7f33082ccef8f6b9c30be34e0d0b222c443ad6ff146963b3655d8c4239fa683293954a9fd1f95c8d476a3536b30304289e3c1b476c5c3536ded5297
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Legion::Data Changelog
|
|
2
2
|
|
|
3
|
+
## [1.8.6] - 2026-05-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Migration 100: creates `apollo_entries` and `apollo_entries_archive` tables on non-Postgres adapters (SQLite etc.), bringing them to parity with the existing Postgres schema.
|
|
7
|
+
- Migration 101: adds `access_scope VARCHAR(20) NOT NULL DEFAULT 'global'`, `identity_principal_id INTEGER`, `identity_id INTEGER`, `identity_canonical_name VARCHAR(255)` columns on both `apollo_entries` and `apollo_entries_archive` across all adapters. Existing rows default to `global` access scope.
|
|
8
|
+
- Migration 102: adds indexes on `apollo_entries` — full index on `access_scope`, partial indexes on `identity_principal_id` and `identity_id` (WHERE NOT NULL) across all adapters.
|
|
9
|
+
|
|
3
10
|
## [1.8.5] - 2026-05-09
|
|
4
11
|
|
|
5
12
|
### Removed
|
data/CLAUDE.md
CHANGED
|
@@ -2,7 +2,7 @@ Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all e
|
|
|
2
2
|
|
|
3
3
|
# legion-data
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Persistent storage gem for LegionIO. Owns Sequel database connections, numbered migrations, models, local SQLite state, extract timing persistence, audit/governance storage, identity/RBAC storage, Apollo storage, and the LLM lifecycle ledger.
|
|
6
6
|
|
|
7
7
|
## Commands
|
|
8
8
|
|
|
@@ -20,7 +20,7 @@ jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_d
|
|
|
20
20
|
|
|
21
21
|
## Architecture
|
|
22
22
|
|
|
23
|
-
- `lib/legion/data/connection.rb`:
|
|
23
|
+
- `lib/legion/data/connection.rb`: Sequel connection setup, diagnostics, fallback, query logging.
|
|
24
24
|
- `lib/legion/data/migration.rb`: numbered Sequel migrations.
|
|
25
25
|
- `lib/legion/data/model.rb`: shared model loader.
|
|
26
26
|
- `lib/legion/data/models/`: flat and namespaced Sequel model classes.
|
|
@@ -40,12 +40,10 @@ jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_d
|
|
|
40
40
|
## Sequel ORM Rules
|
|
41
41
|
|
|
42
42
|
Use Sequel associations as the object graph. References:
|
|
43
|
-
|
|
44
43
|
- https://sequel.jeremyevans.net/rdoc/classes/Sequel/Model/Associations/ClassMethods.html
|
|
45
44
|
- https://github.com/jeremyevans/sequel/blob/master/doc/association_basics.rdoc
|
|
46
45
|
|
|
47
46
|
Association mapping:
|
|
48
|
-
|
|
49
47
|
- Foreign key on this model: `many_to_one`.
|
|
50
48
|
- Foreign key on the associated model: `one_to_many` or `one_to_one`.
|
|
51
49
|
- Join table between models: `many_to_many`.
|
|
@@ -53,12 +51,43 @@ Association mapping:
|
|
|
53
51
|
|
|
54
52
|
When Sequel cannot infer names, set `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key` explicitly. Do not create association names that collide with real columns.
|
|
55
53
|
|
|
54
|
+
## Common Fields Standard
|
|
55
|
+
|
|
56
|
+
All new tables in legion-data should follow this column convention. Required fields must be present on every table. Optional fields are added when the domain warrants them.
|
|
57
|
+
|
|
58
|
+
### Required
|
|
59
|
+
|
|
60
|
+
| Column | Type | Purpose |
|
|
61
|
+
|--------|------|---------|
|
|
62
|
+
| `id` | `INTEGER PRIMARY KEY` (auto-increment) | Internal join key — never exposed externally |
|
|
63
|
+
| `identity_principal_id` | `INTEGER` FK → `identity_principals.id` | The principal who caused this row to exist |
|
|
64
|
+
| `identity_id` | `INTEGER` FK → `identities.id` | The specific provider-bound identity credential |
|
|
65
|
+
| `identity_canonical_name` | `VARCHAR(255)` | Denormalized snapshot of the identity's canonical name for fast filtering without joins. This value is a point-in-time copy — it may become stale if the principal is renamed. Use the FK join for authoritative lookups. |
|
|
66
|
+
| `created_at` | `TIMESTAMPTZ` | Row creation time |
|
|
67
|
+
| `updated_at` | `TIMESTAMPTZ` | Last modification time |
|
|
68
|
+
|
|
69
|
+
### Optional (add when applicable)
|
|
70
|
+
|
|
71
|
+
| Column | Type | Purpose |
|
|
72
|
+
|--------|------|---------|
|
|
73
|
+
| `expires_at` | `TIMESTAMPTZ` | TTL / archival eligibility |
|
|
74
|
+
| `content_type` | `VARCHAR(...)` | Classifier for the row's payload kind |
|
|
75
|
+
| `conversation_id` | `INTEGER` FK → `llm_conversations.id` | Links to the LLM conversation that produced this row |
|
|
76
|
+
| `contains_phi` | `BOOLEAN` | Row contains Protected Health Information |
|
|
77
|
+
| `contains_pii` | `BOOLEAN` | Row contains Personally Identifiable Information |
|
|
78
|
+
|
|
79
|
+
### Naming rules
|
|
80
|
+
|
|
81
|
+
- Identity FKs always use `identity_principal_id` and `identity_id` — never `agent_id`, `principal_id`, `user_id`, or other loose variants for new tables.
|
|
82
|
+
- The denormalized string field is always `identity_canonical_name` — not `canonical_name`, `actor`, `agent_id`, or `identity_name`.
|
|
83
|
+
- Existing columns (`agent_id`, `source_agent`, `submitted_by`, `actor`, etc.) on pre-existing tables are **not renamed or removed** — they are historical record and intentionally left as-is. New identity columns are purely additive.
|
|
84
|
+
|
|
56
85
|
## Current Schema Landmarks
|
|
57
86
|
|
|
58
87
|
- `074`-`076`: Apollo field width, task idempotency, extract step timings.
|
|
59
88
|
- `077`-`090`: LLM lifecycle ledger.
|
|
60
89
|
- `091`-`096`: portable identity companion tables.
|
|
61
|
-
- Namespaced models
|
|
90
|
+
- Namespaced models: `Identity::*`, `Apollo::*`, `RBAC::*`, `LLM::*`.
|
|
62
91
|
|
|
63
92
|
## Boundaries
|
|
64
93
|
|
data/README.md
CHANGED
|
@@ -403,6 +403,39 @@ Legion::Data.reload_static_cache
|
|
|
403
403
|
|
|
404
404
|
---
|
|
405
405
|
|
|
406
|
+
## Common Fields Standard
|
|
407
|
+
|
|
408
|
+
All new tables follow a column convention. Required fields are present on every table. Optional fields are added when the domain warrants them.
|
|
409
|
+
|
|
410
|
+
### Required
|
|
411
|
+
|
|
412
|
+
| Column | Type | Notes |
|
|
413
|
+
|--------|------|-------|
|
|
414
|
+
| `id` | `INTEGER PRIMARY KEY` (auto-increment) | Internal join key. Never expose externally — use a `uuid` column for API/log references. |
|
|
415
|
+
| `identity_principal_id` | `INTEGER` FK → `identity_principals.id` | The principal who caused this row to exist. |
|
|
416
|
+
| `identity_id` | `INTEGER` FK → `identities.id` | The specific provider-bound identity credential. |
|
|
417
|
+
| `identity_canonical_name` | `VARCHAR(255)` | Denormalized snapshot of the principal's canonical name. Point-in-time copy — may become stale if the principal is renamed. Use the FK join for authoritative lookups. Exists for fast filtering without joins. |
|
|
418
|
+
| `created_at` | `TIMESTAMPTZ` | Row creation time. |
|
|
419
|
+
| `updated_at` | `TIMESTAMPTZ` | Last modification time. |
|
|
420
|
+
|
|
421
|
+
### Optional (add when applicable)
|
|
422
|
+
|
|
423
|
+
| Column | Type | Notes |
|
|
424
|
+
|--------|------|-------|
|
|
425
|
+
| `expires_at` | `TIMESTAMPTZ` | TTL / archival eligibility. |
|
|
426
|
+
| `content_type` | `VARCHAR(...)` | Classifier for the row's payload kind. |
|
|
427
|
+
| `conversation_id` | `INTEGER` FK → `llm_conversations.id` | Links to the LLM conversation that produced this row. |
|
|
428
|
+
| `contains_phi` | `BOOLEAN` | Row contains Protected Health Information. |
|
|
429
|
+
| `contains_pii` | `BOOLEAN` | Row contains Personally Identifiable Information. |
|
|
430
|
+
|
|
431
|
+
### Naming rules
|
|
432
|
+
|
|
433
|
+
- Identity FKs are always `identity_principal_id` and `identity_id` — not `principal_id`, `agent_id`, `user_id`, or other loose variants on new tables.
|
|
434
|
+
- The denormalized string column is always `identity_canonical_name` — not `canonical_name`, `actor`, `agent_id`, or `identity_name`.
|
|
435
|
+
- **Existing columns on pre-existing tables are never renamed or removed.** Columns like `agent_id`, `source_agent`, `submitted_by`, and `actor` are historical record. The new identity columns are purely additive.
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
406
439
|
## Data Models
|
|
407
440
|
|
|
408
441
|
| Model | Table | Description |
|
|
@@ -441,13 +474,13 @@ The `Legion::Data::Model::Identity::*`, `Apollo::*`, and `RBAC::*` namespaces pr
|
|
|
441
474
|
|
|
442
475
|
| Model | Table | Description |
|
|
443
476
|
|-------|-------|-------------|
|
|
444
|
-
| `Identity::Provider` | `
|
|
445
|
-
| `Identity::ProviderCapability` | `
|
|
446
|
-
| `Identity::Principal` | `
|
|
447
|
-
| `Identity::Identity` | `
|
|
448
|
-
| `Identity::Group` | `
|
|
449
|
-
| `Identity::GroupMembership` | `
|
|
450
|
-
| `Identity::AuditLog` | `
|
|
477
|
+
| `Identity::Provider` | `identity_providers` | Provider records with integer primary keys and public UUIDs |
|
|
478
|
+
| `Identity::ProviderCapability` | `identity_provider_capabilities` | Normalized provider capability declarations |
|
|
479
|
+
| `Identity::Principal` | `identity_principals` | Human, service, worker, or system principals |
|
|
480
|
+
| `Identity::Identity` | `identities` | Provider-bound identities for principals |
|
|
481
|
+
| `Identity::Group` | `identity_groups` | Identity groups |
|
|
482
|
+
| `Identity::GroupMembership` | `identity_group_memberships` | Principal and identity group membership rows |
|
|
483
|
+
| `Identity::AuditLog` | `identity_audit_log` | Identity lifecycle and lookup audit events |
|
|
451
484
|
|
|
452
485
|
### LLM Lifecycle Models
|
|
453
486
|
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Sequel.migration do
|
|
4
|
+
up do
|
|
5
|
+
next if adapter_scheme == :postgres
|
|
6
|
+
|
|
7
|
+
create_table(:apollo_entries) do
|
|
8
|
+
primary_key :id
|
|
9
|
+
String :content, text: true, null: false
|
|
10
|
+
String :content_type, null: false, size: 50
|
|
11
|
+
Float :confidence, default: 0.5
|
|
12
|
+
String :source_agent, null: false, size: 255
|
|
13
|
+
String :source_context, text: true, default: '{}'
|
|
14
|
+
String :tags, text: true, default: '{}'
|
|
15
|
+
String :status, null: false, size: 20, default: 'candidate'
|
|
16
|
+
Integer :access_count, default: 0
|
|
17
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
18
|
+
DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
|
|
19
|
+
DateTime :confirmed_at
|
|
20
|
+
String :source_provider, size: 255
|
|
21
|
+
String :source_channel, size: 100
|
|
22
|
+
String :knowledge_domain, size: 255, default: 'general'
|
|
23
|
+
String :submitted_by, size: 255
|
|
24
|
+
String :submitted_from, size: 255
|
|
25
|
+
String :content_hash, fixed: true, size: 64
|
|
26
|
+
String :summary_l0, size: 500
|
|
27
|
+
String :summary_l1, text: true
|
|
28
|
+
String :knowledge_tier, null: false, size: 4, default: 'L2'
|
|
29
|
+
String :parent_entry_id, size: 36
|
|
30
|
+
DateTime :l0_generated_at
|
|
31
|
+
DateTime :l1_generated_at
|
|
32
|
+
String :parent_knowledge_id, size: 36
|
|
33
|
+
TrueClass :is_latest, null: false, default: true
|
|
34
|
+
String :supersession_type, size: 20
|
|
35
|
+
DateTime :expires_at
|
|
36
|
+
String :forget_reason, size: 255
|
|
37
|
+
TrueClass :is_inference, null: false, default: false
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
create_table(:apollo_entries_archive) do
|
|
41
|
+
primary_key :id
|
|
42
|
+
String :content, text: true, null: false
|
|
43
|
+
String :content_type, null: false, size: 50
|
|
44
|
+
Float :confidence, default: 0.5
|
|
45
|
+
String :source_agent, null: false, size: 255
|
|
46
|
+
String :source_context, text: true, default: '{}'
|
|
47
|
+
String :tags, text: true, default: '{}'
|
|
48
|
+
String :status, null: false, size: 20, default: 'candidate'
|
|
49
|
+
Integer :access_count, default: 0
|
|
50
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
51
|
+
DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
|
|
52
|
+
DateTime :confirmed_at
|
|
53
|
+
String :source_provider, size: 255
|
|
54
|
+
String :source_channel, size: 100
|
|
55
|
+
String :knowledge_domain, size: 255, default: 'general'
|
|
56
|
+
String :submitted_by, size: 255
|
|
57
|
+
String :submitted_from, size: 255
|
|
58
|
+
String :content_hash, fixed: true, size: 64
|
|
59
|
+
String :summary_l0, size: 500
|
|
60
|
+
String :summary_l1, text: true
|
|
61
|
+
String :knowledge_tier, null: false, size: 4, default: 'L2'
|
|
62
|
+
String :parent_entry_id, size: 36
|
|
63
|
+
DateTime :l0_generated_at
|
|
64
|
+
DateTime :l1_generated_at
|
|
65
|
+
String :parent_knowledge_id, size: 36
|
|
66
|
+
TrueClass :is_latest, null: false, default: true
|
|
67
|
+
String :supersession_type, size: 20
|
|
68
|
+
DateTime :expires_at
|
|
69
|
+
String :forget_reason, size: 255
|
|
70
|
+
TrueClass :is_inference, null: false, default: false
|
|
71
|
+
DateTime :archived_at, default: Sequel::CURRENT_TIMESTAMP
|
|
72
|
+
String :archive_reason, text: true
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
down do
|
|
77
|
+
next if adapter_scheme == :postgres
|
|
78
|
+
|
|
79
|
+
drop_table(:apollo_entries_archive) if table_exists?(:apollo_entries_archive)
|
|
80
|
+
drop_table(:apollo_entries) if table_exists?(:apollo_entries)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Sequel.migration do
|
|
4
|
+
up do
|
|
5
|
+
alter_table(:apollo_entries) do
|
|
6
|
+
add_column :access_scope, String, size: 20, null: false, default: 'global'
|
|
7
|
+
add_column :identity_principal_id, Integer, null: true
|
|
8
|
+
add_column :identity_id, Integer, null: true
|
|
9
|
+
add_column :identity_canonical_name, String, size: 255, null: true
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
alter_table(:apollo_entries_archive) do
|
|
13
|
+
add_column :access_scope, String, size: 20, null: false, default: 'global'
|
|
14
|
+
add_column :identity_principal_id, Integer, null: true
|
|
15
|
+
add_column :identity_id, Integer, null: true
|
|
16
|
+
add_column :identity_canonical_name, String, size: 255, null: true
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
down do
|
|
21
|
+
alter_table(:apollo_entries) do
|
|
22
|
+
drop_column :access_scope
|
|
23
|
+
drop_column :identity_principal_id
|
|
24
|
+
drop_column :identity_id
|
|
25
|
+
drop_column :identity_canonical_name
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
alter_table(:apollo_entries_archive) do
|
|
29
|
+
drop_column :access_scope
|
|
30
|
+
drop_column :identity_principal_id
|
|
31
|
+
drop_column :identity_id
|
|
32
|
+
drop_column :identity_canonical_name
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Sequel.migration do
|
|
4
|
+
up do
|
|
5
|
+
run 'CREATE INDEX IF NOT EXISTS idx_apollo_access_scope ON apollo_entries (access_scope)'
|
|
6
|
+
run 'CREATE INDEX IF NOT EXISTS idx_apollo_identity_principal_id ON apollo_entries (identity_principal_id) WHERE identity_principal_id IS NOT NULL'
|
|
7
|
+
run 'CREATE INDEX IF NOT EXISTS idx_apollo_identity_id ON apollo_entries (identity_id) WHERE identity_id IS NOT NULL'
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
down do
|
|
11
|
+
run 'DROP INDEX IF EXISTS idx_apollo_access_scope'
|
|
12
|
+
run 'DROP INDEX IF EXISTS idx_apollo_identity_principal_id'
|
|
13
|
+
run 'DROP INDEX IF EXISTS idx_apollo_identity_id'
|
|
14
|
+
end
|
|
15
|
+
end
|
data/lib/legion/data/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-data
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.8.5
|
|
4
|
+
version: 1.8.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -246,6 +246,9 @@ files:
|
|
|
246
246
|
- lib/legion/data/migrations/097_add_llm_dispatch_fields.rb
|
|
247
247
|
- lib/legion/data/migrations/098_drop_legacy_identity_tables.rb
|
|
248
248
|
- lib/legion/data/migrations/099_rename_portable_identity_tables.rb
|
|
249
|
+
- lib/legion/data/migrations/100_create_apollo_entries_sqlite.rb
|
|
250
|
+
- lib/legion/data/migrations/101_add_apollo_identity_and_access_scope.rb
|
|
251
|
+
- lib/legion/data/migrations/102_add_apollo_access_scope_and_identity_indexes.rb
|
|
249
252
|
- lib/legion/data/model.rb
|
|
250
253
|
- lib/legion/data/models/apollo/access_log.rb
|
|
251
254
|
- lib/legion/data/models/apollo/entries.rb
|