legion-data 1.7.3 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.pre-commit-config.yaml +29 -0
- data/AGENTS.md +66 -13
- data/CHANGELOG.md +29 -0
- data/CLAUDE.md +44 -307
- data/README.md +119 -7
- data/lib/legion/data/connection.rb +3 -1
- data/lib/legion/data/migrations/077_create_llm_conversations.rb +32 -0
- data/lib/legion/data/migrations/078_create_llm_messages.rb +33 -0
- data/lib/legion/data/migrations/079_create_llm_message_inference_requests.rb +47 -0
- data/lib/legion/data/migrations/080_create_llm_message_inference_responses.rb +39 -0
- data/lib/legion/data/migrations/081_add_llm_message_inference_foreign_keys.rb +17 -0
- data/lib/legion/data/migrations/082_create_llm_route_attempts.rb +31 -0
- data/lib/legion/data/migrations/083_create_llm_message_inference_metrics.rb +36 -0
- data/lib/legion/data/migrations/084_create_llm_tool_calls.rb +32 -0
- data/lib/legion/data/migrations/085_add_llm_message_tool_call_foreign_key.rb +15 -0
- data/lib/legion/data/migrations/086_create_llm_tool_call_attempts.rb +30 -0
- data/lib/legion/data/migrations/087_create_llm_conversation_compactions.rb +31 -0
- data/lib/legion/data/migrations/088_create_llm_policy_evaluations.rb +33 -0
- data/lib/legion/data/migrations/089_create_llm_security_events.rb +33 -0
- data/lib/legion/data/migrations/090_create_llm_registry_events.rb +23 -0
- data/lib/legion/data/migrations/091_create_portable_identity_providers.rb +35 -0
- data/lib/legion/data/migrations/092_create_portable_identity_principals.rb +25 -0
- data/lib/legion/data/migrations/093_create_portable_identities.rb +31 -0
- data/lib/legion/data/migrations/094_create_portable_identity_groups.rb +21 -0
- data/lib/legion/data/migrations/095_create_portable_identity_group_memberships.rb +25 -0
- data/lib/legion/data/migrations/096_create_portable_identity_audit_log.rb +26 -0
- data/lib/legion/data/migrations/097_add_llm_dispatch_fields.rb +16 -0
- data/lib/legion/data/model.rb +11 -1
- data/lib/legion/data/models/apollo/access_log.rb +17 -0
- data/lib/legion/data/models/apollo/entries.rb +22 -0
- data/lib/legion/data/models/apollo/expertise.rb +16 -0
- data/lib/legion/data/models/apollo/model_helpers.rb +17 -0
- data/lib/legion/data/models/apollo/operation.rb +16 -0
- data/lib/legion/data/models/apollo/relation.rb +18 -0
- data/lib/legion/data/models/function.rb +1 -0
- data/lib/legion/data/models/identity/audit_log.rb +20 -0
- data/lib/legion/data/models/identity/group.rb +28 -0
- data/lib/legion/data/models/identity/group_memberships.rb +28 -0
- data/lib/legion/data/models/identity/identity.rb +24 -0
- data/lib/legion/data/models/identity/model_helpers.rb +86 -0
- data/lib/legion/data/models/identity/principal.rb +37 -0
- data/lib/legion/data/models/identity/providers.rb +34 -0
- data/lib/legion/data/models/identity.rb +8 -0
- data/lib/legion/data/models/identity_group.rb +13 -0
- data/lib/legion/data/models/identity_provider.rb +8 -0
- data/lib/legion/data/models/llm/conversation.rb +25 -0
- data/lib/legion/data/models/llm/conversation_compaction.rb +22 -0
- data/lib/legion/data/models/llm/message.rb +105 -0
- data/lib/legion/data/models/llm/message_inference_metric.rb +46 -0
- data/lib/legion/data/models/llm/message_inference_request.rb +80 -0
- data/lib/legion/data/models/llm/message_inference_response.rb +23 -0
- data/lib/legion/data/models/llm/model_helpers.rb +18 -0
- data/lib/legion/data/models/llm/policy_evaluation.rb +20 -0
- data/lib/legion/data/models/llm/registry_event.rb +15 -0
- data/lib/legion/data/models/llm/route_attempt.rb +18 -0
- data/lib/legion/data/models/llm/security_event.rb +66 -0
- data/lib/legion/data/models/llm/tool_call.rb +21 -0
- data/lib/legion/data/models/llm/tool_call_attempt.rb +18 -0
- data/lib/legion/data/models/node.rb +2 -1
- data/lib/legion/data/models/principal.rb +13 -0
- data/lib/legion/data/models/rbac/cross_team_grants.rb +25 -0
- data/lib/legion/data/models/rbac/model_helpers.rb +25 -0
- data/lib/legion/data/models/rbac/role_assignments.rb +25 -0
- data/lib/legion/data/models/rbac/runner_grants.rb +23 -0
- data/lib/legion/data/models/relationship.rb +1 -0
- data/lib/legion/data/models/runner.rb +24 -2
- data/lib/legion/data/models/task.rb +4 -0
- data/lib/legion/data/version.rb +1 -1
- data/scripts/pre-commit-rubocop.sh +16 -0
- metadata +54 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 86680ede4352bc3726face7c12c9b574212747c308a3a90aae923a48d12f6c24
|
|
4
|
+
data.tar.gz: 97e4c063fc98c918ddccd34e39c84756487f7e88c227f91b2c35e337c2d5045c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6286777c6f31b2eef443c76227dd4404bf8cc0509ab385210d62a8c2860d6449586ae54a6c56c7c5c419c46aed92b4409a63db9e5be1e3c0f77290520ff3d423
|
|
7
|
+
data.tar.gz: bc66b1510ca21645de0440ecbbe8ae543139e96e6d393a37d0a47613e31610cb6fc737bf8e104d31b8c61f8bcb4a14f9e7609a9106fdf26384e37bc4a347edb9
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Standard LegionIO pre-commit configuration
|
|
2
|
+
# Install: pre-commit install
|
|
3
|
+
# Manual: pre-commit run --all-files
|
|
4
|
+
repos:
|
|
5
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
6
|
+
rev: v5.0.0
|
|
7
|
+
hooks:
|
|
8
|
+
- id: trailing-whitespace
|
|
9
|
+
- id: end-of-file-fixer
|
|
10
|
+
- id: check-yaml
|
|
11
|
+
- id: check-json
|
|
12
|
+
exclude: Gemfile\.lock
|
|
13
|
+
- id: check-merge-conflict
|
|
14
|
+
|
|
15
|
+
- repo: local
|
|
16
|
+
hooks:
|
|
17
|
+
- id: rubocop
|
|
18
|
+
name: RuboCop (autofix)
|
|
19
|
+
entry: scripts/pre-commit-rubocop.sh
|
|
20
|
+
language: script
|
|
21
|
+
types: [ruby]
|
|
22
|
+
pass_filenames: true
|
|
23
|
+
|
|
24
|
+
- id: ruby-syntax
|
|
25
|
+
name: Ruby syntax check
|
|
26
|
+
entry: bash -c 'status=0; for file in "$@"; do ruby -c "$file" || status=$?; done; exit $status' --
|
|
27
|
+
language: system
|
|
28
|
+
types: [ruby]
|
|
29
|
+
pass_filenames: true
|
data/AGENTS.md
CHANGED
|
@@ -1,24 +1,77 @@
|
|
|
1
1
|
Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing.
|
|
2
2
|
|
|
3
|
-
# legion-data
|
|
3
|
+
# AGENTS.md - legion-data
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## Repo Role
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
`legion-data` owns persistent storage for LegionIO. Keep this repo focused on database connectivity, Sequel migrations, Sequel models, local SQLite state, extraction persistence, audit/governance storage, identity/RBAC storage, Apollo storage, and the LLM lifecycle ledger.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
HTTP APIs, runtime orchestration, extension behavior, and UI concerns belong in their owning repos. This repo should expose clean model contracts that those layers can call.
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
- `Legion::Data::Model::*` — Sequel model classes
|
|
13
|
-
- `Legion::Data::Local` — local SQLite for agentic state
|
|
14
|
-
- `Legion::Data::Extract` — text extraction from documents (pdf, docx, csv, etc.)
|
|
15
|
-
- `Legion::Data::Spool` — filesystem write buffer for DB-unavailable scenarios
|
|
11
|
+
## Required Commands
|
|
16
12
|
|
|
17
|
-
|
|
13
|
+
Run from the repo root:
|
|
18
14
|
|
|
19
15
|
```bash
|
|
20
|
-
cd /path/to/legion-data
|
|
21
|
-
bundle install
|
|
22
|
-
bundle exec rspec
|
|
23
16
|
bundle exec rubocop -A
|
|
17
|
+
bundle exec rspec --format json --out tmp/rspec_results.json --format progress --out tmp/rspec_progress.txt
|
|
24
18
|
```
|
|
19
|
+
|
|
20
|
+
If RSpec fails, extract failures with:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_description, status, exception: .exception}]' tmp/rspec_results.json > tmp/rspec_failures.json
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Do not run partial RSpec or partial RuboCop for release validation.
|
|
27
|
+
|
|
28
|
+
## Migration Rules
|
|
29
|
+
|
|
30
|
+
- Never edit published migrations. Add a new migration instead.
|
|
31
|
+
- Do not guard migrations with `create_table?`, `drop_table?`, `table_exists?`, `if_exists`, `if_not_exists`, `next if`, or `next unless`.
|
|
32
|
+
- Keep migrations split by domain and dependency. Do not hide a whole schema rewrite in one large migration.
|
|
33
|
+
- Use portable Sequel DSL by default. Adapter-specific code is acceptable only for adapter-specific features, such as PostgreSQL vector columns.
|
|
34
|
+
- Prefer `id` integer primary keys for joins and `uuid` public identifiers for APIs, logs, and external references.
|
|
35
|
+
- Avoid JSON columns unless the data is genuinely dynamic provider evidence or cannot be normalized without losing meaning.
|
|
36
|
+
|
|
37
|
+
## Sequel Association Rules
|
|
38
|
+
|
|
39
|
+
Use the official Sequel association APIs as the model contract:
|
|
40
|
+
|
|
41
|
+
- Association API reference: https://sequel.jeremyevans.net/rdoc/classes/Sequel/Model/Associations/ClassMethods.html
|
|
42
|
+
- Association basics: https://github.com/jeremyevans/sequel/blob/master/doc/association_basics.rdoc
|
|
43
|
+
|
|
44
|
+
Required mapping:
|
|
45
|
+
|
|
46
|
+
| Schema shape | Sequel association |
|
|
47
|
+
|--------------|--------------------|
|
|
48
|
+
| This table has the foreign key | `many_to_one` |
|
|
49
|
+
| Other table has the foreign key | `one_to_many` or `one_to_one` |
|
|
50
|
+
| Join table connects both sides | `many_to_many` |
|
|
51
|
+
| One associated row through a join table | `one_through_one` |
|
|
52
|
+
|
|
53
|
+
Rules:
|
|
54
|
+
|
|
55
|
+
- Define associations for real foreign-key relationships when adding or changing models.
|
|
56
|
+
- Prefer association methods and association datasets over ad hoc `where(foreign_key: ...)` lookups in model helpers.
|
|
57
|
+
- When names are not inferable, explicitly set `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key`.
|
|
58
|
+
- Do not create association names that collide with actual column names; Sequel creates methods using the association name.
|
|
59
|
+
- Keep namespace models aligned with API/domain shape, for example `Legion::Data::Model::Identity::*`, `LLM::*`, `Apollo::*`, and `RBAC::*`.
|
|
60
|
+
|
|
61
|
+
## Current Schema Notes
|
|
62
|
+
|
|
63
|
+
- Migrations currently run through `097`; `097` adds the LLM dispatch fields.
|
|
64
|
+
- `074`-`076` are mainline Apollo/task/extract migrations.
|
|
65
|
+
- `077`-`090` define the LLM lifecycle ledger.
|
|
66
|
+
- `091`-`096` define portable identity companion tables.
|
|
67
|
+
- Published PostgreSQL identity migrations remain in place; portable identity tables are additive.
|
|
68
|
+
|
|
69
|
+
## Release Hygiene
|
|
70
|
+
|
|
71
|
+
For behavior, model, migration, or Ruby code changes:
|
|
72
|
+
|
|
73
|
+
- Update `lib/legion/data/version.rb`.
|
|
74
|
+
- Update `CHANGELOG.md`.
|
|
75
|
+
- Update `README.md` when public behavior, schema, configuration, or model surface changes.
|
|
76
|
+
- Keep `.gitignore` ignoring `/Gemfile.lock` and `*.gem`.
|
|
77
|
+
- Do not include generated DBs, logs, coverage output, built gems, or repo-external `/docs` workspace files in commits.
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,35 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [1.8.0] - 2026-05-06
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- Migration 097 adds official LLM dispatch fields for fleet operation, correlation, idempotency, provider instance, and dispatch path tracking.
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- LLM lifecycle Sequel models now live under `Legion::Data::Models::LLM` to match the official data model namespace.
|
|
12
|
+
|
|
13
|
+
## [1.7.5] - 2026-05-05
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
- Migrations 077-090: portable LLM lifecycle schema covering conversations, messages, message inference requests/responses, route attempts, inference metrics, provider-requested tool calls, tool call attempts, conversation compactions, policy evaluations, security events, and registry events.
|
|
17
|
+
- Migrations 091-096: portable identity companion schema with integer primary keys, public UUIDs, normalized provider capabilities, principals, identities, groups, memberships, and audit events.
|
|
18
|
+
- Sequel models and associations for the new `Legion::Data::Models::LLM` lifecycle tables.
|
|
19
|
+
- Nested Sequel model namespaces for Identity, Apollo, and RBAC tables.
|
|
20
|
+
- Lookup helpers for nested and legacy identity models.
|
|
21
|
+
- LLM reconstruction query helpers for audit lineage, finance rollups, security incident lineage, and message-to-tool incident flow.
|
|
22
|
+
- Additional Sequel associations for core execution and identity models, including function/task, relationship/chain, task/worker, task log aliases, and principal/group many-to-many membership helpers.
|
|
23
|
+
|
|
24
|
+
## [1.7.4] - 2026-04-28
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
- Pre-commit RuboCop hook now distinguishes missing tools from real RuboCop failures and propagates failures instead of silently passing.
|
|
28
|
+
- Ruby syntax pre-commit hook now checks every staged Ruby file instead of only the first argument.
|
|
29
|
+
- Connection setup now refreshes the configured adapter on every call, and shutdown now clears fallback state, so fallback health checks no longer go stale across reconnects.
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
- README refreshed for the current migration count, version line, fallback diagnostics, pre-commit workflow, and recent model surface.
|
|
33
|
+
|
|
5
34
|
## [1.7.3] - 2026-04-27
|
|
6
35
|
|
|
7
36
|
### Added
|
data/CLAUDE.md
CHANGED
|
@@ -1,330 +1,67 @@
|
|
|
1
1
|
Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing.
|
|
2
2
|
|
|
3
|
-
# legion-data
|
|
3
|
+
# legion-data
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
- **Parent**: `/Users/miverso2/rubymine/legion/CLAUDE.md`
|
|
5
|
+
`legion-data` is the persistent storage gem for LegionIO. It owns Sequel database connections, numbered migrations, Sequel models, local SQLite state, extract timing persistence, audit/governance storage, identity/RBAC storage, Apollo storage, and the LLM lifecycle ledger.
|
|
7
6
|
|
|
8
|
-
##
|
|
9
|
-
|
|
10
|
-
Manages persistent database storage for the LegionIO framework. Supports SQLite (default), MySQL, and PostgreSQL via Sequel ORM. Provides automatic schema migrations and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, webhooks, audit log, and archive tables. Also provides a parallel local SQLite database (`Legion::Data::Local`) for agentic cognitive state persistence.
|
|
11
|
-
|
|
12
|
-
**GitHub**: https://github.com/LegionIO/legion-data
|
|
13
|
-
**Version**: 1.6.21
|
|
14
|
-
**License**: Apache-2.0
|
|
15
|
-
|
|
16
|
-
## Supported Databases
|
|
17
|
-
|
|
18
|
-
| Database | Adapter | Gem | Use Case |
|
|
19
|
-
|----------|---------|-----|----------|
|
|
20
|
-
| SQLite | `sqlite` | `sqlite3` (bundled) | Default, dev/test, single-node |
|
|
21
|
-
| MySQL | `mysql2` | `mysql2` (optional) | Production |
|
|
22
|
-
| PostgreSQL | `postgres` | `pg` (optional) | Production |
|
|
23
|
-
|
|
24
|
-
Adapter is set via `Legion::Settings[:data][:adapter]`. All migrations use Sequel DSL for cross-database compatibility.
|
|
25
|
-
|
|
26
|
-
## Architecture
|
|
7
|
+
## Commands
|
|
27
8
|
|
|
9
|
+
```bash
|
|
10
|
+
bundle install
|
|
11
|
+
bundle exec rubocop -A
|
|
12
|
+
bundle exec rspec --format json --out tmp/rspec_results.json --format progress --out tmp/rspec_progress.txt
|
|
28
13
|
```
|
|
29
|
-
Legion::Data (singleton module)
|
|
30
|
-
├── .setup # Connect, migrate, load models, setup cache, setup local
|
|
31
|
-
├── .connection # Sequel database handle (shared/central)
|
|
32
|
-
├── .local # Legion::Data::Local accessor
|
|
33
|
-
├── .stats # Combined { shared: Connection.stats, local: Local.stats }
|
|
34
|
-
├── .reload_static_cache # Refresh in-memory StaticCache after hot-loading extensions
|
|
35
|
-
├── .shutdown # Close both connections
|
|
36
|
-
│
|
|
37
|
-
├── Connection # Sequel database connection management (shared)
|
|
38
|
-
│ ├── .adapter # Reads from settings (sqlite, mysql2, postgres)
|
|
39
|
-
│ ├── .setup # Establish connection (dev_mode fallback to SQLite if network DB unreachable)
|
|
40
|
-
│ ├── .sequel # Raw Sequel::Database accessor
|
|
41
|
-
│ ├── .stats # Pool metrics, tuning snapshot, adapter-specific DB stats
|
|
42
|
-
│ ├── .pool_stats # Connection pool usage (size, available, in_use, waiting)
|
|
43
|
-
│ ├── .shutdown # Close connection
|
|
44
|
-
│ ├── GENERIC_KEYS # Pool options forwarded to Sequel (:max_connections, :pool_timeout, etc.)
|
|
45
|
-
│ ├── ADAPTER_KEYS # Per-adapter option whitelists (sqlite, postgres, mysql2)
|
|
46
|
-
│ ├── ADAPTER_DEFAULTS # Built-in defaults per adapter when user hasn't set a value
|
|
47
|
-
│ ├── SlowQueryLogger # Wraps Legion::Logging with [slow-query] prefix for Sequel warn
|
|
48
|
-
│ └── QueryFileLogger # Thread-safe file logger for query_log mode (~/.legionio/logs/)
|
|
49
|
-
│
|
|
50
|
-
├── Local # Local SQLite database for agentic cognitive state
|
|
51
|
-
│ ├── .setup # Lazy init — creates legionio_local.db on first access
|
|
52
|
-
│ ├── .connection # Sequel::SQLite::Database handle
|
|
53
|
-
│ ├── .connected? # Whether local DB is active
|
|
54
|
-
│ ├── .db_path # Path to the local SQLite file
|
|
55
|
-
│ ├── .model(:table) # Create Sequel::Model bound to local connection
|
|
56
|
-
│ ├── .register_migrations(name:, path:) # Extensions register their migration dirs
|
|
57
|
-
│ ├── .stats # Local SQLite metrics (PRAGMAs, file size, registered migrations)
|
|
58
|
-
│ ├── .shutdown # Close local connection
|
|
59
|
-
│ └── .reset! # Clear all state (testing)
|
|
60
|
-
│
|
|
61
|
-
├── Migration # Auto-migration system (58 migrations, Sequel DSL)
|
|
62
|
-
│ └── migrations/
|
|
63
|
-
│ ├── 001_add_schema_columns
|
|
64
|
-
│ ├── 002_add_nodes
|
|
65
|
-
│ ├── 003_add_settings
|
|
66
|
-
│ ├── 004_add_extensions
|
|
67
|
-
│ ├── 005_add_runners
|
|
68
|
-
│ ├── 006_add_functions
|
|
69
|
-
│ ├── 007_add_default_extensions
|
|
70
|
-
│ ├── 008_add_tasks
|
|
71
|
-
│ ├── 009_add_digital_workers
|
|
72
|
-
│ ├── 010_add_value_metrics
|
|
73
|
-
│ ├── 011_add_extensions_registry
|
|
74
|
-
│ ├── 012_add_apollo_tables # postgres-only: pgvector, uuid-ossp, 4 apollo tables
|
|
75
|
-
│ ├── 013_add_relationships # relationships table with trigger/action FK to functions
|
|
76
|
-
│ ├── 014_add_relationship_columns # delay, chain_id, debug, conditions, transformation, active, allow_new_chains
|
|
77
|
-
│ ├── 015_add_rbac_tables
|
|
78
|
-
│ ├── 016_add_worker_health
|
|
79
|
-
│ ├── 017_add_audit_log
|
|
80
|
-
│ ├── 018_add_governance_events # append-only event store with hash chain
|
|
81
|
-
│ ├── 019_add_audit_hash_chain
|
|
82
|
-
│ ├── 020_add_webhooks
|
|
83
|
-
│ ├── 021_add_archive_tables
|
|
84
|
-
│ ├── 022_add_memory_traces
|
|
85
|
-
│ ├── 023_add_data_archive
|
|
86
|
-
│ ├── 024_add_tenant_partition_columns
|
|
87
|
-
│ ├── 025_add_tenants_table
|
|
88
|
-
│ ├── 026_add_function_embeddings # description + embedding (TEXT) on functions; postgres: embedding_vector vector(1536) with HNSW cosine index
|
|
89
|
-
│ ├── 027_add_apollo_source_provider
|
|
90
|
-
│ ├── 028_add_agent_cluster
|
|
91
|
-
│ ├── 029_add_agent_cluster_tasks
|
|
92
|
-
│ ├── 030_add_approval_queue
|
|
93
|
-
│ ├── 031_add_task_depth
|
|
94
|
-
│ ├── 032_add_task_cancelled_at
|
|
95
|
-
│ ├── 033_add_task_delay
|
|
96
|
-
│ ├── 034_add_archive_manifest
|
|
97
|
-
│ ├── 035_add_apollo_source_channel
|
|
98
|
-
│ ├── 036_add_audit_context_snapshot
|
|
99
|
-
│ ├── 037_add_apollo_knowledge_domain
|
|
100
|
-
│ ├── 038_add_conversations
|
|
101
|
-
│ ├── 039_add_audit_archive_manifest # 7-year tiered audit retention
|
|
102
|
-
│ ├── 040_add_slow_query_indexes # tasks table performance indexes
|
|
103
|
-
│ ├── 041_resize_vector_columns
|
|
104
|
-
│ ├── 042_add_tenant_to_registry_tables
|
|
105
|
-
│ ├── 043_add_rls_placeholder # PostgreSQL row-level security
|
|
106
|
-
│ ├── 044_expand_memory_traces
|
|
107
|
-
│ ├── 045_add_memory_associations
|
|
108
|
-
│ ├── 046_add_metering_hourly_rollup
|
|
109
|
-
│ ├── 047_apollo_knowledge_capture # identity cols, ops table, archive table, 25+ indexes
|
|
110
|
-
│ ├── 048_add_financial_logging # 7 UAIS cost recovery tables (identity, asset, environment, accounting, execution, tags, usage)
|
|
111
|
-
│ ├── 049_add_remote_invocable_to_functions # remote_invocable boolean on functions (v3.0)
|
|
112
|
-
│ ├── 050_add_missing_indexes # critical indexes across 13 tables
|
|
113
|
-
│ ├── 051_fix_tasks_created_at # created_at alias for archival (PG generated, SQLite backfill)
|
|
114
|
-
│ ├── 052_drop_redundant_apollo_indexes # PG only: remove duplicate auto-named indexes
|
|
115
|
-
│ ├── 053_add_tasks_relationship_fk # PG only: FK constraint on tasks.relationship_id
|
|
116
|
-
│ ├── 054_add_component_type_to_functions # component_type on functions (runner/hook/absorber, v3.0)
|
|
117
|
-
│ ├── 055_add_definition_to_functions # definition text column on functions (v3.0)
|
|
118
|
-
│ ├── 056_add_absorber_patterns # absorber_patterns table for pattern-matched acquisition
|
|
119
|
-
│ ├── 057_add_routing_key_to_runners # routing_key on runners (v3.0 AMQP)
|
|
120
|
-
│ └── 058_add_tool_embedding_cache # tool_embedding_cache table for global embedding cache tier (Tools::EmbeddingCache L4)
|
|
121
|
-
│
|
|
122
|
-
├── Model # Sequel model loader
|
|
123
|
-
│ └── Models/
|
|
124
|
-
│ ├── Extension # Installed LEX extensions
|
|
125
|
-
│ ├── Function # Available functions per extension (with trigger/action relationship associations)
|
|
126
|
-
│ ├── Runner # Runner definitions (extension + function bindings)
|
|
127
|
-
│ ├── Node # Cluster node registry
|
|
128
|
-
│ ├── Task # Task instances (belongs_to Relationship, belongs_to DigitalWorker)
|
|
129
|
-
│ ├── TaskLog # Task execution logs
|
|
130
|
-
│ ├── Setting # Persistent settings store
|
|
131
|
-
│ ├── DigitalWorker # Digital worker registry (lifecycle: bootstrap/active/paused/retired/terminated)
|
|
132
|
-
│ ├── Relationship # Task trigger/action relationships between functions (migration 013/014)
|
|
133
|
-
│ ├── ApolloEntry # Apollo knowledge entries — postgres only (pgvector embedding, confidence lifecycle)
|
|
134
|
-
│ ├── ApolloRelation # Weighted relations between Apollo entries — postgres only
|
|
135
|
-
│ ├── ApolloExpertise # Per-agent domain expertise tracking — postgres only
|
|
136
|
-
│ ├── ApolloAccessLog # Apollo entry access audit log — postgres only
|
|
137
|
-
│ ├── AuditLog # Audit trail entries (AMQP + query layer)
|
|
138
|
-
│ ├── RbacRoleAssignment # RBAC principal -> role mappings
|
|
139
|
-
│ ├── RbacRunnerGrant # RBAC per-runner permission grants
|
|
140
|
-
│ └── RbacCrossTeamGrant # RBAC cross-team access grants
|
|
141
|
-
│ Note: value_metrics table (migration 010) is accessed via raw Sequel dataset,
|
|
142
|
-
│ not via a named Sequel::Model subclass.
|
|
143
|
-
│ Note: Apollo models are guarded with `return unless adapter == :postgres` at load time.
|
|
144
|
-
│
|
|
145
|
-
├── Settings # Default DB config with per-adapter credential presets
|
|
146
|
-
└── Version
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
### Key Design Patterns
|
|
150
14
|
|
|
151
|
-
|
|
152
|
-
- **Adapter-Driven**: `Connection.adapter` reads from settings; all adapters (including SQLite) use `Sequel.connect` so all options flow through uniformly
|
|
153
|
-
- **Flat Settings**: all connection/pool/adapter options live directly on `data.*` — legion-data resolves which options apply to the current adapter via `ADAPTER_KEYS` whitelists
|
|
154
|
-
- **Per-Adapter Defaults**: `ADAPTER_DEFAULTS` provides built-in defaults (e.g., sqlite timeout 5000, postgres connect_timeout 20) when user hasn't set a value; nil in settings means "use adapter default"
|
|
155
|
-
- **Dev Mode Fallback**: When `dev_mode: true` and network DB unreachable, shared connection falls back to SQLite (`legionio.db`) with warning log
|
|
156
|
-
- **Connection Health**: `connection_validator` (pings idle connections) and `connection_expiration` (retires old connections) extensions auto-enabled for non-SQLite adapters
|
|
157
|
-
- **Cross-DB Migrations**: Shared migrations use IntegerMigrator (Sequel DSL), local migrations use TimestampMigrator (per-extension registration)
|
|
158
|
-
- **Auto-Migration**: Runs Sequel migrations on startup (`auto_migrate: true` by default)
|
|
159
|
-
- **Sequel ORM**: Shared models are `Sequel::Model` subclasses (inherit global connection). Local models use `Legion::Data::Local.model(:table)` (explicit connection binding).
|
|
160
|
-
- **Two-Tier Caching**: StaticCache (in-process frozen hash, no external deps) for lookup models (Extension, Runner, Function) + external Caching plugin (via `Legion::Cache` — Redis/Memcached/Memory) for dynamic models (Relationship, Node, Setting). Both disabled by default.
|
|
161
|
-
- **Query Log Isolation**: `query_log` flag pipes all SQL to dedicated files (`~/.legionio/logs/data-shared-query.log`, `data-local-query.log`) via `QueryFileLogger` — completely isolated from the `Legion::Logging` domain
|
|
162
|
-
- **Cryptographic Erasure**: Deleting `legionio_local.db` is a hard guarantee — no residual data. Used by `lex-privatecore`.
|
|
163
|
-
- **CLI Executable**: Ships with `legionio_migrate` executable in `exe/` for running database migrations standalone
|
|
15
|
+
RSpec output belongs in `tmp/`. On failure, extract only failures:
|
|
164
16
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
```json
|
|
168
|
-
{
|
|
169
|
-
"adapter": "sqlite",
|
|
170
|
-
"connected": false,
|
|
171
|
-
"dev_mode": false,
|
|
172
|
-
"dev_fallback": true,
|
|
173
|
-
"connect_on_start": true,
|
|
174
|
-
|
|
175
|
-
"max_connections": 25,
|
|
176
|
-
"pool_timeout": 5,
|
|
177
|
-
"preconnect": "concurrently",
|
|
178
|
-
"single_threaded": false,
|
|
179
|
-
"test": true,
|
|
180
|
-
"name": null,
|
|
181
|
-
|
|
182
|
-
"log": false,
|
|
183
|
-
"query_log": false,
|
|
184
|
-
"log_connection_info": false,
|
|
185
|
-
"log_warn_duration": 1,
|
|
186
|
-
"sql_log_level": "debug",
|
|
187
|
-
|
|
188
|
-
"connection_validation": true,
|
|
189
|
-
"connection_validation_timeout": 600,
|
|
190
|
-
"connection_expiration": true,
|
|
191
|
-
"connection_expiration_timeout": 14400,
|
|
192
|
-
|
|
193
|
-
"connect_timeout": null,
|
|
194
|
-
"read_timeout": null,
|
|
195
|
-
"write_timeout": null,
|
|
196
|
-
"encoding": null,
|
|
197
|
-
"sql_mode": null,
|
|
198
|
-
"sslmode": null,
|
|
199
|
-
"sslrootcert": null,
|
|
200
|
-
"search_path": null,
|
|
201
|
-
"timeout": null,
|
|
202
|
-
"readonly": null,
|
|
203
|
-
"disable_dqs": null,
|
|
204
|
-
|
|
205
|
-
"read_replica_url": null,
|
|
206
|
-
"replicas": [],
|
|
207
|
-
|
|
208
|
-
"creds": {
|
|
209
|
-
"database": "legionio.db"
|
|
210
|
-
},
|
|
211
|
-
"migrations": {
|
|
212
|
-
"continue_on_fail": false,
|
|
213
|
-
"auto_migrate": true,
|
|
214
|
-
"ran": false,
|
|
215
|
-
"version": null
|
|
216
|
-
},
|
|
217
|
-
"models": {
|
|
218
|
-
"continue_on_load_fail": false,
|
|
219
|
-
"autoload": true
|
|
220
|
-
},
|
|
221
|
-
"local": {
|
|
222
|
-
"enabled": true,
|
|
223
|
-
"database": "legionio_local.db",
|
|
224
|
-
"query_log": false,
|
|
225
|
-
"migrations": {
|
|
226
|
-
"auto_migrate": true
|
|
227
|
-
}
|
|
228
|
-
},
|
|
229
|
-
"cache": {
|
|
230
|
-
"connected": false,
|
|
231
|
-
"auto_enable": false,
|
|
232
|
-
"static_cache": false,
|
|
233
|
-
"ttl": 60
|
|
234
|
-
},
|
|
235
|
-
"archival": {
|
|
236
|
-
"retention_days": 90,
|
|
237
|
-
"batch_size": 1000,
|
|
238
|
-
"storage_backend": null
|
|
239
|
-
}
|
|
240
|
-
}
|
|
17
|
+
```bash
|
|
18
|
+
jq '[.examples[] | select(.status != "passed") | {file_path, line_number, full_description, status, exception: .exception}]' tmp/rspec_results.json > tmp/rspec_failures.json
|
|
241
19
|
```
|
|
242
20
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
| Adapter | Applied Options | Defaults |
|
|
246
|
-
|---------|----------------|----------|
|
|
247
|
-
| sqlite | `timeout`, `readonly`, `disable_dqs` | `timeout: 5000`, `readonly: false`, `disable_dqs: true` |
|
|
248
|
-
| postgres | `connect_timeout`, `sslmode`, `sslrootcert`, `search_path` | `connect_timeout: 20`, `sslmode: "disable"` |
|
|
249
|
-
| mysql2 | `connect_timeout`, `read_timeout`, `write_timeout`, `encoding`, `sql_mode` | `connect_timeout: 120`, `encoding: "utf8mb4"` |
|
|
21
|
+
## Architecture
|
|
250
22
|
|
|
251
|
-
|
|
23
|
+
- `lib/legion/data/connection.rb`: shared Sequel connection setup, diagnostics, fallback handling, query logging.
|
|
24
|
+
- `lib/legion/data/migration.rb`: numbered Sequel migrations.
|
|
25
|
+
- `lib/legion/data/model.rb`: shared model loader.
|
|
26
|
+
- `lib/legion/data/models/`: flat and namespaced Sequel model classes.
|
|
27
|
+
- `lib/legion/data/local.rb`: local SQLite database for on-node state.
|
|
28
|
+
- `lib/legion/data/extract.rb`: text extraction and persisted extract step timings.
|
|
29
|
+
- `lib/legion/data/spool.rb`: filesystem write buffer when DB writes are unavailable.
|
|
252
30
|
|
|
253
|
-
|
|
31
|
+
## Migration Rules
|
|
254
32
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
33
|
+
- Never edit published migrations. Add a new migration.
|
|
34
|
+
- Do not guard migrations with `create_table?`, `drop_table?`, `table_exists?`, `if_exists`, `if_not_exists`, `next if`, or `next unless`.
|
|
35
|
+
- Keep migrations small enough to diagnose and roll back. Split by domain and dependency.
|
|
36
|
+
- Use portable Sequel DSL unless the feature truly requires adapter-specific behavior.
|
|
37
|
+
- Use integer `id` primary keys for joins and public `uuid` columns for APIs/logs/external references.
|
|
38
|
+
- Normalize stable fields. Use JSON only for genuinely dynamic provider payloads or evidence.
|
|
259
39
|
|
|
260
|
-
|
|
40
|
+
## Sequel ORM Rules
|
|
261
41
|
|
|
262
|
-
|
|
263
|
-
- **sqlite**: `{ database: "legionio.db" }`
|
|
264
|
-
- **mysql2**: `{ username: "legion", password: "legion", database: "legionio", host: "127.0.0.1", port: 3306 }`
|
|
265
|
-
- **postgres**: `{ user: "legion", password: "legion", database: "legionio", host: "127.0.0.1", port: 5432 }`
|
|
42
|
+
Use Sequel associations as the object graph. References:
|
|
266
43
|
|
|
267
|
-
|
|
44
|
+
- https://sequel.jeremyevans.net/rdoc/classes/Sequel/Model/Associations/ClassMethods.html
|
|
45
|
+
- https://github.com/jeremyevans/sequel/blob/master/doc/association_basics.rdoc
|
|
268
46
|
|
|
269
|
-
|
|
270
|
-
|-----|---------|
|
|
271
|
-
| `sequel` (>= 5.70) | ORM and migration framework |
|
|
272
|
-
| `sqlite3` (>= 2.0) | SQLite adapter (default, bundled) |
|
|
273
|
-
| `mysql2` (>= 0.5.5) | MySQL adapter (optional) |
|
|
274
|
-
| `pg` (>= 1.5) | PostgreSQL adapter (optional) |
|
|
275
|
-
| `legion-logging` | Logging |
|
|
276
|
-
| `legion-settings` | Configuration |
|
|
47
|
+
Association mapping:
|
|
277
48
|
|
|
278
|
-
|
|
49
|
+
- Foreign key on this model: `many_to_one`.
|
|
50
|
+
- Foreign key on the associated model: `one_to_many` or `one_to_one`.
|
|
51
|
+
- Join table between models: `many_to_many`.
|
|
52
|
+
- Single associated record through a join table: `one_through_one`.
|
|
279
53
|
|
|
280
|
-
|
|
281
|
-
|------|---------|
|
|
282
|
-
| `lib/legion/data.rb` | Module entry, setup/shutdown lifecycle |
|
|
283
|
-
| `lib/legion/data/connection.rb` | Sequel database connection (adapter selection) |
|
|
284
|
-
| `lib/legion/data/migration.rb` | Migration runner |
|
|
285
|
-
| `lib/legion/data/migrations/` | 58 numbered migration files (Sequel DSL) |
|
|
286
|
-
| `lib/legion/data/model.rb` | Model autoloader |
|
|
287
|
-
| `lib/legion/data/local.rb` | Local SQLite module for agentic cognitive state |
|
|
288
|
-
| `lib/legion/data/models/` | Sequel models (Extension, Function, Runner, Node, Task, TaskLog, Setting, DigitalWorker, Relationship, ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog, AuditLog, RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant) |
|
|
289
|
-
| `lib/legion/data/encryption/cipher.rb` | AES-256-GCM encrypt/decrypt with versioned binary format and AAD |
|
|
290
|
-
| `lib/legion/data/encryption/key_provider.rb` | Vault-backed key derivation with per-tenant scope and local fallback |
|
|
291
|
-
| `lib/legion/data/encryption/sequel_plugin.rb` | Transparent `encrypted_column` DSL for Sequel models |
|
|
292
|
-
| `lib/legion/data/event_store.rb` | Append-only governance event store with hash chain integrity |
|
|
293
|
-
| `lib/legion/data/event_store/projection.rb` | Projection base class, ConsentState, GovernanceTimeline |
|
|
294
|
-
| `lib/legion/data/vector.rb` | Reusable pgvector helpers: `available?`, `cosine_search`, `l2_search`, `ensure_extension!` |
|
|
295
|
-
| `lib/legion/data/storage_tiers.rb` | Hot/warm/cold archival lifecycle: `archive_to_warm`, `export_to_cold`, `stats` |
|
|
296
|
-
| `lib/legion/data/archival.rb` | Archival module entry point and configuration |
|
|
297
|
-
| `lib/legion/data/archival/` | Archival strategy implementations |
|
|
298
|
-
| `lib/legion/data/extract.rb` | 10-handler text extraction registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx) |
|
|
299
|
-
| `lib/legion/data/extract/handlers/` | Per-format extraction handlers (base, csv, docx, html, json, jsonl, markdown, pdf, pptx, text, xlsx) |
|
|
300
|
-
| `lib/legion/data/extract/type_detector.rb` | MIME type detection for extract registry |
|
|
301
|
-
| `lib/legion/data/rls.rb` | PostgreSQL row-level security helpers (tenant isolation, session variable) |
|
|
302
|
-
| `lib/legion/data/partition_manager.rb` | Tenant partition management |
|
|
303
|
-
| `lib/legion/data/retention.rb` | Audit retention and archival lifecycle |
|
|
304
|
-
| `lib/legion/data/settings.rb` | Default configuration with per-adapter credential presets |
|
|
305
|
-
| `lib/legion/data/version.rb` | VERSION constant |
|
|
306
|
-
| `exe/legionio_migrate` | CLI executable for running database migrations standalone |
|
|
54
|
+
When Sequel cannot infer names, set `:class`, `:key`, `:primary_key`, `:join_table`, `:left_key`, and `:right_key` explicitly. Do not create association names that collide with real columns.
|
|
307
55
|
|
|
308
|
-
##
|
|
56
|
+
## Current Schema Landmarks
|
|
309
57
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
4. Persistent settings storage
|
|
315
|
-
5. Digital worker registry (AI-as-labor platform)
|
|
316
|
-
6. Task relationship graph (trigger/action chains)
|
|
317
|
-
7. Apollo shared knowledge store (PostgreSQL + pgvector only, used by lex-apollo)
|
|
318
|
-
8. Local SQLite for agentic cognitive state (memory traces, trust scores, dream journals) — always on-node, independent of shared DB
|
|
319
|
-
9. RBAC assignment tables (migration 015 — role assignments, runner grants, cross-team grants)
|
|
320
|
-
10. Audit log with tamper-evident hash chain (migrations 017, 019)
|
|
321
|
-
11. Governance event store with append-only integrity (migration 018)
|
|
322
|
-
12. Webhook subscription storage (migration 020)
|
|
323
|
-
13. Archive, memory traces, and tenant partition tables (migrations 021–025)
|
|
324
|
-
14. Function embeddings for semantic runner discovery (migration 026 — description + vector columns on functions table)
|
|
325
|
-
15. Financial logging for UAIS cost recovery (migration 048 — 7 tables: identity, asset, environment, accounting, execution, tags, usage rollup)
|
|
326
|
-
16. Global tool embedding cache (migration 058 — `tool_embedding_cache` table, L4 tier for `Legion::Tools::EmbeddingCache`)
|
|
58
|
+
- `074`-`076`: Apollo field width, task idempotency, extract step timings.
|
|
59
|
+
- `077`-`090`: LLM lifecycle ledger.
|
|
60
|
+
- `091`-`096`: portable identity companion tables.
|
|
61
|
+
- Namespaced models exist for `Identity::*`, `Apollo::*`, `RBAC::*`, and `LLM::*`.
|
|
327
62
|
|
|
328
|
-
|
|
63
|
+
## Boundaries
|
|
329
64
|
|
|
330
|
-
|
|
65
|
+
- REST APIs belong in LegionIO, not this gem.
|
|
66
|
+
- Extension runtime behavior belongs in the owning extension repos.
|
|
67
|
+
- Do not commit generated DBs, logs, coverage output, built gems, or files from the workspace-level `/docs` directory outside this repo.
|