legion-data 1.6.27 → 1.6.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2fca06857473b0c009e6ab0ca7044f09b394ce3652adad2b35e57835bceb5eb
4
- data.tar.gz: ea80dac3fee5e54500534c5399c4f1c5b47b9a68ee6c0d0bf0d5ebe344053531
3
+ metadata.gz: 15cf490b7bb437d183d3e9a0c5292d38953cee6ed85dd51b7e4a38cdbac1c19f
4
+ data.tar.gz: d95703e13bc0076b3432112d41e92992514b54312b6c9dad1b5c9f46f7db1949
5
5
  SHA512:
6
- metadata.gz: '009fcbd9de6e5dd5d8e74d1ccf2d21c8692a930ae15369ae19fccb2a5f7829982ba6bf79757c66af25d4cce32c5caaf4b8350316e98261676e1bc532485302d4'
7
- data.tar.gz: d36ede441cdf282214ff9abc7836f2ba07f2eb02bce839f884bf074a7cacea3336ddfdb0182ff87c4e1ba63e54f0bc26c3d9eb1769b607474cc3a32197c814fd
6
+ metadata.gz: 925e30b597e82b14ce9f260a239a53c98f3b628682e125465668e8e86a6681d2811aec9c2b71463d8b6ecb0993e5ead76652b00e6249c416d030ed08deeb323d
7
+ data.tar.gz: 210dbeb6e2e3a545842b2523c43a1f3fe3e991a8043351e6b55a1a42f3113568c8e59a2f19d8680874fe09c1b2a848da7fd25d0416e3d1180932228d840882ac
data/.github/CODEOWNERS CHANGED
@@ -1,7 +1 @@
1
- # Auto-generated from team-config.yml
2
- # Team: core
3
- #
4
- # To apply: scripts/apply-codeowners.sh legion-data
5
-
6
- * @LegionIO/maintainers
7
- * @LegionIO/core
1
+ * @Esity @LegionIO/core
@@ -6,12 +6,18 @@ on:
6
6
  schedule:
7
7
  - cron: '0 9 * * 1'
8
8
 
9
+ permissions: {}
10
+
9
11
  jobs:
10
12
  ci:
13
+ permissions:
14
+ contents: read
11
15
  uses: LegionIO/.github/.github/workflows/ci.yml@main
12
16
 
13
17
  ci-postgres:
14
18
  name: "RSpec (PostgreSQL)"
19
+ permissions:
20
+ contents: read
15
21
  timeout-minutes: 15
16
22
  runs-on: ubuntu-latest
17
23
  services:
@@ -45,24 +51,43 @@ jobs:
45
51
  run: bundle exec rspec
46
52
 
47
53
  lint:
54
+ permissions:
55
+ checks: write
56
+ contents: read
57
+ pull-requests: read
48
58
  uses: LegionIO/.github/.github/workflows/lint-patterns.yml@main
49
59
 
50
60
  security:
61
+ permissions:
62
+ contents: read
63
+ security-events: write
51
64
  uses: LegionIO/.github/.github/workflows/security-scan.yml@main
52
65
 
53
66
  version-changelog:
67
+ permissions:
68
+ contents: read
69
+ pull-requests: read
54
70
  uses: LegionIO/.github/.github/workflows/version-changelog.yml@main
55
71
 
56
72
  dependency-review:
73
+ permissions:
74
+ contents: read
75
+ pull-requests: write
57
76
  uses: LegionIO/.github/.github/workflows/dependency-review.yml@main
58
77
 
59
78
  stale:
60
79
  if: github.event_name == 'schedule'
80
+ permissions:
81
+ issues: write
82
+ pull-requests: write
61
83
  uses: LegionIO/.github/.github/workflows/stale.yml@main
62
84
 
63
85
  release:
64
86
  needs: [ci, ci-postgres, lint]
65
87
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
88
+ permissions:
89
+ contents: write
90
+ packages: write
66
91
  uses: LegionIO/.github/.github/workflows/release.yml@main
67
92
  secrets:
68
93
  rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
data/.gitignore CHANGED
@@ -16,6 +16,8 @@ legionio.key
16
16
  # logs and OS artifacts
17
17
  legion.log
18
18
  .DS_Store
19
+ # gem build artifacts
20
+ *.gem
19
21
  # SQLite database files
20
22
  *.db
21
23
  .worktrees
data/AGENTS.md ADDED
@@ -0,0 +1,24 @@
1
+ Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing.
2
+
3
+ # legion-data
4
+
5
+ `legion-data` is the persistent database storage gem for the LegionIO async job engine framework. It provides database connectivity via the Sequel ORM, automatic schema migrations (70+ numbered migrations), and Sequel models for the full LegionIO control plane: extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), RBAC, tenants, audit log, governance events, and archive tables.
6
+
7
+ It also ships a parallel local SQLite database (`Legion::Data::Local`) for on-node agentic cognitive state persistence (memory traces, trust scores, etc.), independent of the shared database.
8
+
9
+ ## Key entry points
10
+
11
+ - `Legion::Data.setup` — connect, migrate, load models, set up local DB
12
+ - `Legion::Data::Model::*` — Sequel model classes
13
+ - `Legion::Data::Local` — local SQLite for agentic state
14
+ - `Legion::Data::Extract` — text extraction from documents (pdf, docx, csv, etc.)
15
+ - `Legion::Data::Spool` — filesystem write buffer for DB-unavailable scenarios
16
+
17
+ ## Testing
18
+
19
+ ```bash
20
+ cd /path/to/legion-data
21
+ bundle install
22
+ bundle exec rspec
23
+ bundle exec rubocop -A
24
+ ```
data/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [1.6.29] - 2026-04-17
6
+
7
+ ### Fixed
8
+ - `Connection#log_connection_info`: renamed local variables `user`/`host`/`port`/`db` to `conn_user`/`conn_host`/`conn_port`/`conn_db` to avoid shadowing outer-scope names and resolve `rb/uninitialized-local-variable` CodeQL alert
9
+ - CI workflow: added a top-level `permissions: {}` default and an explicit least-privilege `permissions:` block on every job (e.g. `contents: read` for the CI jobs, `contents: write` + `packages: write` for `release`) to satisfy `actions/missing-workflow-permissions` code scanning alerts
10
+ - Spec: replaced deprecated `raise_exception` matcher with `raise_error` in `connection_spec.rb` and `model_spec.rb`; updated stale test description in `model_spec.rb`
11
+
12
+ ## [1.6.28] - 2026-04-17
13
+
14
+ ### Changed
15
+ - `legion-json` added as explicit gemspec runtime dependency — `Legion::JSON` is used throughout and was previously only an implicit transitive dependency
16
+ - Rewrote `README.md` with accurate architecture diagram, full model table, migration history, configuration reference, and usage examples
17
+ - Updated `CLAUDE.md` with mandatory `bundle exec rspec` + `bundle exec rubocop -A` reminder for AI agents
18
+ - Added `AGENTS.md` with mandatory rspec/rubocop reminder and gem overview
19
+ - Updated `.github/CODEOWNERS` to `@Esity @LegionIO/core`
20
+ - Added `*.gem` to `.gitignore` to prevent build artifacts from being committed
21
+ - Removed `sonar-project.properties`
22
+
5
23
  ## [1.6.27] - 2026-04-17
6
24
 
7
25
  ### Fixed
data/CLAUDE.md CHANGED
@@ -1,3 +1,5 @@
1
+ Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing.
2
+
1
3
  # legion-data: Persistent Storage for LegionIO
2
4
 
3
5
  **Repository Level 3 Documentation**
data/README.md CHANGED
@@ -1,18 +1,22 @@
1
1
  # legion-data
2
2
 
3
- Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Provides database connectivity via Sequel ORM, automatic schema migrations (47 numbered migrations), and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, audit log, and archive tables.
3
+ Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) async job engine and AI coding assistant platform. Provides database connectivity via the [Sequel ORM](https://sequel.jeremyevans.net/), automatic schema migrations (71 numbered migrations), Sequel models for the full LegionIO control plane, and a parallel local SQLite database for on-node agentic cognitive state.
4
4
 
5
- **Version**: 1.6.6
5
+ **Version**: 1.6.29 | **Ruby**: >= 3.4 | **License**: Apache-2.0
6
+
7
+ ---
6
8
 
7
9
  ## Supported Databases
8
10
 
9
11
  | Database | Adapter | Gem | Default |
10
12
  |----------|---------|-----|---------|
11
- | SQLite | `sqlite` | `sqlite3` (included) | Yes |
12
- | MySQL | `mysql2` | `mysql2` | No |
13
- | PostgreSQL | `postgres` | `pg` | No |
13
+ | SQLite | `sqlite` | `sqlite3` (bundled) | Yes |
14
+ | MySQL | `mysql2` | `mysql2` (optional) | No |
15
+ | PostgreSQL | `postgres` | `pg` (optional) | No |
16
+
17
+ SQLite is the default and requires no additional gems. For MySQL or PostgreSQL, install the corresponding gem and configure the adapter.
14
18
 
15
- SQLite is the default adapter. For MySQL or PostgreSQL, install the corresponding gem and set the adapter in your configuration.
19
+ ---
16
20
 
17
21
  ## Installation
18
22
 
@@ -20,86 +24,130 @@ SQLite is the default adapter. For MySQL or PostgreSQL, install the correspondin
20
24
  gem install legion-data
21
25
  ```
22
26
 
23
- Or add to your Gemfile:
27
+ Or add to your `Gemfile`:
24
28
 
25
29
  ```ruby
26
30
  gem 'legion-data'
27
31
 
28
- # Add one of these for production databases:
32
+ # For production databases, add one of these:
29
33
  # gem 'mysql2', '>= 0.5.5'
30
34
  # gem 'pg', '>= 1.5'
31
35
  ```
32
36
 
33
- ## Data Models
37
+ ---
34
38
 
35
- | Model | Table | Description |
36
- |-------|-------|-------------|
37
- | `Extension` | `extensions` | Installed LEX extensions |
38
- | `Function` | `functions` | Available functions per extension |
39
- | `Runner` | `runners` | Runner definitions |
40
- | `Node` | `nodes` | Cluster node registry |
41
- | `Task` | `tasks` | Task instances |
42
- | `TaskLog` | `task_logs` | Task execution logs |
43
- | `Setting` | `settings` | Persistent settings store |
44
- | `DigitalWorker` | `digital_workers` | Digital worker registry |
45
- | `Relationship` | `relationships` | Task trigger/action relationships between functions |
46
- | `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
47
- | `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
48
- | `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
49
- | `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
50
- | `ApolloEntry` | `apollo_entries` | Apollo knowledge entries — PostgreSQL only (pgvector) |
51
- | `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
52
- | `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
53
- | `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
39
+ ## Architecture Overview
54
40
 
55
- Apollo models require PostgreSQL with the `pgvector` extension. They are skipped silently on SQLite and MySQL.
41
+ ```
42
+ Legion::Data (singleton module)
43
+ ├── .setup # Connect, migrate, load models, set up local DB
44
+ ├── .connection # Sequel::Database handle (shared/central)
45
+ ├── .local # Legion::Data::Local (local SQLite accessor)
46
+ ├── .stats # Combined { shared: ..., local: ... } metrics
47
+ ├── .reload_static_cache # Refresh in-memory StaticCache after extension hot-load
48
+ ├── .shutdown # Close both shared and local connections
49
+
50
+ ├── Connection # Sequel database connection management
51
+ │ ├── .adapter # Reads adapter from settings (:sqlite, :mysql2, :postgres)
52
+ │ ├── .setup # Establish connection (dev_mode fallback to SQLite if unreachable)
53
+ │ ├── .sequel # Raw Sequel::Database accessor
54
+ │ ├── .stats # Pool metrics, tuning snapshot, adapter-specific DB stats
55
+ │ └── .shutdown # Disconnect and close query file logger
56
+
57
+ ├── Migration # Auto-migration system (71 numbered Sequel DSL migrations)
58
+
59
+ ├── Model # Sequel model autoloader
60
+ │ └── Models: Extension, Function, Runner, Node, Task, TaskLog, Setting,
61
+ │ DigitalWorker, Relationship, AuditLog, AuditRecord, Chain,
62
+ │ RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant,
63
+ │ IdentityProvider, Principal, Identity, IdentityGroup,
64
+ │ IdentityGroupMembership,
65
+ │ ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog (PG only)
66
+
67
+ ├── Local # Parallel local SQLite for agentic cognitive state
68
+ │ ├── .setup # Lazy init — creates legionio_local.db on first access
69
+ │ ├── .connection # Sequel::SQLite::Database handle
70
+ │ ├── .model(:table) # Create Sequel::Model bound to local connection
71
+ │ ├── .register_migrations(name:, path:) # Extensions add their own migration dirs
72
+ │ ├── .stats # Local SQLite metrics (PRAGMAs, file size, registered migrations)
73
+ │ └── .shutdown # Close local connection
74
+
75
+ ├── Extract # Text extraction handler registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx/vtt)
76
+ ├── Spool # Filesystem write buffer for DB-unavailable scenarios
77
+ ├── Rls # PostgreSQL row-level security helpers (tenant isolation)
78
+ ├── StorageTiers # Hot/warm/cold archival lifecycle
79
+ ├── EventStore # Append-only governance event store with hash chain integrity
80
+ ├── Vector # Reusable pgvector helpers (cosine_search, l2_search, ensure_extension!)
81
+ └── Settings # Default configuration with per-adapter credential presets
82
+ ```
83
+
84
+ ### Two-Database Architecture
85
+
86
+ `legion-data` maintains two independent databases:
87
+
88
+ 1. **Shared DB** (SQLite / MySQL / PostgreSQL) — control plane data: extensions, tasks, runners, nodes, settings, audit logs, relationships. Shared across the cluster.
89
+ 2. **Local DB** (always SQLite) — agentic cognitive state: memory traces, trust scores, dream journals. On-node only; no cross-database joins.
90
+
91
+ Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
92
+
93
+ ---
56
94
 
57
95
  ## Usage
58
96
 
59
97
  ```ruby
60
98
  require 'legion/data'
61
99
 
62
- # Standard setup (shared DB + local SQLite)
100
+ # Set up shared DB + local SQLite, run migrations, load models
63
101
  Legion::Data.setup
64
- Legion::Data.connection # => Sequel::Database (shared)
65
- Legion::Data.local.connection # => Sequel::SQLite::Database (local cognitive state)
102
+
103
+ # Access the Sequel database handle
104
+ Legion::Data.connection # => Sequel::Database
105
+
106
+ # Access models
66
107
  Legion::Data::Model::Extension.all # => Sequel::Dataset
108
+ Legion::Data::Model::Task.first(id: 42)
109
+ Legion::Data::Model::Setting.where(key: 'my_setting').first
110
+
111
+ # Access local cognitive state DB
112
+ Legion::Data.local.connection # => Sequel::SQLite::Database
113
+ Legion::Data.local.connected? # => true
114
+ Legion::Data.local.db_path # => "legionio_local.db"
115
+
116
+ # Check connection health
117
+ Legion::Data.connected? # => true
118
+ Legion::Data.stats # => { shared: {...}, local: {...} }
119
+
120
+ # Shut down both connections
121
+ Legion::Data.shutdown
67
122
  ```
68
123
 
69
- ### Local Database
124
+ ### Local Database (Agentic Cognitive State)
70
125
 
71
- `Legion::Data::Local` is a parallel SQLite database always stored locally on the node. Used for agentic cognitive state persistence (memory traces, trust scores, dream journals) and is independent of the shared database.
126
+ Extensions register their own migration directories and create models bound to the local connection:
72
127
 
73
128
  ```ruby
74
- # Local DB is set up automatically during Legion::Data.setup
75
- # Extensions register their own migration directories
129
+ # Register extension migrations (called during extension setup)
76
130
  Legion::Data::Local.register_migrations(name: :memory, path: '/path/to/migrations')
77
131
 
78
- # Create a model bound to the local connection
79
- MyModel = Legion::Data::Local.model(:my_table)
80
-
81
- # Check status
82
- Legion::Data::Local.connected? # => true
83
- Legion::Data::Local.db_path # => "legionio_local.db"
132
+ # Create a model class bound to the local DB
133
+ MyMemoryTrace = Legion::Data::Local.model(:memory_traces)
134
+ MyMemoryTrace.all # queries legionio_local.db, never the shared DB
84
135
  ```
85
136
 
86
- Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
87
-
88
137
  ### Text Extraction
89
138
 
90
- `Legion::Data::Extract` provides a 10-handler registry for extracting text from documents. Supports: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`. Used by `lex-knowledge` for corpus ingestion.
139
+ `Legion::Data::Extract` provides a handler registry for extracting text from documents, used by `lex-knowledge` for corpus ingestion:
91
140
 
92
141
  ```ruby
93
142
  text = Legion::Data::Extract.extract('/path/to/document.pdf')
143
+ text = Legion::Data::Extract.extract('/path/to/data.csv')
94
144
  ```
95
145
 
96
- ### Row-Level Security
97
-
98
- `Legion::Data::Rls` provides tenant isolation helpers for PostgreSQL (migration 043). Sets `app.current_tenant_id` session variable before queries and resets it after.
146
+ Supported formats: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`, `.vtt`
99
147
 
100
- ### Spool (Filesystem Buffer)
148
+ ### Filesystem Spool (Write Buffer)
101
149
 
102
- `Legion::Data::Spool` provides a filesystem-backed write buffer. When the database is unavailable, data is written to `~/.legionio/data/spool/` and replayed once the connection is restored.
150
+ When the database is unavailable, `Legion::Data::Spool` buffers writes to `~/.legionio/data/spool/` and replays once the connection is restored:
103
151
 
104
152
  ```ruby
105
153
  spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
@@ -107,8 +155,30 @@ spool.write({ task_id: SecureRandom.uuid, data: payload })
107
155
  spool.drain { |entry| process(entry) }
108
156
  ```
109
157
 
158
+ ### Row-Level Security (PostgreSQL)
159
+
160
+ `Legion::Data::Rls` provides tenant isolation via PostgreSQL session variables (migration 043):
161
+
162
+ ```ruby
163
+ Legion::Data::Rls.with_tenant(tenant_id) do
164
+ Legion::Data::Model::Task.all # scoped to tenant_id via RLS policy
165
+ end
166
+ ```
167
+
168
+ ### Permission Checks
169
+
170
+ ```ruby
171
+ Legion::Data.can_write?(:tasks) # => true (SQLite always true)
172
+ Legion::Data.can_read?(:tasks) # => true
173
+ Legion::Data.reset_privileges! # clear cached privilege checks
174
+ ```
175
+
176
+ ---
177
+
110
178
  ## Configuration
111
179
 
180
+ All settings live under the `data` key. The adapter controls which options apply.
181
+
112
182
  ### SQLite (default)
113
183
 
114
184
  ```json
@@ -163,17 +233,56 @@ CREATE EXTENSION IF NOT EXISTS vector;
163
233
  CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
164
234
  ```
165
235
 
166
- ### Local Database
236
+ ### Full Configuration Reference
167
237
 
168
238
  ```json
169
239
  {
170
240
  "data": {
241
+ "adapter": "sqlite",
242
+ "connected": false,
243
+ "dev_mode": false,
244
+ "dev_fallback": true,
245
+ "connect_on_start": true,
246
+
247
+ "max_connections": 25,
248
+ "pool_timeout": 5,
249
+ "preconnect": "concurrently",
250
+ "single_threaded": false,
251
+ "test": true,
252
+
253
+ "log": false,
254
+ "query_log": false,
255
+ "log_warn_duration": 1,
256
+ "sql_log_level": "debug",
257
+
258
+ "connection_validation": true,
259
+ "connection_validation_timeout": 600,
260
+ "connection_expiration": true,
261
+ "connection_expiration_timeout": 14400,
262
+
263
+ "read_replica_url": null,
264
+ "replicas": [],
265
+
266
+ "creds": { "database": "legionio.db" },
267
+
268
+ "migrations": {
269
+ "continue_on_fail": false,
270
+ "auto_migrate": true
271
+ },
272
+ "models": {
273
+ "continue_on_load_fail": false,
274
+ "autoload": true
275
+ },
171
276
  "local": {
172
277
  "enabled": true,
173
278
  "database": "legionio_local.db",
174
- "migrations": {
175
- "auto_migrate": true
176
- }
279
+ "migrations": { "auto_migrate": true }
280
+ },
281
+ "cache": {
282
+ "connected": false,
283
+ "auto_enable": false,
284
+ "static_cache": false,
285
+ "ttl": 60
177
286
  }
178
287
  }
179
288
  }
@@ -181,25 +290,160 @@ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
181
290
 
182
291
  ### Dev Mode Fallback
183
292
 
184
- When `dev_mode: true` and a network database is unreachable, the shared connection falls back to SQLite automatically:
293
+ When `dev_mode: true` and a network database is unreachable, the shared connection automatically falls back to SQLite:
294
+
295
+ ```json
296
+ { "data": { "dev_mode": true, "dev_fallback": true } }
297
+ ```
298
+
299
+ ### HashiCorp Vault Integration
300
+
301
+ When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` block.
302
+
303
+ ### Caching
304
+
305
+ Two independent caching tiers, both disabled by default:
306
+
307
+ | Tier | Setting | Models | Backend |
308
+ |------|---------|--------|---------|
309
+ | **StaticCache** | `data.cache.static_cache: true` | Extension, Runner, Function | In-process frozen Ruby hash |
310
+ | **External Cache** | `data.cache.auto_enable: true` + `Legion::Cache` | Relationship, Node, Setting | Redis/Memcached/Memory |
311
+
312
+ ```ruby
313
+ # After hot-loading extensions, refresh the static cache:
314
+ Legion::Data.reload_static_cache
315
+ ```
316
+
317
+ ### Read Replicas (PostgreSQL)
185
318
 
186
319
  ```json
187
320
  {
188
321
  "data": {
189
- "dev_mode": true,
190
- "dev_fallback": true
322
+ "read_replica_url": "postgres://user:pass@replica1/db",
323
+ "replicas": ["postgres://user:pass@replica2/db"]
191
324
  }
192
325
  }
193
326
  ```
194
327
 
195
- ### HashiCorp Vault Integration
328
+ ---
196
329
 
197
- When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` configuration.
330
+ ## Data Models
198
331
 
199
- ## Requirements
332
+ | Model | Table | Description |
333
+ |-------|-------|-------------|
334
+ | `Extension` | `extensions` | Installed LEX extensions |
335
+ | `Function` | `functions` | Available functions per extension (with embeddings) |
336
+ | `Runner` | `runners` | Runner definitions (AMQP routing keys) |
337
+ | `Node` | `nodes` | Cluster node registry |
338
+ | `Task` | `tasks` | Task instances |
339
+ | `TaskLog` | `task_logs` | Task execution logs |
340
+ | `Setting` | `settings` | Persistent settings store |
341
+ | `DigitalWorker` | `digital_workers` | Digital worker registry |
342
+ | `Relationship` | `relationships` | Task trigger/action chains between functions |
343
+ | `Chain` | `chains` | Task execution chains |
344
+ | `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
345
+ | `AuditRecord` | `audit_records` | Structured audit records |
346
+ | `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
347
+ | `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
348
+ | `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
349
+ | `IdentityProvider` | `identity_providers` | Identity provider registrations |
350
+ | `Principal` | `principals` | Authentication principals |
351
+ | `Identity` | `identities` | Identity records tied to principals |
352
+ | `IdentityGroup` | `identity_groups` | Identity groups |
353
+ | `IdentityGroupMembership` | `identity_group_memberships` | Group membership records |
354
+ | `ApolloEntry` | `apollo_entries` | Knowledge entries — PostgreSQL only (pgvector) |
355
+ | `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
356
+ | `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
357
+ | `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
358
+
359
+ Apollo models require PostgreSQL with the `pgvector` extension. They are skipped silently on SQLite and MySQL.
360
+
361
+ ---
362
+
363
+ ## Dependencies
364
+
365
+ | Gem | Purpose |
366
+ |-----|---------|
367
+ | `sequel` (>= 5.70) | ORM and migration framework |
368
+ | `sqlite3` (>= 2.0) | SQLite adapter (default, bundled) |
369
+ | `csv` (>= 3.2) | CSV extraction handler |
370
+ | `legion-json` | JSON serialization via Legion::JSON |
371
+ | `legion-logging` (>= 1.5.0) | Structured logging |
372
+ | `legion-settings` (>= 1.3.26) | Configuration management |
373
+ | `mysql2` (>= 0.5.5) | MySQL adapter (optional) |
374
+ | `pg` (>= 1.5) | PostgreSQL adapter (optional) |
375
+
376
+ ---
377
+
378
+ ## Migrations
379
+
380
+ 71 numbered Sequel DSL migrations run automatically on startup (`auto_migrate: true`). Key milestones:
381
+
382
+ | Range | What was added |
383
+ |-------|---------------|
384
+ | 001–011 | Core schema: nodes, settings, extensions, runners, functions, tasks, digital workers, value metrics |
385
+ | 012 | Apollo tables (PG only: pgvector, uuid-ossp, 4 tables) |
386
+ | 013–014 | Relationships table with trigger/action FK chains |
387
+ | 015 | RBAC tables |
388
+ | 017–019 | Audit log with tamper-evident hash chain |
389
+ | 020–025 | Webhooks, archive tables, memory traces, tenant partitions |
390
+ | 026 | Function embeddings (description + vector on functions) |
391
+ | 028–030 | Agent clusters and approval queue |
392
+ | 047–048 | Apollo knowledge capture + financial logging (UAIS cost recovery, 7 tables) |
393
+ | 050 | Critical indexes across 13 tables |
394
+ | 058–067 | Audit records, chains, knowledge tiers, tool embedding cache, identity system (providers, principals, identities, groups) |
395
+ | 068–071 | Entity type on audit records, principal on nodes, approval queue resume, engine on relationships |
396
+
397
+ Run migrations standalone:
398
+
399
+ ```bash
400
+ bundle exec legionio_migrate
401
+ ```
402
+
403
+ ---
404
+
405
+ ## CLI Executable
406
+
407
+ `exe/legionio_migrate` runs database migrations standalone, outside the full LegionIO service:
408
+
409
+ ```bash
410
+ bundle exec legionio_migrate
411
+ ```
412
+
413
+ ---
414
+
415
+ ## Role in LegionIO
416
+
417
+ `legion-data` is optional but provides core platform persistence. It initializes during `Legion::Service` startup (after transport). Key responsibilities:
418
+
419
+ 1. Extension and function registry
420
+ 2. Task scheduling, logging, and relationship chains
421
+ 3. Node cluster membership tracking
422
+ 4. Persistent settings storage
423
+ 5. Digital worker registry (AI-as-labor platform)
424
+ 6. RBAC assignment tables
425
+ 7. Audit log with tamper-evident hash chain
426
+ 8. Governance event store with append-only integrity
427
+ 9. Apollo shared knowledge store (PostgreSQL + pgvector, used by `lex-apollo`)
428
+ 10. Local SQLite for agentic cognitive state — always on-node, independent of shared DB
429
+ 11. Financial logging for UAIS cost recovery
430
+ 12. Global tool embedding cache (L4 tier for `Legion::Tools::EmbeddingCache`)
431
+ 13. Unified identity system (providers, principals, identities, groups)
432
+
433
+ ---
434
+
435
+ ## Contributing
436
+
437
+ ```bash
438
+ git clone https://github.com/LegionIO/legion-data
439
+ cd legion-data
440
+ bundle install
441
+ bundle exec rspec # all tests must pass
442
+ bundle exec rubocop -A # zero offenses expected
443
+ ```
200
444
 
201
- - Ruby >= 3.4
445
+ Follow the [LegionIO contribution guide](https://github.com/LegionIO/.github/blob/main/CONTRIBUTING.md). Open a PR against `main`.
202
446
 
203
- ## License
447
+ ---
204
448
 
205
- Apache-2.0
449
+ **Maintained by**: Matthew Iverson ([@Esity](https://github.com/Esity))
data/legion-data.gemspec CHANGED
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
27
27
  }
28
28
 
29
29
  spec.add_dependency 'csv', '>= 3.2'
30
+ spec.add_dependency 'legion-json'
30
31
  spec.add_dependency 'legion-logging', '>= 1.5.0'
31
32
  spec.add_dependency 'legion-settings', '>= 1.3.26'
32
33
  spec.add_dependency 'sequel', '>= 5.70'
@@ -158,7 +158,7 @@ module Legion
158
158
  end
159
159
 
160
160
  def json_dump(obj)
161
- ::JSON.generate(obj)
161
+ Legion::JSON.generate(obj)
162
162
  end
163
163
 
164
164
  def gzip_compress(data)
@@ -316,11 +316,11 @@ module Legion
316
316
  log.info "Connected to SQLite at #{sqlite_path}"
317
317
  else
318
318
  actual = Legion::Settings[:data][:creds] || {}
319
- user = actual[:user] || actual[:username] || 'unknown'
320
- host = actual[:host] || '127.0.0.1'
321
- port = actual[:port]
322
- db = actual[:database] || actual[:db]
323
- log.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
319
+ conn_user = actual[:user] || actual[:username] || 'unknown'
320
+ conn_host = actual[:host] || '127.0.0.1'
321
+ conn_port = actual[:port]
322
+ conn_db = actual[:database] || actual[:db]
323
+ log.info "Connected to #{adapter}://#{conn_user}@#{conn_host}:#{conn_port}/#{conn_db}"
324
324
  end
325
325
  end
326
326
 
@@ -166,10 +166,11 @@ Sequel.migration do
166
166
  ].each do |table, indexes|
167
167
  next unless table_exists?(table)
168
168
 
169
- alter_table(table) do
170
- indexes.each do |idx_name|
171
- drop_index nil, name: idx_name, if_exists: true
172
- end
169
+ existing_indexes = indexes(table).keys
170
+ indexes.each do |idx_name|
171
+ next unless existing_indexes.include?(idx_name)
172
+
173
+ alter_table(table) { drop_index nil, name: idx_name }
173
174
  end
174
175
  end
175
176
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.6.27'
5
+ VERSION = '1.6.29'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.27
4
+ version: 1.6.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -23,6 +23,20 @@ dependencies:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
25
  version: '3.2'
26
+ - !ruby/object:Gem::Dependency
27
+ name: legion-json
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
26
40
  - !ruby/object:Gem::Dependency
27
41
  name: legion-logging
28
42
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +108,7 @@ files:
94
108
  - ".github/workflows/ci.yml"
95
109
  - ".gitignore"
96
110
  - ".rubocop.yml"
111
+ - AGENTS.md
97
112
  - CHANGELOG.md
98
113
  - CLAUDE.md
99
114
  - CODEOWNERS
@@ -234,7 +249,6 @@ files:
234
249
  - lib/legion/data/storage_tiers.rb
235
250
  - lib/legion/data/vector.rb
236
251
  - lib/legion/data/version.rb
237
- - sonar-project.properties
238
252
  homepage: https://github.com/LegionIO/legion-data
239
253
  licenses:
240
254
  - Apache-2.0
@@ -1,12 +0,0 @@
1
- sonar.projectKey=legion-io_legion-data
2
- sonar.organization=legion-io
3
- sonar.projectName=Legion::Data
4
- sonar.sources=.
5
- sonar.exclusions=vendor/**
6
- sonar.coverage.exclusions=spec/**
7
- sonar.ruby.coverage.reportPath=coverage/.resultset.json
8
- sonar.ruby.file.suffixes=rb,ruby
9
- sonar.ruby.coverage.framework=RSpec
10
- sonar.ruby.rubocopConfig=.rubocop.yml
11
- sonar.ruby.rubocop.reportPath=rubocop-result.json
12
- sonar.ruby.rubocop.filePath=.