legion-data 1.6.27 → 1.6.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +1 -7
- data/.github/workflows/ci.yml +25 -0
- data/.gitignore +2 -0
- data/AGENTS.md +24 -0
- data/CHANGELOG.md +18 -0
- data/CLAUDE.md +2 -0
- data/README.md +307 -63
- data/legion-data.gemspec +1 -0
- data/lib/legion/data/archiver.rb +1 -1
- data/lib/legion/data/connection.rb +5 -5
- data/lib/legion/data/migrations/050_add_missing_indexes.rb +5 -4
- data/lib/legion/data/version.rb +1 -1
- metadata +16 -2
- data/sonar-project.properties +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 15cf490b7bb437d183d3e9a0c5292d38953cee6ed85dd51b7e4a38cdbac1c19f
|
|
4
|
+
data.tar.gz: d95703e13bc0076b3432112d41e92992514b54312b6c9dad1b5c9f46f7db1949
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 925e30b597e82b14ce9f260a239a53c98f3b628682e125465668e8e86a6681d2811aec9c2b71463d8b6ecb0993e5ead76652b00e6249c416d030ed08deeb323d
|
|
7
|
+
data.tar.gz: 210dbeb6e2e3a545842b2523c43a1f3fe3e991a8043351e6b55a1a42f3113568c8e59a2f19d8680874fe09c1b2a848da7fd25d0416e3d1180932228d840882ac
|
data/.github/CODEOWNERS
CHANGED
data/.github/workflows/ci.yml
CHANGED
|
@@ -6,12 +6,18 @@ on:
|
|
|
6
6
|
schedule:
|
|
7
7
|
- cron: '0 9 * * 1'
|
|
8
8
|
|
|
9
|
+
permissions: {}
|
|
10
|
+
|
|
9
11
|
jobs:
|
|
10
12
|
ci:
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
11
15
|
uses: LegionIO/.github/.github/workflows/ci.yml@main
|
|
12
16
|
|
|
13
17
|
ci-postgres:
|
|
14
18
|
name: "RSpec (PostgreSQL)"
|
|
19
|
+
permissions:
|
|
20
|
+
contents: read
|
|
15
21
|
timeout-minutes: 15
|
|
16
22
|
runs-on: ubuntu-latest
|
|
17
23
|
services:
|
|
@@ -45,24 +51,43 @@ jobs:
|
|
|
45
51
|
run: bundle exec rspec
|
|
46
52
|
|
|
47
53
|
lint:
|
|
54
|
+
permissions:
|
|
55
|
+
checks: write
|
|
56
|
+
contents: read
|
|
57
|
+
pull-requests: read
|
|
48
58
|
uses: LegionIO/.github/.github/workflows/lint-patterns.yml@main
|
|
49
59
|
|
|
50
60
|
security:
|
|
61
|
+
permissions:
|
|
62
|
+
contents: read
|
|
63
|
+
security-events: write
|
|
51
64
|
uses: LegionIO/.github/.github/workflows/security-scan.yml@main
|
|
52
65
|
|
|
53
66
|
version-changelog:
|
|
67
|
+
permissions:
|
|
68
|
+
contents: read
|
|
69
|
+
pull-requests: read
|
|
54
70
|
uses: LegionIO/.github/.github/workflows/version-changelog.yml@main
|
|
55
71
|
|
|
56
72
|
dependency-review:
|
|
73
|
+
permissions:
|
|
74
|
+
contents: read
|
|
75
|
+
pull-requests: write
|
|
57
76
|
uses: LegionIO/.github/.github/workflows/dependency-review.yml@main
|
|
58
77
|
|
|
59
78
|
stale:
|
|
60
79
|
if: github.event_name == 'schedule'
|
|
80
|
+
permissions:
|
|
81
|
+
issues: write
|
|
82
|
+
pull-requests: write
|
|
61
83
|
uses: LegionIO/.github/.github/workflows/stale.yml@main
|
|
62
84
|
|
|
63
85
|
release:
|
|
64
86
|
needs: [ci, ci-postgres, lint]
|
|
65
87
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
88
|
+
permissions:
|
|
89
|
+
contents: write
|
|
90
|
+
packages: write
|
|
66
91
|
uses: LegionIO/.github/.github/workflows/release.yml@main
|
|
67
92
|
secrets:
|
|
68
93
|
rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
|
data/.gitignore
CHANGED
data/AGENTS.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Always run a full `bundle exec rspec` and `bundle exec rubocop -A` and fix all errors before committing.
|
|
2
|
+
|
|
3
|
+
# legion-data
|
|
4
|
+
|
|
5
|
+
`legion-data` is the persistent database storage gem for the LegionIO async job engine framework. It provides database connectivity via the Sequel ORM, automatic schema migrations (70+ numbered migrations), and Sequel models for the full LegionIO control plane: extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), RBAC, tenants, audit log, governance events, and archive tables.
|
|
6
|
+
|
|
7
|
+
It also ships a parallel local SQLite database (`Legion::Data::Local`) for on-node agentic cognitive state persistence (memory traces, trust scores, etc.), independent of the shared database.
|
|
8
|
+
|
|
9
|
+
## Key entry points
|
|
10
|
+
|
|
11
|
+
- `Legion::Data.setup` — connect, migrate, load models, set up local DB
|
|
12
|
+
- `Legion::Data::Model::*` — Sequel model classes
|
|
13
|
+
- `Legion::Data::Local` — local SQLite for agentic state
|
|
14
|
+
- `Legion::Data::Extract` — text extraction from documents (pdf, docx, csv, etc.)
|
|
15
|
+
- `Legion::Data::Spool` — filesystem write buffer for DB-unavailable scenarios
|
|
16
|
+
|
|
17
|
+
## Testing
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
cd /path/to/legion-data
|
|
21
|
+
bundle install
|
|
22
|
+
bundle exec rspec
|
|
23
|
+
bundle exec rubocop -A
|
|
24
|
+
```
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [1.6.29] - 2026-04-17
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- `Connection#log_connection_info`: renamed local variables `user`/`host`/`port`/`db` to `conn_user`/`conn_host`/`conn_port`/`conn_db` to avoid shadowing outer-scope names and resolve `rb/uninitialized-local-variable` CodeQL alert
|
|
9
|
+
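- CI workflow: added a workflow-level `permissions: {}` default and an explicit least-privilege `permissions:` block on every job, including reusable workflow calls (e.g. `contents: read` for the CI jobs, `contents: write` + `packages: write` for `release`), to satisfy `actions/missing-workflow-permissions` code scanning alerts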
|
|
10
|
+
- Spec: replaced deprecated `raise_exception` matcher with `raise_error` in `connection_spec.rb` and `model_spec.rb`; updated stale test description in `model_spec.rb`
|
|
11
|
+
|
|
12
|
+
## [1.6.28] - 2026-04-17
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- `legion-json` added as explicit gemspec runtime dependency — `Legion::JSON` is used throughout and was previously only an implicit transitive dependency
|
|
16
|
+
- Rewrote `README.md` with accurate architecture diagram, full model table, migration history, configuration reference, and usage examples
|
|
17
|
+
- Updated `CLAUDE.md` with mandatory `bundle exec rspec` + `bundle exec rubocop -A` reminder for AI agents
|
|
18
|
+
- Added `AGENTS.md` with mandatory rspec/rubocop reminder and gem overview
|
|
19
|
+
- Updated `.github/CODEOWNERS` to `@Esity @LegionIO/core`
|
|
20
|
+
- Added `*.gem` to `.gitignore` to prevent build artifacts from being committed
|
|
21
|
+
- Removed `sonar-project.properties`
|
|
22
|
+
|
|
5
23
|
## [1.6.27] - 2026-04-17
|
|
6
24
|
|
|
7
25
|
### Fixed
|
data/CLAUDE.md
CHANGED
data/README.md
CHANGED
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
# legion-data
|
|
2
2
|
|
|
3
|
-
Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO)
|
|
3
|
+
Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) async job engine and AI coding assistant platform. Provides database connectivity via the [Sequel ORM](https://sequel.jeremyevans.net/), automatic schema migrations (71 numbered migrations), Sequel models for the full LegionIO control plane, and a parallel local SQLite database for on-node agentic cognitive state.
|
|
4
4
|
|
|
5
|
-
**Version**: 1.6.
|
|
5
|
+
**Version**: 1.6.29 | **Ruby**: >= 3.4 | **License**: Apache-2.0
|
|
6
|
+
|
|
7
|
+
---
|
|
6
8
|
|
|
7
9
|
## Supported Databases
|
|
8
10
|
|
|
9
11
|
| Database | Adapter | Gem | Default |
|
|
10
12
|
|----------|---------|-----|---------|
|
|
11
|
-
| SQLite | `sqlite` | `sqlite3` (
|
|
12
|
-
| MySQL | `mysql2` | `mysql2` | No |
|
|
13
|
-
| PostgreSQL | `postgres` | `pg` | No |
|
|
13
|
+
| SQLite | `sqlite` | `sqlite3` (bundled) | Yes |
|
|
14
|
+
| MySQL | `mysql2` | `mysql2` (optional) | No |
|
|
15
|
+
| PostgreSQL | `postgres` | `pg` (optional) | No |
|
|
16
|
+
|
|
17
|
+
SQLite is the default and requires no additional gems. For MySQL or PostgreSQL, install the corresponding gem and configure the adapter.
|
|
14
18
|
|
|
15
|
-
|
|
19
|
+
---
|
|
16
20
|
|
|
17
21
|
## Installation
|
|
18
22
|
|
|
@@ -20,86 +24,130 @@ SQLite is the default adapter. For MySQL or PostgreSQL, install the correspondin
|
|
|
20
24
|
gem install legion-data
|
|
21
25
|
```
|
|
22
26
|
|
|
23
|
-
Or add to your Gemfile
|
|
27
|
+
Or add to your `Gemfile`:
|
|
24
28
|
|
|
25
29
|
```ruby
|
|
26
30
|
gem 'legion-data'
|
|
27
31
|
|
|
28
|
-
#
|
|
32
|
+
# For production databases, add one of these:
|
|
29
33
|
# gem 'mysql2', '>= 0.5.5'
|
|
30
34
|
# gem 'pg', '>= 1.5'
|
|
31
35
|
```
|
|
32
36
|
|
|
33
|
-
|
|
37
|
+
---
|
|
34
38
|
|
|
35
|
-
|
|
36
|
-
|-------|-------|-------------|
|
|
37
|
-
| `Extension` | `extensions` | Installed LEX extensions |
|
|
38
|
-
| `Function` | `functions` | Available functions per extension |
|
|
39
|
-
| `Runner` | `runners` | Runner definitions |
|
|
40
|
-
| `Node` | `nodes` | Cluster node registry |
|
|
41
|
-
| `Task` | `tasks` | Task instances |
|
|
42
|
-
| `TaskLog` | `task_logs` | Task execution logs |
|
|
43
|
-
| `Setting` | `settings` | Persistent settings store |
|
|
44
|
-
| `DigitalWorker` | `digital_workers` | Digital worker registry |
|
|
45
|
-
| `Relationship` | `relationships` | Task trigger/action relationships between functions |
|
|
46
|
-
| `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
|
|
47
|
-
| `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
|
|
48
|
-
| `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
|
|
49
|
-
| `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
|
|
50
|
-
| `ApolloEntry` | `apollo_entries` | Apollo knowledge entries — PostgreSQL only (pgvector) |
|
|
51
|
-
| `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
|
|
52
|
-
| `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
|
|
53
|
-
| `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
|
|
39
|
+
## Architecture Overview
|
|
54
40
|
|
|
55
|
-
|
|
41
|
+
```
|
|
42
|
+
Legion::Data (singleton module)
|
|
43
|
+
├── .setup # Connect, migrate, load models, set up local DB
|
|
44
|
+
├── .connection # Sequel::Database handle (shared/central)
|
|
45
|
+
├── .local # Legion::Data::Local (local SQLite accessor)
|
|
46
|
+
├── .stats # Combined { shared: ..., local: ... } metrics
|
|
47
|
+
├── .reload_static_cache # Refresh in-memory StaticCache after extension hot-load
|
|
48
|
+
├── .shutdown # Close both shared and local connections
|
|
49
|
+
│
|
|
50
|
+
├── Connection # Sequel database connection management
|
|
51
|
+
│ ├── .adapter # Reads adapter from settings (:sqlite, :mysql2, :postgres)
|
|
52
|
+
│ ├── .setup # Establish connection (dev_mode fallback to SQLite if unreachable)
|
|
53
|
+
│ ├── .sequel # Raw Sequel::Database accessor
|
|
54
|
+
│ ├── .stats # Pool metrics, tuning snapshot, adapter-specific DB stats
|
|
55
|
+
│ └── .shutdown # Disconnect and close query file logger
|
|
56
|
+
│
|
|
57
|
+
├── Migration # Auto-migration system (71 numbered Sequel DSL migrations)
|
|
58
|
+
│
|
|
59
|
+
├── Model # Sequel model autoloader
|
|
60
|
+
│ └── Models: Extension, Function, Runner, Node, Task, TaskLog, Setting,
|
|
61
|
+
│ DigitalWorker, Relationship, AuditLog, AuditRecord, Chain,
|
|
62
|
+
│ RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant,
|
|
63
|
+
│ IdentityProvider, Principal, Identity, IdentityGroup,
|
|
64
|
+
│ IdentityGroupMembership,
|
|
65
|
+
│ ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog (PG only)
|
|
66
|
+
│
|
|
67
|
+
├── Local # Parallel local SQLite for agentic cognitive state
|
|
68
|
+
│ ├── .setup # Lazy init — creates legionio_local.db on first access
|
|
69
|
+
│ ├── .connection # Sequel::SQLite::Database handle
|
|
70
|
+
│ ├── .model(:table) # Create Sequel::Model bound to local connection
|
|
71
|
+
│ ├── .register_migrations(name:, path:) # Extensions add their own migration dirs
|
|
72
|
+
│ ├── .stats # Local SQLite metrics (PRAGMAs, file size, registered migrations)
|
|
73
|
+
│ └── .shutdown # Close local connection
|
|
74
|
+
│
|
|
75
|
+
├── Extract # 10-handler text extraction registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx/vtt)
|
|
76
|
+
├── Spool # Filesystem write buffer for DB-unavailable scenarios
|
|
77
|
+
├── Rls # PostgreSQL row-level security helpers (tenant isolation)
|
|
78
|
+
├── StorageTiers # Hot/warm/cold archival lifecycle
|
|
79
|
+
├── EventStore # Append-only governance event store with hash chain integrity
|
|
80
|
+
├── Vector # Reusable pgvector helpers (cosine_search, l2_search, ensure_extension!)
|
|
81
|
+
└── Settings # Default configuration with per-adapter credential presets
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Two-Database Architecture
|
|
85
|
+
|
|
86
|
+
`legion-data` maintains two independent databases:
|
|
87
|
+
|
|
88
|
+
1. **Shared DB** (SQLite / MySQL / PostgreSQL) — control plane data: extensions, tasks, runners, nodes, settings, audit logs, relationships. Shared across the cluster.
|
|
89
|
+
2. **Local DB** (always SQLite) — agentic cognitive state: memory traces, trust scores, dream journals. On-node only; no cross-database joins.
|
|
90
|
+
|
|
91
|
+
Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
|
|
92
|
+
|
|
93
|
+
---
|
|
56
94
|
|
|
57
95
|
## Usage
|
|
58
96
|
|
|
59
97
|
```ruby
|
|
60
98
|
require 'legion/data'
|
|
61
99
|
|
|
62
|
-
#
|
|
100
|
+
# Set up shared DB + local SQLite, run migrations, load models
|
|
63
101
|
Legion::Data.setup
|
|
64
|
-
|
|
65
|
-
|
|
102
|
+
|
|
103
|
+
# Access the Sequel database handle
|
|
104
|
+
Legion::Data.connection # => Sequel::Database
|
|
105
|
+
|
|
106
|
+
# Access models
|
|
66
107
|
Legion::Data::Model::Extension.all # => Sequel::Dataset
|
|
108
|
+
Legion::Data::Model::Task.first(id: 42)
|
|
109
|
+
Legion::Data::Model::Setting.where(key: 'my_setting').first
|
|
110
|
+
|
|
111
|
+
# Access local cognitive state DB
|
|
112
|
+
Legion::Data.local.connection # => Sequel::SQLite::Database
|
|
113
|
+
Legion::Data.local.connected? # => true
|
|
114
|
+
Legion::Data.local.db_path # => "legionio_local.db"
|
|
115
|
+
|
|
116
|
+
# Check connection health
|
|
117
|
+
Legion::Data.connected? # => true
|
|
118
|
+
Legion::Data.stats # => { shared: {...}, local: {...} }
|
|
119
|
+
|
|
120
|
+
# Shut down both connections
|
|
121
|
+
Legion::Data.shutdown
|
|
67
122
|
```
|
|
68
123
|
|
|
69
|
-
### Local Database
|
|
124
|
+
### Local Database (Agentic Cognitive State)
|
|
70
125
|
|
|
71
|
-
|
|
126
|
+
Extensions register their own migration directories and create models bound to the local connection:
|
|
72
127
|
|
|
73
128
|
```ruby
|
|
74
|
-
#
|
|
75
|
-
# Extensions register their own migration directories
|
|
129
|
+
# Register extension migrations (called during extension setup)
|
|
76
130
|
Legion::Data::Local.register_migrations(name: :memory, path: '/path/to/migrations')
|
|
77
131
|
|
|
78
|
-
# Create a model bound to the local
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# Check status
|
|
82
|
-
Legion::Data::Local.connected? # => true
|
|
83
|
-
Legion::Data::Local.db_path # => "legionio_local.db"
|
|
132
|
+
# Create a model class bound to the local DB
|
|
133
|
+
MyMemoryTrace = Legion::Data::Local.model(:memory_traces)
|
|
134
|
+
MyMemoryTrace.all # queries legionio_local.db, never the shared DB
|
|
84
135
|
```
|
|
85
136
|
|
|
86
|
-
Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
|
|
87
|
-
|
|
88
137
|
### Text Extraction
|
|
89
138
|
|
|
90
|
-
`Legion::Data::Extract` provides a
|
|
139
|
+
`Legion::Data::Extract` provides a handler registry for extracting text from documents, used by `lex-knowledge` for corpus ingestion:
|
|
91
140
|
|
|
92
141
|
```ruby
|
|
93
142
|
text = Legion::Data::Extract.extract('/path/to/document.pdf')
|
|
143
|
+
text = Legion::Data::Extract.extract('/path/to/data.csv')
|
|
94
144
|
```
|
|
95
145
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
`Legion::Data::Rls` provides tenant isolation helpers for PostgreSQL (migration 043). Sets `app.current_tenant_id` session variable before queries and resets it after.
|
|
146
|
+
Supported formats: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`, `.vtt`
|
|
99
147
|
|
|
100
|
-
### Spool (
|
|
148
|
+
### Filesystem Spool (Write Buffer)
|
|
101
149
|
|
|
102
|
-
|
|
150
|
+
When the database is unavailable, `Legion::Data::Spool` buffers writes to `~/.legionio/data/spool/` and replays once the connection is restored:
|
|
103
151
|
|
|
104
152
|
```ruby
|
|
105
153
|
spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
|
|
@@ -107,8 +155,30 @@ spool.write({ task_id: SecureRandom.uuid, data: payload })
|
|
|
107
155
|
spool.drain { |entry| process(entry) }
|
|
108
156
|
```
|
|
109
157
|
|
|
158
|
+
### Row-Level Security (PostgreSQL)
|
|
159
|
+
|
|
160
|
+
`Legion::Data::Rls` provides tenant isolation via PostgreSQL session variables (migration 043):
|
|
161
|
+
|
|
162
|
+
```ruby
|
|
163
|
+
Legion::Data::Rls.with_tenant(tenant_id) do
|
|
164
|
+
Legion::Data::Model::Task.all # scoped to tenant_id via RLS policy
|
|
165
|
+
end
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Permission Checks
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
Legion::Data.can_write?(:tasks) # => true (SQLite always true)
|
|
172
|
+
Legion::Data.can_read?(:tasks) # => true
|
|
173
|
+
Legion::Data.reset_privileges! # clear cached privilege checks
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
110
178
|
## Configuration
|
|
111
179
|
|
|
180
|
+
All settings live under the `data` key. The adapter controls which options apply.
|
|
181
|
+
|
|
112
182
|
### SQLite (default)
|
|
113
183
|
|
|
114
184
|
```json
|
|
@@ -163,17 +233,56 @@ CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
163
233
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
164
234
|
```
|
|
165
235
|
|
|
166
|
-
###
|
|
236
|
+
### Full Configuration Reference
|
|
167
237
|
|
|
168
238
|
```json
|
|
169
239
|
{
|
|
170
240
|
"data": {
|
|
241
|
+
"adapter": "sqlite",
|
|
242
|
+
"connected": false,
|
|
243
|
+
"dev_mode": false,
|
|
244
|
+
"dev_fallback": true,
|
|
245
|
+
"connect_on_start": true,
|
|
246
|
+
|
|
247
|
+
"max_connections": 25,
|
|
248
|
+
"pool_timeout": 5,
|
|
249
|
+
"preconnect": "concurrently",
|
|
250
|
+
"single_threaded": false,
|
|
251
|
+
"test": true,
|
|
252
|
+
|
|
253
|
+
"log": false,
|
|
254
|
+
"query_log": false,
|
|
255
|
+
"log_warn_duration": 1,
|
|
256
|
+
"sql_log_level": "debug",
|
|
257
|
+
|
|
258
|
+
"connection_validation": true,
|
|
259
|
+
"connection_validation_timeout": 600,
|
|
260
|
+
"connection_expiration": true,
|
|
261
|
+
"connection_expiration_timeout": 14400,
|
|
262
|
+
|
|
263
|
+
"read_replica_url": null,
|
|
264
|
+
"replicas": [],
|
|
265
|
+
|
|
266
|
+
"creds": { "database": "legionio.db" },
|
|
267
|
+
|
|
268
|
+
"migrations": {
|
|
269
|
+
"continue_on_fail": false,
|
|
270
|
+
"auto_migrate": true
|
|
271
|
+
},
|
|
272
|
+
"models": {
|
|
273
|
+
"continue_on_load_fail": false,
|
|
274
|
+
"autoload": true
|
|
275
|
+
},
|
|
171
276
|
"local": {
|
|
172
277
|
"enabled": true,
|
|
173
278
|
"database": "legionio_local.db",
|
|
174
|
-
"migrations": {
|
|
175
|
-
|
|
176
|
-
|
|
279
|
+
"migrations": { "auto_migrate": true }
|
|
280
|
+
},
|
|
281
|
+
"cache": {
|
|
282
|
+
"connected": false,
|
|
283
|
+
"auto_enable": false,
|
|
284
|
+
"static_cache": false,
|
|
285
|
+
"ttl": 60
|
|
177
286
|
}
|
|
178
287
|
}
|
|
179
288
|
}
|
|
@@ -181,25 +290,160 @@ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
181
290
|
|
|
182
291
|
### Dev Mode Fallback
|
|
183
292
|
|
|
184
|
-
When `dev_mode: true` and a network database is unreachable, the shared connection falls back to SQLite
|
|
293
|
+
When `dev_mode: true` and a network database is unreachable, the shared connection automatically falls back to SQLite:
|
|
294
|
+
|
|
295
|
+
```json
|
|
296
|
+
{ "data": { "dev_mode": true, "dev_fallback": true } }
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### HashiCorp Vault Integration
|
|
300
|
+
|
|
301
|
+
When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` block.
|
|
302
|
+
|
|
303
|
+
### Caching
|
|
304
|
+
|
|
305
|
+
Two independent caching tiers, both disabled by default:
|
|
306
|
+
|
|
307
|
+
| Tier | Setting | Models | Backend |
|
|
308
|
+
|------|---------|--------|---------|
|
|
309
|
+
| **StaticCache** | `data.cache.static_cache: true` | Extension, Runner, Function | In-process frozen Ruby hash |
|
|
310
|
+
| **External Cache** | `data.cache.auto_enable: true` + `Legion::Cache` | Relationship, Node, Setting | Redis/Memcached/Memory |
|
|
311
|
+
|
|
312
|
+
```ruby
|
|
313
|
+
# After hot-loading extensions, refresh the static cache:
|
|
314
|
+
Legion::Data.reload_static_cache
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Read Replicas (PostgreSQL)
|
|
185
318
|
|
|
186
319
|
```json
|
|
187
320
|
{
|
|
188
321
|
"data": {
|
|
189
|
-
"
|
|
190
|
-
"
|
|
322
|
+
"read_replica_url": "postgres://user:pass@replica1/db",
|
|
323
|
+
"replicas": ["postgres://user:pass@replica2/db"]
|
|
191
324
|
}
|
|
192
325
|
}
|
|
193
326
|
```
|
|
194
327
|
|
|
195
|
-
|
|
328
|
+
---
|
|
196
329
|
|
|
197
|
-
|
|
330
|
+
## Data Models
|
|
198
331
|
|
|
199
|
-
|
|
332
|
+
| Model | Table | Description |
|
|
333
|
+
|-------|-------|-------------|
|
|
334
|
+
| `Extension` | `extensions` | Installed LEX extensions |
|
|
335
|
+
| `Function` | `functions` | Available functions per extension (with embeddings) |
|
|
336
|
+
| `Runner` | `runners` | Runner definitions (AMQP routing keys) |
|
|
337
|
+
| `Node` | `nodes` | Cluster node registry |
|
|
338
|
+
| `Task` | `tasks` | Task instances |
|
|
339
|
+
| `TaskLog` | `task_logs` | Task execution logs |
|
|
340
|
+
| `Setting` | `settings` | Persistent settings store |
|
|
341
|
+
| `DigitalWorker` | `digital_workers` | Digital worker registry |
|
|
342
|
+
| `Relationship` | `relationships` | Task trigger/action chains between functions |
|
|
343
|
+
| `Chain` | `chains` | Task execution chains |
|
|
344
|
+
| `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
|
|
345
|
+
| `AuditRecord` | `audit_records` | Structured audit records |
|
|
346
|
+
| `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
|
|
347
|
+
| `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
|
|
348
|
+
| `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
|
|
349
|
+
| `IdentityProvider` | `identity_providers` | Identity provider registrations |
|
|
350
|
+
| `Principal` | `principals` | Authentication principals |
|
|
351
|
+
| `Identity` | `identities` | Identity records tied to principals |
|
|
352
|
+
| `IdentityGroup` | `identity_groups` | Identity groups |
|
|
353
|
+
| `IdentityGroupMembership` | `identity_group_memberships` | Group membership records |
|
|
354
|
+
| `ApolloEntry` | `apollo_entries` | Knowledge entries — PostgreSQL only (pgvector) |
|
|
355
|
+
| `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
|
|
356
|
+
| `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
|
|
357
|
+
| `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
|
|
358
|
+
|
|
359
|
+
Apollo models require PostgreSQL with the `pgvector` extension. They are skipped silently on SQLite and MySQL.
|
|
360
|
+
|
|
361
|
+
---
|
|
362
|
+
|
|
363
|
+
## Dependencies
|
|
364
|
+
|
|
365
|
+
| Gem | Purpose |
|
|
366
|
+
|-----|---------|
|
|
367
|
+
| `sequel` (>= 5.70) | ORM and migration framework |
|
|
368
|
+
| `sqlite3` (>= 2.0) | SQLite adapter (default, bundled) |
|
|
369
|
+
| `csv` (>= 3.2) | CSV extraction handler |
|
|
370
|
+
| `legion-json` | JSON serialization via Legion::JSON |
|
|
371
|
+
| `legion-logging` (>= 1.5.0) | Structured logging |
|
|
372
|
+
| `legion-settings` (>= 1.3.26) | Configuration management |
|
|
373
|
+
| `mysql2` (>= 0.5.5) | MySQL adapter (optional) |
|
|
374
|
+
| `pg` (>= 1.5) | PostgreSQL adapter (optional) |
|
|
375
|
+
|
|
376
|
+
---
|
|
377
|
+
|
|
378
|
+
## Migrations
|
|
379
|
+
|
|
380
|
+
71 numbered Sequel DSL migrations run automatically on startup (`auto_migrate: true`). Key milestones:
|
|
381
|
+
|
|
382
|
+
| Range | What was added |
|
|
383
|
+
|-------|---------------|
|
|
384
|
+
| 001–011 | Core schema: nodes, settings, extensions, runners, functions, tasks, digital workers, value metrics |
|
|
385
|
+
| 012 | Apollo tables (PG only: pgvector, uuid-ossp, 4 tables) |
|
|
386
|
+
| 013–014 | Relationships table with trigger/action FK chains |
|
|
387
|
+
| 015 | RBAC tables |
|
|
388
|
+
| 017–019 | Audit log with tamper-evident hash chain |
|
|
389
|
+
| 020–025 | Webhooks, archive tables, memory traces, tenant partitions |
|
|
390
|
+
| 026 | Function embeddings (description + vector on functions) |
|
|
391
|
+
| 028–030 | Agent clusters and approval queue |
|
|
392
|
+
| 047–048 | Apollo knowledge capture + financial logging (UAIS cost recovery, 7 tables) |
|
|
393
|
+
| 050 | Critical indexes across 13 tables |
|
|
394
|
+
| 058–067 | Audit records, chains, knowledge tiers, tool embedding cache, identity system (providers, principals, identities, groups) |
|
|
395
|
+
| 068–071 | Entity type on audit records, principal on nodes, approval queue resume, engine on relationships |
|
|
396
|
+
|
|
397
|
+
Run migrations standalone:
|
|
398
|
+
|
|
399
|
+
```bash
|
|
400
|
+
bundle exec legionio_migrate
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
---
|
|
404
|
+
|
|
405
|
+
## CLI Executable
|
|
406
|
+
|
|
407
|
+
`exe/legionio_migrate` runs database migrations standalone, outside the full LegionIO service:
|
|
408
|
+
|
|
409
|
+
```bash
|
|
410
|
+
bundle exec legionio_migrate
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## Role in LegionIO
|
|
416
|
+
|
|
417
|
+
`legion-data` is optional but provides core platform persistence. It initializes during `Legion::Service` startup (after transport). Key responsibilities:
|
|
418
|
+
|
|
419
|
+
1. Extension and function registry
|
|
420
|
+
2. Task scheduling, logging, and relationship chains
|
|
421
|
+
3. Node cluster membership tracking
|
|
422
|
+
4. Persistent settings storage
|
|
423
|
+
5. Digital worker registry (AI-as-labor platform)
|
|
424
|
+
6. RBAC assignment tables
|
|
425
|
+
7. Audit log with tamper-evident hash chain
|
|
426
|
+
8. Governance event store with append-only integrity
|
|
427
|
+
9. Apollo shared knowledge store (PostgreSQL + pgvector, used by `lex-apollo`)
|
|
428
|
+
10. Local SQLite for agentic cognitive state — always on-node, independent of shared DB
|
|
429
|
+
11. Financial logging for UAIS cost recovery
|
|
430
|
+
12. Global tool embedding cache (L4 tier for `Legion::Tools::EmbeddingCache`)
|
|
431
|
+
13. Unified identity system (providers, principals, identities, groups)
|
|
432
|
+
|
|
433
|
+
---
|
|
434
|
+
|
|
435
|
+
## Contributing
|
|
436
|
+
|
|
437
|
+
```bash
|
|
438
|
+
git clone https://github.com/LegionIO/legion-data
|
|
439
|
+
cd legion-data
|
|
440
|
+
bundle install
|
|
441
|
+
bundle exec rspec # all tests must pass
|
|
442
|
+
bundle exec rubocop -A # zero offenses expected
|
|
443
|
+
```
|
|
200
444
|
|
|
201
|
-
|
|
445
|
+
Follow the [LegionIO contribution guide](https://github.com/LegionIO/.github/blob/main/CONTRIBUTING.md). Open a PR against `main`.
|
|
202
446
|
|
|
203
|
-
|
|
447
|
+
---
|
|
204
448
|
|
|
205
|
-
|
|
449
|
+
**Maintained by**: Matthew Iverson ([@Esity](https://github.com/Esity))
|
data/legion-data.gemspec
CHANGED
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
spec.add_dependency 'csv', '>= 3.2'
|
|
30
|
+
spec.add_dependency 'legion-json'
|
|
30
31
|
spec.add_dependency 'legion-logging', '>= 1.5.0'
|
|
31
32
|
spec.add_dependency 'legion-settings', '>= 1.3.26'
|
|
32
33
|
spec.add_dependency 'sequel', '>= 5.70'
|
data/lib/legion/data/archiver.rb
CHANGED
data/lib/legion/data/connection.rb
CHANGED
|
@@ -316,11 +316,11 @@ module Legion
|
|
|
316
316
|
log.info "Connected to SQLite at #{sqlite_path}"
|
|
317
317
|
else
|
|
318
318
|
actual = Legion::Settings[:data][:creds] || {}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
log.info "Connected to #{adapter}://#{
|
|
319
|
+
conn_user = actual[:user] || actual[:username] || 'unknown'
|
|
320
|
+
conn_host = actual[:host] || '127.0.0.1'
|
|
321
|
+
conn_port = actual[:port]
|
|
322
|
+
conn_db = actual[:database] || actual[:db]
|
|
323
|
+
log.info "Connected to #{adapter}://#{conn_user}@#{conn_host}:#{conn_port}/#{conn_db}"
|
|
324
324
|
end
|
|
325
325
|
end
|
|
326
326
|
|
|
data/lib/legion/data/migrations/050_add_missing_indexes.rb
CHANGED
|
@@ -166,10 +166,11 @@ Sequel.migration do
|
|
|
166
166
|
].each do |table, indexes|
|
|
167
167
|
next unless table_exists?(table)
|
|
168
168
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
169
|
+
existing_indexes = indexes(table).keys
|
|
170
|
+
indexes.each do |idx_name|
|
|
171
|
+
next unless existing_indexes.include?(idx_name)
|
|
172
|
+
|
|
173
|
+
alter_table(table) { drop_index nil, name: idx_name }
|
|
173
174
|
end
|
|
174
175
|
end
|
|
175
176
|
end
|
data/lib/legion/data/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-data
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.
|
|
4
|
+
version: 1.6.29
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -23,6 +23,20 @@ dependencies:
|
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '3.2'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: legion-json
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
26
40
|
- !ruby/object:Gem::Dependency
|
|
27
41
|
name: legion-logging
|
|
28
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,6 +108,7 @@ files:
|
|
|
94
108
|
- ".github/workflows/ci.yml"
|
|
95
109
|
- ".gitignore"
|
|
96
110
|
- ".rubocop.yml"
|
|
111
|
+
- AGENTS.md
|
|
97
112
|
- CHANGELOG.md
|
|
98
113
|
- CLAUDE.md
|
|
99
114
|
- CODEOWNERS
|
|
@@ -234,7 +249,6 @@ files:
|
|
|
234
249
|
- lib/legion/data/storage_tiers.rb
|
|
235
250
|
- lib/legion/data/vector.rb
|
|
236
251
|
- lib/legion/data/version.rb
|
|
237
|
-
- sonar-project.properties
|
|
238
252
|
homepage: https://github.com/LegionIO/legion-data
|
|
239
253
|
licenses:
|
|
240
254
|
- Apache-2.0
|
data/sonar-project.properties
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
sonar.projectKey=legion-io_legion-data
|
|
2
|
-
sonar.organization=legion-io
|
|
3
|
-
sonar.projectName=Legion::Data
|
|
4
|
-
sonar.sources=.
|
|
5
|
-
sonar.exclusions=vendor/**
|
|
6
|
-
sonar.coverage.exclusions=spec/**
|
|
7
|
-
sonar.ruby.coverage.reportPath=coverage/.resultset.json
|
|
8
|
-
sonar.ruby.file.suffixes=rb,ruby
|
|
9
|
-
sonar.ruby.coverage.framework=RSpec
|
|
10
|
-
sonar.ruby.rubocopConfig=.rubocop.yml
|
|
11
|
-
sonar.ruby.rubocop.reportPath=rubocop-result.json
|
|
12
|
-
sonar.ruby.rubocop.filePath=.
|