legion-data 1.6.7 → 1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +31 -4
- data/README.md +40 -28
- data/lib/legion/data/connection.rb +14 -12
- data/lib/legion/data/migration.rb +7 -0
- data/lib/legion/data/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 49c73e289f9b262a372502c338850d55671323b3c7911f5e4f06f9f27ec090de
|
|
4
|
+
data.tar.gz: cdccf57ec5b2810d5113f5e28cea5ba54856160fc0ac9c2d5258c0c30f64d8ab
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a65b9366d797d2e47d5846a428f7619314528f63265c2ca091043433fb52923cd0e2c53fc68552db569645fdd3c752831ae8a2c4141a5479abba44f1d4714355
|
|
7
|
+
data.tar.gz: fbb37ce91bc9555e2460b45382a005e2fda63cf2fedc2962aee6ae95108f272745c8f1b858ea6e569b9aa0b918054b4c0c508dcaa9dac0fb267fe9960bee8cb8
|
data/CHANGELOG.md
CHANGED
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Manages persistent database storage for the LegionIO framework. Supports SQLite (default), MySQL, and PostgreSQL via Sequel ORM. Provides automatic schema migrations and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, webhooks, audit log, and archive tables. Also provides a parallel local SQLite database (`Legion::Data::Local`) for agentic cognitive state persistence.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-data
|
|
11
|
-
**Version**: 1.6.
|
|
11
|
+
**Version**: 1.6.6
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Supported Databases
|
|
@@ -56,7 +56,7 @@ Legion::Data (singleton module)
|
|
|
56
56
|
│ ├── .shutdown # Close local connection
|
|
57
57
|
│ └── .reset! # Clear all state (testing)
|
|
58
58
|
│
|
|
59
|
-
├── Migration # Auto-migration system (
|
|
59
|
+
├── Migration # Auto-migration system (47 migrations, Sequel DSL)
|
|
60
60
|
│ └── migrations/
|
|
61
61
|
│ ├── 001_add_schema_columns
|
|
62
62
|
│ ├── 002_add_nodes
|
|
@@ -83,7 +83,28 @@ Legion::Data (singleton module)
|
|
|
83
83
|
│ ├── 023_add_data_archive
|
|
84
84
|
│ ├── 024_add_tenant_partition_columns
|
|
85
85
|
│ ├── 025_add_tenants_table
|
|
86
|
-
│
|
|
86
|
+
│ ├── 026_add_function_embeddings # description + embedding (TEXT) on functions; postgres: embedding_vector vector(1536) with HNSW cosine index
|
|
87
|
+
│ ├── 027_add_apollo_source_provider
|
|
88
|
+
│ ├── 028_add_agent_cluster
|
|
89
|
+
│ ├── 029_add_agent_cluster_tasks
|
|
90
|
+
│ ├── 030_add_approval_queue
|
|
91
|
+
│ ├── 031_add_task_depth
|
|
92
|
+
│ ├── 032_add_task_cancelled_at
|
|
93
|
+
│ ├── 033_add_task_delay
|
|
94
|
+
│ ├── 034_add_archive_manifest
|
|
95
|
+
│ ├── 035_add_apollo_source_channel
|
|
96
|
+
│ ├── 036_add_audit_context_snapshot
|
|
97
|
+
│ ├── 037_add_apollo_knowledge_domain
|
|
98
|
+
│ ├── 038_add_conversations
|
|
99
|
+
│ ├── 039_add_audit_archive_manifest # 7-year tiered audit retention
|
|
100
|
+
│ ├── 040_add_slow_query_indexes # tasks table performance indexes
|
|
101
|
+
│ ├── 041_resize_vector_columns
|
|
102
|
+
│ ├── 042_add_tenant_to_registry_tables
|
|
103
|
+
│ ├── 043_add_rls_placeholder # PostgreSQL row-level security
|
|
104
|
+
│ ├── 044_expand_memory_traces
|
|
105
|
+
│ ├── 045_add_memory_associations
|
|
106
|
+
│ ├── 046_add_metering_hourly_rollup
|
|
107
|
+
│ └── 047_apollo_knowledge_capture # identity cols, ops table, archive table, 25+ indexes
|
|
87
108
|
│
|
|
88
109
|
├── Model # Sequel model loader
|
|
89
110
|
│ └── Models/
|
|
@@ -248,7 +269,7 @@ Per-adapter credential defaults are defined in `Settings::CREDS`:
|
|
|
248
269
|
| `lib/legion/data.rb` | Module entry, setup/shutdown lifecycle |
|
|
249
270
|
| `lib/legion/data/connection.rb` | Sequel database connection (adapter selection) |
|
|
250
271
|
| `lib/legion/data/migration.rb` | Migration runner |
|
|
251
|
-
| `lib/legion/data/migrations/` |
|
|
272
|
+
| `lib/legion/data/migrations/` | 47 numbered migration files (Sequel DSL) |
|
|
252
273
|
| `lib/legion/data/model.rb` | Model autoloader |
|
|
253
274
|
| `lib/legion/data/local.rb` | Local SQLite module for agentic cognitive state |
|
|
254
275
|
| `lib/legion/data/models/` | Sequel models (Extension, Function, Runner, Node, Task, TaskLog, Setting, DigitalWorker, Relationship, ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog, AuditLog, RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant) |
|
|
@@ -261,6 +282,12 @@ Per-adapter credential defaults are defined in `Settings::CREDS`:
|
|
|
261
282
|
| `lib/legion/data/storage_tiers.rb` | Hot/warm/cold archival lifecycle: `archive_to_warm`, `export_to_cold`, `stats` |
|
|
262
283
|
| `lib/legion/data/archival.rb` | Archival module entry point and configuration |
|
|
263
284
|
| `lib/legion/data/archival/` | Archival strategy implementations |
|
|
285
|
+
| `lib/legion/data/extract.rb` | 10-handler text extraction registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx) |
|
|
286
|
+
| `lib/legion/data/extract/handlers/` | Per-format extraction handlers (base, csv, docx, html, json, jsonl, markdown, pdf, pptx, text, xlsx) |
|
|
287
|
+
| `lib/legion/data/extract/type_detector.rb` | MIME type detection for extract registry |
|
|
288
|
+
| `lib/legion/data/rls.rb` | PostgreSQL row-level security helpers (tenant isolation, session variable) |
|
|
289
|
+
| `lib/legion/data/partition_manager.rb` | Tenant partition management |
|
|
290
|
+
| `lib/legion/data/retention.rb` | Audit retention and archival lifecycle |
|
|
264
291
|
| `lib/legion/data/settings.rb` | Default configuration with per-adapter credential presets |
|
|
265
292
|
| `lib/legion/data/version.rb` | VERSION constant |
|
|
266
293
|
| `exe/legionio_migrate` | CLI executable for running database migrations standalone |
|
data/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# legion-data
|
|
2
2
|
|
|
3
|
-
Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Provides database connectivity via Sequel ORM, automatic schema migrations, and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships,
|
|
3
|
+
Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Provides database connectivity via Sequel ORM, automatic schema migrations (47 numbered migrations), and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, audit log, and archive tables.
|
|
4
4
|
|
|
5
|
-
**Version**: 1.
|
|
5
|
+
**Version**: 1.6.6
|
|
6
6
|
|
|
7
7
|
## Supported Databases
|
|
8
8
|
|
|
@@ -12,7 +12,7 @@ Persistent database storage for the [LegionIO](https://github.com/LegionIO/Legio
|
|
|
12
12
|
| MySQL | `mysql2` | `mysql2` | No |
|
|
13
13
|
| PostgreSQL | `postgres` | `pg` | No |
|
|
14
14
|
|
|
15
|
-
SQLite is the default adapter
|
|
15
|
+
SQLite is the default adapter. For MySQL or PostgreSQL, install the corresponding gem and set the adapter in your configuration.
|
|
16
16
|
|
|
17
17
|
## Installation
|
|
18
18
|
|
|
@@ -36,22 +36,24 @@ gem 'legion-data'
|
|
|
36
36
|
|-------|-------|-------------|
|
|
37
37
|
| `Extension` | `extensions` | Installed LEX extensions |
|
|
38
38
|
| `Function` | `functions` | Available functions per extension |
|
|
39
|
-
| `Runner` | `runners` | Runner definitions
|
|
39
|
+
| `Runner` | `runners` | Runner definitions |
|
|
40
40
|
| `Node` | `nodes` | Cluster node registry |
|
|
41
41
|
| `Task` | `tasks` | Task instances |
|
|
42
42
|
| `TaskLog` | `task_logs` | Task execution logs |
|
|
43
43
|
| `Setting` | `settings` | Persistent settings store |
|
|
44
|
-
| `DigitalWorker` | `digital_workers` | Digital worker registry
|
|
44
|
+
| `DigitalWorker` | `digital_workers` | Digital worker registry |
|
|
45
45
|
| `Relationship` | `relationships` | Task trigger/action relationships between functions |
|
|
46
|
-
| `
|
|
47
|
-
| `
|
|
48
|
-
| `
|
|
49
|
-
| `
|
|
46
|
+
| `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
|
|
47
|
+
| `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
|
|
48
|
+
| `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
|
|
49
|
+
| `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
|
|
50
|
+
| `ApolloEntry` | `apollo_entries` | Apollo knowledge entries — PostgreSQL only (pgvector) |
|
|
51
|
+
| `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
|
|
52
|
+
| `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
|
|
53
|
+
| `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
|
|
50
54
|
|
|
51
55
|
Apollo models require PostgreSQL with the `pgvector` extension. They are skipped silently on SQLite and MySQL.
|
|
52
56
|
|
|
53
|
-
Migration 026 adds `description` (TEXT) and `embedding` (TEXT, JSON-serialized vector) columns to the `functions` table, plus a `embedding_vector vector(1536)` column with HNSW cosine index on PostgreSQL for semantic similarity search of runner functions.
|
|
54
|
-
|
|
55
57
|
## Usage
|
|
56
58
|
|
|
57
59
|
```ruby
|
|
@@ -66,7 +68,7 @@ Legion::Data::Model::Extension.all # => Sequel::Dataset
|
|
|
66
68
|
|
|
67
69
|
### Local Database
|
|
68
70
|
|
|
69
|
-
|
|
71
|
+
`Legion::Data::Local` is a parallel SQLite database always stored locally on the node. Used for agentic cognitive state persistence (memory traces, trust scores, dream journals) and is independent of the shared database.
|
|
70
72
|
|
|
71
73
|
```ruby
|
|
72
74
|
# Local DB is set up automatically during Legion::Data.setup
|
|
@@ -81,7 +83,29 @@ Legion::Data::Local.connected? # => true
|
|
|
81
83
|
Legion::Data::Local.db_path # => "legionio_local.db"
|
|
82
84
|
```
|
|
83
85
|
|
|
84
|
-
|
|
86
|
+
Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
|
|
87
|
+
|
|
88
|
+
### Text Extraction
|
|
89
|
+
|
|
90
|
+
`Legion::Data::Extract` provides a 10-handler registry for extracting text from documents. Supports: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`. Used by `lex-knowledge` for corpus ingestion.
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
text = Legion::Data::Extract.extract('/path/to/document.pdf')
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Row-Level Security
|
|
97
|
+
|
|
98
|
+
`Legion::Data::Rls` provides tenant isolation helpers for PostgreSQL (migration 043). Sets `app.current_tenant_id` session variable before queries and resets it after.
|
|
99
|
+
|
|
100
|
+
### Spool (Filesystem Buffer)
|
|
101
|
+
|
|
102
|
+
`Legion::Data::Spool` provides a filesystem-backed write buffer. When the database is unavailable, data is written to `~/.legionio/data/spool/` and replayed once the connection is restored.
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
|
|
106
|
+
spool.write({ task_id: SecureRandom.uuid, data: payload })
|
|
107
|
+
spool.drain { |entry| process(entry) }
|
|
108
|
+
```
|
|
85
109
|
|
|
86
110
|
## Configuration
|
|
87
111
|
|
|
@@ -132,7 +156,7 @@ The local database file (`legionio_local.db` by default) can be deleted for cryp
|
|
|
132
156
|
}
|
|
133
157
|
```
|
|
134
158
|
|
|
135
|
-
PostgreSQL with `pgvector` is required for Apollo models
|
|
159
|
+
PostgreSQL with `pgvector` is required for Apollo models:
|
|
136
160
|
|
|
137
161
|
```sql
|
|
138
162
|
CREATE EXTENSION IF NOT EXISTS vector;
|
|
@@ -155,21 +179,9 @@ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
155
179
|
}
|
|
156
180
|
```
|
|
157
181
|
|
|
158
|
-
Set `enabled: false` to disable local SQLite entirely.
|
|
159
|
-
|
|
160
|
-
### Spool (Filesystem Buffer)
|
|
161
|
-
|
|
162
|
-
`Legion::Data::Spool` provides a filesystem-backed write buffer for extensions. When the database is unavailable, task data can be written to `~/.legionio/data/spool/` and replayed once the connection is restored.
|
|
163
|
-
|
|
164
|
-
```ruby
|
|
165
|
-
spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
|
|
166
|
-
spool.write({ task_id: SecureRandom.uuid, data: payload })
|
|
167
|
-
spool.drain { |entry| process(entry) }
|
|
168
|
-
```
|
|
169
|
-
|
|
170
182
|
### Dev Mode Fallback
|
|
171
183
|
|
|
172
|
-
When `dev_mode: true` and a network database
|
|
184
|
+
When `dev_mode: true` and a network database is unreachable, the shared connection falls back to SQLite automatically:
|
|
173
185
|
|
|
174
186
|
```json
|
|
175
187
|
{
|
|
@@ -182,7 +194,7 @@ When `dev_mode: true` and a network database (MySQL/PostgreSQL) is unreachable,
|
|
|
182
194
|
|
|
183
195
|
### HashiCorp Vault Integration
|
|
184
196
|
|
|
185
|
-
When Vault is connected
|
|
197
|
+
When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` configuration.
|
|
186
198
|
|
|
187
199
|
## Requirements
|
|
188
200
|
|
data/lib/legion/data/connection.rb
CHANGED
|
@@ -123,18 +123,7 @@ module Legion
|
|
|
123
123
|
end
|
|
124
124
|
end
|
|
125
125
|
Legion::Settings[:data][:connected] = true
|
|
126
|
-
if defined?(Legion::Logging)
|
|
127
|
-
if adapter == :sqlite
|
|
128
|
-
Legion::Logging.info "Connected to SQLite at #{sqlite_path}"
|
|
129
|
-
else
|
|
130
|
-
creds = Legion::Data::Settings.creds(adapter)
|
|
131
|
-
user = creds[:user] || creds[:username] || 'unknown'
|
|
132
|
-
host = creds[:host] || '127.0.0.1'
|
|
133
|
-
port = creds[:port]
|
|
134
|
-
db = creds[:database] || creds[:db]
|
|
135
|
-
Legion::Logging.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
|
|
136
|
-
end
|
|
137
|
-
end
|
|
126
|
+
log_connection_info if defined?(Legion::Logging)
|
|
138
127
|
configure_extensions
|
|
139
128
|
connect_with_replicas
|
|
140
129
|
end
|
|
@@ -273,6 +262,19 @@ module Legion
|
|
|
273
262
|
{}
|
|
274
263
|
end
|
|
275
264
|
|
|
265
|
+
def log_connection_info
|
|
266
|
+
if adapter == :sqlite
|
|
267
|
+
Legion::Logging.info "Connected to SQLite at #{sqlite_path}"
|
|
268
|
+
else
|
|
269
|
+
actual = Legion::Settings[:data][:creds] || {}
|
|
270
|
+
user = actual[:user] || actual[:username] || 'unknown'
|
|
271
|
+
host = actual[:host] || '127.0.0.1'
|
|
272
|
+
port = actual[:port]
|
|
273
|
+
db = actual[:database] || actual[:db]
|
|
274
|
+
Legion::Logging.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
|
|
276
278
|
def dev_fallback?
|
|
277
279
|
data_settings = Legion::Settings[:data]
|
|
278
280
|
data_settings[:dev_mode] == true && data_settings[:dev_fallback] != false
|
data/lib/legion/data/migration.rb
CHANGED
|
@@ -10,6 +10,13 @@ module Legion
|
|
|
10
10
|
Legion::Settings[:data][:migrations][:version] = Sequel::Migrator.run(connection, path, **)
|
|
11
11
|
Legion::Logging.info("Legion::Data::Migration ran successfully to version #{Legion::Settings[:data][:migrations][:version]}")
|
|
12
12
|
Legion::Settings[:data][:migrations][:ran] = true
|
|
13
|
+
rescue Sequel::DatabaseError => e
|
|
14
|
+
if e.message.include?('InsufficientPrivilege') || e.message.include?('permission denied')
|
|
15
|
+
raise Sequel::DatabaseError,
|
|
16
|
+
"#{e.message}\n Hint: the database user lacks CREATE on schema public " \
|
|
17
|
+
'(required for PG 15+). Grant via: GRANT CREATE ON SCHEMA public TO <user>;'
|
|
18
|
+
end
|
|
19
|
+
raise
|
|
13
20
|
end
|
|
14
21
|
end
|
|
15
22
|
end
|
data/lib/legion/data/version.rb
CHANGED