legion-data 1.6.7 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '086c96d0cbcc3b76593dd560ac03bd117030c9b4eba8a46267c06dea52bbdd65'
4
- data.tar.gz: 3fbea1b46dc309a31eaca4037943d247b9f8d2d5f8f7aa9bbaee496c3f4c445b
3
+ metadata.gz: 49c73e289f9b262a372502c338850d55671323b3c7911f5e4f06f9f27ec090de
4
+ data.tar.gz: cdccf57ec5b2810d5113f5e28cea5ba54856160fc0ac9c2d5258c0c30f64d8ab
5
5
  SHA512:
6
- metadata.gz: 4b90cc9cd4bf154bea5b38bfee693897c48dd03494617e129eae547772833cebf5283d7360c52712d2d234956033a825f490dc1a1543aa15aea2ccd859960784
7
- data.tar.gz: ab0f9fc400ec8a1a047da73e7128386f8b3fdc03ab751a4e0ec456515e48a50b1f36e9d18c0051839922c3ee6fdf05bf3553183dc282d9a6437ac2193001a481
6
+ metadata.gz: a65b9366d797d2e47d5846a428f7619314528f63265c2ca091043433fb52923cd0e2c53fc68552db569645fdd3c752831ae8a2c4141a5479abba44f1d4714355
7
+ data.tar.gz: fbb37ce91bc9555e2460b45382a005e2fda63cf2fedc2962aee6ae95108f272745c8f1b858ea6e569b9aa0b918054b4c0c508dcaa9dac0fb267fe9960bee8cb8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Legion::Data Changelog
2
2
 
3
+ ## [1.6.8] - 2026-03-27
4
+
5
+ ### Changed
6
+ - Documentation updates (CLAUDE.md, README.md)
7
+
3
8
  ## [1.6.7] - 2026-03-26
4
9
 
5
10
  ### Removed
data/CLAUDE.md CHANGED
@@ -8,7 +8,7 @@
8
8
  Manages persistent database storage for the LegionIO framework. Supports SQLite (default), MySQL, and PostgreSQL via Sequel ORM. Provides automatic schema migrations and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, webhooks, audit log, and archive tables. Also provides a parallel local SQLite database (`Legion::Data::Local`) for agentic cognitive state persistence.
9
9
 
10
10
  **GitHub**: https://github.com/LegionIO/legion-data
11
- **Version**: 1.6.0
11
+ **Version**: 1.6.8
12
12
  **License**: Apache-2.0
13
13
 
14
14
  ## Supported Databases
@@ -56,7 +56,7 @@ Legion::Data (singleton module)
56
56
  │ ├── .shutdown # Close local connection
57
57
  │ └── .reset! # Clear all state (testing)
58
58
 
59
- ├── Migration # Auto-migration system (26 migrations, Sequel DSL)
59
+ ├── Migration # Auto-migration system (47 migrations, Sequel DSL)
60
60
  │ └── migrations/
61
61
  │ ├── 001_add_schema_columns
62
62
  │ ├── 002_add_nodes
@@ -83,7 +83,28 @@ Legion::Data (singleton module)
83
83
  │ ├── 023_add_data_archive
84
84
  │ ├── 024_add_tenant_partition_columns
85
85
  │ ├── 025_add_tenants_table
86
- └── 026_add_function_embeddings # description + embedding (TEXT) on functions; postgres: embedding_vector vector(1536) with HNSW cosine index
86
+ │ ├── 026_add_function_embeddings # description + embedding (TEXT) on functions; postgres: embedding_vector vector(1536) with HNSW cosine index
87
+ │ ├── 027_add_apollo_source_provider
88
+ │ ├── 028_add_agent_cluster
89
+ │ ├── 029_add_agent_cluster_tasks
90
+ │ ├── 030_add_approval_queue
91
+ │ ├── 031_add_task_depth
92
+ │ ├── 032_add_task_cancelled_at
93
+ │ ├── 033_add_task_delay
94
+ │ ├── 034_add_archive_manifest
95
+ │ ├── 035_add_apollo_source_channel
96
+ │ ├── 036_add_audit_context_snapshot
97
+ │ ├── 037_add_apollo_knowledge_domain
98
+ │ ├── 038_add_conversations
99
+ │ ├── 039_add_audit_archive_manifest # 7-year tiered audit retention
100
+ │ ├── 040_add_slow_query_indexes # tasks table performance indexes
101
+ │ ├── 041_resize_vector_columns
102
+ │ ├── 042_add_tenant_to_registry_tables
103
+ │ ├── 043_add_rls_placeholder # PostgreSQL row-level security
104
+ │ ├── 044_expand_memory_traces
105
+ │ ├── 045_add_memory_associations
106
+ │ ├── 046_add_metering_hourly_rollup
107
+ │ └── 047_apollo_knowledge_capture # identity cols, ops table, archive table, 25+ indexes
87
108
 
88
109
  ├── Model # Sequel model loader
89
110
  │ └── Models/
@@ -248,7 +269,7 @@ Per-adapter credential defaults are defined in `Settings::CREDS`:
248
269
  | `lib/legion/data.rb` | Module entry, setup/shutdown lifecycle |
249
270
  | `lib/legion/data/connection.rb` | Sequel database connection (adapter selection) |
250
271
  | `lib/legion/data/migration.rb` | Migration runner |
251
- | `lib/legion/data/migrations/` | 26 numbered migration files (Sequel DSL) |
272
+ | `lib/legion/data/migrations/` | 47 numbered migration files (Sequel DSL) |
252
273
  | `lib/legion/data/model.rb` | Model autoloader |
253
274
  | `lib/legion/data/local.rb` | Local SQLite module for agentic cognitive state |
254
275
  | `lib/legion/data/models/` | Sequel models (Extension, Function, Runner, Node, Task, TaskLog, Setting, DigitalWorker, Relationship, ApolloEntry, ApolloRelation, ApolloExpertise, ApolloAccessLog, AuditLog, RbacRoleAssignment, RbacRunnerGrant, RbacCrossTeamGrant) |
@@ -261,6 +282,12 @@ Per-adapter credential defaults are defined in `Settings::CREDS`:
261
282
  | `lib/legion/data/storage_tiers.rb` | Hot/warm/cold archival lifecycle: `archive_to_warm`, `export_to_cold`, `stats` |
262
283
  | `lib/legion/data/archival.rb` | Archival module entry point and configuration |
263
284
  | `lib/legion/data/archival/` | Archival strategy implementations |
285
+ | `lib/legion/data/extract.rb` | 10-handler text extraction registry (txt/md/csv/json/jsonl/html/xlsx/docx/pdf/pptx) |
286
+ | `lib/legion/data/extract/handlers/` | Per-format extraction handlers (base, csv, docx, html, json, jsonl, markdown, pdf, pptx, text, xlsx) |
287
+ | `lib/legion/data/extract/type_detector.rb` | MIME type detection for extract registry |
288
+ | `lib/legion/data/rls.rb` | PostgreSQL row-level security helpers (tenant isolation, session variable) |
289
+ | `lib/legion/data/partition_manager.rb` | Tenant partition management |
290
+ | `lib/legion/data/retention.rb` | Audit retention and archival lifecycle |
264
291
  | `lib/legion/data/settings.rb` | Default configuration with per-adapter credential presets |
265
292
  | `lib/legion/data/version.rb` | VERSION constant |
266
293
  | `exe/legionio_migrate` | CLI executable for running database migrations standalone |
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # legion-data
2
2
 
3
- Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Provides database connectivity via Sequel ORM, automatic schema migrations, and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, and Apollo shared knowledge tables.
3
+ Persistent database storage for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Provides database connectivity via Sequel ORM, automatic schema migrations (47 numbered migrations), and data models for extensions, functions, runners, nodes, tasks, settings, digital workers, task relationships, Apollo shared knowledge tables (PostgreSQL only), tenants, audit log, and archive tables.
4
4
 
5
- **Version**: 1.4.12
5
+ **Version**: 1.6.8
6
6
 
7
7
  ## Supported Databases
8
8
 
@@ -12,7 +12,7 @@ Persistent database storage for the [LegionIO](https://github.com/LegionIO/Legio
12
12
  | MySQL | `mysql2` | `mysql2` | No |
13
13
  | PostgreSQL | `postgres` | `pg` | No |
14
14
 
15
- SQLite is the default adapter and requires no external database server. For MySQL or PostgreSQL, install the corresponding gem and set the adapter in your configuration.
15
+ SQLite is the default adapter. For MySQL or PostgreSQL, install the corresponding gem and set the adapter in your configuration.
16
16
 
17
17
  ## Installation
18
18
 
@@ -36,22 +36,24 @@ gem 'legion-data'
36
36
  |-------|-------|-------------|
37
37
  | `Extension` | `extensions` | Installed LEX extensions |
38
38
  | `Function` | `functions` | Available functions per extension |
39
- | `Runner` | `runners` | Runner definitions (extension + function bindings) |
39
+ | `Runner` | `runners` | Runner definitions |
40
40
  | `Node` | `nodes` | Cluster node registry |
41
41
  | `Task` | `tasks` | Task instances |
42
42
  | `TaskLog` | `task_logs` | Task execution logs |
43
43
  | `Setting` | `settings` | Persistent settings store |
44
- | `DigitalWorker` | `digital_workers` | Digital worker registry (AI-as-labor platform) |
44
+ | `DigitalWorker` | `digital_workers` | Digital worker registry |
45
45
  | `Relationship` | `relationships` | Task trigger/action relationships between functions |
46
- | `ApolloEntry` | `apollo_entries` | Apollo shared knowledge entries (PostgreSQL only) |
47
- | `ApolloRelation` | `apollo_relations` | Relations between Apollo knowledge entries (PostgreSQL only) |
48
- | `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise tracking (PostgreSQL only) |
49
- | `ApolloAccessLog` | `apollo_access_log` | Apollo entry access audit log (PostgreSQL only) |
46
+ | `AuditLog` | `audit_log` | Tamper-evident audit trail with hash chain |
47
+ | `RbacRoleAssignment` | `rbac_role_assignments` | RBAC principal -> role mappings |
48
+ | `RbacRunnerGrant` | `rbac_runner_grants` | Per-runner permission grants |
49
+ | `RbacCrossTeamGrant` | `rbac_cross_team_grants` | Cross-team access grants |
50
+ | `ApolloEntry` | `apollo_entries` | Apollo knowledge entries — PostgreSQL only (pgvector) |
51
+ | `ApolloRelation` | `apollo_relations` | Relations between Apollo entries — PostgreSQL only |
52
+ | `ApolloExpertise` | `apollo_expertise` | Per-agent domain expertise — PostgreSQL only |
53
+ | `ApolloAccessLog` | `apollo_access_log` | Apollo access audit log — PostgreSQL only |
50
54
 
51
55
  Apollo models require PostgreSQL with the `pgvector` extension. They are skipped silently on SQLite and MySQL.
52
56
 
53
- Migration 026 adds `description` (TEXT) and `embedding` (TEXT, JSON-serialized vector) columns to the `functions` table, plus a `embedding_vector vector(1536)` column with HNSW cosine index on PostgreSQL for semantic similarity search of runner functions.
54
-
55
57
  ## Usage
56
58
 
57
59
  ```ruby
@@ -66,7 +68,7 @@ Legion::Data::Model::Extension.all # => Sequel::Dataset
66
68
 
67
69
  ### Local Database
68
70
 
69
- v1.3.0 introduces `Legion::Data::Local`, a parallel SQLite database always stored locally on the node. It is used for agentic cognitive state persistence (memory traces, trust scores, dream journals, etc.) and is independent of the shared database.
71
+ `Legion::Data::Local` is a parallel SQLite database always stored locally on the node. It is used for agentic cognitive state persistence (memory traces, trust scores, dream journals) and is independent of the shared database.
70
72
 
71
73
  ```ruby
72
74
  # Local DB is set up automatically during Legion::Data.setup
@@ -81,7 +83,29 @@ Legion::Data::Local.connected? # => true
81
83
  Legion::Data::Local.db_path # => "legionio_local.db"
82
84
  ```
83
85
 
84
- The local database file (`legionio_local.db` by default) can be deleted for cryptographic erasure — no residual data. This is used by `lex-privatecore`.
86
+ Deleting `legionio_local.db` provides cryptographic erasure — no residual data.
87
+
88
+ ### Text Extraction
89
+
90
+ `Legion::Data::Extract` provides a 10-handler registry for extracting text from documents. Supports: `.txt`, `.md`, `.csv`, `.json`, `.jsonl`, `.html`, `.xlsx`, `.docx`, `.pdf`, `.pptx`. Used by `lex-knowledge` for corpus ingestion.
91
+
92
+ ```ruby
93
+ text = Legion::Data::Extract.extract('/path/to/document.pdf')
94
+ ```
95
+
96
+ ### Row-Level Security
97
+
98
+ `Legion::Data::Rls` provides tenant isolation helpers for PostgreSQL (migration 043). It sets the `app.current_tenant_id` session variable before queries and resets it afterwards.
99
+
100
+ ### Spool (Filesystem Buffer)
101
+
102
+ `Legion::Data::Spool` provides a filesystem-backed write buffer. When the database is unavailable, data is written to `~/.legionio/data/spool/` and replayed once the connection is restored.
103
+
104
+ ```ruby
105
+ spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
106
+ spool.write({ task_id: SecureRandom.uuid, data: payload })
107
+ spool.drain { |entry| process(entry) }
108
+ ```
85
109
 
86
110
  ## Configuration
87
111
 
@@ -132,7 +156,7 @@ The local database file (`legionio_local.db` by default) can be deleted for cryp
132
156
  }
133
157
  ```
134
158
 
135
- PostgreSQL with `pgvector` is required for Apollo models. Install the extension in your database before running migrations:
159
+ PostgreSQL with `pgvector` is required for Apollo models:
136
160
 
137
161
  ```sql
138
162
  CREATE EXTENSION IF NOT EXISTS vector;
@@ -155,21 +179,9 @@ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
155
179
  }
156
180
  ```
157
181
 
158
- Set `enabled: false` to disable local SQLite entirely.
159
-
160
- ### Spool (Filesystem Buffer)
161
-
162
- `Legion::Data::Spool` provides a filesystem-backed write buffer for extensions. When the database is unavailable, task data can be written to `~/.legionio/data/spool/` and replayed once the connection is restored.
163
-
164
- ```ruby
165
- spool = Legion::Data::Spool.for(Legion::Extensions::MyLex)
166
- spool.write({ task_id: SecureRandom.uuid, data: payload })
167
- spool.drain { |entry| process(entry) }
168
- ```
169
-
170
182
  ### Dev Mode Fallback
171
183
 
172
- When `dev_mode: true` and a network database (MySQL/PostgreSQL) is unreachable, the shared connection falls back to SQLite automatically instead of raising.
184
+ When `dev_mode: true` and a network database is unreachable, the shared connection falls back to SQLite automatically:
173
185
 
174
186
  ```json
175
187
  {
@@ -182,7 +194,7 @@ When `dev_mode: true` and a network database (MySQL/PostgreSQL) is unreachable,
182
194
 
183
195
  ### HashiCorp Vault Integration
184
196
 
185
- When Vault is connected and a `database/creds/legion` secret path exists, credentials are fetched dynamically from Vault at connection time, overriding any static `creds` configuration.
197
+ When Vault is connected, credentials are fetched dynamically from `database/creds/legion`, overriding any static `creds` configuration.
186
198
 
187
199
  ## Requirements
188
200
 
@@ -123,18 +123,7 @@ module Legion
123
123
  end
124
124
  end
125
125
  Legion::Settings[:data][:connected] = true
126
- if defined?(Legion::Logging)
127
- if adapter == :sqlite
128
- Legion::Logging.info "Connected to SQLite at #{sqlite_path}"
129
- else
130
- creds = Legion::Data::Settings.creds(adapter)
131
- user = creds[:user] || creds[:username] || 'unknown'
132
- host = creds[:host] || '127.0.0.1'
133
- port = creds[:port]
134
- db = creds[:database] || creds[:db]
135
- Legion::Logging.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
136
- end
137
- end
126
+ log_connection_info if defined?(Legion::Logging)
138
127
  configure_extensions
139
128
  connect_with_replicas
140
129
  end
@@ -273,6 +262,19 @@ module Legion
273
262
  {}
274
263
  end
275
264
 
265
+ def log_connection_info
266
+ if adapter == :sqlite
267
+ Legion::Logging.info "Connected to SQLite at #{sqlite_path}"
268
+ else
269
+ actual = Legion::Settings[:data][:creds] || {}
270
+ user = actual[:user] || actual[:username] || 'unknown'
271
+ host = actual[:host] || '127.0.0.1'
272
+ port = actual[:port]
273
+ db = actual[:database] || actual[:db]
274
+ Legion::Logging.info "Connected to #{adapter}://#{user}@#{host}:#{port}/#{db}"
275
+ end
276
+ end
277
+
276
278
  def dev_fallback?
277
279
  data_settings = Legion::Settings[:data]
278
280
  data_settings[:dev_mode] == true && data_settings[:dev_fallback] != false
@@ -10,6 +10,13 @@ module Legion
10
10
  Legion::Settings[:data][:migrations][:version] = Sequel::Migrator.run(connection, path, **)
11
11
  Legion::Logging.info("Legion::Data::Migration ran successfully to version #{Legion::Settings[:data][:migrations][:version]}")
12
12
  Legion::Settings[:data][:migrations][:ran] = true
13
+ rescue Sequel::DatabaseError => e
14
+ if e.message.include?('InsufficientPrivilege') || e.message.include?('permission denied')
15
+ raise Sequel::DatabaseError,
16
+ "#{e.message}\n Hint: the database user lacks CREATE on schema public " \
17
+ '(required for PG 15+). Grant via: GRANT CREATE ON SCHEMA public TO <user>;'
18
+ end
19
+ raise
13
20
  end
14
21
  end
15
22
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module Data
5
- VERSION = '1.6.7'
5
+ VERSION = '1.6.8'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.7
4
+ version: 1.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity