htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
data/README.md
CHANGED
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
|
|
28
28
|
- **Client-Side Embeddings**
|
|
29
29
|
- Automatic embedding generation before database insertion
|
|
30
|
-
-
|
|
30
|
+
- Uses the [ruby_llm](https://ruby_llm.com) gem for LLM access
|
|
31
31
|
- Configurable embedding providers and models
|
|
32
32
|
|
|
33
33
|
- **Two-Tier Memory Architecture**
|
|
@@ -62,6 +62,12 @@
|
|
|
62
62
|
- Tag-based categorization
|
|
63
63
|
- Hierarchical tag structures
|
|
64
64
|
|
|
65
|
+
- **File Loading**
|
|
66
|
+
- Load markdown files into long-term memory
|
|
67
|
+
- Automatic paragraph-based chunking
|
|
68
|
+
- Source file tracking with re-sync support
|
|
69
|
+
- YAML frontmatter extraction as metadata
|
|
70
|
+
|
|
65
71
|
## Installation
|
|
66
72
|
|
|
67
73
|
Add this line to your application's Gemfile:
|
|
@@ -264,6 +270,42 @@ memories = htm.recall(
|
|
|
264
270
|
htm.forget(node_id, confirm: :confirmed)
|
|
265
271
|
```
|
|
266
272
|
|
|
273
|
+
### Loading Files
|
|
274
|
+
|
|
275
|
+
HTM can load text-based files (currently markdown) into long-term memory with automatic chunking and source tracking.
|
|
276
|
+
|
|
277
|
+
```ruby
|
|
278
|
+
htm = HTM.new(robot_name: "Document Loader")
|
|
279
|
+
|
|
280
|
+
# Load a single markdown file
|
|
281
|
+
result = htm.load_file("docs/guide.md")
|
|
282
|
+
# => { file_source_id: 1, chunks_created: 5, chunks_updated: 0, chunks_deleted: 0 }
|
|
283
|
+
|
|
284
|
+
# Load all markdown files in a directory
|
|
285
|
+
results = htm.load_directory("docs/", pattern: "**/*.md")
|
|
286
|
+
|
|
287
|
+
# Get nodes from a specific file
|
|
288
|
+
nodes = htm.nodes_from_file("docs/guide.md")
|
|
289
|
+
|
|
290
|
+
# Unload a file (soft deletes chunks)
|
|
291
|
+
htm.unload_file("docs/guide.md")
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
**Features:**
|
|
295
|
+
- **Paragraph chunking**: Text split by blank lines, code blocks preserved
|
|
296
|
+
- **Source tracking**: Files tracked with mtime for automatic re-sync
|
|
297
|
+
- **YAML frontmatter**: Extracted and stored as metadata
|
|
298
|
+
- **Duplicate detection**: Content hash prevents duplicate nodes
|
|
299
|
+
|
|
300
|
+
**Rake tasks:**
|
|
301
|
+
```bash
|
|
302
|
+
rake 'htm:files:load[docs/guide.md]' # Load a single file
|
|
303
|
+
rake 'htm:files:load_dir[docs/]' # Load all markdown files from directory
|
|
304
|
+
rake htm:files:list # List all loaded file sources
|
|
305
|
+
rake htm:files:sync # Sync all files (reload changed)
|
|
306
|
+
rake htm:files:stats # Show file loading statistics
|
|
307
|
+
```
|
|
308
|
+
|
|
267
309
|
### Automatic Tag Extraction
|
|
268
310
|
|
|
269
311
|
HTM automatically extracts hierarchical tags from content using LLM analysis. Tags are inferred from the content itself - you never specify them manually.
|
|
@@ -518,13 +560,14 @@ HTM provides a minimal, focused API with only 3 core instance methods for memory
|
|
|
518
560
|
|
|
519
561
|
### Core Memory Operations
|
|
520
562
|
|
|
521
|
-
#### `remember(content, source: "")`
|
|
563
|
+
#### `remember(content, source: "", metadata: {})`
|
|
522
564
|
|
|
523
565
|
Store information in memory. Embeddings and tags are automatically generated asynchronously.
|
|
524
566
|
|
|
525
567
|
**Parameters:**
|
|
526
568
|
- `content` (String, required) - The information to remember. Converted to string if nil. Returns ID of last node if empty.
|
|
527
569
|
- `source` (String, optional) - Where the content came from (e.g., "user", "assistant", "system"). Defaults to empty string.
|
|
570
|
+
- `metadata` (Hash, optional) - Arbitrary key-value metadata stored as JSONB. Keys must be strings or symbols. Defaults to `{}`.
|
|
528
571
|
|
|
529
572
|
**Returns:** Integer - The node ID of the stored memory
|
|
530
573
|
|
|
@@ -536,6 +579,12 @@ node_id = htm.remember("PostgreSQL is excellent for vector search with pgvector"
|
|
|
536
579
|
# Store without source (uses default empty string)
|
|
537
580
|
node_id = htm.remember("HTM uses two-tier memory architecture")
|
|
538
581
|
|
|
582
|
+
# Store with metadata
|
|
583
|
+
node_id = htm.remember(
|
|
584
|
+
"User prefers dark mode",
|
|
585
|
+
metadata: { category: "preference", priority: "high", version: 2 }
|
|
586
|
+
)
|
|
587
|
+
|
|
539
588
|
# Nil/empty handling
|
|
540
589
|
node_id = htm.remember(nil) # Returns ID of last node without creating duplicate
|
|
541
590
|
node_id = htm.remember("") # Returns ID of last node without creating duplicate
|
|
@@ -543,7 +592,7 @@ node_id = htm.remember("") # Returns ID of last node without creating duplicat
|
|
|
543
592
|
|
|
544
593
|
---
|
|
545
594
|
|
|
546
|
-
#### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [])`
|
|
595
|
+
#### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], metadata: {})`
|
|
547
596
|
|
|
548
597
|
Retrieve memories using temporal filtering and semantic/keyword search.
|
|
549
598
|
|
|
@@ -559,8 +608,9 @@ Retrieve memories using temporal filtering and semantic/keyword search.
|
|
|
559
608
|
- `:hybrid` - Weighted combination (70% vector, 30% full-text)
|
|
560
609
|
- `with_relevance` (Boolean, optional) - Include dynamic relevance scores. Default: false
|
|
561
610
|
- `query_tags` (Array<String>, optional) - Filter results by tags. Default: []
|
|
611
|
+
- `metadata` (Hash, optional) - Filter results by metadata using JSONB containment (`@>`). Default: `{}`
|
|
562
612
|
|
|
563
|
-
**Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, (optionally `relevance`)
|
|
613
|
+
**Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, `metadata`, (optionally `relevance`)
|
|
564
614
|
|
|
565
615
|
**Example:**
|
|
566
616
|
```ruby
|
|
@@ -586,6 +636,21 @@ memories = htm.recall(
|
|
|
586
636
|
query_tags: ["architecture"]
|
|
587
637
|
)
|
|
588
638
|
# => [{ "id" => 123, "content" => "...", "relevance" => 0.92, ... }, ...]
|
|
639
|
+
|
|
640
|
+
# Filter by metadata
|
|
641
|
+
memories = htm.recall(
|
|
642
|
+
"user preferences",
|
|
643
|
+
metadata: { category: "preference" }
|
|
644
|
+
)
|
|
645
|
+
# => Returns only nodes with metadata containing { category: "preference" }
|
|
646
|
+
|
|
647
|
+
# Combine metadata with other filters
|
|
648
|
+
memories = htm.recall(
|
|
649
|
+
"settings",
|
|
650
|
+
timeframe: "last month",
|
|
651
|
+
strategy: :hybrid,
|
|
652
|
+
metadata: { priority: "high", version: 2 }
|
|
653
|
+
)
|
|
589
654
|
```
|
|
590
655
|
|
|
591
656
|
---
|
|
@@ -1312,10 +1377,16 @@ See [htm_teamwork.md](htm_teamwork.md) for detailed design documentation and pla
|
|
|
1312
1377
|
### Database Schema
|
|
1313
1378
|
|
|
1314
1379
|
- `robots`: Robot registry for all LLM agents using HTM
|
|
1315
|
-
- `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), metadata
|
|
1380
|
+
- `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), JSONB metadata
|
|
1316
1381
|
- `tags`: Hierarchical tag ontology (format: `root:level1:level2:level3`)
|
|
1317
1382
|
- `node_tags`: Join table implementing many-to-many relationship between nodes and tags
|
|
1318
1383
|
|
|
1384
|
+
**Nodes Table Key Columns:**
|
|
1385
|
+
- `content`: The memory content
|
|
1386
|
+
- `embedding`: Vector embedding for semantic search (up to 2000 dimensions)
|
|
1387
|
+
- `metadata`: JSONB column for arbitrary key-value data (filterable via `@>` containment operator)
|
|
1388
|
+
- `content_hash`: SHA-256 hash for deduplication
|
|
1389
|
+
|
|
1319
1390
|
### Service Architecture
|
|
1320
1391
|
|
|
1321
1392
|
HTM uses a layered architecture for LLM integration:
|
data/Rakefile
CHANGED
|
data/db/migrate/00002_create_robots.rb
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateRobots < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
|
|
6
|
+
t.text :name, comment: 'Human-readable name for the robot'
|
|
7
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
|
|
8
|
+
t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
end
|
|
data/db/migrate/00003_create_file_sources.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateFileSources < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
|
|
6
|
+
t.text :file_path, null: false, comment: 'Absolute path to source file'
|
|
7
|
+
t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
|
|
8
|
+
t.timestamptz :mtime, comment: 'File modification time'
|
|
9
|
+
t.integer :file_size, comment: 'File size in bytes'
|
|
10
|
+
t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
|
|
11
|
+
t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
|
|
12
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
|
|
13
|
+
t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
|
|
17
|
+
add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
|
|
18
|
+
add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
|
|
19
|
+
end
|
|
20
|
+
end
|
|
data/db/migrate/00004_create_nodes.rb
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateNodes < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
|
|
6
|
+
t.text :content, null: false, comment: 'The conversation message/utterance content'
|
|
7
|
+
t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
|
|
8
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
|
|
9
|
+
t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
|
|
10
|
+
t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
|
|
11
|
+
t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
|
|
12
|
+
t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
|
|
13
|
+
t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
|
|
14
|
+
t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
|
|
15
|
+
t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
|
|
16
|
+
t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
|
|
17
|
+
t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
|
|
18
|
+
t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Basic indexes for common queries
|
|
22
|
+
add_index :nodes, :created_at, name: 'idx_nodes_created_at'
|
|
23
|
+
add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
|
|
24
|
+
add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
|
|
25
|
+
add_index :nodes, :access_count, name: 'idx_nodes_access_count'
|
|
26
|
+
add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
|
|
27
|
+
add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
|
|
28
|
+
add_index :nodes, :source_id, name: 'idx_nodes_source_id'
|
|
29
|
+
add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
|
|
30
|
+
|
|
31
|
+
# Partial index for efficiently querying non-deleted nodes
|
|
32
|
+
add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
|
|
33
|
+
|
|
34
|
+
# GIN index for JSONB metadata queries
|
|
35
|
+
add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
|
|
36
|
+
|
|
37
|
+
# Vector similarity search index (HNSW for better performance)
|
|
38
|
+
execute <<-SQL
|
|
39
|
+
CREATE INDEX idx_nodes_embedding ON nodes
|
|
40
|
+
USING hnsw (embedding vector_cosine_ops)
|
|
41
|
+
WITH (m = 16, ef_construction = 64)
|
|
42
|
+
SQL
|
|
43
|
+
|
|
44
|
+
# Full-text search on conversation content
|
|
45
|
+
execute <<-SQL
|
|
46
|
+
CREATE INDEX idx_nodes_content_gin ON nodes
|
|
47
|
+
USING gin(to_tsvector('english', content))
|
|
48
|
+
SQL
|
|
49
|
+
|
|
50
|
+
# Trigram indexes for fuzzy matching on conversation content
|
|
51
|
+
execute <<-SQL
|
|
52
|
+
CREATE INDEX idx_nodes_content_trgm ON nodes
|
|
53
|
+
USING gin(content gin_trgm_ops)
|
|
54
|
+
SQL
|
|
55
|
+
|
|
56
|
+
# Check constraint for embedding dimensions
|
|
57
|
+
execute <<-SQL
|
|
58
|
+
ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
|
|
59
|
+
CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
|
|
60
|
+
SQL
|
|
61
|
+
|
|
62
|
+
# Foreign key to file_sources table
|
|
63
|
+
add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
|
|
64
|
+
end
|
|
65
|
+
end
|
|
data/db/migrate/00005_create_tags.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateTags < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :tags, comment: 'Unique tag names for categorization' do |t|
|
|
6
|
+
t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
|
|
7
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
|
|
11
|
+
add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
|
|
12
|
+
end
|
|
13
|
+
end
|
|
data/db/migrate/00006_create_node_tags.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateNodeTags < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
|
|
6
|
+
t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
|
|
7
|
+
t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
|
|
8
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
|
|
12
|
+
add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
|
|
13
|
+
add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
|
|
14
|
+
|
|
15
|
+
add_foreign_key :node_tags, :nodes, column: :node_id, on_delete: :cascade
|
|
16
|
+
add_foreign_key :node_tags, :tags, column: :tag_id, on_delete: :cascade
|
|
17
|
+
end
|
|
18
|
+
end
|
|
data/db/migrate/00007_create_robot_nodes.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateRobotNodes < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
create_table :robot_nodes, comment: 'Join table connecting robots to nodes (many-to-many)' do |t|
|
|
6
|
+
t.bigint :robot_id, null: false, comment: 'ID of the robot that remembered this node'
|
|
7
|
+
t.bigint :node_id, null: false, comment: 'ID of the node being remembered'
|
|
8
|
+
t.timestamptz :first_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
|
|
9
|
+
comment: 'When this robot first remembered this content'
|
|
10
|
+
t.timestamptz :last_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
|
|
11
|
+
comment: 'When this robot last tried to remember this content'
|
|
12
|
+
t.integer :remember_count, default: 1, null: false,
|
|
13
|
+
comment: 'Number of times this robot has tried to remember this content'
|
|
14
|
+
t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
|
|
15
|
+
t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
add_index :robot_nodes, [:robot_id, :node_id], unique: true, name: 'idx_robot_nodes_unique'
|
|
19
|
+
add_index :robot_nodes, :robot_id, name: 'idx_robot_nodes_robot_id'
|
|
20
|
+
add_index :robot_nodes, :node_id, name: 'idx_robot_nodes_node_id'
|
|
21
|
+
add_index :robot_nodes, :last_remembered_at, name: 'idx_robot_nodes_last_remembered_at'
|
|
22
|
+
|
|
23
|
+
add_foreign_key :robot_nodes, :robots, column: :robot_id, on_delete: :cascade
|
|
24
|
+
add_foreign_key :robot_nodes, :nodes, column: :node_id, on_delete: :cascade
|
|
25
|
+
end
|
|
26
|
+
end
|
|
data/db/migrate/00009_add_working_memory_to_robot_nodes.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class AddWorkingMemoryToRobotNodes < ActiveRecord::Migration[7.1]
|
|
4
|
+
def change
|
|
5
|
+
add_column :robot_nodes, :working_memory, :boolean, default: false, null: false,
|
|
6
|
+
comment: 'True if this node is currently in the robot working memory'
|
|
7
|
+
|
|
8
|
+
add_index :robot_nodes, [:robot_id, :working_memory],
|
|
9
|
+
where: 'working_memory = true',
|
|
10
|
+
name: 'idx_robot_nodes_working_memory'
|
|
11
|
+
end
|
|
12
|
+
end
|
data/db/schema.sql
CHANGED
|
@@ -23,6 +23,81 @@ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
|
|
|
23
23
|
-- Name: EXTENSION vector; Type: COMMENT; Schema: -; Owner: -
|
|
24
24
|
--
|
|
25
25
|
|
|
26
|
+
--
|
|
27
|
+
-- Name: file_sources; Type: TABLE; Schema: public; Owner: -
|
|
28
|
+
--
|
|
29
|
+
|
|
30
|
+
CREATE TABLE public.file_sources (
|
|
31
|
+
id bigint NOT NULL,
|
|
32
|
+
file_path text NOT NULL,
|
|
33
|
+
file_hash character varying(64),
|
|
34
|
+
mtime timestamp with time zone,
|
|
35
|
+
file_size integer,
|
|
36
|
+
frontmatter jsonb DEFAULT '{}'::jsonb,
|
|
37
|
+
last_synced_at timestamp with time zone,
|
|
38
|
+
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
39
|
+
updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
--
|
|
43
|
+
-- Name: TABLE file_sources; Type: COMMENT; Schema: public; Owner: -
|
|
44
|
+
--
|
|
45
|
+
|
|
46
|
+
COMMENT ON TABLE public.file_sources IS 'Source file metadata for loaded documents';
|
|
47
|
+
|
|
48
|
+
--
|
|
49
|
+
-- Name: COLUMN file_sources.file_path; Type: COMMENT; Schema: public; Owner: -
|
|
50
|
+
--
|
|
51
|
+
|
|
52
|
+
COMMENT ON COLUMN public.file_sources.file_path IS 'Absolute path to source file';
|
|
53
|
+
|
|
54
|
+
--
|
|
55
|
+
-- Name: COLUMN file_sources.file_hash; Type: COMMENT; Schema: public; Owner: -
|
|
56
|
+
--
|
|
57
|
+
|
|
58
|
+
COMMENT ON COLUMN public.file_sources.file_hash IS 'SHA-256 hash of file content';
|
|
59
|
+
|
|
60
|
+
--
|
|
61
|
+
-- Name: COLUMN file_sources.mtime; Type: COMMENT; Schema: public; Owner: -
|
|
62
|
+
--
|
|
63
|
+
|
|
64
|
+
COMMENT ON COLUMN public.file_sources.mtime IS 'File modification time';
|
|
65
|
+
|
|
66
|
+
--
|
|
67
|
+
-- Name: COLUMN file_sources.file_size; Type: COMMENT; Schema: public; Owner: -
|
|
68
|
+
--
|
|
69
|
+
|
|
70
|
+
COMMENT ON COLUMN public.file_sources.file_size IS 'File size in bytes';
|
|
71
|
+
|
|
72
|
+
--
|
|
73
|
+
-- Name: COLUMN file_sources.frontmatter; Type: COMMENT; Schema: public; Owner: -
|
|
74
|
+
--
|
|
75
|
+
|
|
76
|
+
COMMENT ON COLUMN public.file_sources.frontmatter IS 'Parsed YAML frontmatter';
|
|
77
|
+
|
|
78
|
+
--
|
|
79
|
+
-- Name: COLUMN file_sources.last_synced_at; Type: COMMENT; Schema: public; Owner: -
|
|
80
|
+
--
|
|
81
|
+
|
|
82
|
+
COMMENT ON COLUMN public.file_sources.last_synced_at IS 'When file was last synced to HTM';
|
|
83
|
+
|
|
84
|
+
--
|
|
85
|
+
-- Name: file_sources_id_seq; Type: SEQUENCE; Schema: public; Owner: -
|
|
86
|
+
--
|
|
87
|
+
|
|
88
|
+
CREATE SEQUENCE public.file_sources_id_seq
|
|
89
|
+
START WITH 1
|
|
90
|
+
INCREMENT BY 1
|
|
91
|
+
NO MINVALUE
|
|
92
|
+
NO MAXVALUE
|
|
93
|
+
CACHE 1;
|
|
94
|
+
|
|
95
|
+
--
|
|
96
|
+
-- Name: file_sources_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
|
|
97
|
+
--
|
|
98
|
+
|
|
99
|
+
ALTER SEQUENCE public.file_sources_id_seq OWNED BY public.file_sources.id;
|
|
100
|
+
|
|
26
101
|
--
|
|
27
102
|
-- Name: node_tags; Type: TABLE; Schema: public; Owner: -
|
|
28
103
|
--
|
|
@@ -90,6 +165,10 @@ CREATE TABLE public.nodes (
|
|
|
90
165
|
embedding public.vector(2000),
|
|
91
166
|
embedding_dimension integer,
|
|
92
167
|
content_hash character varying(64),
|
|
168
|
+
deleted_at timestamp with time zone,
|
|
169
|
+
source_id bigint,
|
|
170
|
+
chunk_position integer,
|
|
171
|
+
metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
|
|
93
172
|
CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
|
|
94
173
|
);
|
|
95
174
|
|
|
@@ -153,6 +232,30 @@ COMMENT ON COLUMN public.nodes.embedding_dimension IS 'Actual number of dimensio
|
|
|
153
232
|
|
|
154
233
|
COMMENT ON COLUMN public.nodes.content_hash IS 'SHA-256 hash of content for deduplication';
|
|
155
234
|
|
|
235
|
+
--
|
|
236
|
+
-- Name: COLUMN nodes.deleted_at; Type: COMMENT; Schema: public; Owner: -
|
|
237
|
+
--
|
|
238
|
+
|
|
239
|
+
COMMENT ON COLUMN public.nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set';
|
|
240
|
+
|
|
241
|
+
--
|
|
242
|
+
-- Name: COLUMN nodes.source_id; Type: COMMENT; Schema: public; Owner: -
|
|
243
|
+
--
|
|
244
|
+
|
|
245
|
+
COMMENT ON COLUMN public.nodes.source_id IS 'Reference to source file (for file-loaded nodes)';
|
|
246
|
+
|
|
247
|
+
--
|
|
248
|
+
-- Name: COLUMN nodes.chunk_position; Type: COMMENT; Schema: public; Owner: -
|
|
249
|
+
--
|
|
250
|
+
|
|
251
|
+
COMMENT ON COLUMN public.nodes.chunk_position IS 'Position within source file (0-indexed)';
|
|
252
|
+
|
|
253
|
+
--
|
|
254
|
+
-- Name: COLUMN nodes.metadata; Type: COMMENT; Schema: public; Owner: -
|
|
255
|
+
--
|
|
256
|
+
|
|
257
|
+
COMMENT ON COLUMN public.nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)';
|
|
258
|
+
|
|
156
259
|
--
|
|
157
260
|
-- Name: nodes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
|
|
158
261
|
--
|
|
@@ -402,6 +505,12 @@ CREATE SEQUENCE public.working_memories_id_seq
|
|
|
402
505
|
|
|
403
506
|
ALTER SEQUENCE public.working_memories_id_seq OWNED BY public.working_memories.id;
|
|
404
507
|
|
|
508
|
+
--
|
|
509
|
+
-- Name: file_sources id; Type: DEFAULT; Schema: public; Owner: -
|
|
510
|
+
--
|
|
511
|
+
|
|
512
|
+
ALTER TABLE ONLY public.file_sources ALTER COLUMN id SET DEFAULT nextval('public.file_sources_id_seq'::regclass);
|
|
513
|
+
|
|
405
514
|
--
|
|
406
515
|
-- Name: node_tags id; Type: DEFAULT; Schema: public; Owner: -
|
|
407
516
|
--
|
|
@@ -438,6 +547,13 @@ ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id
|
|
|
438
547
|
|
|
439
548
|
ALTER TABLE ONLY public.working_memories ALTER COLUMN id SET DEFAULT nextval('public.working_memories_id_seq'::regclass);
|
|
440
549
|
|
|
550
|
+
--
|
|
551
|
+
-- Name: file_sources file_sources_pkey; Type: CONSTRAINT; Schema: public; Owner: -
|
|
552
|
+
--
|
|
553
|
+
|
|
554
|
+
ALTER TABLE ONLY public.file_sources
|
|
555
|
+
ADD CONSTRAINT file_sources_pkey PRIMARY KEY (id);
|
|
556
|
+
|
|
441
557
|
--
|
|
442
558
|
-- Name: node_tags node_tags_pkey; Type: CONSTRAINT; Schema: public; Owner: -
|
|
443
559
|
--
|
|
@@ -487,6 +603,24 @@ ALTER TABLE ONLY public.tags
|
|
|
487
603
|
ALTER TABLE ONLY public.working_memories
|
|
488
604
|
ADD CONSTRAINT working_memories_pkey PRIMARY KEY (id);
|
|
489
605
|
|
|
606
|
+
--
|
|
607
|
+
-- Name: idx_file_sources_hash; Type: INDEX; Schema: public; Owner: -
|
|
608
|
+
--
|
|
609
|
+
|
|
610
|
+
CREATE INDEX idx_file_sources_hash ON public.file_sources USING btree (file_hash);
|
|
611
|
+
|
|
612
|
+
--
|
|
613
|
+
-- Name: idx_file_sources_last_synced; Type: INDEX; Schema: public; Owner: -
|
|
614
|
+
--
|
|
615
|
+
|
|
616
|
+
CREATE INDEX idx_file_sources_last_synced ON public.file_sources USING btree (last_synced_at);
|
|
617
|
+
|
|
618
|
+
--
|
|
619
|
+
-- Name: idx_file_sources_path_unique; Type: INDEX; Schema: public; Owner: -
|
|
620
|
+
--
|
|
621
|
+
|
|
622
|
+
CREATE UNIQUE INDEX idx_file_sources_path_unique ON public.file_sources USING btree (file_path);
|
|
623
|
+
|
|
490
624
|
--
|
|
491
625
|
-- Name: idx_node_tags_node_id; Type: INDEX; Schema: public; Owner: -
|
|
492
626
|
--
|
|
@@ -535,6 +669,12 @@ CREATE INDEX idx_nodes_content_trgm ON public.nodes USING gin (content public.gi
|
|
|
535
669
|
|
|
536
670
|
CREATE INDEX idx_nodes_created_at ON public.nodes USING btree (created_at);
|
|
537
671
|
|
|
672
|
+
--
|
|
673
|
+
-- Name: idx_nodes_deleted_at; Type: INDEX; Schema: public; Owner: -
|
|
674
|
+
--
|
|
675
|
+
|
|
676
|
+
CREATE INDEX idx_nodes_deleted_at ON public.nodes USING btree (deleted_at);
|
|
677
|
+
|
|
538
678
|
--
|
|
539
679
|
-- Name: idx_nodes_embedding; Type: INDEX; Schema: public; Owner: -
|
|
540
680
|
--
|
|
@@ -547,6 +687,30 @@ CREATE INDEX idx_nodes_embedding ON public.nodes USING hnsw (embedding public.ve
|
|
|
547
687
|
|
|
548
688
|
CREATE INDEX idx_nodes_last_accessed ON public.nodes USING btree (last_accessed);
|
|
549
689
|
|
|
690
|
+
--
|
|
691
|
+
-- Name: idx_nodes_metadata; Type: INDEX; Schema: public; Owner: -
|
|
692
|
+
--
|
|
693
|
+
|
|
694
|
+
CREATE INDEX idx_nodes_metadata ON public.nodes USING gin (metadata);
|
|
695
|
+
|
|
696
|
+
--
|
|
697
|
+
-- Name: idx_nodes_not_deleted_created_at; Type: INDEX; Schema: public; Owner: -
|
|
698
|
+
--
|
|
699
|
+
|
|
700
|
+
CREATE INDEX idx_nodes_not_deleted_created_at ON public.nodes USING btree (created_at) WHERE (deleted_at IS NULL);
|
|
701
|
+
|
|
702
|
+
--
|
|
703
|
+
-- Name: idx_nodes_source_chunk_position; Type: INDEX; Schema: public; Owner: -
|
|
704
|
+
--
|
|
705
|
+
|
|
706
|
+
CREATE INDEX idx_nodes_source_chunk_position ON public.nodes USING btree (source_id, chunk_position);
|
|
707
|
+
|
|
708
|
+
--
|
|
709
|
+
-- Name: idx_nodes_source_id; Type: INDEX; Schema: public; Owner: -
|
|
710
|
+
--
|
|
711
|
+
|
|
712
|
+
CREATE INDEX idx_nodes_source_id ON public.nodes USING btree (source_id);
|
|
713
|
+
|
|
550
714
|
--
|
|
551
715
|
-- Name: idx_nodes_updated_at; Type: INDEX; Schema: public; Owner: -
|
|
552
716
|
--
|
|
@@ -621,6 +785,13 @@ ALTER TABLE ONLY public.working_memories
|
|
|
621
785
|
ALTER TABLE ONLY public.working_memories
|
|
622
786
|
ADD CONSTRAINT fk_rails_4b7c3eb07b FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
|
|
623
787
|
|
|
788
|
+
--
|
|
789
|
+
-- Name: nodes fk_rails_920ad16d08; Type: FK CONSTRAINT; Schema: public; Owner: -
|
|
790
|
+
--
|
|
791
|
+
|
|
792
|
+
ALTER TABLE ONLY public.nodes
|
|
793
|
+
ADD CONSTRAINT fk_rails_920ad16d08 FOREIGN KEY (source_id) REFERENCES public.file_sources(id) ON DELETE SET NULL;
|
|
794
|
+
|
|
624
795
|
--
|
|
625
796
|
-- Name: robot_nodes fk_rails_9b003078a8; Type: FK CONSTRAINT; Schema: public; Owner: -
|
|
626
797
|
--
|
|
@@ -653,4 +824,4 @@ ALTER TABLE ONLY public.robot_nodes
|
|
|
653
824
|
-- PostgreSQL database dump complete
|
|
654
825
|
--
|
|
655
826
|
|
|
656
|
-
\unrestrict
|
|
827
|
+
\unrestrict DUrF24Zrve4qSBwlDrJ4qAzzZhvhX5s2S57oHYVJ0ZPbaDC4ItMZ29Pv9oI3Q9d
|
data/docs/api/database.md
CHANGED
|
@@ -276,9 +276,8 @@ For detailed database schema documentation, see:
|
|
|
276
276
|
| [robots](../database/public.robots.md) | Robot registry for multi-robot tracking |
|
|
277
277
|
| [nodes](../database/public.nodes.md) | Primary memory storage with vector embeddings |
|
|
278
278
|
| [tags](../database/public.tags.md) | Hierarchical tag names for categorization |
|
|
279
|
-
| [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind) |
|
|
279
|
+
| [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind, working memory) |
|
|
280
280
|
| [node_tags](../database/public.node_tags.md) | Node-to-tag associations |
|
|
281
|
-
| [working_memories](../database/public.working_memories.md) | Per-robot working memory state |
|
|
282
281
|
|
|
283
282
|
### Required Extensions
|
|
284
283
|
|