htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
data/README.md CHANGED
@@ -27,7 +27,7 @@
 
  - **Client-Side Embeddings**
  - Automatic embedding generation before database insertion
- - Supports Ollama (local, default) and OpenAI
+ - Uses the [ruby_llm](https://ruby_llm.com) gem for LLM access
  - Configurable embedding providers and models
 
  - **Two-Tier Memory Architecture**
@@ -62,6 +62,12 @@
  - Tag-based categorization
  - Hierarchical tag structures
 
+ - **File Loading**
+ - Load markdown files into long-term memory
+ - Automatic paragraph-based chunking
+ - Source file tracking with re-sync support
+ - YAML frontmatter extraction as metadata
+
  ## Installation
 
  Add this line to your application's Gemfile:
@@ -264,6 +270,42 @@ memories = htm.recall(
  htm.forget(node_id, confirm: :confirmed)
  ```
 
+ ### Loading Files
+
+ HTM can load text-based files (currently markdown) into long-term memory with automatic chunking and source tracking.
+
+ ```ruby
+ htm = HTM.new(robot_name: "Document Loader")
+
+ # Load a single markdown file
+ result = htm.load_file("docs/guide.md")
+ # => { file_source_id: 1, chunks_created: 5, chunks_updated: 0, chunks_deleted: 0 }
+
+ # Load all markdown files in a directory
+ results = htm.load_directory("docs/", pattern: "**/*.md")
+
+ # Get nodes from a specific file
+ nodes = htm.nodes_from_file("docs/guide.md")
+
+ # Unload a file (soft deletes chunks)
+ htm.unload_file("docs/guide.md")
+ ```
+
+ **Features:**
+ - **Paragraph chunking**: Text split by blank lines, code blocks preserved
+ - **Source tracking**: Files tracked with mtime for automatic re-sync
+ - **YAML frontmatter**: Extracted and stored as metadata
+ - **Duplicate detection**: Content hash prevents duplicate nodes
+
+ **Rake tasks:**
+ ```bash
+ rake 'htm:files:load[docs/guide.md]'   # Load a single file
+ rake 'htm:files:load_dir[docs/]'       # Load all markdown files from directory
+ rake htm:files:list                    # List all loaded file sources
+ rake htm:files:sync                    # Sync all files (reload changed)
+ rake htm:files:stats                   # Show file loading statistics
+ ```
+
  ### Automatic Tag Extraction
 
  HTM automatically extracts hierarchical tags from content using LLM analysis. Tags are inferred from the content itself - you never specify them manually.
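The file-loading calls documented in the README excerpt above lend themselves to a simple re-sync check. A minimal editorial sketch, assuming `load_file` may be called again on an already-loaded path and that each call returns the counts hash shown above; this workflow is an illustration, not code taken from the package:

```ruby
require "htm"

htm = HTM.new(robot_name: "Document Loader")

first = htm.load_file("docs/guide.md")
puts "Initial load created #{first[:chunks_created]} chunks"

# ... docs/guide.md is edited on disk ...

# Assumed behaviour: reloading a tracked file updates changed chunks and
# reports per-call counts, mirroring the return hash documented above.
second = htm.load_file("docs/guide.md")
puts "Re-sync: #{second[:chunks_updated]} updated, #{second[:chunks_deleted]} deleted"

puts "#{htm.nodes_from_file('docs/guide.md').size} chunks currently loaded"
```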
@@ -518,13 +560,14 @@ HTM provides a minimal, focused API with only 3 core instance methods for memory
 
  ### Core Memory Operations
 
- #### `remember(content, source: "")`
+ #### `remember(content, source: "", metadata: {})`
 
  Store information in memory. Embeddings and tags are automatically generated asynchronously.
 
  **Parameters:**
  - `content` (String, required) - The information to remember. Converted to string if nil. Returns ID of last node if empty.
  - `source` (String, optional) - Where the content came from (e.g., "user", "assistant", "system"). Defaults to empty string.
+ - `metadata` (Hash, optional) - Arbitrary key-value metadata stored as JSONB. Keys must be strings or symbols. Defaults to `{}`.
 
  **Returns:** Integer - The node ID of the stored memory
 
@@ -536,6 +579,12 @@ node_id = htm.remember("PostgreSQL is excellent for vector search with pgvector"
  # Store without source (uses default empty string)
  node_id = htm.remember("HTM uses two-tier memory architecture")
 
+ # Store with metadata
+ node_id = htm.remember(
+ "User prefers dark mode",
+ metadata: { category: "preference", priority: "high", version: 2 }
+ )
+
  # Nil/empty handling
  node_id = htm.remember(nil) # Returns ID of last node without creating duplicate
  node_id = htm.remember("") # Returns ID of last node without creating duplicate
@@ -543,7 +592,7 @@
 
  ---
 
- #### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [])`
+ #### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], metadata: {})`
  Retrieve memories using temporal filtering and semantic/keyword search.
 
 
@@ -559,8 +608,9 @@ Retrieve memories using temporal filtering and semantic/keyword search.
  - `:hybrid` - Weighted combination (70% vector, 30% full-text)
  - `with_relevance` (Boolean, optional) - Include dynamic relevance scores. Default: false
  - `query_tags` (Array<String>, optional) - Filter results by tags. Default: []
+ - `metadata` (Hash, optional) - Filter results by metadata using JSONB containment (`@>`). Default: `{}`
 
- **Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, (optionally `relevance`)
+ **Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, `metadata`, (optionally `relevance`)
 
  **Example:**
  ```ruby
@@ -586,6 +636,21 @@ memories = htm.recall(
  query_tags: ["architecture"]
  )
  # => [{ "id" => 123, "content" => "...", "relevance" => 0.92, ... }, ...]
+
+ # Filter by metadata
+ memories = htm.recall(
+ "user preferences",
+ metadata: { category: "preference" }
+ )
+ # => Returns only nodes with metadata containing { category: "preference" }
+
+ # Combine metadata with other filters
+ memories = htm.recall(
+ "settings",
+ timeframe: "last month",
+ strategy: :hybrid,
+ metadata: { priority: "high", version: 2 }
+ )
  ```
 
  ---
@@ -1312,10 +1377,16 @@ See [htm_teamwork.md](htm_teamwork.md) for detailed design documentation and pla
  ### Database Schema
 
  - `robots`: Robot registry for all LLM agents using HTM
- - `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), metadata
+ - `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), JSONB metadata
  - `tags`: Hierarchical tag ontology (format: `root:level1:level2:level3`)
  - `node_tags`: Join table implementing many-to-many relationship between nodes and tags
 
+ **Nodes Table Key Columns:**
+ - `content`: The memory content
+ - `embedding`: Vector embedding for semantic search (up to 2000 dimensions)
+ - `metadata`: JSONB column for arbitrary key-value data (filterable via `@>` containment operator)
+ - `content_hash`: SHA-256 hash for deduplication
+
  ### Service Architecture
 
  HTM uses a layered architecture for LLM integration:
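The JSONB `metadata` column and its GIN index, noted in the schema summary above, are what back the `recall(metadata:)` filter. A brief editorial sketch of the documented call and, for orientation, the kind of containment predicate it corresponds to; the raw SQL here is illustrative, not code from the gem:

```ruby
require "htm"

htm = HTM.new(robot_name: "Preferences Bot")
htm.remember("User prefers dark mode", metadata: { category: "preference" })

# Documented API: JSONB containment filter on the metadata column
hits = htm.recall("user preferences", metadata: { category: "preference" })

# Roughly the predicate shape the GIN-indexed column supports (illustrative SQL):
sql = <<~SQL
  SELECT id, content, metadata
  FROM nodes
  WHERE deleted_at IS NULL
    AND metadata @> '{"category": "preference"}'::jsonb
SQL
```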
data/Rakefile CHANGED
@@ -28,6 +28,11 @@ task :example do
  ruby "examples/basic_usage.rb"
  end
 
+ desc "Run timeframe demo"
+ task :timeframe_demo do
+ ruby "examples/timeframe_demo.rb"
+ end
+
  desc "Show gem stats"
  task :stats do
  puts "\nHTM Gem Statistics:"
data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} RENAMED
@@ -2,7 +2,6 @@
 
  class EnableExtensions < ActiveRecord::Migration[7.1]
  def up
- # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
  enable_extension 'vector'
  enable_extension 'pg_trgm'
  end
data/db/migrate/00002_create_robots.rb ADDED
@@ -0,0 +1,11 @@
+ # frozen_string_literal: true
+
+ class CreateRobots < ActiveRecord::Migration[7.1]
+ def change
+ create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
+ t.text :name, comment: 'Human-readable name for the robot'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
+ t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
+ end
+ end
+ end
data/db/migrate/00003_create_file_sources.rb ADDED
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ class CreateFileSources < ActiveRecord::Migration[7.1]
+ def change
+ create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
+ t.text :file_path, null: false, comment: 'Absolute path to source file'
+ t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
+ t.timestamptz :mtime, comment: 'File modification time'
+ t.integer :file_size, comment: 'File size in bytes'
+ t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
+ t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
+ end
+
+ add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
+ add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
+ add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
+ end
+ end
data/db/migrate/00004_create_nodes.rb ADDED
@@ -0,0 +1,65 @@
+ # frozen_string_literal: true
+
+ class CreateNodes < ActiveRecord::Migration[7.1]
+ def change
+ create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
+ t.text :content, null: false, comment: 'The conversation message/utterance content'
+ t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
+ t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
+ t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
+ t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
+ t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
+ t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
+ t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
+ t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
+ t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
+ t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
+ end
+
+ # Basic indexes for common queries
+ add_index :nodes, :created_at, name: 'idx_nodes_created_at'
+ add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
+ add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
+ add_index :nodes, :access_count, name: 'idx_nodes_access_count'
+ add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
+ add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
+ add_index :nodes, :source_id, name: 'idx_nodes_source_id'
+ add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
+
+ # Partial index for efficiently querying non-deleted nodes
+ add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
+
+ # GIN index for JSONB metadata queries
+ add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
+
+ # Vector similarity search index (HNSW for better performance)
+ execute <<-SQL
+ CREATE INDEX idx_nodes_embedding ON nodes
+ USING hnsw (embedding vector_cosine_ops)
+ WITH (m = 16, ef_construction = 64)
+ SQL
+
+ # Full-text search on conversation content
+ execute <<-SQL
+ CREATE INDEX idx_nodes_content_gin ON nodes
+ USING gin(to_tsvector('english', content))
+ SQL
+
+ # Trigram indexes for fuzzy matching on conversation content
+ execute <<-SQL
+ CREATE INDEX idx_nodes_content_trgm ON nodes
+ USING gin(content gin_trgm_ops)
+ SQL
+
+ # Check constraint for embedding dimensions
+ execute <<-SQL
+ ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
+ CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
+ SQL
+
+ # Foreign key to file_sources table
+ add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
+ end
+ end
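The HNSW index created in this migration exists to accelerate nearest-neighbour ordering on the `embedding` column. An editorial sketch of the query shape it serves, using pgvector's cosine-distance operator (`<=>`, the one `vector_cosine_ops` indexes); the vector literal is a placeholder, and in the gem this path is reached through `htm.recall(strategy: :vector)` rather than hand-written SQL:

```ruby
# Illustrative only: the embedding below is a stand-in for a real vector
# produced by the embedding service (up to 2000 dimensions).
query_vector = "[0.013, -0.207, 0.114]"

# Order by cosine distance to the query vector and keep the closest matches.
sql = <<~SQL
  SELECT id, content, embedding <=> '#{query_vector}' AS cosine_distance
  FROM nodes
  WHERE deleted_at IS NULL AND embedding IS NOT NULL
  ORDER BY embedding <=> '#{query_vector}'
  LIMIT 20
SQL
```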
data/db/migrate/00005_create_tags.rb ADDED
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+
+ class CreateTags < ActiveRecord::Migration[7.1]
+ def change
+ create_table :tags, comment: 'Unique tag names for categorization' do |t|
+ t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
+ end
+
+ add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
+ add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
+ end
+ end
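Because tag names are hierarchical (`root:level1:level2`), the `text_pattern_ops` index above is well suited to prefix matching. A short editorial sketch of the kind of lookup it enables; illustrative SQL, not an API exposed by the gem:

```ruby
# Find every tag under the "database" root, e.g. database:postgresql:timescaledb.
sql = <<~SQL
  SELECT name
  FROM tags
  WHERE name LIKE 'database:%'
  ORDER BY name
SQL
```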
data/db/migrate/00006_create_node_tags.rb ADDED
@@ -0,0 +1,18 @@
+ # frozen_string_literal: true
+
+ class CreateNodeTags < ActiveRecord::Migration[7.1]
+ def change
+ create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
+ t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
+ t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
+ end
+
+ add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
+ add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
+ add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
+
+ add_foreign_key :node_tags, :nodes, column: :node_id, on_delete: :cascade
+ add_foreign_key :node_tags, :tags, column: :tag_id, on_delete: :cascade
+ end
+ end
data/db/migrate/00007_create_robot_nodes.rb ADDED
@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ class CreateRobotNodes < ActiveRecord::Migration[7.1]
+ def change
+ create_table :robot_nodes, comment: 'Join table connecting robots to nodes (many-to-many)' do |t|
+ t.bigint :robot_id, null: false, comment: 'ID of the robot that remembered this node'
+ t.bigint :node_id, null: false, comment: 'ID of the node being remembered'
+ t.timestamptz :first_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
+ comment: 'When this robot first remembered this content'
+ t.timestamptz :last_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
+ comment: 'When this robot last tried to remember this content'
+ t.integer :remember_count, default: 1, null: false,
+ comment: 'Number of times this robot has tried to remember this content'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
+ end
+
+ add_index :robot_nodes, [:robot_id, :node_id], unique: true, name: 'idx_robot_nodes_unique'
+ add_index :robot_nodes, :robot_id, name: 'idx_robot_nodes_robot_id'
+ add_index :robot_nodes, :node_id, name: 'idx_robot_nodes_node_id'
+ add_index :robot_nodes, :last_remembered_at, name: 'idx_robot_nodes_last_remembered_at'
+
+ add_foreign_key :robot_nodes, :robots, column: :robot_id, on_delete: :cascade
+ add_foreign_key :robot_nodes, :nodes, column: :node_id, on_delete: :cascade
+ end
+ end
data/db/migrate/00009_add_working_memory_to_robot_nodes.rb ADDED
@@ -0,0 +1,12 @@
+ # frozen_string_literal: true
+
+ class AddWorkingMemoryToRobotNodes < ActiveRecord::Migration[7.1]
+ def change
+ add_column :robot_nodes, :working_memory, :boolean, default: false, null: false,
+ comment: 'True if this node is currently in the robot working memory'
+
+ add_index :robot_nodes, [:robot_id, :working_memory],
+ where: 'working_memory = true',
+ name: 'idx_robot_nodes_working_memory'
+ end
+ end
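The partial index added above targets one lookup: which nodes are currently flagged as working memory for a given robot. An editorial sketch of that query; inside the gem this is presumably mediated by `HTM::WorkingMemory`, so the SQL is for orientation only:

```ruby
robot_id = 1 # placeholder

sql = <<~SQL
  SELECT n.id, n.content, rn.last_remembered_at
  FROM robot_nodes rn
  JOIN nodes n ON n.id = rn.node_id
  WHERE rn.robot_id = #{robot_id}
    AND rn.working_memory = TRUE
    AND n.deleted_at IS NULL
  ORDER BY rn.last_remembered_at DESC
SQL
```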
data/db/schema.sql CHANGED
@@ -23,6 +23,81 @@ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
  -- Name: EXTENSION vector; Type: COMMENT; Schema: -; Owner: -
  --
 
+ --
+ -- Name: file_sources; Type: TABLE; Schema: public; Owner: -
+ --
+
+ CREATE TABLE public.file_sources (
+ id bigint NOT NULL,
+ file_path text NOT NULL,
+ file_hash character varying(64),
+ mtime timestamp with time zone,
+ file_size integer,
+ frontmatter jsonb DEFAULT '{}'::jsonb,
+ last_synced_at timestamp with time zone,
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
+ updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
+ );
+
+ --
+ -- Name: TABLE file_sources; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON TABLE public.file_sources IS 'Source file metadata for loaded documents';
+
+ --
+ -- Name: COLUMN file_sources.file_path; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_path IS 'Absolute path to source file';
+
+ --
+ -- Name: COLUMN file_sources.file_hash; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_hash IS 'SHA-256 hash of file content';
+
+ --
+ -- Name: COLUMN file_sources.mtime; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.mtime IS 'File modification time';
+
+ --
+ -- Name: COLUMN file_sources.file_size; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_size IS 'File size in bytes';
+
+ --
+ -- Name: COLUMN file_sources.frontmatter; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.frontmatter IS 'Parsed YAML frontmatter';
+
+ --
+ -- Name: COLUMN file_sources.last_synced_at; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.last_synced_at IS 'When file was last synced to HTM';
+
+ --
+ -- Name: file_sources_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+ --
+
+ CREATE SEQUENCE public.file_sources_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MINVALUE
+ NO MAXVALUE
+ CACHE 1;
+
+ --
+ -- Name: file_sources_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+ --
+
+ ALTER SEQUENCE public.file_sources_id_seq OWNED BY public.file_sources.id;
+
  --
  -- Name: node_tags; Type: TABLE; Schema: public; Owner: -
  --
@@ -90,6 +165,10 @@ CREATE TABLE public.nodes (
  embedding public.vector(2000),
  embedding_dimension integer,
  content_hash character varying(64),
+ deleted_at timestamp with time zone,
+ source_id bigint,
+ chunk_position integer,
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
  CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
  );
 
@@ -153,6 +232,30 @@ COMMENT ON COLUMN public.nodes.embedding_dimension IS 'Actual number of dimensio
 
  COMMENT ON COLUMN public.nodes.content_hash IS 'SHA-256 hash of content for deduplication';
 
+ --
+ -- Name: COLUMN nodes.deleted_at; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set';
+
+ --
+ -- Name: COLUMN nodes.source_id; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.source_id IS 'Reference to source file (for file-loaded nodes)';
+
+ --
+ -- Name: COLUMN nodes.chunk_position; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.chunk_position IS 'Position within source file (0-indexed)';
+
+ --
+ -- Name: COLUMN nodes.metadata; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)';
+
  --
  -- Name: nodes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
  --
@@ -402,6 +505,12 @@ CREATE SEQUENCE public.working_memories_id_seq
 
  ALTER SEQUENCE public.working_memories_id_seq OWNED BY public.working_memories.id;
 
+ --
+ -- Name: file_sources id; Type: DEFAULT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.file_sources ALTER COLUMN id SET DEFAULT nextval('public.file_sources_id_seq'::regclass);
+
  --
  -- Name: node_tags id; Type: DEFAULT; Schema: public; Owner: -
  --
@@ -438,6 +547,13 @@ ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id
 
  ALTER TABLE ONLY public.working_memories ALTER COLUMN id SET DEFAULT nextval('public.working_memories_id_seq'::regclass);
 
+ --
+ -- Name: file_sources file_sources_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.file_sources
+ ADD CONSTRAINT file_sources_pkey PRIMARY KEY (id);
+
  --
  -- Name: node_tags node_tags_pkey; Type: CONSTRAINT; Schema: public; Owner: -
  --
@@ -487,6 +603,24 @@ ALTER TABLE ONLY public.tags
  ALTER TABLE ONLY public.working_memories
  ADD CONSTRAINT working_memories_pkey PRIMARY KEY (id);
 
+ --
+ -- Name: idx_file_sources_hash; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_file_sources_hash ON public.file_sources USING btree (file_hash);
+
+ --
+ -- Name: idx_file_sources_last_synced; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_file_sources_last_synced ON public.file_sources USING btree (last_synced_at);
+
+ --
+ -- Name: idx_file_sources_path_unique; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE UNIQUE INDEX idx_file_sources_path_unique ON public.file_sources USING btree (file_path);
+
  --
  -- Name: idx_node_tags_node_id; Type: INDEX; Schema: public; Owner: -
  --
@@ -535,6 +669,12 @@ CREATE INDEX idx_nodes_content_trgm ON public.nodes USING gin (content public.gi
 
  CREATE INDEX idx_nodes_created_at ON public.nodes USING btree (created_at);
 
+ --
+ -- Name: idx_nodes_deleted_at; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_deleted_at ON public.nodes USING btree (deleted_at);
+
  --
  -- Name: idx_nodes_embedding; Type: INDEX; Schema: public; Owner: -
  --
@@ -547,6 +687,30 @@ CREATE INDEX idx_nodes_embedding ON public.nodes USING hnsw (embedding public.ve
 
  CREATE INDEX idx_nodes_last_accessed ON public.nodes USING btree (last_accessed);
 
+ --
+ -- Name: idx_nodes_metadata; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_metadata ON public.nodes USING gin (metadata);
+
+ --
+ -- Name: idx_nodes_not_deleted_created_at; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_not_deleted_created_at ON public.nodes USING btree (created_at) WHERE (deleted_at IS NULL);
+
+ --
+ -- Name: idx_nodes_source_chunk_position; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_source_chunk_position ON public.nodes USING btree (source_id, chunk_position);
+
+ --
+ -- Name: idx_nodes_source_id; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_source_id ON public.nodes USING btree (source_id);
+
  --
  -- Name: idx_nodes_updated_at; Type: INDEX; Schema: public; Owner: -
  --
@@ -621,6 +785,13 @@ ALTER TABLE ONLY public.working_memories
  ALTER TABLE ONLY public.working_memories
  ADD CONSTRAINT fk_rails_4b7c3eb07b FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
 
+ --
+ -- Name: nodes fk_rails_920ad16d08; Type: FK CONSTRAINT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.nodes
+ ADD CONSTRAINT fk_rails_920ad16d08 FOREIGN KEY (source_id) REFERENCES public.file_sources(id) ON DELETE SET NULL;
+
  --
  -- Name: robot_nodes fk_rails_9b003078a8; Type: FK CONSTRAINT; Schema: public; Owner: -
  --
@@ -653,4 +824,4 @@ ALTER TABLE ONLY public.robot_nodes
  -- PostgreSQL database dump complete
  --
 
- \unrestrict 6qynyffXXn5BTZM7u0DVZKV2Nc24dPezkY3OOwzriuYfchXNsoQuf114yBOqrIb
+ \unrestrict DUrF24Zrve4qSBwlDrJ4qAzzZhvhX5s2S57oHYVJ0ZPbaDC4ItMZ29Pv9oI3Q9d
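The `file_sources` columns in the dump above (unique `file_path`, `file_hash`, `mtime`, `last_synced_at`) are what make re-sync decisions possible. An editorial sketch of the comparison they support; this is an assumed workflow, not the markdown loader's actual code, and the SQL is illustrative only:

```ruby
require "digest"

path  = File.expand_path("docs/guide.md")
hash  = Digest::SHA256.hexdigest(File.read(path))
mtime = File.mtime(path)

# Look up the stored row via the unique file_path index, then reload
# (e.g. with htm.load_file(path)) only when the hash or mtime has moved on.
sql = <<~SQL
  SELECT id, file_hash, mtime, last_synced_at
  FROM file_sources
  WHERE file_path = '#{path}'
SQL
```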
data/docs/api/database.md CHANGED
@@ -276,9 +276,8 @@ For detailed database schema documentation, see:
  | [robots](../database/public.robots.md) | Robot registry for multi-robot tracking |
  | [nodes](../database/public.nodes.md) | Primary memory storage with vector embeddings |
  | [tags](../database/public.tags.md) | Hierarchical tag names for categorization |
- | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind) |
+ | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind, working memory) |
  | [node_tags](../database/public.node_tags.md) | Node-to-tag associations |
- | [working_memories](../database/public.working_memories.md) | Per-robot working memory state |
 
  ### Required Extensions