htm 0.0.2 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +95 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +327 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +83 -12
  10. data/Rakefile +5 -0
  11. data/bin/htm_mcp.rb +527 -0
  12. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  13. data/db/migrate/00002_create_robots.rb +11 -0
  14. data/db/migrate/00003_create_file_sources.rb +20 -0
  15. data/db/migrate/00004_create_nodes.rb +65 -0
  16. data/db/migrate/00005_create_tags.rb +13 -0
  17. data/db/migrate/00006_create_node_tags.rb +18 -0
  18. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  19. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  20. data/db/schema.sql +172 -1
  21. data/docs/api/database.md +1 -2
  22. data/docs/api/htm.md +197 -2
  23. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  24. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  25. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  26. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  27. data/docs/api/yard/HTM/Configuration.md +175 -0
  28. data/docs/api/yard/HTM/Database.md +99 -0
  29. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  30. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  31. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  32. data/docs/api/yard/HTM/Error.md +11 -0
  33. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  34. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  35. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  36. data/docs/api/yard/HTM/Observability.md +107 -0
  37. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  38. data/docs/api/yard/HTM/Railtie.md +27 -0
  39. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  40. data/docs/api/yard/HTM/TagError.md +18 -0
  41. data/docs/api/yard/HTM/TagService.md +67 -0
  42. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  43. data/docs/api/yard/HTM/Timeframe.md +40 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  45. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  46. data/docs/api/yard/HTM/ValidationError.md +20 -0
  47. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  48. data/docs/api/yard/HTM.md +80 -0
  49. data/docs/api/yard/index.csv +179 -0
  50. data/docs/api/yard-reference.md +51 -0
  51. data/docs/database/README.md +128 -128
  52. data/docs/database/public.file_sources.md +42 -0
  53. data/docs/database/public.file_sources.svg +211 -0
  54. data/docs/database/public.node_tags.md +4 -4
  55. data/docs/database/public.node_tags.svg +212 -79
  56. data/docs/database/public.nodes.md +22 -12
  57. data/docs/database/public.nodes.svg +246 -127
  58. data/docs/database/public.robot_nodes.md +11 -9
  59. data/docs/database/public.robot_nodes.svg +220 -98
  60. data/docs/database/public.robots.md +2 -2
  61. data/docs/database/public.robots.svg +136 -81
  62. data/docs/database/public.tags.md +3 -3
  63. data/docs/database/public.tags.svg +118 -39
  64. data/docs/database/schema.json +850 -771
  65. data/docs/database/schema.svg +256 -197
  66. data/docs/development/schema.md +67 -2
  67. data/docs/guides/adding-memories.md +93 -7
  68. data/docs/guides/recalling-memories.md +36 -1
  69. data/examples/README.md +405 -0
  70. data/examples/cli_app/htm_cli.rb +65 -5
  71. data/examples/cli_app/temp.log +93 -0
  72. data/examples/file_loader_usage.rb +177 -0
  73. data/examples/mcp_client.rb +529 -0
  74. data/examples/robot_groups/lib/robot_group.rb +419 -0
  75. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  76. data/examples/robot_groups/multi_process.rb +286 -0
  77. data/examples/robot_groups/robot_worker.rb +136 -0
  78. data/examples/robot_groups/same_process.rb +229 -0
  79. data/examples/timeframe_demo.rb +276 -0
  80. data/lib/htm/active_record_config.rb +1 -1
  81. data/lib/htm/circuit_breaker.rb +202 -0
  82. data/lib/htm/configuration.rb +59 -13
  83. data/lib/htm/database.rb +67 -36
  84. data/lib/htm/embedding_service.rb +39 -2
  85. data/lib/htm/errors.rb +131 -11
  86. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  87. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  88. data/lib/htm/loaders/markdown_loader.rb +263 -0
  89. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  90. data/lib/htm/long_term_memory.rb +460 -343
  91. data/lib/htm/models/file_source.rb +99 -0
  92. data/lib/htm/models/node.rb +80 -5
  93. data/lib/htm/models/robot.rb +24 -1
  94. data/lib/htm/models/robot_node.rb +1 -0
  95. data/lib/htm/models/tag.rb +254 -4
  96. data/lib/htm/observability.rb +395 -0
  97. data/lib/htm/tag_service.rb +60 -3
  98. data/lib/htm/tasks.rb +26 -1
  99. data/lib/htm/timeframe.rb +194 -0
  100. data/lib/htm/timeframe_extractor.rb +307 -0
  101. data/lib/htm/version.rb +1 -1
  102. data/lib/htm/working_memory.rb +165 -70
  103. data/lib/htm.rb +328 -130
  104. data/lib/tasks/doc.rake +300 -0
  105. data/lib/tasks/files.rake +299 -0
  106. data/lib/tasks/htm.rake +158 -3
  107. data/lib/tasks/jobs.rake +3 -9
  108. data/lib/tasks/tags.rake +166 -6
  109. data/mkdocs.yml +36 -1
  110. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  111. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  112. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  113. data/notes/next_steps.md +100 -0
  114. data/notes/plan.md +627 -0
  115. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  116. data/notes/timescaledb_removal_summary.md +200 -0
  117. metadata +158 -17
  118. data/db/migrate/20250101000002_create_robots.rb +0 -14
  119. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  120. data/db/migrate/20250101000005_create_tags.rb +0 -38
  121. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  122. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  123. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  124. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  125. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  126. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  127. data/docs/database/public.working_memories.md +0 -40
  128. data/docs/database/public.working_memories.svg +0 -112
  129. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -2,7 +2,6 @@
2
2
 
3
3
  class EnableExtensions < ActiveRecord::Migration[7.1]
4
4
  def up
5
- # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
6
5
  enable_extension 'vector'
7
6
  enable_extension 'pg_trgm'
8
7
  end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateRobots < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
6
+ t.text :name, comment: 'Human-readable name for the robot'
7
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
8
+ t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateFileSources < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
6
+ t.text :file_path, null: false, comment: 'Absolute path to source file'
7
+ t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
8
+ t.timestamptz :mtime, comment: 'File modification time'
9
+ t.integer :file_size, comment: 'File size in bytes'
10
+ t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
11
+ t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
12
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
13
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
14
+ end
15
+
16
+ add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
17
+ add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
18
+ add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
19
+ end
20
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateNodes < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
6
+ t.text :content, null: false, comment: 'The conversation message/utterance content'
7
+ t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
8
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
9
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
10
+ t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
11
+ t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
12
+ t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
13
+ t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
14
+ t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
15
+ t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
16
+ t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
17
+ t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
18
+ t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
19
+ end
20
+
21
+ # Basic indexes for common queries
22
+ add_index :nodes, :created_at, name: 'idx_nodes_created_at'
23
+ add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
24
+ add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
25
+ add_index :nodes, :access_count, name: 'idx_nodes_access_count'
26
+ add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
27
+ add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
28
+ add_index :nodes, :source_id, name: 'idx_nodes_source_id'
29
+ add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
30
+
31
+ # Partial index for efficiently querying non-deleted nodes
32
+ add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
33
+
34
+ # GIN index for JSONB metadata queries
35
+ add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
36
+
37
+ # Vector similarity search index (HNSW for better performance)
38
+ execute <<-SQL
39
+ CREATE INDEX idx_nodes_embedding ON nodes
40
+ USING hnsw (embedding vector_cosine_ops)
41
+ WITH (m = 16, ef_construction = 64)
42
+ SQL
43
+
44
+ # Full-text search on conversation content
45
+ execute <<-SQL
46
+ CREATE INDEX idx_nodes_content_gin ON nodes
47
+ USING gin(to_tsvector('english', content))
48
+ SQL
49
+
50
+ # Trigram indexes for fuzzy matching on conversation content
51
+ execute <<-SQL
52
+ CREATE INDEX idx_nodes_content_trgm ON nodes
53
+ USING gin(content gin_trgm_ops)
54
+ SQL
55
+
56
+ # Check constraint for embedding dimensions
57
+ execute <<-SQL
58
+ ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
59
+ CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
60
+ SQL
61
+
62
+ # Foreign key to file_sources table
63
+ add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
64
+ end
65
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateTags < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :tags, comment: 'Unique tag names for categorization' do |t|
6
+ t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
7
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
8
+ end
9
+
10
+ add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
11
+ add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateNodeTags < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
6
+ t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
7
+ t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
8
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
9
+ end
10
+
11
+ add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
12
+ add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
13
+ add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
14
+
15
+ add_foreign_key :node_tags, :nodes, column: :node_id, on_delete: :cascade
16
+ add_foreign_key :node_tags, :tags, column: :tag_id, on_delete: :cascade
17
+ end
18
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateRobotNodes < ActiveRecord::Migration[7.1]
4
+ def change
5
+ create_table :robot_nodes, comment: 'Join table connecting robots to nodes (many-to-many)' do |t|
6
+ t.bigint :robot_id, null: false, comment: 'ID of the robot that remembered this node'
7
+ t.bigint :node_id, null: false, comment: 'ID of the node being remembered'
8
+ t.timestamptz :first_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
9
+ comment: 'When this robot first remembered this content'
10
+ t.timestamptz :last_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
11
+ comment: 'When this robot last tried to remember this content'
12
+ t.integer :remember_count, default: 1, null: false,
13
+ comment: 'Number of times this robot has tried to remember this content'
14
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
15
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
16
+ end
17
+
18
+ add_index :robot_nodes, [:robot_id, :node_id], unique: true, name: 'idx_robot_nodes_unique'
19
+ add_index :robot_nodes, :robot_id, name: 'idx_robot_nodes_robot_id'
20
+ add_index :robot_nodes, :node_id, name: 'idx_robot_nodes_node_id'
21
+ add_index :robot_nodes, :last_remembered_at, name: 'idx_robot_nodes_last_remembered_at'
22
+
23
+ add_foreign_key :robot_nodes, :robots, column: :robot_id, on_delete: :cascade
24
+ add_foreign_key :robot_nodes, :nodes, column: :node_id, on_delete: :cascade
25
+ end
26
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddWorkingMemoryToRobotNodes < ActiveRecord::Migration[7.1]
4
+ def change
5
+ add_column :robot_nodes, :working_memory, :boolean, default: false, null: false,
6
+ comment: 'True if this node is currently in the robot working memory'
7
+
8
+ add_index :robot_nodes, [:robot_id, :working_memory],
9
+ where: 'working_memory = true',
10
+ name: 'idx_robot_nodes_working_memory'
11
+ end
12
+ end
data/db/schema.sql CHANGED
@@ -23,6 +23,81 @@ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
23
23
  -- Name: EXTENSION vector; Type: COMMENT; Schema: -; Owner: -
24
24
  --
25
25
 
26
+ --
27
+ -- Name: file_sources; Type: TABLE; Schema: public; Owner: -
28
+ --
29
+
30
+ CREATE TABLE public.file_sources (
31
+ id bigint NOT NULL,
32
+ file_path text NOT NULL,
33
+ file_hash character varying(64),
34
+ mtime timestamp with time zone,
35
+ file_size integer,
36
+ frontmatter jsonb DEFAULT '{}'::jsonb,
37
+ last_synced_at timestamp with time zone,
38
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
39
+ updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
40
+ );
41
+
42
+ --
43
+ -- Name: TABLE file_sources; Type: COMMENT; Schema: public; Owner: -
44
+ --
45
+
46
+ COMMENT ON TABLE public.file_sources IS 'Source file metadata for loaded documents';
47
+
48
+ --
49
+ -- Name: COLUMN file_sources.file_path; Type: COMMENT; Schema: public; Owner: -
50
+ --
51
+
52
+ COMMENT ON COLUMN public.file_sources.file_path IS 'Absolute path to source file';
53
+
54
+ --
55
+ -- Name: COLUMN file_sources.file_hash; Type: COMMENT; Schema: public; Owner: -
56
+ --
57
+
58
+ COMMENT ON COLUMN public.file_sources.file_hash IS 'SHA-256 hash of file content';
59
+
60
+ --
61
+ -- Name: COLUMN file_sources.mtime; Type: COMMENT; Schema: public; Owner: -
62
+ --
63
+
64
+ COMMENT ON COLUMN public.file_sources.mtime IS 'File modification time';
65
+
66
+ --
67
+ -- Name: COLUMN file_sources.file_size; Type: COMMENT; Schema: public; Owner: -
68
+ --
69
+
70
+ COMMENT ON COLUMN public.file_sources.file_size IS 'File size in bytes';
71
+
72
+ --
73
+ -- Name: COLUMN file_sources.frontmatter; Type: COMMENT; Schema: public; Owner: -
74
+ --
75
+
76
+ COMMENT ON COLUMN public.file_sources.frontmatter IS 'Parsed YAML frontmatter';
77
+
78
+ --
79
+ -- Name: COLUMN file_sources.last_synced_at; Type: COMMENT; Schema: public; Owner: -
80
+ --
81
+
82
+ COMMENT ON COLUMN public.file_sources.last_synced_at IS 'When file was last synced to HTM';
83
+
84
+ --
85
+ -- Name: file_sources_id_seq; Type: SEQUENCE; Schema: public; Owner: -
86
+ --
87
+
88
+ CREATE SEQUENCE public.file_sources_id_seq
89
+ START WITH 1
90
+ INCREMENT BY 1
91
+ NO MINVALUE
92
+ NO MAXVALUE
93
+ CACHE 1;
94
+
95
+ --
96
+ -- Name: file_sources_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
97
+ --
98
+
99
+ ALTER SEQUENCE public.file_sources_id_seq OWNED BY public.file_sources.id;
100
+
26
101
  --
27
102
  -- Name: node_tags; Type: TABLE; Schema: public; Owner: -
28
103
  --
@@ -90,6 +165,10 @@ CREATE TABLE public.nodes (
90
165
  embedding public.vector(2000),
91
166
  embedding_dimension integer,
92
167
  content_hash character varying(64),
168
+ deleted_at timestamp with time zone,
169
+ source_id bigint,
170
+ chunk_position integer,
171
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
93
172
  CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
94
173
  );
95
174
 
@@ -153,6 +232,30 @@ COMMENT ON COLUMN public.nodes.embedding_dimension IS 'Actual number of dimensio
153
232
 
154
233
  COMMENT ON COLUMN public.nodes.content_hash IS 'SHA-256 hash of content for deduplication';
155
234
 
235
+ --
236
+ -- Name: COLUMN nodes.deleted_at; Type: COMMENT; Schema: public; Owner: -
237
+ --
238
+
239
+ COMMENT ON COLUMN public.nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set';
240
+
241
+ --
242
+ -- Name: COLUMN nodes.source_id; Type: COMMENT; Schema: public; Owner: -
243
+ --
244
+
245
+ COMMENT ON COLUMN public.nodes.source_id IS 'Reference to source file (for file-loaded nodes)';
246
+
247
+ --
248
+ -- Name: COLUMN nodes.chunk_position; Type: COMMENT; Schema: public; Owner: -
249
+ --
250
+
251
+ COMMENT ON COLUMN public.nodes.chunk_position IS 'Position within source file (0-indexed)';
252
+
253
+ --
254
+ -- Name: COLUMN nodes.metadata; Type: COMMENT; Schema: public; Owner: -
255
+ --
256
+
257
+ COMMENT ON COLUMN public.nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)';
258
+
156
259
  --
157
260
  -- Name: nodes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
158
261
  --
@@ -402,6 +505,12 @@ CREATE SEQUENCE public.working_memories_id_seq
402
505
 
403
506
  ALTER SEQUENCE public.working_memories_id_seq OWNED BY public.working_memories.id;
404
507
 
508
+ --
509
+ -- Name: file_sources id; Type: DEFAULT; Schema: public; Owner: -
510
+ --
511
+
512
+ ALTER TABLE ONLY public.file_sources ALTER COLUMN id SET DEFAULT nextval('public.file_sources_id_seq'::regclass);
513
+
405
514
  --
406
515
  -- Name: node_tags id; Type: DEFAULT; Schema: public; Owner: -
407
516
  --
@@ -438,6 +547,13 @@ ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id
438
547
 
439
548
  ALTER TABLE ONLY public.working_memories ALTER COLUMN id SET DEFAULT nextval('public.working_memories_id_seq'::regclass);
440
549
 
550
+ --
551
+ -- Name: file_sources file_sources_pkey; Type: CONSTRAINT; Schema: public; Owner: -
552
+ --
553
+
554
+ ALTER TABLE ONLY public.file_sources
555
+ ADD CONSTRAINT file_sources_pkey PRIMARY KEY (id);
556
+
441
557
  --
442
558
  -- Name: node_tags node_tags_pkey; Type: CONSTRAINT; Schema: public; Owner: -
443
559
  --
@@ -487,6 +603,24 @@ ALTER TABLE ONLY public.tags
487
603
  ALTER TABLE ONLY public.working_memories
488
604
  ADD CONSTRAINT working_memories_pkey PRIMARY KEY (id);
489
605
 
606
+ --
607
+ -- Name: idx_file_sources_hash; Type: INDEX; Schema: public; Owner: -
608
+ --
609
+
610
+ CREATE INDEX idx_file_sources_hash ON public.file_sources USING btree (file_hash);
611
+
612
+ --
613
+ -- Name: idx_file_sources_last_synced; Type: INDEX; Schema: public; Owner: -
614
+ --
615
+
616
+ CREATE INDEX idx_file_sources_last_synced ON public.file_sources USING btree (last_synced_at);
617
+
618
+ --
619
+ -- Name: idx_file_sources_path_unique; Type: INDEX; Schema: public; Owner: -
620
+ --
621
+
622
+ CREATE UNIQUE INDEX idx_file_sources_path_unique ON public.file_sources USING btree (file_path);
623
+
490
624
  --
491
625
  -- Name: idx_node_tags_node_id; Type: INDEX; Schema: public; Owner: -
492
626
  --
@@ -535,6 +669,12 @@ CREATE INDEX idx_nodes_content_trgm ON public.nodes USING gin (content public.gi
535
669
 
536
670
  CREATE INDEX idx_nodes_created_at ON public.nodes USING btree (created_at);
537
671
 
672
+ --
673
+ -- Name: idx_nodes_deleted_at; Type: INDEX; Schema: public; Owner: -
674
+ --
675
+
676
+ CREATE INDEX idx_nodes_deleted_at ON public.nodes USING btree (deleted_at);
677
+
538
678
  --
539
679
  -- Name: idx_nodes_embedding; Type: INDEX; Schema: public; Owner: -
540
680
  --
@@ -547,6 +687,30 @@ CREATE INDEX idx_nodes_embedding ON public.nodes USING hnsw (embedding public.ve
547
687
 
548
688
  CREATE INDEX idx_nodes_last_accessed ON public.nodes USING btree (last_accessed);
549
689
 
690
+ --
691
+ -- Name: idx_nodes_metadata; Type: INDEX; Schema: public; Owner: -
692
+ --
693
+
694
+ CREATE INDEX idx_nodes_metadata ON public.nodes USING gin (metadata);
695
+
696
+ --
697
+ -- Name: idx_nodes_not_deleted_created_at; Type: INDEX; Schema: public; Owner: -
698
+ --
699
+
700
+ CREATE INDEX idx_nodes_not_deleted_created_at ON public.nodes USING btree (created_at) WHERE (deleted_at IS NULL);
701
+
702
+ --
703
+ -- Name: idx_nodes_source_chunk_position; Type: INDEX; Schema: public; Owner: -
704
+ --
705
+
706
+ CREATE INDEX idx_nodes_source_chunk_position ON public.nodes USING btree (source_id, chunk_position);
707
+
708
+ --
709
+ -- Name: idx_nodes_source_id; Type: INDEX; Schema: public; Owner: -
710
+ --
711
+
712
+ CREATE INDEX idx_nodes_source_id ON public.nodes USING btree (source_id);
713
+
550
714
  --
551
715
  -- Name: idx_nodes_updated_at; Type: INDEX; Schema: public; Owner: -
552
716
  --
@@ -621,6 +785,13 @@ ALTER TABLE ONLY public.working_memories
621
785
  ALTER TABLE ONLY public.working_memories
622
786
  ADD CONSTRAINT fk_rails_4b7c3eb07b FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
623
787
 
788
+ --
789
+ -- Name: nodes fk_rails_920ad16d08; Type: FK CONSTRAINT; Schema: public; Owner: -
790
+ --
791
+
792
+ ALTER TABLE ONLY public.nodes
793
+ ADD CONSTRAINT fk_rails_920ad16d08 FOREIGN KEY (source_id) REFERENCES public.file_sources(id) ON DELETE SET NULL;
794
+
624
795
  --
625
796
  -- Name: robot_nodes fk_rails_9b003078a8; Type: FK CONSTRAINT; Schema: public; Owner: -
626
797
  --
@@ -653,4 +824,4 @@ ALTER TABLE ONLY public.robot_nodes
653
824
  -- PostgreSQL database dump complete
654
825
  --
655
826
 
656
- \unrestrict 6qynyffXXn5BTZM7u0DVZKV2Nc24dPezkY3OOwzriuYfchXNsoQuf114yBOqrIb
827
+ \unrestrict DUrF24Zrve4qSBwlDrJ4qAzzZhvhX5s2S57oHYVJ0ZPbaDC4ItMZ29Pv9oI3Q9d
data/docs/api/database.md CHANGED
@@ -276,9 +276,8 @@ For detailed database schema documentation, see:
276
276
  | [robots](../database/public.robots.md) | Robot registry for multi-robot tracking |
277
277
  | [nodes](../database/public.nodes.md) | Primary memory storage with vector embeddings |
278
278
  | [tags](../database/public.tags.md) | Hierarchical tag names for categorization |
279
- | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind) |
279
+ | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind, working memory) |
280
280
  | [node_tags](../database/public.node_tags.md) | Node-to-tag associations |
281
- | [working_memories](../database/public.working_memories.md) | Per-robot working memory state |
282
281
 
283
282
  ### Required Extensions
284
283
 
data/docs/api/htm.md CHANGED
@@ -141,12 +141,12 @@ htm.long_term_memory.stats # => {...}
141
141
 
142
142
  ## Public Methods
143
143
 
144
- ### `remember(content, tags:)` {: #remember }
144
+ ### `remember(content, tags:, metadata:)` {: #remember }
145
145
 
146
146
  Remember new information by storing it in long-term memory.
147
147
 
148
148
  ```ruby
149
- remember(content, tags: [])
149
+ remember(content, tags: [], metadata: {})
150
150
  ```
151
151
 
152
152
  #### Parameters
@@ -155,6 +155,7 @@ remember(content, tags: [])
155
155
  |-----------|------|---------|-------------|
156
156
  | `content` | String | *required* | The information to remember |
157
157
  | `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
158
+ | `metadata` | Hash | `{}` | Arbitrary key-value metadata stored as JSONB. Keys must be strings or symbols. |
158
159
 
159
160
  #### Returns
160
161
 
@@ -190,6 +191,19 @@ node_id = htm.remember(
190
191
  tags: ["database:timescaledb", "performance"]
191
192
  )
192
193
 
194
+ # With metadata
195
+ node_id = htm.remember(
196
+ "User prefers dark mode for all interfaces",
197
+ metadata: { category: "preference", priority: "high", source_app: "settings" }
198
+ )
199
+
200
+ # With both tags and metadata
201
+ node_id = htm.remember(
202
+ "API rate limit is 1000 requests per minute",
203
+ tags: ["api:rate-limiting", "infrastructure"],
204
+ metadata: { environment: "production", version: 2 }
205
+ )
206
+
193
207
  # Multiple robots remembering the same content
194
208
  robot1 = HTM.new(robot_name: "assistant_1")
195
209
  robot2 = HTM.new(robot_name: "assistant_2")
@@ -205,6 +219,7 @@ robot2.remember("Ruby 3.3 was released in December 2023")
205
219
  - Embeddings and hierarchical tags are generated asynchronously via background jobs
206
220
  - Empty content returns the ID of the most recent node without creating a duplicate
207
221
  - Token count is calculated automatically using the configured token counter
222
+ - Metadata is stored in a JSONB column with a GIN index for efficient queries
208
223
 
209
224
  ---
210
225
 
@@ -220,6 +235,7 @@ recall(
220
235
  strategy: :vector,
221
236
  with_relevance: false,
222
237
  query_tags: [],
238
+ metadata: {},
223
239
  raw: false
224
240
  )
225
241
  ```
@@ -234,6 +250,7 @@ recall(
234
250
  | `strategy` | Symbol | `:vector` | Search strategy (`:vector`, `:fulltext`, `:hybrid`) |
235
251
  | `with_relevance` | Boolean | `false` | Include dynamic relevance scores |
236
252
  | `query_tags` | Array\<String\> | `[]` | Tags to boost relevance |
253
+ | `metadata` | Hash | `{}` | Filter results by metadata (uses JSONB `@>` containment) |
237
254
  | `raw` | Boolean | `false` | Return full node hashes instead of content strings |
238
255
 
239
256
  #### Timeframe Formats
@@ -284,6 +301,7 @@ When `raw: true`, each hash contains:
284
301
  "access_count" => 5, # Times accessed
285
302
  "created_at" => "2025-01-15...", # Creation timestamp
286
303
  "token_count" => 125, # Token count
304
+ "metadata" => { "category" => "preference", "priority" => "high" }, # JSONB metadata
287
305
  "similarity" => 0.87, # Similarity score (hybrid/vector)
288
306
  "tag_boost" => 0.3, # Tag boost score (hybrid only)
289
307
  "combined_score" => 0.79 # Combined score (hybrid only)
@@ -344,6 +362,23 @@ memories = htm.recall(
344
362
  timeframe: start_time..end_time,
345
363
  limit: 50
346
364
  )
365
+
366
+ # Filter by metadata
367
+ memories = htm.recall(
368
+ "user preferences",
369
+ metadata: { category: "preference" }
370
+ )
371
+ # => Returns only nodes with metadata containing { category: "preference" }
372
+
373
+ # Combine metadata with other filters
374
+ memories = htm.recall(
375
+ "API configuration",
376
+ timeframe: "last month",
377
+ strategy: :hybrid,
378
+ metadata: { environment: "production", version: 2 },
379
+ raw: true
380
+ )
381
+ # => Returns production configs with version 2, sorted by relevance
347
382
  ```
348
383
 
349
384
  #### Performance Notes
@@ -410,6 +445,166 @@ end
410
445
 
411
446
  ---
412
447
 
448
+ ### `load_file(path, force: false)` {: #load_file }
449
+
450
+ Load a markdown file into long-term memory with automatic chunking and source tracking.
451
+
452
+ ```ruby
453
+ load_file(path, force: false)
454
+ ```
455
+
456
+ #### Parameters
457
+
458
+ | Parameter | Type | Default | Description |
459
+ |-----------|------|---------|-------------|
460
+ | `path` | String | *required* | Path to the markdown file to load |
461
+ | `force` | Boolean | `false` | Force re-sync even if file hasn't changed |
462
+
463
+ #### Returns
464
+
465
+ - `Hash` with keys:
466
+ - `file_source_id` - ID of the FileSource record
467
+ - `chunks_created` - Number of new nodes created
468
+ - `chunks_updated` - Number of existing nodes updated
469
+ - `chunks_deleted` - Number of nodes soft-deleted
470
+
471
+ #### Side Effects
472
+
473
+ - Creates or updates a FileSource record for tracking
474
+ - Parses YAML frontmatter and stores as metadata
475
+ - Chunks content by paragraph, preserving code blocks
476
+ - Creates nodes for each chunk with `source_id` linking to file
477
+ - Triggers async embedding and tag extraction for new nodes
478
+
479
+ #### Examples
480
+
481
+ ```ruby
482
+ # Load a file
483
+ result = htm.load_file("docs/guide.md")
484
+ # => { file_source_id: 1, chunks_created: 5, chunks_updated: 0, chunks_deleted: 0 }
485
+
486
+ # Force reload even if unchanged
487
+ result = htm.load_file("docs/guide.md", force: true)
488
+
489
+ # File with frontmatter
490
+ # ---
491
+ # title: User Guide
492
+ # tags: [documentation, tutorial]
493
+ # ---
494
+ # Content here...
495
+ result = htm.load_file("docs/guide.md")
496
+ # Frontmatter stored in FileSource.frontmatter
497
+ ```
498
+
499
+ ---
500
+
501
+ ### `load_directory(path, pattern: '**/*.md', force: false)` {: #load_directory }
502
+
503
+ Load all matching files in a directory into long-term memory.
504
+
505
+ ```ruby
506
+ load_directory(path, pattern: '**/*.md', force: false)
507
+ ```
508
+
509
+ #### Parameters
510
+
511
+ | Parameter | Type | Default | Description |
512
+ |-----------|------|---------|-------------|
513
+ | `path` | String | *required* | Directory path to scan |
514
+ | `pattern` | String | `'**/*.md'` | Glob pattern for matching files |
515
+ | `force` | Boolean | `false` | Force re-sync all files |
516
+
517
+ #### Returns
518
+
519
+ - `Array<Hash>` - Results for each file loaded, each containing:
520
+ - `file_path` - Path of the loaded file
521
+ - `file_source_id` - ID of the FileSource record
522
+ - `chunks_created` - Number of new nodes created
523
+ - `chunks_updated` - Number of existing nodes updated
524
+ - `chunks_deleted` - Number of nodes soft-deleted
525
+
526
+ #### Examples
527
+
528
+ ```ruby
529
+ # Load all markdown files
530
+ results = htm.load_directory("docs/")
531
+
532
+ # Load with an explicit pattern (this one is the same as the default)
533
+ results = htm.load_directory("content/", pattern: "**/*.md")
534
+
535
+ # Force reload all
536
+ results = htm.load_directory("docs/", force: true)
537
+ ```
538
+
539
+ ---
540
+
541
+ ### `nodes_from_file(file_path)` {: #nodes_from_file }
542
+
543
+ Get all nodes loaded from a specific file.
544
+
545
+ ```ruby
546
+ nodes_from_file(file_path)
547
+ ```
548
+
549
+ #### Parameters
550
+
551
+ | Parameter | Type | Description |
552
+ |-----------|------|-------------|
553
+ | `file_path` | String | Path of the source file |
554
+
555
+ #### Returns
556
+
557
+ - `Array<HTM::Models::Node>` - Nodes from the file, ordered by chunk position
558
+
559
+ #### Examples
560
+
561
+ ```ruby
562
+ nodes = htm.nodes_from_file("docs/guide.md")
563
+ nodes.each do |node|
564
+ puts "Chunk #{node.chunk_position}: #{node.content[0..50]}..."
565
+ end
566
+ ```
567
+
568
+ ---
569
+
570
+ ### `unload_file(file_path)` {: #unload_file }
571
+
572
+ Remove a file from memory by soft-deleting all of its chunks (nodes) and destroying its FileSource record.
573
+
574
+ ```ruby
575
+ unload_file(file_path)
576
+ ```
577
+
578
+ #### Parameters
579
+
580
+ | Parameter | Type | Description |
581
+ |-----------|------|-------------|
582
+ | `file_path` | String | Path of the source file to unload |
583
+
584
+ #### Returns
585
+
586
+ - `true` if file was found and unloaded
587
+ - `false` if file was not found
588
+
589
+ #### Side Effects
590
+
591
+ - Soft-deletes all nodes from the file (sets `deleted_at`)
592
+ - Destroys the FileSource record
593
+
594
+ #### Examples
595
+
596
+ ```ruby
597
+ # Unload a file
598
+ htm.unload_file("docs/guide.md")
599
+
600
+ # Check if file is loaded
601
+ if htm.nodes_from_file("docs/guide.md").empty?
602
+ puts "File not loaded"
603
+ end
604
+ ```
605
+
606
+ ---
607
+
413
608
  ## Error Handling
414
609
 
415
610
  ### ArgumentError