htm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.tbls.yml +30 -0
  4. data/CHANGELOG.md +30 -0
  5. data/SETUP.md +132 -101
  6. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +14 -0
  7. data/db/migrate/20250125000002_create_robot_nodes.rb +35 -0
  8. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +28 -0
  9. data/db/migrate/20250126000001_create_working_memories.rb +19 -0
  10. data/db/migrate/20250126000002_remove_unused_columns.rb +12 -0
  11. data/db/schema.sql +226 -43
  12. data/docs/api/database.md +20 -232
  13. data/docs/api/embedding-service.md +1 -7
  14. data/docs/api/htm.md +195 -449
  15. data/docs/api/index.md +1 -7
  16. data/docs/api/long-term-memory.md +342 -590
  17. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  18. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  19. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  20. data/docs/architecture/adrs/index.md +2 -13
  21. data/docs/architecture/hive-mind.md +165 -166
  22. data/docs/architecture/index.md +2 -2
  23. data/docs/architecture/overview.md +5 -171
  24. data/docs/architecture/two-tier-memory.md +1 -35
  25. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  26. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  27. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  28. data/docs/assets/images/class-hierarchy.svg +55 -0
  29. data/docs/assets/images/exception-hierarchy.svg +45 -0
  30. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  31. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  32. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  33. data/docs/assets/images/htm-eviction-process.svg +141 -0
  34. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  35. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  36. data/docs/assets/images/htm-node-states.svg +123 -0
  37. data/docs/assets/images/project-structure.svg +78 -0
  38. data/docs/assets/images/test-directory-structure.svg +38 -0
  39. data/{dbdoc → docs/database}/README.md +5 -3
  40. data/{dbdoc → docs/database}/public.node_tags.md +4 -5
  41. data/docs/database/public.node_tags.svg +106 -0
  42. data/{dbdoc → docs/database}/public.nodes.md +3 -8
  43. data/docs/database/public.nodes.svg +152 -0
  44. data/docs/database/public.robot_nodes.md +44 -0
  45. data/docs/database/public.robot_nodes.svg +121 -0
  46. data/{dbdoc → docs/database}/public.robots.md +1 -2
  47. data/docs/database/public.robots.svg +106 -0
  48. data/docs/database/public.working_memories.md +40 -0
  49. data/docs/database/public.working_memories.svg +112 -0
  50. data/{dbdoc → docs/database}/schema.json +342 -110
  51. data/docs/database/schema.svg +223 -0
  52. data/docs/development/index.md +1 -29
  53. data/docs/development/schema.md +84 -324
  54. data/docs/development/testing.md +1 -9
  55. data/docs/getting-started/index.md +47 -0
  56. data/docs/{installation.md → getting-started/installation.md} +2 -2
  57. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  58. data/docs/guides/adding-memories.md +221 -655
  59. data/docs/guides/search-strategies.md +85 -51
  60. data/docs/images/htm-er-diagram.svg +156 -0
  61. data/docs/index.md +16 -31
  62. data/docs/multi_framework_support.md +4 -4
  63. data/examples/basic_usage.rb +18 -16
  64. data/examples/cli_app/htm_cli.rb +86 -8
  65. data/examples/custom_llm_configuration.rb +1 -2
  66. data/examples/example_app/app.rb +11 -14
  67. data/examples/sinatra_app/Gemfile +1 -0
  68. data/examples/sinatra_app/Gemfile.lock +166 -0
  69. data/examples/sinatra_app/app.rb +219 -24
  70. data/lib/htm/active_record_config.rb +10 -3
  71. data/lib/htm/configuration.rb +265 -78
  72. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  73. data/lib/htm/job_adapter.rb +10 -3
  74. data/lib/htm/long_term_memory.rb +220 -57
  75. data/lib/htm/models/node.rb +36 -7
  76. data/lib/htm/models/robot.rb +30 -4
  77. data/lib/htm/models/robot_node.rb +50 -0
  78. data/lib/htm/models/tag.rb +52 -0
  79. data/lib/htm/models/working_memory_entry.rb +88 -0
  80. data/lib/htm/tasks.rb +4 -0
  81. data/lib/htm/version.rb +1 -1
  82. data/lib/htm.rb +34 -13
  83. data/lib/tasks/htm.rake +32 -1
  84. data/lib/tasks/jobs.rake +7 -3
  85. data/lib/tasks/tags.rake +34 -0
  86. data/mkdocs.yml +56 -9
  87. metadata +61 -31
  88. data/dbdoc/public.node_tags.svg +0 -112
  89. data/dbdoc/public.nodes.svg +0 -118
  90. data/dbdoc/public.robots.svg +0 -90
  91. data/dbdoc/schema.svg +0 -154
  92. /data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  93. /data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  94. /data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  95. /data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  96. /data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  97. /data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  98. /data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  99. /data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  100. /data/{dbdoc → docs/database}/public.relationships.md +0 -0
  101. /data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  102. /data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  103. /data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  104. /data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  105. /data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  106. /data/{dbdoc → docs/database}/public.tags.md +0 -0
  107. /data/{dbdoc → docs/database}/public.tags.svg +0 -0
  108. /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  109. /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
data/docs/api/htm.md CHANGED
@@ -7,15 +7,15 @@ The main interface for HTM's intelligent memory management system.
7
7
  `HTM` is the primary class that orchestrates the two-tier memory system:
8
8
 
9
9
  - **Working Memory**: Token-limited active context for immediate LLM use
10
- - **Long-term Memory**: Durable PostgreSQL storage
10
+ - **Long-term Memory**: Durable PostgreSQL storage with vector embeddings
11
11
 
12
12
  Key features:
13
13
 
14
14
  - Never forgets unless explicitly told (`forget`)
15
15
  - RAG-based retrieval (temporal + semantic search)
16
- - Multi-robot "hive mind" - all robots share global memory
17
- - Relationship graphs for knowledge connections
18
- - Time-series optimized with TimescaleDB
16
+ - Multi-robot "hive mind" - all robots share global memory via content deduplication
17
+ - Hierarchical tagging for knowledge organization
18
+ - Tag-enhanced hybrid search for improved relevance
19
19
 
20
20
  ## Class Definition
21
21
 
@@ -34,11 +34,12 @@ Create a new HTM instance.
34
34
  ```ruby
35
35
  HTM.new(
36
36
  working_memory_size: 128_000,
37
- robot_id: nil,
38
37
  robot_name: nil,
39
38
  db_config: nil,
40
- embedding_service: :ollama,
41
- embedding_model: 'gpt-oss'
39
+ db_pool_size: 5,
40
+ db_query_timeout: 30_000,
41
+ db_cache_size: 1000,
42
+ db_cache_ttl: 300
42
43
  )
43
44
  ```
44
45
 
@@ -47,11 +48,12 @@ HTM.new(
47
48
  | Parameter | Type | Default | Description |
48
49
  |-----------|------|---------|-------------|
49
50
  | `working_memory_size` | Integer | `128_000` | Maximum tokens for working memory |
50
- | `robot_id` | String, nil | Auto-generated UUID | Unique identifier for this robot |
51
- | `robot_name` | String, nil | `"robot_#{id[0..7]}"` | Human-readable name |
51
+ | `robot_name` | String, nil | `"robot_#{uuid[0..7]}"` | Human-readable name |
52
52
  | `db_config` | Hash, nil | From `ENV['HTM_DBURL']` | Database configuration |
53
- | `embedding_service` | Symbol | `:ollama` | Embedding provider (`:ollama`, `:openai`, `:cohere`, `:local`) |
54
- | `embedding_model` | String | `'gpt-oss'` | Model name for embeddings |
53
+ | `db_pool_size` | Integer | `5` | Database connection pool size |
54
+ | `db_query_timeout` | Integer | `30_000` | Query timeout in milliseconds |
55
+ | `db_cache_size` | Integer | `1000` | Query cache size (0 to disable) |
56
+ | `db_cache_ttl` | Integer | `300` | Cache TTL in seconds |
55
57
 
56
58
  #### Returns
57
59
 
@@ -69,14 +71,7 @@ htm = HTM.new(
69
71
  working_memory_size: 256_000
70
72
  )
71
73
 
72
- # OpenAI embeddings
73
- htm = HTM.new(
74
- robot_name: "Research Bot",
75
- embedding_service: :openai,
76
- embedding_model: 'text-embedding-3-small'
77
- )
78
-
79
- # Custom database
74
+ # Custom database configuration
80
75
  htm = HTM.new(
81
76
  db_config: {
82
77
  host: 'localhost',
@@ -86,6 +81,12 @@ htm = HTM.new(
86
81
  password: 'secret'
87
82
  }
88
83
  )
84
+
85
+ # With caching disabled
86
+ htm = HTM.new(
87
+ robot_name: "No Cache Bot",
88
+ db_cache_size: 0
89
+ )
89
90
  ```
90
91
 
91
92
  ---
@@ -94,13 +95,13 @@ htm = HTM.new(
94
95
 
95
96
  ### `robot_id` {: #robot_id }
96
97
 
97
- Unique identifier for this robot instance.
98
+ Unique integer identifier for this robot instance.
98
99
 
99
- - **Type**: String (UUID format)
100
+ - **Type**: Integer
100
101
  - **Read-only**: Yes
101
102
 
102
103
  ```ruby
103
- htm.robot_id # => "a1b2c3d4-e5f6-..."
104
+ htm.robot_id # => 42
104
105
  ```
105
106
 
106
107
  ### `robot_name` {: #robot_name }
@@ -140,106 +141,86 @@ htm.long_term_memory.stats # => {...}
140
141
 
141
142
  ## Public Methods
142
143
 
143
- ### `add_node(key, value, **options)` {: #add_node }
144
+ ### `remember(content, tags:)` {: #remember }
144
145
 
145
- Add a new memory node to both working and long-term memory.
146
+ Remember new information by storing it in long-term memory.
146
147
 
147
148
  ```ruby
148
- add_node(key, value,
149
- type: nil,
150
- category: nil,
151
- importance: 1.0,
152
- related_to: [],
153
- tags: []
154
- )
149
+ remember(content, tags: [])
155
150
  ```
156
151
 
157
152
  #### Parameters
158
153
 
159
154
  | Parameter | Type | Default | Description |
160
155
  |-----------|------|---------|-------------|
161
- | `key` | String | *required* | Unique identifier for this node |
162
- | `value` | String | *required* | Content of the memory |
163
- | `type` | Symbol, nil | `nil` | Memory type (`:fact`, `:context`, `:code`, `:preference`, `:decision`, `:question`) |
164
- | `category` | String, nil | `nil` | Optional category for organization |
165
- | `importance` | Float | `1.0` | Importance score (0.0-10.0) |
166
- | `related_to` | Array\<String\> | `[]` | Keys of related nodes |
167
- | `tags` | Array\<String\> | `[]` | Tags for categorization |
156
+ | `content` | String | *required* | The information to remember |
157
+ | `tags` | Array\<String\> | `[]` | Manual tags to assign (in addition to auto-extracted tags) |
168
158
 
169
159
  #### Returns
170
160
 
171
- - `Integer` - Database ID of the created node
161
+ - `Integer` - Database ID of the memory node
172
162
 
173
163
  #### Side Effects
174
164
 
175
- - Stores node in PostgreSQL with vector embedding
165
+ - Stores node in PostgreSQL with content deduplication (via SHA-256 hash)
166
+ - Creates/updates `robot_nodes` association for this robot
176
167
  - Adds node to working memory (evicts if needed)
177
- - Creates relationships to `related_to` nodes
178
- - Adds tags to the node
179
- - Logs operation to `operations_log` table
168
+ - Enqueues background job for embedding generation (new nodes only)
169
+ - Enqueues background job for tag extraction (new nodes only)
180
170
  - Updates robot activity timestamp
181
171
 
172
+ #### Content Deduplication
173
+
174
+ When `remember()` is called:
175
+
176
+ 1. A SHA-256 hash of the content is computed
177
+ 2. If a node with the same hash exists, the existing node is reused
178
+ 3. A new `robot_nodes` association is created (or `remember_count` is incremented)
179
+ 4. This ensures identical memories are stored once but can be "remembered" by multiple robots
180
+
182
181
  #### Examples
183
182
 
184
183
  ```ruby
185
- # Simple fact
186
- htm.add_node("db_choice", "We chose PostgreSQL for its reliability")
187
-
188
- # Architectural decision
189
- htm.add_node(
190
- "api_gateway_decision",
191
- "Decided to use Kong as API gateway for rate limiting and auth",
192
- type: :decision,
193
- importance: 9.0,
194
- tags: ["architecture", "api", "gateway"],
195
- related_to: ["microservices_architecture"]
196
- )
184
+ # Basic usage
185
+ node_id = htm.remember("PostgreSQL supports vector similarity search via pgvector")
197
186
 
198
- # Code snippet
199
- code = <<~RUBY
200
- def calculate_total(items)
201
- items.sum(&:price)
202
- end
203
- RUBY
204
-
205
- htm.add_node(
206
- "total_calculation_v1",
207
- code,
208
- type: :code,
209
- category: "helpers",
210
- importance: 5.0,
211
- tags: ["ruby", "calculation"]
187
+ # With manual tags
188
+ node_id = htm.remember(
189
+ "Time-series data works great with hypertables",
190
+ tags: ["database:timescaledb", "performance"]
212
191
  )
213
192
 
214
- # User preference
215
- htm.add_node(
216
- "user_123_timezone",
217
- "User prefers UTC timezone for all timestamps",
218
- type: :preference,
219
- category: "user_settings",
220
- importance: 6.0
221
- )
193
+ # Multiple robots remembering the same content
194
+ robot1 = HTM.new(robot_name: "assistant_1")
195
+ robot2 = HTM.new(robot_name: "assistant_2")
196
+
197
+ # Both robots remember the same fact - stored once, linked to both
198
+ robot1.remember("Ruby 3.3 was released in December 2023")
199
+ robot2.remember("Ruby 3.3 was released in December 2023")
200
+ # Same node_id returned; a new robot_nodes association is created for robot2
222
201
  ```
223
202
 
224
203
  #### Notes
225
204
 
226
- - The `key` must be unique across all nodes
227
- - Embeddings are generated automatically
228
- - Token count is calculated automatically
229
- - If working memory is full, less important nodes are evicted
205
+ - Embeddings and hierarchical tags are generated asynchronously via background jobs
206
+ - Empty content returns the ID of the most recent node without creating a duplicate
207
+ - Token count is calculated automatically using the configured token counter
230
208
 
231
209
  ---
232
210
 
233
- ### `recall(timeframe:, topic:, **options)` {: #recall }
211
+ ### `recall(topic, **options)` {: #recall }
234
212
 
235
- Recall memories from a timeframe and topic using RAG-based retrieval.
213
+ Recall memories from long-term storage using RAG-based retrieval.
236
214
 
237
215
  ```ruby
238
216
  recall(
239
- timeframe:,
240
- topic:,
217
+ topic,
218
+ timeframe: "last 7 days",
241
219
  limit: 20,
242
- strategy: :vector
220
+ strategy: :vector,
221
+ with_relevance: false,
222
+ query_tags: [],
223
+ raw: false
243
224
  )
244
225
  ```
245
226
 
@@ -247,10 +228,13 @@ recall(
247
228
 
248
229
  | Parameter | Type | Default | Description |
249
230
  |-----------|------|---------|-------------|
250
- | `timeframe` | String, Range | *required* | Time range (e.g., `"last week"`, `7.days.ago..Time.now`) |
251
231
  | `topic` | String | *required* | Topic to search for |
232
+ | `timeframe` | String, Range | `"last 7 days"` | Time range |
252
233
  | `limit` | Integer | `20` | Maximum number of nodes to retrieve |
253
234
  | `strategy` | Symbol | `:vector` | Search strategy (`:vector`, `:fulltext`, `:hybrid`) |
235
+ | `with_relevance` | Boolean | `false` | Include dynamic relevance scores |
236
+ | `query_tags` | Array\<String\> | `[]` | Tags to boost relevance |
237
+ | `raw` | Boolean | `false` | Return full node hashes instead of content strings |
254
238
 
255
239
  #### Timeframe Formats
256
240
 
@@ -274,27 +258,35 @@ Range format:
274
258
  |----------|-------------|----------|
275
259
  | `:vector` | Semantic similarity using embeddings | Find conceptually related content |
276
260
  | `:fulltext` | PostgreSQL full-text search | Find exact terms and phrases |
277
- | `:hybrid` | Fulltext prefilter + vector ranking | Best accuracy + semantic understanding |
261
+ | `:hybrid` | Vector + fulltext + tag matching | Best accuracy with tag boosting |
262
+
263
+ #### Tag-Enhanced Hybrid Search
264
+
265
+ When using `:hybrid` strategy, the search automatically:
266
+
267
+ 1. Finds tags matching query terms (words of 3 or more characters)
268
+ 2. Includes nodes with matching tags in the candidate pool
269
+ 3. Calculates combined score: `(similarity × 0.7) + (tag_boost × 0.3)`
270
+ 4. Returns results sorted by combined score
278
271
 
279
272
  #### Returns
280
273
 
281
- - `Array<Hash>` - Retrieved memory nodes
274
+ - `Array<String>` - Content strings (when `raw: false`, default)
275
+ - `Array<Hash>` - Full node hashes (when `raw: true`)
282
276
 
283
- Each hash contains:
277
+ When `raw: true`, each hash contains:
284
278
 
285
279
  ```ruby
286
280
  {
287
281
  "id" => 123, # Database ID
288
- "key" => "node_key", # Node identifier
289
- "value" => "content...", # Node content
290
- "type" => "fact", # Node type
291
- "category" => "architecture", # Category
292
- "importance" => 8.0, # Importance score
282
+ "content" => "...", # Node content
283
+ "content_hash" => "abc123...", # SHA-256 hash
284
+ "access_count" => 5, # Times accessed
293
285
  "created_at" => "2025-01-15...", # Creation timestamp
294
- "robot_id" => "abc123...", # Robot that created it
295
286
  "token_count" => 125, # Token count
296
- "similarity" => 0.87 # Similarity score (vector/hybrid only)
297
- # or "rank" => 0.456 # Rank score (fulltext only)
287
+ "similarity" => 0.87, # Similarity score (hybrid/vector)
288
+ "tag_boost" => 0.3, # Tag boost score (hybrid only)
289
+ "combined_score" => 0.70 # Combined score (hybrid only): (0.87 × 0.7) + (0.3 × 0.3)
298
290
  }
299
291
  ```
300
292
 
@@ -302,118 +294,79 @@ Each hash contains:
302
294
 
303
295
  - Adds recalled nodes to working memory
304
296
  - Evicts existing nodes if working memory is full
305
- - Logs operation to `operations_log` table
306
297
  - Updates robot activity timestamp
307
298
 
308
299
  #### Examples
309
300
 
310
301
  ```ruby
302
+ # Basic usage (returns content strings)
303
+ memories = htm.recall("PostgreSQL")
304
+ # => ["PostgreSQL supports vector search...", "PostgreSQL with pgvector..."]
305
+
306
+ # Get full node hashes
307
+ nodes = htm.recall("PostgreSQL", raw: true)
308
+ # => [{"id" => 1, "content" => "...", "similarity" => 0.92, ...}, ...]
309
+
311
310
  # Vector semantic search
312
311
  memories = htm.recall(
312
+ "database performance optimization",
313
313
  timeframe: "last week",
314
- topic: "database performance optimization"
314
+ strategy: :vector
315
315
  )
316
316
 
317
317
  # Fulltext search for exact phrases
318
318
  memories = htm.recall(
319
+ "PostgreSQL connection pooling",
319
320
  timeframe: "last 30 days",
320
- topic: "PostgreSQL connection pooling",
321
321
  strategy: :fulltext,
322
322
  limit: 10
323
323
  )
324
324
 
325
- # Hybrid search (best of both)
325
+ # Hybrid search with tag boosting (recommended)
326
326
  memories = htm.recall(
327
+ "API rate limiting implementation",
327
328
  timeframe: "this month",
328
- topic: "API rate limiting implementation",
329
329
  strategy: :hybrid,
330
- limit: 15
330
+ limit: 15,
331
+ raw: true
331
332
  )
332
333
 
334
+ # Check matching tags for a query
335
+ matching_tags = htm.long_term_memory.find_query_matching_tags("PostgreSQL")
336
+ # => ["database:postgresql", "database:postgresql:extensions"]
337
+
333
338
  # Custom time range
334
339
  start_time = Time.new(2025, 1, 1)
335
340
  end_time = Time.now
336
341
 
337
342
  memories = htm.recall(
343
+ "security vulnerabilities",
338
344
  timeframe: start_time..end_time,
339
- topic: "security vulnerabilities",
340
345
  limit: 50
341
346
  )
342
-
343
- # Process results
344
- memories.each do |memory|
345
- puts "#{memory['created_at']}: #{memory['value']}"
346
- puts " Similarity: #{memory['similarity']}" if memory['similarity']
347
- puts " Robot: #{memory['robot_id']}"
348
- end
349
347
  ```
350
348
 
351
349
  #### Performance Notes
352
350
 
353
351
  - Vector search: Best for semantic understanding, requires embedding generation
354
352
  - Fulltext search: Fastest for exact matches, no embedding overhead
355
- - Hybrid search: Slower but most accurate, combines both approaches
356
-
357
- ---
358
-
359
- ### `retrieve(key)` {: #retrieve }
360
-
361
- Retrieve a specific memory node by its key.
362
-
363
- ```ruby
364
- retrieve(key)
365
- ```
366
-
367
- #### Parameters
368
-
369
- | Parameter | Type | Description |
370
- |-----------|------|-------------|
371
- | `key` | String | Key of the node to retrieve |
372
-
373
- #### Returns
374
-
375
- - `Hash` - Node data if found
376
- - `nil` - If node doesn't exist
377
-
378
- #### Side Effects
379
-
380
- - Updates `last_accessed` timestamp for the node
381
- - Logs operation to `operations_log` table
382
-
383
- #### Examples
384
-
385
- ```ruby
386
- # Retrieve a node
387
- node = htm.retrieve("api_decision_001")
388
-
389
- if node
390
- puts node['value']
391
- puts "Created: #{node['created_at']}"
392
- puts "Importance: #{node['importance']}"
393
- else
394
- puts "Node not found"
395
- end
396
-
397
- # Use retrieved data
398
- config = htm.retrieve("database_config")
399
- db_url = JSON.parse(config['value'])['url'] if config
400
- ```
353
+ - Hybrid search: Most accurate, combines vector + fulltext + tags with weighted scoring
401
354
 
402
355
  ---
403
356
 
404
- ### `forget(key, confirm:)` {: #forget }
357
+ ### `forget(node_id, confirm:)` {: #forget }
405
358
 
406
359
  Explicitly delete a memory node. Requires confirmation to prevent accidental deletion.
407
360
 
408
361
  ```ruby
409
- forget(key, confirm: :confirmed)
362
+ forget(node_id, confirm: :confirmed)
410
363
  ```
411
364
 
412
365
  #### Parameters
413
366
 
414
367
  | Parameter | Type | Description |
415
368
  |-----------|------|-------------|
416
- | `key` | String | Key of the node to delete |
369
+ | `node_id` | Integer | ID of the node to delete |
417
370
  | `confirm` | Symbol | Must be `:confirmed` to proceed |
418
371
 
419
372
  #### Returns
@@ -423,27 +376,28 @@ forget(key, confirm: :confirmed)
423
376
  #### Raises
424
377
 
425
378
  - `ArgumentError` - If `confirm` is not `:confirmed`
379
+ - `ArgumentError` - If `node_id` is nil
380
+ - `HTM::NotFoundError` - If node doesn't exist
426
381
 
427
382
  #### Side Effects
428
383
 
429
384
  - Deletes node from PostgreSQL
430
385
  - Removes node from working memory
431
- - Logs operation before deletion
432
386
  - Updates robot activity timestamp
433
387
 
434
388
  #### Examples
435
389
 
436
390
  ```ruby
437
391
  # Correct usage
438
- htm.forget("temp_note_123", confirm: :confirmed)
392
+ htm.forget(123, confirm: :confirmed)
439
393
 
440
394
  # This will raise ArgumentError
441
- htm.forget("temp_note_123") # Missing confirm parameter
395
+ htm.forget(123) # Missing confirm parameter
442
396
 
443
397
  # Safe deletion with verification
444
- if htm.retrieve("old_data")
445
- htm.forget("old_data", confirm: :confirmed)
446
- puts "Deleted old_data"
398
+ if htm.long_term_memory.exists?(node_id)
399
+ htm.forget(node_id, confirm: :confirmed)
400
+ puts "Deleted node #{node_id}"
447
401
  end
448
402
  ```
449
403
 
@@ -451,250 +405,8 @@ end
451
405
 
452
406
  - This is the **only** way to delete data from HTM
453
407
  - Deletion is permanent and cannot be undone
454
- - Related relationships and tags are also deleted (CASCADE)
455
-
456
- ---
457
-
458
- ### `create_context(strategy:, max_tokens:)` {: #create_context }
459
-
460
- Create a context string from working memory for LLM consumption.
461
-
462
- ```ruby
463
- create_context(strategy: :balanced, max_tokens: nil)
464
- ```
465
-
466
- #### Parameters
467
-
468
- | Parameter | Type | Default | Description |
469
- |-----------|------|---------|-------------|
470
- | `strategy` | Symbol | `:balanced` | Assembly strategy |
471
- | `max_tokens` | Integer, nil | Working memory max | Optional token limit |
472
-
473
- #### Assembly Strategies
474
-
475
- | Strategy | Behavior | Use Case |
476
- |----------|----------|----------|
477
- | `:recent` | Most recently accessed first | Prioritize latest information |
478
- | `:important` | Highest importance scores first | Focus on critical information |
479
- | `:balanced` | Weighted by importance × recency | Best general-purpose strategy |
480
-
481
- #### Returns
482
-
483
- - `String` - Assembled context with nodes separated by `"\n\n"`
484
-
485
- #### Examples
486
-
487
- ```ruby
488
- # Balanced context (default)
489
- context = htm.create_context(strategy: :balanced)
490
-
491
- # Recent context with token limit
492
- context = htm.create_context(
493
- strategy: :recent,
494
- max_tokens: 50_000
495
- )
496
-
497
- # Important context only
498
- context = htm.create_context(strategy: :important)
499
-
500
- # Use in LLM prompt
501
- prompt = <<~PROMPT
502
- You are a helpful assistant.
503
-
504
- Context from memory:
505
- #{context}
506
-
507
- User question: #{user_input}
508
- PROMPT
509
- ```
510
-
511
- #### Notes
512
-
513
- - Nodes are concatenated with double newlines
514
- - Token limits are respected (stops adding when limit reached)
515
- - Empty string if working memory is empty
516
-
517
- ---
518
-
519
- ### `memory_stats()` {: #memory_stats }
520
-
521
- Get comprehensive statistics about memory usage.
522
-
523
- ```ruby
524
- memory_stats()
525
- ```
526
-
527
- #### Returns
528
-
529
- - `Hash` - Statistics hash
530
-
531
- Structure:
532
-
533
- ```ruby
534
- {
535
- robot_id: "abc123...",
536
- robot_name: "Assistant",
537
-
538
- # Long-term memory stats
539
- total_nodes: 1234,
540
- nodes_by_robot: {
541
- "robot-1" => 500,
542
- "robot-2" => 734
543
- },
544
- nodes_by_type: [
545
- {"type" => "fact", "count" => 400},
546
- {"type" => "decision", "count" => 200},
547
- ...
548
- ],
549
- total_relationships: 567,
550
- total_tags: 890,
551
- oldest_memory: "2025-01-01 12:00:00",
552
- newest_memory: "2025-01-15 14:30:00",
553
- active_robots: 3,
554
- robot_activity: [...],
555
- database_size: 12345678,
556
-
557
- # Working memory stats
558
- working_memory: {
559
- current_tokens: 45234,
560
- max_tokens: 128000,
561
- utilization: 35.34,
562
- node_count: 23
563
- }
564
- }
565
- ```
566
-
567
- #### Examples
568
-
569
- ```ruby
570
- stats = htm.memory_stats
571
-
572
- puts "Total memories: #{stats[:total_nodes]}"
573
- puts "Working memory: #{stats[:working_memory][:utilization]}% full"
574
- puts "Active robots: #{stats[:active_robots]}"
575
-
576
- # Check if working memory is getting full
577
- if stats[:working_memory][:utilization] > 80
578
- puts "Warning: Working memory is #{stats[:working_memory][:utilization]}% full"
579
- end
580
-
581
- # Display by robot
582
- stats[:nodes_by_robot].each do |robot_id, count|
583
- puts "#{robot_id}: #{count} nodes"
584
- end
585
- ```
586
-
587
- ---
588
-
589
- ### `which_robot_said(topic, limit:)` {: #which_robot_said }
590
-
591
- Find which robots have discussed a specific topic.
592
-
593
- ```ruby
594
- which_robot_said(topic, limit: 100)
595
- ```
596
-
597
- #### Parameters
598
-
599
- | Parameter | Type | Default | Description |
600
- |-----------|------|---------|-------------|
601
- | `topic` | String | *required* | Topic to search for |
602
- | `limit` | Integer | `100` | Maximum results to consider |
603
-
604
- #### Returns
605
-
606
- - `Hash` - Robot IDs mapped to mention counts
607
-
608
- ```ruby
609
- {
610
- "robot-abc123" => 15,
611
- "robot-def456" => 8,
612
- "robot-ghi789" => 3
613
- }
614
- ```
615
-
616
- #### Examples
617
-
618
- ```ruby
619
- # Find who discussed deployment
620
- robots = htm.which_robot_said("deployment")
621
- # => {"robot-1" => 12, "robot-2" => 5}
622
-
623
- # Top contributor
624
- top_robot, count = robots.max_by { |robot, count| count }
625
- puts "#{top_robot} mentioned it #{count} times"
626
-
627
- # Check if specific robot discussed it
628
- if robots.key?("robot-123")
629
- puts "Robot-123 discussed deployment #{robots['robot-123']} times"
630
- end
631
- ```
632
-
633
- ---
634
-
635
- ### `conversation_timeline(topic, limit:)` {: #conversation_timeline }
636
-
637
- Get a chronological timeline of conversation about a topic.
638
-
639
- ```ruby
640
- conversation_timeline(topic, limit: 50)
641
- ```
642
-
643
- #### Parameters
644
-
645
- | Parameter | Type | Default | Description |
646
- |-----------|------|---------|-------------|
647
- | `topic` | String | *required* | Topic to search for |
648
- | `limit` | Integer | `50` | Maximum results |
649
-
650
- #### Returns
651
-
652
- - `Array<Hash>` - Timeline entries sorted by timestamp
653
-
654
- Structure:
655
-
656
- ```ruby
657
- [
658
- {
659
- timestamp: "2025-01-15 10:30:00",
660
- robot: "robot-abc123",
661
- content: "We should consider PostgreSQL...",
662
- type: "decision"
663
- },
664
- {
665
- timestamp: "2025-01-15 11:45:00",
666
- robot: "robot-def456",
667
- content: "Agreed, PostgreSQL has better...",
668
- type: "fact"
669
- },
670
- ...
671
- ]
672
- ```
673
-
674
- #### Examples
675
-
676
- ```ruby
677
- # Get timeline
678
- timeline = htm.conversation_timeline("API design", limit: 20)
679
-
680
- # Display timeline
681
- timeline.each do |entry|
682
- puts "[#{entry[:timestamp]}] #{entry[:robot]}:"
683
- puts " #{entry[:content]}"
684
- puts " (#{entry[:type]})"
685
- puts
686
- end
687
-
688
- # Find first mention
689
- first = timeline.first
690
- puts "First discussed by #{first[:robot]} at #{first[:timestamp]}"
691
-
692
- # Group by robot
693
- by_robot = timeline.group_by { |e| e[:robot] }
694
- by_robot.each do |robot, entries|
695
- puts "#{robot}: #{entries.size} contributions"
696
- end
697
- ```
408
+ - Related `robot_nodes` and `node_tags` rows are also deleted (CASCADE)
409
+ - Other robots' associations to this node are also removed
698
410
 
699
411
  ---
700
412
 
@@ -704,12 +416,24 @@ end
704
416
 
705
417
  ```ruby
706
418
  # Missing confirm parameter
707
- htm.forget("key")
419
+ htm.forget(123)
708
420
  # => ArgumentError: Must pass confirm: :confirmed to delete
709
421
 
422
+ # Nil node_id
423
+ htm.forget(nil, confirm: :confirmed)
424
+ # => ArgumentError: node_id cannot be nil
425
+
710
426
  # Invalid timeframe
711
- htm.recall(timeframe: nil, topic: "test")
712
- # => ArgumentError: Invalid timeframe: nil
427
+ htm.recall("test", timeframe: 123)
428
+ # => HTM::ValidationError: Timeframe must be a Range or String
429
+ ```
430
+
431
+ ### HTM::NotFoundError
432
+
433
+ ```ruby
434
+ # Node doesn't exist
435
+ htm.forget(999999, confirm: :confirmed)
436
+ # => HTM::NotFoundError: Node not found: 999999
713
437
  ```
714
438
 
715
439
  ### PG::Error
@@ -718,73 +442,95 @@ htm.recall(timeframe: nil, topic: "test")
718
442
  # Database connection issues
719
443
  htm = HTM.new(db_config: { host: 'invalid' })
720
444
  # => PG::ConnectionBad: could not translate host name...
721
-
722
- # Duplicate key
723
- htm.add_node("existing_key", "value")
724
- # => PG::UniqueViolation: duplicate key value...
725
445
  ```
726
446
 
727
447
  ## Best Practices
728
448
 
729
- ### Memory Organization
449
+ ### Content Organization
730
450
 
731
451
  ```ruby
732
- # Use consistent key naming
733
- htm.add_node("decision_20250115_api_gateway", ...)
734
- htm.add_node("fact_20250115_database_choice", ...)
735
-
736
- # Use importance strategically
737
- htm.add_node(key, value, importance: 9.0) # Critical
738
- htm.add_node(key, value, importance: 5.0) # Normal
739
- htm.add_node(key, value, importance: 2.0) # Low priority
740
-
741
- # Build knowledge graphs
742
- htm.add_node(
743
- "api_v2_implementation",
744
- "...",
745
- related_to: ["api_v1_design", "authentication_decision"]
452
+ # Use meaningful content that stands alone
453
+ htm.remember("PostgreSQL was chosen for its reliability and pgvector support")
454
+
455
+ # Add hierarchical tags for organization
456
+ htm.remember(
457
+ "Rate limiting implemented using Redis sliding window algorithm",
458
+ tags: ["architecture:api:rate-limiting", "database:redis"]
746
459
  )
460
+
461
+ # Let the system extract tags automatically for most content
462
+ htm.remember("The authentication system uses JWT tokens with 1-hour expiry")
463
+ # Auto-extracted tags might include: security:authentication, technology:jwt
747
464
  ```
748
465
 
749
466
  ### Search Strategies
750
467
 
751
468
  ```ruby
469
+ # Use hybrid for best results (recommended)
470
+ memories = htm.recall(
471
+ "security vulnerability",
472
+ strategy: :hybrid # Combines vector + fulltext + tags
473
+ )
474
+
752
475
  # Use vector for semantic understanding
753
476
  memories = htm.recall(
754
- timeframe: "last month",
755
- topic: "performance issues",
477
+ "performance issues",
756
478
  strategy: :vector # Finds "slow queries", "optimization", etc.
757
479
  )
758
480
 
759
481
  # Use fulltext for exact terms
760
482
  memories = htm.recall(
761
- timeframe: "this week",
762
- topic: "PostgreSQL EXPLAIN ANALYZE",
483
+ "PostgreSQL EXPLAIN ANALYZE",
763
484
  strategy: :fulltext # Exact match
764
485
  )
486
+ ```
765
487
 
766
- # Use hybrid for best results
767
- memories = htm.recall(
768
- timeframe: "last week",
769
- topic: "security vulnerability",
770
- strategy: :hybrid # Accurate + semantic
771
- )
488
+ ### Leveraging Tag-Enhanced Search
489
+
490
+ ```ruby
491
+ # Check what tags exist for a topic
492
+ tags = htm.long_term_memory.find_query_matching_tags("database")
493
+ # => ["database:postgresql", "database:redis", "database:timescaledb"]
494
+
495
+ # Hybrid search automatically boosts nodes with matching tags
496
+ memories = htm.recall("database optimization", strategy: :hybrid, raw: true)
497
+ memories.each do |m|
498
+ puts "Score: #{m['combined_score']} (sim: #{m['similarity']}, tag: #{m['tag_boost']})"
499
+ end
500
+ ```
501
+
502
+ ### Multi-Robot Memory Sharing
503
+
504
+ ```ruby
505
+ # Content is deduplicated across robots
506
+ assistant = HTM.new(robot_name: "assistant")
507
+ researcher = HTM.new(robot_name: "researcher")
508
+
509
+ # Both robots remember the same fact
510
+ assistant.remember("Ruby 3.3 supports YJIT by default")
511
+ researcher.remember("Ruby 3.3 supports YJIT by default")
512
+ # Node stored once, linked to both robots
513
+
514
+ # Any robot can recall shared memories
515
+ memories = assistant.recall("Ruby YJIT")
516
+ # Returns the shared memory
772
517
  ```
773
518
 
774
519
  ### Resource Management
775
520
 
776
521
  ```ruby
777
522
  # Check working memory before large operations
778
- stats = htm.memory_stats
779
- if stats[:working_memory][:utilization] > 90
780
- # Maybe explicitly recall less
523
+ stats = htm.working_memory.stats
524
+ if stats[:utilization] > 90
525
+ # Consider clearing working memory or using smaller limits
781
526
  end
782
527
 
783
528
  # Use appropriate limits
784
- htm.recall(topic: "common_topic", limit: 10) # Not 1000
529
+ htm.recall("common_topic", limit: 10) # Not 1000
785
530
 
786
- # Monitor database size
787
- if stats[:database_size] > 1_000_000_000 # 1GB
531
+ # Monitor node counts
532
+ node_count = HTM::Models::Node.count
533
+ if node_count > 1_000_000
788
534
  # Consider archival strategy
789
535
  end
790
536
  ```