htm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.tbls.yml +30 -0
  4. data/CHANGELOG.md +30 -0
  5. data/SETUP.md +132 -101
  6. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +14 -0
  7. data/db/migrate/20250125000002_create_robot_nodes.rb +35 -0
  8. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +28 -0
  9. data/db/migrate/20250126000001_create_working_memories.rb +19 -0
  10. data/db/migrate/20250126000002_remove_unused_columns.rb +12 -0
  11. data/db/schema.sql +226 -43
  12. data/docs/api/database.md +20 -232
  13. data/docs/api/embedding-service.md +1 -7
  14. data/docs/api/htm.md +195 -449
  15. data/docs/api/index.md +1 -7
  16. data/docs/api/long-term-memory.md +342 -590
  17. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  18. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  19. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  20. data/docs/architecture/adrs/index.md +2 -13
  21. data/docs/architecture/hive-mind.md +165 -166
  22. data/docs/architecture/index.md +2 -2
  23. data/docs/architecture/overview.md +5 -171
  24. data/docs/architecture/two-tier-memory.md +1 -35
  25. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  26. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  27. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  28. data/docs/assets/images/class-hierarchy.svg +55 -0
  29. data/docs/assets/images/exception-hierarchy.svg +45 -0
  30. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  31. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  32. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  33. data/docs/assets/images/htm-eviction-process.svg +141 -0
  34. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  35. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  36. data/docs/assets/images/htm-node-states.svg +123 -0
  37. data/docs/assets/images/project-structure.svg +78 -0
  38. data/docs/assets/images/test-directory-structure.svg +38 -0
  39. data/{dbdoc → docs/database}/README.md +5 -3
  40. data/{dbdoc → docs/database}/public.node_tags.md +4 -5
  41. data/docs/database/public.node_tags.svg +106 -0
  42. data/{dbdoc → docs/database}/public.nodes.md +3 -8
  43. data/docs/database/public.nodes.svg +152 -0
  44. data/docs/database/public.robot_nodes.md +44 -0
  45. data/docs/database/public.robot_nodes.svg +121 -0
  46. data/{dbdoc → docs/database}/public.robots.md +1 -2
  47. data/docs/database/public.robots.svg +106 -0
  48. data/docs/database/public.working_memories.md +40 -0
  49. data/docs/database/public.working_memories.svg +112 -0
  50. data/{dbdoc → docs/database}/schema.json +342 -110
  51. data/docs/database/schema.svg +223 -0
  52. data/docs/development/index.md +1 -29
  53. data/docs/development/schema.md +84 -324
  54. data/docs/development/testing.md +1 -9
  55. data/docs/getting-started/index.md +47 -0
  56. data/docs/{installation.md → getting-started/installation.md} +2 -2
  57. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  58. data/docs/guides/adding-memories.md +221 -655
  59. data/docs/guides/search-strategies.md +85 -51
  60. data/docs/images/htm-er-diagram.svg +156 -0
  61. data/docs/index.md +16 -31
  62. data/docs/multi_framework_support.md +4 -4
  63. data/examples/basic_usage.rb +18 -16
  64. data/examples/cli_app/htm_cli.rb +86 -8
  65. data/examples/custom_llm_configuration.rb +1 -2
  66. data/examples/example_app/app.rb +11 -14
  67. data/examples/sinatra_app/Gemfile +1 -0
  68. data/examples/sinatra_app/Gemfile.lock +166 -0
  69. data/examples/sinatra_app/app.rb +219 -24
  70. data/lib/htm/active_record_config.rb +10 -3
  71. data/lib/htm/configuration.rb +265 -78
  72. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  73. data/lib/htm/job_adapter.rb +10 -3
  74. data/lib/htm/long_term_memory.rb +220 -57
  75. data/lib/htm/models/node.rb +36 -7
  76. data/lib/htm/models/robot.rb +30 -4
  77. data/lib/htm/models/robot_node.rb +50 -0
  78. data/lib/htm/models/tag.rb +52 -0
  79. data/lib/htm/models/working_memory_entry.rb +88 -0
  80. data/lib/htm/tasks.rb +4 -0
  81. data/lib/htm/version.rb +1 -1
  82. data/lib/htm.rb +34 -13
  83. data/lib/tasks/htm.rake +32 -1
  84. data/lib/tasks/jobs.rake +7 -3
  85. data/lib/tasks/tags.rake +34 -0
  86. data/mkdocs.yml +56 -9
  87. metadata +61 -31
  88. data/dbdoc/public.node_tags.svg +0 -112
  89. data/dbdoc/public.nodes.svg +0 -118
  90. data/dbdoc/public.robots.svg +0 -90
  91. data/dbdoc/schema.svg +0 -154
  92. /data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  93. /data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  94. /data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  95. /data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  96. /data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  97. /data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  98. /data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  99. /data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  100. /data/{dbdoc → docs/database}/public.relationships.md +0 -0
  101. /data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  102. /data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  103. /data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  104. /data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  105. /data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  106. /data/{dbdoc → docs/database}/public.tags.md +0 -0
  107. /data/{dbdoc → docs/database}/public.tags.svg +0 -0
  108. /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  109. /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
@@ -8,35 +8,44 @@ PostgreSQL-backed permanent memory storage with RAG-based retrieval.
8
8
 
9
9
  - **Vector similarity search** - Semantic understanding via embeddings
10
10
  - **Full-text search** - Fast keyword and phrase matching
11
- - **Hybrid search** - Combines fulltext prefiltering with vector ranking
12
- - **Time-range queries** - TimescaleDB-optimized temporal search
13
- - **Relationship graphs** - Connect related knowledge
14
- - **Tag system** - Flexible categorization
15
- - **Multi-robot tracking** - Shared global memory
11
+ - **Tag-enhanced hybrid search** - Combines fulltext + vector + tag matching
12
+ - **Content deduplication** - SHA-256 based node deduplication
13
+ - **Query result caching** - LRU cache for frequent queries
14
+ - **Hierarchical tagging** - Colon-separated tag namespaces
16
15
 
17
16
  ## Class Definition
18
17
 
19
18
  ```ruby
20
19
  class HTM::LongTermMemory
21
- # No public attributes
20
+ attr_reader :query_timeout
22
21
  end
23
22
  ```
24
23
 
25
24
  ## Initialization
26
25
 
27
- ### `new(config)` {: #new }
26
+ ### `new(config, **options)` {: #new }
28
27
 
29
28
  Create a new long-term memory instance.
30
29
 
31
30
  ```ruby
32
- HTM::LongTermMemory.new(config)
31
+ HTM::LongTermMemory.new(
32
+ config,
33
+ pool_size: nil,
34
+ query_timeout: 30_000,
35
+ cache_size: 1000,
36
+ cache_ttl: 300
37
+ )
33
38
  ```
34
39
 
35
40
  #### Parameters
36
41
 
37
- | Parameter | Type | Description |
38
- |-----------|------|-------------|
39
- | `config` | Hash | PostgreSQL connection configuration |
42
+ | Parameter | Type | Default | Description |
43
+ |-----------|------|---------|-------------|
44
+ | `config` | Hash | *required* | PostgreSQL connection configuration |
45
+ | `pool_size` | Integer, nil | `nil` | Connection pool size (managed by ActiveRecord) |
46
+ | `query_timeout` | Integer | `30_000` | Query timeout in milliseconds |
47
+ | `cache_size` | Integer | `1000` | LRU cache size (0 to disable) |
48
+ | `cache_ttl` | Integer | `300` | Cache TTL in seconds |
40
49
 
41
50
  #### Configuration Hash
42
51
 
@@ -47,18 +56,10 @@ HTM::LongTermMemory.new(config)
47
56
  dbname: "database_name",
48
57
  user: "username",
49
58
  password: "password",
50
- sslmode: "require" # or "prefer", "disable"
59
+ sslmode: "require"
51
60
  }
52
61
  ```
53
62
 
54
- #### Returns
55
-
56
- - `HTM::LongTermMemory` instance
57
-
58
- #### Raises
59
-
60
- - `RuntimeError` - If config is nil
61
-
62
63
  #### Examples
63
64
 
64
65
  ```ruby
@@ -66,25 +67,16 @@ HTM::LongTermMemory.new(config)
66
67
  config = HTM::Database.default_config
67
68
  ltm = HTM::LongTermMemory.new(config)
68
69
 
69
- # Custom configuration
70
+ # With custom timeout and cache
70
71
  ltm = HTM::LongTermMemory.new(
71
- host: 'localhost',
72
- port: 5432,
73
- dbname: 'htm_production',
74
- user: 'htm_user',
75
- password: ENV['DB_PASSWORD'],
76
- sslmode: 'require'
72
+ config,
73
+ query_timeout: 60_000, # 60 seconds
74
+ cache_size: 5000,
75
+ cache_ttl: 600
77
76
  )
78
77
 
79
- # TimescaleDB Cloud
80
- ltm = HTM::LongTermMemory.new(
81
- host: 'xxx.tsdb.cloud.timescale.com',
82
- port: 37807,
83
- dbname: 'tsdb',
84
- user: 'tsdbadmin',
85
- password: ENV['HTM_DBPASS'],
86
- sslmode: 'require'
87
- )
78
+ # Disable caching
79
+ ltm = HTM::LongTermMemory.new(config, cache_size: 0)
88
80
  ```
89
81
 
90
82
  ---
@@ -93,18 +85,14 @@ ltm = HTM::LongTermMemory.new(
93
85
 
94
86
  ### `add(**params)` {: #add }
95
87
 
96
- Add a node to long-term memory.
88
+ Add a node to long-term memory with content deduplication.
97
89
 
98
90
  ```ruby
99
91
  add(
100
- key:,
101
- value:,
102
- type: nil,
103
- category: nil,
104
- importance: 1.0,
92
+ content:,
105
93
  token_count: 0,
106
94
  robot_id:,
107
- embedding:
95
+ embedding: nil
108
96
  )
109
97
  ```
110
98
 
@@ -112,100 +100,92 @@ add(
112
100
 
113
101
  | Parameter | Type | Default | Description |
114
102
  |-----------|------|---------|-------------|
115
- | `key` | String | *required* | Unique node identifier |
116
- | `value` | String | *required* | Node content |
117
- | `type` | String, nil | `nil` | Node type |
118
- | `category` | String, nil | `nil` | Node category |
119
- | `importance` | Float | `1.0` | Importance score (0.0-10.0) |
103
+ | `content` | String | *required* | Node content |
120
104
  | `token_count` | Integer | `0` | Token count |
121
- | `robot_id` | String | *required* | Robot identifier |
122
- | `embedding` | Array\<Float\> | *required* | Vector embedding |
105
+ | `robot_id` | Integer | *required* | Robot identifier |
106
+ | `embedding` | Array\<Float\>, nil | `nil` | Pre-generated embedding vector |
123
107
 
124
108
  #### Returns
125
109
 
126
- - `Integer` - Database ID of the created node
110
+ - `Hash` - `{ node_id:, is_new:, robot_node: }`
111
+
112
+ #### Content Deduplication
113
+
114
+ When `add()` is called:
115
+
116
+ 1. A SHA-256 hash of the content is computed
117
+ 2. If a node with the same hash exists:
118
+ - Links the robot to the existing node (or updates `remember_count`)
119
+ - Returns `is_new: false`
120
+ 3. If no match:
121
+ - Creates a new node
122
+ - Links the robot to it
123
+ - Returns `is_new: true`
127
124
 
128
125
  #### Examples
129
126
 
130
127
  ```ruby
131
- embedding = embedding_service.embed("content...")
132
-
133
- node_id = ltm.add(
134
- key: "fact_001",
135
- value: "PostgreSQL is our primary database",
136
- type: "fact",
137
- category: "architecture",
138
- importance: 8.0,
139
- token_count: 50,
140
- robot_id: "robot-abc123",
141
- embedding: embedding
128
+ # Add new content
129
+ result = ltm.add(
130
+ content: "PostgreSQL is our primary database",
131
+ token_count: 8,
132
+ robot_id: 1
142
133
  )
143
- # => 1234
144
-
145
- # Minimal add
146
- node_id = ltm.add(
147
- key: "simple_note",
148
- value: "Remember to check logs",
149
- robot_id: robot_id,
150
- embedding: embedding
134
+ # => { node_id: 123, is_new: true, robot_node: <RobotNode> }
135
+
136
+ # Add duplicate content (different robot)
137
+ result = ltm.add(
138
+ content: "PostgreSQL is our primary database",
139
+ token_count: 8,
140
+ robot_id: 2
141
+ )
142
+ # => { node_id: 123, is_new: false, robot_node: <RobotNode> }
143
+ # Same node_id, robot_node tracks this robot's remember_count
144
+
145
+ # With pre-generated embedding
146
+ result = ltm.add(
147
+ content: "Vector search is powerful",
148
+ token_count: 5,
149
+ robot_id: 1,
150
+ embedding: [0.1, 0.2, 0.3, ...] # Will be padded to 2000 dims
151
151
  )
152
152
  ```
153
153
 
154
- #### Notes
155
-
156
- - `key` must be unique (enforced by database)
157
- - `embedding` is stored as a pgvector type
158
- - Automatically sets `created_at` timestamp
159
-
160
154
  ---
161
155
 
162
- ### `retrieve(key)` {: #retrieve }
156
+ ### `retrieve(node_id)` {: #retrieve }
163
157
 
164
- Retrieve a node by its key.
158
+ Retrieve a node by its database ID.
165
159
 
166
160
  ```ruby
167
- retrieve(key)
161
+ retrieve(node_id)
168
162
  ```
169
163
 
170
164
  #### Parameters
171
165
 
172
166
  | Parameter | Type | Description |
173
167
  |-----------|------|-------------|
174
- | `key` | String | Node identifier |
168
+ | `node_id` | Integer | Node database ID |
175
169
 
176
170
  #### Returns
177
171
 
178
- - `Hash` - Node data if found
172
+ - `Hash` - Node attributes if found
179
173
  - `nil` - If node doesn't exist
180
174
 
181
- #### Hash Structure
175
+ #### Side Effects
182
176
 
183
- ```ruby
184
- {
185
- "id" => "123",
186
- "key" => "fact_001",
187
- "value" => "content...",
188
- "type" => "fact",
189
- "category" => "architecture",
190
- "importance" => "8.0",
191
- "token_count" => "50",
192
- "robot_id" => "robot-abc123",
193
- "created_at" => "2025-01-15 10:30:00",
194
- "last_accessed" => "2025-01-15 14:20:00",
195
- "in_working_memory" => "t",
196
- "evicted_at" => nil
197
- }
198
- ```
177
+ - Increments `access_count`
178
+ - Updates `last_accessed` timestamp
199
179
 
200
180
  #### Examples
201
181
 
202
182
  ```ruby
203
- node = ltm.retrieve("fact_001")
183
+ node = ltm.retrieve(123)
204
184
 
205
185
  if node
206
- puts node['value']
186
+ puts node['content']
187
+ puts "Accessed #{node['access_count']} times"
207
188
  puts "Created: #{node['created_at']}"
208
- puts "Importance: #{node['importance']}"
209
189
  else
210
190
  puts "Node not found"
211
191
  end
@@ -213,108 +193,57 @@ end
213
193
 
214
194
  ---
215
195
 
216
- ### `update_last_accessed(key)` {: #update_last_accessed }
196
+ ### `exists?(node_id)` {: #exists }
217
197
 
218
- Update the last accessed timestamp for a node.
198
+ Check if a node exists.
219
199
 
220
200
  ```ruby
221
- update_last_accessed(key)
201
+ exists?(node_id)
222
202
  ```
223
203
 
224
204
  #### Parameters
225
205
 
226
206
  | Parameter | Type | Description |
227
207
  |-----------|------|-------------|
228
- | `key` | String | Node identifier |
208
+ | `node_id` | Integer | Node database ID |
229
209
 
230
210
  #### Returns
231
211
 
232
- - `void`
212
+ - `Boolean` - `true` if the node exists, `false` otherwise
233
213
 
234
214
  #### Examples
235
215
 
236
216
  ```ruby
237
- # After retrieving a node
238
- node = ltm.retrieve("important_fact")
239
- ltm.update_last_accessed("important_fact")
240
-
241
- # Track access patterns
242
- accessed_keys = ["key1", "key2", "key3"]
243
- accessed_keys.each { |k| ltm.update_last_accessed(k) }
217
+ if ltm.exists?(123)
218
+ ltm.delete(123)
219
+ end
244
220
  ```
245
221
 
246
222
  ---
247
223
 
248
- ### `delete(key)` {: #delete }
224
+ ### `delete(node_id)` {: #delete }
249
225
 
250
226
  Delete a node permanently.
251
227
 
252
228
  ```ruby
253
- delete(key)
229
+ delete(node_id)
254
230
  ```
255
231
 
256
232
  #### Parameters
257
233
 
258
234
  | Parameter | Type | Description |
259
235
  |-----------|------|-------------|
260
- | `key` | String | Node identifier |
261
-
262
- #### Returns
263
-
264
- - `void`
236
+ | `node_id` | Integer | Node database ID |
265
237
 
266
238
  #### Side Effects
267
239
 
268
240
  - Deletes node from database
269
- - Cascades to related relationships and tags
270
-
271
- #### Examples
272
-
273
- ```ruby
274
- # Delete a node
275
- ltm.delete("temp_note_123")
276
-
277
- # Safe deletion
278
- if ltm.retrieve("old_key")
279
- ltm.delete("old_key")
280
- end
281
- ```
241
+ - Cascades to robot_nodes and node_tags
242
+ - Invalidates query cache
282
243
 
283
244
  #### Warning
284
245
 
285
- Deletion is **permanent** and cannot be undone. Use `HTM#forget` instead for proper confirmation flow.
286
-
287
- ---
288
-
289
- ### `get_node_id(key)` {: #get_node_id }
290
-
291
- Get the database ID for a node.
292
-
293
- ```ruby
294
- get_node_id(key)
295
- ```
296
-
297
- #### Parameters
298
-
299
- | Parameter | Type | Description |
300
- |-----------|------|-------------|
301
- | `key` | String | Node identifier |
302
-
303
- #### Returns
304
-
305
- - `Integer` - Database ID if found
306
- - `nil` - If node doesn't exist
307
-
308
- #### Examples
309
-
310
- ```ruby
311
- node_id = ltm.get_node_id("fact_001")
312
- # => 123
313
-
314
- # Use in relationships
315
- from_id = ltm.get_node_id("decision_001")
316
- to_id = ltm.get_node_id("fact_001")
317
- ```
246
+ Deletion is **permanent** and cannot be undone. Use `HTM#forget` instead, which provides a proper confirmation flow.
318
247
 
319
248
  ---
320
249
 
@@ -334,7 +263,7 @@ search(
334
263
  #### Parameters
335
264
 
336
265
  | Parameter | Type | Description |
337
- |-----------|------|---------|
266
+ |-----------|------|-------------|
338
267
  | `timeframe` | Range | Time range to search (Time..Time) |
339
268
  | `query` | String | Search query text |
340
269
  | `limit` | Integer | Maximum results |
@@ -348,55 +277,32 @@ search(
348
277
 
349
278
  ```ruby
350
279
  {
351
- "id" => "123",
352
- "key" => "fact_001",
353
- "value" => "content...",
354
- "type" => "fact",
355
- "category" => "architecture",
356
- "importance" => "8.0",
280
+ "id" => 123,
281
+ "content" => "content...",
282
+ "access_count" => 5,
357
283
  "created_at" => "2025-01-15 10:30:00",
358
- "robot_id" => "robot-abc123",
359
- "token_count" => "50",
360
- "similarity" => "0.8745" # 0.0-1.0, higher = more similar
284
+ "token_count" => 50,
285
+ "similarity" => 0.8745 # 0.0-1.0, higher = more similar
361
286
  }
362
287
  ```
363
288
 
364
289
  #### Examples
365
290
 
366
291
  ```ruby
367
- # Semantic search
368
292
  timeframe = (Time.now - 7*24*3600)..Time.now
369
293
 
370
294
  results = ltm.search(
371
295
  timeframe: timeframe,
372
296
  query: "database performance optimization",
373
297
  limit: 20,
374
- embedding_service: embedding_service
298
+ embedding_service: HTM
375
299
  )
376
300
 
377
301
  results.each do |node|
378
- puts "[#{node['similarity']}] #{node['value']}"
302
+ puts "[#{node['similarity']}] #{node['content']}"
379
303
  end
380
-
381
- # Find similar to a specific concept
382
- results = ltm.search(
383
- timeframe: (Time.at(0)..Time.now), # All time
384
- query: "microservices architecture patterns",
385
- limit: 10,
386
- embedding_service: embedding_service
387
- )
388
-
389
- # Filter by similarity threshold
390
- high_similarity = results.select { |n| n['similarity'].to_f > 0.7 }
391
304
  ```
392
305
 
393
- #### Technical Details
394
-
395
- - Uses pgvector's `<=>` cosine distance operator
396
- - Returns `1 - distance` as similarity (0.0-1.0)
397
- - Indexed for fast approximate nearest neighbor search
398
- - Query embedding is generated on-the-fly
399
-
400
306
  ---
401
307
 
402
308
  ### `search_fulltext(**params)` {: #search_fulltext }
@@ -425,56 +331,28 @@ search_fulltext(
425
331
 
426
332
  #### Hash Structure
427
333
 
428
- Similar to `search`, but with `"rank"` instead of `"similarity"`:
429
-
430
334
  ```ruby
431
335
  {
432
336
  ...,
433
- "rank" => "0.456" # Higher = better match
337
+ "rank" => 0.456 # Higher = better match
434
338
  }
435
339
  ```
436
340
 
437
341
  #### Examples
438
342
 
439
343
  ```ruby
440
- # Exact phrase search
441
344
  results = ltm.search_fulltext(
442
345
  timeframe: (Time.now - 30*24*3600)..Time.now,
443
346
  query: "PostgreSQL connection pooling",
444
347
  limit: 10
445
348
  )
446
-
447
- # Multiple keywords
448
- results = ltm.search_fulltext(
449
- timeframe: (Time.now - 7*24*3600)..Time.now,
450
- query: "API authentication JWT token",
451
- limit: 20
452
- )
453
-
454
- # Find mentions
455
- results = ltm.search_fulltext(
456
- timeframe: (Time.at(0)..Time.now),
457
- query: "security vulnerability",
458
- limit: 50
459
- )
460
-
461
- results.each do |node|
462
- puts "[#{node['rank']}] #{node['created_at']}: #{node['value']}"
463
- end
464
349
  ```
465
350
 
466
- #### Technical Details
467
-
468
- - Uses PostgreSQL `to_tsvector` and `plainto_tsquery`
469
- - English language stemming and stop words
470
- - GIN index for fast search
471
- - Ranks by `ts_rank` (term frequency)
472
-
473
351
  ---
474
352
 
475
353
  ### `search_hybrid(**params)` {: #search_hybrid }
476
354
 
477
- Hybrid search combining fulltext prefiltering with vector ranking.
355
+ Tag-enhanced hybrid search combining fulltext, vector, and tag matching.
478
356
 
479
357
  ```ruby
480
358
  search_hybrid(
@@ -494,127 +372,88 @@ search_hybrid(
494
372
  | `query` | String | *required* | Search query |
495
373
  | `limit` | Integer | *required* | Maximum final results |
496
374
  | `embedding_service` | Object | *required* | Service for embeddings |
497
- | `prefilter_limit` | Integer | `100` | Fulltext candidates to consider |
375
+ | `prefilter_limit` | Integer | `100` | Candidates to consider |
498
376
 
499
377
  #### Returns
500
378
 
501
- - `Array<Hash>` - Matching nodes sorted by vector similarity
379
+ - `Array<Hash>` - Matching nodes with combined scores
502
380
 
503
- #### Strategy
381
+ #### Hash Structure
382
+
383
+ ```ruby
384
+ {
385
+ "id" => 123,
386
+ "content" => "...",
387
+ "similarity" => 0.87, # Vector similarity (0-1)
388
+ "tag_boost" => 0.3, # Tag match score (0-1)
389
+ "combined_score" => 0.70 # (similarity × 0.7) + (tag_boost × 0.3)
390
+ }
391
+ ```
504
392
 
505
- 1. **Prefilter**: Use fulltext search to find `prefilter_limit` candidates
506
- 2. **Rank**: Compute vector similarity for candidates only
507
- 3. **Return**: Top `limit` results by similarity
393
+ #### Strategy
508
394
 
509
- This combines the **accuracy** of fulltext with the **semantic understanding** of vectors.
395
+ 1. **Find matching tags**: Searches tags for query term matches
396
+ 2. **Build candidate pool**: Fulltext matches + tag-matching nodes
397
+ 3. **Score candidates**: Vector similarity + tag boost
398
+ 4. **Return top results**: Sorted by combined_score
510
399
 
511
400
  #### Examples
512
401
 
513
402
  ```ruby
514
- # Best of both worlds
515
403
  results = ltm.search_hybrid(
516
404
  timeframe: (Time.now - 30*24*3600)..Time.now,
517
- query: "API rate limiting implementation",
405
+ query: "PostgreSQL performance",
518
406
  limit: 15,
519
- embedding_service: embedding_service,
520
- prefilter_limit: 100
521
- )
522
-
523
- # Adjust prefilter for performance
524
- results = ltm.search_hybrid(
525
- timeframe: timeframe,
526
- query: "security best practices",
527
- limit: 20,
528
- embedding_service: embedding_service,
529
- prefilter_limit: 50 # Smaller = faster
407
+ embedding_service: HTM
530
408
  )
531
409
 
532
- # Large candidate pool for better recall
533
- results = ltm.search_hybrid(
534
- timeframe: timeframe,
535
- query: "deployment strategies",
536
- limit: 10,
537
- embedding_service: embedding_service,
538
- prefilter_limit: 200 # Larger = better recall
539
- )
410
+ results.each do |node|
411
+ puts "#{node['content']}"
412
+ puts " Similarity: #{node['similarity']}"
413
+ puts " Tag boost: #{node['tag_boost']}"
414
+ puts " Combined: #{node['combined_score']}"
415
+ end
540
416
  ```
541
417
 
542
- #### Performance Tuning
543
-
544
- | `prefilter_limit` | Speed | Recall | Use Case |
545
- |-------------------|-------|--------|----------|
546
- | 50 | Fast | Low | Common queries |
547
- | 100 | Medium | Medium | Default (recommended) |
548
- | 200+ | Slow | High | Rare/complex queries |
549
-
550
418
  ---
551
419
 
552
- ### `add_relationship(**params)` {: #add_relationship }
420
+ ### `find_query_matching_tags(query)` {: #find_query_matching_tags }
553
421
 
554
- Add a relationship between two nodes.
422
+ Find tags that match terms in the query.
555
423
 
556
424
  ```ruby
557
- add_relationship(
558
- from:,
559
- to:,
560
- type: nil,
561
- strength: 1.0
562
- )
425
+ find_query_matching_tags(query)
563
426
  ```
564
427
 
565
428
  #### Parameters
566
429
 
567
- | Parameter | Type | Default | Description |
568
- |-----------|------|---------|-------------|
569
- | `from` | String | *required* | From node key |
570
- | `to` | String | *required* | To node key |
571
- | `type` | String, nil | `nil` | Relationship type |
572
- | `strength` | Float | `1.0` | Relationship strength (0.0-1.0) |
430
+ | Parameter | Type | Description |
431
+ |-----------|------|-------------|
432
+ | `query` | String | Search query |
573
433
 
574
434
  #### Returns
575
435
 
576
- - `void`
436
+ - `Array<String>` - Matching tag names
577
437
 
578
- #### Side Effects
438
+ #### How It Works
579
439
 
580
- - Inserts relationship into `relationships` table
581
- - Skips if relationship already exists (ON CONFLICT DO NOTHING)
582
- - Returns early if either node doesn't exist
440
+ 1. Extracts words from query (3+ chars, lowercase)
441
+ 2. Searches tags where any hierarchy level matches (ILIKE)
442
+ 3. Returns all matching tag names
583
443
 
584
444
  #### Examples
585
445
 
586
446
  ```ruby
587
- # Simple relationship
588
- ltm.add_relationship(
589
- from: "decision_001",
590
- to: "fact_001"
591
- )
592
-
593
- # Typed relationship with strength
594
- ltm.add_relationship(
595
- from: "api_v2",
596
- to: "api_v1",
597
- type: "replaces",
598
- strength: 0.9
599
- )
447
+ # Query: "PostgreSQL database optimization"
448
+ # Might return: ["database:postgresql", "database:optimization", "database:sql"]
600
449
 
601
- # Build knowledge graph
602
- ltm.add_relationship(from: "microservices", to: "docker", type: "requires")
603
- ltm.add_relationship(from: "microservices", to: "api_gateway", type: "requires")
604
- ltm.add_relationship(from: "microservices", to: "service_mesh", type: "optional")
605
-
606
- # Related decisions
607
- ltm.add_relationship(
608
- from: "database_choice",
609
- to: "timescaledb_decision",
610
- type: "influences",
611
- strength: 0.8
612
- )
450
+ matching_tags = ltm.find_query_matching_tags("PostgreSQL database")
451
+ # => ["database:postgresql", "database:postgresql:extensions"]
613
452
  ```
614
453
 
615
454
  ---
616
455
 
617
- ### `add_tag(**params)` {: #add_tag }
456
+ ### `add_tag(node_id:, tag:)` {: #add_tag }
618
457
 
619
458
  Add a tag to a node.
620
459
 
@@ -629,282 +468,267 @@ add_tag(node_id:, tag:)
629
468
  | `node_id` | Integer | Node database ID |
630
469
  | `tag` | String | Tag name |
631
470
 
632
- #### Returns
633
-
634
- - `void`
635
-
636
- #### Side Effects
637
-
638
- - Inserts tag into `tags` table
639
- - Skips if tag already exists (ON CONFLICT DO NOTHING)
640
-
641
471
  #### Examples
642
472
 
643
473
  ```ruby
644
- node_id = ltm.add(key: "fact_001", ...)
645
-
646
- # Add single tag
647
- ltm.add_tag(node_id: node_id, tag: "architecture")
648
-
649
- # Add multiple tags
650
- ["architecture", "database", "postgresql"].each do |tag|
651
- ltm.add_tag(node_id: node_id, tag: tag)
652
- end
653
-
654
- # Categorize decision
655
- decision_id = ltm.add(key: "decision_001", ...)
656
- ltm.add_tag(node_id: decision_id, tag: "critical")
657
- ltm.add_tag(node_id: decision_id, tag: "security")
658
- ltm.add_tag(node_id: decision_id, tag: "2025-q1")
474
+ ltm.add_tag(node_id: 123, tag: "database:postgresql")
475
+ ltm.add_tag(node_id: 123, tag: "architecture:decision")
659
476
  ```
660
477
 
661
478
  ---
662
479
 
663
- ### `mark_evicted(keys)` {: #mark_evicted }
480
+ ### `get_node_tags(node_id)` {: #get_node_tags }
664
481
 
665
- Mark nodes as evicted from working memory.
482
+ Get tags for a specific node.
666
483
 
667
484
  ```ruby
668
- mark_evicted(keys)
485
+ get_node_tags(node_id)
669
486
  ```
670
487
 
671
488
  #### Parameters
672
489
 
673
490
  | Parameter | Type | Description |
674
491
  |-----------|------|-------------|
675
- | `keys` | Array\<String\> | Node keys to mark |
492
+ | `node_id` | Integer | Node database ID |
676
493
 
677
494
  #### Returns
678
495
 
679
- - `void`
680
-
681
- #### Side Effects
682
-
683
- - Sets `in_working_memory = FALSE` for specified nodes
684
- - Sets `evicted_at` timestamp
496
+ - `Array<String>` - Tag names
685
497
 
686
498
  #### Examples
687
499
 
688
500
  ```ruby
689
- # Mark single eviction
690
- ltm.mark_evicted(["temp_note_123"])
501
+ tags = ltm.get_node_tags(123)
502
+ # => ["database:postgresql", "architecture:decision"]
503
+ ```
504
+
505
+ ---
506
+
507
+ ### `node_topics(node_id)` {: #node_topics }
691
508
 
692
- # Mark batch eviction
693
- evicted_keys = ["key1", "key2", "key3"]
694
- ltm.mark_evicted(evicted_keys)
509
+ Alias for `get_node_tags` - returns topics/tags for a node.
695
510
 
696
- # From working memory eviction
697
- evicted = working_memory.evict_to_make_space(10000)
698
- evicted_keys = evicted.map { |n| n[:key] }
699
- ltm.mark_evicted(evicted_keys) unless evicted_keys.empty?
511
+ ```ruby
512
+ node_topics(node_id)
700
513
  ```
701
514
 
702
515
  ---
703
516
 
704
- ### `register_robot(robot_id, robot_name)` {: #register_robot }
517
+ ### `nodes_by_topic(topic_path, exact:, limit:)` {: #nodes_by_topic }
705
518
 
706
- Register a robot in the system.
519
+ Retrieve nodes by tag/topic.
707
520
 
708
521
  ```ruby
709
- register_robot(robot_id, robot_name)
522
+ nodes_by_topic(topic_path, exact: false, limit: 50)
710
523
  ```
711
524
 
712
525
  #### Parameters
713
526
 
714
- | Parameter | Type | Description |
715
- |-----------|------|-------------|
716
- | `robot_id` | String | Robot identifier |
717
- | `robot_name` | String | Robot name |
527
+ | Parameter | Type | Default | Description |
528
+ |-----------|------|---------|-------------|
529
+ | `topic_path` | String | *required* | Topic hierarchy path |
530
+ | `exact` | Boolean | `false` | `true` matches the topic path exactly; `false` matches it as a prefix |
531
+ | `limit` | Integer | `50` | Maximum results |
718
532
 
719
533
  #### Returns
720
534
 
721
- - `void`
722
-
723
- #### Side Effects
724
-
725
- - Inserts robot into `robots` table
726
- - Updates name and `last_active` if robot exists
535
+ - `Array<Hash>` - Matching node attributes
727
536
 
728
537
  #### Examples
729
538
 
730
539
  ```ruby
731
- ltm.register_robot("robot-abc123", "Code Assistant")
732
- ltm.register_robot("robot-def456", "Research Bot")
540
+ # Prefix match (default) - finds all database-related nodes
541
+ nodes = ltm.nodes_by_topic("database")
733
542
 
734
- # Register with UUID
735
- robot_id = SecureRandom.uuid
736
- ltm.register_robot(robot_id, "Analysis Bot")
543
+ # Exact match - only nodes tagged with exactly "database:postgresql"
544
+ nodes = ltm.nodes_by_topic("database:postgresql", exact: true)
737
545
  ```
738
546
 
739
547
  ---
740
548
 
741
- ### `update_robot_activity(robot_id)` {: #update_robot_activity }
549
+ ### `search_by_tags(**params)` {: #search_by_tags }
742
550
 
743
- Update robot's last activity timestamp.
551
+ Search nodes by tags with relevance scoring.
744
552
 
745
553
  ```ruby
746
- update_robot_activity(robot_id)
554
+ search_by_tags(
555
+ tags:,
556
+ match_all: false,
557
+ timeframe: nil,
558
+ limit: 20
559
+ )
747
560
  ```
748
561
 
749
562
  #### Parameters
750
563
 
751
- | Parameter | Type | Description |
752
- |-----------|------|-------------|
753
- | `robot_id` | String | Robot identifier |
564
+ | Parameter | Type | Default | Description |
565
+ |-----------|------|---------|-------------|
566
+ | `tags` | Array\<String\> | *required* | Tags to search for |
567
+ | `match_all` | Boolean | `false` | `true` requires ALL tags to match; `false` matches ANY tag |
568
+ | `timeframe` | Range, nil | `nil` | Optional time range filter |
569
+ | `limit` | Integer | `20` | Maximum results |
754
570
 
755
571
  #### Returns
756
572
 
757
- - `void`
573
+ - `Array<Hash>` - Nodes with relevance scores and tags
758
574
 
759
575
  #### Examples
760
576
 
761
577
  ```ruby
762
- # Update after operations
763
- ltm.update_robot_activity("robot-abc123")
578
+ # Match ANY tag
579
+ nodes = ltm.search_by_tags(tags: ["database", "api"])
764
580
 
765
- # Automatic heartbeat
766
- loop do
767
- ltm.update_robot_activity(robot_id)
768
- sleep 60 # Every minute
769
- end
581
+ # Match ALL tags
582
+ nodes = ltm.search_by_tags(
583
+ tags: ["database:postgresql", "architecture"],
584
+ match_all: true
585
+ )
586
+
587
+ # With timeframe
588
+ nodes = ltm.search_by_tags(
589
+ tags: ["security"],
590
+ timeframe: (Time.now - 7*24*3600)..Time.now
591
+ )
770
592
  ```
771
593
 
772
594
  ---
773
595
 
774
- ### `log_operation(**params)` {: #log_operation }
596
+ ### `popular_tags(limit:, timeframe:)` {: #popular_tags }
775
597
 
776
- Log an operation to the operations log.
598
+ Get most frequently used tags.
777
599
 
778
600
  ```ruby
779
- log_operation(
780
- operation:,
781
- node_id:,
782
- robot_id:,
783
- details:
784
- )
601
+ popular_tags(limit: 20, timeframe: nil)
785
602
  ```
786
603
 
787
- #### Parameters
788
-
789
- | Parameter | Type | Description |
790
- |-----------|------|---------|
791
- | `operation` | String | Operation type |
792
- | `node_id` | Integer, nil | Node database ID (can be nil) |
793
- | `robot_id` | String | Robot identifier |
794
- | `details` | Hash | Operation details (stored as JSON) |
795
-
796
604
  #### Returns
797
605
 
798
- - `void`
606
+ - `Array<Hash>` - `[{ name: "tag_name", usage_count: 42 }, ...]`
799
607
 
800
608
  #### Examples
801
609
 
802
610
  ```ruby
803
- # Log add operation
804
- ltm.log_operation(
805
- operation: 'add',
806
- node_id: 123,
807
- robot_id: robot_id,
808
- details: { key: "fact_001", type: "fact" }
809
- )
611
+ top_tags = ltm.popular_tags(limit: 10)
612
+ top_tags.each do |tag|
613
+ puts "#{tag[:name]}: #{tag[:usage_count]} nodes"
614
+ end
615
+ ```
810
616
 
811
- # Log recall operation
812
- ltm.log_operation(
813
- operation: 'recall',
814
- node_id: nil,
815
- robot_id: robot_id,
816
- details: {
817
- timeframe: "last week",
818
- topic: "postgresql",
819
- count: 15
820
- }
821
- )
617
+ ---
822
618
 
823
- # Log forget operation
824
- ltm.log_operation(
825
- operation: 'forget',
826
- node_id: 456,
827
- robot_id: robot_id,
828
- details: { key: "temp_note", reason: "temporary" }
829
- )
619
+ ### `topic_relationships(min_shared_nodes:, limit:)` {: #topic_relationships }
620
+
621
+ Get tag co-occurrence relationships.
622
+
623
+ ```ruby
624
+ topic_relationships(min_shared_nodes: 2, limit: 50)
625
+ ```
626
+
627
+ #### Returns
628
+
629
+ - `Array<Hash>` - `[{ "topic1" => ..., "topic2" => ..., "shared_nodes" => ... }, ...]` (string keys)
630
+
631
+ #### Examples
632
+
633
+ ```ruby
634
+ related = ltm.topic_relationships(min_shared_nodes: 3)
635
+ related.each do |r|
636
+ puts "#{r['topic1']} <-> #{r['topic2']}: #{r['shared_nodes']} shared"
637
+ end
830
638
  ```
831
639
 
832
640
  ---
833
641
 
834
- ### `stats()` {: #stats }
642
+ ### `register_robot(robot_name)` {: #register_robot }
835
643
 
836
- Get comprehensive memory statistics.
644
+ Register a robot in the system.
837
645
 
838
646
  ```ruby
839
- stats()
647
+ register_robot(robot_name)
840
648
  ```
841
649
 
842
650
  #### Returns
843
651
 
844
- - `Hash` - Statistics hash
652
+ - `Integer` - Robot ID
845
653
 
846
- #### Hash Structure
654
+ #### Examples
847
655
 
848
656
  ```ruby
849
- {
850
- total_nodes: 1234,
657
+ robot_id = ltm.register_robot("Code Assistant")
658
+ ```
851
659
 
852
- nodes_by_robot: {
853
- "robot-abc123" => 500,
854
- "robot-def456" => 734
855
- },
660
+ ---
856
661
 
857
- nodes_by_type: [
858
- { "type" => "fact", "count" => 400, "avg_importance" => 6.5 },
859
- { "type" => "decision", "count" => 200, "avg_importance" => 8.2 },
860
- ...
861
- ],
662
+ ### `update_robot_activity(robot_id)` {: #update_robot_activity }
862
663
 
863
- total_relationships: 567,
864
- total_tags: 890,
664
+ Update robot's last activity timestamp.
865
665
 
866
- oldest_memory: "2025-01-01 12:00:00",
867
- newest_memory: "2025-01-15 14:30:00",
666
+ ```ruby
667
+ update_robot_activity(robot_id)
668
+ ```
868
669
 
869
- active_robots: 3,
670
+ ---
870
671
 
871
- robot_activity: [
872
- { "id" => "robot-1", "name" => "Assistant", "last_active" => "2025-01-15 14:00:00" },
873
- ...
874
- ],
672
+ ### `mark_evicted(node_ids)` {: #mark_evicted }
875
673
 
876
- database_size: 12345678 # bytes
877
- }
674
+ Mark nodes as evicted from working memory.
675
+
676
+ ```ruby
677
+ mark_evicted(node_ids)
878
678
  ```
879
679
 
880
- #### Examples
680
+ #### Parameters
681
+
682
+ | Parameter | Type | Description |
683
+ |-----------|------|-------------|
684
+ | `node_ids` | Array\<Integer\> | Node IDs to mark |
685
+
686
+ ---
687
+
688
+ ### `track_access(node_ids)` {: #track_access }
689
+
690
+ Track access for multiple nodes (bulk update).
881
691
 
882
692
  ```ruby
883
- stats = ltm.stats
693
+ track_access(node_ids)
694
+ ```
884
695
 
885
- puts "Total memories: #{stats[:total_nodes]}"
886
- puts "Robots: #{stats[:active_robots]}"
887
- puts "Relationships: #{stats[:total_relationships]}"
888
- puts "Tags: #{stats[:total_tags]}"
696
+ Updates `access_count` and `last_accessed` for all specified nodes.
889
697
 
890
- # By type
891
- stats[:nodes_by_type].each do |type_info|
892
- puts "#{type_info['type']}: #{type_info['count']} nodes, avg importance #{type_info['avg_importance']}"
893
- end
698
+ ---
894
699
 
895
- # Database size
896
- size_mb = stats[:database_size] / 1024.0 / 1024.0
897
- puts "Database size: #{size_mb.round(2)} MB"
700
+ ### `stats()` {: #stats }
898
701
 
899
- # Robot activity
900
- stats[:robot_activity].each do |robot|
901
- puts "#{robot['name']}: last active #{robot['last_active']}"
902
- end
702
+ Get comprehensive memory statistics.
703
+
704
+ ```ruby
705
+ stats()
706
+ ```
707
+
708
+ #### Returns
709
+
710
+ ```ruby
711
+ {
712
+ total_nodes: 1234,
713
+ nodes_by_robot: { 1 => 500, 2 => 734 },
714
+ total_tags: 890,
715
+ oldest_memory: Time,
716
+ newest_memory: Time,
717
+ active_robots: 3,
718
+ robot_activity: [{ id:, name:, last_active: }, ...],
719
+ database_size: 12345678, # bytes
720
+ cache: { # Only if cache enabled
721
+ hits: 150,
722
+ misses: 50,
723
+ hit_rate: 75.0,
724
+ size: 200
725
+ }
726
+ }
903
727
  ```
904
728
 
905
729
  ---
906
730
 
907
- ## Database Schema Reference
731
+ ## Database Schema
908
732
 
909
733
  ### Tables Used
910
734
 
@@ -912,95 +736,68 @@ end
912
736
 
913
737
  Primary memory storage:
914
738
 
915
- - `id` - Serial primary key
916
- - `key` - Unique text identifier
917
- - `value` - Text content
918
- - `type` - Optional type
919
- - `category` - Optional category
920
- - `importance` - Float (0.0-10.0)
921
- - `token_count` - Integer
922
- - `robot_id` - Foreign key to robots
923
- - `embedding` - Vector (pgvector)
924
- - `created_at` - Timestamp
925
- - `last_accessed` - Timestamp
926
- - `in_working_memory` - Boolean
927
- - `evicted_at` - Timestamp (nullable)
928
-
929
- #### `relationships`
930
-
931
- Node relationships:
932
-
933
- - `id` - Serial primary key
934
- - `from_node_id` - Foreign key to nodes
935
- - `to_node_id` - Foreign key to nodes
936
- - `relationship_type` - Optional type
937
- - `strength` - Float (0.0-1.0)
938
- - `created_at` - Timestamp
939
-
940
- #### `tags`
941
-
942
- Node tags:
739
+ - `id` - BIGSERIAL primary key
740
+ - `content` - TEXT (the memory content)
741
+ - `content_hash` - VARCHAR(64) UNIQUE (SHA-256 for deduplication)
742
+ - `access_count` - INTEGER (retrieval count)
743
+ - `token_count` - INTEGER
744
+ - `embedding` - vector(2000)
745
+ - `embedding_dimension` - INTEGER
746
+ - `created_at`, `updated_at`, `last_accessed` - TIMESTAMPTZ
747
+ - `in_working_memory` - BOOLEAN
943
748
 
944
- - `id` - Serial primary key
945
- - `node_id` - Foreign key to nodes
946
- - `tag` - Text
947
- - `created_at` - Timestamp
749
+ #### `robot_nodes`
948
750
 
949
- #### `robots`
751
+ Robot-node associations (many-to-many):
950
752
 
951
- Robot registry:
753
+ - `id` - BIGSERIAL primary key
754
+ - `robot_id` - BIGINT FK
755
+ - `node_id` - BIGINT FK
756
+ - `first_remembered_at`, `last_remembered_at` - TIMESTAMPTZ
757
+ - `remember_count` - INTEGER
952
758
 
953
- - `id` - Text primary key
954
- - `name` - Text
955
- - `created_at` - Timestamp
956
- - `last_active` - Timestamp
759
+ #### `tags`
957
760
 
958
- #### `operations_log`
761
+ Hierarchical tag registry:
959
762
 
960
- Operation audit log:
763
+ - `id` - BIGSERIAL primary key
764
+ - `name` - TEXT UNIQUE (colon-separated hierarchy)
765
+ - `created_at` - TIMESTAMPTZ
961
766
 
962
- - `id` - Serial primary key
963
- - `operation` - Text
964
- - `node_id` - Foreign key to nodes (nullable)
965
- - `robot_id` - Foreign key to robots
966
- - `timestamp` - Timestamp
967
- - `details` - JSONB
767
+ #### `node_tags`
968
768
 
969
- ### Views
769
+ Node-tag associations (many-to-many):
970
770
 
971
- #### `node_stats`
771
+ - `node_id` - BIGINT FK
772
+ - `tag_id` - BIGINT FK
972
773
 
973
- Aggregated statistics by type:
774
+ ---
974
775
 
975
- ```sql
976
- SELECT type, COUNT(*) as count, AVG(importance) as avg_importance
977
- FROM nodes
978
- GROUP BY type
979
- ```
776
+ ## Performance Considerations
980
777
 
981
- #### `robot_activity`
778
+ ### Query Caching
982
779
 
983
- Robot activity summary:
780
+ Results are cached in an LRU cache with TTL:
984
781
 
985
- ```sql
986
- SELECT id, name, last_active
987
- FROM robots
988
- ORDER BY last_active DESC
782
+ ```ruby
783
+ # Check cache stats (the :cache key is present only when the cache is enabled)
784
+ stats = ltm.stats
785
+ puts "Cache hit rate: #{stats[:cache][:hit_rate]}%"
989
786
  ```
990
787
 
991
- ---
992
-
993
- ## Performance Considerations
788
+ Cache is automatically invalidated when:
789
+ - Nodes are added
790
+ - Nodes are deleted
994
791
 
995
792
  ### Indexing
996
793
 
997
794
  Automatic indexes:
998
795
 
999
- - `nodes.key` - Unique index for fast retrieval
1000
- - `nodes.embedding` - IVFFlat index for vector search
1001
- - `nodes.value` - GIN index for fulltext search
1002
- - `nodes.created_at` - B-tree index for time-range queries
1003
- - `relationships (from_node_id, to_node_id, relationship_type)` - Unique index
796
+ - `content_hash` - UNIQUE index for deduplication
797
+ - `embedding` - HNSW index for vector search
798
+ - `content` - GIN indexes for fulltext and trigram search
799
+ - `created_at` - B-tree for time-range queries
800
+ - `robot_nodes` and `node_tags` - Indexes on foreign keys
1004
801
 
1005
802
  ### Query Optimization
1006
803
 
@@ -1013,33 +810,8 @@ ltm.search(timeframe: (Time.at(0)..Time.now), ...)
1013
810
 
1014
811
  # Good: Reasonable limits
1015
812
  ltm.search_fulltext(query: "...", limit: 20)
1016
-
1017
- # Bad: Unlimited results
1018
- ltm.search_fulltext(query: "...", limit: 10000)
1019
813
  ```
1020
814
 
1021
- ### Connection Management
1022
-
1023
- Each method call:
1024
-
1025
- 1. Opens a new PostgreSQL connection
1026
- 2. Executes the query
1027
- 3. Closes the connection
1028
-
1029
- For bulk operations, this can be slow. Consider:
1030
-
1031
- - Using connection pooling (future enhancement)
1032
- - Batching operations when possible
1033
- - Caching frequently accessed data
1034
-
1035
- ### TimescaleDB Optimization
1036
-
1037
- The `nodes` table is a hypertable partitioned by `created_at`:
1038
-
1039
- - Automatic data partitioning by time
1040
- - Compression for data older than 30 days
1041
- - Optimized for time-series queries
1042
-
1043
815
  ---
1044
816
 
1045
817
  ## Error Handling
@@ -1051,39 +823,20 @@ The `nodes` table is a hypertable partitioned by `created_at`:
1051
823
  ltm = HTM::LongTermMemory.new(invalid_config)
1052
824
  # => PG::ConnectionBad
1053
825
 
1054
- # Unique constraint violations
1055
- ltm.add(key: "existing_key", ...)
826
+ # Unique constraint violations (rare with deduplication)
1056
827
  # => PG::UniqueViolation
1057
-
1058
- # Foreign key violations
1059
- ltm.add_relationship(from: "nonexistent", to: "key")
1060
- # No error - returns early if nodes don't exist
1061
828
  ```
1062
829
 
1063
830
  ### Best Practices
1064
831
 
1065
832
  ```ruby
1066
- # Wrap in rescue blocks
1067
- begin
1068
- node_id = ltm.add(key: key, ...)
1069
- rescue PG::UniqueViolation
1070
- # Key already exists
1071
- node = ltm.retrieve(key)
1072
- node_id = node['id'].to_i
1073
- end
1074
-
1075
833
  # Check existence before operations
1076
- if ltm.retrieve(key)
1077
- ltm.delete(key)
834
+ if ltm.exists?(node_id)
835
+ ltm.delete(node_id)
1078
836
  end
1079
837
 
1080
- # Validate before adding relationships
1081
- from_exists = ltm.get_node_id(from_key)
1082
- to_exists = ltm.get_node_id(to_key)
1083
-
1084
- if from_exists && to_exists
1085
- ltm.add_relationship(from: from_key, to: to_key)
1086
- end
838
+ # Use HTM#forget for safe deletion with confirmation
839
+ htm.forget(node_id, confirm: :confirmed)
1087
840
  ```
1088
841
 
1089
842
  ---
@@ -1092,5 +845,4 @@ end
1092
845
 
1093
846
  - [HTM API](htm.md) - Main class that uses LongTermMemory
1094
847
  - [WorkingMemory API](working-memory.md) - Token-limited active context
1095
- - [EmbeddingService API](embedding-service.md) - Vector embedding generation
1096
- - [Database API](database.md) - Schema setup and configuration
848
+ - [Database Schema](../development/schema.md) - Full schema documentation