htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,1096 @@
1
+ # LongTermMemory Class
2
+
3
+ PostgreSQL-backed permanent memory storage with RAG-based retrieval.
4
+
5
+ ## Overview
6
+
7
+ `HTM::LongTermMemory` provides durable storage for all memory nodes with advanced search capabilities:
8
+
9
+ - **Vector similarity search** - Semantic understanding via embeddings
10
+ - **Full-text search** - Fast keyword and phrase matching
11
+ - **Hybrid search** - Combines fulltext prefiltering with vector ranking
12
+ - **Time-range queries** - TimescaleDB-optimized temporal search
13
+ - **Relationship graphs** - Connect related knowledge
14
+ - **Tag system** - Flexible categorization
15
+ - **Multi-robot tracking** - Shared global memory
16
+
17
+ ## Class Definition
18
+
19
+ ```ruby
20
+ class HTM::LongTermMemory
21
+ # No public attributes
22
+ end
23
+ ```
24
+
25
+ ## Initialization
26
+
27
+ ### `new(config)` {: #new }
28
+
29
+ Create a new long-term memory instance.
30
+
31
+ ```ruby
32
+ HTM::LongTermMemory.new(config)
33
+ ```
34
+
35
+ #### Parameters
36
+
37
+ | Parameter | Type | Description |
38
+ |-----------|------|-------------|
39
+ | `config` | Hash | PostgreSQL connection configuration |
40
+
41
+ #### Configuration Hash
42
+
43
+ ```ruby
44
+ {
45
+ host: "hostname",
46
+ port: 5432,
47
+ dbname: "database_name",
48
+ user: "username",
49
+ password: "password",
50
+ sslmode: "require" # or "prefer", "disable"
51
+ }
52
+ ```
53
+
54
+ #### Returns
55
+
56
+ - `HTM::LongTermMemory` instance
57
+
58
+ #### Raises
59
+
60
+ - `RuntimeError` - If config is nil
61
+
62
+ #### Examples
63
+
64
+ ```ruby
65
+ # From environment variable
66
+ config = HTM::Database.default_config
67
+ ltm = HTM::LongTermMemory.new(config)
68
+
69
+ # Custom configuration
70
+ ltm = HTM::LongTermMemory.new(
71
+ host: 'localhost',
72
+ port: 5432,
73
+ dbname: 'htm_production',
74
+ user: 'htm_user',
75
+ password: ENV['DB_PASSWORD'],
76
+ sslmode: 'require'
77
+ )
78
+
79
+ # TimescaleDB Cloud
80
+ ltm = HTM::LongTermMemory.new(
81
+ host: 'xxx.tsdb.cloud.timescale.com',
82
+ port: 37807,
83
+ dbname: 'tsdb',
84
+ user: 'tsdbadmin',
85
+ password: ENV['HTM_DBPASS'],
86
+ sslmode: 'require'
87
+ )
88
+ ```
89
+
90
+ ---
91
+
92
+ ## Public Methods
93
+
94
+ ### `add(**params)` {: #add }
95
+
96
+ Add a node to long-term memory.
97
+
98
+ ```ruby
99
+ add(
100
+ key:,
101
+ value:,
102
+ type: nil,
103
+ category: nil,
104
+ importance: 1.0,
105
+ token_count: 0,
106
+ robot_id:,
107
+ embedding:
108
+ )
109
+ ```
110
+
111
+ #### Parameters
112
+
113
+ | Parameter | Type | Default | Description |
114
+ |-----------|------|---------|-------------|
115
+ | `key` | String | *required* | Unique node identifier |
116
+ | `value` | String | *required* | Node content |
117
+ | `type` | String, nil | `nil` | Node type |
118
+ | `category` | String, nil | `nil` | Node category |
119
+ | `importance` | Float | `1.0` | Importance score (0.0-10.0) |
120
+ | `token_count` | Integer | `0` | Token count |
121
+ | `robot_id` | String | *required* | Robot identifier |
122
+ | `embedding` | Array\<Float\> | *required* | Vector embedding |
123
+
124
+ #### Returns
125
+
126
+ - `Integer` - Database ID of the created node
127
+
128
+ #### Examples
129
+
130
+ ```ruby
131
+ embedding = embedding_service.embed("content...")
132
+
133
+ node_id = ltm.add(
134
+ key: "fact_001",
135
+ value: "PostgreSQL is our primary database",
136
+ type: "fact",
137
+ category: "architecture",
138
+ importance: 8.0,
139
+ token_count: 50,
140
+ robot_id: "robot-abc123",
141
+ embedding: embedding
142
+ )
143
+ # => 1234
144
+
145
+ # Minimal add
146
+ node_id = ltm.add(
147
+ key: "simple_note",
148
+ value: "Remember to check logs",
149
+ robot_id: robot_id,
150
+ embedding: embedding
151
+ )
152
+ ```
153
+
154
+ #### Notes
155
+
156
+ - `key` must be unique (enforced by database)
157
+ - `embedding` is stored as a pgvector type
158
+ - Automatically sets `created_at` timestamp
159
+
160
+ ---
161
+
162
+ ### `retrieve(key)` {: #retrieve }
163
+
164
+ Retrieve a node by its key.
165
+
166
+ ```ruby
167
+ retrieve(key)
168
+ ```
169
+
170
+ #### Parameters
171
+
172
+ | Parameter | Type | Description |
173
+ |-----------|------|-------------|
174
+ | `key` | String | Node identifier |
175
+
176
+ #### Returns
177
+
178
+ - `Hash` - Node data if found
179
+ - `nil` - If node doesn't exist
180
+
181
+ #### Hash Structure
182
+
183
+ ```ruby
184
+ {
185
+ "id" => "123",
186
+ "key" => "fact_001",
187
+ "value" => "content...",
188
+ "type" => "fact",
189
+ "category" => "architecture",
190
+ "importance" => "8.0",
191
+ "token_count" => "50",
192
+ "robot_id" => "robot-abc123",
193
+ "created_at" => "2025-01-15 10:30:00",
194
+ "last_accessed" => "2025-01-15 14:20:00",
195
+ "in_working_memory" => "t",
196
+ "evicted_at" => nil
197
+ }
198
+ ```
199
+
200
+ #### Examples
201
+
202
+ ```ruby
203
+ node = ltm.retrieve("fact_001")
204
+
205
+ if node
206
+ puts node['value']
207
+ puts "Created: #{node['created_at']}"
208
+ puts "Importance: #{node['importance']}"
209
+ else
210
+ puts "Node not found"
211
+ end
212
+ ```
213
+
214
+ ---
215
+
216
+ ### `update_last_accessed(key)` {: #update_last_accessed }
217
+
218
+ Update the last accessed timestamp for a node.
219
+
220
+ ```ruby
221
+ update_last_accessed(key)
222
+ ```
223
+
224
+ #### Parameters
225
+
226
+ | Parameter | Type | Description |
227
+ |-----------|------|-------------|
228
+ | `key` | String | Node identifier |
229
+
230
+ #### Returns
231
+
232
+ - `void`
233
+
234
+ #### Examples
235
+
236
+ ```ruby
237
+ # After retrieving a node
238
+ node = ltm.retrieve("important_fact")
239
+ ltm.update_last_accessed("important_fact")
240
+
241
+ # Track access patterns
242
+ accessed_keys = ["key1", "key2", "key3"]
243
+ accessed_keys.each { |k| ltm.update_last_accessed(k) }
244
+ ```
245
+
246
+ ---
247
+
248
+ ### `delete(key)` {: #delete }
249
+
250
+ Delete a node permanently.
251
+
252
+ ```ruby
253
+ delete(key)
254
+ ```
255
+
256
+ #### Parameters
257
+
258
+ | Parameter | Type | Description |
259
+ |-----------|------|-------------|
260
+ | `key` | String | Node identifier |
261
+
262
+ #### Returns
263
+
264
+ - `void`
265
+
266
+ #### Side Effects
267
+
268
+ - Deletes node from database
269
+ - Cascades to related relationships and tags
270
+
271
+ #### Examples
272
+
273
+ ```ruby
274
+ # Delete a node
275
+ ltm.delete("temp_note_123")
276
+
277
+ # Safe deletion
278
+ if ltm.retrieve("old_key")
279
+ ltm.delete("old_key")
280
+ end
281
+ ```
282
+
283
+ #### Warning
284
+
285
+ Deletion is **permanent** and cannot be undone. Use `HTM#forget` instead, which provides a proper confirmation flow.
286
+
287
+ ---
288
+
289
+ ### `get_node_id(key)` {: #get_node_id }
290
+
291
+ Get the database ID for a node.
292
+
293
+ ```ruby
294
+ get_node_id(key)
295
+ ```
296
+
297
+ #### Parameters
298
+
299
+ | Parameter | Type | Description |
300
+ |-----------|------|-------------|
301
+ | `key` | String | Node identifier |
302
+
303
+ #### Returns
304
+
305
+ - `Integer` - Database ID if found
306
+ - `nil` - If node doesn't exist
307
+
308
+ #### Examples
309
+
310
+ ```ruby
311
+ node_id = ltm.get_node_id("fact_001")
312
+ # => 123
313
+
314
+ # Use in relationships
315
+ from_id = ltm.get_node_id("decision_001")
316
+ to_id = ltm.get_node_id("fact_001")
317
+ ```
318
+
319
+ ---
320
+
321
+ ### `search(**params)` {: #search }
322
+
323
+ Vector similarity search using embeddings.
324
+
325
+ ```ruby
326
+ search(
327
+ timeframe:,
328
+ query:,
329
+ limit:,
330
+ embedding_service:
331
+ )
332
+ ```
333
+
334
+ #### Parameters
335
+
336
+ | Parameter | Type | Description |
337
+ |-----------|------|---------|
338
+ | `timeframe` | Range | Time range to search (Time..Time) |
339
+ | `query` | String | Search query text |
340
+ | `limit` | Integer | Maximum results |
341
+ | `embedding_service` | Object | Service to generate query embedding |
342
+
343
+ #### Returns
344
+
345
+ - `Array<Hash>` - Matching nodes sorted by similarity (highest first)
346
+
347
+ #### Hash Structure
348
+
349
+ ```ruby
350
+ {
351
+ "id" => "123",
352
+ "key" => "fact_001",
353
+ "value" => "content...",
354
+ "type" => "fact",
355
+ "category" => "architecture",
356
+ "importance" => "8.0",
357
+ "created_at" => "2025-01-15 10:30:00",
358
+ "robot_id" => "robot-abc123",
359
+ "token_count" => "50",
360
+ "similarity" => "0.8745" # 0.0-1.0, higher = more similar
361
+ }
362
+ ```
363
+
364
+ #### Examples
365
+
366
+ ```ruby
367
+ # Semantic search
368
+ timeframe = (Time.now - 7*24*3600)..Time.now
369
+
370
+ results = ltm.search(
371
+ timeframe: timeframe,
372
+ query: "database performance optimization",
373
+ limit: 20,
374
+ embedding_service: embedding_service
375
+ )
376
+
377
+ results.each do |node|
378
+ puts "[#{node['similarity']}] #{node['value']}"
379
+ end
380
+
381
+ # Find similar to a specific concept
382
+ results = ltm.search(
383
+ timeframe: (Time.at(0)..Time.now), # All time
384
+ query: "microservices architecture patterns",
385
+ limit: 10,
386
+ embedding_service: embedding_service
387
+ )
388
+
389
+ # Filter by similarity threshold
390
+ high_similarity = results.select { |n| n['similarity'].to_f > 0.7 }
391
+ ```
392
+
393
+ #### Technical Details
394
+
395
+ - Uses pgvector's `<=>` cosine distance operator
396
+ - Returns `1 - distance` (cosine similarity) as the similarity score; nominally -1.0 to 1.0, typically 0.0-1.0 for text embeddings
397
+ - Indexed for fast approximate nearest neighbor search
398
+ - Query embedding is generated on-the-fly
399
+
400
+ ---
401
+
402
+ ### `search_fulltext(**params)` {: #search_fulltext }
403
+
404
+ Full-text search using PostgreSQL's text search.
405
+
406
+ ```ruby
407
+ search_fulltext(
408
+ timeframe:,
409
+ query:,
410
+ limit:
411
+ )
412
+ ```
413
+
414
+ #### Parameters
415
+
416
+ | Parameter | Type | Description |
417
+ |-----------|------|-------------|
418
+ | `timeframe` | Range | Time range to search |
419
+ | `query` | String | Search query |
420
+ | `limit` | Integer | Maximum results |
421
+
422
+ #### Returns
423
+
424
+ - `Array<Hash>` - Matching nodes sorted by rank (highest first)
425
+
426
+ #### Hash Structure
427
+
428
+ Similar to `search`, but with `"rank"` instead of `"similarity"`:
429
+
430
+ ```ruby
431
+ {
432
+ ...,
433
+ "rank" => "0.456" # Higher = better match
434
+ }
435
+ ```
436
+
437
+ #### Examples
438
+
439
+ ```ruby
440
+ # Keyword search (plainto_tsquery requires all terms to match; it is not an exact phrase match)
441
+ results = ltm.search_fulltext(
442
+ timeframe: (Time.now - 30*24*3600)..Time.now,
443
+ query: "PostgreSQL connection pooling",
444
+ limit: 10
445
+ )
446
+
447
+ # Multiple keywords
448
+ results = ltm.search_fulltext(
449
+ timeframe: (Time.now - 7*24*3600)..Time.now,
450
+ query: "API authentication JWT token",
451
+ limit: 20
452
+ )
453
+
454
+ # Find mentions
455
+ results = ltm.search_fulltext(
456
+ timeframe: (Time.at(0)..Time.now),
457
+ query: "security vulnerability",
458
+ limit: 50
459
+ )
460
+
461
+ results.each do |node|
462
+ puts "[#{node['rank']}] #{node['created_at']}: #{node['value']}"
463
+ end
464
+ ```
465
+
466
+ #### Technical Details
467
+
468
+ - Uses PostgreSQL `to_tsvector` and `plainto_tsquery`
469
+ - English language stemming and stop words
470
+ - GIN index for fast search
471
+ - Ranks by `ts_rank` (term frequency)
472
+
473
+ ---
474
+
475
+ ### `search_hybrid(**params)` {: #search_hybrid }
476
+
477
+ Hybrid search combining fulltext prefiltering with vector ranking.
478
+
479
+ ```ruby
480
+ search_hybrid(
481
+ timeframe:,
482
+ query:,
483
+ limit:,
484
+ embedding_service:,
485
+ prefilter_limit: 100
486
+ )
487
+ ```
488
+
489
+ #### Parameters
490
+
491
+ | Parameter | Type | Default | Description |
492
+ |-----------|------|---------|-------------|
493
+ | `timeframe` | Range | *required* | Time range to search |
494
+ | `query` | String | *required* | Search query |
495
+ | `limit` | Integer | *required* | Maximum final results |
496
+ | `embedding_service` | Object | *required* | Service for embeddings |
497
+ | `prefilter_limit` | Integer | `100` | Fulltext candidates to consider |
498
+
499
+ #### Returns
500
+
501
+ - `Array<Hash>` - Matching nodes sorted by vector similarity
502
+
503
+ #### Strategy
504
+
505
+ 1. **Prefilter**: Use fulltext search to find up to `prefilter_limit` candidates
506
+ 2. **Rank**: Compute vector similarity for candidates only
507
+ 3. **Return**: Top `limit` results by similarity
508
+
509
+ This combines the keyword **precision** of fulltext matching with the **semantic understanding** of vector similarity.
510
+
511
+ #### Examples
512
+
513
+ ```ruby
514
+ # Best of both worlds
515
+ results = ltm.search_hybrid(
516
+ timeframe: (Time.now - 30*24*3600)..Time.now,
517
+ query: "API rate limiting implementation",
518
+ limit: 15,
519
+ embedding_service: embedding_service,
520
+ prefilter_limit: 100
521
+ )
522
+
523
+ # Adjust prefilter for performance
524
+ results = ltm.search_hybrid(
525
+ timeframe: timeframe,
526
+ query: "security best practices",
527
+ limit: 20,
528
+ embedding_service: embedding_service,
529
+ prefilter_limit: 50 # Smaller = faster
530
+ )
531
+
532
+ # Large candidate pool for better recall
533
+ results = ltm.search_hybrid(
534
+ timeframe: timeframe,
535
+ query: "deployment strategies",
536
+ limit: 10,
537
+ embedding_service: embedding_service,
538
+ prefilter_limit: 200 # Larger = better recall
539
+ )
540
+ ```
541
+
542
+ #### Performance Tuning
543
+
544
+ | `prefilter_limit` | Speed | Recall | Use Case |
545
+ |-------------------|-------|--------|----------|
546
+ | 50 | Fast | Low | Common queries |
547
+ | 100 | Medium | Medium | Default (recommended) |
548
+ | 200+ | Slow | High | Rare/complex queries |
549
+
550
+ ---
551
+
552
+ ### `add_relationship(**params)` {: #add_relationship }
553
+
554
+ Add a relationship between two nodes.
555
+
556
+ ```ruby
557
+ add_relationship(
558
+ from:,
559
+ to:,
560
+ type: nil,
561
+ strength: 1.0
562
+ )
563
+ ```
564
+
565
+ #### Parameters
566
+
567
+ | Parameter | Type | Default | Description |
568
+ |-----------|------|---------|-------------|
569
+ | `from` | String | *required* | From node key |
570
+ | `to` | String | *required* | To node key |
571
+ | `type` | String, nil | `nil` | Relationship type |
572
+ | `strength` | Float | `1.0` | Relationship strength (0.0-1.0) |
573
+
574
+ #### Returns
575
+
576
+ - `void`
577
+
578
+ #### Side Effects
579
+
580
+ - Inserts relationship into `relationships` table
581
+ - Skips if relationship already exists (ON CONFLICT DO NOTHING)
582
+ - Returns early if either node doesn't exist
583
+
584
+ #### Examples
585
+
586
+ ```ruby
587
+ # Simple relationship
588
+ ltm.add_relationship(
589
+ from: "decision_001",
590
+ to: "fact_001"
591
+ )
592
+
593
+ # Typed relationship with strength
594
+ ltm.add_relationship(
595
+ from: "api_v2",
596
+ to: "api_v1",
597
+ type: "replaces",
598
+ strength: 0.9
599
+ )
600
+
601
+ # Build knowledge graph
602
+ ltm.add_relationship(from: "microservices", to: "docker", type: "requires")
603
+ ltm.add_relationship(from: "microservices", to: "api_gateway", type: "requires")
604
+ ltm.add_relationship(from: "microservices", to: "service_mesh", type: "optional")
605
+
606
+ # Related decisions
607
+ ltm.add_relationship(
608
+ from: "database_choice",
609
+ to: "timescaledb_decision",
610
+ type: "influences",
611
+ strength: 0.8
612
+ )
613
+ ```
614
+
615
+ ---
616
+
617
+ ### `add_tag(**params)` {: #add_tag }
618
+
619
+ Add a tag to a node.
620
+
621
+ ```ruby
622
+ add_tag(node_id:, tag:)
623
+ ```
624
+
625
+ #### Parameters
626
+
627
+ | Parameter | Type | Description |
628
+ |-----------|------|-------------|
629
+ | `node_id` | Integer | Node database ID |
630
+ | `tag` | String | Tag name |
631
+
632
+ #### Returns
633
+
634
+ - `void`
635
+
636
+ #### Side Effects
637
+
638
+ - Inserts tag into `tags` table
639
+ - Skips if tag already exists (ON CONFLICT DO NOTHING)
640
+
641
+ #### Examples
642
+
643
+ ```ruby
644
+ node_id = ltm.add(key: "fact_001", ...)
645
+
646
+ # Add single tag
647
+ ltm.add_tag(node_id: node_id, tag: "architecture")
648
+
649
+ # Add multiple tags
650
+ ["architecture", "database", "postgresql"].each do |tag|
651
+ ltm.add_tag(node_id: node_id, tag: tag)
652
+ end
653
+
654
+ # Categorize decision
655
+ decision_id = ltm.add(key: "decision_001", ...)
656
+ ltm.add_tag(node_id: decision_id, tag: "critical")
657
+ ltm.add_tag(node_id: decision_id, tag: "security")
658
+ ltm.add_tag(node_id: decision_id, tag: "2025-q1")
659
+ ```
660
+
661
+ ---
662
+
663
+ ### `mark_evicted(keys)` {: #mark_evicted }
664
+
665
+ Mark nodes as evicted from working memory.
666
+
667
+ ```ruby
668
+ mark_evicted(keys)
669
+ ```
670
+
671
+ #### Parameters
672
+
673
+ | Parameter | Type | Description |
674
+ |-----------|------|-------------|
675
+ | `keys` | Array\<String\> | Node keys to mark |
676
+
677
+ #### Returns
678
+
679
+ - `void`
680
+
681
+ #### Side Effects
682
+
683
+ - Sets `in_working_memory = FALSE` for specified nodes
684
+ - Sets `evicted_at` timestamp
685
+
686
+ #### Examples
687
+
688
+ ```ruby
689
+ # Mark single eviction
690
+ ltm.mark_evicted(["temp_note_123"])
691
+
692
+ # Mark batch eviction
693
+ evicted_keys = ["key1", "key2", "key3"]
694
+ ltm.mark_evicted(evicted_keys)
695
+
696
+ # From working memory eviction
697
+ evicted = working_memory.evict_to_make_space(10000)
698
+ evicted_keys = evicted.map { |n| n[:key] }
699
+ ltm.mark_evicted(evicted_keys) unless evicted_keys.empty?
700
+ ```
701
+
702
+ ---
703
+
704
+ ### `register_robot(robot_id, robot_name)` {: #register_robot }
705
+
706
+ Register a robot in the system.
707
+
708
+ ```ruby
709
+ register_robot(robot_id, robot_name)
710
+ ```
711
+
712
+ #### Parameters
713
+
714
+ | Parameter | Type | Description |
715
+ |-----------|------|-------------|
716
+ | `robot_id` | String | Robot identifier |
717
+ | `robot_name` | String | Robot name |
718
+
719
+ #### Returns
720
+
721
+ - `void`
722
+
723
+ #### Side Effects
724
+
725
+ - Inserts robot into `robots` table
726
+ - Updates `name` and `last_active` if the robot already exists
727
+
728
+ #### Examples
729
+
730
+ ```ruby
731
+ ltm.register_robot("robot-abc123", "Code Assistant")
732
+ ltm.register_robot("robot-def456", "Research Bot")
733
+
734
+ # Register with UUID
735
+ robot_id = SecureRandom.uuid
736
+ ltm.register_robot(robot_id, "Analysis Bot")
737
+ ```
738
+
739
+ ---
740
+
741
+ ### `update_robot_activity(robot_id)` {: #update_robot_activity }
742
+
743
+ Update robot's last activity timestamp.
744
+
745
+ ```ruby
746
+ update_robot_activity(robot_id)
747
+ ```
748
+
749
+ #### Parameters
750
+
751
+ | Parameter | Type | Description |
752
+ |-----------|------|-------------|
753
+ | `robot_id` | String | Robot identifier |
754
+
755
+ #### Returns
756
+
757
+ - `void`
758
+
759
+ #### Examples
760
+
761
+ ```ruby
762
+ # Update after operations
763
+ ltm.update_robot_activity("robot-abc123")
764
+
765
+ # Automatic heartbeat
766
+ loop do
767
+ ltm.update_robot_activity(robot_id)
768
+ sleep 60 # Every minute
769
+ end
770
+ ```
771
+
772
+ ---
773
+
774
+ ### `log_operation(**params)` {: #log_operation }
775
+
776
+ Log an operation to the operations log.
777
+
778
+ ```ruby
779
+ log_operation(
780
+ operation:,
781
+ node_id:,
782
+ robot_id:,
783
+ details:
784
+ )
785
+ ```
786
+
787
+ #### Parameters
788
+
789
+ | Parameter | Type | Description |
790
+ |-----------|------|---------|
791
+ | `operation` | String | Operation type |
792
+ | `node_id` | Integer, nil | Node database ID (can be nil) |
793
+ | `robot_id` | String | Robot identifier |
794
+ | `details` | Hash | Operation details (stored as JSON) |
795
+
796
+ #### Returns
797
+
798
+ - `void`
799
+
800
+ #### Examples
801
+
802
+ ```ruby
803
+ # Log add operation
804
+ ltm.log_operation(
805
+ operation: 'add',
806
+ node_id: 123,
807
+ robot_id: robot_id,
808
+ details: { key: "fact_001", type: "fact" }
809
+ )
810
+
811
+ # Log recall operation
812
+ ltm.log_operation(
813
+ operation: 'recall',
814
+ node_id: nil,
815
+ robot_id: robot_id,
816
+ details: {
817
+ timeframe: "last week",
818
+ topic: "postgresql",
819
+ count: 15
820
+ }
821
+ )
822
+
823
+ # Log forget operation
824
+ ltm.log_operation(
825
+ operation: 'forget',
826
+ node_id: 456,
827
+ robot_id: robot_id,
828
+ details: { key: "temp_note", reason: "temporary" }
829
+ )
830
+ ```
831
+
832
+ ---
833
+
834
+ ### `stats()` {: #stats }
835
+
836
+ Get comprehensive memory statistics.
837
+
838
+ ```ruby
839
+ stats()
840
+ ```
841
+
842
+ #### Returns
843
+
844
+ - `Hash` - Statistics hash
845
+
846
+ #### Hash Structure
847
+
848
+ ```ruby
849
+ {
850
+ total_nodes: 1234,
851
+
852
+ nodes_by_robot: {
853
+ "robot-abc123" => 500,
854
+ "robot-def456" => 734
855
+ },
856
+
857
+ nodes_by_type: [
858
+ { "type" => "fact", "count" => 400, "avg_importance" => 6.5 },
859
+ { "type" => "decision", "count" => 200, "avg_importance" => 8.2 },
860
+ ...
861
+ ],
862
+
863
+ total_relationships: 567,
864
+ total_tags: 890,
865
+
866
+ oldest_memory: "2025-01-01 12:00:00",
867
+ newest_memory: "2025-01-15 14:30:00",
868
+
869
+ active_robots: 3,
870
+
871
+ robot_activity: [
872
+ { "id" => "robot-1", "name" => "Assistant", "last_active" => "2025-01-15 14:00:00" },
873
+ ...
874
+ ],
875
+
876
+ database_size: 12345678 # bytes
877
+ }
878
+ ```
879
+
880
+ #### Examples
881
+
882
+ ```ruby
883
+ stats = ltm.stats
884
+
885
+ puts "Total memories: #{stats[:total_nodes]}"
886
+ puts "Robots: #{stats[:active_robots]}"
887
+ puts "Relationships: #{stats[:total_relationships]}"
888
+ puts "Tags: #{stats[:total_tags]}"
889
+
890
+ # By type
891
+ stats[:nodes_by_type].each do |type_info|
892
+ puts "#{type_info['type']}: #{type_info['count']} nodes, avg importance #{type_info['avg_importance']}"
893
+ end
894
+
895
+ # Database size
896
+ size_mb = stats[:database_size] / 1024.0 / 1024.0
897
+ puts "Database size: #{size_mb.round(2)} MB"
898
+
899
+ # Robot activity
900
+ stats[:robot_activity].each do |robot|
901
+ puts "#{robot['name']}: last active #{robot['last_active']}"
902
+ end
903
+ ```
904
+
905
+ ---
906
+
907
+ ## Database Schema Reference
908
+
909
+ ### Tables Used
910
+
911
+ #### `nodes`
912
+
913
+ Primary memory storage:
914
+
915
+ - `id` - Serial primary key
916
+ - `key` - Unique text identifier
917
+ - `value` - Text content
918
+ - `type` - Optional type
919
+ - `category` - Optional category
920
+ - `importance` - Float (0.0-10.0)
921
+ - `token_count` - Integer
922
+ - `robot_id` - Foreign key to robots
923
+ - `embedding` - Vector (pgvector)
924
+ - `created_at` - Timestamp
925
+ - `last_accessed` - Timestamp
926
+ - `in_working_memory` - Boolean
927
+ - `evicted_at` - Timestamp (nullable)
928
+
929
+ #### `relationships`
930
+
931
+ Node relationships:
932
+
933
+ - `id` - Serial primary key
934
+ - `from_node_id` - Foreign key to nodes
935
+ - `to_node_id` - Foreign key to nodes
936
+ - `relationship_type` - Optional type
937
+ - `strength` - Float (0.0-1.0)
938
+ - `created_at` - Timestamp
939
+
940
+ #### `tags`
941
+
942
+ Node tags:
943
+
944
+ - `id` - Serial primary key
945
+ - `node_id` - Foreign key to nodes
946
+ - `tag` - Text
947
+ - `created_at` - Timestamp
948
+
949
+ #### `robots`
950
+
951
+ Robot registry:
952
+
953
+ - `id` - Text primary key
954
+ - `name` - Text
955
+ - `created_at` - Timestamp
956
+ - `last_active` - Timestamp
957
+
958
+ #### `operations_log`
959
+
960
+ Operation audit log:
961
+
962
+ - `id` - Serial primary key
963
+ - `operation` - Text
964
+ - `node_id` - Foreign key to nodes (nullable)
965
+ - `robot_id` - Foreign key to robots
966
+ - `timestamp` - Timestamp
967
+ - `details` - JSONB
968
+
969
+ ### Views
970
+
971
+ #### `node_stats`
972
+
973
+ Aggregated statistics by type:
974
+
975
+ ```sql
976
+ SELECT type, COUNT(*) as count, AVG(importance) as avg_importance
977
+ FROM nodes
978
+ GROUP BY type
979
+ ```
980
+
981
+ #### `robot_activity`
982
+
983
+ Robot activity summary:
984
+
985
+ ```sql
986
+ SELECT id, name, last_active
987
+ FROM robots
988
+ ORDER BY last_active DESC
989
+ ```
990
+
991
+ ---
992
+
993
+ ## Performance Considerations
994
+
995
+ ### Indexing
996
+
997
+ Automatic indexes:
998
+
999
+ - `nodes.key` - Unique index for fast retrieval
1000
+ - `nodes.embedding` - IVFFlat index for vector search
1001
+ - `nodes.value` - GIN index for fulltext search
1002
+ - `nodes.created_at` - B-tree index for time-range queries
1003
+ - `relationships (from_node_id, to_node_id, relationship_type)` - Unique index
1004
+
1005
+ ### Query Optimization
1006
+
1007
+ ```ruby
1008
+ # Good: Time-limited searches
1009
+ ltm.search(timeframe: (Time.now - 7*24*3600)..Time.now, ...)
1010
+
1011
+ # Bad: All-time searches (slow)
1012
+ ltm.search(timeframe: (Time.at(0)..Time.now), ...)
1013
+
1014
+ # Good: Reasonable limits
1015
+ ltm.search_fulltext(timeframe: timeframe, query: "...", limit: 20)
1016
+
1017
+ # Bad: Excessively large limits
1018
+ ltm.search_fulltext(timeframe: timeframe, query: "...", limit: 10000)
1019
+ ```
1020
+
1021
+ ### Connection Management
1022
+
1023
+ Each method call:
1024
+
1025
+ 1. Opens a new PostgreSQL connection
1026
+ 2. Executes the query
1027
+ 3. Closes the connection
1028
+
1029
+ For bulk operations, this can be slow. Consider:
1030
+
1031
+ - Using connection pooling (future enhancement)
1032
+ - Batching operations when possible
1033
+ - Caching frequently accessed data
1034
+
1035
+ ### TimescaleDB Optimization
1036
+
1037
+ The `nodes` table is a hypertable partitioned by `created_at`:
1038
+
1039
+ - Automatic data partitioning by time
1040
+ - Compression for data older than 30 days
1041
+ - Optimized for time-series queries
1042
+
1043
+ ---
1044
+
1045
+ ## Error Handling
1046
+
1047
+ ### PG::Error
1048
+
1049
+ ```ruby
1050
+ # Connection errors
1051
+ ltm = HTM::LongTermMemory.new(invalid_config)
1052
+ # => PG::ConnectionBad
1053
+
1054
+ # Unique constraint violations
1055
+ ltm.add(key: "existing_key", ...)
1056
+ # => PG::UniqueViolation
1057
+
1058
+ # Relationships that reference nonexistent nodes
1059
+ ltm.add_relationship(from: "nonexistent", to: "key")
1060
+ # No error - returns early if nodes don't exist
1061
+ ```
1062
+
1063
+ ### Best Practices
1064
+
1065
+ ```ruby
1066
+ # Wrap in rescue blocks
1067
+ begin
1068
+ node_id = ltm.add(key: key, ...)
1069
+ rescue PG::UniqueViolation
1070
+ # Key already exists
1071
+ node = ltm.retrieve(key)
1072
+ node_id = node['id'].to_i
1073
+ end
1074
+
1075
+ # Check existence before operations
1076
+ if ltm.retrieve(key)
1077
+ ltm.delete(key)
1078
+ end
1079
+
1080
+ # Validate before adding relationships
1081
+ from_exists = ltm.get_node_id(from_key)
1082
+ to_exists = ltm.get_node_id(to_key)
1083
+
1084
+ if from_exists && to_exists
1085
+ ltm.add_relationship(from: from_key, to: to_key)
1086
+ end
1087
+ ```
1088
+
1089
+ ---
1090
+
1091
+ ## See Also
1092
+
1093
+ - [HTM API](htm.md) - Main class that uses LongTermMemory
1094
+ - [WorkingMemory API](working-memory.md) - Token-limited active context
1095
+ - [EmbeddingService API](embedding-service.md) - Vector embedding generation
1096
+ - [Database API](database.md) - Schema setup and configuration