htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,941 @@
1
+ # Long-term Memory
2
+
3
+ Long-term memory is HTM's durable PostgreSQL storage layer. This guide covers database operations, maintenance, performance optimization, and advanced queries.
4
+
5
+ ## Architecture Overview
6
+
7
+ Long-term memory provides:
8
+
9
+ - **Permanent storage** for all memories
10
+ - **Vector embeddings** via pgvector
11
+ - **Full-text search** via PostgreSQL's tsvector
12
+ - **Time-series optimization** via TimescaleDB hypertables
13
+ - **Relationship graphs** for knowledge connections
14
+ - **Audit logging** for all operations
15
+
16
+ ![Long-term Memory Architecture](../assets/images/htm-long-term-memory-architecture.svg)
17
+
18
+ ## Database Schema
19
+
20
+ ### Nodes Table
21
+
22
+ The primary storage for memories:
23
+
24
+ ```sql
25
+ CREATE TABLE nodes (
26
+ id BIGSERIAL PRIMARY KEY,
27
+ key TEXT NOT NULL UNIQUE,
28
+ value TEXT NOT NULL,
29
+ type TEXT,
30
+ category TEXT,
31
+ importance FLOAT DEFAULT 1.0,
32
+ token_count INTEGER DEFAULT 0,
33
+ robot_id TEXT NOT NULL,
34
+ embedding vector(1536), -- pgvector type
35
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
36
+ last_accessed TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
37
+ in_working_memory BOOLEAN DEFAULT TRUE
38
+ );
39
+
40
+ -- Indexes
41
+ CREATE INDEX idx_nodes_robot_id ON nodes(robot_id);
42
+ CREATE INDEX idx_nodes_type ON nodes(type);
43
+ CREATE INDEX idx_nodes_created_at ON nodes(created_at);
44
+ CREATE INDEX idx_nodes_embedding ON nodes USING hnsw(embedding vector_cosine_ops);
45
+ CREATE INDEX idx_nodes_fulltext ON nodes USING gin(to_tsvector('english', value));
46
+ ```
47
+
48
+ ### Relationships Table
49
+
50
+ Tracks connections between nodes:
51
+
52
+ ```sql
53
+ CREATE TABLE relationships (
54
+ id BIGSERIAL PRIMARY KEY,
55
+ from_node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
56
+ to_node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
57
+ relationship_type TEXT,
58
+ strength FLOAT DEFAULT 1.0,
59
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
60
+ UNIQUE(from_node_id, to_node_id, relationship_type)
61
+ );
62
+ ```
63
+
64
+ ### Tags Table
65
+
66
+ Flexible categorization:
67
+
68
+ ```sql
69
+ CREATE TABLE tags (
70
+ id BIGSERIAL PRIMARY KEY,
71
+ node_id BIGINT REFERENCES nodes(id) ON DELETE CASCADE,
72
+ tag TEXT NOT NULL,
73
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
74
+ UNIQUE(node_id, tag)
75
+ );
76
+
77
+ CREATE INDEX idx_tags_tag ON tags(tag);
78
+ ```
79
+
80
+ ### Operations Log Table (Hypertable)
81
+
82
+ Audit trail with time-series optimization:
83
+
84
+ ```sql
85
+ CREATE TABLE operations_log (
86
+ time TIMESTAMPTZ NOT NULL,
87
+ operation TEXT NOT NULL,
88
+ node_id BIGINT,
89
+ robot_id TEXT NOT NULL,
90
+ details JSONB,
91
+ PRIMARY KEY (time, operation, robot_id)
92
+ );
93
+
94
+ -- Convert to hypertable
95
+ SELECT create_hypertable('operations_log', 'time');
96
+ ```
97
+
98
+ ### Robots Table
99
+
100
+ Robot registry:
101
+
102
+ ```sql
103
+ CREATE TABLE robots (
104
+ id TEXT PRIMARY KEY,
105
+ name TEXT NOT NULL,
106
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
107
+ last_active TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
108
+ );
109
+ ```
110
+
111
+ ## Database Operations
112
+
113
+ ### Direct Database Queries
114
+
115
+ While HTM provides a high-level API, you can query the database directly:
116
+
117
+ ```ruby
118
+ require 'pg'
119
+
120
+ # Get connection config
121
+ config = HTM::Database.default_config
122
+
123
+ # Execute raw queries
124
+ conn = PG.connect(config)
125
+
126
+ # Query nodes
127
+ result = conn.exec("SELECT * FROM nodes WHERE type = 'decision' LIMIT 10")
128
+ result.each do |row|
129
+ puts "#{row['key']}: #{row['value']}"
130
+ end
131
+
132
+ # Query with parameters
133
+ result = conn.exec_params(
134
+ "SELECT * FROM nodes WHERE robot_id = $1 AND importance >= $2",
135
+ ["your-robot-id", 8.0]
136
+ )
137
+
138
+ conn.close
139
+ ```
140
+
141
+ ### Using LongTermMemory Directly
142
+
143
+ Access the long-term memory layer:
144
+
145
+ ```ruby
146
+ ltm = HTM::LongTermMemory.new(HTM::Database.default_config)
147
+
148
+ # Add a node
149
+ node_id = ltm.add(
150
+ key: "test_001",
151
+ value: "Test memory",
152
+ type: :fact,
153
+ importance: 7.0,
154
+ token_count: 10,
155
+ robot_id: "test-robot",
156
+ embedding: Array.new(1536) { rand }
157
+ )
158
+
159
+ # Retrieve a node
160
+ node = ltm.retrieve("test_001")
161
+
162
+ # Update last accessed
163
+ ltm.update_last_accessed("test_001")
164
+
165
+ # Delete a node
166
+ ltm.delete("test_001")
167
+ ```
168
+
169
+ ## Memory Statistics
170
+
171
+ Get comprehensive statistics:
172
+
173
+ ```ruby
174
+ stats = htm.memory_stats
175
+
176
+ # Total nodes
177
+ puts "Total nodes: #{stats[:total_nodes]}"
178
+
179
+ # Nodes by robot
180
+ stats[:nodes_by_robot].each do |robot_id, count|
181
+ puts "#{robot_id}: #{count} nodes"
182
+ end
183
+
184
+ # Nodes by type
185
+ stats[:nodes_by_type].each do |row|
186
+ puts "Type #{row['type']}: #{row['count']} nodes"
187
+ end
188
+
189
+ # Relationships
190
+ puts "Total relationships: #{stats[:total_relationships]}"
191
+
192
+ # Tags
193
+ puts "Total tags: #{stats[:total_tags]}"
194
+
195
+ # Time range
196
+ puts "Oldest memory: #{stats[:oldest_memory]}"
197
+ puts "Newest memory: #{stats[:newest_memory]}"
198
+
199
+ # Database size
200
+ size_mb = stats[:database_size] / (1024.0 * 1024.0)
201
+ puts "Database size: #{size_mb.round(2)} MB"
202
+
203
+ # Active robots
204
+ puts "Active robots: #{stats[:active_robots]}"
205
+ stats[:robot_activity].each do |robot|
206
+ puts " #{robot['name']}: last active #{robot['last_active']}"
207
+ end
208
+ ```
209
+
210
+ ## Advanced Queries
211
+
212
+ ### Query by Date Range
213
+
214
+ ```ruby
215
+ # Get all memories from a specific month
216
+ start_date = Time.new(2024, 1, 1)
217
+ end_date = Time.new(2024, 1, 31, 23, 59, 59)
218
+
219
+ config = HTM::Database.default_config
220
+ conn = PG.connect(config)
221
+
222
+ result = conn.exec_params(
223
+ <<~SQL,
224
+ SELECT key, value, type, importance, created_at
225
+ FROM nodes
226
+ WHERE created_at BETWEEN $1 AND $2
227
+ ORDER BY created_at DESC
228
+ SQL
229
+ [start_date, end_date]
230
+ )
231
+
232
+ result.each do |row|
233
+ puts "#{row['created_at']}: #{row['value'][0..50]}..."
234
+ end
235
+
236
+ conn.close
237
+ ```
238
+
239
+ ### Query by Type and Importance
240
+
241
+ ```ruby
242
+ # Find critical decisions
243
+ conn = PG.connect(HTM::Database.default_config)
244
+
245
+ result = conn.exec_params(
246
+ <<~SQL,
247
+ SELECT key, value, importance, created_at
248
+ FROM nodes
249
+ WHERE type = $1 AND importance >= $2
250
+ ORDER BY importance DESC, created_at DESC
251
+ SQL
252
+ ['decision', 8.0]
253
+ )
254
+
255
+ puts "Critical decisions:"
256
+ result.each do |row|
257
+ puts "- [#{row['importance']}] #{row['value'][0..100]}..."
258
+ end
259
+
260
+ conn.close
261
+ ```
262
+
263
+ ### Query Relationships
264
+
265
+ ```ruby
266
+ # Find all nodes related to a specific node
267
+ conn = PG.connect(HTM::Database.default_config)
268
+
269
+ result = conn.exec_params(
270
+ <<~SQL,
271
+ SELECT n.key, n.value, n.type, r.relationship_type
272
+ FROM nodes n
273
+ JOIN relationships r ON n.id = r.to_node_id
274
+ JOIN nodes source ON r.from_node_id = source.id
275
+ WHERE source.key = $1
276
+ SQL
277
+ ['decision_001']
278
+ )
279
+
280
+ puts "Related nodes:"
281
+ result.each do |row|
282
+ puts "- [#{row['type']}] #{row['value'][0..50]}... (#{row['relationship_type']})"
283
+ end
284
+
285
+ conn.close
286
+ ```
287
+
288
+ ### Query by Tags
289
+
290
+ ```ruby
291
+ # Find all nodes with specific tag
292
+ conn = PG.connect(HTM::Database.default_config)
293
+
294
+ result = conn.exec_params(
295
+ <<~SQL,
296
+ SELECT DISTINCT n.key, n.value, n.type, n.importance
297
+ FROM nodes n
298
+ JOIN tags t ON n.id = t.node_id
299
+ WHERE t.tag = $1
300
+ ORDER BY n.importance DESC
301
+ SQL
302
+ ['architecture']
303
+ )
304
+
305
+ puts "Architecture-related memories:"
306
+ result.each do |row|
307
+ puts "- [#{row['importance']}] #{row['value'][0..80]}..."
308
+ end
309
+
310
+ conn.close
311
+ ```
312
+
313
+ ### Most Active Robots
314
+
315
+ ```ruby
316
+ # Find robots with most contributions
317
+ conn = PG.connect(HTM::Database.default_config)
318
+
319
+ result = conn.exec(
320
+ <<~SQL
321
+ SELECT r.name, r.id, COUNT(n.id) as memory_count
322
+ FROM robots r
323
+ LEFT JOIN nodes n ON r.id = n.robot_id
324
+ GROUP BY r.id, r.name
325
+ ORDER BY memory_count DESC
326
+ SQL
327
+ )
328
+
329
+ puts "Robot contributions:"
330
+ result.each do |row|
331
+ puts "#{row['name']}: #{row['memory_count']} memories"
332
+ end
333
+
334
+ conn.close
335
+ ```
336
+
337
+ ### Time-Based Activity
338
+
339
+ ```ruby
340
+ # Get activity by day
341
+ conn = PG.connect(HTM::Database.default_config)
342
+
343
+ result = conn.exec(
344
+ <<~SQL
345
+ SELECT DATE(created_at) as date, COUNT(*) as count
346
+ FROM nodes
347
+ WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
348
+ GROUP BY DATE(created_at)
349
+ ORDER BY date DESC
350
+ SQL
351
+ )
352
+
353
+ puts "Activity last 30 days:"
354
+ result.each do |row|
355
+ puts "#{row['date']}: #{row['count']} memories"
356
+ end
357
+
358
+ conn.close
359
+ ```
360
+
361
+ ## Database Maintenance
362
+
363
+ ### Vacuuming
364
+
365
+ PostgreSQL requires periodic vacuuming:
366
+
367
+ ```ruby
368
+ # Manual vacuum
369
+ conn = PG.connect(HTM::Database.default_config)
370
+ conn.exec("VACUUM ANALYZE nodes")
371
+ conn.exec("VACUUM ANALYZE relationships")
372
+ conn.exec("VACUUM ANALYZE tags")
373
+ conn.close
374
+
375
+ puts "Vacuum completed"
376
+ ```
377
+
378
+ ### Reindexing
379
+
380
+ Rebuild indexes for optimal performance:
381
+
382
+ ```ruby
383
+ conn = PG.connect(HTM::Database.default_config)
384
+
385
+ # Reindex vector index
386
+ conn.exec("REINDEX INDEX idx_nodes_embedding")
387
+
388
+ # Reindex full-text
389
+ conn.exec("REINDEX INDEX idx_nodes_fulltext")
390
+
391
+ conn.close
392
+
393
+ puts "Reindexing completed"
394
+ ```
395
+
396
+ ### Compression (TimescaleDB)
397
+
398
+ TimescaleDB can compress old data:
399
+
400
+ ```ruby
401
+ # Enable compression on operations_log hypertable
402
+ conn = PG.connect(HTM::Database.default_config)
403
+
404
+ conn.exec(
405
+ <<~SQL
406
+ ALTER TABLE operations_log SET (
407
+ timescaledb.compress,
408
+ timescaledb.compress_segmentby = 'robot_id'
409
+ )
410
+ SQL
411
+ )
412
+
413
+ # Add compression policy (compress data older than 7 days)
414
+ conn.exec(
415
+ <<~SQL
416
+ SELECT add_compression_policy('operations_log', INTERVAL '7 days')
417
+ SQL
418
+ )
419
+
420
+ conn.close
421
+
422
+ puts "Compression policy enabled"
423
+ ```
424
+
425
+ ### Cleanup Old Logs
426
+
427
+ ```ruby
428
+ # Delete operations logs older than 90 days
429
+ conn = PG.connect(HTM::Database.default_config)
430
+
431
+ result = conn.exec_params(
432
+ "DELETE FROM operations_log WHERE time < $1",
433
+ [Time.now - (90 * 24 * 3600)]
434
+ )
435
+
436
+ puts "Deleted #{result.cmd_tuples} old log entries"
437
+ conn.close
438
+ ```
439
+
440
+ ## Performance Optimization
441
+
442
+ ### Analyzing Query Performance
443
+
444
+ ```ruby
445
+ # Explain query plan
446
+ conn = PG.connect(HTM::Database.default_config)
447
+
448
+ query = <<~SQL
449
+ SELECT * FROM nodes
450
+ WHERE type = 'decision'
451
+ AND importance >= 8.0
452
+ ORDER BY created_at DESC
453
+ LIMIT 10
454
+ SQL
455
+
456
+ # Get query plan
457
+ result = conn.exec("EXPLAIN ANALYZE #{query}")
458
+ puts result.values.flatten
459
+ conn.close
460
+ ```
461
+
462
+ ### Index Usage Statistics
463
+
464
+ ```ruby
465
+ # Check index usage
466
+ conn = PG.connect(HTM::Database.default_config)
467
+
468
+ result = conn.exec(
469
+ <<~SQL
470
+ SELECT
471
+ schemaname,
472
+ tablename,
473
+ indexname,
474
+ idx_scan as scans,
475
+ idx_tup_read as tuples_read,
476
+ idx_tup_fetch as tuples_fetched
477
+ FROM pg_stat_user_indexes
478
+ WHERE schemaname = 'public'
479
+ ORDER BY idx_scan DESC
480
+ SQL
481
+ )
482
+
483
+ puts "Index usage statistics:"
484
+ result.each do |row|
485
+ puts "#{row['indexname']}: #{row['scans']} scans, #{row['tuples_read']} tuples"
486
+ end
487
+
488
+ conn.close
489
+ ```
490
+
491
+ ### Table Size Analysis
492
+
493
+ ```ruby
494
+ # Check table sizes
495
+ conn = PG.connect(HTM::Database.default_config)
496
+
497
+ result = conn.exec(
498
+ <<~SQL
499
+ SELECT
500
+ tablename,
501
+ pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
502
+ FROM pg_tables
503
+ WHERE schemaname = 'public'
504
+ ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
505
+ SQL
506
+ )
507
+
508
+ puts "Table sizes:"
509
+ result.each do |row|
510
+ puts "#{row['tablename']}: #{row['size']}"
511
+ end
512
+
513
+ conn.close
514
+ ```
515
+
516
+ ### Optimizing Vector Searches
517
+
518
+ ```ruby
519
+ # HNSW index parameters can be tuned
520
+ # (This is done during index creation, shown for reference)
521
+
522
+ # m: max connections per layer (default: 16)
523
+ # ef_construction: construction time/accuracy tradeoff (default: 64)
524
+
525
+ # Example (run during schema setup):
526
+ # CREATE INDEX idx_nodes_embedding ON nodes
527
+ # USING hnsw(embedding vector_cosine_ops)
528
+ # WITH (m = 16, ef_construction = 64);
529
+
530
+ # For queries, you can adjust ef_search:
531
+ conn = PG.connect(HTM::Database.default_config)
532
+
533
+ # Higher ef_search = more accurate but slower
534
+ conn.exec("SET hnsw.ef_search = 100")
535
+
536
+ # Now run vector searches...
537
+
538
+ conn.close
539
+ ```
540
+
541
+ ## Backup and Restore
542
+
543
+ ### Backup Database
544
+
545
+ ```bash
546
+ # Full database backup
547
+ pg_dump -h localhost -U user -d database -F c -f htm_backup.dump
548
+
549
+ # Backup just the schema
550
+ pg_dump -h localhost -U user -d database -s -f htm_schema.sql
551
+
552
+ # Backup just the data
553
+ pg_dump -h localhost -U user -d database -a -f htm_data.sql
554
+ ```
555
+
556
+ ### Restore Database
557
+
558
+ ```bash
559
+ # Restore from custom format
560
+ pg_restore -h localhost -U user -d database htm_backup.dump
561
+
562
+ # Restore from SQL format
563
+ psql -h localhost -U user -d database -f htm_schema.sql
564
+ psql -h localhost -U user -d database -f htm_data.sql
565
+ ```
566
+
567
+ ### Backup Ruby Script
568
+
569
+ ```ruby
570
+ require 'open3'
571
+
572
+ def backup_database
573
+ config = HTM::Database.default_config
574
+ # config[:host] is a bare hostname, not a URI — use it (and config[:port]) directly
575
+
576
+ timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
577
+ backup_file = "htm_backup_#{timestamp}.dump"
578
+
579
+ cmd = [
580
+ "pg_dump",
581
+ "-h", config[:host],
582
+ "-p", config[:port].to_s,
583
+ "-U", config[:user],
584
+ "-d", config[:dbname],
585
+ "-F", "c", # Custom format
586
+ "-f", backup_file
587
+ ].join(" ")
588
+
589
+ # Set password via environment
590
+ env = { "PGPASSWORD" => config[:password] }
591
+
592
+ stdout, stderr, status = Open3.capture3(env, cmd)
593
+
594
+ if status.success?
595
+ puts "Backup created: #{backup_file}"
596
+ backup_file
597
+ else
598
+ raise "Backup failed: #{stderr}"
599
+ end
600
+ end
601
+
602
+ # Usage
603
+ backup_database
604
+ ```
605
+
606
+ ## Monitoring and Observability
607
+
608
+ ### Connection Pooling
609
+
610
+ HTM uses connection pooling internally, but you can monitor it:
611
+
612
+ ```ruby
613
+ # Check active connections
614
+ conn = PG.connect(HTM::Database.default_config)
615
+
616
+ result = conn.exec(
617
+ <<~SQL
618
+ SELECT
619
+ count(*) as total,
620
+ count(*) FILTER (WHERE state = 'active') as active,
621
+ count(*) FILTER (WHERE state = 'idle') as idle
622
+ FROM pg_stat_activity
623
+ WHERE datname = current_database()
624
+ SQL
625
+ )
626
+
627
+ puts "Connections: #{result.first['total']}"
628
+ puts " Active: #{result.first['active']}"
629
+ puts " Idle: #{result.first['idle']}"
630
+
631
+ conn.close
632
+ ```
633
+
634
+ ### Slow Query Log
635
+
636
+ Enable slow query logging in PostgreSQL:
637
+
638
+ ```sql
639
+ -- In postgresql.conf or via SQL
640
+ ALTER DATABASE your_database SET log_min_duration_statement = 1000; -- Log queries > 1s
641
+ ```
642
+
643
+ ### Custom Monitoring
644
+
645
+ ```ruby
646
+ class DatabaseMonitor
647
+ def initialize(htm)
648
+ @htm = htm
649
+ @config = HTM::Database.default_config
650
+ end
651
+
652
+ def health_check
653
+ conn = PG.connect(@config)
654
+
655
+ # Check connectivity
656
+ conn.exec("SELECT 1")
657
+
658
+ # Check table accessibility
659
+ conn.exec("SELECT COUNT(*) FROM nodes")
660
+ conn.exec("SELECT COUNT(*) FROM relationships")
661
+
662
+ { status: :healthy, message: "All checks passed" }
663
+ rescue => e
664
+ { status: :error, message: e.message }
665
+ ensure
666
+ conn&.close
667
+ end
668
+
669
+ def performance_report
670
+ conn = PG.connect(@config)
671
+
672
+ report = {}
673
+
674
+ # Query counts
675
+ result = conn.exec("SELECT COUNT(*) FROM nodes")
676
+ report[:total_nodes] = result.first['count'].to_i
677
+
678
+ # Table sizes
679
+ result = conn.exec(
680
+ <<~SQL
681
+ SELECT pg_size_pretty(pg_total_relation_size('nodes')) as size
682
+ SQL
683
+ )
684
+ report[:nodes_size] = result.first['size']
685
+
686
+ # Cache hit ratio
687
+ result = conn.exec(
688
+ <<~SQL
689
+ SELECT
690
+ sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
691
+ FROM pg_statio_user_tables
692
+ WHERE schemaname = 'public'
693
+ SQL
694
+ )
695
+ report[:cache_hit_ratio] = result.first['ratio'].to_f
696
+
697
+ conn.close
698
+ report
699
+ end
700
+
701
+ def alert_if_unhealthy
702
+ health = health_check
703
+
704
+ if health[:status] != :healthy
705
+ # Send alert (email, Slack, etc.)
706
+ warn "Database unhealthy: #{health[:message]}"
707
+ end
708
+ end
709
+ end
710
+
711
+ monitor = DatabaseMonitor.new(htm)
712
+ puts monitor.health_check
713
+ puts monitor.performance_report
714
+ ```
715
+
716
+ ## Best Practices
717
+
718
+ ### 1. Use Prepared Statements
719
+
720
+ ```ruby
721
+ # Good: Use parameterized queries
722
+ conn.exec_params(
723
+ "SELECT * FROM nodes WHERE robot_id = $1 AND type = $2",
724
+ [robot_id, type]
725
+ )
726
+
727
+ # Avoid: String interpolation (SQL injection risk)
728
+ # conn.exec("SELECT * FROM nodes WHERE robot_id = '#{robot_id}'")
729
+ ```
730
+
731
+ ### 2. Connection Management
732
+
733
+ ```ruby
734
+ # Good: Use HTM's internal connection handling
735
+ htm.add_node(...) # Manages connections automatically
736
+
737
+ # Advanced: Manual connections, always close
738
+ conn = PG.connect(config)
739
+ begin
740
+ # Do work
741
+ ensure
742
+ conn.close
743
+ end
744
+ ```
745
+
746
+ ### 3. Batch Operations
747
+
748
+ ```ruby
749
+ # Good: Use transactions for multiple operations
750
+ conn = PG.connect(config)
751
+ conn.transaction do |c|
752
+ 100.times do |i|
753
+ c.exec_params("INSERT INTO nodes (key, value, robot_id) VALUES ($1, $2, $3)", ["key_#{i}", "value #{i}", "robot-1"])
754
+ end
755
+ end
756
+ conn.close
757
+ ```
758
+
759
+ ### 4. Regular Maintenance
760
+
761
+ ```ruby
762
+ # Schedule regular maintenance
763
+ require 'whenever' # gem for cron jobs
764
+
765
+ # In schedule.rb
766
+ every 1.day, at: '2:00 am' do
767
+ runner "HTM::Database.vacuum_analyze"
768
+ end
769
+
770
+ every 1.week, at: '3:00 am' do
771
+ runner "HTM::Database.reindex"
772
+ end
773
+ ```
774
+
775
+ ### 5. Monitor Growth
776
+
777
+ ```ruby
778
+ # Track database growth over time
779
+ class GrowthTracker
780
+ def initialize(htm)
781
+ @htm, @log_file = htm, "database_growth.log"
782
+ end
783
+
784
+ def log_stats
785
+ stats = @htm.memory_stats
786
+
787
+ entry = {
788
+ timestamp: Time.now,
789
+ total_nodes: stats[:total_nodes],
790
+ database_size: stats[:database_size]
791
+ }
792
+
793
+ File.open(@log_file, 'a') do |f|
794
+ f.puts entry.to_json
795
+ end
796
+ end
797
+ end
798
+
799
+ # Run daily
800
+ tracker = GrowthTracker.new(htm)
801
+ tracker.log_stats
802
+ ```
803
+
804
+ ## Troubleshooting
805
+
806
+ ### Connection Issues
807
+
808
+ ```ruby
809
+ # Test connection
810
+ begin
811
+ conn = PG.connect(HTM::Database.default_config)
812
+ puts "Connection successful"
813
+ conn.close
814
+ rescue PG::Error => e
815
+ puts "Connection failed: #{e.message}"
816
+ puts "Check HTM_DBURL environment variable"
817
+ end
818
+ ```
819
+
820
+ ### Slow Queries
821
+
822
+ ```ruby
823
+ # Enable query timing
824
+ conn = PG.connect(HTM::Database.default_config)
825
+
826
+ start = Time.now
827
+ result = conn.exec("SELECT * FROM nodes WHERE type = 'decision'")
828
+ elapsed = Time.now - start
829
+
830
+ puts "Query returned #{result.ntuples} rows in #{elapsed}s"
831
+
832
+ if elapsed > 1.0
833
+ puts "Slow query detected! Consider:"
834
+ puts "- Adding indexes"
835
+ puts "- Using LIMIT"
836
+ puts "- Narrowing date range"
837
+ end
838
+
839
+ conn.close
840
+ ```
841
+
842
+ ### Disk Space Issues
843
+
844
+ ```ruby
845
+ # Check disk usage
846
+ conn = PG.connect(HTM::Database.default_config)
847
+
848
+ result = conn.exec("SELECT pg_database_size(current_database()) as size")
849
+ size_gb = result.first['size'].to_i / (1024.0 ** 3)
850
+
851
+ puts "Database size: #{size_gb.round(2)} GB"
852
+
853
+ if size_gb > 10
854
+ puts "Large database. Consider:"
855
+ puts "- Archiving old nodes"
856
+ puts "- Enabling compression"
857
+ puts "- Cleaning up operations_log"
858
+ end
859
+
860
+ conn.close
861
+ ```
862
+
863
+ ## Next Steps
864
+
865
+ - [**Working Memory**](working-memory.md) - Understand the memory tier above long-term
866
+ - [**Adding Memories**](adding-memories.md) - Learn how memories are stored
867
+ - [**Search Strategies**](search-strategies.md) - Optimize retrieval from long-term memory
868
+
869
+ ## Complete Example
870
+
871
+ ```ruby
872
+ require 'htm'
873
+ require 'pg'
874
+
875
+ # Initialize HTM
876
+ htm = HTM.new(robot_name: "Database Admin")
877
+
878
+ # Add some test data
879
+ puts "Adding test data..."
880
+ 10.times do |i|
881
+ htm.add_node(
882
+ "test_#{i}",
883
+ "Test memory number #{i}",
884
+ type: :fact,
885
+ importance: rand(1.0..10.0),
886
+ tags: ["test", "batch_#{i / 5}"]
887
+ )
888
+ end
889
+
890
+ # Get statistics
891
+ puts "\n=== Database Statistics ==="
892
+ stats = htm.memory_stats
893
+ puts "Total nodes: #{stats[:total_nodes]}"
894
+ puts "Database size: #{(stats[:database_size] / 1024.0 / 1024.0).round(2)} MB"
895
+ puts "Active robots: #{stats[:active_robots]}"
896
+
897
+ # Query by tag
898
+ puts "\n=== Query by Tag ==="
899
+ config = HTM::Database.default_config
900
+ conn = PG.connect(config)
901
+
902
+ result = conn.exec_params(
903
+ <<~SQL,
904
+ SELECT n.key, n.value
905
+ FROM nodes n
906
+ JOIN tags t ON n.id = t.node_id
907
+ WHERE t.tag = $1
908
+ SQL
909
+ ['test']
910
+ )
911
+
912
+ puts "Found #{result.ntuples} nodes with tag 'test'"
913
+ result.each do |row|
914
+ puts "- #{row['key']}: #{row['value']}"
915
+ end
916
+
917
+ # Performance check
918
+ puts "\n=== Performance Metrics ==="
919
+ result = conn.exec(
920
+ <<~SQL
921
+ SELECT
922
+ pg_size_pretty(pg_total_relation_size('nodes')) as nodes_size,
923
+ pg_size_pretty(pg_total_relation_size('relationships')) as rel_size,
924
+ pg_size_pretty(pg_total_relation_size('tags')) as tags_size
925
+ SQL
926
+ )
927
+
928
+ puts "Table sizes:"
929
+ puts " nodes: #{result.first['nodes_size']}"
930
+ puts " relationships: #{result.first['rel_size']}"
931
+ puts " tags: #{result.first['tags_size']}"
932
+
933
+ conn.close
934
+
935
+ # Cleanup test data
936
+ puts "\n=== Cleanup ==="
937
+ 10.times do |i|
938
+ htm.forget("test_#{i}", confirm: :confirmed)
939
+ end
940
+ puts "Test data removed"
941
+ ```