htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,806 @@
1
+ # Database Class
2
+
3
+ Database schema setup and configuration utilities for HTM.
4
+
5
+ ## Overview
6
+
7
+ `HTM::Database` provides class methods for setting up the HTM database schema, managing PostgreSQL connections, and configuring TimescaleDB hypertables.
8
+
9
+ **Key Features:**
10
+
11
+ - Schema creation and migration
12
+ - TimescaleDB hypertable setup
13
+ - Extension verification (TimescaleDB, pgvector, pg_trgm)
14
+ - Connection configuration parsing
15
+ - Automatic compression policies
16
+
17
+ ## Class Definition
18
+
19
+ ```ruby
20
+ class HTM::Database
21
+ # All methods are class methods
22
+ end
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Class Methods
28
+
29
+ ### `setup(db_url = nil)` {: #setup }
30
+
31
+ Set up the HTM database schema and TimescaleDB hypertables.
32
+
33
+ ```ruby
34
+ HTM::Database.setup(db_url = nil)
35
+ ```
36
+
37
+ #### Parameters
38
+
39
+ | Parameter | Type | Default | Description |
40
+ |-----------|------|---------|-------------|
41
+ | `db_url` | String, nil | `ENV['HTM_DBURL']` | Database connection URL |
42
+
43
+ #### Returns
44
+
45
+ - `void`
46
+
47
+ #### Raises
48
+
49
+ - `RuntimeError` - If the database configuration is not found
50
+ - `PG::Error` - If database connection or schema creation fails
51
+
52
+ #### Side Effects
53
+
54
+ - Connects to PostgreSQL database
55
+ - Verifies required extensions (TimescaleDB, pgvector, pg_trgm)
56
+ - Creates schema (tables, indexes, views)
57
+ - Converts tables to hypertables
58
+ - Sets up compression policies
59
+ - Prints status messages to stdout
60
+
61
+ #### Examples
62
+
63
+ ```ruby
64
+ # Use default configuration from environment
65
+ HTM::Database.setup
66
+
67
+ # Use specific database URL
68
+ HTM::Database.setup('postgresql://user:pass@host:5432/dbname')
69
+
70
+ # Use TimescaleDB Cloud
71
+ url = 'postgresql://tsdbadmin:pass@xxx.tsdb.cloud.timescale.com:37807/tsdb?sslmode=require'
72
+ HTM::Database.setup(url)
73
+ ```
74
+
75
+ #### Output
76
+
77
+ ```
78
+ ✓ TimescaleDB version: 2.13.0
79
+ ✓ pgvector version: 0.5.1
80
+ ✓ pg_trgm version: 1.6
81
+ Creating HTM schema...
82
+ ✓ Schema created
83
+ ✓ Created hypertable for operations_log
84
+ ✓ Created hypertable for nodes
85
+ ✓ Enabled compression for nodes older than 30 days
86
+ ✓ HTM database schema created successfully
87
+ ```
88
+
89
+ ---
90
+
91
+ ### `parse_connection_url(url)` {: #parse_connection_url }
92
+
93
+ Parse a PostgreSQL connection URL into a configuration hash.
94
+
95
+ ```ruby
96
+ HTM::Database.parse_connection_url(url)
97
+ ```
98
+
99
+ #### Parameters
100
+
101
+ | Parameter | Type | Description |
102
+ |-----------|------|-------------|
103
+ | `url` | String | PostgreSQL connection URL |
104
+
105
+ #### Returns
106
+
107
+ - `Hash` - Connection configuration
108
+ - `nil` - If url is nil
109
+
110
+ #### Hash Structure
111
+
112
+ ```ruby
113
+ {
114
+ host: "hostname",
115
+ port: 5432,
116
+ dbname: "database_name",
117
+ user: "username",
118
+ password: "password",
119
+ sslmode: "require" # taken from the URL's query params; defaults to "prefer"
120
+ }
121
+ ```
122
+
123
+ #### Examples
124
+
125
+ ```ruby
126
+ # Standard PostgreSQL URL
127
+ url = 'postgresql://user:pass@localhost:5432/mydb'
128
+ config = HTM::Database.parse_connection_url(url)
129
+ # => {
130
+ # host: "localhost",
131
+ # port: 5432,
132
+ # dbname: "mydb",
133
+ # user: "user",
134
+ # password: "pass",
135
+ # sslmode: "prefer"
136
+ # }
137
+
138
+ # With SSL mode
139
+ url = 'postgresql://user:pass@host:5432/db?sslmode=require'
140
+ config = HTM::Database.parse_connection_url(url)
141
+ # => { ..., sslmode: "require" }
142
+
143
+ # TimescaleDB Cloud URL
144
+ url = 'postgresql://tsdbadmin:secret@xxx.tsdb.cloud.timescale.com:37807/tsdb?sslmode=require'
145
+ config = HTM::Database.parse_connection_url(url)
146
+ # => {
147
+ # host: "xxx.tsdb.cloud.timescale.com",
148
+ # port: 37807,
149
+ # dbname: "tsdb",
150
+ # user: "tsdbadmin",
151
+ # password: "secret",
152
+ # sslmode: "require"
153
+ # }
154
+
155
+ # Nil handling
156
+ config = HTM::Database.parse_connection_url(nil)
157
+ # => nil
158
+ ```
159
+
160
+ ---
161
+
162
+ ### `parse_connection_params()` {: #parse_connection_params }
163
+
164
+ Build configuration from individual environment variables.
165
+
166
+ ```ruby
167
+ HTM::Database.parse_connection_params()
168
+ ```
169
+
170
+ #### Returns
171
+
172
+ - `Hash` - Connection configuration
173
+ - `nil` - If `ENV['HTM_DBNAME']` is not set
174
+
175
+ #### Environment Variables
176
+
177
+ | Variable | Description | Default |
178
+ |----------|-------------|---------|
179
+ | `HTM_DBHOST` | Database hostname | `'cw7rxj91bm.srbbwwxn56.tsdb.cloud.timescale.com'` |
180
+ | `HTM_DBPORT` | Database port | `37807` |
181
+ | `HTM_DBNAME` | Database name | *required* |
182
+ | `HTM_DBUSER` | Database user | *required* |
183
+ | `HTM_DBPASS` | Database password | *required* |
184
+
185
+ #### Examples
186
+
187
+ ```ruby
188
+ # Set environment variables
189
+ ENV['HTM_DBNAME'] = 'tsdb'
190
+ ENV['HTM_DBUSER'] = 'tsdbadmin'
191
+ ENV['HTM_DBPASS'] = 'secret'
192
+
193
+ config = HTM::Database.parse_connection_params()
194
+ # => {
195
+ # host: "cw7rxj91bm.srbbwwxn56.tsdb.cloud.timescale.com",
196
+ # port: 37807,
197
+ # dbname: "tsdb",
198
+ # user: "tsdbadmin",
199
+ # password: "secret",
200
+ # sslmode: "require"
201
+ # }
202
+
203
+ # Custom host and port
204
+ ENV['HTM_DBHOST'] = 'localhost'
205
+ ENV['HTM_DBPORT'] = '5432'
206
+
207
+ config = HTM::Database.parse_connection_params()
208
+ # => { host: "localhost", port: 5432, ... }
209
+
210
+ # Without HTM_DBNAME
211
+ ENV.delete('HTM_DBNAME')
212
+ config = HTM::Database.parse_connection_params()
213
+ # => nil
214
+ ```
215
+
216
+ ---
217
+
218
+ ### `default_config()` {: #default_config }
219
+
220
+ Get default database configuration from environment.
221
+
222
+ ```ruby
223
+ HTM::Database.default_config()
224
+ ```
225
+
226
+ #### Returns
227
+
228
+ - `Hash` - Connection configuration
229
+ - `nil` - If no configuration found
230
+
231
+ #### Priority Order
232
+
233
+ 1. `ENV['HTM_DBURL']` - Parse connection URL
234
+ 2. `ENV['HTM_DBNAME']` - Parse individual params
235
+ 3. `nil` - No configuration available
236
+
237
+ #### Examples
238
+
239
+ ```ruby
240
+ # Using HTM_DBURL
241
+ ENV['HTM_DBURL'] = 'postgresql://user:pass@host/db'
242
+ config = HTM::Database.default_config
243
+ # => Parsed from URL
244
+
245
+ # Using HTM_DBNAME
246
+ ENV.delete('HTM_DBURL')
247
+ ENV['HTM_DBNAME'] = 'mydb'
248
+ ENV['HTM_DBUSER'] = 'user'
249
+ ENV['HTM_DBPASS'] = 'pass'
250
+ config = HTM::Database.default_config
251
+ # => Parsed from params
252
+
253
+ # No configuration
254
+ ENV.delete('HTM_DBURL')
255
+ ENV.delete('HTM_DBNAME')
256
+ config = HTM::Database.default_config
257
+ # => nil
258
+
259
+ # Use in HTM initialization
260
+ htm = HTM.new(db_config: HTM::Database.default_config)
261
+ ```
262
+
263
+ ---
264
+
265
+ ## Database Schema
266
+
267
+ ### Tables
268
+
269
+ #### `nodes`
270
+
271
+ Primary memory storage table (hypertable partitioned by `created_at`).
272
+
273
+ ```sql
274
+ CREATE TABLE nodes (
275
+ id SERIAL PRIMARY KEY,
276
+ key TEXT UNIQUE NOT NULL,
277
+ value TEXT NOT NULL,
278
+ type TEXT,
279
+ category TEXT,
280
+ importance REAL DEFAULT 1.0,
281
+ token_count INTEGER DEFAULT 0,
282
+ robot_id TEXT NOT NULL REFERENCES robots(id),
283
+ embedding vector(1536),
284
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
285
+ last_accessed TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
286
+ in_working_memory BOOLEAN DEFAULT TRUE,
287
+ evicted_at TIMESTAMPTZ
288
+ );
289
+
290
+ -- Indexes
291
+ CREATE UNIQUE INDEX idx_nodes_key ON nodes(key);
292
+ CREATE INDEX idx_nodes_created_at ON nodes(created_at DESC);
293
+ CREATE INDEX idx_nodes_robot_id ON nodes(robot_id);
294
+ CREATE INDEX idx_nodes_type ON nodes(type);
295
+ CREATE INDEX idx_nodes_embedding ON nodes USING ivfflat (embedding vector_cosine_ops);
296
+ CREATE INDEX idx_nodes_value_fts ON nodes USING GIN (to_tsvector('english', value));
297
+ ```
298
+
299
+ #### `relationships`
300
+
301
+ Node relationship graph.
302
+
303
+ ```sql
304
+ CREATE TABLE relationships (
305
+ id SERIAL PRIMARY KEY,
306
+ from_node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
307
+ to_node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
308
+ relationship_type TEXT,
309
+ strength REAL DEFAULT 1.0,
310
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
311
+ UNIQUE(from_node_id, to_node_id, relationship_type)
312
+ );
313
+
314
+ -- Indexes
315
+ CREATE INDEX idx_relationships_from ON relationships(from_node_id);
316
+ CREATE INDEX idx_relationships_to ON relationships(to_node_id);
317
+ ```
318
+
319
+ #### `tags`
320
+
321
+ Flexible tagging system.
322
+
323
+ ```sql
324
+ CREATE TABLE tags (
325
+ id SERIAL PRIMARY KEY,
326
+ node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
327
+ tag TEXT NOT NULL,
328
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
329
+ UNIQUE(node_id, tag)
330
+ );
331
+
332
+ -- Indexes
333
+ CREATE INDEX idx_tags_node_id ON tags(node_id);
334
+ CREATE INDEX idx_tags_tag ON tags(tag);
335
+ ```
336
+
337
+ #### `robots`
338
+
339
+ Robot registry for multi-robot tracking.
340
+
341
+ ```sql
342
+ CREATE TABLE robots (
343
+ id TEXT PRIMARY KEY,
344
+ name TEXT NOT NULL,
345
+ created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
346
+ last_active TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
347
+ );
348
+ ```
349
+
350
+ #### `operations_log`
351
+
352
+ Audit log for all operations (hypertable partitioned by `timestamp`).
353
+
354
+ ```sql
355
+ CREATE TABLE operations_log (
356
+ id SERIAL,
357
+ operation TEXT NOT NULL,
358
+ node_id INTEGER REFERENCES nodes(id) ON DELETE SET NULL,
359
+ robot_id TEXT NOT NULL REFERENCES robots(id),
360
+ timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
361
+ details JSONB
362
+ );
363
+
364
+ -- Indexes
365
+ CREATE INDEX idx_operations_log_timestamp ON operations_log(timestamp DESC);
366
+ CREATE INDEX idx_operations_log_robot_id ON operations_log(robot_id);
367
+ CREATE INDEX idx_operations_log_operation ON operations_log(operation);
368
+ ```
369
+
370
+ ### Views
371
+
372
+ #### `node_stats`
373
+
374
+ Aggregate statistics by node type.
375
+
376
+ ```sql
377
+ CREATE VIEW node_stats AS
378
+ SELECT
379
+ type,
380
+ COUNT(*) as count,
381
+ AVG(importance) as avg_importance,
382
+ MIN(created_at) as oldest,
383
+ MAX(created_at) as newest
384
+ FROM nodes
385
+ GROUP BY type;
386
+ ```
387
+
388
+ #### `robot_activity`
389
+
390
+ Robot activity summary.
391
+
392
+ ```sql
393
+ CREATE VIEW robot_activity AS
394
+ SELECT
395
+ id,
396
+ name,
397
+ last_active,
398
+ (SELECT COUNT(*) FROM nodes WHERE robot_id = robots.id) as node_count
399
+ FROM robots
400
+ ORDER BY last_active DESC;
401
+ ```
402
+
403
+ ---
404
+
405
+ ## TimescaleDB Hypertables
406
+
407
+ ### `nodes` Hypertable
408
+
409
+ ```sql
410
+ SELECT create_hypertable('nodes', 'created_at',
411
+ if_not_exists => TRUE,
412
+ migrate_data => TRUE
413
+ );
414
+ ```
415
+
416
+ **Partitioning**: By `created_at` timestamp
417
+
418
+ **Chunk Interval**: 7 days (default)
419
+
420
+ **Compression**:
421
+
422
+ ```sql
423
+ ALTER TABLE nodes SET (
424
+ timescaledb.compress,
425
+ timescaledb.compress_segmentby = 'robot_id,type'
426
+ );
427
+
428
+ SELECT add_compression_policy('nodes', INTERVAL '30 days',
429
+ if_not_exists => TRUE
430
+ );
431
+ ```
432
+
433
+ - Automatically compresses chunks older than 30 days
434
+ - Segments by `robot_id` and `type` for efficient queries
435
+ - Reduces storage by ~90% for old data
436
+
437
+ ### `operations_log` Hypertable
438
+
439
+ ```sql
440
+ SELECT create_hypertable('operations_log', 'timestamp',
441
+ if_not_exists => TRUE,
442
+ migrate_data => TRUE
443
+ );
444
+ ```
445
+
446
+ **Partitioning**: By `timestamp`
447
+
448
+ **Chunk Interval**: 7 days (default)
449
+
450
+ **Benefits**:
451
+
452
+ - Fast time-range queries
453
+ - Automatic data retention policies (can be added)
454
+ - Optimized for append-only workload
455
+
456
+ ---
457
+
458
+ ## Required PostgreSQL Extensions
459
+
460
+ ### TimescaleDB
461
+
462
+ Time-series database extension.
463
+
464
+ ```sql
465
+ CREATE EXTENSION IF NOT EXISTS timescaledb;
466
+ ```
467
+
468
+ **Features Used**:
469
+
470
+ - Hypertables for time-series optimization
471
+ - Automatic chunking and partitioning
472
+ - Compression policies
473
+ - Continuous aggregates (planned)
474
+
475
+ ### pgvector
476
+
477
+ Vector similarity search.
478
+
479
+ ```sql
480
+ CREATE EXTENSION IF NOT EXISTS vector;
481
+ ```
482
+
483
+ **Features Used**:
484
+
485
+ - `vector(1536)` data type for embeddings
486
+ - Cosine distance operator `<=>`
487
+ - IVFFlat index for approximate nearest neighbor
488
+
489
+ ### pg_trgm
490
+
491
+ Trigram-based text search.
492
+
493
+ ```sql
494
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
495
+ ```
496
+
497
+ **Features Used**:
498
+
499
+ - Fuzzy text matching based on trigram similarity
500
+ - GIN indexes for fast text queries
501
+ - Similarity ranking
502
+
503
+ ---
504
+
505
+ ## Setup Process
506
+
507
+ ### 1. Verify Extensions
508
+
509
+ ```ruby
510
+ # Check TimescaleDB
511
+ timescale = conn.exec("SELECT extversion FROM pg_extension WHERE extname='timescaledb'").first
512
+ # => {"extversion"=>"2.13.0"}
513
+
514
+ # Check pgvector
515
+ pgvector = conn.exec("SELECT extversion FROM pg_extension WHERE extname='vector'").first
516
+ # => {"extversion"=>"0.5.1"}
517
+
518
+ # Check pg_trgm
519
+ pg_trgm = conn.exec("SELECT extversion FROM pg_extension WHERE extname='pg_trgm'").first
520
+ # => {"extversion"=>"1.6"}
521
+ ```
522
+
523
+ ### 2. Run Schema
524
+
525
+ Reads and executes `db/schema.sql` from the repository:
526
+
527
+ - Creates tables
528
+ - Creates indexes
529
+ - Creates views
530
+ - Sets up constraints
531
+
532
+ Note: `CREATE EXTENSION` lines are filtered out (extensions must be pre-installed).
533
+
534
+ ### 3. Setup Hypertables
535
+
536
+ Converts tables to hypertables:
537
+
538
+ ```ruby
539
+ # operations_log
540
+ conn.exec("SELECT create_hypertable('operations_log', 'timestamp', if_not_exists => TRUE, migrate_data => TRUE)")
541
+
542
+ # nodes (with compression)
543
+ conn.exec("SELECT create_hypertable('nodes', 'created_at', if_not_exists => TRUE, migrate_data => TRUE)")
544
+ conn.exec("ALTER TABLE nodes SET (timescaledb.compress, timescaledb.compress_segmentby = 'robot_id,type')")
545
+ conn.exec("SELECT add_compression_policy('nodes', INTERVAL '30 days', if_not_exists => TRUE)")
546
+ ```
547
+
548
+ ---
549
+
550
+ ## Environment Configuration
551
+
552
+ ### TimescaleDB Cloud
553
+
554
+ Using URL (recommended):
555
+
556
+ ```bash
557
+ # In ~/.bashrc__tiger
558
+ export HTM_DBURL='postgresql://tsdbadmin:PASSWORD@SERVICE.tsdb.cloud.timescale.com:37807/tsdb?sslmode=require'
559
+ ```
560
+
561
+ Using individual variables:
562
+
563
+ ```bash
564
+ # In ~/.bashrc__tiger
565
+ export HTM_DBHOST='xxx.tsdb.cloud.timescale.com'
566
+ export HTM_DBPORT=37807
567
+ export HTM_DBNAME='tsdb'
568
+ export HTM_DBUSER='tsdbadmin'
569
+ export HTM_DBPASS='your_password'
570
+ ```
571
+
572
+ ### Local PostgreSQL
573
+
574
+ ```bash
575
+ export HTM_DBURL='postgresql://localhost/htm_dev'
576
+
577
+ # Or with auth
578
+ export HTM_DBURL='postgresql://user:pass@localhost:5432/htm_dev'
579
+ ```
580
+
581
+ ### Docker PostgreSQL
582
+
583
+ ```bash
584
+ export HTM_DBURL='postgresql://postgres:postgres@localhost:5432/htm'
585
+ ```
586
+
587
+ ---
588
+
589
+ ## Usage Examples
590
+
591
+ ### Initial Setup
592
+
593
+ ```ruby
594
+ # First time setup
595
+ require 'htm'
596
+
597
+ HTM::Database.setup
598
+ # Creates all tables, indexes, hypertables
599
+
600
+ # Verify
601
+ config = HTM::Database.default_config
602
+ conn = PG.connect(config)
603
+ result = conn.exec("SELECT COUNT(*) FROM nodes")
604
+ conn.close
605
+ ```
606
+
607
+ ### Configuration Management
608
+
609
+ ```ruby
610
+ # Get current config
611
+ config = HTM::Database.default_config
612
+
613
+ if config
614
+ puts "Database: #{config[:dbname]}"
615
+ puts "Host: #{config[:host]}"
616
+ puts "Port: #{config[:port]}"
617
+ else
618
+ puts "No database configuration found"
619
+ puts "Please set HTM_DBURL or HTM_DBNAME environment variables"
620
+ end
621
+
622
+ # Test connection
623
+ begin
624
+ conn = PG.connect(config)
625
+ version = conn.exec("SELECT version()").first['version']
626
+ puts "Connected: #{version}"
627
+ conn.close
628
+ rescue PG::Error => e
629
+ puts "Connection failed: #{e.message}"
630
+ end
631
+ ```
632
+
633
+ ### Schema Migration
634
+
635
+ ```ruby
636
+ # Check if schema exists
637
+ config = HTM::Database.default_config
638
+ conn = PG.connect(config)
639
+
640
+ tables = conn.exec(<<~SQL).to_a
641
+ SELECT table_name
642
+ FROM information_schema.tables
643
+ WHERE table_schema = 'public'
644
+ AND table_name IN ('nodes', 'robots', 'relationships', 'tags', 'operations_log')
645
+ SQL
646
+
647
+ if tables.empty?
648
+ puts "Schema not found, running setup..."
649
+ HTM::Database.setup
650
+ else
651
+ puts "Schema already exists:"
652
+ tables.each { |t| puts " - #{t['table_name']}" }
653
+ end
654
+
655
+ conn.close
656
+ ```
657
+
658
+ ### Custom Database
659
+
660
+ ```ruby
661
+ # Use non-standard database
662
+ custom_url = 'postgresql://app:secret@db.example.com:5432/production'
663
+
664
+ HTM::Database.setup(custom_url)
665
+
666
+ # Use with HTM
667
+ config = HTM::Database.parse_connection_url(custom_url)
668
+ htm = HTM.new(db_config: config)
669
+ ```
670
+
671
+ ---
672
+
673
+ ## Troubleshooting
674
+
675
+ ### Extensions Not Available
676
+
677
+ ```
678
+ ⚠ Warning: TimescaleDB extension not found
679
+ ⚠ Warning: pgvector extension not found
680
+ ```
681
+
682
+ **Solution**: Install required extensions:
683
+
684
+ ```bash
685
+ # Ubuntu/Debian
686
+ sudo apt install postgresql-15-timescaledb postgresql-15-pgvector
687
+
688
+ # macOS with Homebrew
689
+ brew install timescaledb pgvector
690
+
691
+ # Or use TimescaleDB Cloud (extensions pre-installed)
692
+ ```
693
+
694
+ ### Connection Refused
695
+
696
+ ```
697
+ PG::ConnectionBad: could not connect to server: Connection refused
698
+ ```
699
+
700
+ **Solution**: Verify that PostgreSQL is running and that the connection details are correct:
701
+
702
+ ```bash
703
+ # Check PostgreSQL status
704
+ pg_isready -h localhost -p 5432
705
+
706
+ # Test connection
707
+ psql -h localhost -U user -d dbname
708
+
709
+ # Verify environment
710
+ echo $HTM_DBURL
711
+ ```
712
+
713
+ ### Permission Denied
714
+
715
+ ```
716
+ PG::InsufficientPrivilege: ERROR: permission denied for schema public
717
+ ```
718
+
719
+ **Solution**: Grant necessary permissions:
720
+
721
+ ```sql
722
+ GRANT ALL ON SCHEMA public TO your_user;
723
+ GRANT ALL ON ALL TABLES IN SCHEMA public TO your_user;
724
+ ```
725
+
726
+ ### Hypertable Already Exists
727
+
728
+ ```
729
+ Note: nodes hypertable: table "nodes" is already a hypertable
730
+ ```
731
+
732
+ This is **not an error** - the schema setup is idempotent. Safe to ignore.
733
+
734
+ ---
735
+
736
+ ## Best Practices
737
+
738
+ ### 1. Use Environment Variables
739
+
740
+ ```ruby
741
+ # Good: Use environment variables
742
+ HTM::Database.setup
743
+
744
+ # Avoid: Hardcoded credentials
745
+ HTM::Database.setup('postgresql://user:password@host/db')
746
+ ```
747
+
748
+ ### 2. Verify Extensions First
749
+
750
+ ```ruby
751
+ # Check extensions before setup
752
+ config = HTM::Database.default_config
753
+ conn = PG.connect(config)
754
+
755
+ required = ['timescaledb', 'vector', 'pg_trgm']
756
+ missing = required.select do |ext|
757
+ !conn.exec("SELECT 1 FROM pg_extension WHERE extname='#{ext}'").first
758
+ end
759
+
760
+ if missing.any?
761
+ puts "Missing extensions: #{missing.join(', ')}"
762
+ puts "Please install before running setup"
763
+ exit 1
764
+ end
765
+
766
+ conn.close
767
+ HTM::Database.setup
768
+ ```
769
+
770
+ ### 3. Run Setup Once
771
+
772
+ ```ruby
773
+ # Run setup in a migration or initial deployment
774
+ # Not on every application start
775
+
776
+ # Bad:
777
+ def initialize
778
+ HTM::Database.setup # Don't do this
779
+ @htm = HTM.new
780
+ end
781
+
782
+ # Good:
783
+ # Run once during deployment:
784
+ # rake db:setup -> HTM::Database.setup
785
+ ```
786
+
787
+ ### 4. Handle Missing Configuration
788
+
789
+ ```ruby
790
+ config = HTM::Database.default_config
791
+
792
+ unless config
793
+ raise "Database not configured. Please set HTM_DBURL environment variable. " \
794
+ "See README.md for configuration instructions."
795
+ end
796
+ ```
797
+
798
+ ---
799
+
800
+ ## See Also
801
+
802
+ - [HTM API](htm.md) - Main class that uses Database config
803
+ - [LongTermMemory API](long-term-memory.md) - Uses database for storage
804
+ - [Database Schema](../development/schema.md) - Complete schema documentation
805
+ - [TimescaleDB Documentation](https://docs.timescale.com/) - Hypertable features
806
+ - [pgvector Documentation](https://github.com/pgvector/pgvector) - Vector search