htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,596 @@
1
+ # Database Schema Documentation
2
+
3
+ This document provides a comprehensive reference for HTM's PostgreSQL database schema, including all tables, indexes, and relationships.
4
+
5
+ ## Schema Overview
6
+
7
+ HTM uses PostgreSQL 17 with pgvector and pg_trgm extensions to provide:
8
+
9
+ - **Vector similarity search** via pgvector for semantic memory retrieval
10
+ - **Full-text search** with PostgreSQL's built-in tsvector capabilities
11
+ - **Fuzzy matching** using pg_trgm for flexible text search
12
+ - **Many-to-many relationships** for flexible tagging and categorization
13
+
14
+ ### Required Extensions
15
+
16
+ HTM requires these PostgreSQL extensions:
17
+
18
+ ```sql
19
+ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
20
+ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
21
+ ```
22
+
23
+ ## Entity-Relationship Diagram
24
+
25
+ Here's the complete database structure:
26
+
27
+ ```svg
28
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 900" style="background: transparent;">
29
+ <defs>
30
+ <style>
31
+ .table-box { fill: #1e1e1e; stroke: #4a9eff; stroke-width: 2; }
32
+ .table-header { fill: #2d5a8e; }
33
+ .text-header { fill: #ffffff; font-family: monospace; font-size: 14px; font-weight: bold; }
34
+ .text-field { fill: #d4d4d4; font-family: monospace; font-size: 11px; }
35
+ .text-type { fill: #8cb4e8; font-family: monospace; font-size: 10px; }
36
+ .relation-line { stroke: #4a9eff; stroke-width: 1.5; fill: none; }
37
+ .arrow { fill: #4a9eff; }
38
+ .join-table { fill: #1e3a1e; stroke: #4a9eff; stroke-width: 2; }
39
+ </style>
40
+ </defs>
41
+
42
+ <!-- Robots Table -->
43
+ <rect class="table-box" x="50" y="50" width="280" height="140" rx="5"/>
44
+ <rect class="table-header" x="50" y="50" width="280" height="35" rx="5"/>
45
+ <text class="text-header" x="190" y="73" text-anchor="middle">robots</text>
46
+
47
+ <text class="text-field" x="60" y="100">id</text>
48
+ <text class="text-type" x="320" y="100" text-anchor="end">BIGSERIAL PK</text>
49
+
50
+ <text class="text-field" x="60" y="120">name</text>
51
+ <text class="text-type" x="320" y="120" text-anchor="end">TEXT</text>
52
+
53
+ <text class="text-field" x="60" y="140">created_at</text>
54
+ <text class="text-type" x="320" y="140" text-anchor="end">TIMESTAMPTZ</text>
55
+
56
+ <text class="text-field" x="60" y="160">last_active</text>
57
+ <text class="text-type" x="320" y="160" text-anchor="end">TIMESTAMPTZ</text>
58
+
59
+ <text class="text-field" x="60" y="180">metadata</text>
60
+ <text class="text-type" x="320" y="180" text-anchor="end">JSONB</text>
61
+
62
+ <!-- Nodes Table -->
63
+ <rect class="table-box" x="50" y="250" width="280" height="400" rx="5"/>
64
+ <rect class="table-header" x="50" y="250" width="280" height="35" rx="5"/>
65
+ <text class="text-header" x="190" y="273" text-anchor="middle">nodes</text>
66
+
67
+ <text class="text-field" x="60" y="300">id</text>
68
+ <text class="text-type" x="320" y="300" text-anchor="end">BIGSERIAL PK</text>
69
+
70
+ <text class="text-field" x="60" y="320">content</text>
71
+ <text class="text-type" x="320" y="320" text-anchor="end">TEXT NOT NULL</text>
72
+
73
+ <text class="text-field" x="60" y="340">speaker</text>
74
+ <text class="text-type" x="320" y="340" text-anchor="end">TEXT NOT NULL</text>
75
+
76
+ <text class="text-field" x="60" y="360">type</text>
77
+ <text class="text-type" x="320" y="360" text-anchor="end">TEXT</text>
78
+
79
+ <text class="text-field" x="60" y="380">category</text>
80
+ <text class="text-type" x="320" y="380" text-anchor="end">TEXT</text>
81
+
82
+ <text class="text-field" x="60" y="400">importance</text>
83
+ <text class="text-type" x="320" y="400" text-anchor="end">DOUBLE PRECISION</text>
84
+
85
+ <text class="text-field" x="60" y="420">created_at</text>
86
+ <text class="text-type" x="320" y="420" text-anchor="end">TIMESTAMPTZ</text>
87
+
88
+ <text class="text-field" x="60" y="440">updated_at</text>
89
+ <text class="text-type" x="320" y="440" text-anchor="end">TIMESTAMPTZ</text>
90
+
91
+ <text class="text-field" x="60" y="460">last_accessed</text>
92
+ <text class="text-type" x="320" y="460" text-anchor="end">TIMESTAMPTZ</text>
93
+
94
+ <text class="text-field" x="60" y="480">token_count</text>
95
+ <text class="text-type" x="320" y="480" text-anchor="end">INTEGER</text>
96
+
97
+ <text class="text-field" x="60" y="500">in_working_memory</text>
98
+ <text class="text-type" x="320" y="500" text-anchor="end">BOOLEAN</text>
99
+
100
+ <text class="text-field" x="60" y="520">robot_id</text>
101
+ <text class="text-type" x="320" y="520" text-anchor="end">BIGINT FK</text>
102
+
103
+ <text class="text-field" x="60" y="540">embedding</text>
104
+ <text class="text-type" x="320" y="540" text-anchor="end">vector(2000)</text>
105
+
106
+ <text class="text-field" x="60" y="560">embedding_dimension</text>
107
+ <text class="text-type" x="320" y="560" text-anchor="end">INTEGER</text>
108
+
109
+ <!-- Tags Table -->
110
+ <rect class="table-box" x="850" y="250" width="280" height="120" rx="5"/>
111
+ <rect class="table-header" x="850" y="250" width="280" height="35" rx="5"/>
112
+ <text class="text-header" x="990" y="273" text-anchor="middle">tags</text>
113
+
114
+ <text class="text-field" x="860" y="300">id</text>
115
+ <text class="text-type" x="1120" y="300" text-anchor="end">BIGSERIAL PK</text>
116
+
117
+ <text class="text-field" x="860" y="320">name</text>
118
+ <text class="text-type" x="1120" y="320" text-anchor="end">TEXT UNIQUE</text>
119
+
120
+ <text class="text-field" x="860" y="340">created_at</text>
121
+ <text class="text-type" x="1120" y="340" text-anchor="end">TIMESTAMPTZ</text>
122
+
123
+ <!-- nodes_tags Join Table -->
124
+ <rect class="join-table" x="450" y="420" width="280" height="140" rx="5"/>
125
+ <rect class="table-header" x="450" y="420" width="280" height="35" rx="5"/>
126
+ <text class="text-header" x="590" y="443" text-anchor="middle">nodes_tags</text>
127
+
128
+ <text class="text-field" x="460" y="470">id</text>
129
+ <text class="text-type" x="720" y="470" text-anchor="end">BIGSERIAL PK</text>
130
+
131
+ <text class="text-field" x="460" y="490">node_id</text>
132
+ <text class="text-type" x="720" y="490" text-anchor="end">BIGINT FK</text>
133
+
134
+ <text class="text-field" x="460" y="510">tag_id</text>
135
+ <text class="text-type" x="720" y="510" text-anchor="end">BIGINT FK</text>
136
+
137
+ <text class="text-field" x="460" y="530">created_at</text>
138
+ <text class="text-type" x="720" y="530" text-anchor="end">TIMESTAMPTZ</text>
139
+
140
+ <!-- Relationships: robots -> nodes -->
141
+ <path class="relation-line" d="M 190 190 L 190 250"/>
142
+ <polygon class="arrow" points="190,250 185,240 195,240"/>
143
+
144
+ <!-- Relationships: nodes -> nodes_tags -->
145
+ <path class="relation-line" d="M 330 490 L 450 490"/>
146
+ <polygon class="arrow" points="450,490 440,485 440,495"/>
147
+
148
+ <!-- Relationships: tags -> nodes_tags -->
149
+ <path class="relation-line" d="M 850 310 L 730 310 L 730 510 L 730 510"/>
150
+ <polygon class="arrow" points="730,510 725,500 735,500"/>
151
+
152
+ <!-- Legend -->
153
+ <text class="text-field" x="50" y="720" font-weight="bold">Legend:</text>
154
+ <text class="text-field" x="50" y="740">PK = Primary Key</text>
155
+ <text class="text-field" x="200" y="740">FK = Foreign Key</text>
156
+ <text class="text-field" x="50" y="760">Green box = Join table (many-to-many)</text>
157
+
158
+ <!-- Annotations -->
159
+ <text class="text-field" x="400" y="370" font-style="italic">1:N</text>
160
+ <text class="text-field" x="380" y="480" font-style="italic">N:M</text>
161
+ <text class="text-field" x="770" y="480" font-style="italic">N:M</text>
162
+ </svg>
163
+ ```
164
+
165
+ ## Table Definitions
166
+
167
+ ### robots
168
+
169
+ The robots table stores registration and metadata for all LLM agents using the HTM system.
170
+
171
+ **Purpose**: Registry of all robots (LLM agents) with their configuration and activity tracking.
172
+
173
+ ```sql
174
+ CREATE TABLE public.robots (
175
+ id bigint NOT NULL,
176
+ name text,
177
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
178
+ last_active timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
179
+ metadata jsonb
180
+ );
181
+
182
+ ALTER TABLE ONLY public.robots ALTER COLUMN id SET DEFAULT nextval('public.robots_id_seq'::regclass);
183
+ ALTER TABLE ONLY public.robots ADD CONSTRAINT robots_pkey PRIMARY KEY (id);
184
+ ```
185
+
186
+ **Columns**:
187
+
188
+ | Column | Type | Nullable | Default | Description |
189
+ |--------|------|----------|---------|-------------|
190
+ | `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
191
+ | `name` | TEXT | YES | NULL | Human-readable name for the robot |
192
+ | `created_at` | TIMESTAMPTZ | YES | NOW() | When the robot was first registered |
193
+ | `last_active` | TIMESTAMPTZ | YES | NOW() | Last time the robot accessed the system |
194
+ | `metadata` | JSONB | YES | NULL | Robot-specific configuration and metadata |
195
+
196
+ **Indexes**:
197
+ - `PRIMARY KEY` on `id`
198
+
199
+ **Relationships**:
200
+ - One robot has many nodes (1:N)
201
+
202
+ ---
203
+
204
+ ### nodes
205
+
206
+ The core table storing all memory nodes with vector embeddings for semantic search.
207
+
208
+ **Purpose**: Stores all memories (conversation messages, facts, decisions, code, etc.) with full-text and vector search capabilities.
209
+
210
+ ```sql
211
+ CREATE TABLE public.nodes (
212
+ id bigint NOT NULL,
213
+ content text NOT NULL,
214
+ speaker text NOT NULL,
215
+ type text,
216
+ category text,
217
+ importance double precision DEFAULT 1.0,
218
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
219
+ updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
220
+ last_accessed timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
221
+ token_count integer,
222
+ in_working_memory boolean DEFAULT false,
223
+ robot_id bigint NOT NULL,
224
+ embedding public.vector(2000),
225
+ embedding_dimension integer,
226
+ CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
227
+ );
228
+
229
+ ALTER TABLE ONLY public.nodes ALTER COLUMN id SET DEFAULT nextval('public.nodes_id_seq'::regclass);
230
+ ALTER TABLE ONLY public.nodes ADD CONSTRAINT nodes_pkey PRIMARY KEY (id);
231
+ ALTER TABLE ONLY public.nodes
232
+ ADD CONSTRAINT fk_rails_60162e9d3a FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
233
+ ```
234
+
235
+ **Columns**:
236
+
237
+ | Column | Type | Nullable | Default | Description |
238
+ |--------|------|----------|---------|-------------|
239
+ | `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
240
+ | `content` | TEXT | NO | - | The conversation message/utterance content |
241
+ | `speaker` | TEXT | NO | - | Who said it: user or robot name |
242
+ | `type` | TEXT | YES | NULL | Memory type: fact, context, code, preference, decision, question |
243
+ | `category` | TEXT | YES | NULL | Optional category for organizing memories |
244
+ | `importance` | DOUBLE PRECISION | YES | 1.0 | Importance score (0.0-1.0) for prioritizing recall |
245
+ | `created_at` | TIMESTAMPTZ | YES | NOW() | When this memory was created |
246
+ | `updated_at` | TIMESTAMPTZ | YES | NOW() | When this memory was last modified |
247
+ | `last_accessed` | TIMESTAMPTZ | YES | NOW() | When this memory was last accessed |
248
+ | `token_count` | INTEGER | YES | NULL | Number of tokens in the content (for context budget management) |
249
+ | `in_working_memory` | BOOLEAN | YES | FALSE | Whether this memory is currently in working memory |
250
+ | `robot_id` | BIGINT | NO | - | ID of the robot that owns this memory |
251
+ | `embedding` | vector(2000) | YES | NULL | Vector embedding (max 2000 dimensions) for semantic search |
252
+ | `embedding_dimension` | INTEGER | YES | NULL | Actual number of dimensions used in the embedding vector (max 2000) |
253
+
254
+ **Indexes**:
255
+
256
+ - `PRIMARY KEY` on `id`
257
+ - `idx_nodes_robot_id` BTREE on `robot_id`
258
+ - `idx_nodes_speaker` BTREE on `speaker`
259
+ - `idx_nodes_type` BTREE on `type`
260
+ - `idx_nodes_category` BTREE on `category`
261
+ - `idx_nodes_created_at` BTREE on `created_at`
262
+ - `idx_nodes_updated_at` BTREE on `updated_at`
263
+ - `idx_nodes_last_accessed` BTREE on `last_accessed`
264
+ - `idx_nodes_in_working_memory` BTREE on `in_working_memory`
265
+ - `idx_nodes_embedding` HNSW on `embedding` using `vector_cosine_ops` (m=16, ef_construction=64)
266
+ - `idx_nodes_content_gin` GIN on `to_tsvector('english', content)` for full-text search
267
+ - `idx_nodes_content_trgm` GIN on `content` using `gin_trgm_ops` for fuzzy matching
268
+
269
+ **Foreign Keys**:
270
+ - `robot_id` references `robots(id)` ON DELETE CASCADE
271
+
272
+ **Relationships**:
273
+ - Many nodes belong to one robot (N:1)
274
+ - Many nodes have many tags through nodes_tags (N:M)
275
+
276
+ **Check Constraints**:
277
+ - `check_embedding_dimension`: Ensures embedding_dimension is NULL or between 1 and 2000
278
+
279
+ ---
280
+
281
+ ### tags
282
+
283
+ The tags table stores unique hierarchical tag names for categorization.
284
+
285
+ **Purpose**: Provides flexible, hierarchical categorization using colon-separated namespaces (e.g., `database:postgresql:timescaledb`).
286
+
287
+ ```sql
288
+ CREATE TABLE public.tags (
289
+ id bigint NOT NULL,
290
+ name text NOT NULL,
291
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
292
+ );
293
+
294
+ ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id_seq'::regclass);
295
+ ALTER TABLE ONLY public.tags ADD CONSTRAINT tags_pkey PRIMARY KEY (id);
296
+ ```
297
+
298
+ **Columns**:
299
+
300
+ | Column | Type | Nullable | Default | Description |
301
+ |--------|------|----------|---------|-------------|
302
+ | `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
303
+ | `name` | TEXT | NO | - | Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb) |
304
+ | `created_at` | TIMESTAMPTZ | YES | NOW() | When this tag was created |
305
+
306
+ **Indexes**:
307
+ - `PRIMARY KEY` on `id`
308
+ - `idx_tags_name_unique` UNIQUE BTREE on `name`
309
+ - `idx_tags_name_pattern` BTREE on `name` with `text_pattern_ops` for pattern matching
310
+
311
+ **Relationships**:
312
+ - Many tags belong to many nodes through nodes_tags (N:M)
313
+
314
+ **Tag Hierarchy**:
315
+
316
+ Tags use colon-separated hierarchies for organization:
317
+ - `programming:ruby:gems` - Programming > Ruby > Gems
318
+ - `database:postgresql:extensions` - Database > PostgreSQL > Extensions
319
+ - `ai:llm:embeddings` - AI > LLM > Embeddings
320
+
321
+ This allows querying by prefix to find all related tags:
322
+ ```sql
323
+ SELECT * FROM tags WHERE name LIKE 'database:%'; -- All database-related tags
324
+ SELECT * FROM tags WHERE name LIKE 'ai:llm:%'; -- All LLM-related tags
325
+ ```
326
+
327
+ ---
328
+
329
+ ### nodes_tags
330
+
331
+ The nodes_tags join table implements the many-to-many relationship between nodes and tags.
332
+
333
+ **Purpose**: Links nodes to tags, allowing each node to have multiple tags and each tag to be applied to multiple nodes.
334
+
335
+ ```sql
336
+ CREATE TABLE public.nodes_tags (
337
+ id bigint NOT NULL,
338
+ node_id bigint NOT NULL,
339
+ tag_id bigint NOT NULL,
340
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
341
+ );
342
+
343
+ ALTER TABLE ONLY public.nodes_tags ALTER COLUMN id SET DEFAULT nextval('public.node_tags_id_seq'::regclass);
344
+ ALTER TABLE ONLY public.nodes_tags ADD CONSTRAINT node_tags_pkey PRIMARY KEY (id);
345
+
346
+ ALTER TABLE ONLY public.nodes_tags
347
+ ADD CONSTRAINT fk_rails_b0b726ecf8 FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE;
348
+ ALTER TABLE ONLY public.nodes_tags
349
+ ADD CONSTRAINT fk_rails_eccc99cec5 FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE;
350
+ ```
351
+
352
+ **Columns**:
353
+
354
+ | Column | Type | Nullable | Default | Description |
355
+ |--------|------|----------|---------|-------------|
356
+ | `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
357
+ | `node_id` | BIGINT | NO | - | ID of the node being tagged |
358
+ | `tag_id` | BIGINT | NO | - | ID of the tag being applied |
359
+ | `created_at` | TIMESTAMPTZ | YES | NOW() | When this association was created |
360
+
361
+ **Indexes**:
362
+ - `PRIMARY KEY` on `id`
363
+ - `idx_node_tags_unique` UNIQUE BTREE on `(node_id, tag_id)` - Prevents duplicate associations
364
+ - `idx_node_tags_node_id` BTREE on `node_id` - Fast lookups of tags for a node
365
+ - `idx_node_tags_tag_id` BTREE on `tag_id` - Fast lookups of nodes for a tag
366
+
367
+ **Foreign Keys**:
368
+ - `node_id` references `nodes(id)` ON DELETE CASCADE
369
+ - `tag_id` references `tags(id)` ON DELETE CASCADE
370
+
371
+ **Cascade Behavior**:
372
+ - When a node is deleted, all its tag associations are automatically removed
373
+ - When a tag is deleted, all associations to that tag are automatically removed
374
+ - The join table ensures referential integrity between nodes and tags
375
+
376
+ ---
377
+
378
+ ## Common Query Patterns
379
+
380
+ ### Finding Tags for a Node
381
+
382
+ ```sql
383
+ SELECT t.name
384
+ FROM tags t
385
+ JOIN nodes_tags nt ON t.id = nt.tag_id
386
+ WHERE nt.node_id = $1
387
+ ORDER BY t.name;
388
+ ```
389
+
390
+ ### Finding Nodes with a Specific Tag
391
+
392
+ ```sql
393
+ SELECT n.*
394
+ FROM nodes n
395
+ JOIN nodes_tags nt ON n.id = nt.node_id
396
+ JOIN tags t ON nt.tag_id = t.id
397
+ WHERE t.name = 'database:postgresql'
398
+ ORDER BY n.created_at DESC;
399
+ ```
400
+
401
+ ### Finding Nodes with Hierarchical Tag Prefix
402
+
403
+ ```sql
404
+ SELECT n.*
405
+ FROM nodes n
406
+ JOIN nodes_tags nt ON n.id = nt.node_id
407
+ JOIN tags t ON nt.tag_id = t.id
408
+ WHERE t.name LIKE 'ai:llm:%'
409
+ ORDER BY n.created_at DESC;
410
+ ```
411
+
412
+ ### Finding Related Topics by Shared Nodes
413
+
414
+ ```sql
415
+ SELECT
416
+ t1.name AS topic1,
417
+ t2.name AS topic2,
418
+ COUNT(DISTINCT nt1.node_id) AS shared_nodes
419
+ FROM tags t1
420
+ JOIN nodes_tags nt1 ON t1.id = nt1.tag_id
421
+ JOIN nodes_tags nt2 ON nt1.node_id = nt2.node_id
422
+ JOIN tags t2 ON nt2.tag_id = t2.id
423
+ WHERE t1.name < t2.name
424
+ GROUP BY t1.name, t2.name
425
+ HAVING COUNT(DISTINCT nt1.node_id) >= 2
426
+ ORDER BY shared_nodes DESC;
427
+ ```
428
+
429
+ ### Vector Similarity Search with Tag Filter
430
+
431
+ ```sql
432
+ SELECT n.*, n.embedding <=> $1::vector AS distance
433
+ FROM nodes n
434
+ JOIN nodes_tags nt ON n.id = nt.node_id
435
+ JOIN tags t ON nt.tag_id = t.id
436
+ WHERE t.name = 'programming:ruby'
437
+ AND n.embedding IS NOT NULL
438
+ ORDER BY distance
439
+ LIMIT 10;
440
+ ```
441
+
442
+ ### Full-Text Search with Tag Filter
443
+
444
+ ```sql
445
+ SELECT n.*, ts_rank(to_tsvector('english', n.content), query) AS rank
446
+ FROM nodes n
447
+ JOIN nodes_tags nt ON n.id = nt.node_id
448
+ JOIN tags t ON nt.tag_id = t.id,
449
+ to_tsquery('english', 'database & optimization') query
450
+ WHERE to_tsvector('english', n.content) @@ query
451
+ AND t.name LIKE 'database:%'
452
+ ORDER BY rank DESC
453
+ LIMIT 20;
454
+ ```
455
+
456
+ ---
457
+
458
+ ## Database Optimization
459
+
460
+ ### Vector Search Performance
461
+
462
+ The `idx_nodes_embedding` index uses HNSW (Hierarchical Navigable Small World) algorithm for fast approximate nearest neighbor search:
463
+
464
+ - **m=16**: Number of bi-directional links per node (higher = better recall, more memory)
465
+ - **ef_construction=64**: Size of dynamic candidate list during index construction (higher = better quality, slower build)
466
+
467
+ For queries, you can adjust `ef_search` (defaults to 40):
468
+ ```sql
469
+ SET hnsw.ef_search = 100; -- Better recall, slower queries
470
+ ```
471
+
472
+ ### Full-Text Search Performance
473
+
474
+ The `idx_nodes_content_gin` index enables fast full-text search using PostgreSQL's tsvector:
475
+
476
+ ```sql
477
+ -- The WHERE expression must match the indexed expression to_tsvector('english', content) for the GIN index to be used
478
+ SELECT * FROM nodes
479
+ WHERE to_tsvector('english', content) @@ to_tsquery('english', 'memory & retrieval');
480
+ ```
481
+
482
+ ### Fuzzy Matching Performance
483
+
484
+ The `idx_nodes_content_trgm` index enables similarity search and pattern matching:
485
+
486
+ ```sql
487
+ -- Similarity search
488
+ SELECT * FROM nodes
489
+ WHERE content % 'semantic retreval'; -- Handles typos
490
+
491
+ -- Pattern matching
492
+ SELECT * FROM nodes
493
+ WHERE content ILIKE '%memory%'; -- Case-insensitive substring match; uses trigram index
494
+ ```
495
+
496
+ ### Index Maintenance
497
+
498
+ Monitor and maintain indexes for optimal performance:
499
+
500
+ ```sql
501
+ -- Check index usage
502
+ SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
503
+ FROM pg_stat_user_indexes
504
+ WHERE schemaname = 'public'
505
+ ORDER BY idx_scan DESC;
506
+
507
+ -- Reindex if needed
508
+ REINDEX INDEX CONCURRENTLY idx_nodes_embedding;
509
+ REINDEX INDEX CONCURRENTLY idx_nodes_content_gin;
510
+ ```
511
+
512
+ ---
513
+
514
+ ## Schema Migration
515
+
516
+ The schema is managed through ActiveRecord migrations located in `db/migrate/`:
517
+
518
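+ 1. `20250101000001_enable_extensions.rb`, `20250101000002_create_robots.rb` - Enable the required extensions, then create the robots table
519
+ 2. `20250101000003_create_nodes.rb` - Creates nodes table
520
+ 3. `20250101000005_create_tags.rb`, `20250101000007_add_node_vector_indexes.rb` - Create tags and nodes_tags tables, then add the vector indexes on nodes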
521
+
522
+ To apply migrations:
523
+ ```bash
524
+ bundle exec rake htm:db:migrate
525
+ ```
526
+
527
+ To generate the current schema dump:
528
+ ```bash
529
+ bundle exec rake htm:db:schema:dump
530
+ ```
531
+
532
+ The canonical schema is maintained in `db/schema.sql`.
533
+
534
+ ---
535
+
536
+ ## Database Extensions
537
+
538
+ ### pgvector
539
+
540
+ Provides vector similarity search capabilities:
541
+
542
+ ```sql
543
+ -- Install extension
544
+ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
545
+
546
+ -- Vector operations
547
+ SELECT embedding <=> $1::vector AS cosine_distance FROM nodes; -- Cosine distance
548
+ SELECT embedding <-> $1::vector AS l2_distance FROM nodes; -- L2 distance
549
+ SELECT (embedding <#> $1::vector) * -1 AS inner_product FROM nodes; -- Inner product (<#> returns the negative inner product)
550
+ ```
551
+
552
+ ### pg_trgm
553
+
554
+ Provides trigram-based fuzzy text matching:
555
+
556
+ ```sql
557
+ -- Install extension
558
+ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
559
+
560
+ -- Trigram operations
561
+ SELECT content % 'search term' FROM nodes; -- Similarity operator
562
+ SELECT similarity(content, 'search term') FROM nodes; -- Similarity score
563
+ SELECT content ILIKE '%pattern%' FROM nodes; -- Pattern matching (the trigram index is used when this predicate appears in WHERE)
564
+ ```
565
+
566
+ ---
567
+
568
+ ## Best Practices
569
+
570
+ ### Tagging Strategy
571
+
572
+ 1. **Use hierarchical namespaces**: `category:subcategory:detail`
573
+ 2. **Be consistent with naming**: Use lowercase and keep noun forms consistent (e.g., always `embeddings`, never a mix of `embedding` and `embeddings`)
574
+ 3. **Limit depth**: 2-3 levels is optimal (e.g., `ai:llm:embeddings`)
575
+ 4. **Avoid redundancy**: Don't duplicate information already in node fields
576
+
577
+ ### Node Management
578
+
579
+ 1. **Set appropriate importance**: Use 0.0-1.0 scale for priority-based retrieval
580
+ 2. **Update last_accessed**: Touch timestamp when retrieving for LRU eviction
581
+ 3. **Manage token_count**: Update when content changes for working memory budget
582
+ 4. **Use appropriate types**: fact, context, code, preference, decision, question
583
+
584
+ ### Search Strategy
585
+
586
+ 1. **Vector search**: Best for semantic similarity ("concepts like X")
587
+ 2. **Full-text search**: Best for keyword matching ("documents containing Y")
588
+ 3. **Fuzzy search**: Best for typo tolerance and pattern matching
589
+ 4. **Hybrid search**: Combine vector + full-text with weighted scores
590
+
591
+ ### Performance Tuning
592
+
593
+ 1. **Monitor index usage**: Use pg_stat_user_indexes
594
+ 2. **Vacuum regularly**: Especially after bulk deletes
595
+ 3. **Adjust HNSW parameters**: Balance recall vs speed based on dataset size
596
+ 4. **Use connection pooling**: Managed by HTM::LongTermMemory