htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
# Database Schema Documentation
|
|
2
|
+
|
|
3
|
+
This document provides a comprehensive reference for HTM's PostgreSQL database schema, including all tables, indexes, and relationships.
|
|
4
|
+
|
|
5
|
+
## Schema Overview
|
|
6
|
+
|
|
7
|
+
HTM uses PostgreSQL 17 with pgvector and pg_trgm extensions to provide:
|
|
8
|
+
|
|
9
|
+
- **Vector similarity search** via pgvector for semantic memory retrieval
|
|
10
|
+
- **Full-text search** with PostgreSQL's built-in tsvector capabilities
|
|
11
|
+
- **Fuzzy matching** using pg_trgm for flexible text search
|
|
12
|
+
- **Many-to-many relationships** for flexible tagging and categorization
|
|
13
|
+
|
|
14
|
+
### Required Extensions
|
|
15
|
+
|
|
16
|
+
HTM requires these PostgreSQL extensions:
|
|
17
|
+
|
|
18
|
+
```sql
|
|
19
|
+
CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
|
|
20
|
+
CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Entity-Relationship Diagram
|
|
24
|
+
|
|
25
|
+
Here's the complete database structure:
|
|
26
|
+
|
|
27
|
+
```svg
|
|
28
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 900" style="background: transparent;">
|
|
29
|
+
<defs>
|
|
30
|
+
<style>
|
|
31
|
+
.table-box { fill: #1e1e1e; stroke: #4a9eff; stroke-width: 2; }
|
|
32
|
+
.table-header { fill: #2d5a8e; }
|
|
33
|
+
.text-header { fill: #ffffff; font-family: monospace; font-size: 14px; font-weight: bold; }
|
|
34
|
+
.text-field { fill: #d4d4d4; font-family: monospace; font-size: 11px; }
|
|
35
|
+
.text-type { fill: #8cb4e8; font-family: monospace; font-size: 10px; }
|
|
36
|
+
.relation-line { stroke: #4a9eff; stroke-width: 1.5; fill: none; }
|
|
37
|
+
.arrow { fill: #4a9eff; }
|
|
38
|
+
.join-table { fill: #1e3a1e; stroke: #4a9eff; stroke-width: 2; }
|
|
39
|
+
</style>
|
|
40
|
+
</defs>
|
|
41
|
+
|
|
42
|
+
<!-- Robots Table -->
|
|
43
|
+
<rect class="table-box" x="50" y="50" width="280" height="140" rx="5"/>
|
|
44
|
+
<rect class="table-header" x="50" y="50" width="280" height="35" rx="5"/>
|
|
45
|
+
<text class="text-header" x="190" y="73" text-anchor="middle">robots</text>
|
|
46
|
+
|
|
47
|
+
<text class="text-field" x="60" y="100">id</text>
|
|
48
|
+
<text class="text-type" x="320" y="100" text-anchor="end">BIGSERIAL PK</text>
|
|
49
|
+
|
|
50
|
+
<text class="text-field" x="60" y="120">name</text>
|
|
51
|
+
<text class="text-type" x="320" y="120" text-anchor="end">TEXT</text>
|
|
52
|
+
|
|
53
|
+
<text class="text-field" x="60" y="140">created_at</text>
|
|
54
|
+
<text class="text-type" x="320" y="140" text-anchor="end">TIMESTAMPTZ</text>
|
|
55
|
+
|
|
56
|
+
<text class="text-field" x="60" y="160">last_active</text>
|
|
57
|
+
<text class="text-type" x="320" y="160" text-anchor="end">TIMESTAMPTZ</text>
|
|
58
|
+
|
|
59
|
+
<text class="text-field" x="60" y="180">metadata</text>
|
|
60
|
+
<text class="text-type" x="320" y="180" text-anchor="end">JSONB</text>
|
|
61
|
+
|
|
62
|
+
<!-- Nodes Table -->
|
|
63
|
+
<rect class="table-box" x="50" y="250" width="280" height="400" rx="5"/>
|
|
64
|
+
<rect class="table-header" x="50" y="250" width="280" height="35" rx="5"/>
|
|
65
|
+
<text class="text-header" x="190" y="273" text-anchor="middle">nodes</text>
|
|
66
|
+
|
|
67
|
+
<text class="text-field" x="60" y="300">id</text>
|
|
68
|
+
<text class="text-type" x="320" y="300" text-anchor="end">BIGSERIAL PK</text>
|
|
69
|
+
|
|
70
|
+
<text class="text-field" x="60" y="320">content</text>
|
|
71
|
+
<text class="text-type" x="320" y="320" text-anchor="end">TEXT NOT NULL</text>
|
|
72
|
+
|
|
73
|
+
<text class="text-field" x="60" y="340">speaker</text>
|
|
74
|
+
<text class="text-type" x="320" y="340" text-anchor="end">TEXT NOT NULL</text>
|
|
75
|
+
|
|
76
|
+
<text class="text-field" x="60" y="360">type</text>
|
|
77
|
+
<text class="text-type" x="320" y="360" text-anchor="end">TEXT</text>
|
|
78
|
+
|
|
79
|
+
<text class="text-field" x="60" y="380">category</text>
|
|
80
|
+
<text class="text-type" x="320" y="380" text-anchor="end">TEXT</text>
|
|
81
|
+
|
|
82
|
+
<text class="text-field" x="60" y="400">importance</text>
|
|
83
|
+
<text class="text-type" x="320" y="400" text-anchor="end">DOUBLE PRECISION</text>
|
|
84
|
+
|
|
85
|
+
<text class="text-field" x="60" y="420">created_at</text>
|
|
86
|
+
<text class="text-type" x="320" y="420" text-anchor="end">TIMESTAMPTZ</text>
|
|
87
|
+
|
|
88
|
+
<text class="text-field" x="60" y="440">updated_at</text>
|
|
89
|
+
<text class="text-type" x="320" y="440" text-anchor="end">TIMESTAMPTZ</text>
|
|
90
|
+
|
|
91
|
+
<text class="text-field" x="60" y="460">last_accessed</text>
|
|
92
|
+
<text class="text-type" x="320" y="460" text-anchor="end">TIMESTAMPTZ</text>
|
|
93
|
+
|
|
94
|
+
<text class="text-field" x="60" y="480">token_count</text>
|
|
95
|
+
<text class="text-type" x="320" y="480" text-anchor="end">INTEGER</text>
|
|
96
|
+
|
|
97
|
+
<text class="text-field" x="60" y="500">in_working_memory</text>
|
|
98
|
+
<text class="text-type" x="320" y="500" text-anchor="end">BOOLEAN</text>
|
|
99
|
+
|
|
100
|
+
<text class="text-field" x="60" y="520">robot_id</text>
|
|
101
|
+
<text class="text-type" x="320" y="520" text-anchor="end">BIGINT FK</text>
|
|
102
|
+
|
|
103
|
+
<text class="text-field" x="60" y="540">embedding</text>
|
|
104
|
+
<text class="text-type" x="320" y="540" text-anchor="end">vector(2000)</text>
|
|
105
|
+
|
|
106
|
+
<text class="text-field" x="60" y="560">embedding_dimension</text>
|
|
107
|
+
<text class="text-type" x="320" y="560" text-anchor="end">INTEGER</text>
|
|
108
|
+
|
|
109
|
+
<!-- Tags Table -->
|
|
110
|
+
<rect class="table-box" x="850" y="250" width="280" height="120" rx="5"/>
|
|
111
|
+
<rect class="table-header" x="850" y="250" width="280" height="35" rx="5"/>
|
|
112
|
+
<text class="text-header" x="990" y="273" text-anchor="middle">tags</text>
|
|
113
|
+
|
|
114
|
+
<text class="text-field" x="860" y="300">id</text>
|
|
115
|
+
<text class="text-type" x="1120" y="300" text-anchor="end">BIGSERIAL PK</text>
|
|
116
|
+
|
|
117
|
+
<text class="text-field" x="860" y="320">name</text>
|
|
118
|
+
<text class="text-type" x="1120" y="320" text-anchor="end">TEXT UNIQUE</text>
|
|
119
|
+
|
|
120
|
+
<text class="text-field" x="860" y="340">created_at</text>
|
|
121
|
+
<text class="text-type" x="1120" y="340" text-anchor="end">TIMESTAMPTZ</text>
|
|
122
|
+
|
|
123
|
+
<!-- nodes_tags Join Table -->
|
|
124
|
+
<rect class="join-table" x="450" y="420" width="280" height="140" rx="5"/>
|
|
125
|
+
<rect class="table-header" x="450" y="420" width="280" height="35" rx="5"/>
|
|
126
|
+
<text class="text-header" x="590" y="443" text-anchor="middle">nodes_tags</text>
|
|
127
|
+
|
|
128
|
+
<text class="text-field" x="460" y="470">id</text>
|
|
129
|
+
<text class="text-type" x="720" y="470" text-anchor="end">BIGSERIAL PK</text>
|
|
130
|
+
|
|
131
|
+
<text class="text-field" x="460" y="490">node_id</text>
|
|
132
|
+
<text class="text-type" x="720" y="490" text-anchor="end">BIGINT FK</text>
|
|
133
|
+
|
|
134
|
+
<text class="text-field" x="460" y="510">tag_id</text>
|
|
135
|
+
<text class="text-type" x="720" y="510" text-anchor="end">BIGINT FK</text>
|
|
136
|
+
|
|
137
|
+
<text class="text-field" x="460" y="530">created_at</text>
|
|
138
|
+
<text class="text-type" x="720" y="530" text-anchor="end">TIMESTAMPTZ</text>
|
|
139
|
+
|
|
140
|
+
<!-- Relationships: robots -> nodes -->
|
|
141
|
+
<path class="relation-line" d="M 190 190 L 190 250"/>
|
|
142
|
+
<polygon class="arrow" points="190,250 185,240 195,240"/>
|
|
143
|
+
|
|
144
|
+
<!-- Relationships: nodes -> nodes_tags -->
|
|
145
|
+
<path class="relation-line" d="M 330 490 L 450 490"/>
|
|
146
|
+
<polygon class="arrow" points="450,490 440,485 440,495"/>
|
|
147
|
+
|
|
148
|
+
<!-- Relationships: tags -> nodes_tags -->
|
|
149
|
+
<path class="relation-line" d="M 850 310 L 730 310 L 730 510 L 730 510"/>
|
|
150
|
+
<polygon class="arrow" points="730,510 725,500 735,500"/>
|
|
151
|
+
|
|
152
|
+
<!-- Legend -->
|
|
153
|
+
<text class="text-field" x="50" y="720" font-weight="bold">Legend:</text>
|
|
154
|
+
<text class="text-field" x="50" y="740">PK = Primary Key</text>
|
|
155
|
+
<text class="text-field" x="200" y="740">FK = Foreign Key</text>
|
|
156
|
+
<text class="text-field" x="50" y="760">Green box = Join table (many-to-many)</text>
|
|
157
|
+
|
|
158
|
+
<!-- Annotations -->
|
|
159
|
+
<text class="text-field" x="400" y="370" font-style="italic">1:N</text>
|
|
160
|
+
<text class="text-field" x="380" y="480" font-style="italic">N:M</text>
|
|
161
|
+
<text class="text-field" x="770" y="480" font-style="italic">N:M</text>
|
|
162
|
+
</svg>
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Table Definitions
|
|
166
|
+
|
|
167
|
+
### robots
|
|
168
|
+
|
|
169
|
+
The robots table stores registration and metadata for all LLM agents using the HTM system.
|
|
170
|
+
|
|
171
|
+
**Purpose**: Registry of all robots (LLM agents) with their configuration and activity tracking.
|
|
172
|
+
|
|
173
|
+
```sql
|
|
174
|
+
CREATE TABLE public.robots (
|
|
175
|
+
id bigint NOT NULL,
|
|
176
|
+
name text,
|
|
177
|
+
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
178
|
+
last_active timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
179
|
+
metadata jsonb
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
ALTER TABLE ONLY public.robots ALTER COLUMN id SET DEFAULT nextval('public.robots_id_seq'::regclass);
|
|
183
|
+
ALTER TABLE ONLY public.robots ADD CONSTRAINT robots_pkey PRIMARY KEY (id);
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Columns**:
|
|
187
|
+
|
|
188
|
+
| Column | Type | Nullable | Default | Description |
|
|
189
|
+
|--------|------|----------|---------|-------------|
|
|
190
|
+
| `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
|
|
191
|
+
| `name` | TEXT | YES | NULL | Human-readable name for the robot |
|
|
192
|
+
| `created_at` | TIMESTAMPTZ | YES | NOW() | When the robot was first registered |
|
|
193
|
+
| `last_active` | TIMESTAMPTZ | YES | NOW() | Last time the robot accessed the system |
|
|
194
|
+
| `metadata` | JSONB | YES | NULL | Robot-specific configuration and metadata |
|
|
195
|
+
|
|
196
|
+
**Indexes**:
|
|
197
|
+
- `PRIMARY KEY` on `id`
|
|
198
|
+
|
|
199
|
+
**Relationships**:
|
|
200
|
+
- One robot has many nodes (1:N)
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
### nodes
|
|
205
|
+
|
|
206
|
+
The core table storing all memory nodes with vector embeddings for semantic search.
|
|
207
|
+
|
|
208
|
+
**Purpose**: Stores all memories (conversation messages, facts, decisions, code, etc.) with full-text and vector search capabilities.
|
|
209
|
+
|
|
210
|
+
```sql
|
|
211
|
+
CREATE TABLE public.nodes (
|
|
212
|
+
id bigint NOT NULL,
|
|
213
|
+
content text NOT NULL,
|
|
214
|
+
speaker text NOT NULL,
|
|
215
|
+
type text,
|
|
216
|
+
category text,
|
|
217
|
+
importance double precision DEFAULT 1.0,
|
|
218
|
+
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
219
|
+
updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
220
|
+
last_accessed timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
|
|
221
|
+
token_count integer,
|
|
222
|
+
in_working_memory boolean DEFAULT false,
|
|
223
|
+
robot_id bigint NOT NULL,
|
|
224
|
+
embedding public.vector(2000),
|
|
225
|
+
embedding_dimension integer,
|
|
226
|
+
CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
|
|
227
|
+
);
|
|
228
|
+
|
|
229
|
+
ALTER TABLE ONLY public.nodes ALTER COLUMN id SET DEFAULT nextval('public.nodes_id_seq'::regclass);
|
|
230
|
+
ALTER TABLE ONLY public.nodes ADD CONSTRAINT nodes_pkey PRIMARY KEY (id);
|
|
231
|
+
ALTER TABLE ONLY public.nodes
|
|
232
|
+
ADD CONSTRAINT fk_rails_60162e9d3a FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**Columns**:
|
|
236
|
+
|
|
237
|
+
| Column | Type | Nullable | Default | Description |
|
|
238
|
+
|--------|------|----------|---------|-------------|
|
|
239
|
+
| `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
|
|
240
|
+
| `content` | TEXT | NO | - | The conversation message/utterance content |
|
|
241
|
+
| `speaker` | TEXT | NO | - | Who said it: user or robot name |
|
|
242
|
+
| `type` | TEXT | YES | NULL | Memory type: fact, context, code, preference, decision, question |
|
|
243
|
+
| `category` | TEXT | YES | NULL | Optional category for organizing memories |
|
|
244
|
+
| `importance` | DOUBLE PRECISION | YES | 1.0 | Importance score (0.0-1.0) for prioritizing recall |
|
|
245
|
+
| `created_at` | TIMESTAMPTZ | YES | NOW() | When this memory was created |
|
|
246
|
+
| `updated_at` | TIMESTAMPTZ | YES | NOW() | When this memory was last modified |
|
|
247
|
+
| `last_accessed` | TIMESTAMPTZ | YES | NOW() | When this memory was last accessed |
|
|
248
|
+
| `token_count` | INTEGER | YES | NULL | Number of tokens in the content (for context budget management) |
|
|
249
|
+
| `in_working_memory` | BOOLEAN | YES | FALSE | Whether this memory is currently in working memory |
|
|
250
|
+
| `robot_id` | BIGINT | NO | - | ID of the robot that owns this memory |
|
|
251
|
+
| `embedding` | vector(2000) | YES | NULL | Vector embedding (max 2000 dimensions) for semantic search |
|
|
252
|
+
| `embedding_dimension` | INTEGER | YES | NULL | Actual number of dimensions used in the embedding vector (max 2000) |
|
|
253
|
+
|
|
254
|
+
**Indexes**:
|
|
255
|
+
|
|
256
|
+
- `PRIMARY KEY` on `id`
|
|
257
|
+
- `idx_nodes_robot_id` BTREE on `robot_id`
|
|
258
|
+
- `idx_nodes_speaker` BTREE on `speaker`
|
|
259
|
+
- `idx_nodes_type` BTREE on `type`
|
|
260
|
+
- `idx_nodes_category` BTREE on `category`
|
|
261
|
+
- `idx_nodes_created_at` BTREE on `created_at`
|
|
262
|
+
- `idx_nodes_updated_at` BTREE on `updated_at`
|
|
263
|
+
- `idx_nodes_last_accessed` BTREE on `last_accessed`
|
|
264
|
+
- `idx_nodes_in_working_memory` BTREE on `in_working_memory`
|
|
265
|
+
- `idx_nodes_embedding` HNSW on `embedding` using `vector_cosine_ops` (m=16, ef_construction=64)
|
|
266
|
+
- `idx_nodes_content_gin` GIN on `to_tsvector('english', content)` for full-text search
|
|
267
|
+
- `idx_nodes_content_trgm` GIN on `content` using `gin_trgm_ops` for fuzzy matching
|
|
268
|
+
|
|
269
|
+
**Foreign Keys**:
|
|
270
|
+
- `robot_id` references `robots(id)` ON DELETE CASCADE
|
|
271
|
+
|
|
272
|
+
**Relationships**:
|
|
273
|
+
- Many nodes belong to one robot (N:1)
|
|
274
|
+
- Many nodes have many tags through nodes_tags (N:M)
|
|
275
|
+
|
|
276
|
+
**Check Constraints**:
|
|
277
|
+
- `check_embedding_dimension`: Ensures embedding_dimension is NULL or between 1 and 2000
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
### tags
|
|
282
|
+
|
|
283
|
+
The tags table stores unique hierarchical tag names for categorization.
|
|
284
|
+
|
|
285
|
+
**Purpose**: Provides flexible, hierarchical categorization using colon-separated namespaces (e.g., `database:postgresql:timescaledb`).
|
|
286
|
+
|
|
287
|
+
```sql
|
|
288
|
+
CREATE TABLE public.tags (
|
|
289
|
+
id bigint NOT NULL,
|
|
290
|
+
name text NOT NULL,
|
|
291
|
+
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
|
|
292
|
+
);
|
|
293
|
+
|
|
294
|
+
ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id_seq'::regclass);
|
|
295
|
+
ALTER TABLE ONLY public.tags ADD CONSTRAINT tags_pkey PRIMARY KEY (id);
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
**Columns**:
|
|
299
|
+
|
|
300
|
+
| Column | Type | Nullable | Default | Description |
|
|
301
|
+
|--------|------|----------|---------|-------------|
|
|
302
|
+
| `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
|
|
303
|
+
| `name` | TEXT | NO | - | Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb) |
|
|
304
|
+
| `created_at` | TIMESTAMPTZ | YES | NOW() | When this tag was created |
|
|
305
|
+
|
|
306
|
+
**Indexes**:
|
|
307
|
+
- `PRIMARY KEY` on `id`
|
|
308
|
+
- `idx_tags_name_unique` UNIQUE BTREE on `name`
|
|
309
|
+
- `idx_tags_name_pattern` BTREE on `name` with `text_pattern_ops` for pattern matching
|
|
310
|
+
|
|
311
|
+
**Relationships**:
|
|
312
|
+
- Many tags belong to many nodes through nodes_tags (N:M)
|
|
313
|
+
|
|
314
|
+
**Tag Hierarchy**:
|
|
315
|
+
|
|
316
|
+
Tags use colon-separated hierarchies for organization:
|
|
317
|
+
- `programming:ruby:gems` - Programming > Ruby > Gems
|
|
318
|
+
- `database:postgresql:extensions` - Database > PostgreSQL > Extensions
|
|
319
|
+
- `ai:llm:embeddings` - AI > LLM > Embeddings
|
|
320
|
+
|
|
321
|
+
This allows querying by prefix to find all related tags:
|
|
322
|
+
```sql
|
|
323
|
+
SELECT * FROM tags WHERE name LIKE 'database:%'; -- All database-related tags
|
|
324
|
+
SELECT * FROM tags WHERE name LIKE 'ai:llm:%'; -- All LLM-related tags
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
---
|
|
328
|
+
|
|
329
|
+
### nodes_tags
|
|
330
|
+
|
|
331
|
+
The nodes_tags join table implements the many-to-many relationship between nodes and tags.
|
|
332
|
+
|
|
333
|
+
**Purpose**: Links nodes to tags, allowing each node to have multiple tags and each tag to be applied to multiple nodes.
|
|
334
|
+
|
|
335
|
+
```sql
|
|
336
|
+
CREATE TABLE public.nodes_tags (
|
|
337
|
+
id bigint NOT NULL,
|
|
338
|
+
node_id bigint NOT NULL,
|
|
339
|
+
tag_id bigint NOT NULL,
|
|
340
|
+
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
|
|
341
|
+
);
|
|
342
|
+
|
|
343
|
+
ALTER TABLE ONLY public.nodes_tags ALTER COLUMN id SET DEFAULT nextval('public.node_tags_id_seq'::regclass);
|
|
344
|
+
ALTER TABLE ONLY public.nodes_tags ADD CONSTRAINT node_tags_pkey PRIMARY KEY (id);
|
|
345
|
+
|
|
346
|
+
ALTER TABLE ONLY public.nodes_tags
|
|
347
|
+
ADD CONSTRAINT fk_rails_b0b726ecf8 FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE;
|
|
348
|
+
ALTER TABLE ONLY public.nodes_tags
|
|
349
|
+
ADD CONSTRAINT fk_rails_eccc99cec5 FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE;
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
**Columns**:
|
|
353
|
+
|
|
354
|
+
| Column | Type | Nullable | Default | Description |
|
|
355
|
+
|--------|------|----------|---------|-------------|
|
|
356
|
+
| `id` | BIGINT | NO | AUTO | Unique identifier (primary key) |
|
|
357
|
+
| `node_id` | BIGINT | NO | - | ID of the node being tagged |
|
|
358
|
+
| `tag_id` | BIGINT | NO | - | ID of the tag being applied |
|
|
359
|
+
| `created_at` | TIMESTAMPTZ | YES | NOW() | When this association was created |
|
|
360
|
+
|
|
361
|
+
**Indexes**:
|
|
362
|
+
- `PRIMARY KEY` on `id`
|
|
363
|
+
- `idx_node_tags_unique` UNIQUE BTREE on `(node_id, tag_id)` - Prevents duplicate associations
|
|
364
|
+
- `idx_node_tags_node_id` BTREE on `node_id` - Fast lookups of tags for a node
|
|
365
|
+
- `idx_node_tags_tag_id` BTREE on `tag_id` - Fast lookups of nodes for a tag
|
|
366
|
+
|
|
367
|
+
**Foreign Keys**:
|
|
368
|
+
- `node_id` references `nodes(id)` ON DELETE CASCADE
|
|
369
|
+
- `tag_id` references `tags(id)` ON DELETE CASCADE
|
|
370
|
+
|
|
371
|
+
**Cascade Behavior**:
|
|
372
|
+
- When a node is deleted, all its tag associations are automatically removed
|
|
373
|
+
- When a tag is deleted, all associations to that tag are automatically removed
|
|
374
|
+
- The join table ensures referential integrity between nodes and tags
|
|
375
|
+
|
|
376
|
+
---
|
|
377
|
+
|
|
378
|
+
## Common Query Patterns
|
|
379
|
+
|
|
380
|
+
### Finding Tags for a Node
|
|
381
|
+
|
|
382
|
+
```sql
|
|
383
|
+
SELECT t.name
|
|
384
|
+
FROM tags t
|
|
385
|
+
JOIN nodes_tags nt ON t.id = nt.tag_id
|
|
386
|
+
WHERE nt.node_id = $1
|
|
387
|
+
ORDER BY t.name;
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
### Finding Nodes with a Specific Tag
|
|
391
|
+
|
|
392
|
+
```sql
|
|
393
|
+
SELECT n.*
|
|
394
|
+
FROM nodes n
|
|
395
|
+
JOIN nodes_tags nt ON n.id = nt.node_id
|
|
396
|
+
JOIN tags t ON nt.tag_id = t.id
|
|
397
|
+
WHERE t.name = 'database:postgresql'
|
|
398
|
+
ORDER BY n.created_at DESC;
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
### Finding Nodes with Hierarchical Tag Prefix
|
|
402
|
+
|
|
403
|
+
```sql
|
|
404
|
+
SELECT n.*
|
|
405
|
+
FROM nodes n
|
|
406
|
+
JOIN nodes_tags nt ON n.id = nt.node_id
|
|
407
|
+
JOIN tags t ON nt.tag_id = t.id
|
|
408
|
+
WHERE t.name LIKE 'ai:llm:%'
|
|
409
|
+
ORDER BY n.created_at DESC;
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
### Finding Related Topics by Shared Nodes
|
|
413
|
+
|
|
414
|
+
```sql
|
|
415
|
+
SELECT
|
|
416
|
+
t1.name AS topic1,
|
|
417
|
+
t2.name AS topic2,
|
|
418
|
+
COUNT(DISTINCT nt1.node_id) AS shared_nodes
|
|
419
|
+
FROM tags t1
|
|
420
|
+
JOIN nodes_tags nt1 ON t1.id = nt1.tag_id
|
|
421
|
+
JOIN nodes_tags nt2 ON nt1.node_id = nt2.node_id
|
|
422
|
+
JOIN tags t2 ON nt2.tag_id = t2.id
|
|
423
|
+
WHERE t1.name < t2.name
|
|
424
|
+
GROUP BY t1.name, t2.name
|
|
425
|
+
HAVING COUNT(DISTINCT nt1.node_id) >= 2
|
|
426
|
+
ORDER BY shared_nodes DESC;
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
### Vector Similarity Search with Tag Filter
|
|
430
|
+
|
|
431
|
+
```sql
|
|
432
|
+
SELECT n.*, n.embedding <=> $1::vector AS distance
|
|
433
|
+
FROM nodes n
|
|
434
|
+
JOIN nodes_tags nt ON n.id = nt.node_id
|
|
435
|
+
JOIN tags t ON nt.tag_id = t.id
|
|
436
|
+
WHERE t.name = 'programming:ruby'
|
|
437
|
+
AND n.embedding IS NOT NULL
|
|
438
|
+
ORDER BY distance
|
|
439
|
+
LIMIT 10;
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
### Full-Text Search with Tag Filter
|
|
443
|
+
|
|
444
|
+
```sql
|
|
445
|
+
SELECT n.*, ts_rank(to_tsvector('english', n.content), query) AS rank
|
|
446
|
+
FROM nodes n
|
|
447
|
+
JOIN nodes_tags nt ON n.id = nt.node_id
|
|
448
|
+
JOIN tags t ON nt.tag_id = t.id,
|
|
449
|
+
to_tsquery('english', 'database & optimization') query
|
|
450
|
+
WHERE to_tsvector('english', n.content) @@ query
|
|
451
|
+
AND t.name LIKE 'database:%'
|
|
452
|
+
ORDER BY rank DESC
|
|
453
|
+
LIMIT 20;
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
---
|
|
457
|
+
|
|
458
|
+
## Database Optimization
|
|
459
|
+
|
|
460
|
+
### Vector Search Performance
|
|
461
|
+
|
|
462
|
+
The `idx_nodes_embedding` index uses HNSW (Hierarchical Navigable Small World) algorithm for fast approximate nearest neighbor search:
|
|
463
|
+
|
|
464
|
+
- **m=16**: Number of bi-directional links per node (higher = better recall, more memory)
|
|
465
|
+
- **ef_construction=64**: Size of dynamic candidate list during index construction (higher = better quality, slower build)
|
|
466
|
+
|
|
467
|
+
For queries, you can adjust `ef_search` (defaults to 40):
|
|
468
|
+
```sql
|
|
469
|
+
SET hnsw.ef_search = 100; -- Better recall, slower queries
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
### Full-Text Search Performance
|
|
473
|
+
|
|
474
|
+
The `idx_nodes_content_gin` index enables fast full-text search using PostgreSQL's tsvector:
|
|
475
|
+
|
|
476
|
+
```sql
|
|
477
|
+
-- Query optimization with explicit tsvector
|
|
478
|
+
SELECT * FROM nodes
|
|
479
|
+
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'memory & retrieval');
|
|
480
|
+
```
|
|
481
|
+
|
|
482
|
+
### Fuzzy Matching Performance
|
|
483
|
+
|
|
484
|
+
The `idx_nodes_content_trgm` index enables similarity search and pattern matching:
|
|
485
|
+
|
|
486
|
+
```sql
|
|
487
|
+
-- Similarity search
|
|
488
|
+
SELECT * FROM nodes
|
|
489
|
+
WHERE content % 'semantic retreval'; -- Handles typos
|
|
490
|
+
|
|
491
|
+
-- Pattern matching
|
|
492
|
+
SELECT * FROM nodes
|
|
493
|
+
WHERE content ILIKE '%memory%'; -- Substring match accelerated by the trigram index (ILIKE is literal; use % above for typo tolerance)
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
### Index Maintenance
|
|
497
|
+
|
|
498
|
+
Monitor and maintain indexes for optimal performance:
|
|
499
|
+
|
|
500
|
+
```sql
|
|
501
|
+
-- Check index usage
|
|
502
|
+
SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch
|
|
503
|
+
FROM pg_stat_user_indexes
|
|
504
|
+
WHERE schemaname = 'public'
|
|
505
|
+
ORDER BY idx_scan DESC;
|
|
506
|
+
|
|
507
|
+
-- Reindex if needed
|
|
508
|
+
REINDEX INDEX CONCURRENTLY idx_nodes_embedding;
|
|
509
|
+
REINDEX INDEX CONCURRENTLY idx_nodes_content_gin;
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
---
|
|
513
|
+
|
|
514
|
+
## Schema Migration
|
|
515
|
+
|
|
516
|
+
The schema is managed through ActiveRecord migrations located in `db/migrate/`:
|
|
517
|
+
|
|
518
|
+
1. `20250101000002_create_robots.rb` - Creates robots table
|
|
519
|
+
2. `20250101000003_create_nodes.rb` - Creates nodes table with all indexes
|
|
520
|
+
3. `20250101000005_create_tags.rb` - Creates tags and nodes_tags tables
|
|
521
|
+
|
|
522
|
+
To apply migrations:
|
|
523
|
+
```bash
|
|
524
|
+
bundle exec rake htm:db:migrate
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
To generate the current schema dump:
|
|
528
|
+
```bash
|
|
529
|
+
bundle exec rake htm:db:schema:dump
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
The canonical schema is maintained in `db/schema.sql`.
|
|
533
|
+
|
|
534
|
+
---
|
|
535
|
+
|
|
536
|
+
## Database Extensions
|
|
537
|
+
|
|
538
|
+
### pgvector
|
|
539
|
+
|
|
540
|
+
Provides vector similarity search capabilities:
|
|
541
|
+
|
|
542
|
+
```sql
|
|
543
|
+
-- Install extension
|
|
544
|
+
CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
|
|
545
|
+
|
|
546
|
+
-- Vector operations
|
|
547
|
+
SELECT embedding <=> $1::vector AS cosine_distance FROM nodes; -- Cosine distance
|
|
548
|
+
SELECT embedding <-> $1::vector AS l2_distance FROM nodes; -- L2 distance
|
|
549
|
+
SELECT embedding <#> $1::vector AS negative_inner_product FROM nodes; -- Negative inner product (multiply by -1 for the actual inner product)
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
### pg_trgm
|
|
553
|
+
|
|
554
|
+
Provides trigram-based fuzzy text matching:
|
|
555
|
+
|
|
556
|
+
```sql
|
|
557
|
+
-- Install extension
|
|
558
|
+
CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
|
|
559
|
+
|
|
560
|
+
-- Trigram operations
|
|
561
|
+
SELECT content % 'search term' FROM nodes; -- Similarity operator
|
|
562
|
+
SELECT similarity(content, 'search term') FROM nodes; -- Similarity score
|
|
563
|
+
SELECT content ILIKE '%pattern%' FROM nodes; -- Pattern matching (the trigram index is used when this predicate appears in a WHERE clause)
|
|
564
|
+
```
|
|
565
|
+
|
|
566
|
+
---
|
|
567
|
+
|
|
568
|
+
## Best Practices
|
|
569
|
+
|
|
570
|
+
### Tagging Strategy
|
|
571
|
+
|
|
572
|
+
1. **Use hierarchical namespaces**: `category:subcategory:detail`
|
|
573
|
+
2. **Be consistent with naming**: Use lowercase, singular nouns
|
|
574
|
+
3. **Limit depth**: 2-3 levels is optimal (e.g., `ai:llm:embeddings`)
|
|
575
|
+
4. **Avoid redundancy**: Don't duplicate information already in node fields
|
|
576
|
+
|
|
577
|
+
### Node Management
|
|
578
|
+
|
|
579
|
+
1. **Set appropriate importance**: Use 0.0-1.0 scale for priority-based retrieval
|
|
580
|
+
2. **Update last_accessed**: Touch timestamp when retrieving for LRU eviction
|
|
581
|
+
3. **Manage token_count**: Update when content changes for working memory budget
|
|
582
|
+
4. **Use appropriate types**: fact, context, code, preference, decision, question
|
|
583
|
+
|
|
584
|
+
### Search Strategy
|
|
585
|
+
|
|
586
|
+
1. **Vector search**: Best for semantic similarity ("concepts like X")
|
|
587
|
+
2. **Full-text search**: Best for keyword matching ("documents containing Y")
|
|
588
|
+
3. **Fuzzy search**: Best for typo tolerance and pattern matching
|
|
589
|
+
4. **Hybrid search**: Combine vector + full-text with weighted scores
|
|
590
|
+
|
|
591
|
+
### Performance Tuning
|
|
592
|
+
|
|
593
|
+
1. **Monitor index usage**: Use pg_stat_user_indexes
|
|
594
|
+
2. **Vacuum regularly**: Especially after bulk deletes
|
|
595
|
+
3. **Adjust HNSW parameters**: Balance recall vs speed based on dataset size
|
|
596
|
+
4. **Use connection pooling**: Managed by HTM::LongTermMemory
|