htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "rake/testtask"
|
|
5
|
+
|
|
6
|
+
# Minitest runner: `rake test` loads every file matching test/**/*_test.rb
# with both the test/ and lib/ directories on the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
  test_task.verbose = true
end

# Running bare `rake` runs the test suite.
task default: :test
|
|
14
|
+
|
|
15
|
+
# Load HTM database tasks from lib/tasks/htm.rake
|
|
16
|
+
# This uses the same loader that external applications use
|
|
17
|
+
require_relative "lib/htm/tasks"
|
|
18
|
+
|
|
19
|
+
# Legacy tasks for backwards compatibility
|
|
20
|
+
# Deprecated top-level aliases: each one has no body of its own and simply
# depends on the namespaced task that replaced it.
desc "Run database setup (deprecated: use htm:db:setup)"
task db_setup: "htm:db:setup"

desc "Test database connection (deprecated: use htm:db:test)"
task db_test: "htm:db:test"

# Runs the bundled usage example in a child Ruby process.
desc "Run example"
task(:example) { ruby "examples/basic_usage.rb" }
|
|
30
|
+
|
|
31
|
+
# Prints file and line counts for the Ruby sources under lib/ and test/,
# resolved relative to the current working directory.
desc "Show gem stats"
task :stats do
  rule = "=" * 60

  puts "\nHTM Gem Statistics:"
  puts rule

  # Globs +pattern+ and returns [matching paths, total line count across them].
  survey = lambda do |pattern|
    paths = Dir.glob(pattern)
    [paths, paths.sum { |path| File.foreach(path).count }]
  end

  lib_paths, lib_total = survey.call("lib/**/*.rb")
  test_paths, test_total = survey.call("test/**/*.rb")

  puts "Library:"
  puts " Files: #{lib_paths.size}"
  puts " Lines: #{lib_total}"
  puts "\nTests:"
  puts " Files: #{test_paths.size}"
  puts " Lines: #{test_total}"
  puts "\nTotal lines: #{lib_total + test_total}"
  puts rule
end
|
data/SETUP.md
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# HTM Setup Guide
|
|
2
|
+
|
|
3
|
+
## Prerequisites
|
|
4
|
+
|
|
5
|
+
1. **Ruby** (version 3.0 or higher)
|
|
6
|
+
2. **TimescaleDB Cloud Account** (already set up)
|
|
7
|
+
3. **Database Environment Variables** (already configured)
|
|
8
|
+
4. **Ollama** (for embeddings via RubyLLM)
|
|
9
|
+
|
|
10
|
+
## Ollama Setup
|
|
11
|
+
|
|
12
|
+
HTM uses RubyLLM with the Ollama provider for generating embeddings. You need to install and run Ollama locally.
|
|
13
|
+
|
|
14
|
+
### 1. Install Ollama
|
|
15
|
+
|
|
16
|
+
**Linux** (the install script does not support macOS; on macOS use the download link below):
|
|
17
|
+
```bash
|
|
18
|
+
curl https://ollama.ai/install.sh | sh
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Or download from:** https://ollama.ai/download
|
|
22
|
+
|
|
23
|
+
### 2. Start Ollama Service
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Ollama typically starts automatically after installation
|
|
27
|
+
# Verify it's running:
|
|
28
|
+
curl http://localhost:11434/api/version
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### 3. Pull the gpt-oss Model
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Pull the default model used by HTM
|
|
35
|
+
ollama pull gpt-oss
|
|
36
|
+
|
|
37
|
+
# Verify the model is available
|
|
38
|
+
ollama list
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 4. Test Embedding Generation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# Smoke-test the model (this runs a text generation, confirming the model loads and responds)
|
|
45
|
+
ollama run gpt-oss "Hello, world!"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Optional: Custom Ollama URL
|
|
49
|
+
|
|
50
|
+
If Ollama is running on a different host/port, set the environment variable:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
export OLLAMA_URL="http://custom-host:11434"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Database Setup
|
|
57
|
+
|
|
58
|
+
### 1. Load Database Credentials
|
|
59
|
+
|
|
60
|
+
The HTM project uses environment variables to manage database credentials. These are defined in `~/.bashrc__tiger`.
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Load the Tiger database environment variables
|
|
64
|
+
source ~/.bashrc__tiger
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
To make these variables available automatically in new shell sessions, ensure `~/.bashrc__tiger` is sourced in your `~/.bashrc` or `~/.bash_profile`.
|
|
68
|
+
|
|
69
|
+
### 2. Verify Connection
|
|
70
|
+
|
|
71
|
+
Test the database connection:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
cd /path/to/HTM
|
|
75
|
+
ruby test_connection.rb
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
You should see:
|
|
79
|
+
```
|
|
80
|
+
✓ Connected successfully!
|
|
81
|
+
✓ TimescaleDB Extension: Version 2.22.1
|
|
82
|
+
✓ pgvector Extension: Version 0.8.1
|
|
83
|
+
✓ pg_trgm Extension: Version 1.6
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 3. Enable Extensions (One-time)
|
|
87
|
+
|
|
88
|
+
Enable required PostgreSQL extensions (already done, but can be re-run safely):
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
ruby enable_extensions.rb
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Environment Variables Reference
|
|
95
|
+
|
|
96
|
+
After sourcing `~/.bashrc__tiger`, these variables are available:
|
|
97
|
+
|
|
98
|
+
| Variable | Description | Example Value |
|
|
99
|
+
|----------|-------------|---------------|
|
|
100
|
+
| `HTM_SERVICE_NAME` | Service identifier | `db-67977` |
|
|
101
|
+
| `HTM_DBNAME` | Database name | `tsdb` |
|
|
102
|
+
| `HTM_DBUSER` | Database user | `tsdbadmin` |
|
|
103
|
+
| `HTM_DBPASS` | Database password | `***` |
|
|
104
|
+
| `HTM_DBURL` | Full connection URL (preferred) | `postgres://...` |
|
|
105
|
+
| `HTM_DBPORT` | Database port | `37807` |
|
|
106
|
+
|
|
107
|
+
## Development Workflow
|
|
108
|
+
|
|
109
|
+
### Quick Start
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# 1. Source environment variables (if not in .bashrc)
|
|
113
|
+
source ~/.bashrc__tiger
|
|
114
|
+
|
|
115
|
+
# 2. Install dependencies (when gem is created)
|
|
116
|
+
bundle install
|
|
117
|
+
|
|
118
|
+
# 3. Initialize database schema (when ready)
|
|
119
|
+
rake htm:db:setup
|
|
120
|
+
|
|
121
|
+
# 4. Test HTM functionality (when implemented)
|
|
122
|
+
ruby examples/basic_usage.rb
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Testing
|
|
126
|
+
|
|
127
|
+
HTM uses Minitest for testing:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# Run all tests
|
|
131
|
+
rake test
|
|
132
|
+
|
|
133
|
+
# Or run directly with Ruby
|
|
134
|
+
ruby test/htm_test.rb
|
|
135
|
+
|
|
136
|
+
# Run specific test file
|
|
137
|
+
ruby test/embedding_service_test.rb
|
|
138
|
+
|
|
139
|
+
# Run integration tests (requires database)
|
|
140
|
+
ruby test/integration_test.rb
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Project Structure
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
HTM/
|
|
147
|
+
├── lib/
|
|
148
|
+
│ ├── htm.rb # Main HTM class
|
|
149
|
+
│ ├── htm/
|
|
150
|
+
│ │ ├── database.rb # Database setup and schema
|
|
151
|
+
│ │ ├── long_term_memory.rb # PostgreSQL-backed storage
|
|
152
|
+
│ │ ├── working_memory.rb # In-memory active context
|
|
153
|
+
│ │ ├── embedding_service.rb # RubyLLM embedding generation (Ollama/gpt-oss)
|
|
154
|
+
│ │ └── version.rb # Version constant
|
|
155
|
+
├── db/
|
|
156
|
+
│ └── schema.sql # Database schema
|
|
157
|
+
├── test/
|
|
158
|
+
│ ├── test_helper.rb # Minitest configuration
|
|
159
|
+
│ ├── htm_test.rb # Basic HTM tests
|
|
160
|
+
│ ├── embedding_service_test.rb # Embedding tests (RubyLLM/Ollama)
|
|
161
|
+
│ └── integration_test.rb # Full integration tests
|
|
162
|
+
├── examples/
|
|
163
|
+
│ └── basic_usage.rb # Basic usage example
|
|
164
|
+
├── test_connection.rb # Verify database connection
|
|
165
|
+
├── enable_extensions.rb # Enable PostgreSQL extensions
|
|
166
|
+
├── SETUP.md # This file
|
|
167
|
+
├── README.md # Project overview
|
|
168
|
+
├── htm_teamwork.md # Planning and design doc
|
|
169
|
+
├── Gemfile
|
|
170
|
+
├── htm.gemspec
|
|
171
|
+
└── Rakefile # Rake tasks
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Next Steps
|
|
175
|
+
|
|
176
|
+
1. **Phase 1**: Create basic gem structure
|
|
177
|
+
2. **Phase 2**: Implement database schema
|
|
178
|
+
3. **Phase 3**: Implement LongTermMemory class
|
|
179
|
+
4. **Phase 4**: Implement WorkingMemory class
|
|
180
|
+
5. **Phase 5**: Implement HTM main class
|
|
181
|
+
6. **Phase 6**: Add tests
|
|
182
|
+
7. **Phase 7**: Create examples
|
|
183
|
+
|
|
184
|
+
See `htm_teamwork.md` for detailed roadmap.
|
|
185
|
+
|
|
186
|
+
## Troubleshooting
|
|
187
|
+
|
|
188
|
+
### Ollama Issues
|
|
189
|
+
|
|
190
|
+
If you encounter embedding errors:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
# Verify Ollama is running
|
|
194
|
+
curl http://localhost:11434/api/version
|
|
195
|
+
|
|
196
|
+
# Check if gpt-oss model is available
|
|
197
|
+
ollama list | grep gpt-oss
|
|
198
|
+
|
|
199
|
+
# Smoke-test the model (runs a text generation; confirms the model loads and responds)
|
|
200
|
+
ollama run gpt-oss "Test embedding"
|
|
201
|
+
|
|
202
|
+
# View Ollama logs
|
|
203
|
+
cat ~/.ollama/logs/server.log   # macOS; on Linux with systemd: journalctl -u ollama
|
|
204
|
+
|
|
205
|
+
# Restart Ollama service
|
|
206
|
+
# On macOS, Ollama runs as a background service
|
|
207
|
+
# Check Activity Monitor or restart from the menu bar
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
**Common Ollama Errors:**
|
|
211
|
+
|
|
212
|
+
- **"connection refused"**: Ollama service is not running. Start Ollama from Applications or via CLI.
|
|
213
|
+
- **"model not found"**: Run `ollama pull gpt-oss` to download the model.
|
|
214
|
+
- **Custom URL not working**: Ensure `OLLAMA_URL` environment variable is set correctly.
|
|
215
|
+
|
|
216
|
+
### Database Connection Issues
|
|
217
|
+
|
|
218
|
+
If you get connection errors:
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
# Verify environment variables are set
|
|
222
|
+
echo $HTM_DBURL
|
|
223
|
+
|
|
224
|
+
# Test connection manually
|
|
225
|
+
psql $HTM_DBURL
|
|
226
|
+
|
|
227
|
+
# Check if ~/.bashrc__tiger is sourced
|
|
228
|
+
grep "bashrc__tiger" ~/.bashrc
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Extension Issues
|
|
232
|
+
|
|
233
|
+
If extensions aren't available:
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Re-run extension setup
|
|
237
|
+
ruby enable_extensions.rb
|
|
238
|
+
|
|
239
|
+
# Check extension status manually
|
|
240
|
+
psql $HTM_DBURL -c "SELECT extname, extversion FROM pg_extension ORDER BY extname"
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### SSL Issues
|
|
244
|
+
|
|
245
|
+
The TimescaleDB Cloud instance requires SSL. If you see SSL errors:
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
# Ensure sslmode is set in connection URL
|
|
249
|
+
echo $HTM_DBURL | grep sslmode
|
|
250
|
+
# Should show: sslmode=require
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Resources
|
|
254
|
+
|
|
255
|
+
- **Ollama**: https://ollama.ai/
|
|
256
|
+
- **RubyLLM**: https://github.com/madbomber/ruby_llm
|
|
257
|
+
- **TimescaleDB Docs**: https://docs.timescale.com/
|
|
258
|
+
- **pgvector Docs**: https://github.com/pgvector/pgvector
|
|
259
|
+
- **Planning Document**: `htm_teamwork.md`
|
|
260
|
+
- **PostgreSQL Docs**: https://www.postgresql.org/docs/
|
|
261
|
+
|
|
262
|
+
## Support
|
|
263
|
+
|
|
264
|
+
For issues or questions:
|
|
265
|
+
1. Check `htm_teamwork.md` for design decisions
|
|
266
|
+
2. Review examples in `examples/` directory
|
|
267
|
+
3. Run tests with `rake test` (Minitest framework)
|
|
268
|
+
4. Check Ollama status for embedding issues
|
data/config/database.yml
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# HTM Database Configuration
# Uses ERB to read from environment variables
#
# Priority:
#   1. HTM_DBURL - Full connection URL (preferred)
#   2. Individual HTM_DB* variables - Host, name, user, password, port
#   3. Defaults for development/test
#
# Example HTM_DBURL format:
#   postgresql://user:password@host:port/database?sslmode=require
#
# A user name or password inside HTM_DBURL must be percent-encoded when it
# contains reserved characters (for example "@" as "%40"); both are decoded
# below before being handed to the adapter.

<%
  require 'uri'
  require 'json'

  # Percent-decodes a URL component ("%40" -> "@"); nil stays nil.
  unescape = ->(value) { value && URI::RFC2396_Parser.new.unescape(value) }

  # Renders a value as a double-quoted YAML scalar so characters such as
  # ": ", " #" or a leading "@"/"*" cannot corrupt the document. Blank values
  # render as nothing, which YAML reads as null.
  yaml_scalar = ->(value) { value.to_s.empty? ? '' : value.to_s.to_json }

  # An exported-but-empty HTM_DBURL is treated the same as an unset one.
  db_url = ENV['HTM_DBURL'].to_s.strip
  url_configured = !db_url.empty?

  # Parse connection from HTM_DBURL or use individual variables
  if url_configured
    uri = URI.parse(db_url)
    params = URI.decode_www_form(uri.query || '').to_h

    db_config = {
      'host' => uri.host,
      'port' => uri.port || 5432,
      'database' => uri.path[1..-1],
      'username' => unescape.call(uri.user),
      'password' => unescape.call(uri.password),
      'sslmode' => params['sslmode'] || 'prefer'
    }
  else
    env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
    db_config = {
      'host' => ENV.fetch('HTM_DBHOST', 'localhost'),
      'port' => ENV.fetch('HTM_DBPORT', 5432).to_i,
      'database' => ENV.fetch('HTM_DBNAME', "htm_#{env}"),
      'username' => ENV.fetch('HTM_DBUSER', 'postgres'),
      'password' => ENV.fetch('HTM_DBPASS', ''),
      'sslmode' => ENV.fetch('HTM_SSLMODE', 'prefer')
    }
  end
%>

default: &default
  adapter: postgresql
  encoding: unicode
  pool: <%= ENV.fetch("HTM_DB_POOL_SIZE", "10").to_i %>
  timeout: 5000
  prepared_statements: false
  advisory_locks: false
  host: <%= db_config['host'] %>
  port: <%= db_config['port'] %>
  username: <%= yaml_scalar.call(db_config['username']) %>
  password: <%= yaml_scalar.call(db_config['password']) %>
  sslmode: <%= db_config['sslmode'] %>

development:
  <<: *default
  database: <%= db_config['database'] %>

test:
  <<: *default
  database: <%= db_config['database'] %>_test

production:
  <<: *default
  database: <%= db_config['database'] %>
<% unless url_configured %>
  # WARNING: Production should use HTM_DBURL with SSL
<% end %>
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Enables the PostgreSQL extensions this schema depends on.
class EnableExtensions < ActiveRecord::Migration[7.1]
  # Extensions in the order they are enabled; rollback disables them in reverse.
  # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
  EXTENSIONS = %w[vector pg_trgm].freeze

  def up
    EXTENSIONS.each { |extension| enable_extension extension }
  end

  def down
    EXTENSIONS.reverse_each { |extension| disable_extension extension }
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the +robots+ table.
#
# Written as explicit +up+/+down+ rather than +change+: a +table_exists?+ guard
# inside +change+ is evaluated against the live database while a rollback is
# being recorded, so the +create_table+ call is skipped, nothing is inverted,
# and the rollback silently leaves the table in place.
class CreateRobots < ActiveRecord::Migration[7.1]
  # Creates the table unless it is already present.
  def up
    return if table_exists?(:robots)

    create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
      t.text :name, comment: 'Human-readable name for the robot'
      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
      t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
      t.jsonb :metadata, comment: 'Robot-specific configuration and metadata'
    end
  end

  # Drops the table; tolerates it already being absent.
  def down
    drop_table :robots, if_exists: true
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the +nodes+ table, its basic indexes, the embedding-dimension check
# constraint and the foreign key to +robots+.
#
# Written as explicit +up+/+down+ rather than +change+: the +table_exists?+ and
# +foreign_key_exists?+ guards are evaluated against the live database while a
# rollback is being recorded, so under +change+ every statement is skipped and
# the rollback silently does nothing.
class CreateNodes < ActiveRecord::Migration[7.1]
  def up
    create_nodes_table unless table_exists?(:nodes)

    # Foreign key to robots table (outside table_exists check so it gets added even if table already exists)
    unless foreign_key_exists?(:nodes, :robots, column: :robot_id)
      add_foreign_key :nodes, :robots, column: :robot_id, primary_key: :id, on_delete: :cascade
    end
  end

  # Dropping the table also removes its indexes, check constraint and foreign key.
  def down
    drop_table :nodes, if_exists: true
  end

  private

  # Creates the table together with its indexes and check constraint.
  def create_nodes_table
    create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
      t.text :content, null: false, comment: 'The conversation message/utterance content'
      t.text :source, default: '', comment: 'From where the content came (empty string if unknown)'
      t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
      t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
      t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
      t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
      t.boolean :in_working_memory, default: false, comment: 'Whether this memory is currently in working memory'
      t.bigint :robot_id, null: false, comment: 'ID of the robot that owns this memory'
      t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
      t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
    end

    # Basic indexes for common queries
    add_index :nodes, :created_at, name: 'idx_nodes_created_at'
    add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
    add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
    add_index :nodes, :access_count, name: 'idx_nodes_access_count'
    add_index :nodes, :robot_id, name: 'idx_nodes_robot_id'
    add_index :nodes, :source, name: 'idx_nodes_source'
    add_index :nodes, :in_working_memory, name: 'idx_nodes_in_working_memory'

    # Add check constraint for embedding dimensions
    # Only validates when embedding_dimension is provided (allows NULL for nodes without embeddings)
    execute <<~SQL
      ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
      CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
    SQL
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the +tags+ table and the +node_tags+ join table linking nodes to tags.
#
# Written as explicit +up+/+down+ rather than +change+: the existence guards
# are evaluated against the live database while a rollback is being recorded,
# so under +change+ every statement is skipped and the rollback silently does
# nothing.
class CreateTags < ActiveRecord::Migration[7.1]
  def up
    create_tags_table unless table_exists?(:tags)
    create_node_tags_table unless table_exists?(:node_tags)

    # Add foreign keys (outside table_exists check so they get added even if table already exists)
    unless foreign_key_exists?(:node_tags, :nodes, column: :node_id)
      add_foreign_key :node_tags, :nodes, column: :node_id, primary_key: :id, on_delete: :cascade
    end

    unless foreign_key_exists?(:node_tags, :tags, column: :tag_id)
      add_foreign_key :node_tags, :tags, column: :tag_id, primary_key: :id, on_delete: :cascade
    end
  end

  # Drops the join table first because it holds the foreign key to +tags+.
  def down
    drop_table :node_tags, if_exists: true
    drop_table :tags, if_exists: true
  end

  private

  # Create tags table with unique tag names
  def create_tags_table
    create_table :tags, comment: 'Unique tag names for categorization' do |t|
      t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
    end

    add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
    add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
  end

  # Create join table for many-to-many relationship
  def create_node_tags_table
    create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
      t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
      t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
    end

    add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
    add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
    add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds the search indexes on +nodes+: an HNSW index on +embedding+ and two GIN
# indexes on +content+ (full-text and trigram).
class AddNodeVectorIndexes < ActiveRecord::Migration[7.1]
  # Index names in creation order; +down+ removes exactly these.
  INDEX_NAMES = %w[idx_nodes_embedding idx_nodes_content_gin idx_nodes_content_trgm].freeze

  # Every statement uses IF NOT EXISTS, so re-running against a database that
  # already has the indexes is harmless.
  def up
    # Vector similarity search index (HNSW for better performance)
    execute <<~SQL
      CREATE INDEX IF NOT EXISTS idx_nodes_embedding ON nodes
      USING hnsw (embedding vector_cosine_ops)
      WITH (m = 16, ef_construction = 64)
    SQL

    # Full-text search on conversation content
    execute <<~SQL
      CREATE INDEX IF NOT EXISTS idx_nodes_content_gin ON nodes
      USING gin(to_tsvector('english', content))
    SQL

    # Trigram indexes for fuzzy matching on conversation content
    execute <<~SQL
      CREATE INDEX IF NOT EXISTS idx_nodes_content_trgm ON nodes
      USING gin(content gin_trgm_ops)
    SQL
  end

  # Mirrors the tolerance of +up+: a missing index is skipped instead of
  # aborting the rollback part-way through.
  def down
    INDEX_NAMES.each { |index_name| remove_index :nodes, name: index_name, if_exists: true }
  end
end
|