htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ t.verbose = true
11
+ end
12
+
13
+ task default: :test
14
+
15
+ # Load HTM database tasks from lib/tasks/htm.rake
16
+ # This uses the same loader that external applications use
17
+ require_relative "lib/htm/tasks"
18
+
19
+ # Legacy tasks for backwards compatibility
20
+ desc "Run database setup (deprecated: use htm:db:setup)"
21
+ task :db_setup => "htm:db:setup"
22
+
23
+ desc "Test database connection (deprecated: use htm:db:test)"
24
+ task :db_test => "htm:db:test"
25
+
26
+ desc "Run example"
27
+ task :example do
28
+ ruby "examples/basic_usage.rb"
29
+ end
30
+
31
+ desc "Show gem stats"
32
+ task :stats do
33
+ puts "\nHTM Gem Statistics:"
34
+ puts "=" * 60
35
+
36
+ # Count lines of code
37
+ lib_files = Dir.glob("lib/**/*.rb")
38
+ lib_lines = lib_files.sum { |f| File.readlines(f).size }
39
+
40
+ test_files = Dir.glob("test/**/*.rb")
41
+ test_lines = test_files.sum { |f| File.readlines(f).size }
42
+
43
+ puts "Library:"
44
+ puts " Files: #{lib_files.size}"
45
+ puts " Lines: #{lib_lines}"
46
+ puts "\nTests:"
47
+ puts " Files: #{test_files.size}"
48
+ puts " Lines: #{test_lines}"
49
+ puts "\nTotal lines: #{lib_lines + test_lines}"
50
+ puts "=" * 60
51
+ end
data/SETUP.md ADDED
@@ -0,0 +1,268 @@
1
+ # HTM Setup Guide
2
+
3
+ ## Prerequisites
4
+
5
+ 1. **Ruby** (version 3.0 or higher)
6
+ 2. **TimescaleDB Cloud Account** (already set up)
7
+ 3. **Database Environment Variables** (already configured)
8
+ 4. **Ollama** (for embeddings via RubyLLM)
9
+
10
+ ## Ollama Setup
11
+
12
+ HTM uses RubyLLM with the Ollama provider for generating embeddings. You need to install and run Ollama locally.
13
+
14
+ ### 1. Install Ollama
15
+
16
+ **Linux** (on macOS, run `brew install ollama` or use the download link below):
17
+ ```bash
18
+ curl https://ollama.ai/install.sh | sh
19
+ ```
20
+
21
+ **Or download from:** https://ollama.ai/download
22
+
23
+ ### 2. Start Ollama Service
24
+
25
+ ```bash
26
+ # Ollama typically starts automatically after installation
27
+ # Verify it's running:
28
+ curl http://localhost:11434/api/version
29
+ ```
30
+
31
+ ### 3. Pull the gpt-oss Model
32
+
33
+ ```bash
34
+ # Pull the default model used by HTM
35
+ ollama pull gpt-oss
36
+
37
+ # Verify the model is available
38
+ ollama list
39
+ ```
40
+
41
+ ### 4. Test Embedding Generation
42
+
43
+ ```bash
44
+ # Test that embeddings work
45
+ ollama run gpt-oss "Hello, world!"
46
+ ```
47
+
48
+ ### Optional: Custom Ollama URL
49
+
50
+ If Ollama is running on a different host/port, set the environment variable:
51
+
52
+ ```bash
53
+ export OLLAMA_URL="http://custom-host:11434"
54
+ ```
55
+
56
+ ## Database Setup
57
+
58
+ ### 1. Load Database Credentials
59
+
60
+ The HTM project uses environment variables to manage database credentials. These are defined in `~/.bashrc__tiger`.
61
+
62
+ ```bash
63
+ # Load the Tiger database environment variables
64
+ source ~/.bashrc__tiger
65
+ ```
66
+
67
+ To make these variables available automatically in new shell sessions, ensure `~/.bashrc__tiger` is sourced in your `~/.bashrc` or `~/.bash_profile`.
68
+
69
+ ### 2. Verify Connection
70
+
71
+ Test the database connection:
72
+
73
+ ```bash
74
+ cd /path/to/HTM
75
+ ruby test_connection.rb
76
+ ```
77
+
78
+ You should see:
79
+ ```
80
+ ✓ Connected successfully!
81
+ ✓ TimescaleDB Extension: Version 2.22.1
82
+ ✓ pgvector Extension: Version 0.8.1
83
+ ✓ pg_trgm Extension: Version 1.6
84
+ ```
85
+
86
+ ### 3. Enable Extensions (One-time)
87
+
88
+ Enable required PostgreSQL extensions (already done, but can be re-run safely):
89
+
90
+ ```bash
91
+ ruby enable_extensions.rb
92
+ ```
93
+
94
+ ## Environment Variables Reference
95
+
96
+ After sourcing `~/.bashrc__tiger`, these variables are available:
97
+
98
+ | Variable | Description | Example Value |
99
+ |----------|-------------|---------------|
100
+ | `HTM_SERVICE_NAME` | Service identifier | `db-67977` |
101
+ | `HTM_DBNAME` | Database name | `tsdb` |
102
+ | `HTM_DBUSER` | Database user | `tsdbadmin` |
103
+ | `HTM_DBPASS` | Database password | `***` |
104
+ | `HTM_DBURL` | Full connection URL (preferred) | `postgres://...` |
105
+ | `HTM_DBPORT` | Database port | `37807` |
106
+
107
+ ## Development Workflow
108
+
109
+ ### Quick Start
110
+
111
+ ```bash
112
+ # 1. Source environment variables (if not in .bashrc)
113
+ source ~/.bashrc__tiger
114
+
115
+ # 2. Install dependencies (when gem is created)
116
+ bundle install
117
+
118
+ # 3. Initialize database schema (when ready)
119
+ ruby -r ./lib/htm -e "HTM::Database.setup"
120
+
121
+ # 4. Test HTM functionality (when implemented)
122
+ ruby examples/basic_usage.rb
123
+ ```
124
+
125
+ ### Testing
126
+
127
+ HTM uses Minitest for testing:
128
+
129
+ ```bash
130
+ # Run all tests
131
+ rake test
132
+
133
+ # Or run directly with Ruby
134
+ ruby test/htm_test.rb
135
+
136
+ # Run specific test file
137
+ ruby test/embedding_service_test.rb
138
+
139
+ # Run integration tests (requires database)
140
+ ruby test/integration_test.rb
141
+ ```
142
+
143
+ ## Project Structure
144
+
145
+ ```
146
+ HTM/
147
+ ├── lib/
148
+ │ ├── htm.rb # Main HTM class
149
+ │ ├── htm/
150
+ │ │ ├── database.rb # Database setup and schema
151
+ │ │ ├── long_term_memory.rb # PostgreSQL-backed storage
152
+ │ │ ├── working_memory.rb # In-memory active context
153
+ │ │ ├── embedding_service.rb # RubyLLM embedding generation (Ollama/gpt-oss)
154
+ │ │ └── version.rb # Version constant
155
+ ├── db/
156
+ │ └── schema.sql # Database schema
157
+ ├── test/
158
+ │ ├── test_helper.rb # Minitest configuration
159
+ │ ├── htm_test.rb # Basic HTM tests
160
+ │ ├── embedding_service_test.rb # Embedding tests (RubyLLM/Ollama)
161
+ │ └── integration_test.rb # Full integration tests
162
+ ├── examples/
163
+ │ └── basic_usage.rb # Basic usage example
164
+ ├── test_connection.rb # Verify database connection
165
+ ├── enable_extensions.rb # Enable PostgreSQL extensions
166
+ ├── SETUP.md # This file
167
+ ├── README.md # Project overview
168
+ ├── htm_teamwork.md # Planning and design doc
169
+ ├── Gemfile
170
+ ├── htm.gemspec
171
+ └── Rakefile # Rake tasks
172
+ ```
173
+
174
+ ## Next Steps
175
+
176
+ 1. **Phase 1**: Create basic gem structure
177
+ 2. **Phase 2**: Implement database schema
178
+ 3. **Phase 3**: Implement LongTermMemory class
179
+ 4. **Phase 4**: Implement WorkingMemory class
180
+ 5. **Phase 5**: Implement HTM main class
181
+ 6. **Phase 6**: Add tests
182
+ 7. **Phase 7**: Create examples
183
+
184
+ See `htm_teamwork.md` for detailed roadmap.
185
+
186
+ ## Troubleshooting
187
+
188
+ ### Ollama Issues
189
+
190
+ If you encounter embedding errors:
191
+
192
+ ```bash
193
+ # Verify Ollama is running
194
+ curl http://localhost:11434/api/version
195
+
196
+ # Check if gpt-oss model is available
197
+ ollama list | grep gpt-oss
198
+
199
+ # Test embedding generation
200
+ ollama run gpt-oss "Test embedding"
201
+
202
+ # View Ollama logs
203
+ cat ~/.ollama/logs/server.log   # macOS; on Linux use: journalctl -u ollama
204
+
205
+ # Restart Ollama service
206
+ # On macOS, Ollama runs as a background service
207
+ # Check Activity Monitor or restart from the menu bar
208
+ ```
209
+
210
+ **Common Ollama Errors:**
211
+
212
+ - **"connection refused"**: Ollama service is not running. Start Ollama from Applications or via CLI.
213
+ - **"model not found"**: Run `ollama pull gpt-oss` to download the model.
214
+ - **Custom URL not working**: Ensure `OLLAMA_URL` environment variable is set correctly.
215
+
216
+ ### Database Connection Issues
217
+
218
+ If you get connection errors:
219
+
220
+ ```bash
221
+ # Verify environment variables are set
222
+ echo $HTM_DBURL
223
+
224
+ # Test connection manually
225
+ psql $HTM_DBURL
226
+
227
+ # Check if ~/.bashrc__tiger is sourced
228
+ grep "bashrc__tiger" ~/.bashrc
229
+ ```
230
+
231
+ ### Extension Issues
232
+
233
+ If extensions aren't available:
234
+
235
+ ```bash
236
+ # Re-run extension setup
237
+ ruby enable_extensions.rb
238
+
239
+ # Check extension status manually
240
+ psql $HTM_DBURL -c "SELECT extname, extversion FROM pg_extension ORDER BY extname"
241
+ ```
242
+
243
+ ### SSL Issues
244
+
245
+ The TimescaleDB Cloud instance requires SSL. If you see SSL errors:
246
+
247
+ ```bash
248
+ # Ensure sslmode is set in connection URL
249
+ echo $HTM_DBURL | grep sslmode
250
+ # Should show: sslmode=require
251
+ ```
252
+
253
+ ## Resources
254
+
255
+ - **Ollama**: https://ollama.ai/
256
+ - **RubyLLM**: https://github.com/crmne/ruby_llm
257
+ - **TimescaleDB Docs**: https://docs.timescale.com/
258
+ - **pgvector Docs**: https://github.com/pgvector/pgvector
259
+ - **Planning Document**: `htm_teamwork.md`
260
+ - **PostgreSQL Docs**: https://www.postgresql.org/docs/
261
+
262
+ ## Support
263
+
264
+ For issues or questions:
265
+ 1. Check `htm_teamwork.md` for design decisions
266
+ 2. Review examples in `examples/` directory
267
+ 3. Run tests with `rake test` (Minitest framework)
268
+ 4. Check Ollama status for embedding issues
data/config/database.yml ADDED
@@ -0,0 +1,67 @@
1
+ # HTM Database Configuration
2
+ # Uses ERB to read from environment variables
3
+ #
4
+ # Priority:
5
+ # 1. HTM_DBURL - Full connection URL (preferred)
6
+ # 2. Individual HTM_DB* variables - Host, name, user, password, port
7
+ # 3. Defaults for development/test
8
+ #
9
+ # Example HTM_DBURL format:
10
+ # postgresql://user:password@host:port/database?sslmode=require
11
+
12
+ <%
13
+   require 'json'
14
+   require 'uri'
15
+   # Parse connection from HTM_DBURL or use individual variables
16
+   if ENV['HTM_DBURL']
17
+     uri = URI.parse(ENV['HTM_DBURL'])
18
+     params = URI.decode_www_form(uri.query || '').to_h
19
+     decode = ->(part) { part && URI::RFC2396_Parser.new.unescape(part) } # URL credentials are percent-encoded
20
+     db_config = {
21
+       'host' => uri.host,
22
+       'port' => uri.port || 5432,
23
+       'database' => uri.path[1..-1],
24
+       'username' => decode.call(uri.user),
25
+       'password' => decode.call(uri.password),
26
+       'sslmode' => params['sslmode'] || 'prefer'
27
+     }
28
+   else
29
+     env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
30
+     db_config = {
31
+       'host' => ENV.fetch('HTM_DBHOST', 'localhost'),
32
+       'port' => ENV.fetch('HTM_DBPORT', 5432).to_i,
33
+       'database' => ENV.fetch('HTM_DBNAME', "htm_#{env}"),
34
+       'username' => ENV.fetch('HTM_DBUSER', 'postgres'),
35
+       'password' => ENV.fetch('HTM_DBPASS', ''),
36
+       'sslmode' => ENV.fetch('HTM_SSLMODE', 'prefer')
37
+     }
38
+   end
39
+ %>
40
+ # Credentials are emitted as JSON strings (valid YAML scalars) so characters like '#' or ': ' survive parsing
41
+ default: &default
42
+   adapter: postgresql
43
+   encoding: unicode
44
+   pool: <%= ENV.fetch("HTM_DB_POOL_SIZE", "10").to_i %>
45
+   timeout: 5000
46
+   prepared_statements: false
47
+   advisory_locks: false
48
+   host: <%= db_config['host'] %>
49
+   port: <%= db_config['port'] %>
50
+   username: <%= db_config['username'].to_json unless db_config['username'].to_s.empty? %>
51
+   password: <%= db_config['password'].to_json unless db_config['password'].to_s.empty? %>
52
+   sslmode: <%= db_config['sslmode'] %>
53
+
54
+ development:
55
+   <<: *default
56
+   database: <%= db_config['database'] %>
57
+
58
+ test:
59
+   <<: *default
60
+   database: <%= db_config['database'] %>_test
61
+
62
+ production:
63
+   <<: *default
64
+   database: <%= db_config['database'] %>
65
+ <% unless ENV['HTM_DBURL'] %>
66
+ # WARNING: Production should use HTM_DBURL with SSL
67
+ <% end %>
data/db/migrate/20250101000001_enable_extensions.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ class EnableExtensions < ActiveRecord::Migration[7.1]
4
+ def up
5
+ # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
6
+ enable_extension 'vector'
7
+ enable_extension 'pg_trgm'
8
+ end
9
+
10
+ def down
11
+ disable_extension 'pg_trgm'
12
+ disable_extension 'vector'
13
+ end
14
+ end
data/db/migrate/20250101000002_create_robots.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateRobots < ActiveRecord::Migration[7.1]
4
+ def change
5
+ unless table_exists?(:robots)
6
+ create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
7
+ t.text :name, comment: 'Human-readable name for the robot'
8
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
9
+ t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
10
+ t.jsonb :metadata, comment: 'Robot-specific configuration and metadata'
11
+ end
12
+ end
13
+ end
14
+ end
data/db/migrate/20250101000003_create_nodes.rb ADDED
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateNodes < ActiveRecord::Migration[7.1]
4
+ def change
5
+ unless table_exists?(:nodes)
6
+ create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
7
+ t.text :content, null: false, comment: 'The conversation message/utterance content'
8
+ t.text :source, default: '', comment: 'From where the content came (empty string if unknown)'
9
+ t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
10
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
11
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
12
+ t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
13
+ t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
14
+ t.boolean :in_working_memory, default: false, comment: 'Whether this memory is currently in working memory'
15
+ t.bigint :robot_id, null: false, comment: 'ID of the robot that owns this memory'
16
+ t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
17
+ t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
18
+ end
19
+
20
+ # Basic indexes for common queries
21
+ add_index :nodes, :created_at, name: 'idx_nodes_created_at'
22
+ add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
23
+ add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
24
+ add_index :nodes, :access_count, name: 'idx_nodes_access_count'
25
+ add_index :nodes, :robot_id, name: 'idx_nodes_robot_id'
26
+ add_index :nodes, :source, name: 'idx_nodes_source'
27
+ add_index :nodes, :in_working_memory, name: 'idx_nodes_in_working_memory'
28
+
29
+ # Add check constraint for embedding dimensions
30
+ # Only validates when embedding_dimension is provided (allows NULL for nodes without embeddings)
31
+ execute <<-SQL
32
+ ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
33
+ CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
34
+ SQL
35
+ end
36
+
37
+ # Foreign key to robots table (outside table_exists check so it gets added even if table already exists)
38
+ unless foreign_key_exists?(:nodes, :robots, column: :robot_id)
39
+ add_foreign_key :nodes, :robots, column: :robot_id, primary_key: :id, on_delete: :cascade
40
+ end
41
+ end
42
+ end
data/db/migrate/20250101000005_create_tags.rb ADDED
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateTags < ActiveRecord::Migration[7.1]
4
+ def change
5
+ # Create tags table with unique tag names
6
+ unless table_exists?(:tags)
7
+ create_table :tags, comment: 'Unique tag names for categorization' do |t|
8
+ t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
9
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
10
+ end
11
+
12
+ add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
13
+ add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
14
+ end
15
+
16
+ # Create join table for many-to-many relationship
17
+ unless table_exists?(:node_tags)
18
+ create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
19
+ t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
20
+ t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
21
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
22
+ end
23
+
24
+ add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
25
+ add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
26
+ add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
27
+ end
28
+
29
+ # Add foreign keys (outside table_exists check so they get added even if table already exists)
30
+ unless foreign_key_exists?(:node_tags, :nodes, column: :node_id)
31
+ add_foreign_key :node_tags, :nodes, column: :node_id, primary_key: :id, on_delete: :cascade
32
+ end
33
+
34
+ unless foreign_key_exists?(:node_tags, :tags, column: :tag_id)
35
+ add_foreign_key :node_tags, :tags, column: :tag_id, primary_key: :id, on_delete: :cascade
36
+ end
37
+ end
38
+ end
data/db/migrate/20250101000007_add_node_vector_indexes.rb ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddNodeVectorIndexes < ActiveRecord::Migration[7.1]
4
+ def up
5
+ # Vector similarity search index (HNSW for better performance)
6
+ execute <<-SQL
7
+ CREATE INDEX IF NOT EXISTS idx_nodes_embedding ON nodes
8
+ USING hnsw (embedding vector_cosine_ops)
9
+ WITH (m = 16, ef_construction = 64)
10
+ SQL
11
+
12
+ # Full-text search on conversation content
13
+ execute <<-SQL
14
+ CREATE INDEX IF NOT EXISTS idx_nodes_content_gin ON nodes
15
+ USING gin(to_tsvector('english', content))
16
+ SQL
17
+
18
+ # Trigram indexes for fuzzy matching on conversation content
19
+ execute <<-SQL
20
+ CREATE INDEX IF NOT EXISTS idx_nodes_content_trgm ON nodes
21
+ USING gin(content gin_trgm_ops)
22
+ SQL
23
+ end
24
+
25
+ def down
26
+ remove_index :nodes, name: 'idx_nodes_embedding'
27
+ remove_index :nodes, name: 'idx_nodes_content_gin'
28
+ remove_index :nodes, name: 'idx_nodes_content_trgm'
29
+ end
30
+ end