htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
data/lib/htm/database.rb
ADDED
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'pg'
|
|
4
|
+
require 'uri'
|
|
5
|
+
require 'set'
|
|
6
|
+
|
|
7
|
+
class HTM
|
|
8
|
+
# Database setup and configuration for HTM
|
|
9
|
+
# Handles schema creation and database initialization
|
|
10
|
+
class Database
|
|
11
|
+
class << self
|
|
12
|
+
# Set up the HTM database schema
|
|
13
|
+
#
|
|
14
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
15
|
+
# @param run_migrations [Boolean] Whether to run migrations (default: true)
|
|
16
|
+
# @param dump_schema [Boolean] Whether to dump schema to db/schema.sql after setup (default: false)
|
|
17
|
+
# @return [void]
|
|
18
|
+
#
|
|
19
|
+
def setup(db_url = nil, run_migrations: true, dump_schema: false)
  # Connects through HTM::ActiveRecordConfig, optionally runs the migrations
  # and optionally dumps the resulting schema.
  #
  # Note: db_url is only forwarded to dump_schema; the ActiveRecord
  # connection itself is established without it.
  require 'active_record'
  require_relative 'active_record_config'

  HTM::ActiveRecordConfig.establish_connection!

  if run_migrations
    puts "Running ActiveRecord migrations..."
    run_activerecord_migrations
  end

  # Printed whether or not migrations were requested.
  puts "✓ HTM database schema created successfully"

  return unless dump_schema

  puts ""
  # The keyword argument shadows the method of the same name, so the call
  # needs an explicit receiver.
  self.dump_schema(db_url)
end
|
|
40
|
+
|
|
41
|
+
# Run pending database migrations
|
|
42
|
+
#
|
|
43
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
44
|
+
# @return [void]
|
|
45
|
+
#
|
|
46
|
+
def migrate(db_url = nil)
  # Runs every pending migration over the connection configured by
  # HTM::ActiveRecordConfig. db_url is accepted but not read in this method.
  require 'active_record'
  require_relative 'active_record_config'

  HTM::ActiveRecordConfig.establish_connection!
  run_activerecord_migrations

  puts "✓ Database migrations completed"
end
|
|
57
|
+
|
|
58
|
+
# Show migration status
|
|
59
|
+
#
|
|
60
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
61
|
+
# @return [void]
|
|
62
|
+
#
|
|
63
|
+
def migration_status(db_url = nil)
  # Prints one line per migration file (✓ applied / ✗ not applied) followed
  # by a summary. db_url is accepted but not read in this method; the
  # connection comes from HTM::ActiveRecordConfig.
  require 'active_record'
  require_relative 'active_record_config'

  HTM::ActiveRecordConfig.establish_connection!

  migrations_path = File.expand_path('../../db/migrate', __dir__)

  # The version is the filename prefix before the first underscore.
  available_migrations = Dir.glob(File.join(migrations_path, '*.rb')).map do |file|
    { version: File.basename(file).split('_').first, name: File.basename(file, '.rb') }
  end.sort_by { |m| m[:version] }

  # If the query fails (presumably because schema_migrations does not exist
  # yet), treat the database as having no applied migrations.
  applied_versions = begin
    ActiveRecord::Base.connection.select_values('SELECT version FROM schema_migrations ORDER BY version')
  rescue ActiveRecord::StatementInvalid
    []
  end
  applied_lookup = applied_versions.map(&:to_s).to_set

  puts "\nMigration Status"
  puts "=" * 100

  # Count pending migrations from the files themselves. Subtracting the row
  # count of schema_migrations from the file count goes wrong (even negative)
  # when the table holds versions that no longer have a migration file.
  pending_count = 0

  if available_migrations.empty?
    puts "No migration files found in db/migrate/"
  else
    available_migrations.each do |migration|
      applied = applied_lookup.include?(migration[:version])
      pending_count += 1 unless applied

      puts "#{applied ? '✓' : '✗'} #{migration[:name]}"
    end
  end

  puts "\nSummary: #{applied_versions.length} applied, #{pending_count} pending"
  puts "=" * 100
end
|
|
107
|
+
|
|
108
|
+
# Drop all HTM tables
|
|
109
|
+
#
|
|
110
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
111
|
+
# @return [void]
|
|
112
|
+
#
|
|
113
|
+
def drop(db_url = nil)
  # Drops the HTM tables, the ontology function and the ontology views.
  # Each statement group is best-effort: a PG::Error is reported and the
  # remaining objects are still attempted.
  #
  # Raises RuntimeError when neither db_url nor ENV['HTM_DBURL'] is available.
  config = parse_connection_url(db_url || ENV['HTM_DBURL'])
  raise "Database configuration not found" unless config

  conn = PG.connect(config)

  begin
    tables = ['nodes', 'node_tags', 'tags', 'robots', 'operations_log', 'schema_migrations']

    puts "Dropping HTM tables..."
    tables.each do |table|
      begin
        conn.exec("DROP TABLE IF EXISTS #{table} CASCADE")
        puts " ✓ Dropped #{table}"
      rescue PG::Error => e
        puts " ✗ Error dropping #{table}: #{e.message}"
      end
    end

    # Drop functions and triggers
    begin
      conn.exec("DROP FUNCTION IF EXISTS extract_ontology_topics() CASCADE")
      puts " ✓ Dropped ontology functions and triggers"
    rescue PG::Error => e
      puts " ✗ Error dropping functions: #{e.message}"
    end

    # Drop views
    begin
      conn.exec("DROP VIEW IF EXISTS ontology_structure CASCADE")
      conn.exec("DROP VIEW IF EXISTS topic_relationships CASCADE")
      puts " ✓ Dropped ontology views"
    rescue PG::Error => e
      puts " ✗ Error dropping views: #{e.message}"
    end
  ensure
    # Release the connection even when something other than PG::Error
    # escapes from the statements above.
    conn.close unless conn.finished?
  end

  puts "✓ All HTM tables dropped"
end
|
|
151
|
+
|
|
152
|
+
# Seed database with sample data
|
|
153
|
+
#
|
|
154
|
+
# Loads and executes db/seeds.rb file following Rails conventions.
|
|
155
|
+
# All seeding logic is contained in db/seeds.rb and reads data
|
|
156
|
+
# from markdown files in db/seed_data/ directory.
|
|
157
|
+
#
|
|
158
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
159
|
+
# @return [void]
|
|
160
|
+
#
|
|
161
|
+
def seed(db_url = nil)
  # Loads db/seeds.rb (resolved relative to this file's directory). When the
  # file is missing, an explanation is printed and the process exits with
  # status 1. db_url is accepted but not read in this method.
  seeds_file = File.expand_path('../../db/seeds.rb', __dir__)

  if File.exist?(seeds_file)
    load seeds_file
  else
    puts "✗ Error: Seeds file not found at #{seeds_file}"
    puts " Please create db/seeds.rb with your seeding logic"
    exit 1
  end
end
|
|
173
|
+
|
|
174
|
+
# Dump current database schema to db/schema.sql
|
|
175
|
+
#
|
|
176
|
+
# Uses pg_dump to create a clean SQL schema file without data
|
|
177
|
+
#
|
|
178
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
179
|
+
# @return [void]
|
|
180
|
+
#
|
|
181
|
+
def dump_schema(db_url = nil)
  # Runs pg_dump against the configured database, cleans the output with
  # clean_schema_dump and writes it to db/schema.sql. Exits the process with
  # status 1 when pg_dump fails.
  #
  # Raises RuntimeError when neither db_url nor ENV['HTM_DBURL'] is available.
  config = parse_connection_url(db_url || ENV['HTM_DBURL'])
  raise "Database configuration not found" unless config

  schema_file = File.expand_path('../../db/schema.sql', __dir__)
  puts "Dumping schema to #{schema_file}..."

  # Schema only, without ownership, privileges or tablespaces, and without
  # the TimescaleDB-internal and system schemas.
  dump_flags = %w[
    --schema-only
    --no-owner
    --no-privileges
    --no-tablespaces
    --exclude-schema=_timescaledb_*
    --exclude-schema=information_schema
    --exclude-schema=pg_catalog
  ]
  connection_args = [
    '-h', config[:host],
    '-p', config[:port].to_s,
    '-U', config[:user],
    '-d', config[:dbname]
  ]

  # The password travels through the child's environment, not through argv.
  child_env = { 'PGPASSWORD' => config[:password] }

  require 'open3'
  stdout, stderr, status = Open3.capture3(child_env, 'pg_dump', *dump_flags, *connection_args)

  unless status.success?
    puts "✗ Error dumping schema:"
    puts stderr
    exit 1
  end

  File.write(schema_file, clean_schema_dump(stdout))

  puts "✓ Schema dumped successfully to #{schema_file}"
  puts " Size: #{File.size(schema_file)} bytes"
end
|
|
233
|
+
|
|
234
|
+
# Load schema from db/schema.sql
|
|
235
|
+
#
|
|
236
|
+
# Uses psql to load the schema file
|
|
237
|
+
#
|
|
238
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
239
|
+
# @return [void]
|
|
240
|
+
#
|
|
241
|
+
def load_schema(db_url = nil)
  # Feeds db/schema.sql to psql. Exits the process with status 1 when the
  # schema file is missing or psql itself fails.
  #
  # Raises RuntimeError when neither db_url nor ENV['HTM_DBURL'] is available.
  config = parse_connection_url(db_url || ENV['HTM_DBURL'])
  raise "Database configuration not found" unless config

  schema_file = File.expand_path('../../db/schema.sql', __dir__)

  unless File.exist?(schema_file)
    puts "✗ Schema file not found: #{schema_file}"
    puts " Run 'rake htm:db:schema:dump' first to create it"
    exit 1
  end

  puts "Loading schema from #{schema_file}..."

  # The password travels through the child's environment, not through argv.
  env = {
    'PGPASSWORD' => config[:password]
  }

  cmd = [
    'psql',
    '-h', config[:host],
    '-p', config[:port].to_s,
    '-U', config[:user],
    '-d', config[:dbname],
    '-f', schema_file,
    '--quiet'
  ]

  require 'open3'
  _stdout, stderr, status = Open3.capture3(env, *cmd)

  unless status.success?
    puts "✗ Error loading schema:"
    puts stderr
    exit 1
  end

  # Without ON_ERROR_STOP psql keeps going after a failed statement and still
  # exits 0, so a zero status alone does not prove every statement applied.
  # Surface whatever psql wrote to stderr instead of discarding it.
  unless stderr.to_s.strip.empty?
    puts "⚠ psql reported messages while loading the schema:"
    puts stderr
  end

  puts "✓ Schema loaded successfully"
end
|
|
282
|
+
|
|
283
|
+
# Generate database documentation using tbls
|
|
284
|
+
#
|
|
285
|
+
# Creates comprehensive database documentation in dbdoc/ directory including:
|
|
286
|
+
# - Entity-relationship diagrams
|
|
287
|
+
# - Table schemas with comments
|
|
288
|
+
# - Index information
|
|
289
|
+
# - Relationship diagrams
|
|
290
|
+
#
|
|
291
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
292
|
+
# @return [void]
|
|
293
|
+
#
|
|
294
|
+
def generate_docs(db_url = nil)
  # Runs `tbls doc --force` against the configured database and writes the
  # result into the dbdoc/ directory. Exits the process with status 1 when
  # tbls is not installed or the tbls run fails.
  #
  # Raises RuntimeError when neither db_url nor ENV['HTM_DBURL'] is available.
  config = parse_connection_url(db_url || ENV['HTM_DBURL'])
  raise "Database configuration not found" unless config

  dbdoc_dir = File.expand_path('../../dbdoc', __dir__)
  puts "Generating database documentation in #{dbdoc_dir}..."

  Dir.mkdir(dbdoc_dir) unless Dir.exist?(dbdoc_dir)

  # Rebuild a PostgreSQL URL for tbls; the password segment is only present
  # when the configuration carries one.
  # NOTE(review): the URL (including any password) is passed to tbls as a
  # command-line argument — confirm that is acceptable for your environment.
  credentials = config[:password] ? "#{config[:user]}:#{config[:password]}" : "#{config[:user]}"
  pg_url = "postgresql://#{credentials}@#{config[:host]}:#{config[:port]}/#{config[:dbname]}?sslmode=#{config[:sslmode] || 'prefer'}"

  unless system('which tbls > /dev/null 2>&1')
    puts [
      "✗ Error: 'tbls' is not installed",
      "",
      "Install tbls:",
      " brew install k1LoW/tap/tbls",
      " # or",
      " go install github.com/k1LoW/tbls@latest",
      "",
      "See: https://github.com/k1LoW/tbls"
    ]
    exit 1
  end

  # --force lets tbls overwrite documentation generated by an earlier run.
  require 'open3'
  stdout, stderr, status = Open3.capture3('tbls', 'doc', '--force', pg_url, dbdoc_dir)

  unless status.success?
    puts "✗ Error generating documentation:"
    puts stderr
    puts stdout
    exit 1
  end

  puts stdout unless stdout.nil? || stdout.empty?
  puts [
    "✓ Database documentation generated successfully",
    "",
    "Documentation files:",
    " #{dbdoc_dir}/README.md - Main documentation",
    " #{dbdoc_dir}/schema.svg - ER diagram (if generated)",
    " #{dbdoc_dir}/*.md - Individual table documentation",
    "",
    "View documentation:",
    " open #{dbdoc_dir}/README.md"
  ]
end
|
|
349
|
+
|
|
350
|
+
# Show database info
|
|
351
|
+
#
|
|
352
|
+
# @param db_url [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
|
|
353
|
+
# @return [void]
|
|
354
|
+
#
|
|
355
|
+
def info(db_url = nil)
  # Prints connection details, the server version, installed extensions,
  # per-table row counts and the database size.
  #
  # Raises RuntimeError when neither db_url nor ENV['HTM_DBURL'] is available.
  config = parse_connection_url(db_url || ENV['HTM_DBURL'])
  raise "Database configuration not found" unless config

  conn = PG.connect(config)

  begin
    puts "\nHTM Database Information"
    puts "=" * 80

    # Connection info
    puts "\nConnection:"
    puts " Host: #{config[:host]}"
    puts " Port: #{config[:port]}"
    puts " Database: #{config[:dbname]}"
    puts " User: #{config[:user]}"

    # PostgreSQL version (only the part before the first comma is shown)
    version = conn.exec("SELECT version()").first['version']
    puts "\nPostgreSQL Version:"
    puts " #{version.split(',').first}"

    # Extensions
    puts "\nExtensions:"
    conn.exec("SELECT extname, extversion FROM pg_extension ORDER BY extname").each do |ext|
      puts " #{ext['extname']} (#{ext['extversion']})"
    end

    # Table info; a table that does not exist is reported, not treated as fatal
    puts "\nHTM Tables:"
    ['nodes', 'tags', 'robots', 'operations_log', 'schema_migrations'].each do |table|
      begin
        count = conn.exec("SELECT COUNT(*) FROM #{table}").first['count']
        puts " #{table}: #{count} rows"
      rescue PG::UndefinedTable
        puts " #{table}: not created"
      end
    end

    # Database size
    db_size = conn.exec(
      "SELECT pg_size_pretty(pg_database_size($1)) AS size",
      [config[:dbname]]
    ).first['size']
    puts "\nDatabase Size: #{db_size}"
  ensure
    # Release the connection even when one of the queries above raises.
    conn.close unless conn.finished?
  end

  puts "=" * 80
end
|
|
405
|
+
|
|
406
|
+
# Parse database connection URL
|
|
407
|
+
#
|
|
408
|
+
# @param url [String] Connection URL
|
|
409
|
+
# @return [Hash, nil] Connection configuration hash
|
|
410
|
+
#
|
|
411
|
+
def parse_connection_url(url)
  # Splits a database URL into the keys host, port, dbname, user, password
  # and sslmode (sslmode defaults to 'prefer' when the query string has none).
  #
  # Returns nil for a nil, empty or whitespace-only URL, so that callers'
  # "configuration not found" checks also catch an HTM_DBURL that is set
  # but blank.
  #
  # NOTE(review): user and password are taken from URI#user / URI#password
  # exactly as written in the URL; percent-encoded credentials do not appear
  # to be decoded here — confirm callers expect that.
  cleaned = url.to_s.strip
  return nil if cleaned.empty?

  uri = URI.parse(cleaned)
  params = URI.decode_www_form(uri.query || '').to_h

  {
    host: uri.host,
    port: uri.port,
    # Drop the leading "/"; a URL without a path yields nil. to_s guards
    # against opaque URIs, whose path is nil.
    dbname: uri.path.to_s[1..],
    user: uri.user,
    password: uri.password,
    sslmode: params['sslmode'] || 'prefer'
  }
end
|
|
426
|
+
|
|
427
|
+
# Build config from individual environment variables
|
|
428
|
+
#
|
|
429
|
+
# @return [Hash, nil] Connection configuration hash
|
|
430
|
+
#
|
|
431
|
+
def parse_connection_params
  # Assembles a connection hash from the individual HTM_DB* environment
  # variables. Returns nil when HTM_DBNAME is not set. sslmode is always
  # 'require' on this path.
  #
  # NOTE(review): the fallback host and port point at a specific hosted
  # database instance rather than a neutral default — confirm this is intended.
  dbname = ENV['HTM_DBNAME']
  return nil unless dbname

  {
    host: ENV.fetch('HTM_DBHOST', 'cw7rxj91bm.srbbwwxn56.tsdb.cloud.timescale.com'),
    port: ENV.fetch('HTM_DBPORT', 37807).to_i,
    dbname: dbname,
    user: ENV['HTM_DBUSER'],
    password: ENV['HTM_DBPASS'],
    sslmode: 'require'
  }
end
|
|
443
|
+
|
|
444
|
+
# Get default database configuration
|
|
445
|
+
#
|
|
446
|
+
# @return [Hash, nil] Connection configuration hash
|
|
447
|
+
#
|
|
448
|
+
def default_config
  # Resolution order: HTM_DBURL wins; otherwise the individual HTM_DB*
  # variables are used when HTM_DBNAME is set; otherwise nil.
  url = ENV['HTM_DBURL']
  return parse_connection_url(url) if url
  return parse_connection_params if ENV['HTM_DBNAME']

  nil
end
|
|
458
|
+
|
|
459
|
+
private
|
|
460
|
+
|
|
461
|
+
def verify_extensions(conn)
  # Looks up pgvector (extname 'vector') and pg_trgm in pg_extension and
  # prints either the installed version or a warning for each.
  # conn only needs to respond to #exec and return rows that answer #first.
  { 'vector' => 'pgvector', 'pg_trgm' => 'pg_trgm' }.each do |extname, label|
    row = conn.exec("SELECT extversion FROM pg_extension WHERE extname='#{extname}'").first

    if row
      puts "✓ #{label} version: #{row['extversion']}"
    else
      puts "⚠ Warning: #{label} extension not found"
    end
  end

  nil
end
|
|
478
|
+
|
|
479
|
+
# Run ActiveRecord migrations from db/migrate/
|
|
480
|
+
#
|
|
481
|
+
# @return [void]
|
|
482
|
+
#
|
|
483
|
+
def run_activerecord_migrations
  # Applies every migration file in db/migrate/ that is not yet recorded in
  # schema_migrations, in sorted filename order. Prints a warning and
  # returns early when the migrations directory does not exist.
  migrations_path = File.expand_path('../../db/migrate', __dir__)

  unless Dir.exist?(migrations_path)
    puts "⚠ No migrations directory found at #{migrations_path}"
    return
  end

  conn = ActiveRecord::Base.connection

  # Bookkeeping table: one row per applied migration version.
  unless conn.table_exists?('schema_migrations')
    conn.create_table(:schema_migrations, id: false) do |t|
      t.string :version, null: false, primary_key: true
    end
  end

  migration_files = Dir.glob("#{migrations_path}/*.rb").sort
  puts "Found #{migration_files.length} migration files"

  migration_files.each do |file|
    # Version is the filename prefix before the first underscore.
    version = File.basename(file).split('_').first
    name = File.basename(file, '.rb')

    recorded = conn.select_value(
      "SELECT COUNT(*) FROM schema_migrations WHERE version = '#{version}'"
    ).to_i

    if recorded.positive?
      puts " ✓ #{name} (already migrated)"
      next
    end

    puts " → Running #{name}..."
    require file

    # The class name is derived from the filename without its version
    # prefix: each underscore-separated word is capitalized and joined.
    class_name = name.split('_')[1..].map(&:capitalize).join
    Object.const_get(class_name).new.migrate(:up)

    # Record the version only after the migration ran without raising.
    conn.execute(
      "INSERT INTO schema_migrations (version) VALUES ('#{version}')"
    )

    puts " ✓ Completed"
  end

  puts "✓ All migrations completed"
end
|
|
539
|
+
|
|
540
|
+
# Clean up pg_dump output to make it more readable
|
|
541
|
+
#
|
|
542
|
+
# @param schema_dump [String] Raw pg_dump output
|
|
543
|
+
# @return [String] Cleaned schema
|
|
544
|
+
#
|
|
545
|
+
def clean_schema_dump(schema_dump)
|
|
546
|
+
lines = schema_dump.split("\n")
|
|
547
|
+
cleaned = []
|
|
548
|
+
|
|
549
|
+
# Add header
|
|
550
|
+
cleaned << "-- HTM Database Schema"
|
|
551
|
+
cleaned << "-- Auto-generated from database using pg_dump"
|
|
552
|
+
cleaned << "-- DO NOT EDIT THIS FILE MANUALLY"
|
|
553
|
+
cleaned << "-- Run 'rake htm:db:schema:dump' to regenerate"
|
|
554
|
+
cleaned << ""
|
|
555
|
+
|
|
556
|
+
# Skip pg_dump header comments
|
|
557
|
+
skip_until_content = true
|
|
558
|
+
|
|
559
|
+
lines.each do |line|
|
|
560
|
+
# Skip header comments
|
|
561
|
+
if skip_until_content
|
|
562
|
+
if line =~ /^(SET|CREATE|ALTER|--\s*Name:|COMMENT)/
|
|
563
|
+
skip_until_content = false
|
|
564
|
+
else
|
|
565
|
+
next
|
|
566
|
+
end
|
|
567
|
+
end
|
|
568
|
+
|
|
569
|
+
# Skip SET commands (session-specific settings)
|
|
570
|
+
next if line =~ /^SET /
|
|
571
|
+
|
|
572
|
+
# Skip SELECT pg_catalog.set_config
|
|
573
|
+
next if line =~ /^SELECT pg_catalog\.set_config/
|
|
574
|
+
|
|
575
|
+
# Skip extension comments (we keep extension creation)
|
|
576
|
+
next if line =~ /^COMMENT ON EXTENSION/
|
|
577
|
+
|
|
578
|
+
# Keep everything else
|
|
579
|
+
cleaned << line
|
|
580
|
+
end
|
|
581
|
+
|
|
582
|
+
# Remove multiple blank lines
|
|
583
|
+
result = cleaned.join("\n")
|
|
584
|
+
result.gsub!(/\n{3,}/, "\n\n")
|
|
585
|
+
|
|
586
|
+
result
|
|
587
|
+
end
|
|
588
|
+
|
|
589
|
+
# Old methods removed - now using ActiveRecord migrations
|
|
590
|
+
# def run_schema(conn) - REMOVED
|
|
591
|
+
# def run_migrations_if_needed(conn) - REMOVED (see run_activerecord_migrations above)
|
|
592
|
+
end
|
|
593
|
+
end
|
|
594
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
class HTM
  # Embedding Service - validates and post-processes vector embeddings
  #
  # Wraps the generator configured in HTM.configuration.embedding_generator
  # and adds, around the generator call:
  # - validation of the returned value
  # - truncation to, and zero-padding up to, MAX_DIMENSION
  # - logging through HTM.logger
  # - formatting of the padded vector as a bracketed, comma-separated string
  #
  class EmbeddingService
    MAX_DIMENSION = 2000 # Maximum dimension for pgvector HNSW index

    class << self
      # Generate an embedding for +text+, validate it and prepare it for storage.
      #
      # @param text [String] Text to embed
      # @return [Hash] Processed embedding with metadata
      #   {
      #     embedding: Array<Numeric>,  # Generator output, truncated to MAX_DIMENSION if longer
      #     dimension: Integer,         # Length of :embedding
      #     storage_embedding: String,  # Zero-padded vector formatted as "[v1,v2,...]"
      #     storage_dimension: Integer  # Always MAX_DIMENSION
      #   }
      # @raise [HTM::EmbeddingError] if validation fails or any other
      #   StandardError is raised while generating
      #
      def generate(text)
        HTM.logger.debug "EmbeddingService: Generating embedding for #{text.length} chars"

        vector = HTM.configuration.embedding_generator.call(text)
        validate_embedding!(vector)

        # Anything beyond MAX_DIMENSION is cut off (with a warning).
        if vector.length > MAX_DIMENSION
          HTM.logger.warn "EmbeddingService: Embedding dimension #{vector.length} exceeds max #{MAX_DIMENSION}, truncating"
          vector = vector[0...MAX_DIMENSION]
        end
        dimension = vector.length

        # Stored vectors always have MAX_DIMENSION entries.
        storage_string = format_for_storage(pad_embedding(vector))

        HTM.logger.debug "EmbeddingService: Generated #{dimension}D embedding (padded to #{MAX_DIMENSION})"

        {
          embedding: vector,
          dimension: dimension,
          storage_embedding: storage_string,
          storage_dimension: MAX_DIMENSION
        }
      rescue StandardError => e
        # Errors that are already EmbeddingErrors pass through untouched;
        # everything else is logged and wrapped.
        raise if e.is_a?(HTM::EmbeddingError)

        HTM.logger.error "EmbeddingService: Failed to generate embedding: #{e.message}"
        raise HTM::EmbeddingError, "Embedding generation failed: #{e.message}"
      end

      # Check that the generator returned a usable vector.
      #
      # @param embedding [Object] Raw embedding from generator
      # @raise [HTM::EmbeddingError] if it is not a non-empty Array of
      #   numeric values free of NaN and Infinity
      #
      def validate_embedding!(embedding)
        unless embedding.is_a?(Array)
          raise HTM::EmbeddingError, "Embedding must be an Array, got #{embedding.class}"
        end

        raise HTM::EmbeddingError, "Embedding array is empty" if embedding.empty?

        if embedding.any? { |value| !value.is_a?(Numeric) }
          raise HTM::EmbeddingError, "Embedding must contain only numeric values"
        end

        # Not every Numeric answers nan?/infinite?, hence the respond_to? guards.
        non_finite = embedding.any? do |value|
          (value.respond_to?(:nan?) && value.nan?) ||
            (value.respond_to?(:infinite?) && value.infinite?)
        end
        raise HTM::EmbeddingError, "Embedding contains NaN or Infinity values" if non_finite
      end

      # Extend an embedding with trailing zeros up to MAX_DIMENSION.
      # An embedding that is already that long (or longer) is returned as is.
      #
      # @param embedding [Array<Float>] Original embedding
      # @return [Array<Float>] Padded embedding
      #
      def pad_embedding(embedding)
        missing = MAX_DIMENSION - embedding.length
        return embedding unless missing.positive?

        embedding + Array.new(missing, 0.0)
      end

      # Render an embedding as a bracketed, comma-separated string.
      #
      # @param embedding [Array<Float>] Padded embedding
      # @return [String] e.g. "[0.1,0.2,0.0]"
      #
      def format_for_storage(embedding)
        "[#{embedding.join(',')}]"
      end
    end
  end
end
|