htm 0.0.20 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +37 -9
  60. data/lib/htm/config/section.rb +74 -0
  61. data/lib/htm/config/validator.rb +83 -0
  62. data/lib/htm/config.rb +64 -360
  63. data/lib/htm/database.rb +85 -127
  64. data/lib/htm/errors.rb +14 -0
  65. data/lib/htm/integrations/sinatra.rb +13 -44
  66. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  67. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  68. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  69. data/lib/htm/loaders/defaults_loader.rb +23 -0
  70. data/lib/htm/loaders/markdown_loader.rb +17 -15
  71. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  72. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  73. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  74. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  75. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  76. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  77. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  78. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  79. data/lib/htm/long_term_memory.rb +13 -13
  80. data/lib/htm/mcp/cli.rb +115 -8
  81. data/lib/htm/mcp/resources.rb +4 -3
  82. data/lib/htm/mcp/server.rb +5 -4
  83. data/lib/htm/mcp/tools.rb +37 -28
  84. data/lib/htm/migration.rb +72 -0
  85. data/lib/htm/models/file_source.rb +52 -31
  86. data/lib/htm/models/node.rb +224 -108
  87. data/lib/htm/models/node_tag.rb +49 -28
  88. data/lib/htm/models/robot.rb +38 -27
  89. data/lib/htm/models/robot_node.rb +63 -35
  90. data/lib/htm/models/tag.rb +126 -123
  91. data/lib/htm/observability.rb +45 -41
  92. data/lib/htm/proposition_service.rb +76 -7
  93. data/lib/htm/railtie.rb +2 -2
  94. data/lib/htm/robot_group.rb +30 -18
  95. data/lib/htm/sequel_config.rb +215 -0
  96. data/lib/htm/sql_builder.rb +14 -16
  97. data/lib/htm/tag_service.rb +78 -0
  98. data/lib/htm/tasks.rb +3 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/workflows/remember_workflow.rb +6 -5
  101. data/lib/htm.rb +26 -22
  102. data/lib/tasks/db.rake +0 -2
  103. data/lib/tasks/doc.rake +2 -2
  104. data/lib/tasks/files.rake +11 -18
  105. data/lib/tasks/htm.rake +190 -62
  106. data/lib/tasks/jobs.rake +179 -54
  107. data/lib/tasks/tags.rake +8 -13
  108. data/scripts/backfill_parent_tags.rb +376 -0
  109. data/scripts/normalize_plural_tags.rb +335 -0
  110. metadata +109 -80
  111. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  112. data/examples/sinatra_app/Gemfile.lock +0 -166
  113. data/lib/htm/active_record_config.rb +0 -104
  114. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  117. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  118. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  119. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  120. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  122. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  154. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73eb1c7c8727363608ac47f7c1afd26ebb47021a173c105e5bba364598bf143f
4
- data.tar.gz: e9c6954e8bb6da65e26ea55878da3e817e59f28e3383d74f9b48a0501ebd882b
3
+ metadata.gz: 5765fbc2b92d89be3f2bae2f53b4353a76343efd3d092456138bea73ac7803d9
4
+ data.tar.gz: e594cc16f789745267ce527dcc182fbd533b7484c36e09887f702007388803a3
5
5
  SHA512:
6
- metadata.gz: cb76637105f6033ba58d35f5fe3d73b41b990bd327069c7c526c196c787c62d2287e0fdac58261a2ee7b6d739af9f11bd58843c77b7ad6b486da95e02468665e
7
- data.tar.gz: 9e4da2a308bfb5efe31a050406caee29e7a653e401a9665a271b3c094d540d50115663966f11f26b7b58a51f7a9354047ee6117ce69250e7dc5ffabb30469a48
6
+ metadata.gz: 78b4b7e226b9911b429e7e0d8735dd0d50d1e56abf7e9e5f494ab50321e33e574605d6a3991d4f0ce18a95ae6dd8ee7928004e3d455730aa388ee2515ffc53c3
7
+ data.tar.gz: c19a0c15d79342d08e406724724cf64afc68c75ea26f1bf332b4a9b661f3ebe9d50e3f9dbf41db780c0542d4cd7a77ac0bb0da9bf45b5ea7d171def21b4ffc4a
data/CHANGELOG.md CHANGED
@@ -7,6 +7,66 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ### Added
11
+ - **`sslmode` database configuration support** - SSL mode now extracted from URL and included when building URL
12
+ - `parse_database_url` extracts `sslmode` from URL query string (e.g., `?sslmode=require`)
13
+ - `build_database_url` includes `sslmode` as query parameter when set
14
+ - `reconcile_from_url` syncs `sslmode` along with other database components
15
+ - Default `sslmode` is `prefer` (from `defaults.yml`)
16
+ - **`htm:db:purge_all` rake task** - Permanently remove all soft-deleted records from database
17
+ - Displays record counts by table before deletion
18
+ - Detects and removes orphaned join table entries (`node_tags`, `robot_nodes`)
19
+ - Detects and removes orphaned propositions (where `source_node_id` no longer exists)
20
+ - Detects and removes orphaned robots (with no associated memory nodes)
21
+ - Requires confirmation before proceeding
22
+ - Deletes in correct order for referential integrity
23
+ - **`htm:jobs:process_propositions` rake task** - Incremental proposition extraction for unprocessed nodes
24
+ - Uses `ruby-progressbar` with ETA display
25
+ - Only processes nodes not yet extracted (tracks via `source_node_id` metadata)
26
+ - Added to `htm:jobs:process_all` task chain
27
+ - **Rake task passthrough in MCP CLI** - Run rake tasks via `htm_mcp rake <task>`
28
+ - `htm_mcp rake htm:db:stats` - Run any HTM rake task
29
+ - `htm_mcp rake -T` / `htm_mcp rake --tasks` - List available tasks
30
+ - **Pattern filtering** - `htm_mcp rake -T htm:jobs` filters to matching namespace (like standard rake)
31
+ - **Meta-response filtering in PropositionService** - Filters LLM responses that ask for input
32
+ - `META_RESPONSE_PATTERNS` constant with common patterns ("please provide", "I need the text", etc.)
33
+ - `meta_response?` method for detecting invalid responses
34
+ - Prevents storing "Please provide the text" as propositions
35
+ - **Progress bars for job processing tasks** - Visual progress with ETA for long-running operations
36
+ - `htm:jobs:process_embeddings` - Shows progress when generating embeddings
37
+ - `htm:jobs:process_tags` - Shows progress when extracting tags
38
+ - Format: `Processing: |████████████████| 50/100 (50%) ETA: 00:01:30`
39
+
40
+ ### Changed
41
+ - **Centralized HTM loading in rake tasks** - Single `require_relative` in `lib/htm/tasks.rb`
42
+ - HTM is now loaded once before any rake task files are loaded
43
+ - Removed ~25 redundant `require 'htm'` statements from individual rake tasks
44
+ - Ensures local development codebase is always used via `require_relative`
45
+ - Removed `$LOAD_PATH` manipulation that was previously needed for `require 'htm'`
46
+ - **Removed `:validate` rake task dependency** - Config validation now automatic
47
+ - `HTM::Config.new` handles all validation at require time (environment, URL/component reconciliation, naming convention)
48
+ - Rake tasks no longer need explicit validation step
49
+ - **Cleaned up Rakefile** - Removed unnecessary code
50
+ - Removed deprecated `db_setup` and `db_test` backward-compatibility tasks
51
+ - Removed redundant `task :test` block (`:set_test_env` prerequisite handles environment setup)
52
+ - **PropositionService validation now fully configurable** - Moved hardcoded constants to `defaults.yml`
53
+ - `proposition.min_length` (default: 10) - Minimum characters for valid proposition
54
+ - `proposition.max_length` (default: 1000) - Maximum characters for valid proposition
55
+ - `proposition.min_words` (default: 5) - Minimum words for valid proposition
56
+ - Added `min_length`, `max_length`, `min_words` class methods that read from config
57
+ - **Improved proposition extraction prompt** - Better quality propositions
58
+ - Added explicit BAD/GOOD examples for pronoun replacement
59
+ - Added context enrichment examples (e.g., "wiring" → "solar panel wiring for Oklahoma barndominium")
60
+ - System prompt now explicitly prevents meta-responses
61
+ - Increased specificity requirements for self-contained facts
62
+
63
+ ### Fixed
64
+ - **Test database isolation** - Two-layer protection prevents tests from polluting development/production
65
+ - `Rakefile`: `set_test_env` task now ALWAYS overrides `HTM_DATABASE__URL` to test database
66
+ - Uses `#{service_name}_test` pattern (e.g., `htm_test`) based on `HTM_SERVICE__NAME` env var
67
+ - `test_helper.rb`: Safety check aborts with helpful message if database URL doesn't contain `_test`
68
+ - Prevents accidental test execution against non-test databases
69
+
10
70
  ## [0.0.20] - 2025-12-22
11
71
  ### Added
12
72
  - **Fiber-based job backend** - New `:fiber` backend for I/O-bound background jobs
data/Rakefile CHANGED
@@ -10,21 +10,27 @@ Rake::TestTask.new(:test) do |t|
10
10
  t.verbose = true
11
11
  end
12
12
 
13
- # Ensure test task runs with HTM_ENV=test (takes priority over RAILS_ENV/RACK_ENV)
14
- task :test do
15
- ENV['HTM_ENV'] = 'test'
16
- end
17
-
18
13
  # Prepend environment setup before test runs
19
14
  Rake::Task[:test].enhance [:set_test_env]
20
15
 
21
16
  task :set_test_env do
22
17
  ENV['HTM_ENV'] = 'test'
23
- # Set database URL if not already configured
24
- # Uses current system user for local PostgreSQL connection
25
- unless ENV['HTM_DATABASE__URL']
26
- ENV['HTM_DATABASE__URL'] = "postgresql://#{ENV['USER']}@localhost:5432/htm_test"
18
+
19
+ # Build test database name from service name + environment
20
+ # Uses HTM_SERVICE__NAME env var if set, otherwise defaults to 'htm'
21
+ service_name = ENV['HTM_SERVICE__NAME'] || 'htm'
22
+ test_db_name = "#{service_name}_test"
23
+
24
+ # ALWAYS use the test database - never allow tests to run against other databases
25
+ # This prevents accidental pollution of development/production data
26
+ test_db_url = "postgresql://#{ENV['USER']}@localhost:5432/#{test_db_name}"
27
+
28
+ if ENV['HTM_DATABASE__URL'] && !ENV['HTM_DATABASE__URL'].include?('_test')
29
+ warn "WARNING: HTM_DATABASE__URL was set to '#{ENV['HTM_DATABASE__URL']}'"
30
+ warn " Overriding to use test database: #{test_db_url}"
27
31
  end
32
+
33
+ ENV['HTM_DATABASE__URL'] = test_db_url
28
34
  end
29
35
 
30
36
  task default: :test
@@ -33,21 +39,101 @@ task default: :test
33
39
  # This uses the same loader that external applications use
34
40
  require_relative "lib/htm/tasks"
35
41
 
36
- # Legacy tasks for backwards compatibility
37
- desc "Run database setup (deprecated: use htm:db:setup)"
38
- task :db_setup => "htm:db:setup"
42
+ # =============================================================================
43
+ # Examples Tasks
44
+ # =============================================================================
45
+
46
+ # Prepend environment setup before running any example
47
+ task :set_examples_env do
48
+ ENV['HTM_ENV'] = 'examples'
49
+
50
+ # Build examples database name from service name + environment
51
+ service_name = ENV['HTM_SERVICE__NAME'] || 'htm'
52
+ examples_db_name = "#{service_name}_examples"
39
53
 
40
- desc "Verify database connection (deprecated: use htm:db:verify)"
41
- task :db_test => "htm:db:verify"
54
+ # ALWAYS use the examples database
55
+ examples_db_url = "postgresql://#{ENV['USER']}@localhost:5432/#{examples_db_name}"
42
56
 
43
- desc "Run example"
44
- task :example do
45
- ruby "examples/basic_usage.rb"
57
+ if ENV['HTM_DATABASE__URL'] && !ENV['HTM_DATABASE__URL'].include?('_examples')
58
+ warn "WARNING: HTM_DATABASE__URL was set to '#{ENV['HTM_DATABASE__URL']}'"
59
+ warn " Overriding to use examples database: #{examples_db_url}"
60
+ end
61
+
62
+ ENV['HTM_DATABASE__URL'] = examples_db_url
46
63
  end
47
64
 
65
+ namespace :examples do
66
+ desc "Set up examples database (create + setup schema)"
67
+ task setup: :set_examples_env do
68
+ Rake::Task['htm:db:create'].invoke rescue nil
69
+ Rake::Task['htm:db:setup'].invoke
70
+ end
71
+
72
+ desc "Reset examples database (drop + create + setup)"
73
+ task reset: :set_examples_env do
74
+ Rake::Task['htm:db:reset'].invoke
75
+ end
76
+
77
+ desc "Run basic_usage example"
78
+ task basic: :set_examples_env do
79
+ ruby "examples/01_basic_usage.rb"
80
+ end
81
+
82
+ desc "Run all standalone examples"
83
+ task :all => :set_examples_env do
84
+ examples = %w[
85
+ examples/01_basic_usage.rb
86
+ examples/03_custom_llm_configuration.rb
87
+ examples/04_file_loader_usage.rb
88
+ examples/05_timeframe_demo.rb
89
+ ]
90
+ examples.each do |example|
91
+ if File.exist?(example)
92
+ puts "\n#{'=' * 60}"
93
+ puts "Running: #{example}"
94
+ puts "#{'=' * 60}"
95
+ ruby example
96
+ end
97
+ end
98
+ end
99
+
100
+ desc "Show examples database status"
101
+ task status: :set_examples_env do
102
+ require_relative 'lib/htm'
103
+ puts "Examples Environment Status"
104
+ puts "=" * 40
105
+ puts "HTM_ENV: #{ENV['HTM_ENV']}"
106
+ puts "Database URL: #{ENV['HTM_DATABASE__URL']}"
107
+ puts "Expected database: #{HTM.config.expected_database_name}"
108
+ if HTM.config.database_configured?
109
+ puts "Database configured: Yes"
110
+ begin
111
+ HTM::ActiveRecordConfig.establish_connection!
112
+ if HTM::ActiveRecordConfig.connected?
113
+ puts "Database connected: Yes"
114
+ puts "\nTable counts:"
115
+ %w[nodes robots tags file_sources].each do |table|
116
+ count = ActiveRecord::Base.connection.execute("SELECT COUNT(*) FROM #{table}").first['count']
117
+ puts " #{table}: #{count}"
118
+ end
119
+ else
120
+ puts "Database connected: No"
121
+ end
122
+ rescue => e
123
+ puts "Database connected: No (#{e.message})"
124
+ end
125
+ else
126
+ puts "Database configured: No"
127
+ end
128
+ end
129
+ end
130
+
131
+ desc "Run example (alias for examples:basic)"
132
+ task :example => 'examples:basic'
133
+
48
134
  desc "Run timeframe demo"
49
135
  task :timeframe_demo do
50
- ruby "examples/timeframe_demo.rb"
136
+ ruby "examples/05_timeframe_demo.rb"
51
137
  end
52
138
 
53
139
  desc "Show gem stats"
@@ -1,13 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class EnableExtensions < ActiveRecord::Migration[7.1]
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class EnableExtensions < HTM::Migration
4
6
  def up
5
- enable_extension 'vector'
6
- enable_extension 'pg_trgm'
7
+ run "CREATE EXTENSION IF NOT EXISTS vector"
8
+ run "CREATE EXTENSION IF NOT EXISTS pg_trgm"
9
+ run "CREATE EXTENSION IF NOT EXISTS pg_search"
7
10
  end
8
11
 
9
12
  def down
10
- disable_extension 'pg_trgm'
11
- disable_extension 'vector'
13
+ run "DROP EXTENSION IF EXISTS pg_search"
14
+ run "DROP EXTENSION IF EXISTS pg_trgm"
15
+ run "DROP EXTENSION IF EXISTS vector"
12
16
  end
13
17
  end
@@ -1,11 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class CreateRobots < ActiveRecord::Migration[7.1]
4
- def change
5
- create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
6
- t.text :name, comment: 'Human-readable name for the robot'
7
- t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
8
- t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class CreateRobots < HTM::Migration
6
+ def up
7
+ create_table(:robots) do
8
+ primary_key :id
9
+ String :name, text: true
10
+ DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
11
+ DateTime :last_active, default: Sequel::CURRENT_TIMESTAMP
9
12
  end
13
+
14
+ run "COMMENT ON TABLE robots IS 'Registry of all LLM robots using the HTM system'"
15
+ run "COMMENT ON COLUMN robots.name IS 'Human-readable name for the robot'"
16
+ run "COMMENT ON COLUMN robots.created_at IS 'When the robot was first registered'"
17
+ run "COMMENT ON COLUMN robots.last_active IS 'Last time the robot accessed the system'"
18
+ end
19
+
20
+ def down
21
+ drop_table(:robots)
10
22
  end
11
23
  end
@@ -1,25 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class CreateFileSources < ActiveRecord::Migration[7.1]
4
- def change
5
- create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
6
- t.text :file_path, null: false, comment: 'Absolute path to source file'
7
- t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
8
- t.timestamptz :mtime, comment: 'File modification time'
9
- t.integer :file_size, comment: 'File size in bytes'
10
- t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
11
- t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
12
- t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
13
- t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class CreateFileSources < HTM::Migration
6
+ def up
7
+ create_table(:file_sources) do
8
+ primary_key :id
9
+ String :file_path, text: true, null: false
10
+ String :file_hash, size: 64
11
+ DateTime :mtime
12
+ Integer :file_size
13
+ column :frontmatter, :jsonb, default: Sequel.lit("'{}'::jsonb")
14
+ DateTime :last_synced_at
15
+ DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
16
+ DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
14
17
  end
15
18
 
16
- add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
17
- add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
18
- add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
19
+ add_index :file_sources, :file_path, unique: true, name: :idx_file_sources_path_unique
20
+ add_index :file_sources, :file_hash, name: :idx_file_sources_hash
21
+ add_index :file_sources, :last_synced_at, name: :idx_file_sources_last_synced
22
+
23
+ run "COMMENT ON TABLE file_sources IS 'Source file metadata for loaded documents'"
24
+ run "COMMENT ON COLUMN file_sources.file_path IS 'Absolute path to source file'"
25
+ run "COMMENT ON COLUMN file_sources.file_hash IS 'SHA-256 hash of file content'"
26
+ run "COMMENT ON COLUMN file_sources.mtime IS 'File modification time'"
27
+ run "COMMENT ON COLUMN file_sources.file_size IS 'File size in bytes'"
28
+ run "COMMENT ON COLUMN file_sources.frontmatter IS 'Parsed YAML frontmatter'"
29
+ run "COMMENT ON COLUMN file_sources.last_synced_at IS 'When file was last synced to HTM'"
19
30
 
20
31
  # LZ4 compression for better read performance on JSONB column
21
- execute <<~SQL
22
- ALTER TABLE file_sources ALTER COLUMN frontmatter SET COMPRESSION lz4;
23
- SQL
32
+ run "ALTER TABLE file_sources ALTER COLUMN frontmatter SET COMPRESSION lz4"
33
+ end
34
+
35
+ def down
36
+ drop_table(:file_sources)
24
37
  end
25
38
  end
@@ -1,82 +1,94 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class CreateNodes < ActiveRecord::Migration[7.1]
4
- def change
5
- create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
6
- t.text :content, null: false, comment: 'The conversation message/utterance content'
7
- t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
8
- t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
9
- t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
10
- t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
11
- t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
12
- t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
13
- t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
14
- t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
15
- t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
16
- t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
17
- t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
18
- t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class CreateNodes < HTM::Migration
6
+ def up
7
+ create_table(:nodes) do
8
+ primary_key :id
9
+ String :content, text: true, null: false
10
+ Integer :access_count, default: 0, null: false
11
+ DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
12
+ DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
13
+ DateTime :last_accessed, default: Sequel::CURRENT_TIMESTAMP
14
+ Integer :token_count
15
+ column :embedding, 'vector(2000)'
16
+ Integer :embedding_dimension
17
+ String :content_hash, size: 64
18
+ DateTime :deleted_at
19
+ Bignum :source_id
20
+ Integer :chunk_position
21
+ column :metadata, :jsonb, default: Sequel.lit("'{}'::jsonb"), null: false
19
22
  end
20
23
 
21
24
  # Basic indexes for common queries
22
- add_index :nodes, :created_at, name: 'idx_nodes_created_at'
23
- add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
24
- add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
25
- add_index :nodes, :access_count, name: 'idx_nodes_access_count'
26
- add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
27
- add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
28
- add_index :nodes, :source_id, name: 'idx_nodes_source_id'
29
- add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
25
+ add_index :nodes, :created_at, name: :idx_nodes_created_at
26
+ add_index :nodes, :updated_at, name: :idx_nodes_updated_at
27
+ add_index :nodes, :last_accessed, name: :idx_nodes_last_accessed
28
+ add_index :nodes, :access_count, name: :idx_nodes_access_count
29
+ add_index :nodes, :content_hash, unique: true, name: :idx_nodes_content_hash_unique
30
+ add_index :nodes, :deleted_at, name: :idx_nodes_deleted_at
31
+ add_index :nodes, :source_id, name: :idx_nodes_source_id
32
+ add_index :nodes, [:source_id, :chunk_position], name: :idx_nodes_source_chunk_position
33
+
34
+ # Comments
35
+ run "COMMENT ON TABLE nodes IS 'Core memory storage for conversation messages and context'"
36
+ run "COMMENT ON COLUMN nodes.content IS 'The conversation message/utterance content'"
37
+ run "COMMENT ON COLUMN nodes.access_count IS 'Number of times this node has been accessed/retrieved'"
38
+ run "COMMENT ON COLUMN nodes.created_at IS 'When this memory was created'"
39
+ run "COMMENT ON COLUMN nodes.updated_at IS 'When this memory was last modified'"
40
+ run "COMMENT ON COLUMN nodes.last_accessed IS 'When this memory was last accessed'"
41
+ run "COMMENT ON COLUMN nodes.token_count IS 'Number of tokens in the content (for context budget management)'"
42
+ run "COMMENT ON COLUMN nodes.embedding IS 'Vector embedding (max 2000 dimensions) for semantic search'"
43
+ run "COMMENT ON COLUMN nodes.embedding_dimension IS 'Actual number of dimensions used in the embedding vector (max 2000)'"
44
+ run "COMMENT ON COLUMN nodes.content_hash IS 'SHA-256 hash of content for deduplication'"
45
+ run "COMMENT ON COLUMN nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set'"
46
+ run "COMMENT ON COLUMN nodes.source_id IS 'Reference to source file (for file-loaded nodes)'"
47
+ run "COMMENT ON COLUMN nodes.chunk_position IS 'Position within source file (0-indexed)'"
48
+ run "COMMENT ON COLUMN nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)'"
30
49
 
31
50
  # Partial index for efficiently querying non-deleted nodes
32
- add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
51
+ run "CREATE INDEX idx_nodes_not_deleted_created_at ON nodes (created_at) WHERE deleted_at IS NULL"
33
52
 
34
53
  # GIN index for JSONB metadata queries
35
- add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
54
+ run "CREATE INDEX idx_nodes_metadata ON nodes USING gin(metadata)"
36
55
 
37
56
  # Vector similarity search index (HNSW for better performance)
38
- execute <<-SQL
57
+ run <<-SQL
39
58
  CREATE INDEX idx_nodes_embedding ON nodes
40
59
  USING hnsw (embedding vector_cosine_ops)
41
60
  WITH (m = 16, ef_construction = 64)
42
61
  SQL
43
62
 
44
63
  # Full-text search on conversation content
45
- execute <<-SQL
46
- CREATE INDEX idx_nodes_content_gin ON nodes
47
- USING gin(to_tsvector('english', content))
48
- SQL
64
+ run "CREATE INDEX idx_nodes_content_gin ON nodes USING gin(to_tsvector('english', content))"
49
65
 
50
66
  # Trigram indexes for fuzzy matching on conversation content
51
- execute <<-SQL
52
- CREATE INDEX idx_nodes_content_trgm ON nodes
53
- USING gin(content gin_trgm_ops)
54
- SQL
67
+ run "CREATE INDEX idx_nodes_content_trgm ON nodes USING gin(content gin_trgm_ops)"
55
68
 
56
69
  # Check constraint for embedding dimensions
57
- execute <<-SQL
70
+ run <<-SQL
58
71
  ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
59
72
  CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
60
73
  SQL
61
74
 
62
75
  # Partial index for active (non-deleted) node queries
63
- add_index :nodes, :id,
64
- name: 'idx_nodes_active',
65
- where: 'deleted_at IS NULL'
76
+ run "CREATE INDEX idx_nodes_active ON nodes (id) WHERE deleted_at IS NULL"
66
77
 
67
78
  # Composite index for embedding-based searches on active nodes
68
- execute <<-SQL
69
- CREATE INDEX idx_nodes_active_with_embedding ON nodes (id)
70
- WHERE deleted_at IS NULL AND embedding IS NOT NULL
71
- SQL
79
+ run "CREATE INDEX idx_nodes_active_with_embedding ON nodes (id) WHERE deleted_at IS NULL AND embedding IS NOT NULL"
72
80
 
73
81
  # LZ4 compression for better read performance
74
- execute <<-SQL
75
- ALTER TABLE nodes ALTER COLUMN metadata SET COMPRESSION lz4;
76
- ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION lz4;
77
- SQL
82
+ run "ALTER TABLE nodes ALTER COLUMN metadata SET COMPRESSION lz4"
83
+ run "ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION lz4"
78
84
 
79
85
  # Foreign key to file_sources table
80
- add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
86
+ alter_table(:nodes) do
87
+ add_foreign_key [:source_id], :file_sources, on_delete: :set_null
88
+ end
89
+ end
90
+
91
+ def down
92
+ drop_table(:nodes)
81
93
  end
82
94
  end
@@ -1,20 +1,32 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class CreateTags < ActiveRecord::Migration[7.1]
4
- def change
5
- create_table :tags, comment: 'Unique tag names for categorization' do |t|
6
- t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
7
- t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
8
- t.timestamptz :deleted_at, comment: 'Soft delete timestamp'
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class CreateTags < HTM::Migration
6
+ def up
7
+ create_table(:tags) do
8
+ primary_key :id
9
+ String :name, text: true, null: false
10
+ DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
11
+ DateTime :deleted_at
9
12
  end
10
13
 
11
- add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
12
- add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
13
- add_index :tags, :deleted_at, name: 'idx_tags_deleted_at'
14
+ add_index :tags, :name, unique: true, name: :idx_tags_name_unique
15
+ add_index :tags, :deleted_at, name: :idx_tags_deleted_at
16
+
17
+ # Pattern matching index for prefix queries
18
+ run "CREATE INDEX idx_tags_name_pattern ON tags USING btree (name text_pattern_ops)"
14
19
 
15
20
  # GIN trigram index for fuzzy search (typo-tolerant queries)
16
- execute <<~SQL
17
- CREATE INDEX idx_tags_name_trgm ON tags USING gin(name gin_trgm_ops);
18
- SQL
21
+ run "CREATE INDEX idx_tags_name_trgm ON tags USING gin(name gin_trgm_ops)"
22
+
23
+ run "COMMENT ON TABLE tags IS 'Unique tag names for categorization'"
24
+ run "COMMENT ON COLUMN tags.name IS 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'"
25
+ run "COMMENT ON COLUMN tags.created_at IS 'When this tag was created'"
26
+ run "COMMENT ON COLUMN tags.deleted_at IS 'Soft delete timestamp'"
27
+ end
28
+
29
+ def down
30
+ drop_table(:tags)
19
31
  end
20
32
  end
@@ -1,20 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class CreateNodeTags < ActiveRecord::Migration[7.1]
4
- def change
5
- create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
6
- t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
7
- t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
8
- t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
9
- t.timestamptz :deleted_at, comment: 'Soft delete timestamp'
3
+ require_relative '../../lib/htm/migration'
4
+
5
+ class CreateNodeTags < HTM::Migration
6
+ def up
7
+ create_table(:node_tags) do
8
+ primary_key :id
9
+ Bignum :node_id, null: false
10
+ Bignum :tag_id, null: false
11
+ DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
12
+ DateTime :deleted_at
10
13
  end
11
14
 
12
- add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
13
- add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
14
- add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
15
- add_index :node_tags, :deleted_at, name: 'idx_node_tags_deleted_at'
15
+ add_index :node_tags, [:node_id, :tag_id], unique: true, name: :idx_node_tags_unique
16
+ add_index :node_tags, :node_id, name: :idx_node_tags_node_id
17
+ add_index :node_tags, :tag_id, name: :idx_node_tags_tag_id
18
+ add_index :node_tags, :deleted_at, name: :idx_node_tags_deleted_at
19
+
20
+ alter_table(:node_tags) do
21
+ add_foreign_key [:node_id], :nodes, on_delete: :cascade
22
+ add_foreign_key [:tag_id], :tags, on_delete: :cascade
23
+ end
24
+
25
+ run "COMMENT ON TABLE node_tags IS 'Join table connecting nodes to tags (many-to-many)'"
26
+ run "COMMENT ON COLUMN node_tags.node_id IS 'ID of the node being tagged'"
27
+ run "COMMENT ON COLUMN node_tags.tag_id IS 'ID of the tag being applied'"
28
+ run "COMMENT ON COLUMN node_tags.created_at IS 'When this association was created'"
29
+ run "COMMENT ON COLUMN node_tags.deleted_at IS 'Soft delete timestamp'"
30
+ end
16
31
 
17
- add_foreign_key :node_tags, :nodes, column: :node_id, on_delete: :cascade
18
- add_foreign_key :node_tags, :tags, column: :tag_id, on_delete: :cascade
32
+ def down
33
+ drop_table(:node_tags)
19
34
  end
20
35
  end