htm 0.0.18 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +119 -1
  3. data/README.md +12 -0
  4. data/Rakefile +104 -18
  5. data/db/migrate/00001_enable_extensions.rb +9 -5
  6. data/db/migrate/00002_create_robots.rb +18 -6
  7. data/db/migrate/00003_create_file_sources.rb +30 -17
  8. data/db/migrate/00004_create_nodes.rb +60 -48
  9. data/db/migrate/00005_create_tags.rb +24 -12
  10. data/db/migrate/00006_create_node_tags.rb +28 -13
  11. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  12. data/db/schema.sql +17 -1
  13. data/db/seeds.rb +34 -34
  14. data/docs/api/embedding-service.md +140 -110
  15. data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
  16. data/docs/api/yard/HTM/Config.md +173 -0
  17. data/docs/api/yard/HTM/ConfigSection.md +28 -0
  18. data/docs/api/yard/HTM/Database.md +1 -1
  19. data/docs/api/yard/HTM/Railtie.md +2 -2
  20. data/docs/api/yard/HTM.md +0 -57
  21. data/docs/api/yard/index.csv +76 -61
  22. data/docs/api/yard-reference.md +2 -1
  23. data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
  24. data/docs/architecture/adrs/004-hive-mind.md +1 -1
  25. data/docs/architecture/adrs/008-robot-identification.md +1 -1
  26. data/docs/architecture/index.md +11 -9
  27. data/docs/architecture/overview.md +11 -7
  28. data/docs/assets/images/balanced-strategy-decay.svg +41 -0
  29. data/docs/assets/images/class-hierarchy.svg +1 -1
  30. data/docs/assets/images/eviction-priority.svg +43 -0
  31. data/docs/assets/images/exception-hierarchy.svg +2 -2
  32. data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
  33. data/docs/assets/images/htm-architecture-overview.svg +3 -3
  34. data/docs/assets/images/htm-core-components.svg +4 -4
  35. data/docs/assets/images/htm-layered-architecture.svg +1 -1
  36. data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
  37. data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
  38. data/docs/assets/images/memory-topology.svg +53 -0
  39. data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
  40. data/docs/database/naming-convention.md +244 -0
  41. data/docs/database_rake_tasks.md +31 -0
  42. data/docs/development/rake-tasks.md +80 -35
  43. data/docs/development/setup.md +76 -44
  44. data/docs/examples/basic-usage.md +133 -0
  45. data/docs/examples/config-files.md +170 -0
  46. data/docs/examples/file-loading.md +208 -0
  47. data/docs/examples/index.md +116 -0
  48. data/docs/examples/llm-configuration.md +168 -0
  49. data/docs/examples/mcp-client.md +172 -0
  50. data/docs/examples/rails-integration.md +173 -0
  51. data/docs/examples/robot-groups.md +210 -0
  52. data/docs/examples/sinatra-integration.md +218 -0
  53. data/docs/examples/standalone-app.md +216 -0
  54. data/docs/examples/telemetry.md +224 -0
  55. data/docs/examples/timeframes.md +143 -0
  56. data/docs/getting-started/installation.md +97 -40
  57. data/docs/getting-started/quick-start.md +28 -11
  58. data/docs/guides/configuration.md +515 -0
  59. data/docs/guides/file-loading.md +322 -0
  60. data/docs/guides/getting-started.md +40 -9
  61. data/docs/guides/index.md +3 -3
  62. data/docs/guides/mcp-server.md +100 -13
  63. data/docs/guides/propositions.md +264 -0
  64. data/docs/guides/recalling-memories.md +4 -4
  65. data/docs/guides/search-strategies.md +3 -3
  66. data/docs/guides/tags.md +318 -0
  67. data/docs/guides/telemetry.md +229 -0
  68. data/docs/index.md +8 -16
  69. data/docs/{architecture → robots}/hive-mind.md +8 -111
  70. data/docs/robots/index.md +73 -0
  71. data/docs/{guides → robots}/multi-robot.md +3 -3
  72. data/docs/{guides → robots}/robot-groups.md +8 -7
  73. data/docs/{architecture → robots}/two-tier-memory.md +13 -149
  74. data/docs/robots/why-robots.md +85 -0
  75. data/examples/.envrc +6 -0
  76. data/examples/.gitignore +2 -0
  77. data/examples/00_create_examples_db.rb +94 -0
  78. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  79. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  80. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  81. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  82. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  83. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  84. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  85. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  86. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  87. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  88. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  89. data/examples/11_robot_groups/README.md +335 -0
  90. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  91. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  92. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  93. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  94. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  95. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  96. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  97. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  98. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  99. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  100. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  101. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  102. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  103. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  104. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  105. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  106. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  107. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  108. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  109. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  110. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  111. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  112. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  113. data/examples/README.md +230 -211
  114. data/examples/examples_helper.rb +138 -0
  115. data/lib/htm/config/builder.rb +167 -0
  116. data/lib/htm/config/database.rb +317 -0
  117. data/lib/htm/config/defaults.yml +41 -13
  118. data/lib/htm/config/section.rb +74 -0
  119. data/lib/htm/config/validator.rb +83 -0
  120. data/lib/htm/config.rb +65 -361
  121. data/lib/htm/database.rb +85 -127
  122. data/lib/htm/errors.rb +14 -0
  123. data/lib/htm/integrations/sinatra.rb +13 -44
  124. data/lib/htm/job_adapter.rb +75 -1
  125. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  126. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  127. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  128. data/lib/htm/loaders/defaults_loader.rb +23 -0
  129. data/lib/htm/loaders/markdown_loader.rb +17 -15
  130. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  131. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  132. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  133. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  134. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  135. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  136. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  137. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  138. data/lib/htm/long_term_memory.rb +13 -13
  139. data/lib/htm/mcp/cli.rb +115 -8
  140. data/lib/htm/mcp/resources.rb +4 -3
  141. data/lib/htm/mcp/server.rb +5 -4
  142. data/lib/htm/mcp/tools.rb +37 -28
  143. data/lib/htm/migration.rb +72 -0
  144. data/lib/htm/models/file_source.rb +52 -31
  145. data/lib/htm/models/node.rb +224 -108
  146. data/lib/htm/models/node_tag.rb +49 -28
  147. data/lib/htm/models/robot.rb +38 -27
  148. data/lib/htm/models/robot_node.rb +63 -35
  149. data/lib/htm/models/tag.rb +126 -123
  150. data/lib/htm/observability.rb +45 -41
  151. data/lib/htm/proposition_service.rb +76 -7
  152. data/lib/htm/railtie.rb +2 -2
  153. data/lib/htm/robot_group.rb +30 -18
  154. data/lib/htm/sequel_config.rb +215 -0
  155. data/lib/htm/sql_builder.rb +14 -16
  156. data/lib/htm/tag_service.rb +78 -0
  157. data/lib/htm/tasks.rb +3 -0
  158. data/lib/htm/version.rb +1 -1
  159. data/lib/htm/workflows/remember_workflow.rb +213 -0
  160. data/lib/htm.rb +27 -22
  161. data/lib/tasks/db.rake +0 -2
  162. data/lib/tasks/doc.rake +2 -2
  163. data/lib/tasks/files.rake +11 -18
  164. data/lib/tasks/htm.rake +190 -62
  165. data/lib/tasks/jobs.rake +179 -54
  166. data/lib/tasks/tags.rake +8 -13
  167. data/mkdocs.yml +33 -8
  168. data/scripts/backfill_parent_tags.rb +376 -0
  169. data/scripts/normalize_plural_tags.rb +335 -0
  170. metadata +168 -86
  171. data/docs/api/yard/HTM/Configuration.md +0 -240
  172. data/docs/telemetry.md +0 -391
  173. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  174. data/examples/sinatra_app/Gemfile.lock +0 -166
  175. data/lib/htm/active_record_config.rb +0 -104
  176. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  177. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  178. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  179. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  180. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  181. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  182. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  183. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  184. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  185. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  186. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  187. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  188. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  189. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  190. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  191. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  192. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  193. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  194. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  195. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  196. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  197. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  198. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  199. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  200. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  201. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  202. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  203. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  204. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  205. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  206. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  207. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  208. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  209. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  210. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  211. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  212. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  213. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  214. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  215. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  216. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Examples Helper - Ensures all examples use the htm_examples database
4
+ #
5
+ # This file must be required at the top of every example file before loading HTM.
6
+ # It enforces database isolation to prevent examples from polluting development
7
+ # or production databases.
8
+ #
9
+ # Usage:
10
+ # require_relative 'examples_helper' # For files in examples/
11
+ # require_relative '../examples_helper' # For files in examples/subdirectory/
12
+ #
13
+ # This sets:
14
+ # - HTM_ENV=examples
15
+ # - HTM_DATABASE__URL to point to htm_examples database
16
+ #
17
+ # Before running examples:
18
+ # 1. Create the examples database: createdb htm_examples
19
+ # 2. Set up schema: HTM_ENV=examples rake htm:db:setup
20
+ # 3. Run example: ruby examples/01_basic_usage.rb
21
+
22
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
23
+
24
+ # Set examples environment BEFORE loading HTM
25
+ # This is critical because HTM::Config reads environment at load time
26
+ # Note: 'examples' is not in the gem's defaults.yml - it's configured here
27
+ ENV['HTM_ENV'] = 'examples'
28
+
29
+ # Build examples database name from service name + environment
30
+ # Uses HTM_SERVICE__NAME env var if set, otherwise defaults to 'htm'
31
+ service_name = ENV['HTM_SERVICE__NAME'] || 'htm'
32
+ examples_db_name = "#{service_name}_examples"
33
+
34
+ # Safety check: Refuse to run examples against non-examples databases
35
+ # A pre-set HTM_DATABASE__URL must contain '_examples' (e.g., .../htm_examples, .../myapp_examples)
36
+ if ENV['HTM_DATABASE__URL'] && !ENV['HTM_DATABASE__URL'].include?('_examples')
37
+ abort <<~ERROR
38
+ SAFETY CHECK FAILED: Examples must run against an examples database!
39
+
40
+ HTM_DATABASE__URL is set to: #{ENV['HTM_DATABASE__URL']}
41
+
42
+ This does not appear to be an examples database (must contain '_examples').
43
+ Running examples against development or production databases can corrupt data.
44
+
45
+ To fix, either:
46
+ 1. Unset HTM_DATABASE__URL and let this helper configure it
47
+ 2. Set: export HTM_DATABASE__URL="postgresql://#{ENV['USER']}@localhost:5432/#{examples_db_name}"
48
+
49
+ ERROR
50
+ end
51
+
52
+ # ALWAYS use the examples database - never allow examples to run against other databases
53
+ examples_db_url = "postgresql://#{ENV['USER']}@localhost:5432/#{examples_db_name}"
54
+ ENV['HTM_DATABASE__URL'] = examples_db_url
55
+
56
+ # Load HTM first
57
+ require "htm"
58
+
59
+ # Configure HTM for examples environment
60
+ # This keeps the 'examples' environment configuration out of the gem's bundled defaults
61
+ HTM.configure do |config|
62
+ # Use inline job backend for CLI examples (clearer console output)
63
+ # Rails/Sinatra apps should override this in their initializers to use :fiber or :active_job
64
+ config.job.backend = :inline unless defined?(Rails)
65
+
66
+ # Set log level for examples
67
+ config.log_level = :info
68
+
69
+ # Disable telemetry for examples
70
+ config.telemetry_enabled = false
71
+ end
72
+
73
+ # Module with helper methods for examples
74
+ module ExamplesHelper
75
+ # Check if database is available and configured
76
+ #
77
+ # @return [Boolean] true if database is ready
78
+ def self.database_ready?
79
+ return false unless HTM.config.database_configured?
80
+
81
+ begin
82
+ HTM::SequelConfig.establish_connection!
83
+ HTM::SequelConfig.connected?
84
+ rescue => e
85
+ false
86
+ end
87
+ end
88
+
89
+ # Verify database is ready or print helpful error message
90
+ #
91
+ # @return [void] exits with error if database not ready
92
+ def self.require_database!
93
+ unless database_ready?
94
+ abort <<~ERROR
95
+ ERROR: Examples database not available.
96
+
97
+ Please set up the examples database:
98
+ 1. createdb htm_examples
99
+ 2. HTM_ENV=examples rake htm:db:setup
100
+
101
+ Then run the example again.
102
+ ERROR
103
+ end
104
+ end
105
+
106
+ # Print a section header
107
+ #
108
+ # @param title [String] section title
109
+ def self.section(title)
110
+ border = "=" * (title.size + 6)
111
+ puts
112
+ puts border
113
+ puts "== #{title} =="
114
+ puts border
115
+ puts
116
+ end
117
+
118
+ # Print a success message
119
+ #
120
+ # @param message [String] success message
121
+ def self.success(message)
122
+ puts "[OK] #{message}"
123
+ end
124
+
125
+ # Print an info message
126
+ #
127
+ # @param message [String] info message
128
+ def self.info(message)
129
+ puts "[..] #{message}"
130
+ end
131
+
132
+ # Print environment info
133
+ def self.print_environment
134
+ puts "Environment: #{HTM.config.environment}"
135
+ puts "Database: #{HTM.config.actual_database_name}"
136
+ puts "Job Backend: #{HTM.config.job.backend}"
137
+ end
138
+ end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTM
4
+ class Config
5
+ module Builder
6
+ def build_default_logger
7
+ logger = Logger.new($stdout)
8
+ logger.level = log_level
9
+ logger.formatter = proc do |severity, datetime, _progname, msg|
10
+ "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
11
+ end
12
+ logger
13
+ end
14
+
15
+ def build_default_token_counter
16
+ lambda do |text|
17
+ require 'tiktoken_ruby' unless defined?(Tiktoken)
18
+ encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
19
+ encoder.encode(text).length
20
+ end
21
+ end
22
+
23
+ def build_default_embedding_generator
24
+ lambda do |text|
25
+ require 'ruby_llm' unless defined?(RubyLLM)
26
+
27
+ configure_ruby_llm(embedding_provider)
28
+ refresh_ollama_models! if embedding_provider == :ollama
29
+
30
+ model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model
31
+ response = RubyLLM.embed(text, model: model)
32
+ embedding = extract_embedding_from_response(response)
33
+
34
+ unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
35
+ raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
36
+ end
37
+
38
+ embedding
39
+ end
40
+ end
41
+
42
+ def build_default_tag_extractor
43
+ lambda do |text, existing_ontology = []|
44
+ require 'ruby_llm' unless defined?(RubyLLM)
45
+
46
+ configure_ruby_llm(tag_provider)
47
+ refresh_ollama_models! if tag_provider == :ollama
48
+
49
+ model = tag_provider == :ollama ? normalize_ollama_model(tag_model) : tag_model
50
+
51
+ prompt = build_tag_extraction_prompt(text, existing_ontology)
52
+ system_prompt = build_tag_system_prompt
53
+
54
+ chat = RubyLLM.chat(model: model)
55
+ chat.with_instructions(system_prompt)
56
+ response = chat.ask(prompt)
57
+
58
+ parse_tag_response(extract_text_from_response(response))
59
+ end
60
+ end
61
+
62
+ def build_default_proposition_extractor
63
+ lambda do |text|
64
+ require 'ruby_llm' unless defined?(RubyLLM)
65
+
66
+ configure_ruby_llm(proposition_provider)
67
+ refresh_ollama_models! if proposition_provider == :ollama
68
+
69
+ model = proposition_provider == :ollama ? normalize_ollama_model(proposition_model) : proposition_model
70
+
71
+ prompt = build_proposition_extraction_prompt(text)
72
+ system_prompt = build_proposition_system_prompt
73
+
74
+ chat = RubyLLM.chat(model: model)
75
+ chat.with_instructions(system_prompt)
76
+ response = chat.ask(prompt)
77
+
78
+ parse_proposition_response(extract_text_from_response(response))
79
+ end
80
+ end
81
+
82
+ # ==========================================================================
83
+ # Response Extraction Helpers
84
+ # ==========================================================================
85
+
86
+ def extract_embedding_from_response(response)
87
+ return nil unless response
88
+
89
+ case response
90
+ when Array
91
+ response
92
+ when ->(r) { r.respond_to?(:vectors) }
93
+ vectors = response.vectors
94
+ vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
95
+ when ->(r) { r.respond_to?(:to_a) }
96
+ response.to_a
97
+ when ->(r) { r.respond_to?(:embedding) }
98
+ response.embedding
99
+ else
100
+ if response.respond_to?(:instance_variable_get)
101
+ vectors = response.instance_variable_get(:@vectors)
102
+ return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
103
+ return vectors if vectors.is_a?(Array)
104
+ end
105
+ raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
106
+ end
107
+ end
108
+
109
+ def extract_text_from_response(response)
110
+ return '' unless response
111
+
112
+ case response
113
+ when String then response
114
+ when ->(r) { r.respond_to?(:content) } then response.content.to_s
115
+ when ->(r) { r.respond_to?(:text) } then response.text.to_s
116
+ else response.to_s
117
+ end
118
+ end
119
+
120
+ def parse_tag_response(text)
121
+ tags = text.to_s.split("\n").map(&:strip).reject(&:empty?)
122
+ valid_tags = tags.select { |tag| tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ }
123
+ valid_tags.select { |tag| tag.count(':') < max_tag_depth }
124
+ end
125
+
126
+ def parse_proposition_response(text)
127
+ text.to_s
128
+ .split("\n")
129
+ .map(&:strip)
130
+ .map { |line| line.sub(/^[-*]\s*/, '') }
131
+ .map(&:strip)
132
+ .reject(&:empty?)
133
+ end
134
+
135
+ # ==========================================================================
136
+ # Prompt Builders
137
+ # ==========================================================================
138
+
139
+ def build_tag_extraction_prompt(text, existing_ontology)
140
+ taxonomy_context = if existing_ontology.any?
141
+ sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
142
+ tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
143
+ else
144
+ tag.taxonomy_context_empty
145
+ end
146
+
147
+ tag.user_prompt_template % {
148
+ text: text,
149
+ max_depth: max_tag_depth,
150
+ taxonomy_context: taxonomy_context
151
+ }
152
+ end
153
+
154
+ def build_tag_system_prompt
155
+ tag.system_prompt.to_s.strip
156
+ end
157
+
158
+ def build_proposition_extraction_prompt(text)
159
+ proposition.user_prompt_template % { text: text }
160
+ end
161
+
162
+ def build_proposition_system_prompt
163
+ proposition.system_prompt.to_s.strip
164
+ end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,317 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
+ class HTM
6
+ class Config
7
+ module Database
8
+ # ==========================================================================
9
+ # Database Component Accessors
10
+ # ==========================================================================
11
+ #
12
+ # These methods provide convenient access to database components.
13
+ # Components are automatically reconciled at config load time:
14
+ # - If database.url exists: components are extracted and populated
15
+ # - If database.url is missing: it's built from components
16
+ #
17
+ # ==========================================================================
18
+
19
+ # @return [String, nil] the database host
20
+ def database_host
21
+ database.host
22
+ end
23
+
24
+ # @return [Integer, nil] the database port
25
+ def database_port
26
+ database.port
27
+ end
28
+
29
+ # @return [String, nil] the database name
30
+ def database_name
31
+ database.name
32
+ end
33
+
34
+ # @return [String, nil] the database user
35
+ def database_user
36
+ database.user
37
+ end
38
+
39
+ # @return [String, nil] the database password
40
+ def database_password
41
+ database.password
42
+ end
43
+
44
+ def database_config
45
+ url = database_url
46
+ return {} unless url
47
+
48
+ uri = URI.parse(url)
49
+
50
+ # Coercion now merges env vars with SCHEMA defaults, so pool_size/timeout
51
+ # are always available even when only HTM_DATABASE__URL is set
52
+ {
53
+ adapter: 'postgresql',
54
+ host: uri.host,
55
+ port: uri.port || 5432,
56
+ database: uri.path&.sub(%r{^/}, ''),
57
+ username: uri.user,
58
+ password: uri.password,
59
+ pool: database.pool_size.to_i,
60
+ timeout: database.timeout.to_i,
61
+ sslmode: database.sslmode,
62
+ encoding: 'unicode',
63
+ prepared_statements: false,
64
+ advisory_locks: false
65
+ }.compact
66
+ end
67
+
68
+ def database_configured?
69
+ url = database_url
70
+ (url && !url.empty?) || (database.name && !database.name.empty?)
71
+ end
72
+
73
+ # Database convenience methods
74
+ def database_url
75
+ url = database.url
76
+ return url if url && !url.empty?
77
+
78
+ build_database_url
79
+ end
80
+
81
+ # Validate that database is configured for the current environment
82
+ #
83
+ # @raise [HTM::ConfigurationError] if database is not configured
84
+ # @return [true] if database is configured
85
+ def validate_database!
86
+ validate_environment!
87
+
88
+ unless database_configured?
89
+ raise HTM::ConfigurationError,
90
+ "No database configured for environment '#{environment}'. " \
91
+ "Set HTM_DATABASE__URL or HTM_DATABASE__NAME, " \
92
+ "or add database.name to the '#{environment}:' section in your config."
93
+ end
94
+
95
+ true
96
+ end
97
+
98
+ # ==========================================================================
99
+ # Database Naming Convention
100
+ # ==========================================================================
101
+ #
102
+ # Database names MUST follow the convention: {service_name}_{environment}
103
+ #
104
+ # Examples:
105
+ # - htm_development
106
+ # - htm_test
107
+ # - htm_production
108
+ # - payroll_development
109
+ # - payroll_test
110
+ #
111
+ # This ensures:
112
+ # 1. Database names are predictable and self-documenting
113
+ # 2. Environment mismatches are impossible (exact match required)
114
+ # 3. Service isolation (can't accidentally use another app's database)
115
+ #
116
+ # ==========================================================================
117
+
118
+ # Returns the expected database name based on service.name and environment
119
+ #
120
+ # @return [String] expected database name in format "{service_name}_{environment}"
121
+ #
122
+ # @example
123
+ # config.service.name = "htm"
124
+ # HTM_ENV = "test"
125
+ # config.expected_database_name # => "htm_test"
126
+ #
127
+ def expected_database_name
128
+ "#{service_name}_#{environment}"
129
+ end
130
+
131
+ # Extract the actual database name from URL or config
132
+ #
133
+ # @return [String, nil] the database name
134
+ def actual_database_name
135
+ url = database&.url
136
+ if url && !url.empty?
137
+ # Parse database name from URL: postgresql://user@host:port/dbname
138
+ uri = URI.parse(url) rescue nil
139
+ return uri&.path&.sub(%r{^/}, '')
140
+ end
141
+
142
+ database&.name
143
+ end
144
+
145
+ # Validate that the database name follows the naming convention
146
+ #
147
+ # Database names must be: {service_name}_{environment}
148
+ #
149
+ # @raise [HTM::ConfigurationError] if database name doesn't match expected
150
+ # @return [true] if database name is valid
151
+ #
152
+ # @example Valid configurations
153
+ # HTM_ENV=test, service.name=htm, database=htm_test # OK
154
+ # HTM_ENV=production, service.name=payroll, database=payroll_production # OK
155
+ #
156
+ # @example Invalid configurations (will raise)
157
+ # HTM_ENV=test, service.name=htm, database=htm_production # Wrong environment
158
+ # HTM_ENV=test, service.name=htm, database=payroll_test # Wrong service
159
+ # HTM_ENV=test, service.name=htm, database=mydb # Wrong format
160
+ #
161
+ def validate_database_name!
162
+ actual = actual_database_name
163
+ expected = expected_database_name
164
+
165
+ return true if actual == expected
166
+
167
+ raise HTM::ConfigurationError,
168
+ "Database name '#{actual}' does not match expected '#{expected}'.\n" \
169
+ "Database names must follow the convention: {service_name}_{environment}\n" \
170
+ " Service name: #{service_name}\n" \
171
+ " Environment: #{environment}\n" \
172
+ " Expected: #{expected}\n" \
173
+ " Actual: #{actual}\n\n" \
174
+ "Either:\n" \
175
+ " - Set HTM_DATABASE__URL to point to '#{expected}'\n" \
176
+ " - Set HTM_DATABASE__NAME=#{expected}\n" \
177
+ " - Change HTM_ENV to match the database suffix"
178
+ end
179
+
180
+ # Check if the database name matches the expected convention
181
+ #
182
+ # @return [Boolean] true if database name matches expected
183
+ def valid_database_name?
184
+ actual_database_name == expected_database_name
185
+ end
186
+
187
+ # ==========================================================================
188
+ # Database URL/Components Parsing
189
+ # ==========================================================================
190
+
191
+ # Parse database URL into component hash
192
+ #
193
+ # @return [Hash, nil] parsed components or nil if no URL
194
+ def parse_database_url
195
+ url = database&.url
196
+ return nil if url.nil? || url.empty?
197
+
198
+ uri = URI.parse(url) rescue nil
199
+ return nil unless uri
200
+
201
+ # Parse query string for sslmode
202
+ query_params = URI.decode_www_form(uri.query || '').to_h
203
+ sslmode = query_params['sslmode']
204
+
205
+ {
206
+ host: uri.host,
207
+ port: uri.port,
208
+ name: uri.path&.sub(%r{^/}, ''),
209
+ user: uri.user,
210
+ password: uri.password,
211
+ sslmode: sslmode
212
+ }.compact
213
+ end
214
+
215
+ private
216
+
217
+ def build_database_url
218
+ return nil unless database.name && !database.name.empty?
219
+
220
+ auth = if database.user && !database.user.empty?
221
+ database.password && !database.password.empty? ? "#{database.user}:#{database.password}@" : "#{database.user}@"
222
+ else
223
+ ''
224
+ end
225
+
226
+ url = "postgresql://#{auth}#{database.host}:#{database.port}/#{database.name}"
227
+
228
+ # Add sslmode as query parameter if set
229
+ if database.sslmode && !database.sslmode.empty?
230
+ url += "?sslmode=#{database.sslmode}"
231
+ end
232
+
233
+ url
234
+ end
235
+
236
+ # ==========================================================================
237
+ # Database Configuration Reconciliation
238
+ # ==========================================================================
239
+ #
240
+ # Ensures database.url and database.* components are synchronized:
241
+ #
242
+ # 1. If database.url exists:
243
+ # - Extract all components from the URL
244
+ # - For each component: if the URL provides a value → overwrite the config value (URL wins; no error is raised)
245
+ # - For each component: if the URL omits it → leave the existing config value untouched
246
+ #
247
+ # 2. If database.url is missing but components exist:
248
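+ # - If database.name is missing, derive it as {service_name}_{environment}; default host to 'localhost' and port to 5432
249
+ # - Build and set database.url from components
250
+ # - If neither a name nor any non-default component is set → do nothing (validate_database! reports it later)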
251
+ #
252
+ # This runs automatically at config load time via on_load callback.
253
+ #
254
+ # ==========================================================================
255
+
256
+ def reconcile_database_config
257
+ url = database&.url
258
+ has_url = url && !url.empty?
259
+
260
+ if has_url
261
+ reconcile_from_url
262
+ else
263
+ reconcile_from_components
264
+ end
265
+ end
266
+
267
+ def reconcile_from_url
268
+ url_components = parse_database_url
269
+ return unless url_components
270
+
271
+ # URL is the source of truth - populate all components from it
272
+ # This overwrites any values from config files (they're just defaults)
273
+ %i[host port name user password sslmode].each do |component|
274
+ url_value = url_components[component]
275
+ next if url_value.nil?
276
+
277
+ database.send("#{component}=", url_value)
278
+ end
279
+ end
280
+
281
+ def reconcile_from_components
282
+ # Check what components we have
283
+ name = database&.name
284
+ has_name = name && !name.empty?
285
+
286
+ # If no database config at all, that's fine - might not need database
287
+ # Just return without error; validate_database! will catch if needed later
288
+ return unless has_name || has_any_database_component?
289
+
290
+ # If name is missing, auto-generate from service.name and environment
291
+ # Format: {service_name}_{environment} (e.g., "htm_development")
292
+ unless has_name
293
+ database.name = expected_database_name
294
+ end
295
+
296
+ # Use defaults for host/port if not set
297
+ database.host = 'localhost' if database.host.nil? || database.host.empty?
298
+ database.port = 5432 if database.port.nil?
299
+
300
+ # Build and set the URL
301
+ database.url = build_database_url
302
+ end
303
+
304
+ def has_any_database_component?
305
+ %i[host port user password].any? do |comp|
306
+ val = database.send(comp)
307
+ next false if val.nil?
308
+ next false if val.respond_to?(:empty?) && val.empty?
309
+ # Skip defaults
310
+ next false if comp == :host && val == 'localhost'
311
+ next false if comp == :port && val == 5432
312
+ true
313
+ end
314
+ end
315
+ end
316
+ end
317
+ end