htm 0.0.20 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +37 -9
  60. data/lib/htm/config/section.rb +74 -0
  61. data/lib/htm/config/validator.rb +83 -0
  62. data/lib/htm/config.rb +64 -360
  63. data/lib/htm/database.rb +85 -127
  64. data/lib/htm/errors.rb +14 -0
  65. data/lib/htm/integrations/sinatra.rb +13 -44
  66. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  67. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  68. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  69. data/lib/htm/loaders/defaults_loader.rb +23 -0
  70. data/lib/htm/loaders/markdown_loader.rb +17 -15
  71. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  72. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  73. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  74. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  75. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  76. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  77. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  78. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  79. data/lib/htm/long_term_memory.rb +13 -13
  80. data/lib/htm/mcp/cli.rb +115 -8
  81. data/lib/htm/mcp/resources.rb +4 -3
  82. data/lib/htm/mcp/server.rb +5 -4
  83. data/lib/htm/mcp/tools.rb +37 -28
  84. data/lib/htm/migration.rb +72 -0
  85. data/lib/htm/models/file_source.rb +52 -31
  86. data/lib/htm/models/node.rb +224 -108
  87. data/lib/htm/models/node_tag.rb +49 -28
  88. data/lib/htm/models/robot.rb +38 -27
  89. data/lib/htm/models/robot_node.rb +63 -35
  90. data/lib/htm/models/tag.rb +126 -123
  91. data/lib/htm/observability.rb +45 -41
  92. data/lib/htm/proposition_service.rb +76 -7
  93. data/lib/htm/railtie.rb +2 -2
  94. data/lib/htm/robot_group.rb +30 -18
  95. data/lib/htm/sequel_config.rb +215 -0
  96. data/lib/htm/sql_builder.rb +14 -16
  97. data/lib/htm/tag_service.rb +78 -0
  98. data/lib/htm/tasks.rb +3 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/workflows/remember_workflow.rb +6 -5
  101. data/lib/htm.rb +26 -22
  102. data/lib/tasks/db.rake +0 -2
  103. data/lib/tasks/doc.rake +2 -2
  104. data/lib/tasks/files.rake +11 -18
  105. data/lib/tasks/htm.rake +190 -62
  106. data/lib/tasks/jobs.rake +179 -54
  107. data/lib/tasks/tags.rake +8 -13
  108. data/scripts/backfill_parent_tags.rb +376 -0
  109. data/scripts/normalize_plural_tags.rb +335 -0
  110. metadata +109 -80
  111. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  112. data/examples/sinatra_app/Gemfile.lock +0 -166
  113. data/lib/htm/active_record_config.rb +0 -104
  114. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  117. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  118. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  119. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  120. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  122. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  154. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
class HTM
  # ConfigSection provides method access to nested configuration hashes.
  #
  # Keys are stored as Symbols. Hash values handed to the constructor are
  # wrapped in their own ConfigSection so that nested values can be reached
  # with chained method calls.
  #
  # @example
  #   section = ConfigSection.new(host: 'localhost', port: 5432)
  #   section.host # => 'localhost'
  #   section.port # => 5432
  #
  class ConfigSection
    # @param hash [Hash, nil] initial values; String or Symbol keys are
    #   accepted, nil is treated as an empty Hash
    def initialize(hash = {})
      @data = {}
      (hash || {}).each do |key, value|
        @data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
      end
    end

    # Dynamic reader/writer for configuration keys.
    #
    # * `section.name = value` stores the first argument under :name as-is
    #   (a Hash assigned this way is NOT wrapped in a ConfigSection).
    # * `section.name` returns the stored value, or nil for an unknown key;
    #   unknown readers never raise NoMethodError.
    def method_missing(method, *args, &block)
      key = method.to_s
      if key.end_with?('=')
        @data[key.chomp('=').to_sym] = args.first
      elsif @data.key?(method)
        @data[method]
      else
        nil
      end
    end

    # Reports true for readers and writers of keys that are currently stored.
    def respond_to_missing?(method, include_private = false)
      key = method.to_s.chomp('=').to_sym
      @data.key?(key) || super
    end

    # @return [Hash] a plain Hash with Symbol keys; nested ConfigSection
    #   values are converted recursively, every other value is returned as-is
    def to_h
      @data.transform_values do |v|
        v.is_a?(ConfigSection) ? v.to_h : v
      end
    end

    # @param key [#to_sym]
    # @return [Object, nil] the stored value, or nil when the key is absent
    def [](key)
      @data[key.to_sym]
    end

    # Stores value under key (converted to a Symbol) without wrapping it.
    def []=(key, value)
      @data[key.to_sym] = value
    end

    # Deep-merges other on top of this section and returns a new section;
    # the receiver's own key table is left untouched.
    #
    # A plain Hash overlay has its keys symbolized (recursively) before the
    # merge. The receiver's keys are always Symbols, so without this step a
    # String-keyed overlay such as a parsed YAML document would not line up
    # with the existing keys and its nested hashes would replace the
    # receiver's nested sections instead of being merged into them.
    #
    # @param other [ConfigSection, Hash, nil] overlay values; nil merges nothing
    # @return [ConfigSection]
    def merge(other)
      other_hash =
        case other
        when ConfigSection then other.to_h
        when Hash then symbolize_keys(other)
        else other || {}
        end
      ConfigSection.new(deep_merge(to_h, other_hash))
    end

    # @return [Array<Symbol>] the top-level keys
    def keys
      @data.keys
    end

    # Iterates over the top-level key/value pairs; nested sections are
    # yielded as ConfigSection objects.
    def each(&block)
      @data.each(&block)
    end

    private

    # Recursively merges overlay into base; when both sides hold a Hash for
    # the same key the hashes are merged, otherwise the overlay value wins.
    def deep_merge(base, overlay)
      base.merge(overlay) do |_key, old_val, new_val|
        if old_val.is_a?(Hash) && new_val.is_a?(Hash)
          deep_merge(old_val, new_val)
        else
          new_val
        end
      end
    end

    # Returns a copy of hash with every key converted via #to_sym, descending
    # into nested Hash values (mirrors the key handling of #initialize).
    def symbolize_keys(hash)
      hash.each_with_object({}) do |(key, value), result|
        result[key.to_sym] = value.is_a?(Hash) ? symbolize_keys(value) : value
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
class HTM
  class Config
    # Validation helpers mixed into HTM::Config.
    #
    # The including class is expected to supply the accessor methods read
    # here (embedding_provider, job_backend, week_start, the relevance_*
    # weights, ...) as well as raise_validation_error; none of them are
    # defined in this module.
    module Validator
      SUPPORTED_PROVIDERS = %i[
        openai anthropic gemini azure ollama
        huggingface openrouter bedrock deepseek
      ].freeze

      SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
      SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze

      # Runs the provider, job backend, week start and relevance weight
      # checks, in that order. Callables and the logger are not checked here.
      def validate_config
        validate_providers
        validate_job_backend
        validate_week_start
        validate_relevance_weights
      end

      # Checks the embedding, tag and proposition providers one after another.
      def validate_providers
        validate_provider(:embedding_provider, embedding_provider)
        validate_provider(:tag_provider, tag_provider)
        validate_provider(:proposition_provider, proposition_provider)
      end

      # Accepts nil or any member of SUPPORTED_PROVIDERS; anything else is
      # reported through raise_validation_error.
      #
      # @param name [Symbol] setting name used in the error message
      # @param value [Symbol, nil] configured provider
      def validate_provider(name, value)
        return if value.nil? || SUPPORTED_PROVIDERS.include?(value)

        raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
      end

      # A falsy job backend is allowed; a set one must be in
      # SUPPORTED_JOB_BACKENDS.
      def validate_job_backend
        return if !job_backend || SUPPORTED_JOB_BACKENDS.include?(job_backend)

        raise_validation_error("job.backend must be one of: #{SUPPORTED_JOB_BACKENDS.join(', ')} (got #{job_backend.inspect})")
      end

      # week_start must be in SUPPORTED_WEEK_STARTS (nil is rejected too).
      def validate_week_start
        return if SUPPORTED_WEEK_STARTS.include?(week_start)

        raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{week_start.inspect})")
      end

      # The four relevance weights must add up to 1.0, within +/- 0.01.
      def validate_relevance_weights
        weight_sum = relevance_semantic_weight + relevance_tag_weight +
                     relevance_recency_weight + relevance_access_weight
        return if (0.99..1.01).cover?(weight_sum)

        raise_validation_error("relevance weights must sum to 1.0 (got #{weight_sum})")
      end

      # Each of the four callable instance variables must respond to :call.
      #
      # @raise [HTM::ValidationError] naming the first one that does not
      def validate_callables
        {
          'embedding_generator' => @embedding_generator,
          'tag_extractor' => @tag_extractor,
          'proposition_extractor' => @proposition_extractor,
          'token_counter' => @token_counter
        }.each do |label, callable|
          next if callable.respond_to?(:call)

          raise HTM::ValidationError, "#{label} must be callable"
        end

        nil
      end

      # The logger must respond to :info, :warn and :error.
      #
      # @raise [HTM::ValidationError] when any of the three is missing
      def validate_logger
        return if %i[info warn error].all? { |level| @logger.respond_to?(level) }

        raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
      end
    end
  end
end
data/lib/htm/config.rb CHANGED
@@ -4,78 +4,18 @@ require 'anyway_config'
4
4
  require 'logger'
5
5
  require 'yaml'
6
6
 
7
+ # Define Config class first to establish superclass
7
8
  class HTM
8
- # ConfigSection provides method access to nested configuration hashes
9
- #
10
- # @example
11
- # section = ConfigSection.new(host: 'localhost', port: 5432)
12
- # section.host # => 'localhost'
13
- # section.port # => 5432
14
- #
15
- class ConfigSection
16
- def initialize(hash = {})
17
- @data = {}
18
- (hash || {}).each do |key, value|
19
- @data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
20
- end
21
- end
22
-
23
- def method_missing(method, *args, &block)
24
- key = method.to_s
25
- if key.end_with?('=')
26
- @data[key.chomp('=').to_sym] = args.first
27
- elsif @data.key?(method)
28
- @data[method]
29
- else
30
- nil
31
- end
32
- end
33
-
34
- def respond_to_missing?(method, include_private = false)
35
- key = method.to_s.chomp('=').to_sym
36
- @data.key?(key) || super
37
- end
38
-
39
- def to_h
40
- @data.transform_values do |v|
41
- v.is_a?(ConfigSection) ? v.to_h : v
42
- end
43
- end
44
-
45
- def [](key)
46
- @data[key.to_sym]
47
- end
48
-
49
- def []=(key, value)
50
- @data[key.to_sym] = value
51
- end
52
-
53
- def merge(other)
54
- other_hash = other.is_a?(ConfigSection) ? other.to_h : other
55
- ConfigSection.new(deep_merge(to_h, other_hash || {}))
56
- end
57
-
58
- def keys
59
- @data.keys
60
- end
61
-
62
- def each(&block)
63
- @data.each(&block)
64
- end
65
-
66
- private
67
-
68
- def deep_merge(base, overlay)
69
- base.merge(overlay) do |_key, old_val, new_val|
70
- if old_val.is_a?(Hash) && new_val.is_a?(Hash)
71
- deep_merge(old_val, new_val)
72
- else
73
- new_val
74
- end
75
- end
76
- end
9
+ class Config < Anyway::Config
77
10
  end
11
+ end
12
+
13
+ require_relative 'config/section'
14
+ require_relative 'config/validator'
15
+ require_relative 'config/database'
16
+ require_relative 'config/builder'
78
17
 
18
+ class HTM
79
19
  # HTM Configuration using Anyway Config
80
20
  #
81
21
  # Schema is defined in lib/htm/config/defaults.yml (single source of truth)
@@ -114,7 +54,11 @@ class HTM
114
54
  # config.embedding.model = 'text-embedding-3-small'
115
55
  # end
116
56
  #
117
- class Config < Anyway::Config
57
+ class Config
58
+ include Validator
59
+ include Database
60
+ include Builder
61
+
118
62
  config_name :htm
119
63
  env_prefix :htm
120
64
 
@@ -216,14 +160,6 @@ class HTM
216
160
  # Validation
217
161
  # ==========================================================================
218
162
 
219
- SUPPORTED_PROVIDERS = %i[
220
- openai anthropic gemini azure ollama
221
- huggingface openrouter bedrock deepseek
222
- ].freeze
223
-
224
- SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
225
- SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze
226
-
227
163
  # Default embedding dimensions by provider
228
164
  DEFAULT_DIMENSIONS = {
229
165
  openai: 1536,
@@ -237,7 +173,7 @@ class HTM
237
173
  deepseek: 1536
238
174
  }.freeze
239
175
 
240
- on_load :coerce_nested_types, :validate_config, :setup_defaults
176
+ on_load :coerce_nested_types, :reconcile_database_config, :validate_config, :setup_defaults
241
177
 
242
178
  # ==========================================================================
243
179
  # Callable Accessors (not loaded from config sources)
@@ -260,44 +196,6 @@ class HTM
260
196
  # Convenience Accessors (for common nested values)
261
197
  # ==========================================================================
262
198
 
263
- # Database convenience methods
264
- def database_url
265
- url = database.url
266
- return url if url && !url.empty?
267
-
268
- build_database_url
269
- end
270
-
271
- def database_config
272
- url = database_url
273
- return {} unless url
274
-
275
- require 'uri'
276
- uri = URI.parse(url)
277
-
278
- # Coercion now merges env vars with SCHEMA defaults, so pool_size/timeout
279
- # are always available even when only HTM_DATABASE__URL is set
280
- {
281
- adapter: 'postgresql',
282
- host: uri.host,
283
- port: uri.port || 5432,
284
- database: uri.path&.sub(%r{^/}, ''),
285
- username: uri.user,
286
- password: uri.password,
287
- pool: database.pool_size.to_i,
288
- timeout: database.timeout.to_i,
289
- sslmode: database.sslmode,
290
- encoding: 'unicode',
291
- prepared_statements: false,
292
- advisory_locks: false
293
- }.compact
294
- end
295
-
296
- def database_configured?
297
- url = database_url
298
- (url && !url.empty?) || (database.name && !database.name.empty?)
299
- end
300
-
301
199
  # Embedding convenience accessors
302
200
  def embedding_provider
303
201
  provider = embedding.provider
@@ -493,6 +391,48 @@ class HTM
493
391
  self.class.env
494
392
  end
495
393
 
394
+ # ==========================================================================
395
+ # Environment Validation
396
+ # ==========================================================================
397
+
398
# Lists the environment names HTM accepts, as reported by
# HTM::Loaders::DefaultsLoader.valid_environments.
#
# @return [Array<Symbol>] valid environment names (e.g., [:development, :production, :test])
def self.valid_environments
  defaults_loader = HTM::Loaders::DefaultsLoader
  defaults_loader.valid_environments
end
404
+
405
# Asks HTM::Loaders::DefaultsLoader whether the current environment
# (the value of env) is a known one.
#
# @return [Boolean] true if environment has a config section
def self.valid_environment?
  current = env
  HTM::Loaders::DefaultsLoader.valid_environment?(current)
end
411
+
412
# Ensure the current environment is one that is configured.
#
# The error message lists every valid environment name and explains how
# to fix the setup.
#
# @raise [HTM::ConfigurationError] if environment is invalid
# @return [true] if environment is valid
def self.validate_environment!
  current = env

  unless HTM::Loaders::DefaultsLoader.valid_environment?(current)
    names = valid_environments.map(&:to_s).join(', ')
    message = "Invalid environment '#{current}'. " \
              "Valid environments are: #{names}. " \
              "Set HTM_ENV to a valid environment or add a '#{current}:' section to your config."
    raise HTM::ConfigurationError, message
  end

  true
end
426
+
427
# Instance-level delegate to the class method of the same name.
#
# @return [Boolean] whatever self.class.valid_environment? returns
def valid_environment?
  config_class = self.class
  config_class.valid_environment?
end
431
+
432
# Instance-level delegate to the class method of the same name; any error
# raised by the class method propagates unchanged.
def validate_environment!
  config_class = self.class
  config_class.validate_environment!
end
435
+
496
436
  # ==========================================================================
497
437
  # XDG Config Path Helpers
498
438
  # ==========================================================================
@@ -598,18 +538,6 @@ class HTM
598
538
 
599
539
  private
600
540
 
601
- def build_database_url
602
- return nil unless database.name && !database.name.empty?
603
-
604
- auth = if database.user && !database.user.empty?
605
- database.password && !database.password.empty? ? "#{database.user}:#{database.password}@" : "#{database.user}@"
606
- else
607
- ''
608
- end
609
-
610
- "postgresql://#{auth}#{database.host}:#{database.port}/#{database.name}"
611
- end
612
-
613
541
  # ==========================================================================
614
542
  # Type Coercion Callback
615
543
  # ==========================================================================
@@ -622,77 +550,16 @@ class HTM
622
550
  providers[provider] = ConfigSection.new(value) if value.is_a?(Hash)
623
551
  end
624
552
  end
625
- end
626
-
627
- # ==========================================================================
628
- # Validation Callbacks
629
- # ==========================================================================
630
-
631
- def validate_config
632
- validate_providers
633
- validate_job_backend
634
- validate_week_start
635
- validate_relevance_weights
636
- end
637
-
638
- def validate_providers
639
- validate_provider(:embedding_provider, embedding_provider)
640
- validate_provider(:tag_provider, tag_provider)
641
- validate_provider(:proposition_provider, proposition_provider)
642
- end
643
-
644
- def validate_provider(name, value)
645
- return if value.nil?
646
-
647
- unless SUPPORTED_PROVIDERS.include?(value)
648
- raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
649
- end
650
- end
651
-
652
- def validate_job_backend
653
- return unless job_backend
654
553
 
655
- unless SUPPORTED_JOB_BACKENDS.include?(job_backend)
656
- raise_validation_error("job.backend must be one of: #{SUPPORTED_JOB_BACKENDS.join(', ')} (got #{job_backend.inspect})")
554
+ # Coerce database numeric fields to integers (env vars are always strings)
555
+ if database&.port && !database.port.is_a?(Integer)
556
+ database.port = database.port.to_i
657
557
  end
658
- end
659
-
660
- def validate_week_start
661
- unless SUPPORTED_WEEK_STARTS.include?(week_start)
662
- raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{week_start.inspect})")
663
- end
664
- end
665
-
666
- def validate_relevance_weights
667
- total = relevance_semantic_weight + relevance_tag_weight +
668
- relevance_recency_weight + relevance_access_weight
669
-
670
- unless (0.99..1.01).cover?(total)
671
- raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
558
+ if database&.pool_size && !database.pool_size.is_a?(Integer)
559
+ database.pool_size = database.pool_size.to_i
672
560
  end
673
- end
674
-
675
- def validate_callables
676
- unless @embedding_generator.respond_to?(:call)
677
- raise HTM::ValidationError, "embedding_generator must be callable"
678
- end
679
-
680
- unless @tag_extractor.respond_to?(:call)
681
- raise HTM::ValidationError, "tag_extractor must be callable"
682
- end
683
-
684
- unless @proposition_extractor.respond_to?(:call)
685
- raise HTM::ValidationError, "proposition_extractor must be callable"
686
- end
687
-
688
- unless @token_counter.respond_to?(:call)
689
- raise HTM::ValidationError, "token_counter must be callable"
690
- end
691
- end
692
-
693
- def validate_logger
694
- unless @logger.respond_to?(:info) && @logger.respond_to?(:warn) && @logger.respond_to?(:error)
695
- raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
561
+ if database&.timeout && !database.timeout.is_a?(Integer)
562
+ database.timeout = database.timeout.to_i
696
563
  end
697
564
  end
698
565
 
@@ -716,169 +583,6 @@ class HTM
716
583
 
717
584
  :fiber
718
585
  end
719
-
720
- def build_default_logger
721
- logger = Logger.new($stdout)
722
- logger.level = log_level
723
- logger.formatter = proc do |severity, datetime, _progname, msg|
724
- "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
725
- end
726
- logger
727
- end
728
-
729
- def build_default_token_counter
730
- lambda do |text|
731
- require 'tiktoken_ruby' unless defined?(Tiktoken)
732
- encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
733
- encoder.encode(text).length
734
- end
735
- end
736
-
737
- def build_default_embedding_generator
738
- lambda do |text|
739
- require 'ruby_llm' unless defined?(RubyLLM)
740
-
741
- configure_ruby_llm(embedding_provider)
742
- refresh_ollama_models! if embedding_provider == :ollama
743
-
744
- model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model
745
- response = RubyLLM.embed(text, model: model)
746
- embedding = extract_embedding_from_response(response)
747
-
748
- unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
749
- raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
750
- end
751
-
752
- embedding
753
- end
754
- end
755
-
756
- def build_default_tag_extractor
757
- lambda do |text, existing_ontology = []|
758
- require 'ruby_llm' unless defined?(RubyLLM)
759
-
760
- configure_ruby_llm(tag_provider)
761
- refresh_ollama_models! if tag_provider == :ollama
762
-
763
- model = tag_provider == :ollama ? normalize_ollama_model(tag_model) : tag_model
764
-
765
- prompt = build_tag_extraction_prompt(text, existing_ontology)
766
- system_prompt = build_tag_system_prompt
767
-
768
- chat = RubyLLM.chat(model: model)
769
- chat.with_instructions(system_prompt)
770
- response = chat.ask(prompt)
771
-
772
- parse_tag_response(extract_text_from_response(response))
773
- end
774
- end
775
-
776
- def build_default_proposition_extractor
777
- lambda do |text|
778
- require 'ruby_llm' unless defined?(RubyLLM)
779
-
780
- configure_ruby_llm(proposition_provider)
781
- refresh_ollama_models! if proposition_provider == :ollama
782
-
783
- model = proposition_provider == :ollama ? normalize_ollama_model(proposition_model) : proposition_model
784
-
785
- prompt = build_proposition_extraction_prompt(text)
786
- system_prompt = build_proposition_system_prompt
787
-
788
- chat = RubyLLM.chat(model: model)
789
- chat.with_instructions(system_prompt)
790
- response = chat.ask(prompt)
791
-
792
- parse_proposition_response(extract_text_from_response(response))
793
- end
794
- end
795
-
796
- # ==========================================================================
797
- # Response Extraction Helpers
798
- # ==========================================================================
799
-
800
- def extract_embedding_from_response(response)
801
- return nil unless response
802
-
803
- case response
804
- when Array
805
- response
806
- when ->(r) { r.respond_to?(:vectors) }
807
- vectors = response.vectors
808
- vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
809
- when ->(r) { r.respond_to?(:to_a) }
810
- response.to_a
811
- when ->(r) { r.respond_to?(:embedding) }
812
- response.embedding
813
- else
814
- if response.respond_to?(:instance_variable_get)
815
- vectors = response.instance_variable_get(:@vectors)
816
- return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
817
- return vectors if vectors.is_a?(Array)
818
- end
819
- raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
820
- end
821
- end
822
-
823
- def extract_text_from_response(response)
824
- return '' unless response
825
-
826
- case response
827
- when String then response
828
- when ->(r) { r.respond_to?(:content) } then response.content.to_s
829
- when ->(r) { r.respond_to?(:text) } then response.text.to_s
830
- else response.to_s
831
- end
832
- end
833
-
834
- def parse_tag_response(text)
835
- tags = text.to_s.split("\n").map(&:strip).reject(&:empty?)
836
- valid_tags = tags.select { |tag| tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ }
837
- valid_tags.select { |tag| tag.count(':') < max_tag_depth }
838
- end
839
-
840
- def parse_proposition_response(text)
841
- text.to_s
842
- .split("\n")
843
- .map(&:strip)
844
- .map { |line| line.sub(/^[-*]\s*/, '') }
845
- .map(&:strip)
846
- .reject(&:empty?)
847
- end
848
-
849
- # ==========================================================================
850
- # Prompt Builders
851
- #
852
- # These methods use configurable prompt templates from defaults.yml.
853
- # Templates use %{placeholder} syntax for runtime interpolation.
854
- # ==========================================================================
855
-
856
- def build_tag_extraction_prompt(text, existing_ontology)
857
- taxonomy_context = if existing_ontology.any?
858
- sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
859
- tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
860
- else
861
- tag.taxonomy_context_empty
862
- end
863
-
864
- tag.user_prompt_template % {
865
- text: text,
866
- max_depth: max_tag_depth,
867
- taxonomy_context: taxonomy_context
868
- }
869
- end
870
-
871
- def build_tag_system_prompt
872
- tag.system_prompt.to_s.strip
873
- end
874
-
875
- def build_proposition_extraction_prompt(text)
876
- proposition.user_prompt_template % { text: text }
877
- end
878
-
879
- def build_proposition_system_prompt
880
- proposition.system_prompt.to_s.strip
881
- end
882
586
  end
883
587
  end
884
588