htm 0.0.20 → 0.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +62 -22
  60. data/lib/htm/config/validator.rb +83 -0
  61. data/lib/htm/config.rb +75 -462
  62. data/lib/htm/database.rb +85 -127
  63. data/lib/htm/errors.rb +14 -0
  64. data/lib/htm/integrations/sinatra.rb +13 -44
  65. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  66. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  67. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  68. data/lib/htm/loaders/markdown_loader.rb +17 -15
  69. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  70. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  71. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  72. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  73. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  74. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  75. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  76. data/lib/htm/long_term_memory.rb +13 -13
  77. data/lib/htm/mcp/cli.rb +115 -8
  78. data/lib/htm/mcp/resources.rb +4 -3
  79. data/lib/htm/mcp/server.rb +5 -4
  80. data/lib/htm/mcp/tools.rb +37 -28
  81. data/lib/htm/migration.rb +72 -0
  82. data/lib/htm/models/file_source.rb +52 -31
  83. data/lib/htm/models/node.rb +224 -108
  84. data/lib/htm/models/node_tag.rb +49 -28
  85. data/lib/htm/models/robot.rb +38 -27
  86. data/lib/htm/models/robot_node.rb +63 -35
  87. data/lib/htm/models/tag.rb +126 -123
  88. data/lib/htm/observability.rb +45 -41
  89. data/lib/htm/proposition_service.rb +76 -7
  90. data/lib/htm/railtie.rb +2 -2
  91. data/lib/htm/robot_group.rb +30 -18
  92. data/lib/htm/sequel_config.rb +215 -0
  93. data/lib/htm/sql_builder.rb +14 -16
  94. data/lib/htm/tag_service.rb +78 -0
  95. data/lib/htm/tasks.rb +3 -0
  96. data/lib/htm/version.rb +1 -1
  97. data/lib/htm/workflows/remember_workflow.rb +6 -5
  98. data/lib/htm.rb +26 -22
  99. data/lib/tasks/db.rake +0 -2
  100. data/lib/tasks/doc.rake +2 -2
  101. data/lib/tasks/files.rake +11 -18
  102. data/lib/tasks/htm.rake +190 -62
  103. data/lib/tasks/jobs.rake +179 -54
  104. data/lib/tasks/tags.rake +8 -13
  105. data/scripts/backfill_parent_tags.rb +376 -0
  106. data/scripts/normalize_plural_tags.rb +335 -0
  107. metadata +111 -85
  108. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  109. data/examples/sinatra_app/Gemfile.lock +0 -166
  110. data/lib/htm/active_record_config.rb +0 -104
  111. data/lib/htm/loaders/defaults_loader.rb +0 -143
  112. data/lib/htm/loaders/xdg_config_loader.rb +0 -116
  113. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  114. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  117. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  118. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  119. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  120. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  122. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/config.rb CHANGED
@@ -1,81 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'anyway_config'
3
+ require 'myway_config'
4
4
  require 'logger'
5
- require 'yaml'
6
5
 
6
+ # Define Config class first to establish superclass
7
7
  class HTM
8
- # ConfigSection provides method access to nested configuration hashes
9
- #
10
- # @example
11
- # section = ConfigSection.new(host: 'localhost', port: 5432)
12
- # section.host # => 'localhost'
13
- # section.port # => 5432
14
- #
15
- class ConfigSection
16
- def initialize(hash = {})
17
- @data = {}
18
- (hash || {}).each do |key, value|
19
- @data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
20
- end
21
- end
22
-
23
- def method_missing(method, *args, &block)
24
- key = method.to_s
25
- if key.end_with?('=')
26
- @data[key.chomp('=').to_sym] = args.first
27
- elsif @data.key?(method)
28
- @data[method]
29
- else
30
- nil
31
- end
32
- end
33
-
34
- def respond_to_missing?(method, include_private = false)
35
- key = method.to_s.chomp('=').to_sym
36
- @data.key?(key) || super
37
- end
38
-
39
- def to_h
40
- @data.transform_values do |v|
41
- v.is_a?(ConfigSection) ? v.to_h : v
42
- end
43
- end
44
-
45
- def [](key)
46
- @data[key.to_sym]
47
- end
48
-
49
- def []=(key, value)
50
- @data[key.to_sym] = value
51
- end
52
-
53
- def merge(other)
54
- other_hash = other.is_a?(ConfigSection) ? other.to_h : other
55
- ConfigSection.new(deep_merge(to_h, other_hash || {}))
56
- end
57
-
58
- def keys
59
- @data.keys
60
- end
61
-
62
- def each(&block)
63
- @data.each(&block)
64
- end
65
-
66
- private
67
-
68
- def deep_merge(base, overlay)
69
- base.merge(overlay) do |_key, old_val, new_val|
70
- if old_val.is_a?(Hash) && new_val.is_a?(Hash)
71
- deep_merge(old_val, new_val)
72
- else
73
- new_val
74
- end
75
- end
76
- end
8
+ class Config < MywayConfig::Base
77
9
  end
10
+ end
11
+
12
+ require_relative 'config/validator'
13
+ require_relative 'config/database'
14
+ require_relative 'config/builder'
78
15
 
16
+ class HTM
79
17
  # HTM Configuration using Anyway Config
80
18
  #
81
19
  # Schema is defined in lib/htm/config/defaults.yml (single source of truth)
@@ -114,38 +52,18 @@ class HTM
114
52
  # config.embedding.model = 'text-embedding-3-small'
115
53
  # end
116
54
  #
117
- class Config < Anyway::Config
55
+ class Config
56
+ include Validator
57
+ include Database
58
+ include Builder
59
+
118
60
  config_name :htm
119
61
  env_prefix :htm
62
+ defaults_path File.expand_path('config/defaults.yml', __dir__)
120
63
 
121
- # ==========================================================================
122
- # Schema Definition (loaded from defaults.yml - single source of truth)
123
- # ==========================================================================
124
-
125
- # Path to bundled defaults file (defines both schema and default values)
126
- DEFAULTS_PATH = File.expand_path('config/defaults.yml', __dir__).freeze
127
-
128
- # Load schema from defaults.yml at class definition time
129
- begin
130
- defaults_content = File.read(DEFAULTS_PATH)
131
- raw_yaml = YAML.safe_load(
132
- defaults_content,
133
- permitted_classes: [Symbol],
134
- symbolize_names: true,
135
- aliases: true
136
- ) || {}
137
- SCHEMA = raw_yaml[:defaults] || {}
138
- rescue StandardError => e
139
- warn "HTM: Could not load schema from #{DEFAULTS_PATH}: #{e.message}"
140
- SCHEMA = {}
141
- end
142
-
143
- # Nested section attributes (defined as hashes, converted to ConfigSection)
144
- attr_config :database, :service, :embedding, :tag, :proposition,
145
- :chunking, :circuit_breaker, :relevance, :job, :providers
146
-
147
- # Top-level scalar attributes
148
- attr_config :week_start, :connection_timeout, :telemetry_enabled, :log_level
64
+ # Auto-configure attributes and coercions from defaults.yml schema
65
+ # This replaces manual attr_config and coerce_types declarations
66
+ auto_configure!
149
67
 
150
68
  # Custom environment detection: HTM_ENV > RAILS_ENV > RACK_ENV > 'development'
151
69
  class << self
@@ -158,72 +76,10 @@ class HTM
158
76
  end
159
77
  end
160
78
 
161
- # ==========================================================================
162
- # Type Coercion
163
- # ==========================================================================
164
-
165
- TO_SYMBOL = ->(v) { v.nil? ? nil : v.to_s.to_sym }
166
-
167
- # Create a coercion that merges incoming value with SCHEMA defaults for a section.
168
- # This ensures env vars like HTM_DATABASE__URL don't lose other defaults.
169
- def self.config_section_with_defaults(section_key)
170
- defaults = SCHEMA[section_key] || {}
171
- ->(v) {
172
- return v if v.is_a?(ConfigSection)
173
- incoming = v || {}
174
- # Deep merge: defaults first, then overlay incoming values
175
- merged = deep_merge_hashes(defaults.dup, incoming)
176
- ConfigSection.new(merged)
177
- }
178
- end
179
-
180
- # Deep merge helper for coercion
181
- def self.deep_merge_hashes(base, overlay)
182
- base.merge(overlay) do |_key, old_val, new_val|
183
- if old_val.is_a?(Hash) && new_val.is_a?(Hash)
184
- deep_merge_hashes(old_val, new_val)
185
- else
186
- new_val.nil? ? old_val : new_val
187
- end
188
- end
189
- end
190
-
191
- coerce_types(
192
- # Nested sections -> ConfigSection objects (with SCHEMA defaults merged)
193
- database: config_section_with_defaults(:database),
194
- service: config_section_with_defaults(:service),
195
- embedding: config_section_with_defaults(:embedding),
196
- tag: config_section_with_defaults(:tag),
197
- proposition: config_section_with_defaults(:proposition),
198
- chunking: config_section_with_defaults(:chunking),
199
- circuit_breaker: config_section_with_defaults(:circuit_breaker),
200
- relevance: config_section_with_defaults(:relevance),
201
- job: config_section_with_defaults(:job),
202
- providers: config_section_with_defaults(:providers),
203
-
204
- # Top-level symbols
205
- week_start: TO_SYMBOL,
206
- log_level: TO_SYMBOL,
207
-
208
- # Top-level integers
209
- connection_timeout: :integer,
210
-
211
- # Top-level booleans
212
- telemetry_enabled: :boolean
213
- )
214
-
215
79
  # ==========================================================================
216
80
  # Validation
217
81
  # ==========================================================================
218
82
 
219
- SUPPORTED_PROVIDERS = %i[
220
- openai anthropic gemini azure ollama
221
- huggingface openrouter bedrock deepseek
222
- ].freeze
223
-
224
- SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
225
- SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze
226
-
227
83
  # Default embedding dimensions by provider
228
84
  DEFAULT_DIMENSIONS = {
229
85
  openai: 1536,
@@ -237,7 +93,7 @@ class HTM
237
93
  deepseek: 1536
238
94
  }.freeze
239
95
 
240
- on_load :coerce_nested_types, :validate_config, :setup_defaults
96
+ on_load :coerce_nested_types, :reconcile_database_config, :validate_config, :setup_defaults
241
97
 
242
98
  # ==========================================================================
243
99
  # Callable Accessors (not loaded from config sources)
@@ -260,44 +116,6 @@ class HTM
260
116
  # Convenience Accessors (for common nested values)
261
117
  # ==========================================================================
262
118
 
263
- # Database convenience methods
264
- def database_url
265
- url = database.url
266
- return url if url && !url.empty?
267
-
268
- build_database_url
269
- end
270
-
271
- def database_config
272
- url = database_url
273
- return {} unless url
274
-
275
- require 'uri'
276
- uri = URI.parse(url)
277
-
278
- # Coercion now merges env vars with SCHEMA defaults, so pool_size/timeout
279
- # are always available even when only HTM_DATABASE__URL is set
280
- {
281
- adapter: 'postgresql',
282
- host: uri.host,
283
- port: uri.port || 5432,
284
- database: uri.path&.sub(%r{^/}, ''),
285
- username: uri.user,
286
- password: uri.password,
287
- pool: database.pool_size.to_i,
288
- timeout: database.timeout.to_i,
289
- sslmode: database.sslmode,
290
- encoding: 'unicode',
291
- prepared_statements: false,
292
- advisory_locks: false
293
- }.compact
294
- end
295
-
296
- def database_configured?
297
- url = database_url
298
- (url && !url.empty?) || (database.name && !database.name.empty?)
299
- end
300
-
301
119
  # Embedding convenience accessors
302
120
  def embedding_provider
303
121
  provider = embedding.provider
@@ -358,11 +176,11 @@ class HTM
358
176
 
359
177
  # Chunking convenience accessors
360
178
  def chunk_size
361
- chunking.size.to_i
179
+ chunking.chunk_size.to_i
362
180
  end
363
181
 
364
182
  def chunk_overlap
365
- chunking.overlap.to_i
183
+ chunking.chunk_overlap.to_i
366
184
  end
367
185
 
368
186
  # Circuit breaker convenience accessors
@@ -477,20 +295,52 @@ class HTM
477
295
  # Environment Helpers
478
296
  # ==========================================================================
479
297
 
480
- def test?
481
- self.class.env == 'test'
298
+ # Note: test?, development?, production? are auto-generated by MywayConfig::Base
299
+ # based on environment keys in defaults.yml
300
+
301
+ def environment
302
+ self.class.env
303
+ end
304
+
305
+ # ==========================================================================
306
+ # Environment Validation
307
+ # ==========================================================================
308
+
309
+ # Returns list of valid environment names from bundled defaults
310
+ # Inherited from MywayConfig::Base - delegates to DefaultsLoader
311
+ #
312
+ # @return [Array<Symbol>] valid environment names (e.g., [:development, :production, :test])
313
+ # Note: valid_environments is inherited from MywayConfig::Base
314
+
315
+ # Check if current environment is valid (defined in config)
316
+ #
317
+ # @return [Boolean] true if environment has a config section
318
+ def self.valid_environment?
319
+ MywayConfig::Loaders::DefaultsLoader.valid_environment?(config_name, env)
482
320
  end
483
321
 
484
- def development?
485
- self.class.env == 'development'
322
+ # Validate that the current environment is configured
323
+ #
324
+ # @raise [HTM::ConfigurationError] if environment is invalid
325
+ # @return [true] if environment is valid
326
+ def self.validate_environment!
327
+ current = env
328
+ return true if valid_environment?
329
+
330
+ valid = valid_environments.map(&:to_s).join(', ')
331
+ raise HTM::ConfigurationError,
332
+ "Invalid environment '#{current}'. " \
333
+ "Valid environments are: #{valid}. " \
334
+ "Set HTM_ENV to a valid environment or add a '#{current}:' section to your config."
486
335
  end
487
336
 
488
- def production?
489
- self.class.env == 'production'
337
+ # Instance method delegates
338
+ def valid_environment?
339
+ self.class.valid_environment?
490
340
  end
491
341
 
492
- def environment
493
- self.class.env
342
+ def validate_environment!
343
+ self.class.validate_environment!
494
344
  end
495
345
 
496
346
  # ==========================================================================
@@ -498,7 +348,7 @@ class HTM
498
348
  # ==========================================================================
499
349
 
500
350
  def self.xdg_config_paths
501
- HTM::Loaders::XdgConfigLoader.config_paths
351
+ MywayConfig::Loaders::XdgConfigLoader.config_paths(config_name)
502
352
  end
503
353
 
504
354
  def self.xdg_config_file
@@ -512,7 +362,7 @@ class HTM
512
362
  end
513
363
 
514
364
  def self.active_xdg_config_file
515
- HTM::Loaders::XdgConfigLoader.find_config_file('htm')
365
+ MywayConfig::Loaders::XdgConfigLoader.find_config_file(config_name)
516
366
  end
517
367
 
518
368
  # ==========================================================================
@@ -598,101 +448,29 @@ class HTM
598
448
 
599
449
  private
600
450
 
601
- def build_database_url
602
- return nil unless database.name && !database.name.empty?
603
-
604
- auth = if database.user && !database.user.empty?
605
- database.password && !database.password.empty? ? "#{database.user}:#{database.password}@" : "#{database.user}@"
606
- else
607
- ''
608
- end
609
-
610
- "postgresql://#{auth}#{database.host}:#{database.port}/#{database.name}"
611
- end
612
-
613
451
  # ==========================================================================
614
452
  # Type Coercion Callback
615
453
  # ==========================================================================
616
454
 
617
455
  def coerce_nested_types
618
456
  # Ensure nested provider sections are ConfigSections
619
- if providers.is_a?(ConfigSection)
457
+ # myway_config handles top-level sections, but we need to handle nested ones
458
+ if providers.is_a?(MywayConfig::ConfigSection)
620
459
  %i[openai anthropic gemini azure ollama huggingface openrouter bedrock deepseek].each do |provider|
621
460
  value = providers[provider]
622
- providers[provider] = ConfigSection.new(value) if value.is_a?(Hash)
461
+ providers[provider] = MywayConfig::ConfigSection.new(value) if value.is_a?(Hash)
623
462
  end
624
463
  end
625
- end
626
-
627
- # ==========================================================================
628
- # Validation Callbacks
629
- # ==========================================================================
630
-
631
- def validate_config
632
- validate_providers
633
- validate_job_backend
634
- validate_week_start
635
- validate_relevance_weights
636
- end
637
-
638
- def validate_providers
639
- validate_provider(:embedding_provider, embedding_provider)
640
- validate_provider(:tag_provider, tag_provider)
641
- validate_provider(:proposition_provider, proposition_provider)
642
- end
643
-
644
- def validate_provider(name, value)
645
- return if value.nil?
646
-
647
- unless SUPPORTED_PROVIDERS.include?(value)
648
- raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
649
- end
650
- end
651
-
652
- def validate_job_backend
653
- return unless job_backend
654
464
 
655
- unless SUPPORTED_JOB_BACKENDS.include?(job_backend)
656
- raise_validation_error("job.backend must be one of: #{SUPPORTED_JOB_BACKENDS.join(', ')} (got #{job_backend.inspect})")
465
+ # Coerce database numeric fields to integers (env vars are always strings)
466
+ if database&.port && !database.port.is_a?(Integer)
467
+ database.port = database.port.to_i
657
468
  end
658
- end
659
-
660
- def validate_week_start
661
- unless SUPPORTED_WEEK_STARTS.include?(week_start)
662
- raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{week_start.inspect})")
469
+ if database&.pool_size && !database.pool_size.is_a?(Integer)
470
+ database.pool_size = database.pool_size.to_i
663
471
  end
664
- end
665
-
666
- def validate_relevance_weights
667
- total = relevance_semantic_weight + relevance_tag_weight +
668
- relevance_recency_weight + relevance_access_weight
669
-
670
- unless (0.99..1.01).cover?(total)
671
- raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
672
- end
673
- end
674
-
675
- def validate_callables
676
- unless @embedding_generator.respond_to?(:call)
677
- raise HTM::ValidationError, "embedding_generator must be callable"
678
- end
679
-
680
- unless @tag_extractor.respond_to?(:call)
681
- raise HTM::ValidationError, "tag_extractor must be callable"
682
- end
683
-
684
- unless @proposition_extractor.respond_to?(:call)
685
- raise HTM::ValidationError, "proposition_extractor must be callable"
686
- end
687
-
688
- unless @token_counter.respond_to?(:call)
689
- raise HTM::ValidationError, "token_counter must be callable"
690
- end
691
- end
692
-
693
- def validate_logger
694
- unless @logger.respond_to?(:info) && @logger.respond_to?(:warn) && @logger.respond_to?(:error)
695
- raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
472
+ if database&.timeout && !database.timeout.is_a?(Integer)
473
+ database.timeout = database.timeout.to_i
696
474
  end
697
475
  end
698
476
 
@@ -716,173 +494,8 @@ class HTM
716
494
 
717
495
  :fiber
718
496
  end
719
-
720
- def build_default_logger
721
- logger = Logger.new($stdout)
722
- logger.level = log_level
723
- logger.formatter = proc do |severity, datetime, _progname, msg|
724
- "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
725
- end
726
- logger
727
- end
728
-
729
- def build_default_token_counter
730
- lambda do |text|
731
- require 'tiktoken_ruby' unless defined?(Tiktoken)
732
- encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
733
- encoder.encode(text).length
734
- end
735
- end
736
-
737
- def build_default_embedding_generator
738
- lambda do |text|
739
- require 'ruby_llm' unless defined?(RubyLLM)
740
-
741
- configure_ruby_llm(embedding_provider)
742
- refresh_ollama_models! if embedding_provider == :ollama
743
-
744
- model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model
745
- response = RubyLLM.embed(text, model: model)
746
- embedding = extract_embedding_from_response(response)
747
-
748
- unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
749
- raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
750
- end
751
-
752
- embedding
753
- end
754
- end
755
-
756
- def build_default_tag_extractor
757
- lambda do |text, existing_ontology = []|
758
- require 'ruby_llm' unless defined?(RubyLLM)
759
-
760
- configure_ruby_llm(tag_provider)
761
- refresh_ollama_models! if tag_provider == :ollama
762
-
763
- model = tag_provider == :ollama ? normalize_ollama_model(tag_model) : tag_model
764
-
765
- prompt = build_tag_extraction_prompt(text, existing_ontology)
766
- system_prompt = build_tag_system_prompt
767
-
768
- chat = RubyLLM.chat(model: model)
769
- chat.with_instructions(system_prompt)
770
- response = chat.ask(prompt)
771
-
772
- parse_tag_response(extract_text_from_response(response))
773
- end
774
- end
775
-
776
- def build_default_proposition_extractor
777
- lambda do |text|
778
- require 'ruby_llm' unless defined?(RubyLLM)
779
-
780
- configure_ruby_llm(proposition_provider)
781
- refresh_ollama_models! if proposition_provider == :ollama
782
-
783
- model = proposition_provider == :ollama ? normalize_ollama_model(proposition_model) : proposition_model
784
-
785
- prompt = build_proposition_extraction_prompt(text)
786
- system_prompt = build_proposition_system_prompt
787
-
788
- chat = RubyLLM.chat(model: model)
789
- chat.with_instructions(system_prompt)
790
- response = chat.ask(prompt)
791
-
792
- parse_proposition_response(extract_text_from_response(response))
793
- end
794
- end
795
-
796
- # ==========================================================================
797
- # Response Extraction Helpers
798
- # ==========================================================================
799
-
800
- def extract_embedding_from_response(response)
801
- return nil unless response
802
-
803
- case response
804
- when Array
805
- response
806
- when ->(r) { r.respond_to?(:vectors) }
807
- vectors = response.vectors
808
- vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
809
- when ->(r) { r.respond_to?(:to_a) }
810
- response.to_a
811
- when ->(r) { r.respond_to?(:embedding) }
812
- response.embedding
813
- else
814
- if response.respond_to?(:instance_variable_get)
815
- vectors = response.instance_variable_get(:@vectors)
816
- return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
817
- return vectors if vectors.is_a?(Array)
818
- end
819
- raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
820
- end
821
- end
822
-
823
- def extract_text_from_response(response)
824
- return '' unless response
825
-
826
- case response
827
- when String then response
828
- when ->(r) { r.respond_to?(:content) } then response.content.to_s
829
- when ->(r) { r.respond_to?(:text) } then response.text.to_s
830
- else response.to_s
831
- end
832
- end
833
-
834
- def parse_tag_response(text)
835
- tags = text.to_s.split("\n").map(&:strip).reject(&:empty?)
836
- valid_tags = tags.select { |tag| tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ }
837
- valid_tags.select { |tag| tag.count(':') < max_tag_depth }
838
- end
839
-
840
- def parse_proposition_response(text)
841
- text.to_s
842
- .split("\n")
843
- .map(&:strip)
844
- .map { |line| line.sub(/^[-*]\s*/, '') }
845
- .map(&:strip)
846
- .reject(&:empty?)
847
- end
848
-
849
- # ==========================================================================
850
- # Prompt Builders
851
- #
852
- # These methods use configurable prompt templates from defaults.yml.
853
- # Templates use %{placeholder} syntax for runtime interpolation.
854
- # ==========================================================================
855
-
856
- def build_tag_extraction_prompt(text, existing_ontology)
857
- taxonomy_context = if existing_ontology.any?
858
- sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
859
- tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
860
- else
861
- tag.taxonomy_context_empty
862
- end
863
-
864
- tag.user_prompt_template % {
865
- text: text,
866
- max_depth: max_tag_depth,
867
- taxonomy_context: taxonomy_context
868
- }
869
- end
870
-
871
- def build_tag_system_prompt
872
- tag.system_prompt.to_s.strip
873
- end
874
-
875
- def build_proposition_extraction_prompt(text)
876
- proposition.user_prompt_template % { text: text }
877
- end
878
-
879
- def build_proposition_system_prompt
880
- proposition.system_prompt.to_s.strip
881
- end
882
497
  end
883
498
  end
884
499
 
885
- # Register custom loaders after Config class is defined
886
- # Order matters: defaults (lowest priority) -> XDG -> project config -> ENV (highest)
887
- require_relative 'loaders/defaults_loader'
888
- require_relative 'loaders/xdg_config_loader'
500
+ # myway_config provides DefaultsLoader and XdgConfigLoader automatically
501
+ # Loaders are registered when MywayConfig.setup! is called (happens on require)