htm 0.0.20 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +62 -22
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +75 -462
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +111 -85
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- data/lib/htm/loaders/defaults_loader.rb +0 -143
- data/lib/htm/loaders/xdg_config_loader.rb +0 -116
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/config.rb
CHANGED
|
@@ -1,81 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require '
|
|
3
|
+
require 'myway_config'
|
|
4
4
|
require 'logger'
|
|
5
|
-
require 'yaml'
|
|
6
5
|
|
|
6
|
+
# Define Config class first to establish superclass
|
|
7
7
|
class HTM
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
# @example
|
|
11
|
-
# section = ConfigSection.new(host: 'localhost', port: 5432)
|
|
12
|
-
# section.host # => 'localhost'
|
|
13
|
-
# section.port # => 5432
|
|
14
|
-
#
|
|
15
|
-
class ConfigSection
|
|
16
|
-
def initialize(hash = {})
|
|
17
|
-
@data = {}
|
|
18
|
-
(hash || {}).each do |key, value|
|
|
19
|
-
@data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def method_missing(method, *args, &block)
|
|
24
|
-
key = method.to_s
|
|
25
|
-
if key.end_with?('=')
|
|
26
|
-
@data[key.chomp('=').to_sym] = args.first
|
|
27
|
-
elsif @data.key?(method)
|
|
28
|
-
@data[method]
|
|
29
|
-
else
|
|
30
|
-
nil
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def respond_to_missing?(method, include_private = false)
|
|
35
|
-
key = method.to_s.chomp('=').to_sym
|
|
36
|
-
@data.key?(key) || super
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def to_h
|
|
40
|
-
@data.transform_values do |v|
|
|
41
|
-
v.is_a?(ConfigSection) ? v.to_h : v
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def [](key)
|
|
46
|
-
@data[key.to_sym]
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def []=(key, value)
|
|
50
|
-
@data[key.to_sym] = value
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def merge(other)
|
|
54
|
-
other_hash = other.is_a?(ConfigSection) ? other.to_h : other
|
|
55
|
-
ConfigSection.new(deep_merge(to_h, other_hash || {}))
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def keys
|
|
59
|
-
@data.keys
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def each(&block)
|
|
63
|
-
@data.each(&block)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
private
|
|
67
|
-
|
|
68
|
-
def deep_merge(base, overlay)
|
|
69
|
-
base.merge(overlay) do |_key, old_val, new_val|
|
|
70
|
-
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
71
|
-
deep_merge(old_val, new_val)
|
|
72
|
-
else
|
|
73
|
-
new_val
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
8
|
+
class Config < MywayConfig::Base
|
|
77
9
|
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
require_relative 'config/validator'
|
|
13
|
+
require_relative 'config/database'
|
|
14
|
+
require_relative 'config/builder'
|
|
78
15
|
|
|
16
|
+
class HTM
|
|
79
17
|
# HTM Configuration using Anyway Config
|
|
80
18
|
#
|
|
81
19
|
# Schema is defined in lib/htm/config/defaults.yml (single source of truth)
|
|
@@ -114,38 +52,18 @@ class HTM
|
|
|
114
52
|
# config.embedding.model = 'text-embedding-3-small'
|
|
115
53
|
# end
|
|
116
54
|
#
|
|
117
|
-
class Config
|
|
55
|
+
class Config
|
|
56
|
+
include Validator
|
|
57
|
+
include Database
|
|
58
|
+
include Builder
|
|
59
|
+
|
|
118
60
|
config_name :htm
|
|
119
61
|
env_prefix :htm
|
|
62
|
+
defaults_path File.expand_path('config/defaults.yml', __dir__)
|
|
120
63
|
|
|
121
|
-
#
|
|
122
|
-
#
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
# Path to bundled defaults file (defines both schema and default values)
|
|
126
|
-
DEFAULTS_PATH = File.expand_path('config/defaults.yml', __dir__).freeze
|
|
127
|
-
|
|
128
|
-
# Load schema from defaults.yml at class definition time
|
|
129
|
-
begin
|
|
130
|
-
defaults_content = File.read(DEFAULTS_PATH)
|
|
131
|
-
raw_yaml = YAML.safe_load(
|
|
132
|
-
defaults_content,
|
|
133
|
-
permitted_classes: [Symbol],
|
|
134
|
-
symbolize_names: true,
|
|
135
|
-
aliases: true
|
|
136
|
-
) || {}
|
|
137
|
-
SCHEMA = raw_yaml[:defaults] || {}
|
|
138
|
-
rescue StandardError => e
|
|
139
|
-
warn "HTM: Could not load schema from #{DEFAULTS_PATH}: #{e.message}"
|
|
140
|
-
SCHEMA = {}
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
# Nested section attributes (defined as hashes, converted to ConfigSection)
|
|
144
|
-
attr_config :database, :service, :embedding, :tag, :proposition,
|
|
145
|
-
:chunking, :circuit_breaker, :relevance, :job, :providers
|
|
146
|
-
|
|
147
|
-
# Top-level scalar attributes
|
|
148
|
-
attr_config :week_start, :connection_timeout, :telemetry_enabled, :log_level
|
|
64
|
+
# Auto-configure attributes and coercions from defaults.yml schema
|
|
65
|
+
# This replaces manual attr_config and coerce_types declarations
|
|
66
|
+
auto_configure!
|
|
149
67
|
|
|
150
68
|
# Custom environment detection: HTM_ENV > RAILS_ENV > RACK_ENV > 'development'
|
|
151
69
|
class << self
|
|
@@ -158,72 +76,10 @@ class HTM
|
|
|
158
76
|
end
|
|
159
77
|
end
|
|
160
78
|
|
|
161
|
-
# ==========================================================================
|
|
162
|
-
# Type Coercion
|
|
163
|
-
# ==========================================================================
|
|
164
|
-
|
|
165
|
-
TO_SYMBOL = ->(v) { v.nil? ? nil : v.to_s.to_sym }
|
|
166
|
-
|
|
167
|
-
# Create a coercion that merges incoming value with SCHEMA defaults for a section.
|
|
168
|
-
# This ensures env vars like HTM_DATABASE__URL don't lose other defaults.
|
|
169
|
-
def self.config_section_with_defaults(section_key)
|
|
170
|
-
defaults = SCHEMA[section_key] || {}
|
|
171
|
-
->(v) {
|
|
172
|
-
return v if v.is_a?(ConfigSection)
|
|
173
|
-
incoming = v || {}
|
|
174
|
-
# Deep merge: defaults first, then overlay incoming values
|
|
175
|
-
merged = deep_merge_hashes(defaults.dup, incoming)
|
|
176
|
-
ConfigSection.new(merged)
|
|
177
|
-
}
|
|
178
|
-
end
|
|
179
|
-
|
|
180
|
-
# Deep merge helper for coercion
|
|
181
|
-
def self.deep_merge_hashes(base, overlay)
|
|
182
|
-
base.merge(overlay) do |_key, old_val, new_val|
|
|
183
|
-
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
184
|
-
deep_merge_hashes(old_val, new_val)
|
|
185
|
-
else
|
|
186
|
-
new_val.nil? ? old_val : new_val
|
|
187
|
-
end
|
|
188
|
-
end
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
coerce_types(
|
|
192
|
-
# Nested sections -> ConfigSection objects (with SCHEMA defaults merged)
|
|
193
|
-
database: config_section_with_defaults(:database),
|
|
194
|
-
service: config_section_with_defaults(:service),
|
|
195
|
-
embedding: config_section_with_defaults(:embedding),
|
|
196
|
-
tag: config_section_with_defaults(:tag),
|
|
197
|
-
proposition: config_section_with_defaults(:proposition),
|
|
198
|
-
chunking: config_section_with_defaults(:chunking),
|
|
199
|
-
circuit_breaker: config_section_with_defaults(:circuit_breaker),
|
|
200
|
-
relevance: config_section_with_defaults(:relevance),
|
|
201
|
-
job: config_section_with_defaults(:job),
|
|
202
|
-
providers: config_section_with_defaults(:providers),
|
|
203
|
-
|
|
204
|
-
# Top-level symbols
|
|
205
|
-
week_start: TO_SYMBOL,
|
|
206
|
-
log_level: TO_SYMBOL,
|
|
207
|
-
|
|
208
|
-
# Top-level integers
|
|
209
|
-
connection_timeout: :integer,
|
|
210
|
-
|
|
211
|
-
# Top-level booleans
|
|
212
|
-
telemetry_enabled: :boolean
|
|
213
|
-
)
|
|
214
|
-
|
|
215
79
|
# ==========================================================================
|
|
216
80
|
# Validation
|
|
217
81
|
# ==========================================================================
|
|
218
82
|
|
|
219
|
-
SUPPORTED_PROVIDERS = %i[
|
|
220
|
-
openai anthropic gemini azure ollama
|
|
221
|
-
huggingface openrouter bedrock deepseek
|
|
222
|
-
].freeze
|
|
223
|
-
|
|
224
|
-
SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
|
|
225
|
-
SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze
|
|
226
|
-
|
|
227
83
|
# Default embedding dimensions by provider
|
|
228
84
|
DEFAULT_DIMENSIONS = {
|
|
229
85
|
openai: 1536,
|
|
@@ -237,7 +93,7 @@ class HTM
|
|
|
237
93
|
deepseek: 1536
|
|
238
94
|
}.freeze
|
|
239
95
|
|
|
240
|
-
on_load :coerce_nested_types, :validate_config, :setup_defaults
|
|
96
|
+
on_load :coerce_nested_types, :reconcile_database_config, :validate_config, :setup_defaults
|
|
241
97
|
|
|
242
98
|
# ==========================================================================
|
|
243
99
|
# Callable Accessors (not loaded from config sources)
|
|
@@ -260,44 +116,6 @@ class HTM
|
|
|
260
116
|
# Convenience Accessors (for common nested values)
|
|
261
117
|
# ==========================================================================
|
|
262
118
|
|
|
263
|
-
# Database convenience methods
|
|
264
|
-
def database_url
|
|
265
|
-
url = database.url
|
|
266
|
-
return url if url && !url.empty?
|
|
267
|
-
|
|
268
|
-
build_database_url
|
|
269
|
-
end
|
|
270
|
-
|
|
271
|
-
def database_config
|
|
272
|
-
url = database_url
|
|
273
|
-
return {} unless url
|
|
274
|
-
|
|
275
|
-
require 'uri'
|
|
276
|
-
uri = URI.parse(url)
|
|
277
|
-
|
|
278
|
-
# Coercion now merges env vars with SCHEMA defaults, so pool_size/timeout
|
|
279
|
-
# are always available even when only HTM_DATABASE__URL is set
|
|
280
|
-
{
|
|
281
|
-
adapter: 'postgresql',
|
|
282
|
-
host: uri.host,
|
|
283
|
-
port: uri.port || 5432,
|
|
284
|
-
database: uri.path&.sub(%r{^/}, ''),
|
|
285
|
-
username: uri.user,
|
|
286
|
-
password: uri.password,
|
|
287
|
-
pool: database.pool_size.to_i,
|
|
288
|
-
timeout: database.timeout.to_i,
|
|
289
|
-
sslmode: database.sslmode,
|
|
290
|
-
encoding: 'unicode',
|
|
291
|
-
prepared_statements: false,
|
|
292
|
-
advisory_locks: false
|
|
293
|
-
}.compact
|
|
294
|
-
end
|
|
295
|
-
|
|
296
|
-
def database_configured?
|
|
297
|
-
url = database_url
|
|
298
|
-
(url && !url.empty?) || (database.name && !database.name.empty?)
|
|
299
|
-
end
|
|
300
|
-
|
|
301
119
|
# Embedding convenience accessors
|
|
302
120
|
def embedding_provider
|
|
303
121
|
provider = embedding.provider
|
|
@@ -358,11 +176,11 @@ class HTM
|
|
|
358
176
|
|
|
359
177
|
# Chunking convenience accessors
|
|
360
178
|
def chunk_size
|
|
361
|
-
chunking.
|
|
179
|
+
chunking.chunk_size.to_i
|
|
362
180
|
end
|
|
363
181
|
|
|
364
182
|
def chunk_overlap
|
|
365
|
-
chunking.
|
|
183
|
+
chunking.chunk_overlap.to_i
|
|
366
184
|
end
|
|
367
185
|
|
|
368
186
|
# Circuit breaker convenience accessors
|
|
@@ -477,20 +295,52 @@ class HTM
|
|
|
477
295
|
# Environment Helpers
|
|
478
296
|
# ==========================================================================
|
|
479
297
|
|
|
480
|
-
|
|
481
|
-
|
|
298
|
+
# Note: test?, development?, production? are auto-generated by MywayConfig::Base
|
|
299
|
+
# based on environment keys in defaults.yml
|
|
300
|
+
|
|
301
|
+
def environment
|
|
302
|
+
self.class.env
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
# ==========================================================================
|
|
306
|
+
# Environment Validation
|
|
307
|
+
# ==========================================================================
|
|
308
|
+
|
|
309
|
+
# Returns list of valid environment names from bundled defaults
|
|
310
|
+
# Inherited from MywayConfig::Base - delegates to DefaultsLoader
|
|
311
|
+
#
|
|
312
|
+
# @return [Array<Symbol>] valid environment names (e.g., [:development, :production, :test])
|
|
313
|
+
# Note: valid_environments is inherited from MywayConfig::Base
|
|
314
|
+
|
|
315
|
+
# Check if current environment is valid (defined in config)
|
|
316
|
+
#
|
|
317
|
+
# @return [Boolean] true if environment has a config section
|
|
318
|
+
def self.valid_environment?
|
|
319
|
+
MywayConfig::Loaders::DefaultsLoader.valid_environment?(config_name, env)
|
|
482
320
|
end
|
|
483
321
|
|
|
484
|
-
|
|
485
|
-
|
|
322
|
+
# Validate that the current environment is configured
|
|
323
|
+
#
|
|
324
|
+
# @raise [HTM::ConfigurationError] if environment is invalid
|
|
325
|
+
# @return [true] if environment is valid
|
|
326
|
+
def self.validate_environment!
|
|
327
|
+
current = env
|
|
328
|
+
return true if valid_environment?
|
|
329
|
+
|
|
330
|
+
valid = valid_environments.map(&:to_s).join(', ')
|
|
331
|
+
raise HTM::ConfigurationError,
|
|
332
|
+
"Invalid environment '#{current}'. " \
|
|
333
|
+
"Valid environments are: #{valid}. " \
|
|
334
|
+
"Set HTM_ENV to a valid environment or add a '#{current}:' section to your config."
|
|
486
335
|
end
|
|
487
336
|
|
|
488
|
-
|
|
489
|
-
|
|
337
|
+
# Instance method delegates
|
|
338
|
+
def valid_environment?
|
|
339
|
+
self.class.valid_environment?
|
|
490
340
|
end
|
|
491
341
|
|
|
492
|
-
def
|
|
493
|
-
self.class.
|
|
342
|
+
def validate_environment!
|
|
343
|
+
self.class.validate_environment!
|
|
494
344
|
end
|
|
495
345
|
|
|
496
346
|
# ==========================================================================
|
|
@@ -498,7 +348,7 @@ class HTM
|
|
|
498
348
|
# ==========================================================================
|
|
499
349
|
|
|
500
350
|
def self.xdg_config_paths
|
|
501
|
-
|
|
351
|
+
MywayConfig::Loaders::XdgConfigLoader.config_paths(config_name)
|
|
502
352
|
end
|
|
503
353
|
|
|
504
354
|
def self.xdg_config_file
|
|
@@ -512,7 +362,7 @@ class HTM
|
|
|
512
362
|
end
|
|
513
363
|
|
|
514
364
|
def self.active_xdg_config_file
|
|
515
|
-
|
|
365
|
+
MywayConfig::Loaders::XdgConfigLoader.find_config_file(config_name)
|
|
516
366
|
end
|
|
517
367
|
|
|
518
368
|
# ==========================================================================
|
|
@@ -598,101 +448,29 @@ class HTM
|
|
|
598
448
|
|
|
599
449
|
private
|
|
600
450
|
|
|
601
|
-
def build_database_url
|
|
602
|
-
return nil unless database.name && !database.name.empty?
|
|
603
|
-
|
|
604
|
-
auth = if database.user && !database.user.empty?
|
|
605
|
-
database.password && !database.password.empty? ? "#{database.user}:#{database.password}@" : "#{database.user}@"
|
|
606
|
-
else
|
|
607
|
-
''
|
|
608
|
-
end
|
|
609
|
-
|
|
610
|
-
"postgresql://#{auth}#{database.host}:#{database.port}/#{database.name}"
|
|
611
|
-
end
|
|
612
|
-
|
|
613
451
|
# ==========================================================================
|
|
614
452
|
# Type Coercion Callback
|
|
615
453
|
# ==========================================================================
|
|
616
454
|
|
|
617
455
|
def coerce_nested_types
|
|
618
456
|
# Ensure nested provider sections are ConfigSections
|
|
619
|
-
|
|
457
|
+
# myway_config handles top-level sections, but we need to handle nested ones
|
|
458
|
+
if providers.is_a?(MywayConfig::ConfigSection)
|
|
620
459
|
%i[openai anthropic gemini azure ollama huggingface openrouter bedrock deepseek].each do |provider|
|
|
621
460
|
value = providers[provider]
|
|
622
|
-
providers[provider] = ConfigSection.new(value) if value.is_a?(Hash)
|
|
461
|
+
providers[provider] = MywayConfig::ConfigSection.new(value) if value.is_a?(Hash)
|
|
623
462
|
end
|
|
624
463
|
end
|
|
625
|
-
end
|
|
626
|
-
|
|
627
|
-
# ==========================================================================
|
|
628
|
-
# Validation Callbacks
|
|
629
|
-
# ==========================================================================
|
|
630
|
-
|
|
631
|
-
def validate_config
|
|
632
|
-
validate_providers
|
|
633
|
-
validate_job_backend
|
|
634
|
-
validate_week_start
|
|
635
|
-
validate_relevance_weights
|
|
636
|
-
end
|
|
637
|
-
|
|
638
|
-
def validate_providers
|
|
639
|
-
validate_provider(:embedding_provider, embedding_provider)
|
|
640
|
-
validate_provider(:tag_provider, tag_provider)
|
|
641
|
-
validate_provider(:proposition_provider, proposition_provider)
|
|
642
|
-
end
|
|
643
|
-
|
|
644
|
-
def validate_provider(name, value)
|
|
645
|
-
return if value.nil?
|
|
646
|
-
|
|
647
|
-
unless SUPPORTED_PROVIDERS.include?(value)
|
|
648
|
-
raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
|
|
649
|
-
end
|
|
650
|
-
end
|
|
651
|
-
|
|
652
|
-
def validate_job_backend
|
|
653
|
-
return unless job_backend
|
|
654
464
|
|
|
655
|
-
|
|
656
|
-
|
|
465
|
+
# Coerce database numeric fields to integers (env vars are always strings)
|
|
466
|
+
if database&.port && !database.port.is_a?(Integer)
|
|
467
|
+
database.port = database.port.to_i
|
|
657
468
|
end
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
def validate_week_start
|
|
661
|
-
unless SUPPORTED_WEEK_STARTS.include?(week_start)
|
|
662
|
-
raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{week_start.inspect})")
|
|
469
|
+
if database&.pool_size && !database.pool_size.is_a?(Integer)
|
|
470
|
+
database.pool_size = database.pool_size.to_i
|
|
663
471
|
end
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
def validate_relevance_weights
|
|
667
|
-
total = relevance_semantic_weight + relevance_tag_weight +
|
|
668
|
-
relevance_recency_weight + relevance_access_weight
|
|
669
|
-
|
|
670
|
-
unless (0.99..1.01).cover?(total)
|
|
671
|
-
raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
|
|
672
|
-
end
|
|
673
|
-
end
|
|
674
|
-
|
|
675
|
-
def validate_callables
|
|
676
|
-
unless @embedding_generator.respond_to?(:call)
|
|
677
|
-
raise HTM::ValidationError, "embedding_generator must be callable"
|
|
678
|
-
end
|
|
679
|
-
|
|
680
|
-
unless @tag_extractor.respond_to?(:call)
|
|
681
|
-
raise HTM::ValidationError, "tag_extractor must be callable"
|
|
682
|
-
end
|
|
683
|
-
|
|
684
|
-
unless @proposition_extractor.respond_to?(:call)
|
|
685
|
-
raise HTM::ValidationError, "proposition_extractor must be callable"
|
|
686
|
-
end
|
|
687
|
-
|
|
688
|
-
unless @token_counter.respond_to?(:call)
|
|
689
|
-
raise HTM::ValidationError, "token_counter must be callable"
|
|
690
|
-
end
|
|
691
|
-
end
|
|
692
|
-
|
|
693
|
-
def validate_logger
|
|
694
|
-
unless @logger.respond_to?(:info) && @logger.respond_to?(:warn) && @logger.respond_to?(:error)
|
|
695
|
-
raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
|
|
472
|
+
if database&.timeout && !database.timeout.is_a?(Integer)
|
|
473
|
+
database.timeout = database.timeout.to_i
|
|
696
474
|
end
|
|
697
475
|
end
|
|
698
476
|
|
|
@@ -716,173 +494,8 @@ class HTM
|
|
|
716
494
|
|
|
717
495
|
:fiber
|
|
718
496
|
end
|
|
719
|
-
|
|
720
|
-
def build_default_logger
|
|
721
|
-
logger = Logger.new($stdout)
|
|
722
|
-
logger.level = log_level
|
|
723
|
-
logger.formatter = proc do |severity, datetime, _progname, msg|
|
|
724
|
-
"[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
|
|
725
|
-
end
|
|
726
|
-
logger
|
|
727
|
-
end
|
|
728
|
-
|
|
729
|
-
def build_default_token_counter
|
|
730
|
-
lambda do |text|
|
|
731
|
-
require 'tiktoken_ruby' unless defined?(Tiktoken)
|
|
732
|
-
encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|
733
|
-
encoder.encode(text).length
|
|
734
|
-
end
|
|
735
|
-
end
|
|
736
|
-
|
|
737
|
-
def build_default_embedding_generator
|
|
738
|
-
lambda do |text|
|
|
739
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
740
|
-
|
|
741
|
-
configure_ruby_llm(embedding_provider)
|
|
742
|
-
refresh_ollama_models! if embedding_provider == :ollama
|
|
743
|
-
|
|
744
|
-
model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model
|
|
745
|
-
response = RubyLLM.embed(text, model: model)
|
|
746
|
-
embedding = extract_embedding_from_response(response)
|
|
747
|
-
|
|
748
|
-
unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
|
|
749
|
-
raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
|
|
750
|
-
end
|
|
751
|
-
|
|
752
|
-
embedding
|
|
753
|
-
end
|
|
754
|
-
end
|
|
755
|
-
|
|
756
|
-
def build_default_tag_extractor
|
|
757
|
-
lambda do |text, existing_ontology = []|
|
|
758
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
759
|
-
|
|
760
|
-
configure_ruby_llm(tag_provider)
|
|
761
|
-
refresh_ollama_models! if tag_provider == :ollama
|
|
762
|
-
|
|
763
|
-
model = tag_provider == :ollama ? normalize_ollama_model(tag_model) : tag_model
|
|
764
|
-
|
|
765
|
-
prompt = build_tag_extraction_prompt(text, existing_ontology)
|
|
766
|
-
system_prompt = build_tag_system_prompt
|
|
767
|
-
|
|
768
|
-
chat = RubyLLM.chat(model: model)
|
|
769
|
-
chat.with_instructions(system_prompt)
|
|
770
|
-
response = chat.ask(prompt)
|
|
771
|
-
|
|
772
|
-
parse_tag_response(extract_text_from_response(response))
|
|
773
|
-
end
|
|
774
|
-
end
|
|
775
|
-
|
|
776
|
-
def build_default_proposition_extractor
|
|
777
|
-
lambda do |text|
|
|
778
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
779
|
-
|
|
780
|
-
configure_ruby_llm(proposition_provider)
|
|
781
|
-
refresh_ollama_models! if proposition_provider == :ollama
|
|
782
|
-
|
|
783
|
-
model = proposition_provider == :ollama ? normalize_ollama_model(proposition_model) : proposition_model
|
|
784
|
-
|
|
785
|
-
prompt = build_proposition_extraction_prompt(text)
|
|
786
|
-
system_prompt = build_proposition_system_prompt
|
|
787
|
-
|
|
788
|
-
chat = RubyLLM.chat(model: model)
|
|
789
|
-
chat.with_instructions(system_prompt)
|
|
790
|
-
response = chat.ask(prompt)
|
|
791
|
-
|
|
792
|
-
parse_proposition_response(extract_text_from_response(response))
|
|
793
|
-
end
|
|
794
|
-
end
|
|
795
|
-
|
|
796
|
-
# ==========================================================================
|
|
797
|
-
# Response Extraction Helpers
|
|
798
|
-
# ==========================================================================
|
|
799
|
-
|
|
800
|
-
def extract_embedding_from_response(response)
|
|
801
|
-
return nil unless response
|
|
802
|
-
|
|
803
|
-
case response
|
|
804
|
-
when Array
|
|
805
|
-
response
|
|
806
|
-
when ->(r) { r.respond_to?(:vectors) }
|
|
807
|
-
vectors = response.vectors
|
|
808
|
-
vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
|
|
809
|
-
when ->(r) { r.respond_to?(:to_a) }
|
|
810
|
-
response.to_a
|
|
811
|
-
when ->(r) { r.respond_to?(:embedding) }
|
|
812
|
-
response.embedding
|
|
813
|
-
else
|
|
814
|
-
if response.respond_to?(:instance_variable_get)
|
|
815
|
-
vectors = response.instance_variable_get(:@vectors)
|
|
816
|
-
return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
|
|
817
|
-
return vectors if vectors.is_a?(Array)
|
|
818
|
-
end
|
|
819
|
-
raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
|
|
820
|
-
end
|
|
821
|
-
end
|
|
822
|
-
|
|
823
|
-
def extract_text_from_response(response)
|
|
824
|
-
return '' unless response
|
|
825
|
-
|
|
826
|
-
case response
|
|
827
|
-
when String then response
|
|
828
|
-
when ->(r) { r.respond_to?(:content) } then response.content.to_s
|
|
829
|
-
when ->(r) { r.respond_to?(:text) } then response.text.to_s
|
|
830
|
-
else response.to_s
|
|
831
|
-
end
|
|
832
|
-
end
|
|
833
|
-
|
|
834
|
-
def parse_tag_response(text)
|
|
835
|
-
tags = text.to_s.split("\n").map(&:strip).reject(&:empty?)
|
|
836
|
-
valid_tags = tags.select { |tag| tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ }
|
|
837
|
-
valid_tags.select { |tag| tag.count(':') < max_tag_depth }
|
|
838
|
-
end
|
|
839
|
-
|
|
840
|
-
def parse_proposition_response(text)
|
|
841
|
-
text.to_s
|
|
842
|
-
.split("\n")
|
|
843
|
-
.map(&:strip)
|
|
844
|
-
.map { |line| line.sub(/^[-*]\s*/, '') }
|
|
845
|
-
.map(&:strip)
|
|
846
|
-
.reject(&:empty?)
|
|
847
|
-
end
|
|
848
|
-
|
|
849
|
-
# ==========================================================================
|
|
850
|
-
# Prompt Builders
|
|
851
|
-
#
|
|
852
|
-
# These methods use configurable prompt templates from defaults.yml.
|
|
853
|
-
# Templates use %{placeholder} syntax for runtime interpolation.
|
|
854
|
-
# ==========================================================================
|
|
855
|
-
|
|
856
|
-
def build_tag_extraction_prompt(text, existing_ontology)
|
|
857
|
-
taxonomy_context = if existing_ontology.any?
|
|
858
|
-
sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
|
|
859
|
-
tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
|
|
860
|
-
else
|
|
861
|
-
tag.taxonomy_context_empty
|
|
862
|
-
end
|
|
863
|
-
|
|
864
|
-
tag.user_prompt_template % {
|
|
865
|
-
text: text,
|
|
866
|
-
max_depth: max_tag_depth,
|
|
867
|
-
taxonomy_context: taxonomy_context
|
|
868
|
-
}
|
|
869
|
-
end
|
|
870
|
-
|
|
871
|
-
def build_tag_system_prompt
|
|
872
|
-
tag.system_prompt.to_s.strip
|
|
873
|
-
end
|
|
874
|
-
|
|
875
|
-
def build_proposition_extraction_prompt(text)
|
|
876
|
-
proposition.user_prompt_template % { text: text }
|
|
877
|
-
end
|
|
878
|
-
|
|
879
|
-
def build_proposition_system_prompt
|
|
880
|
-
proposition.system_prompt.to_s.strip
|
|
881
|
-
end
|
|
882
497
|
end
|
|
883
498
|
end
|
|
884
499
|
|
|
885
|
-
#
|
|
886
|
-
#
|
|
887
|
-
require_relative 'loaders/defaults_loader'
|
|
888
|
-
require_relative 'loaders/xdg_config_loader'
|
|
500
|
+
# myway_config provides DefaultsLoader and XdgConfigLoader automatically
|
|
501
|
+
# Loaders are registered when MywayConfig.setup! is called (happens on require)
|