htm 0.0.20 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +37 -9
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +64 -360
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +109 -80
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  # ConfigSection provides method access to nested configuration hashes.
  #
  # Keys are stored as symbols. Hash values passed to the constructor are
  # wrapped in ConfigSection so nested values can be reached by chaining
  # readers. Reading a key that is not present returns nil instead of raising.
  #
  # @example
  #   section = ConfigSection.new(host: 'localhost', port: 5432)
  #   section.host  # => 'localhost'
  #   section.port  # => 5432
  #
  class ConfigSection
    # @param hash [Hash, nil] initial values; nil is treated as an empty hash
    def initialize(hash = {})
      @data = {}
      (hash || {}).each do |key, value|
        @data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
      end
    end

    # Dynamic accessors: `name=` stores the first argument under :name,
    # any other message reads the value stored under that name.
    #
    # @return [Object, nil] the stored value, or nil when the key is absent
    def method_missing(method, *args, &block)
      name = method.to_s
      if name.end_with?('=')
        @data[name.chomp('=').to_sym] = args.first
      else
        # A plain Hash lookup already yields nil for unknown keys.
        @data[method]
      end
    end

    # Reports readers/writers only for keys that are currently stored.
    def respond_to_missing?(method, include_private = false)
      key = method.to_s.chomp('=').to_sym
      @data.key?(key) || super
    end

    # @return [Hash] symbol-keyed copy with nested sections converted to hashes
    def to_h
      @data.transform_values do |v|
        v.is_a?(ConfigSection) ? v.to_h : v
      end
    end

    # @param key [#to_sym]
    # @return [Object, nil]
    def [](key)
      @data[key.to_sym]
    end

    # @param key [#to_sym]
    # @param value [Object] stored as given (not wrapped)
    def []=(key, value)
      @data[key.to_sym] = value
    end

    # Returns a new section with +other+ deep-merged on top of this one.
    # The receiver is not modified.
    #
    # The overlay is normalised first (symbol keys, nested ConfigSection
    # values turned into hashes). Without that step a string-keyed overlay
    # or a nested ConfigSection never collides with the existing symbol key
    # as Hash-vs-Hash, so the nested section would be replaced wholesale and
    # its sibling keys silently dropped.
    #
    # @param other [ConfigSection, Hash, nil] overlay values; nil merges nothing
    # @return [ConfigSection]
    def merge(other)
      ConfigSection.new(deep_merge(to_h, normalize_overlay(other) || {}))
    end

    # @return [Array<Symbol>]
    def keys
      @data.keys
    end

    # Iterates over the stored key/value pairs (delegates to Hash#each).
    def each(&block)
      @data.each(&block)
    end

    private

    # Converts an overlay into the same shape #to_h produces: symbol keys at
    # every level and plain hashes instead of ConfigSection instances.
    # Non-hash values are returned untouched.
    def normalize_overlay(value)
      value = value.to_h if value.is_a?(ConfigSection)
      return value unless value.is_a?(Hash)

      value.each_with_object({}) do |(key, nested), out|
        out[key.to_sym] = normalize_overlay(nested)
      end
    end

    # Recursively merges +overlay+ into +base+; on conflicts the overlay wins
    # unless both sides are hashes, in which case they are merged in turn.
    def deep_merge(base, overlay)
      base.merge(overlay) do |_key, old_val, new_val|
        if old_val.is_a?(Hash) && new_val.is_a?(Hash)
          deep_merge(old_val, new_val)
        else
          new_val
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  class Config
    # Validation routines mixed into HTM::Config.
    #
    # The methods read configuration through accessors and instance variables
    # that this module does not define (e.g. `embedding_provider`,
    # `job_backend`, `@logger`) and report problems through
    # `raise_validation_error` or HTM::ValidationError, both of which must be
    # supplied by the including class / surrounding library.
    module Validator
      SUPPORTED_PROVIDERS = %i[
        openai anthropic gemini azure ollama
        huggingface openrouter bedrock deepseek
      ].freeze

      SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
      SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze

      # Runs the provider, job backend, week start and relevance weight
      # checks, in that order.
      def validate_config
        validate_providers
        validate_job_backend
        validate_week_start
        validate_relevance_weights
      end

      # Checks each provider accessor in turn; the accessor is read right
      # before its own check.
      def validate_providers
        %i[embedding_provider tag_provider proposition_provider].each do |accessor|
          validate_provider(accessor, send(accessor))
        end
        nil
      end

      # @param name [Symbol] setting name used in the error message
      # @param value [Symbol, nil] configured provider; nil is accepted
      def validate_provider(name, value)
        return if value.nil?
        return if SUPPORTED_PROVIDERS.include?(value)

        raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
      end

      # A falsy job backend is accepted; anything else must be supported.
      def validate_job_backend
        backend = job_backend
        return unless backend
        return if SUPPORTED_JOB_BACKENDS.include?(backend)

        raise_validation_error("job.backend must be one of: #{SUPPORTED_JOB_BACKENDS.join(', ')} (got #{backend.inspect})")
      end

      # Unlike the job backend check there is no nil guard here, so a nil
      # week_start is reported as invalid.
      def validate_week_start
        start = week_start
        return if SUPPORTED_WEEK_STARTS.include?(start)

        raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{start.inspect})")
      end

      # The four relevance weights must add up to 1.0, within +/- 0.01.
      def validate_relevance_weights
        weights = [
          relevance_semantic_weight,
          relevance_tag_weight,
          relevance_recency_weight,
          relevance_access_weight
        ]
        # Plain left-to-right addition, so the reported total matches a
        # hand-written `a + b + c + d`.
        total = weights.reduce(:+)
        return if (0.99..1.01).cover?(total)

        raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
      end

      # Each callable hook must respond to #call; the first offender raises.
      #
      # @raise [HTM::ValidationError]
      def validate_callables
        checks = [
          [@embedding_generator, "embedding_generator must be callable"],
          [@tag_extractor, "tag_extractor must be callable"],
          [@proposition_extractor, "proposition_extractor must be callable"],
          [@token_counter, "token_counter must be callable"]
        ]
        checks.each do |callable, message|
          raise HTM::ValidationError, message unless callable.respond_to?(:call)
        end
        nil
      end

      # The logger must answer #info, #warn and #error.
      #
      # @raise [HTM::ValidationError]
      def validate_logger
        return if %i[info warn error].all? { |level| @logger.respond_to?(level) }

        raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
      end
    end
  end
end
|
data/lib/htm/config.rb
CHANGED
|
@@ -4,78 +4,18 @@ require 'anyway_config'
|
|
|
4
4
|
require 'logger'
|
|
5
5
|
require 'yaml'
|
|
6
6
|
|
|
7
|
+
# Define Config class first to establish superclass
|
|
7
8
|
class HTM
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
# @example
|
|
11
|
-
# section = ConfigSection.new(host: 'localhost', port: 5432)
|
|
12
|
-
# section.host # => 'localhost'
|
|
13
|
-
# section.port # => 5432
|
|
14
|
-
#
|
|
15
|
-
class ConfigSection
|
|
16
|
-
def initialize(hash = {})
|
|
17
|
-
@data = {}
|
|
18
|
-
(hash || {}).each do |key, value|
|
|
19
|
-
@data[key.to_sym] = value.is_a?(Hash) ? ConfigSection.new(value) : value
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def method_missing(method, *args, &block)
|
|
24
|
-
key = method.to_s
|
|
25
|
-
if key.end_with?('=')
|
|
26
|
-
@data[key.chomp('=').to_sym] = args.first
|
|
27
|
-
elsif @data.key?(method)
|
|
28
|
-
@data[method]
|
|
29
|
-
else
|
|
30
|
-
nil
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def respond_to_missing?(method, include_private = false)
|
|
35
|
-
key = method.to_s.chomp('=').to_sym
|
|
36
|
-
@data.key?(key) || super
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def to_h
|
|
40
|
-
@data.transform_values do |v|
|
|
41
|
-
v.is_a?(ConfigSection) ? v.to_h : v
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def [](key)
|
|
46
|
-
@data[key.to_sym]
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def []=(key, value)
|
|
50
|
-
@data[key.to_sym] = value
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def merge(other)
|
|
54
|
-
other_hash = other.is_a?(ConfigSection) ? other.to_h : other
|
|
55
|
-
ConfigSection.new(deep_merge(to_h, other_hash || {}))
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def keys
|
|
59
|
-
@data.keys
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def each(&block)
|
|
63
|
-
@data.each(&block)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
private
|
|
67
|
-
|
|
68
|
-
def deep_merge(base, overlay)
|
|
69
|
-
base.merge(overlay) do |_key, old_val, new_val|
|
|
70
|
-
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
71
|
-
deep_merge(old_val, new_val)
|
|
72
|
-
else
|
|
73
|
-
new_val
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
9
|
+
class Config < Anyway::Config
|
|
77
10
|
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
require_relative 'config/section'
|
|
14
|
+
require_relative 'config/validator'
|
|
15
|
+
require_relative 'config/database'
|
|
16
|
+
require_relative 'config/builder'
|
|
78
17
|
|
|
18
|
+
class HTM
|
|
79
19
|
# HTM Configuration using Anyway Config
|
|
80
20
|
#
|
|
81
21
|
# Schema is defined in lib/htm/config/defaults.yml (single source of truth)
|
|
@@ -114,7 +54,11 @@ class HTM
|
|
|
114
54
|
# config.embedding.model = 'text-embedding-3-small'
|
|
115
55
|
# end
|
|
116
56
|
#
|
|
117
|
-
class Config
|
|
57
|
+
class Config
|
|
58
|
+
include Validator
|
|
59
|
+
include Database
|
|
60
|
+
include Builder
|
|
61
|
+
|
|
118
62
|
config_name :htm
|
|
119
63
|
env_prefix :htm
|
|
120
64
|
|
|
@@ -216,14 +160,6 @@ class HTM
|
|
|
216
160
|
# Validation
|
|
217
161
|
# ==========================================================================
|
|
218
162
|
|
|
219
|
-
SUPPORTED_PROVIDERS = %i[
|
|
220
|
-
openai anthropic gemini azure ollama
|
|
221
|
-
huggingface openrouter bedrock deepseek
|
|
222
|
-
].freeze
|
|
223
|
-
|
|
224
|
-
SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
|
|
225
|
-
SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze
|
|
226
|
-
|
|
227
163
|
# Default embedding dimensions by provider
|
|
228
164
|
DEFAULT_DIMENSIONS = {
|
|
229
165
|
openai: 1536,
|
|
@@ -237,7 +173,7 @@ class HTM
|
|
|
237
173
|
deepseek: 1536
|
|
238
174
|
}.freeze
|
|
239
175
|
|
|
240
|
-
on_load :coerce_nested_types, :validate_config, :setup_defaults
|
|
176
|
+
on_load :coerce_nested_types, :reconcile_database_config, :validate_config, :setup_defaults
|
|
241
177
|
|
|
242
178
|
# ==========================================================================
|
|
243
179
|
# Callable Accessors (not loaded from config sources)
|
|
@@ -260,44 +196,6 @@ class HTM
|
|
|
260
196
|
# Convenience Accessors (for common nested values)
|
|
261
197
|
# ==========================================================================
|
|
262
198
|
|
|
263
|
-
# Database convenience methods
|
|
264
|
-
def database_url
|
|
265
|
-
url = database.url
|
|
266
|
-
return url if url && !url.empty?
|
|
267
|
-
|
|
268
|
-
build_database_url
|
|
269
|
-
end
|
|
270
|
-
|
|
271
|
-
def database_config
|
|
272
|
-
url = database_url
|
|
273
|
-
return {} unless url
|
|
274
|
-
|
|
275
|
-
require 'uri'
|
|
276
|
-
uri = URI.parse(url)
|
|
277
|
-
|
|
278
|
-
# Coercion now merges env vars with SCHEMA defaults, so pool_size/timeout
|
|
279
|
-
# are always available even when only HTM_DATABASE__URL is set
|
|
280
|
-
{
|
|
281
|
-
adapter: 'postgresql',
|
|
282
|
-
host: uri.host,
|
|
283
|
-
port: uri.port || 5432,
|
|
284
|
-
database: uri.path&.sub(%r{^/}, ''),
|
|
285
|
-
username: uri.user,
|
|
286
|
-
password: uri.password,
|
|
287
|
-
pool: database.pool_size.to_i,
|
|
288
|
-
timeout: database.timeout.to_i,
|
|
289
|
-
sslmode: database.sslmode,
|
|
290
|
-
encoding: 'unicode',
|
|
291
|
-
prepared_statements: false,
|
|
292
|
-
advisory_locks: false
|
|
293
|
-
}.compact
|
|
294
|
-
end
|
|
295
|
-
|
|
296
|
-
def database_configured?
|
|
297
|
-
url = database_url
|
|
298
|
-
(url && !url.empty?) || (database.name && !database.name.empty?)
|
|
299
|
-
end
|
|
300
|
-
|
|
301
199
|
# Embedding convenience accessors
|
|
302
200
|
def embedding_provider
|
|
303
201
|
provider = embedding.provider
|
|
@@ -493,6 +391,48 @@ class HTM
|
|
|
493
391
|
self.class.env
|
|
494
392
|
end
|
|
495
393
|
|
|
394
|
+
# ==========================================================================
|
|
395
|
+
# Environment Validation
|
|
396
|
+
# ==========================================================================
|
|
397
|
+
|
|
398
|
+
# Returns list of valid environment names from bundled defaults
|
|
399
|
+
#
|
|
400
|
+
# @return [Array<Symbol>] valid environment names (e.g., [:development, :production, :test])
|
|
401
|
+
def self.valid_environments
  # Pure delegation: the defaults loader owns the list of environment names.
  loader = HTM::Loaders::DefaultsLoader
  loader.valid_environments
end
|
|
404
|
+
|
|
405
|
+
# Check if current environment is valid (defined in config)
|
|
406
|
+
#
|
|
407
|
+
# @return [Boolean] true if environment has a config section
|
|
408
|
+
def self.valid_environment?
  # Asks the defaults loader about the class-level current environment.
  current = env
  HTM::Loaders::DefaultsLoader.valid_environment?(current)
end
|
|
411
|
+
|
|
412
|
+
# Validate that the current environment is configured
|
|
413
|
+
#
|
|
414
|
+
# @raise [HTM::ConfigurationError] if environment is invalid
|
|
415
|
+
# @return [true] if environment is valid
|
|
416
|
+
def self.validate_environment!
  current = env

  # Only build the list of valid names when the check actually fails.
  unless HTM::Loaders::DefaultsLoader.valid_environment?(current)
    valid = valid_environments.map(&:to_s).join(', ')
    raise HTM::ConfigurationError,
          "Invalid environment '#{current}'. " \
          "Valid environments are: #{valid}. " \
          "Set HTM_ENV to a valid environment or add a '#{current}:' section to your config."
  end

  true
end
|
|
426
|
+
|
|
427
|
+
# Instance method delegates
|
|
428
|
+
def valid_environment?
  # Instance-level convenience; the class method does the work.
  config_class = self.class
  config_class.valid_environment?
end
|
|
431
|
+
|
|
432
|
+
def validate_environment!
  # Instance-level convenience; the class method does the work and may
  # raise HTM::ConfigurationError.
  config_class = self.class
  config_class.validate_environment!
end
|
|
435
|
+
|
|
496
436
|
# ==========================================================================
|
|
497
437
|
# XDG Config Path Helpers
|
|
498
438
|
# ==========================================================================
|
|
@@ -598,18 +538,6 @@ class HTM
|
|
|
598
538
|
|
|
599
539
|
private
|
|
600
540
|
|
|
601
|
-
def build_database_url
|
|
602
|
-
return nil unless database.name && !database.name.empty?
|
|
603
|
-
|
|
604
|
-
auth = if database.user && !database.user.empty?
|
|
605
|
-
database.password && !database.password.empty? ? "#{database.user}:#{database.password}@" : "#{database.user}@"
|
|
606
|
-
else
|
|
607
|
-
''
|
|
608
|
-
end
|
|
609
|
-
|
|
610
|
-
"postgresql://#{auth}#{database.host}:#{database.port}/#{database.name}"
|
|
611
|
-
end
|
|
612
|
-
|
|
613
541
|
# ==========================================================================
|
|
614
542
|
# Type Coercion Callback
|
|
615
543
|
# ==========================================================================
|
|
@@ -622,77 +550,16 @@ class HTM
|
|
|
622
550
|
providers[provider] = ConfigSection.new(value) if value.is_a?(Hash)
|
|
623
551
|
end
|
|
624
552
|
end
|
|
625
|
-
end
|
|
626
|
-
|
|
627
|
-
# ==========================================================================
|
|
628
|
-
# Validation Callbacks
|
|
629
|
-
# ==========================================================================
|
|
630
|
-
|
|
631
|
-
def validate_config
|
|
632
|
-
validate_providers
|
|
633
|
-
validate_job_backend
|
|
634
|
-
validate_week_start
|
|
635
|
-
validate_relevance_weights
|
|
636
|
-
end
|
|
637
|
-
|
|
638
|
-
def validate_providers
|
|
639
|
-
validate_provider(:embedding_provider, embedding_provider)
|
|
640
|
-
validate_provider(:tag_provider, tag_provider)
|
|
641
|
-
validate_provider(:proposition_provider, proposition_provider)
|
|
642
|
-
end
|
|
643
|
-
|
|
644
|
-
def validate_provider(name, value)
|
|
645
|
-
return if value.nil?
|
|
646
|
-
|
|
647
|
-
unless SUPPORTED_PROVIDERS.include?(value)
|
|
648
|
-
raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
|
|
649
|
-
end
|
|
650
|
-
end
|
|
651
|
-
|
|
652
|
-
def validate_job_backend
|
|
653
|
-
return unless job_backend
|
|
654
553
|
|
|
655
|
-
|
|
656
|
-
|
|
554
|
+
# Coerce database numeric fields to integers (env vars are always strings)
|
|
555
|
+
if database&.port && !database.port.is_a?(Integer)
|
|
556
|
+
database.port = database.port.to_i
|
|
657
557
|
end
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
def validate_week_start
|
|
661
|
-
unless SUPPORTED_WEEK_STARTS.include?(week_start)
|
|
662
|
-
raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{week_start.inspect})")
|
|
663
|
-
end
|
|
664
|
-
end
|
|
665
|
-
|
|
666
|
-
def validate_relevance_weights
|
|
667
|
-
total = relevance_semantic_weight + relevance_tag_weight +
|
|
668
|
-
relevance_recency_weight + relevance_access_weight
|
|
669
|
-
|
|
670
|
-
unless (0.99..1.01).cover?(total)
|
|
671
|
-
raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
|
|
558
|
+
if database&.pool_size && !database.pool_size.is_a?(Integer)
|
|
559
|
+
database.pool_size = database.pool_size.to_i
|
|
672
560
|
end
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
def validate_callables
|
|
676
|
-
unless @embedding_generator.respond_to?(:call)
|
|
677
|
-
raise HTM::ValidationError, "embedding_generator must be callable"
|
|
678
|
-
end
|
|
679
|
-
|
|
680
|
-
unless @tag_extractor.respond_to?(:call)
|
|
681
|
-
raise HTM::ValidationError, "tag_extractor must be callable"
|
|
682
|
-
end
|
|
683
|
-
|
|
684
|
-
unless @proposition_extractor.respond_to?(:call)
|
|
685
|
-
raise HTM::ValidationError, "proposition_extractor must be callable"
|
|
686
|
-
end
|
|
687
|
-
|
|
688
|
-
unless @token_counter.respond_to?(:call)
|
|
689
|
-
raise HTM::ValidationError, "token_counter must be callable"
|
|
690
|
-
end
|
|
691
|
-
end
|
|
692
|
-
|
|
693
|
-
def validate_logger
|
|
694
|
-
unless @logger.respond_to?(:info) && @logger.respond_to?(:warn) && @logger.respond_to?(:error)
|
|
695
|
-
raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
|
|
561
|
+
if database&.timeout && !database.timeout.is_a?(Integer)
|
|
562
|
+
database.timeout = database.timeout.to_i
|
|
696
563
|
end
|
|
697
564
|
end
|
|
698
565
|
|
|
@@ -716,169 +583,6 @@ class HTM
|
|
|
716
583
|
|
|
717
584
|
:fiber
|
|
718
585
|
end
|
|
719
|
-
|
|
720
|
-
def build_default_logger
|
|
721
|
-
logger = Logger.new($stdout)
|
|
722
|
-
logger.level = log_level
|
|
723
|
-
logger.formatter = proc do |severity, datetime, _progname, msg|
|
|
724
|
-
"[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
|
|
725
|
-
end
|
|
726
|
-
logger
|
|
727
|
-
end
|
|
728
|
-
|
|
729
|
-
def build_default_token_counter
|
|
730
|
-
lambda do |text|
|
|
731
|
-
require 'tiktoken_ruby' unless defined?(Tiktoken)
|
|
732
|
-
encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|
733
|
-
encoder.encode(text).length
|
|
734
|
-
end
|
|
735
|
-
end
|
|
736
|
-
|
|
737
|
-
def build_default_embedding_generator
|
|
738
|
-
lambda do |text|
|
|
739
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
740
|
-
|
|
741
|
-
configure_ruby_llm(embedding_provider)
|
|
742
|
-
refresh_ollama_models! if embedding_provider == :ollama
|
|
743
|
-
|
|
744
|
-
model = embedding_provider == :ollama ? normalize_ollama_model(embedding_model) : embedding_model
|
|
745
|
-
response = RubyLLM.embed(text, model: model)
|
|
746
|
-
embedding = extract_embedding_from_response(response)
|
|
747
|
-
|
|
748
|
-
unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
|
|
749
|
-
raise HTM::EmbeddingError, "Invalid embedding response format from #{embedding_provider}"
|
|
750
|
-
end
|
|
751
|
-
|
|
752
|
-
embedding
|
|
753
|
-
end
|
|
754
|
-
end
|
|
755
|
-
|
|
756
|
-
def build_default_tag_extractor
|
|
757
|
-
lambda do |text, existing_ontology = []|
|
|
758
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
759
|
-
|
|
760
|
-
configure_ruby_llm(tag_provider)
|
|
761
|
-
refresh_ollama_models! if tag_provider == :ollama
|
|
762
|
-
|
|
763
|
-
model = tag_provider == :ollama ? normalize_ollama_model(tag_model) : tag_model
|
|
764
|
-
|
|
765
|
-
prompt = build_tag_extraction_prompt(text, existing_ontology)
|
|
766
|
-
system_prompt = build_tag_system_prompt
|
|
767
|
-
|
|
768
|
-
chat = RubyLLM.chat(model: model)
|
|
769
|
-
chat.with_instructions(system_prompt)
|
|
770
|
-
response = chat.ask(prompt)
|
|
771
|
-
|
|
772
|
-
parse_tag_response(extract_text_from_response(response))
|
|
773
|
-
end
|
|
774
|
-
end
|
|
775
|
-
|
|
776
|
-
def build_default_proposition_extractor
|
|
777
|
-
lambda do |text|
|
|
778
|
-
require 'ruby_llm' unless defined?(RubyLLM)
|
|
779
|
-
|
|
780
|
-
configure_ruby_llm(proposition_provider)
|
|
781
|
-
refresh_ollama_models! if proposition_provider == :ollama
|
|
782
|
-
|
|
783
|
-
model = proposition_provider == :ollama ? normalize_ollama_model(proposition_model) : proposition_model
|
|
784
|
-
|
|
785
|
-
prompt = build_proposition_extraction_prompt(text)
|
|
786
|
-
system_prompt = build_proposition_system_prompt
|
|
787
|
-
|
|
788
|
-
chat = RubyLLM.chat(model: model)
|
|
789
|
-
chat.with_instructions(system_prompt)
|
|
790
|
-
response = chat.ask(prompt)
|
|
791
|
-
|
|
792
|
-
parse_proposition_response(extract_text_from_response(response))
|
|
793
|
-
end
|
|
794
|
-
end
|
|
795
|
-
|
|
796
|
-
# ==========================================================================
|
|
797
|
-
# Response Extraction Helpers
|
|
798
|
-
# ==========================================================================
|
|
799
|
-
|
|
800
|
-
def extract_embedding_from_response(response)
|
|
801
|
-
return nil unless response
|
|
802
|
-
|
|
803
|
-
case response
|
|
804
|
-
when Array
|
|
805
|
-
response
|
|
806
|
-
when ->(r) { r.respond_to?(:vectors) }
|
|
807
|
-
vectors = response.vectors
|
|
808
|
-
vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
|
|
809
|
-
when ->(r) { r.respond_to?(:to_a) }
|
|
810
|
-
response.to_a
|
|
811
|
-
when ->(r) { r.respond_to?(:embedding) }
|
|
812
|
-
response.embedding
|
|
813
|
-
else
|
|
814
|
-
if response.respond_to?(:instance_variable_get)
|
|
815
|
-
vectors = response.instance_variable_get(:@vectors)
|
|
816
|
-
return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
|
|
817
|
-
return vectors if vectors.is_a?(Array)
|
|
818
|
-
end
|
|
819
|
-
raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
|
|
820
|
-
end
|
|
821
|
-
end
|
|
822
|
-
|
|
823
|
-
def extract_text_from_response(response)
|
|
824
|
-
return '' unless response
|
|
825
|
-
|
|
826
|
-
case response
|
|
827
|
-
when String then response
|
|
828
|
-
when ->(r) { r.respond_to?(:content) } then response.content.to_s
|
|
829
|
-
when ->(r) { r.respond_to?(:text) } then response.text.to_s
|
|
830
|
-
else response.to_s
|
|
831
|
-
end
|
|
832
|
-
end
|
|
833
|
-
|
|
834
|
-
def parse_tag_response(text)
|
|
835
|
-
tags = text.to_s.split("\n").map(&:strip).reject(&:empty?)
|
|
836
|
-
valid_tags = tags.select { |tag| tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/ }
|
|
837
|
-
valid_tags.select { |tag| tag.count(':') < max_tag_depth }
|
|
838
|
-
end
|
|
839
|
-
|
|
840
|
-
def parse_proposition_response(text)
|
|
841
|
-
text.to_s
|
|
842
|
-
.split("\n")
|
|
843
|
-
.map(&:strip)
|
|
844
|
-
.map { |line| line.sub(/^[-*]\s*/, '') }
|
|
845
|
-
.map(&:strip)
|
|
846
|
-
.reject(&:empty?)
|
|
847
|
-
end
|
|
848
|
-
|
|
849
|
-
# ==========================================================================
|
|
850
|
-
# Prompt Builders
|
|
851
|
-
#
|
|
852
|
-
# These methods use configurable prompt templates from defaults.yml.
|
|
853
|
-
# Templates use %{placeholder} syntax for runtime interpolation.
|
|
854
|
-
# ==========================================================================
|
|
855
|
-
|
|
856
|
-
def build_tag_extraction_prompt(text, existing_ontology)
|
|
857
|
-
taxonomy_context = if existing_ontology.any?
|
|
858
|
-
sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
|
|
859
|
-
tag.taxonomy_context_existing % { sample_tags: sample_tags.join(', ') }
|
|
860
|
-
else
|
|
861
|
-
tag.taxonomy_context_empty
|
|
862
|
-
end
|
|
863
|
-
|
|
864
|
-
tag.user_prompt_template % {
|
|
865
|
-
text: text,
|
|
866
|
-
max_depth: max_tag_depth,
|
|
867
|
-
taxonomy_context: taxonomy_context
|
|
868
|
-
}
|
|
869
|
-
end
|
|
870
|
-
|
|
871
|
-
def build_tag_system_prompt
|
|
872
|
-
tag.system_prompt.to_s.strip
|
|
873
|
-
end
|
|
874
|
-
|
|
875
|
-
def build_proposition_extraction_prompt(text)
|
|
876
|
-
proposition.user_prompt_template % { text: text }
|
|
877
|
-
end
|
|
878
|
-
|
|
879
|
-
def build_proposition_system_prompt
|
|
880
|
-
proposition.system_prompt.to_s.strip
|
|
881
|
-
end
|
|
882
586
|
end
|
|
883
587
|
end
|
|
884
588
|
|