htm 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +1 -1
  3. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +4 -4
  4. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +1 -1
  5. data/.envrc +12 -25
  6. data/.irbrc +7 -7
  7. data/.tbls.yml +2 -2
  8. data/CHANGELOG.md +71 -0
  9. data/README.md +1 -1
  10. data/Rakefile +8 -3
  11. data/SETUP.md +12 -12
  12. data/bin/htm_mcp +0 -4
  13. data/db/seed_data/README.md +2 -2
  14. data/db/seeds.rb +2 -2
  15. data/docs/api/database.md +37 -37
  16. data/docs/api/htm.md +1 -1
  17. data/docs/api/yard/HTM/ActiveRecordConfig.md +2 -2
  18. data/docs/api/yard/HTM/Configuration.md +26 -15
  19. data/docs/api/yard/HTM/Database.md +7 -8
  20. data/docs/api/yard/HTM/JobAdapter.md +1 -1
  21. data/docs/api/yard/HTM/Railtie.md +2 -2
  22. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  23. data/docs/architecture/adrs/011-pgai-integration.md +4 -4
  24. data/docs/database_rake_tasks.md +5 -5
  25. data/docs/development/rake-tasks.md +11 -11
  26. data/docs/development/setup.md +21 -21
  27. data/docs/development/testing.md +1 -1
  28. data/docs/getting-started/installation.md +20 -20
  29. data/docs/getting-started/quick-start.md +12 -12
  30. data/docs/guides/getting-started.md +2 -2
  31. data/docs/guides/long-term-memory.md +1 -1
  32. data/docs/guides/mcp-server.md +17 -17
  33. data/docs/guides/robot-groups.md +8 -8
  34. data/docs/index.md +4 -4
  35. data/docs/multi_framework_support.md +8 -8
  36. data/docs/setup_local_database.md +19 -19
  37. data/docs/using_rake_tasks_in_your_app.md +14 -14
  38. data/examples/README.md +50 -6
  39. data/examples/basic_usage.rb +31 -21
  40. data/examples/cli_app/README.md +8 -8
  41. data/examples/cli_app/htm_cli.rb +5 -5
  42. data/examples/config_file_example/README.md +256 -0
  43. data/examples/config_file_example/config/htm.local.yml +34 -0
  44. data/examples/config_file_example/custom_config.yml +22 -0
  45. data/examples/config_file_example/show_config.rb +125 -0
  46. data/examples/custom_llm_configuration.rb +7 -7
  47. data/examples/example_app/Rakefile +2 -2
  48. data/examples/example_app/app.rb +8 -8
  49. data/examples/file_loader_usage.rb +9 -9
  50. data/examples/mcp_client.rb +5 -5
  51. data/examples/rails_app/Gemfile.lock +48 -56
  52. data/examples/rails_app/README.md +1 -1
  53. data/examples/robot_groups/multi_process.rb +5 -5
  54. data/examples/robot_groups/robot_worker.rb +5 -5
  55. data/examples/robot_groups/same_process.rb +9 -9
  56. data/examples/sinatra_app/app.rb +1 -1
  57. data/examples/timeframe_demo.rb +1 -1
  58. data/lib/htm/active_record_config.rb +12 -25
  59. data/lib/htm/circuit_breaker.rb +0 -2
  60. data/lib/htm/config/defaults.yml +246 -0
  61. data/lib/htm/config.rb +888 -0
  62. data/lib/htm/database.rb +23 -27
  63. data/lib/htm/embedding_service.rb +0 -4
  64. data/lib/htm/integrations/sinatra.rb +3 -7
  65. data/lib/htm/job_adapter.rb +1 -15
  66. data/lib/htm/jobs/generate_embedding_job.rb +1 -7
  67. data/lib/htm/jobs/generate_propositions_job.rb +2 -12
  68. data/lib/htm/jobs/generate_tags_job.rb +1 -8
  69. data/lib/htm/loaders/defaults_loader.rb +143 -0
  70. data/lib/htm/loaders/xdg_config_loader.rb +116 -0
  71. data/lib/htm/mcp/cli.rb +200 -58
  72. data/lib/htm/mcp/server.rb +3 -3
  73. data/lib/htm/proposition_service.rb +2 -12
  74. data/lib/htm/railtie.rb +3 -4
  75. data/lib/htm/tag_service.rb +1 -8
  76. data/lib/htm/version.rb +1 -1
  77. data/lib/htm.rb +124 -5
  78. metadata +24 -4
  79. data/config/database.yml +0 -77
  80. data/lib/htm/configuration.rb +0 -799
@@ -1,799 +0,0 @@
- # frozen_string_literal: true
-
- require_relative 'errors'
- require 'logger'
-
- class HTM
-   # HTM Configuration
-   #
-   # HTM uses RubyLLM for multi-provider LLM support. Supported providers:
-   # - :openai (OpenAI API)
-   # - :anthropic (Anthropic Claude)
-   # - :gemini (Google Gemini)
-   # - :azure (Azure OpenAI)
-   # - :ollama (Local Ollama - default)
-   # - :huggingface (HuggingFace Inference API)
-   # - :openrouter (OpenRouter)
-   # - :bedrock (AWS Bedrock)
-   # - :deepseek (DeepSeek)
-   #
-   # @example Configure with OpenAI
-   #   HTM.configure do |config|
-   #     config.embedding_provider = :openai
-   #     config.embedding_model = 'text-embedding-3-small'
-   #     config.tag_provider = :openai
-   #     config.tag_model = 'gpt-4o-mini'
-   #     config.openai_api_key = ENV['OPENAI_API_KEY']
-   #   end
-   #
-   # @example Configure with Ollama (default)
-   #   HTM.configure do |config|
-   #     config.embedding_provider = :ollama
-   #     config.embedding_model = 'nomic-embed-text'
-   #     config.tag_provider = :ollama
-   #     config.tag_model = 'llama3'
-   #     config.ollama_url = 'http://localhost:11434'
-   #   end
-   #
-   # @example Configure with Anthropic for tags, OpenAI for embeddings
-   #   HTM.configure do |config|
-   #     config.embedding_provider = :openai
-   #     config.embedding_model = 'text-embedding-3-small'
-   #     config.openai_api_key = ENV['OPENAI_API_KEY']
-   #     config.tag_provider = :anthropic
-   #     config.tag_model = 'claude-3-haiku-20240307'
-   #     config.anthropic_api_key = ENV['ANTHROPIC_API_KEY']
-   #   end
-   #
-   # @example Configure with custom methods
-   #   HTM.configure do |config|
-   #     config.embedding_generator = ->(text) {
-   #       MyApp::LLMService.embed(text) # Returns Array<Float>
-   #     }
-   #     config.tag_extractor = ->(text, ontology) {
-   #       MyApp::LLMService.extract_tags(text, ontology) # Returns Array<String>
-   #     }
-   #     config.logger = Rails.logger
-   #   end
-   #
-   class Configuration
-     attr_accessor :embedding_generator, :tag_extractor, :proposition_extractor, :token_counter
-     attr_accessor :embedding_model, :embedding_provider, :embedding_dimensions
-     attr_accessor :tag_model, :tag_provider
-     attr_accessor :proposition_model, :proposition_provider, :extract_propositions
-     attr_accessor :embedding_timeout, :tag_timeout, :proposition_timeout, :connection_timeout
-     attr_accessor :logger
-     attr_accessor :job_backend
-     attr_accessor :week_start
-     attr_accessor :telemetry_enabled # Enable OpenTelemetry metrics (default: false)
-
-     # Limit configuration
-     attr_accessor :max_embedding_dimension # Max vector dimensions (default: 2000)
-     attr_accessor :max_tag_depth # Max tag hierarchy depth (default: 4)
-
-     # Chunking configuration (for file loading)
-     attr_accessor :chunk_size # Max characters per chunk (default: 1024)
-     attr_accessor :chunk_overlap # Character overlap between chunks (default: 64)
-
-     # Circuit breaker configuration
-     attr_accessor :circuit_breaker_failure_threshold # Failures before opening (default: 5)
-     attr_accessor :circuit_breaker_reset_timeout # Seconds before half-open (default: 60)
-     attr_accessor :circuit_breaker_half_open_max_calls # Successes to close (default: 3)
-
-     # Relevance scoring weights (must sum to 1.0)
-     attr_accessor :relevance_semantic_weight # Vector similarity weight (default: 0.5)
-     attr_accessor :relevance_tag_weight # Tag overlap weight (default: 0.3)
-     attr_accessor :relevance_recency_weight # Temporal freshness weight (default: 0.1)
-     attr_accessor :relevance_access_weight # Access frequency weight (default: 0.1)
-     attr_accessor :relevance_recency_half_life_hours # Decay half-life in hours (default: 168 = 1 week)
-
-     # Provider-specific API keys and endpoints
-     attr_accessor :openai_api_key, :openai_organization, :openai_project
-     attr_accessor :anthropic_api_key
-     attr_accessor :gemini_api_key
-     attr_accessor :azure_api_key, :azure_endpoint, :azure_api_version
-     attr_accessor :ollama_url
-     attr_accessor :huggingface_api_key
-     attr_accessor :openrouter_api_key
-     attr_accessor :bedrock_access_key, :bedrock_secret_key, :bedrock_region
-     attr_accessor :deepseek_api_key
-
-     # Supported providers
-     SUPPORTED_PROVIDERS = %i[
-       openai anthropic gemini azure ollama
-       huggingface openrouter bedrock deepseek
-     ].freeze
-
-     # Default embedding dimensions by provider/model
-     DEFAULT_DIMENSIONS = {
-       openai: 1536, # text-embedding-3-small
-       anthropic: 1024, # voyage embeddings
-       gemini: 768, # text-embedding-004
-       azure: 1536, # same as OpenAI
-       ollama: 768, # nomic-embed-text
-       huggingface: 768, # varies by model
-       openrouter: 1536, # varies by model
-       bedrock: 1536, # titan-embed-text
-       deepseek: 1536 # varies by model
-     }.freeze
-
-     def initialize
-       # Default configuration - Ollama for local development
-       # All settings can be overridden via HTM_* environment variables
-       @embedding_provider = ENV.fetch('HTM_EMBEDDING_PROVIDER', 'ollama').to_sym
-       @embedding_model = ENV.fetch('HTM_EMBEDDING_MODEL', 'nomic-embed-text:latest')
-       @embedding_dimensions = ENV.fetch('HTM_EMBEDDING_DIMENSIONS', 768).to_i
-
-       @tag_provider = ENV.fetch('HTM_TAG_PROVIDER', 'ollama').to_sym
-       @tag_model = ENV.fetch('HTM_TAG_MODEL', 'gemma3:latest')
-
-       @proposition_provider = ENV.fetch('HTM_PROPOSITION_PROVIDER', 'ollama').to_sym
-       @proposition_model = ENV.fetch('HTM_PROPOSITION_MODEL', 'gemma3:latest')
-       @extract_propositions = ENV.fetch('HTM_EXTRACT_PROPOSITIONS', 'false').downcase == 'true'
-
-       # Provider credentials from environment variables
-       # These use standard provider env var names for compatibility
-       @openai_api_key = ENV.fetch('HTM_OPENAI_API_KEY', ENV['OPENAI_API_KEY'])
-       @openai_organization = ENV.fetch('HTM_OPENAI_ORGANIZATION', ENV['OPENAI_ORGANIZATION'])
-       @openai_project = ENV.fetch('HTM_OPENAI_PROJECT', ENV['OPENAI_PROJECT'])
-       @anthropic_api_key = ENV.fetch('HTM_ANTHROPIC_API_KEY', ENV['ANTHROPIC_API_KEY'])
-       @gemini_api_key = ENV.fetch('HTM_GEMINI_API_KEY', ENV['GEMINI_API_KEY'])
-       @azure_api_key = ENV.fetch('HTM_AZURE_API_KEY', ENV['AZURE_OPENAI_API_KEY'])
-       @azure_endpoint = ENV.fetch('HTM_AZURE_ENDPOINT', ENV['AZURE_OPENAI_ENDPOINT'])
-       @azure_api_version = ENV.fetch('HTM_AZURE_API_VERSION', ENV.fetch('AZURE_OPENAI_API_VERSION', '2024-02-01'))
-       @ollama_url = ENV.fetch('HTM_OLLAMA_URL', ENV['OLLAMA_API_BASE'] || ENV['OLLAMA_URL'] || 'http://localhost:11434')
-       @huggingface_api_key = ENV.fetch('HTM_HUGGINGFACE_API_KEY', ENV['HUGGINGFACE_API_KEY'])
-       @openrouter_api_key = ENV.fetch('HTM_OPENROUTER_API_KEY', ENV['OPENROUTER_API_KEY'])
-       @bedrock_access_key = ENV.fetch('HTM_BEDROCK_ACCESS_KEY', ENV['AWS_ACCESS_KEY_ID'])
-       @bedrock_secret_key = ENV.fetch('HTM_BEDROCK_SECRET_KEY', ENV['AWS_SECRET_ACCESS_KEY'])
-       @bedrock_region = ENV.fetch('HTM_BEDROCK_REGION', ENV.fetch('AWS_REGION', 'us-east-1'))
-       @deepseek_api_key = ENV.fetch('HTM_DEEPSEEK_API_KEY', ENV['DEEPSEEK_API_KEY'])
-
-       # Timeout settings (in seconds) - apply to all LLM providers
-       @embedding_timeout = ENV.fetch('HTM_EMBEDDING_TIMEOUT', 120).to_i
-       @tag_timeout = ENV.fetch('HTM_TAG_TIMEOUT', 180).to_i
-       @proposition_timeout = ENV.fetch('HTM_PROPOSITION_TIMEOUT', 180).to_i
-       @connection_timeout = ENV.fetch('HTM_CONNECTION_TIMEOUT', 30).to_i
-
-       # Limit settings
-       @max_embedding_dimension = ENV.fetch('HTM_MAX_EMBEDDING_DIMENSION', 2000).to_i
-       @max_tag_depth = ENV.fetch('HTM_MAX_TAG_DEPTH', 4).to_i
-
-       # Chunking settings (for file loading)
-       @chunk_size = ENV.fetch('HTM_CHUNK_SIZE', 1024).to_i
-       @chunk_overlap = ENV.fetch('HTM_CHUNK_OVERLAP', 64).to_i
-
-       # Circuit breaker settings
-       @circuit_breaker_failure_threshold = ENV.fetch('HTM_CIRCUIT_BREAKER_FAILURE_THRESHOLD', 5).to_i
-       @circuit_breaker_reset_timeout = ENV.fetch('HTM_CIRCUIT_BREAKER_RESET_TIMEOUT', 60).to_i
-       @circuit_breaker_half_open_max_calls = ENV.fetch('HTM_CIRCUIT_BREAKER_HALF_OPEN_MAX_CALLS', 3).to_i
-
-       # Relevance scoring weights (should sum to 1.0)
-       @relevance_semantic_weight = ENV.fetch('HTM_RELEVANCE_SEMANTIC_WEIGHT', 0.5).to_f
-       @relevance_tag_weight = ENV.fetch('HTM_RELEVANCE_TAG_WEIGHT', 0.3).to_f
-       @relevance_recency_weight = ENV.fetch('HTM_RELEVANCE_RECENCY_WEIGHT', 0.1).to_f
-       @relevance_access_weight = ENV.fetch('HTM_RELEVANCE_ACCESS_WEIGHT', 0.1).to_f
-       @relevance_recency_half_life_hours = ENV.fetch('HTM_RELEVANCE_RECENCY_HALF_LIFE_HOURS', 168.0).to_f
-
-       # Default logger (STDOUT with INFO level)
-       @logger = default_logger
-
-       # Job backend: inline, thread, active_job, sidekiq (auto-detected if not set)
-       @job_backend = ENV['HTM_JOB_BACKEND'] ? ENV['HTM_JOB_BACKEND'].to_sym : detect_job_backend
-
-       # Timeframe parsing configuration: sunday or monday
-       @week_start = ENV.fetch('HTM_WEEK_START', 'sunday').to_sym
-
-       # Telemetry (OpenTelemetry metrics)
-       @telemetry_enabled = ENV.fetch('HTM_TELEMETRY_ENABLED', 'false').downcase == 'true'
-
-       # Thread-safe Ollama model refresh tracking
-       @ollama_models_refreshed = false
-       @ollama_refresh_mutex = Mutex.new
-
-       # Set default implementations
-       reset_to_defaults
-     end
-
-     # Reset to default RubyLLM-based implementations
-     def reset_to_defaults
-       @embedding_generator = default_embedding_generator
-       @tag_extractor = default_tag_extractor
-       @proposition_extractor = default_proposition_extractor
-       @token_counter = default_token_counter
-     end
-
-     # Validate configuration
-     def validate!
-       unless @embedding_generator.respond_to?(:call)
-         raise HTM::ValidationError, "embedding_generator must be callable (proc, lambda, or object responding to :call)"
-       end
-
-       unless @tag_extractor.respond_to?(:call)
-         raise HTM::ValidationError, "tag_extractor must be callable (proc, lambda, or object responding to :call)"
-       end
-
-       unless @proposition_extractor.respond_to?(:call)
-         raise HTM::ValidationError, "proposition_extractor must be callable (proc, lambda, or object responding to :call)"
-       end
-
-       unless @token_counter.respond_to?(:call)
-         raise HTM::ValidationError, "token_counter must be callable (proc, lambda, or object responding to :call)"
-       end
-
-       unless @logger.respond_to?(:info) && @logger.respond_to?(:warn) && @logger.respond_to?(:error)
-         raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
-       end
-
-       unless [:active_job, :sidekiq, :inline, :thread].include?(@job_backend)
-         raise HTM::ValidationError, "job_backend must be one of: :active_job, :sidekiq, :inline, :thread (got #{@job_backend.inspect})"
-       end
-
-       unless [:sunday, :monday].include?(@week_start)
-         raise HTM::ValidationError, "week_start must be :sunday or :monday (got #{@week_start.inspect})"
-       end
-
-       # Validate provider if specified
-       if @embedding_provider && !SUPPORTED_PROVIDERS.include?(@embedding_provider)
-         raise HTM::ValidationError, "embedding_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@embedding_provider.inspect})"
-       end
-
-       if @tag_provider && !SUPPORTED_PROVIDERS.include?(@tag_provider)
-         raise HTM::ValidationError, "tag_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@tag_provider.inspect})"
-       end
-
-       if @proposition_provider && !SUPPORTED_PROVIDERS.include?(@proposition_provider)
-         raise HTM::ValidationError, "proposition_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@proposition_provider.inspect})"
-       end
-     end
-
-     # Normalize Ollama model name to include tag if missing
-     #
-     # Ollama models require a tag (e.g., :latest, :7b, :13b). If the user
-     # specifies a model without a tag, we append :latest by default.
-     #
-     # @param model_name [String] Original model name
-     # @return [String] Normalized model name with tag
-     #
-     def normalize_ollama_model(model_name)
-       return model_name if model_name.nil? || model_name.empty?
-       return model_name if model_name.include?(':')
-
-       "#{model_name}:latest"
-     end
-
-     # Configure RubyLLM with the appropriate provider credentials
-     #
-     # @param provider [Symbol] The provider to configure (:openai, :anthropic, etc.)
-     #
-     def configure_ruby_llm(provider = nil)
-       # Always require ruby_llm to ensure full module is loaded
-       # (require is idempotent, and defined?(RubyLLM) can be true before configure method exists)
-       require 'ruby_llm'
-
-       provider ||= @embedding_provider
-
-       RubyLLM.configure do |config|
-         case provider
-         when :openai
-           config.openai_api_key = @openai_api_key if @openai_api_key
-           config.openai_organization = @openai_organization if @openai_organization && config.respond_to?(:openai_organization=)
-           config.openai_project = @openai_project if @openai_project && config.respond_to?(:openai_project=)
-         when :anthropic
-           config.anthropic_api_key = @anthropic_api_key if @anthropic_api_key
-         when :gemini
-           config.gemini_api_key = @gemini_api_key if @gemini_api_key
-         when :azure
-           config.azure_api_key = @azure_api_key if @azure_api_key && config.respond_to?(:azure_api_key=)
-           config.azure_endpoint = @azure_endpoint if @azure_endpoint && config.respond_to?(:azure_endpoint=)
-           config.azure_api_version = @azure_api_version if @azure_api_version && config.respond_to?(:azure_api_version=)
-         when :ollama
-           # Ollama exposes OpenAI-compatible API at /v1
-           # Ensure URL has /v1 suffix (add if missing, don't duplicate if present)
-           ollama_api_base = if @ollama_url.end_with?('/v1') || @ollama_url.end_with?('/v1/')
-             @ollama_url.sub(%r{/+$}, '') # Just remove trailing slashes
-           else
-             "#{@ollama_url.sub(%r{/+$}, '')}/v1"
-           end
-           config.ollama_api_base = ollama_api_base
-         when :huggingface
-           config.huggingface_api_key = @huggingface_api_key if @huggingface_api_key && config.respond_to?(:huggingface_api_key=)
-         when :openrouter
-           config.openrouter_api_key = @openrouter_api_key if @openrouter_api_key && config.respond_to?(:openrouter_api_key=)
-         when :bedrock
-           config.bedrock_api_key = @bedrock_access_key if @bedrock_access_key && config.respond_to?(:bedrock_api_key=)
-           config.bedrock_secret_key = @bedrock_secret_key if @bedrock_secret_key && config.respond_to?(:bedrock_secret_key=)
-           config.bedrock_region = @bedrock_region if @bedrock_region && config.respond_to?(:bedrock_region=)
-         when :deepseek
-           config.deepseek_api_key = @deepseek_api_key if @deepseek_api_key && config.respond_to?(:deepseek_api_key=)
-         end
-       end
-     end
-
-     private
-
-     # Auto-detect appropriate job backend based on environment
-     #
-     # Detection priority:
-     # 1. ActiveJob (if defined) - Rails applications
-     # 2. Sidekiq (if defined) - Sinatra and other web apps
-     # 3. Inline (if test environment) - Test suites
-     # 4. Thread (default fallback) - CLI and standalone apps
-     #
-     # @return [Symbol] Detected job backend
-     #
-     def detect_job_backend
-       # Check for explicit environment variable override
-       if ENV['HTM_JOB_BACKEND']
-         return ENV['HTM_JOB_BACKEND'].to_sym
-       end
-
-       # Detect test environment - use inline for synchronous execution
-       return :inline if HTM.test?
-
-       # Detect Rails - prefer ActiveJob
-       if defined?(ActiveJob)
-         return :active_job
-       end
-
-       # Detect Sidekiq - direct integration for Sinatra apps
-       if defined?(Sidekiq)
-         return :sidekiq
-       end
-
-       # Default fallback - simple threading for standalone/CLI apps
-       :thread
-     end
-
-     # Default logger configuration
-     def default_logger
-       logger = Logger.new($stdout)
-       logger.level = ENV.fetch('HTM_LOG_LEVEL', 'INFO').upcase.to_sym
-       logger.formatter = proc do |severity, datetime, progname, msg|
-         "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- HTM: #{msg}\n"
-       end
-       logger
-     end
-
-     # Default token counter using Tiktoken
-     def default_token_counter
-       lambda do |text|
-         require 'tiktoken_ruby' unless defined?(Tiktoken)
-         encoder = Tiktoken.encoding_for_model("gpt-3.5-turbo")
-         encoder.encode(text).length
-       end
-     end
-
-     # Default embedding generator using RubyLLM
-     #
-     # @return [Proc] Callable that takes text and returns embedding vector
-     #
-     def default_embedding_generator
-       lambda do |text|
-         require 'ruby_llm' unless defined?(RubyLLM)
-
-         # Configure RubyLLM for the embedding provider
-         configure_ruby_llm(@embedding_provider)
-
-         # Refresh models for Ollama to discover local models (thread-safe)
-         if @embedding_provider == :ollama
-           @ollama_refresh_mutex.synchronize do
-             unless @ollama_models_refreshed
-               RubyLLM.models.refresh!
-               @ollama_models_refreshed = true
-             end
-           end
-         end
-
-         # Normalize Ollama model name (ensure it has a tag like :latest)
-         model = @embedding_provider == :ollama ? normalize_ollama_model(@embedding_model) : @embedding_model
-
-         # Generate embedding using RubyLLM
-         response = RubyLLM.embed(text, model: model)
-
-         # Extract embedding vector from response
-         embedding = extract_embedding_from_response(response)
-
-         unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) }
-           raise HTM::EmbeddingError, "Invalid embedding response format from #{@embedding_provider}"
-         end
-
-         embedding
-       end
-     end
-
-     # Extract embedding vector from RubyLLM response
-     #
-     # @param response [Object] RubyLLM embed response
-     # @return [Array<Float>] Embedding vector
-     #
-     def extract_embedding_from_response(response)
-       return nil unless response
-
-       # Handle different response formats from RubyLLM
-       case response
-       when Array
-         # Direct array response
-         response
-       when ->(r) { r.respond_to?(:vectors) }
-         # RubyLLM::Embedding object with vectors method
-         vectors = response.vectors
-         vectors.is_a?(Array) && vectors.first.is_a?(Array) ? vectors.first : vectors
-       when ->(r) { r.respond_to?(:to_a) }
-         # Can be converted to array
-         response.to_a
-       when ->(r) { r.respond_to?(:embedding) }
-         # Has embedding attribute
-         response.embedding
-       else
-         # Try to extract vectors from instance variables
-         if response.respond_to?(:instance_variable_get)
-           vectors = response.instance_variable_get(:@vectors)
-           return vectors.first if vectors.is_a?(Array) && vectors.first.is_a?(Array)
-           return vectors if vectors.is_a?(Array)
-         end
-         raise HTM::EmbeddingError, "Cannot extract embedding from response: #{response.class}"
-       end
-     end
-
-     # Default tag extractor using RubyLLM chat
-     #
-     # @return [Proc] Callable that takes text and ontology, returns array of tags
-     #
-     def default_tag_extractor
-       lambda do |text, existing_ontology = []|
-         require 'ruby_llm' unless defined?(RubyLLM)
-
-         # Configure RubyLLM for the tag provider
-         configure_ruby_llm(@tag_provider)
-
-         # Refresh models for Ollama to discover local models (thread-safe)
-         if @tag_provider == :ollama
-           @ollama_refresh_mutex.synchronize do
-             unless @ollama_models_refreshed
-               RubyLLM.models.refresh!
-               @ollama_models_refreshed = true
-             end
-           end
-         end
-
-         # Normalize Ollama model name (ensure it has a tag like :latest)
-         model = @tag_provider == :ollama ? normalize_ollama_model(@tag_model) : @tag_model
-
-         # Build prompt
-         taxonomy_context = if existing_ontology.any?
-           sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
-           "Existing taxonomy paths: #{sample_tags.join(', ')}\n\nPrefer reusing these paths when the text matches their domain."
-         else
-           "This is a new taxonomy - establish clear root categories."
-         end
-
-         prompt = <<~PROMPT
-           Extract classification tags for this text using a HIERARCHICAL TAXONOMY.
-
-           A hierarchical taxonomy is a tree where each concept has exactly ONE parent path:
-
-           domain
-           ├── category
-           │   ├── subcategory
-           │   │   └── specific-term
-           │   └── subcategory
-           └── category
-
-           #{taxonomy_context}
-
-           TAG FORMAT: domain:category:subcategory:term (colon-separated, max 4 levels)
-
-           LEVEL GUIDELINES:
-           - Level 1 (domain): Broad field (database, ai, web, security, devops)
-           - Level 2 (category): Major subdivision (database:relational, ai:machine-learning)
-           - Level 3 (subcategory): Specific area (database:relational:postgresql)
-           - Level 4 (term): Fine detail, use sparingly (database:relational:postgresql:extensions)
-
-           RULES:
-           1. Each concept belongs to ONE path only (no duplicates across branches)
-           2. Use lowercase, hyphens for multi-word terms (natural-language-processing)
-           3. Return 2-5 tags that best classify this text
-           4. Match existing taxonomy paths when applicable
-           5. More general tags are often better than overly specific ones
-
-           GOOD EXAMPLES:
-           - database:postgresql
-           - ai:machine-learning:embeddings
-           - web:api:rest
-           - programming:ruby:gems
-
-           BAD EXAMPLES:
-           - postgresql (missing domain - where does it belong?)
-           - database:postgresql AND data:storage:postgresql (duplicate concept)
-           - ai:ml:nlp:transformers:bert:embeddings (too deep)
-
-           TEXT: #{text}
-
-           Return ONLY tags, one per line.
-         PROMPT
-
-         system_prompt = <<~SYSTEM.strip
-           You are a taxonomy classifier that assigns texts to a hierarchical classification tree.
-
-           Core principle: Each concept has ONE canonical location in the tree. If "postgresql" exists under "database", never create it elsewhere.
-
-           Your task:
-           1. Identify the domains/topics present in the text
-           2. Build paths from general (root) to specific (leaf)
-           3. Reuse existing taxonomy branches when they fit
-           4. Output 2-5 classification paths, one per line
-         SYSTEM
-
-         # Use RubyLLM chat for tag extraction
-         chat = RubyLLM.chat(model: model)
-         chat.with_instructions(system_prompt)
-         response = chat.ask(prompt)
-
-         # Extract text from response
-         response_text = extract_text_from_response(response)
-
-         # Parse and validate tags
-         tags = response_text.to_s.split("\n").map(&:strip).reject(&:empty?)
-
-         # Validate format: lowercase alphanumeric + hyphens + colons
-         valid_tags = tags.select do |tag|
-           tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/
-         end
-
-         # Limit depth to 4 levels (3 colons maximum)
-         valid_tags.select { |tag| tag.count(':') < 4 }
-       end
-     end
-
-     # Default proposition extractor using RubyLLM chat
-     #
-     # @return [Proc] Callable that takes text and returns array of propositions
-     #
-     def default_proposition_extractor
-       lambda do |text|
-         require 'ruby_llm' unless defined?(RubyLLM)
-
-         # Configure RubyLLM for the proposition provider
-         configure_ruby_llm(@proposition_provider)
-
-         # Refresh models for Ollama to discover local models (thread-safe)
-         if @proposition_provider == :ollama
-           @ollama_refresh_mutex.synchronize do
-             unless @ollama_models_refreshed
-               RubyLLM.models.refresh!
-               @ollama_models_refreshed = true
-             end
-           end
-         end
-
-         # Normalize Ollama model name (ensure it has a tag like :latest)
-         model = @proposition_provider == :ollama ? normalize_ollama_model(@proposition_model) : @proposition_model
-
-         # Build prompt
-         prompt = <<~PROMPT
-           Extract all ATOMIC factual propositions from the following text.
-
-           An atomic proposition expresses exactly ONE relationship or fact. If a statement combines multiple pieces of information (what, where, when, who, why), split it into separate propositions.
-
-           CRITICAL: Each proposition must contain only ONE of these:
-           - ONE subject-verb relationship
-           - ONE attribute or property
-           - ONE location, time, or qualifier
-
-           Example input: "Todd Warren plans to pursue a PhD in Music at the University of Texas."
-
-           CORRECT atomic output:
-           - Todd Warren plans to pursue a PhD.
-           - Todd Warren plans to study Music.
-           - Todd Warren plans to attend the University of Texas.
-           - The University of Texas offers a PhD program in Music.
-
-           WRONG (not atomic - combines multiple facts):
-           - Todd Warren plans to pursue a PhD in Music at the University of Texas.
-
-           Example input: "In 1969, Neil Armstrong became the first person to walk on the Moon during the Apollo 11 mission."
-
-           CORRECT atomic output:
-           - Neil Armstrong was an astronaut.
-           - Neil Armstrong walked on the Moon.
-           - Neil Armstrong walked on the Moon in 1969.
-           - Neil Armstrong was the first person to walk on the Moon.
-           - The Apollo 11 mission occurred in 1969.
-           - Neil Armstrong participated in the Apollo 11 mission.
-
-           Rules:
-           1. Split compound statements into separate atomic facts
-           2. Each proposition = exactly one fact
-           3. Use full names, never pronouns
-           4. Make each proposition understandable in isolation
-           5. Prefer more propositions over fewer
-
-           TEXT: #{text}
-
-           Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
-         PROMPT
-
-         system_prompt = <<~SYSTEM.strip
-           You are an atomic fact extraction system. Your goal is maximum decomposition.
-
-           IMPORTANT: Break every statement into its smallest possible factual units.
-
-           A statement like "John bought a red car in Paris" contains FOUR facts:
-           - John bought a car.
-           - The car John bought is red.
-           - John made a purchase in Paris.
-           - John bought a car in Paris.
-
-           Always ask: "Can this be split further?" If yes, split it.
-
-           Rules:
-           1. ONE fact per proposition (subject-predicate or subject-attribute)
-           2. Never combine location + action + time in one proposition
-           3. Never combine multiple attributes in one proposition
-           4. Use full names, never pronouns
-           5. Each proposition must stand alone without context
-
-           Output ONLY propositions, one per line, prefixed with a dash (-).
-         SYSTEM
-
-         # Use RubyLLM chat for proposition extraction
-         chat = RubyLLM.chat(model: model)
-         chat.with_instructions(system_prompt)
-         response = chat.ask(prompt)
-
-         # Extract text from response
-         response_text = extract_text_from_response(response)
-
-         # Parse propositions (remove dash prefix, filter empty lines)
-         response_text.to_s
-           .split("\n")
-           .map(&:strip)
-           .map { |line| line.sub(/^[-*•]\s*/, '') }
-           .map(&:strip)
-           .reject(&:empty?)
-       end
-     end
-
-     # Extract text content from RubyLLM chat response
-     #
-     # @param response [Object] RubyLLM chat response
-     # @return [String] Response text
-     #
-     def extract_text_from_response(response)
-       return '' unless response
-
-       case response
-       when String
-         response
-       when ->(r) { r.respond_to?(:content) }
-         response.content.to_s
-       when ->(r) { r.respond_to?(:text) }
-         response.text.to_s
-       when ->(r) { r.respond_to?(:to_s) }
-         response.to_s
-       else
-         ''
-       end
-     end
-   end
-
-   class << self
-     attr_writer :configuration
-
-     # Get current environment
-     #
-     # Priority: HTM_ENV > RAILS_ENV > RACK_ENV > 'development'
-     #
-     # @return [String] Current environment name
-     #
-     def env
-       ENV['HTM_ENV'] || ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
-     end
-
-     # Check if running in test environment
-     #
-     # @return [Boolean]
-     #
-     def test?
-       env == 'test'
-     end
-
-     # Check if running in development environment
-     #
-     # @return [Boolean]
-     #
-     def development?
-       env == 'development'
-     end
-
-     # Check if running in production environment
-     #
-     # @return [Boolean]
-     #
-     def production?
-       env == 'production'
-     end
-
-     # Get current configuration
-     #
-     # @return [HTM::Configuration]
-     #
-     def configuration
-       @configuration ||= Configuration.new
-     end
-
-     # Configure HTM
-     #
-     # @yield [config] Configuration object
-     # @yieldparam config [HTM::Configuration]
-     #
-     # @example Custom configuration
-     #   HTM.configure do |config|
-     #     config.embedding_generator = ->(text) { MyEmbedder.embed(text) }
-     #     config.tag_extractor = ->(text, ontology) { MyTagger.extract(text, ontology) }
-     #   end
-     #
-     # @example Default configuration
-     #   HTM.configure # Uses RubyLLM defaults
-     #
-     def configure
-       yield(configuration) if block_given?
-       configuration.validate!
-       configuration
-     end
-
-     # Reset configuration to defaults
-     def reset_configuration!
-       @configuration = Configuration.new
-     end
-
-     # Generate embedding using EmbeddingService
-     #
-     # @param text [String] Text to embed
-     # @return [Array<Float>] Embedding vector (original, not padded)
-     #
-     def embed(text)
-       result = HTM::EmbeddingService.generate(text)
-       result[:embedding]
-     end
-
-     # Extract tags using TagService
-     #
-     # @param text [String] Text to analyze
-     # @param existing_ontology [Array<String>] Sample of existing tags for context
-     # @return [Array<String>] Extracted and validated tag names
-     #
-     def extract_tags(text, existing_ontology: [])
-       HTM::TagService.extract(text, existing_ontology: existing_ontology)
-     end
-
-     # Extract propositions using PropositionService
-     #
-     # @param text [String] Text to analyze
-     # @return [Array<String>] Extracted atomic propositions
-     #
-     def extract_propositions(text)
-       HTM::PropositionService.extract(text)
-     end
-
-     # Count tokens using configured counter
-     #
-     # @param text [String] Text to count tokens for
-     # @return [Integer] Token count
-     #
-     def count_tokens(text)
-       configuration.token_counter.call(text)
-     rescue StandardError => e
-       raise HTM::ValidationError, "Token counting failed: #{e.message}"
-     end
-
-     # Get configured logger
-     #
-     # @return [Logger] Configured logger instance
-     #
-     def logger
-       configuration.logger
-     end
-   end
- end
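
For reference, the Configuration class removed above was driven entirely through HTM.configure. A minimal sketch of that (now removed) block-style API, reproduced from the class's own @example documentation with the Ollama defaults; per the initializer above, any of these settings could instead be supplied via the matching HTM_* environment variable:

# Sketch of the 0.0.17 block-style API, taken from the removed file's own
# @example docs. In 0.0.18 configuration moves to lib/htm/config.rb with
# YAML defaults (config/defaults.yml), which this hunk does not show.
HTM.configure do |config|
  config.embedding_provider = :ollama            # any of Configuration::SUPPORTED_PROVIDERS
  config.embedding_model    = 'nomic-embed-text' # Ollama names are normalized to '...:latest'
  config.tag_provider       = :ollama
  config.tag_model          = 'llama3'
  config.ollama_url         = 'http://localhost:11434'
end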