htm 0.0.10 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dictate.toml +46 -0
- data/.envrc +2 -0
- data/CHANGELOG.md +86 -3
- data/README.md +86 -7
- data/Rakefile +14 -2
- data/bin/htm_mcp.rb +621 -0
- data/config/database.yml +20 -13
- data/db/migrate/00010_add_soft_delete_to_associations.rb +29 -0
- data/db/migrate/00011_add_performance_indexes.rb +21 -0
- data/db/migrate/00012_add_tags_trigram_index.rb +18 -0
- data/db/migrate/00013_enable_lz4_compression.rb +43 -0
- data/db/schema.sql +49 -92
- data/docs/api/index.md +1 -1
- data/docs/api/yard/HTM.md +2 -4
- data/docs/architecture/index.md +1 -1
- data/docs/development/index.md +1 -1
- data/docs/getting-started/index.md +1 -1
- data/docs/guides/index.md +1 -1
- data/docs/images/telemetry-architecture.svg +153 -0
- data/docs/telemetry.md +391 -0
- data/examples/README.md +171 -1
- data/examples/cli_app/README.md +1 -1
- data/examples/cli_app/htm_cli.rb +1 -1
- data/examples/mcp_client.rb +529 -0
- data/examples/sinatra_app/app.rb +1 -1
- data/examples/telemetry/README.md +147 -0
- data/examples/telemetry/SETUP_README.md +169 -0
- data/examples/telemetry/demo.rb +498 -0
- data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
- data/lib/htm/configuration.rb +261 -70
- data/lib/htm/database.rb +46 -22
- data/lib/htm/embedding_service.rb +24 -14
- data/lib/htm/errors.rb +15 -1
- data/lib/htm/jobs/generate_embedding_job.rb +19 -0
- data/lib/htm/jobs/generate_propositions_job.rb +103 -0
- data/lib/htm/jobs/generate_tags_job.rb +24 -0
- data/lib/htm/loaders/markdown_chunker.rb +79 -0
- data/lib/htm/loaders/markdown_loader.rb +41 -15
- data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
- data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
- data/lib/htm/long_term_memory/node_operations.rb +209 -0
- data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
- data/lib/htm/long_term_memory/robot_operations.rb +34 -0
- data/lib/htm/long_term_memory/tag_operations.rb +428 -0
- data/lib/htm/long_term_memory/vector_search.rb +109 -0
- data/lib/htm/long_term_memory.rb +51 -1153
- data/lib/htm/models/node.rb +35 -2
- data/lib/htm/models/node_tag.rb +31 -0
- data/lib/htm/models/robot_node.rb +31 -0
- data/lib/htm/models/tag.rb +44 -0
- data/lib/htm/proposition_service.rb +169 -0
- data/lib/htm/query_cache.rb +214 -0
- data/lib/htm/sql_builder.rb +178 -0
- data/lib/htm/tag_service.rb +16 -6
- data/lib/htm/tasks.rb +8 -2
- data/lib/htm/telemetry.rb +224 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm.rb +64 -3
- data/lib/tasks/doc.rake +1 -1
- data/lib/tasks/htm.rake +259 -13
- data/mkdocs.yml +96 -96
- metadata +75 -18
- data/.aigcm_msg +0 -1
- data/.claude/settings.local.json +0 -92
- data/CLAUDE.md +0 -603
- data/examples/cli_app/temp.log +0 -93
- data/lib/htm/loaders/paragraph_chunker.rb +0 -112
- data/notes/ARCHITECTURE_REVIEW.md +0 -1167
- data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
- data/notes/next_steps.md +0 -100
- data/notes/plan.md +0 -627
- data/notes/tag_ontology_enhancement_ideas.md +0 -222
- data/notes/timescaledb_removal_summary.md +0 -200
data/lib/htm/configuration.rb
CHANGED
|
@@ -57,13 +57,35 @@ class HTM
|
|
|
57
57
|
# end
|
|
58
58
|
#
|
|
59
59
|
class Configuration
|
|
60
|
-
attr_accessor :embedding_generator, :tag_extractor, :token_counter
|
|
60
|
+
attr_accessor :embedding_generator, :tag_extractor, :proposition_extractor, :token_counter
|
|
61
61
|
attr_accessor :embedding_model, :embedding_provider, :embedding_dimensions
|
|
62
62
|
attr_accessor :tag_model, :tag_provider
|
|
63
|
-
attr_accessor :
|
|
63
|
+
attr_accessor :proposition_model, :proposition_provider, :extract_propositions
|
|
64
|
+
attr_accessor :embedding_timeout, :tag_timeout, :proposition_timeout, :connection_timeout
|
|
64
65
|
attr_accessor :logger
|
|
65
66
|
attr_accessor :job_backend
|
|
66
67
|
attr_accessor :week_start
|
|
68
|
+
attr_accessor :telemetry_enabled # Enable OpenTelemetry metrics (default: false)
|
|
69
|
+
|
|
70
|
+
# Limit configuration
|
|
71
|
+
attr_accessor :max_embedding_dimension # Max vector dimensions (default: 2000)
|
|
72
|
+
attr_accessor :max_tag_depth # Max tag hierarchy depth (default: 4)
|
|
73
|
+
|
|
74
|
+
# Chunking configuration (for file loading)
|
|
75
|
+
attr_accessor :chunk_size # Max characters per chunk (default: 1024)
|
|
76
|
+
attr_accessor :chunk_overlap # Character overlap between chunks (default: 64)
|
|
77
|
+
|
|
78
|
+
# Circuit breaker configuration
|
|
79
|
+
attr_accessor :circuit_breaker_failure_threshold # Failures before opening (default: 5)
|
|
80
|
+
attr_accessor :circuit_breaker_reset_timeout # Seconds before half-open (default: 60)
|
|
81
|
+
attr_accessor :circuit_breaker_half_open_max_calls # Successes to close (default: 3)
|
|
82
|
+
|
|
83
|
+
# Relevance scoring weights (must sum to 1.0)
|
|
84
|
+
attr_accessor :relevance_semantic_weight # Vector similarity weight (default: 0.5)
|
|
85
|
+
attr_accessor :relevance_tag_weight # Tag overlap weight (default: 0.3)
|
|
86
|
+
attr_accessor :relevance_recency_weight # Temporal freshness weight (default: 0.1)
|
|
87
|
+
attr_accessor :relevance_access_weight # Access frequency weight (default: 0.1)
|
|
88
|
+
attr_accessor :relevance_recency_half_life_hours # Decay half-life in hours (default: 168 = 1 week)
|
|
67
89
|
|
|
68
90
|
# Provider-specific API keys and endpoints
|
|
69
91
|
attr_accessor :openai_api_key, :openai_organization, :openai_project
|
|
@@ -97,48 +119,77 @@ class HTM
|
|
|
97
119
|
|
|
98
120
|
def initialize
|
|
99
121
|
# Default configuration - Ollama for local development
|
|
100
|
-
|
|
101
|
-
@
|
|
102
|
-
@
|
|
122
|
+
# All settings can be overridden via HTM_* environment variables
|
|
123
|
+
@embedding_provider = ENV.fetch('HTM_EMBEDDING_PROVIDER', 'ollama').to_sym
|
|
124
|
+
@embedding_model = ENV.fetch('HTM_EMBEDDING_MODEL', 'nomic-embed-text:latest')
|
|
125
|
+
@embedding_dimensions = ENV.fetch('HTM_EMBEDDING_DIMENSIONS', 768).to_i
|
|
126
|
+
|
|
127
|
+
@tag_provider = ENV.fetch('HTM_TAG_PROVIDER', 'ollama').to_sym
|
|
128
|
+
@tag_model = ENV.fetch('HTM_TAG_MODEL', 'gemma3:latest')
|
|
103
129
|
|
|
104
|
-
@
|
|
105
|
-
@
|
|
130
|
+
@proposition_provider = ENV.fetch('HTM_PROPOSITION_PROVIDER', 'ollama').to_sym
|
|
131
|
+
@proposition_model = ENV.fetch('HTM_PROPOSITION_MODEL', 'gemma3:latest')
|
|
132
|
+
@extract_propositions = ENV.fetch('HTM_EXTRACT_PROPOSITIONS', 'false').downcase == 'true'
|
|
106
133
|
|
|
107
134
|
# Provider credentials from environment variables
|
|
108
|
-
|
|
109
|
-
@
|
|
110
|
-
@
|
|
111
|
-
@
|
|
112
|
-
@
|
|
113
|
-
@
|
|
114
|
-
@
|
|
115
|
-
@
|
|
116
|
-
@
|
|
117
|
-
@
|
|
118
|
-
@
|
|
119
|
-
@
|
|
120
|
-
@
|
|
121
|
-
@
|
|
122
|
-
@
|
|
135
|
+
# These use standard provider env var names for compatibility
|
|
136
|
+
@openai_api_key = ENV.fetch('HTM_OPENAI_API_KEY', ENV['OPENAI_API_KEY'])
|
|
137
|
+
@openai_organization = ENV.fetch('HTM_OPENAI_ORGANIZATION', ENV['OPENAI_ORGANIZATION'])
|
|
138
|
+
@openai_project = ENV.fetch('HTM_OPENAI_PROJECT', ENV['OPENAI_PROJECT'])
|
|
139
|
+
@anthropic_api_key = ENV.fetch('HTM_ANTHROPIC_API_KEY', ENV['ANTHROPIC_API_KEY'])
|
|
140
|
+
@gemini_api_key = ENV.fetch('HTM_GEMINI_API_KEY', ENV['GEMINI_API_KEY'])
|
|
141
|
+
@azure_api_key = ENV.fetch('HTM_AZURE_API_KEY', ENV['AZURE_OPENAI_API_KEY'])
|
|
142
|
+
@azure_endpoint = ENV.fetch('HTM_AZURE_ENDPOINT', ENV['AZURE_OPENAI_ENDPOINT'])
|
|
143
|
+
@azure_api_version = ENV.fetch('HTM_AZURE_API_VERSION', ENV.fetch('AZURE_OPENAI_API_VERSION', '2024-02-01'))
|
|
144
|
+
@ollama_url = ENV.fetch('HTM_OLLAMA_URL', ENV['OLLAMA_API_BASE'] || ENV['OLLAMA_URL'] || 'http://localhost:11434')
|
|
145
|
+
@huggingface_api_key = ENV.fetch('HTM_HUGGINGFACE_API_KEY', ENV['HUGGINGFACE_API_KEY'])
|
|
146
|
+
@openrouter_api_key = ENV.fetch('HTM_OPENROUTER_API_KEY', ENV['OPENROUTER_API_KEY'])
|
|
147
|
+
@bedrock_access_key = ENV.fetch('HTM_BEDROCK_ACCESS_KEY', ENV['AWS_ACCESS_KEY_ID'])
|
|
148
|
+
@bedrock_secret_key = ENV.fetch('HTM_BEDROCK_SECRET_KEY', ENV['AWS_SECRET_ACCESS_KEY'])
|
|
149
|
+
@bedrock_region = ENV.fetch('HTM_BEDROCK_REGION', ENV.fetch('AWS_REGION', 'us-east-1'))
|
|
150
|
+
@deepseek_api_key = ENV.fetch('HTM_DEEPSEEK_API_KEY', ENV['DEEPSEEK_API_KEY'])
|
|
123
151
|
|
|
124
152
|
# Timeout settings (in seconds) - apply to all LLM providers
|
|
125
|
-
@embedding_timeout
|
|
126
|
-
@tag_timeout
|
|
127
|
-
@
|
|
153
|
+
@embedding_timeout = ENV.fetch('HTM_EMBEDDING_TIMEOUT', 120).to_i
|
|
154
|
+
@tag_timeout = ENV.fetch('HTM_TAG_TIMEOUT', 180).to_i
|
|
155
|
+
@proposition_timeout = ENV.fetch('HTM_PROPOSITION_TIMEOUT', 180).to_i
|
|
156
|
+
@connection_timeout = ENV.fetch('HTM_CONNECTION_TIMEOUT', 30).to_i
|
|
157
|
+
|
|
158
|
+
# Limit settings
|
|
159
|
+
@max_embedding_dimension = ENV.fetch('HTM_MAX_EMBEDDING_DIMENSION', 2000).to_i
|
|
160
|
+
@max_tag_depth = ENV.fetch('HTM_MAX_TAG_DEPTH', 4).to_i
|
|
161
|
+
|
|
162
|
+
# Chunking settings (for file loading)
|
|
163
|
+
@chunk_size = ENV.fetch('HTM_CHUNK_SIZE', 1024).to_i
|
|
164
|
+
@chunk_overlap = ENV.fetch('HTM_CHUNK_OVERLAP', 64).to_i
|
|
165
|
+
|
|
166
|
+
# Circuit breaker settings
|
|
167
|
+
@circuit_breaker_failure_threshold = ENV.fetch('HTM_CIRCUIT_BREAKER_FAILURE_THRESHOLD', 5).to_i
|
|
168
|
+
@circuit_breaker_reset_timeout = ENV.fetch('HTM_CIRCUIT_BREAKER_RESET_TIMEOUT', 60).to_i
|
|
169
|
+
@circuit_breaker_half_open_max_calls = ENV.fetch('HTM_CIRCUIT_BREAKER_HALF_OPEN_MAX_CALLS', 3).to_i
|
|
170
|
+
|
|
171
|
+
# Relevance scoring weights (should sum to 1.0)
|
|
172
|
+
@relevance_semantic_weight = ENV.fetch('HTM_RELEVANCE_SEMANTIC_WEIGHT', 0.5).to_f
|
|
173
|
+
@relevance_tag_weight = ENV.fetch('HTM_RELEVANCE_TAG_WEIGHT', 0.3).to_f
|
|
174
|
+
@relevance_recency_weight = ENV.fetch('HTM_RELEVANCE_RECENCY_WEIGHT', 0.1).to_f
|
|
175
|
+
@relevance_access_weight = ENV.fetch('HTM_RELEVANCE_ACCESS_WEIGHT', 0.1).to_f
|
|
176
|
+
@relevance_recency_half_life_hours = ENV.fetch('HTM_RELEVANCE_RECENCY_HALF_LIFE_HOURS', 168.0).to_f
|
|
128
177
|
|
|
129
178
|
# Default logger (STDOUT with INFO level)
|
|
130
|
-
@logger
|
|
179
|
+
@logger = default_logger
|
|
131
180
|
|
|
132
|
-
#
|
|
133
|
-
@job_backend
|
|
181
|
+
# Job backend: inline, thread, active_job, sidekiq (auto-detected if not set)
|
|
182
|
+
@job_backend = ENV['HTM_JOB_BACKEND'] ? ENV['HTM_JOB_BACKEND'].to_sym : detect_job_backend
|
|
134
183
|
|
|
135
|
-
# Timeframe parsing configuration
|
|
136
|
-
|
|
137
|
-
|
|
184
|
+
# Timeframe parsing configuration: sunday or monday
|
|
185
|
+
@week_start = ENV.fetch('HTM_WEEK_START', 'sunday').to_sym
|
|
186
|
+
|
|
187
|
+
# Telemetry (OpenTelemetry metrics)
|
|
188
|
+
@telemetry_enabled = ENV.fetch('HTM_TELEMETRY_ENABLED', 'false').downcase == 'true'
|
|
138
189
|
|
|
139
190
|
# Thread-safe Ollama model refresh tracking
|
|
140
|
-
@ollama_models_refreshed
|
|
141
|
-
@ollama_refresh_mutex
|
|
191
|
+
@ollama_models_refreshed = false
|
|
192
|
+
@ollama_refresh_mutex = Mutex.new
|
|
142
193
|
|
|
143
194
|
# Set default implementations
|
|
144
195
|
reset_to_defaults
|
|
@@ -148,6 +199,7 @@ class HTM
|
|
|
148
199
|
def reset_to_defaults
|
|
149
200
|
@embedding_generator = default_embedding_generator
|
|
150
201
|
@tag_extractor = default_tag_extractor
|
|
202
|
+
@proposition_extractor = default_proposition_extractor
|
|
151
203
|
@token_counter = default_token_counter
|
|
152
204
|
end
|
|
153
205
|
|
|
@@ -161,6 +213,10 @@ class HTM
|
|
|
161
213
|
raise HTM::ValidationError, "tag_extractor must be callable (proc, lambda, or object responding to :call)"
|
|
162
214
|
end
|
|
163
215
|
|
|
216
|
+
unless @proposition_extractor.respond_to?(:call)
|
|
217
|
+
raise HTM::ValidationError, "proposition_extractor must be callable (proc, lambda, or object responding to :call)"
|
|
218
|
+
end
|
|
219
|
+
|
|
164
220
|
unless @token_counter.respond_to?(:call)
|
|
165
221
|
raise HTM::ValidationError, "token_counter must be callable (proc, lambda, or object responding to :call)"
|
|
166
222
|
end
|
|
@@ -185,6 +241,10 @@ class HTM
|
|
|
185
241
|
if @tag_provider && !SUPPORTED_PROVIDERS.include?(@tag_provider)
|
|
186
242
|
raise HTM::ValidationError, "tag_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@tag_provider.inspect})"
|
|
187
243
|
end
|
|
244
|
+
|
|
245
|
+
if @proposition_provider && !SUPPORTED_PROVIDERS.include?(@proposition_provider)
|
|
246
|
+
raise HTM::ValidationError, "proposition_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@proposition_provider.inspect})"
|
|
247
|
+
end
|
|
188
248
|
end
|
|
189
249
|
|
|
190
250
|
# Normalize Ollama model name to include tag if missing
|
|
@@ -207,7 +267,9 @@ class HTM
|
|
|
207
267
|
# @param provider [Symbol] The provider to configure (:openai, :anthropic, etc.)
|
|
208
268
|
#
|
|
209
269
|
def configure_ruby_llm(provider = nil)
|
|
210
|
-
require
|
|
270
|
+
# Always require ruby_llm to ensure full module is loaded
|
|
271
|
+
# (require is idempotent, and defined?(RubyLLM) can be true before configure method exists)
|
|
272
|
+
require 'ruby_llm'
|
|
211
273
|
|
|
212
274
|
provider ||= @embedding_provider
|
|
213
275
|
|
|
@@ -400,57 +462,68 @@ class HTM
|
|
|
400
462
|
model = @tag_provider == :ollama ? normalize_ollama_model(@tag_model) : @tag_model
|
|
401
463
|
|
|
402
464
|
# Build prompt
|
|
403
|
-
|
|
465
|
+
taxonomy_context = if existing_ontology.any?
|
|
404
466
|
sample_tags = existing_ontology.sample([existing_ontology.size, 20].min)
|
|
405
|
-
"Existing
|
|
467
|
+
"Existing taxonomy paths: #{sample_tags.join(', ')}\n\nPrefer reusing these paths when the text matches their domain."
|
|
406
468
|
else
|
|
407
|
-
"This is a new
|
|
469
|
+
"This is a new taxonomy - establish clear root categories."
|
|
408
470
|
end
|
|
409
471
|
|
|
410
472
|
prompt = <<~PROMPT
|
|
411
|
-
Extract
|
|
473
|
+
Extract classification tags for this text using a HIERARCHICAL TAXONOMY.
|
|
412
474
|
|
|
413
|
-
|
|
414
|
-
Format: root:level1:level2:level3 (use colons to separate levels)
|
|
475
|
+
A hierarchical taxonomy is a tree where each concept has exactly ONE parent path:
|
|
415
476
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
477
|
+
domain
|
|
478
|
+
├── category
|
|
479
|
+
│ ├── subcategory
|
|
480
|
+
│ │ └── specific-term
|
|
481
|
+
│ └── subcategory
|
|
482
|
+
└── category
|
|
483
|
+
|
|
484
|
+
#{taxonomy_context}
|
|
485
|
+
|
|
486
|
+
TAG FORMAT: domain:category:subcategory:term (colon-separated, max 4 levels)
|
|
487
|
+
|
|
488
|
+
LEVEL GUIDELINES:
|
|
489
|
+
- Level 1 (domain): Broad field (database, ai, web, security, devops)
|
|
490
|
+
- Level 2 (category): Major subdivision (database:relational, ai:machine-learning)
|
|
491
|
+
- Level 3 (subcategory): Specific area (database:relational:postgresql)
|
|
492
|
+
- Level 4 (term): Fine detail, use sparingly (database:relational:postgresql:extensions)
|
|
493
|
+
|
|
494
|
+
RULES:
|
|
495
|
+
1. Each concept belongs to ONE path only (no duplicates across branches)
|
|
496
|
+
2. Use lowercase, hyphens for multi-word terms (natural-language-processing)
|
|
497
|
+
3. Return 2-5 tags that best classify this text
|
|
498
|
+
4. Match existing taxonomy paths when applicable
|
|
499
|
+
5. More general tags are often better than overly specific ones
|
|
500
|
+
|
|
501
|
+
GOOD EXAMPLES:
|
|
502
|
+
- database:postgresql
|
|
503
|
+
- ai:machine-learning:embeddings
|
|
504
|
+
- web:api:rest
|
|
505
|
+
- programming:ruby:gems
|
|
506
|
+
|
|
507
|
+
BAD EXAMPLES:
|
|
508
|
+
- postgresql (missing domain - where does it belong?)
|
|
509
|
+
- database:postgresql AND data:storage:postgresql (duplicate concept)
|
|
510
|
+
- ai:ml:nlp:transformers:bert:embeddings (too deep)
|
|
437
511
|
|
|
438
512
|
TEXT: #{text}
|
|
439
513
|
|
|
440
|
-
Return ONLY
|
|
514
|
+
Return ONLY tags, one per line.
|
|
441
515
|
PROMPT
|
|
442
516
|
|
|
443
517
|
system_prompt = <<~SYSTEM.strip
|
|
444
|
-
You are a
|
|
518
|
+
You are a taxonomy classifier that assigns texts to a hierarchical classification tree.
|
|
445
519
|
|
|
446
|
-
|
|
447
|
-
1. Extract hierarchical tags in format: root:subtopic:detail
|
|
448
|
-
2. Maintain consistency with existing ontology (no duplicates)
|
|
449
|
-
3. Prevent circular references and self-containing concepts
|
|
450
|
-
4. Keep hierarchies at consistent depth levels
|
|
451
|
-
5. Choose PRIMARY locations for concepts (no multi-parent confusion)
|
|
520
|
+
Core principle: Each concept has ONE canonical location in the tree. If "postgresql" exists under "database", never create it elsewhere.
|
|
452
521
|
|
|
453
|
-
|
|
522
|
+
Your task:
|
|
523
|
+
1. Identify the domains/topics present in the text
|
|
524
|
+
2. Build paths from general (root) to specific (leaf)
|
|
525
|
+
3. Reuse existing taxonomy branches when they fit
|
|
526
|
+
4. Output 2-5 classification paths, one per line
|
|
454
527
|
SYSTEM
|
|
455
528
|
|
|
456
529
|
# Use RubyLLM chat for tag extraction
|
|
@@ -474,6 +547,115 @@ class HTM
|
|
|
474
547
|
end
|
|
475
548
|
end
|
|
476
549
|
|
|
550
|
+
# Default proposition extractor using RubyLLM chat
|
|
551
|
+
#
|
|
552
|
+
# @return [Proc] Callable that takes text and returns array of propositions
|
|
553
|
+
#
|
|
554
|
+
def default_proposition_extractor
|
|
555
|
+
lambda do |text|
|
|
556
|
+
require 'ruby_llm' unless defined?(RubyLLM)
|
|
557
|
+
|
|
558
|
+
# Configure RubyLLM for the proposition provider
|
|
559
|
+
configure_ruby_llm(@proposition_provider)
|
|
560
|
+
|
|
561
|
+
# Refresh models for Ollama to discover local models (thread-safe)
|
|
562
|
+
if @proposition_provider == :ollama
|
|
563
|
+
@ollama_refresh_mutex.synchronize do
|
|
564
|
+
unless @ollama_models_refreshed
|
|
565
|
+
RubyLLM.models.refresh!
|
|
566
|
+
@ollama_models_refreshed = true
|
|
567
|
+
end
|
|
568
|
+
end
|
|
569
|
+
end
|
|
570
|
+
|
|
571
|
+
# Normalize Ollama model name (ensure it has a tag like :latest)
|
|
572
|
+
model = @proposition_provider == :ollama ? normalize_ollama_model(@proposition_model) : @proposition_model
|
|
573
|
+
|
|
574
|
+
# Build prompt
|
|
575
|
+
prompt = <<~PROMPT
|
|
576
|
+
Extract all ATOMIC factual propositions from the following text.
|
|
577
|
+
|
|
578
|
+
An atomic proposition expresses exactly ONE relationship or fact. If a statement combines multiple pieces of information (what, where, when, who, why), split it into separate propositions.
|
|
579
|
+
|
|
580
|
+
CRITICAL: Each proposition must contain only ONE of these:
|
|
581
|
+
- ONE subject-verb relationship
|
|
582
|
+
- ONE attribute or property
|
|
583
|
+
- ONE location, time, or qualifier
|
|
584
|
+
|
|
585
|
+
Example input: "Todd Warren plans to pursue a PhD in Music at the University of Texas."
|
|
586
|
+
|
|
587
|
+
CORRECT atomic output:
|
|
588
|
+
- Todd Warren plans to pursue a PhD.
|
|
589
|
+
- Todd Warren plans to study Music.
|
|
590
|
+
- Todd Warren plans to attend the University of Texas.
|
|
591
|
+
- The University of Texas offers a PhD program in Music.
|
|
592
|
+
|
|
593
|
+
WRONG (not atomic - combines multiple facts):
|
|
594
|
+
- Todd Warren plans to pursue a PhD in Music at the University of Texas.
|
|
595
|
+
|
|
596
|
+
Example input: "In 1969, Neil Armstrong became the first person to walk on the Moon during the Apollo 11 mission."
|
|
597
|
+
|
|
598
|
+
CORRECT atomic output:
|
|
599
|
+
- Neil Armstrong was an astronaut.
|
|
600
|
+
- Neil Armstrong walked on the Moon.
|
|
601
|
+
- Neil Armstrong walked on the Moon in 1969.
|
|
602
|
+
- Neil Armstrong was the first person to walk on the Moon.
|
|
603
|
+
- The Apollo 11 mission occurred in 1969.
|
|
604
|
+
- Neil Armstrong participated in the Apollo 11 mission.
|
|
605
|
+
|
|
606
|
+
Rules:
|
|
607
|
+
1. Split compound statements into separate atomic facts
|
|
608
|
+
2. Each proposition = exactly one fact
|
|
609
|
+
3. Use full names, never pronouns
|
|
610
|
+
4. Make each proposition understandable in isolation
|
|
611
|
+
5. Prefer more propositions over fewer
|
|
612
|
+
|
|
613
|
+
TEXT: #{text}
|
|
614
|
+
|
|
615
|
+
Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
|
|
616
|
+
PROMPT
|
|
617
|
+
|
|
618
|
+
system_prompt = <<~SYSTEM.strip
|
|
619
|
+
You are an atomic fact extraction system. Your goal is maximum decomposition.
|
|
620
|
+
|
|
621
|
+
IMPORTANT: Break every statement into its smallest possible factual units.
|
|
622
|
+
|
|
623
|
+
A statement like "John bought a red car in Paris" contains FOUR facts:
|
|
624
|
+
- John bought a car.
|
|
625
|
+
- The car John bought is red.
|
|
626
|
+
- John made a purchase in Paris.
|
|
627
|
+
- John bought a car in Paris.
|
|
628
|
+
|
|
629
|
+
Always ask: "Can this be split further?" If yes, split it.
|
|
630
|
+
|
|
631
|
+
Rules:
|
|
632
|
+
1. ONE fact per proposition (subject-predicate or subject-attribute)
|
|
633
|
+
2. Never combine location + action + time in one proposition
|
|
634
|
+
3. Never combine multiple attributes in one proposition
|
|
635
|
+
4. Use full names, never pronouns
|
|
636
|
+
5. Each proposition must stand alone without context
|
|
637
|
+
|
|
638
|
+
Output ONLY propositions, one per line, prefixed with a dash (-).
|
|
639
|
+
SYSTEM
|
|
640
|
+
|
|
641
|
+
# Use RubyLLM chat for proposition extraction
|
|
642
|
+
chat = RubyLLM.chat(model: model)
|
|
643
|
+
chat.with_instructions(system_prompt)
|
|
644
|
+
response = chat.ask(prompt)
|
|
645
|
+
|
|
646
|
+
# Extract text from response
|
|
647
|
+
response_text = extract_text_from_response(response)
|
|
648
|
+
|
|
649
|
+
# Parse propositions (remove dash prefix, filter empty lines)
|
|
650
|
+
response_text.to_s
|
|
651
|
+
.split("\n")
|
|
652
|
+
.map(&:strip)
|
|
653
|
+
.map { |line| line.sub(/^[-*•]\s*/, '') }
|
|
654
|
+
.map(&:strip)
|
|
655
|
+
.reject(&:empty?)
|
|
656
|
+
end
|
|
657
|
+
end
|
|
658
|
+
|
|
477
659
|
# Extract text content from RubyLLM chat response
|
|
478
660
|
#
|
|
479
661
|
# @param response [Object] RubyLLM chat response
|
|
@@ -553,6 +735,15 @@ class HTM
|
|
|
553
735
|
HTM::TagService.extract(text, existing_ontology: existing_ontology)
|
|
554
736
|
end
|
|
555
737
|
|
|
738
|
+
# Extract propositions using PropositionService
|
|
739
|
+
#
|
|
740
|
+
# @param text [String] Text to analyze
|
|
741
|
+
# @return [Array<String>] Extracted atomic propositions
|
|
742
|
+
#
|
|
743
|
+
def extract_propositions(text)
|
|
744
|
+
HTM::PropositionService.extract(text)
|
|
745
|
+
end
|
|
746
|
+
|
|
556
747
|
# Count tokens using configured counter
|
|
557
748
|
#
|
|
558
749
|
# @param text [String] Text to count tokens for
|
data/lib/htm/database.rb
CHANGED
|
@@ -105,15 +105,19 @@ class HTM
|
|
|
105
105
|
puts "=" * 100
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
-
# Drop all HTM tables
|
|
108
|
+
# Drop all HTM tables (respects RAILS_ENV)
|
|
109
109
|
#
|
|
110
|
-
# @param db_url [String] Database connection URL (uses
|
|
110
|
+
# @param db_url [String] Database connection URL (uses default_config if not provided)
|
|
111
111
|
# @return [void]
|
|
112
112
|
#
|
|
113
113
|
def drop(db_url = nil)
|
|
114
|
-
config = parse_connection_url(db_url
|
|
114
|
+
config = db_url ? parse_connection_url(db_url) : default_config
|
|
115
115
|
raise "Database configuration not found" unless config
|
|
116
116
|
|
|
117
|
+
env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
|
|
118
|
+
puts "Environment: #{env}"
|
|
119
|
+
puts "Database: #{config[:dbname]}"
|
|
120
|
+
|
|
117
121
|
conn = PG.connect(config)
|
|
118
122
|
|
|
119
123
|
tables = ['nodes', 'node_tags', 'tags', 'robots', 'robot_nodes', 'file_sources', 'schema_migrations']
|
|
@@ -171,15 +175,15 @@ class HTM
|
|
|
171
175
|
load seeds_file
|
|
172
176
|
end
|
|
173
177
|
|
|
174
|
-
# Dump current database schema to db/schema.sql
|
|
178
|
+
# Dump current database schema to db/schema.sql (respects RAILS_ENV)
|
|
175
179
|
#
|
|
176
180
|
# Uses pg_dump to create a clean SQL schema file without data
|
|
177
181
|
#
|
|
178
|
-
# @param db_url [String] Database connection URL (uses
|
|
182
|
+
# @param db_url [String] Database connection URL (uses default_config if not provided)
|
|
179
183
|
# @return [void]
|
|
180
184
|
#
|
|
181
185
|
def dump_schema(db_url = nil)
|
|
182
|
-
config = parse_connection_url(db_url
|
|
186
|
+
config = db_url ? parse_connection_url(db_url) : default_config
|
|
183
187
|
raise "Database configuration not found" unless config
|
|
184
188
|
|
|
185
189
|
schema_file = File.expand_path('../../db/schema.sql', __dir__)
|
|
@@ -231,15 +235,15 @@ class HTM
|
|
|
231
235
|
puts " Size: #{File.size(schema_file)} bytes"
|
|
232
236
|
end
|
|
233
237
|
|
|
234
|
-
# Load schema from db/schema.sql
|
|
238
|
+
# Load schema from db/schema.sql (respects RAILS_ENV)
|
|
235
239
|
#
|
|
236
240
|
# Uses psql to load the schema file
|
|
237
241
|
#
|
|
238
|
-
# @param db_url [String] Database connection URL (uses
|
|
242
|
+
# @param db_url [String] Database connection URL (uses default_config if not provided)
|
|
239
243
|
# @return [void]
|
|
240
244
|
#
|
|
241
245
|
def load_schema(db_url = nil)
|
|
242
|
-
config = parse_connection_url(db_url
|
|
246
|
+
config = db_url ? parse_connection_url(db_url) : default_config
|
|
243
247
|
raise "Database configuration not found" unless config
|
|
244
248
|
|
|
245
249
|
schema_file = File.expand_path('../../db/schema.sql', __dir__)
|
|
@@ -356,22 +360,25 @@ class HTM
|
|
|
356
360
|
puts " open #{doc_path}/README.md"
|
|
357
361
|
end
|
|
358
362
|
|
|
359
|
-
# Show database info
|
|
363
|
+
# Show database info (respects RAILS_ENV)
|
|
360
364
|
#
|
|
361
|
-
# @param db_url [String] Database connection URL (uses
|
|
365
|
+
# @param db_url [String] Database connection URL (uses default_config if not provided)
|
|
362
366
|
# @return [void]
|
|
363
367
|
#
|
|
364
368
|
def info(db_url = nil)
|
|
365
|
-
config = parse_connection_url(db_url
|
|
369
|
+
config = db_url ? parse_connection_url(db_url) : default_config
|
|
366
370
|
raise "Database configuration not found" unless config
|
|
367
371
|
|
|
372
|
+
env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
|
|
373
|
+
|
|
368
374
|
conn = PG.connect(config)
|
|
369
375
|
|
|
370
|
-
puts "\nHTM Database Information"
|
|
376
|
+
puts "\nHTM Database Information (#{env})"
|
|
371
377
|
puts "=" * 80
|
|
372
378
|
|
|
373
379
|
# Connection info
|
|
374
380
|
puts "\nConnection:"
|
|
381
|
+
puts " Environment: #{env}"
|
|
375
382
|
puts " Host: #{config[:host]}"
|
|
376
383
|
puts " Port: #{config[:port]}"
|
|
377
384
|
puts " Database: #{config[:dbname]}"
|
|
@@ -468,18 +475,35 @@ class HTM
|
|
|
468
475
|
}
|
|
469
476
|
end
|
|
470
477
|
|
|
471
|
-
# Get default database configuration
|
|
478
|
+
# Get default database configuration (respects RAILS_ENV)
|
|
472
479
|
#
|
|
473
|
-
#
|
|
480
|
+
# Uses ActiveRecordConfig which reads from config/database.yml
|
|
481
|
+
# and respects RAILS_ENV for environment-specific database selection.
|
|
482
|
+
#
|
|
483
|
+
# @return [Hash, nil] Connection configuration hash with PG-style keys
|
|
474
484
|
#
|
|
475
485
|
def default_config
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
486
|
+
require_relative 'active_record_config'
|
|
487
|
+
|
|
488
|
+
begin
|
|
489
|
+
ar_config = HTM::ActiveRecordConfig.load_database_config
|
|
490
|
+
|
|
491
|
+
# Convert ActiveRecord config keys to PG-style keys
|
|
492
|
+
{
|
|
493
|
+
host: ar_config[:host],
|
|
494
|
+
port: ar_config[:port],
|
|
495
|
+
dbname: ar_config[:database],
|
|
496
|
+
user: ar_config[:username],
|
|
497
|
+
password: ar_config[:password],
|
|
498
|
+
sslmode: ar_config[:sslmode] || 'prefer'
|
|
499
|
+
}
|
|
500
|
+
rescue StandardError
|
|
501
|
+
# Fallback to legacy behavior if ActiveRecordConfig fails
|
|
502
|
+
if ENV['HTM_DBURL']
|
|
503
|
+
parse_connection_url(ENV['HTM_DBURL'])
|
|
504
|
+
elsif ENV['HTM_DBNAME']
|
|
505
|
+
parse_connection_params
|
|
506
|
+
end
|
|
483
507
|
end
|
|
484
508
|
end
|
|
485
509
|
|
data/lib/htm/embedding_service.rb
CHANGED
|
@@ -15,23 +15,31 @@ class HTM
|
|
|
15
15
|
# The actual LLM call is delegated to HTM.configuration.embedding_generator
|
|
16
16
|
#
|
|
17
17
|
class EmbeddingService
|
|
18
|
-
MAX_DIMENSION = 2000 # Maximum dimension for pgvector HNSW index
|
|
19
|
-
|
|
20
18
|
# Circuit breaker for embedding API calls
|
|
21
19
|
@circuit_breaker = nil
|
|
22
20
|
@circuit_breaker_mutex = Mutex.new
|
|
23
21
|
|
|
24
22
|
class << self
|
|
23
|
+
# Maximum embedding dimension (configurable, default 2000)
|
|
24
|
+
#
|
|
25
|
+
# @return [Integer] Max dimensions for pgvector HNSW index
|
|
26
|
+
#
|
|
27
|
+
def max_dimension
|
|
28
|
+
HTM.configuration.max_embedding_dimension
|
|
29
|
+
end
|
|
30
|
+
|
|
25
31
|
# Get or create the circuit breaker for embedding service
|
|
26
32
|
#
|
|
27
33
|
# @return [HTM::CircuitBreaker] The circuit breaker instance
|
|
28
34
|
#
|
|
29
35
|
def circuit_breaker
|
|
36
|
+
config = HTM.configuration
|
|
30
37
|
@circuit_breaker_mutex.synchronize do
|
|
31
38
|
@circuit_breaker ||= HTM::CircuitBreaker.new(
|
|
32
39
|
name: 'embedding_service',
|
|
33
|
-
failure_threshold:
|
|
34
|
-
reset_timeout:
|
|
40
|
+
failure_threshold: config.circuit_breaker_failure_threshold,
|
|
41
|
+
reset_timeout: config.circuit_breaker_reset_timeout,
|
|
42
|
+
half_open_max_calls: config.circuit_breaker_half_open_max_calls
|
|
35
43
|
)
|
|
36
44
|
end
|
|
37
45
|
end
|
|
@@ -74,25 +82,26 @@ class HTM
|
|
|
74
82
|
actual_dimension = raw_embedding.length
|
|
75
83
|
|
|
76
84
|
# Check dimension limit
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
85
|
+
max_dim = max_dimension
|
|
86
|
+
if actual_dimension > max_dim
|
|
87
|
+
HTM.logger.warn "EmbeddingService: Embedding dimension #{actual_dimension} exceeds max #{max_dim}, truncating"
|
|
88
|
+
raw_embedding = raw_embedding[0...max_dim]
|
|
89
|
+
actual_dimension = max_dim
|
|
81
90
|
end
|
|
82
91
|
|
|
83
|
-
# Pad to
|
|
92
|
+
# Pad to max dimensions for consistent storage
|
|
84
93
|
storage_embedding = pad_embedding(raw_embedding)
|
|
85
94
|
|
|
86
95
|
# Format for database storage
|
|
87
96
|
storage_string = format_for_storage(storage_embedding)
|
|
88
97
|
|
|
89
|
-
HTM.logger.debug "EmbeddingService: Generated #{actual_dimension}D embedding (padded to #{
|
|
98
|
+
HTM.logger.debug "EmbeddingService: Generated #{actual_dimension}D embedding (padded to #{max_dim})"
|
|
90
99
|
|
|
91
100
|
{
|
|
92
101
|
embedding: raw_embedding,
|
|
93
102
|
dimension: actual_dimension,
|
|
94
103
|
storage_embedding: storage_string,
|
|
95
|
-
storage_dimension:
|
|
104
|
+
storage_dimension: max_dim
|
|
96
105
|
}
|
|
97
106
|
|
|
98
107
|
rescue HTM::CircuitBreakerOpenError
|
|
@@ -129,15 +138,16 @@ class HTM
|
|
|
129
138
|
end
|
|
130
139
|
end
|
|
131
140
|
|
|
132
|
-
# Pad embedding to
|
|
141
|
+
# Pad embedding to max_dimension with zeros
|
|
133
142
|
#
|
|
134
143
|
# @param embedding [Array<Float>] Original embedding
|
|
135
144
|
# @return [Array<Float>] Padded embedding
|
|
136
145
|
#
|
|
137
146
|
def self.pad_embedding(embedding)
|
|
138
|
-
|
|
147
|
+
max_dim = max_dimension
|
|
148
|
+
return embedding if embedding.length >= max_dim
|
|
139
149
|
|
|
140
|
-
embedding + Array.new(
|
|
150
|
+
embedding + Array.new(max_dim - embedding.length, 0.0)
|
|
141
151
|
end
|
|
142
152
|
|
|
143
153
|
# Format embedding for database storage
|