htm 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +1 -1
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +4 -4
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +1 -1
- data/.envrc +12 -25
- data/.irbrc +7 -7
- data/.tbls.yml +2 -2
- data/CHANGELOG.md +71 -0
- data/README.md +1 -1
- data/Rakefile +8 -3
- data/SETUP.md +12 -12
- data/bin/htm_mcp +0 -4
- data/db/seed_data/README.md +2 -2
- data/db/seeds.rb +2 -2
- data/docs/api/database.md +37 -37
- data/docs/api/htm.md +1 -1
- data/docs/api/yard/HTM/ActiveRecordConfig.md +2 -2
- data/docs/api/yard/HTM/Configuration.md +26 -15
- data/docs/api/yard/HTM/Database.md +7 -8
- data/docs/api/yard/HTM/JobAdapter.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
- data/docs/architecture/adrs/011-pgai-integration.md +4 -4
- data/docs/database_rake_tasks.md +5 -5
- data/docs/development/rake-tasks.md +11 -11
- data/docs/development/setup.md +21 -21
- data/docs/development/testing.md +1 -1
- data/docs/getting-started/installation.md +20 -20
- data/docs/getting-started/quick-start.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +1 -1
- data/docs/guides/mcp-server.md +17 -17
- data/docs/guides/robot-groups.md +8 -8
- data/docs/index.md +4 -4
- data/docs/multi_framework_support.md +8 -8
- data/docs/setup_local_database.md +19 -19
- data/docs/using_rake_tasks_in_your_app.md +14 -14
- data/examples/README.md +50 -6
- data/examples/basic_usage.rb +31 -21
- data/examples/cli_app/README.md +8 -8
- data/examples/cli_app/htm_cli.rb +5 -5
- data/examples/config_file_example/README.md +256 -0
- data/examples/config_file_example/config/htm.local.yml +34 -0
- data/examples/config_file_example/custom_config.yml +22 -0
- data/examples/config_file_example/show_config.rb +125 -0
- data/examples/custom_llm_configuration.rb +7 -7
- data/examples/example_app/Rakefile +2 -2
- data/examples/example_app/app.rb +8 -8
- data/examples/file_loader_usage.rb +9 -9
- data/examples/mcp_client.rb +5 -5
- data/examples/rails_app/Gemfile.lock +48 -56
- data/examples/rails_app/README.md +1 -1
- data/examples/robot_groups/multi_process.rb +5 -5
- data/examples/robot_groups/robot_worker.rb +5 -5
- data/examples/robot_groups/same_process.rb +9 -9
- data/examples/sinatra_app/app.rb +1 -1
- data/examples/timeframe_demo.rb +1 -1
- data/lib/htm/active_record_config.rb +12 -25
- data/lib/htm/circuit_breaker.rb +0 -2
- data/lib/htm/config/defaults.yml +246 -0
- data/lib/htm/config.rb +888 -0
- data/lib/htm/database.rb +23 -27
- data/lib/htm/embedding_service.rb +0 -4
- data/lib/htm/integrations/sinatra.rb +3 -7
- data/lib/htm/job_adapter.rb +1 -15
- data/lib/htm/jobs/generate_embedding_job.rb +1 -7
- data/lib/htm/jobs/generate_propositions_job.rb +2 -12
- data/lib/htm/jobs/generate_tags_job.rb +1 -8
- data/lib/htm/loaders/defaults_loader.rb +143 -0
- data/lib/htm/loaders/xdg_config_loader.rb +116 -0
- data/lib/htm/mcp/cli.rb +200 -58
- data/lib/htm/mcp/server.rb +3 -3
- data/lib/htm/proposition_service.rb +2 -12
- data/lib/htm/railtie.rb +3 -4
- data/lib/htm/tag_service.rb +1 -8
- data/lib/htm/version.rb +1 -1
- data/lib/htm.rb +124 -5
- metadata +24 -4
- data/config/database.yml +0 -77
- data/lib/htm/configuration.rb +0 -799
data/lib/htm/mcp/cli.rb
CHANGED
|
@@ -8,7 +8,7 @@ class HTM
|
|
|
8
8
|
|
|
9
9
|
def print_help
|
|
10
10
|
puts <<~HELP
|
|
11
|
-
HTM MCP Server - Memory management for AI assistants
|
|
11
|
+
HTM MCP Server v#{HTM::VERSION} - Memory management for AI assistants
|
|
12
12
|
|
|
13
13
|
USAGE:
|
|
14
14
|
htm_mcp [COMMAND]
|
|
@@ -20,66 +20,81 @@ class HTM
|
|
|
20
20
|
init Alias for setup
|
|
21
21
|
verify Verify database connection and extensions
|
|
22
22
|
stats Show memory statistics
|
|
23
|
+
config Output default configuration to STDOUT
|
|
23
24
|
version Show HTM version
|
|
24
25
|
help Show this help message
|
|
25
26
|
|
|
26
27
|
ENVIRONMENT VARIABLES:
|
|
27
28
|
|
|
29
|
+
Note: Nested config uses double underscores (e.g., HTM_EMBEDDING__PROVIDER)
|
|
30
|
+
|
|
28
31
|
Environment:
|
|
29
32
|
HTM_ENV Environment name: development, test, production
|
|
30
33
|
(priority: HTM_ENV > RAILS_ENV > RACK_ENV > 'development')
|
|
31
34
|
|
|
32
|
-
Database
|
|
33
|
-
|
|
35
|
+
Database:
|
|
36
|
+
HTM_DATABASE__URL PostgreSQL connection URL (preferred)
|
|
34
37
|
Example: postgresql://user:pass@localhost:5432/htm_development
|
|
35
|
-
|
|
36
|
-
Database (
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
HTM_AZURE_API_KEY Azure OpenAI API key
|
|
62
|
-
HTM_AZURE_ENDPOINT Azure OpenAI endpoint
|
|
63
|
-
|
|
64
|
-
Timeouts:
|
|
65
|
-
HTM_EMBEDDING_TIMEOUT Embedding timeout seconds (default: 120)
|
|
66
|
-
HTM_TAG_TIMEOUT Tag timeout seconds (default: 180)
|
|
67
|
-
HTM_CONNECTION_TIMEOUT Connection timeout seconds (default: 30)
|
|
38
|
+
HTM_DATABASE__HOST Database host (default: localhost)
|
|
39
|
+
HTM_DATABASE__PORT Database port (default: 5432)
|
|
40
|
+
HTM_DATABASE__NAME Database name
|
|
41
|
+
HTM_DATABASE__USER Database username
|
|
42
|
+
HTM_DATABASE__PASSWORD Database password
|
|
43
|
+
HTM_DATABASE__SSLMODE SSL mode (default: prefer)
|
|
44
|
+
HTM_DATABASE__POOL_SIZE Connection pool size (default: 10)
|
|
45
|
+
|
|
46
|
+
Embedding:
|
|
47
|
+
HTM_EMBEDDING__PROVIDER Provider (default: ollama)
|
|
48
|
+
HTM_EMBEDDING__MODEL Model (default: nomic-embed-text:latest)
|
|
49
|
+
HTM_EMBEDDING__DIMENSIONS Dimensions (default: 768)
|
|
50
|
+
HTM_EMBEDDING__TIMEOUT Timeout seconds (default: 120)
|
|
51
|
+
HTM_EMBEDDING__MAX_DIMENSION Max dimensions (default: 2000)
|
|
52
|
+
|
|
53
|
+
Tag Extraction:
|
|
54
|
+
HTM_TAG__PROVIDER Provider (default: ollama)
|
|
55
|
+
HTM_TAG__MODEL Model (default: gemma3:latest)
|
|
56
|
+
HTM_TAG__TIMEOUT Timeout seconds (default: 180)
|
|
57
|
+
HTM_TAG__MAX_DEPTH Max hierarchy depth (default: 4)
|
|
58
|
+
|
|
59
|
+
Proposition Extraction:
|
|
60
|
+
HTM_PROPOSITION__PROVIDER Provider (default: ollama)
|
|
61
|
+
HTM_PROPOSITION__MODEL Model (default: gemma3:latest)
|
|
62
|
+
HTM_PROPOSITION__TIMEOUT Timeout seconds (default: 180)
|
|
63
|
+
HTM_PROPOSITION__ENABLED Enable extraction (default: false)
|
|
68
64
|
|
|
69
65
|
Chunking:
|
|
70
|
-
|
|
71
|
-
|
|
66
|
+
HTM_CHUNKING__SIZE Max chars per chunk (default: 1024)
|
|
67
|
+
HTM_CHUNKING__OVERLAP Chunk overlap chars (default: 64)
|
|
68
|
+
|
|
69
|
+
Job Backend:
|
|
70
|
+
HTM_JOB__BACKEND Backend: inline, thread, active_job, sidekiq
|
|
71
|
+
|
|
72
|
+
Provider API Keys:
|
|
73
|
+
HTM_PROVIDERS__OLLAMA__URL Ollama URL (default: http://localhost:11434)
|
|
74
|
+
HTM_PROVIDERS__OPENAI__API_KEY OpenAI API key
|
|
75
|
+
HTM_PROVIDERS__ANTHROPIC__API_KEY Anthropic API key
|
|
76
|
+
HTM_PROVIDERS__GEMINI__API_KEY Google Gemini API key
|
|
77
|
+
HTM_PROVIDERS__AZURE__API_KEY Azure OpenAI API key
|
|
78
|
+
HTM_PROVIDERS__AZURE__ENDPOINT Azure OpenAI endpoint
|
|
72
79
|
|
|
73
80
|
Other:
|
|
74
|
-
HTM_LOG_LEVEL Log level (default:
|
|
75
|
-
|
|
81
|
+
HTM_LOG_LEVEL Log level (default: info)
|
|
82
|
+
HTM_CONNECTION_TIMEOUT Connection timeout seconds (default: 30)
|
|
76
83
|
HTM_TELEMETRY_ENABLED Enable OpenTelemetry (default: false)
|
|
77
|
-
|
|
78
|
-
|
|
84
|
+
|
|
85
|
+
OPTIONS:
|
|
86
|
+
-c, --config [PATH] Without PATH: output default config to STDOUT
|
|
87
|
+
With PATH: load config from YAML file
|
|
79
88
|
|
|
80
89
|
EXAMPLES:
|
|
90
|
+
# Generate a config file template
|
|
91
|
+
htm_mcp --config > my_config.yml
|
|
92
|
+
|
|
93
|
+
# Start server with custom config
|
|
94
|
+
htm_mcp --config my_config.yml
|
|
95
|
+
|
|
81
96
|
# First-time setup
|
|
82
|
-
export
|
|
97
|
+
export HTM_DATABASE__URL="postgresql://postgres@localhost:5432/htm"
|
|
83
98
|
htm_mcp setup
|
|
84
99
|
|
|
85
100
|
# Verify connection
|
|
@@ -100,7 +115,7 @@ class HTM
|
|
|
100
115
|
"htm-memory": {
|
|
101
116
|
"command": "/path/to/htm_mcp",
|
|
102
117
|
"env": {
|
|
103
|
-
"
|
|
118
|
+
"HTM_DATABASE__URL": "postgresql://postgres@localhost:5432/htm_development"
|
|
104
119
|
}
|
|
105
120
|
}
|
|
106
121
|
}
|
|
@@ -109,9 +124,9 @@ class HTM
|
|
|
109
124
|
end
|
|
110
125
|
|
|
111
126
|
def check_database_config!
|
|
112
|
-
unless ENV['
|
|
127
|
+
unless ENV['HTM_DATABASE__URL'] || ENV['HTM_DATABASE__NAME']
|
|
113
128
|
warn "Error: Database not configured."
|
|
114
|
-
warn "Set
|
|
129
|
+
warn "Set HTM_DATABASE__URL or HTM_DATABASE__NAME environment variable."
|
|
115
130
|
warn "Run 'htm_mcp help' for details."
|
|
116
131
|
exit 1
|
|
117
132
|
end
|
|
@@ -123,12 +138,12 @@ class HTM
|
|
|
123
138
|
warn ""
|
|
124
139
|
if msg.include?("does not exist")
|
|
125
140
|
warn "Suggestion: The database does not exist. Create it with:"
|
|
126
|
-
warn " createdb #{extract_dbname(ENV['
|
|
141
|
+
warn " createdb #{extract_dbname(ENV['HTM_DATABASE__URL'] || ENV['HTM_DATABASE__NAME'])}"
|
|
127
142
|
warn "Then initialize the schema with:"
|
|
128
143
|
warn " htm_mcp setup"
|
|
129
144
|
elsif msg.include?("password authentication failed") || msg.include?("no password supplied")
|
|
130
145
|
warn "Suggestion: Check your database credentials."
|
|
131
|
-
warn "Verify
|
|
146
|
+
warn "Verify HTM_DATABASE__URL has correct username and password:"
|
|
132
147
|
warn " postgresql://USER:PASSWORD@localhost:5432/DATABASE"
|
|
133
148
|
elsif msg.include?("connection refused") || msg.include?("could not connect")
|
|
134
149
|
warn "Suggestion: PostgreSQL server is not running or not accepting connections."
|
|
@@ -261,6 +276,101 @@ class HTM
|
|
|
261
276
|
pending_count
|
|
262
277
|
end
|
|
263
278
|
|
|
279
|
+
def output_default_config
|
|
280
|
+
defaults_path = File.expand_path('../config/defaults.yml', __dir__)
|
|
281
|
+
if File.exist?(defaults_path)
|
|
282
|
+
puts File.read(defaults_path)
|
|
283
|
+
else
|
|
284
|
+
warn "Error: defaults.yml not found at #{defaults_path}"
|
|
285
|
+
exit 1
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
def load_config_file(path)
|
|
290
|
+
unless File.exist?(path)
|
|
291
|
+
warn "Error: Config file not found: #{path}"
|
|
292
|
+
exit 1
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
begin
|
|
296
|
+
require 'yaml'
|
|
297
|
+
config_data = YAML.safe_load(
|
|
298
|
+
File.read(path),
|
|
299
|
+
permitted_classes: [Symbol],
|
|
300
|
+
symbolize_names: true,
|
|
301
|
+
aliases: true
|
|
302
|
+
) || {}
|
|
303
|
+
|
|
304
|
+
# Determine which section to use based on environment
|
|
305
|
+
env = HTM::Config.env.to_sym
|
|
306
|
+
base = config_data[:defaults] || {}
|
|
307
|
+
env_overrides = config_data[env] || {}
|
|
308
|
+
|
|
309
|
+
# Merge base with environment-specific overrides
|
|
310
|
+
merged = deep_merge(base, env_overrides)
|
|
311
|
+
|
|
312
|
+
apply_config(merged)
|
|
313
|
+
|
|
314
|
+
warn "Loaded configuration from: #{path}"
|
|
315
|
+
warn "Environment: #{env}"
|
|
316
|
+
rescue => e
|
|
317
|
+
warn "Error loading config file: #{e.message}"
|
|
318
|
+
warn e.backtrace.first(5).join("\n") if ENV['DEBUG']
|
|
319
|
+
exit 1
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
def deep_merge(base, override)
|
|
324
|
+
base.merge(override) do |_key, old_val, new_val|
|
|
325
|
+
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
326
|
+
deep_merge(old_val, new_val)
|
|
327
|
+
else
|
|
328
|
+
new_val.nil? ? old_val : new_val
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
332
|
+
|
|
333
|
+
def apply_config(config)
|
|
334
|
+
HTM.configure do |c|
|
|
335
|
+
# Apply nested sections
|
|
336
|
+
apply_section(c, :database, config[:database])
|
|
337
|
+
apply_section(c, :service, config[:service])
|
|
338
|
+
apply_section(c, :embedding, config[:embedding])
|
|
339
|
+
apply_section(c, :tag, config[:tag])
|
|
340
|
+
apply_section(c, :proposition, config[:proposition])
|
|
341
|
+
apply_section(c, :chunking, config[:chunking])
|
|
342
|
+
apply_section(c, :circuit_breaker, config[:circuit_breaker])
|
|
343
|
+
apply_section(c, :relevance, config[:relevance])
|
|
344
|
+
apply_section(c, :job, config[:job])
|
|
345
|
+
apply_section(c, :providers, config[:providers])
|
|
346
|
+
|
|
347
|
+
# Apply top-level scalars
|
|
348
|
+
c.week_start = config[:week_start] if config[:week_start]
|
|
349
|
+
c.connection_timeout = config[:connection_timeout] if config[:connection_timeout]
|
|
350
|
+
c.telemetry_enabled = config[:telemetry_enabled] unless config[:telemetry_enabled].nil?
|
|
351
|
+
c.log_level = config[:log_level] if config[:log_level]
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
def apply_section(config, section_name, values)
|
|
356
|
+
return unless values.is_a?(Hash)
|
|
357
|
+
|
|
358
|
+
section = config.send(section_name)
|
|
359
|
+
values.each do |key, value|
|
|
360
|
+
next if value.nil?
|
|
361
|
+
|
|
362
|
+
if value.is_a?(Hash)
|
|
363
|
+
# Handle nested sections (like providers.openai)
|
|
364
|
+
subsection = section.send(key)
|
|
365
|
+
value.each do |subkey, subvalue|
|
|
366
|
+
subsection.send("#{subkey}=", subvalue) unless subvalue.nil?
|
|
367
|
+
end
|
|
368
|
+
else
|
|
369
|
+
section.send("#{key}=", value)
|
|
370
|
+
end
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
|
|
264
374
|
def run_stats
|
|
265
375
|
puts "HTM Memory Statistics"
|
|
266
376
|
puts "====================="
|
|
@@ -298,35 +408,67 @@ class HTM
|
|
|
298
408
|
end
|
|
299
409
|
|
|
300
410
|
def run(args)
|
|
411
|
+
args = args.dup
|
|
412
|
+
|
|
413
|
+
# Handle -c / --config option first (can be combined with other commands)
|
|
414
|
+
config_loaded = handle_config_option(args)
|
|
415
|
+
|
|
416
|
+
# Process remaining command
|
|
301
417
|
case args[0]&.downcase
|
|
302
418
|
when 'help', '-h', '--help'
|
|
303
419
|
print_help
|
|
304
|
-
exit 0
|
|
305
420
|
when 'version', '-v', '--version'
|
|
306
421
|
puts "HTM #{HTM::VERSION}"
|
|
307
|
-
exit 0
|
|
308
422
|
when 'setup', 'init'
|
|
309
423
|
run_setup
|
|
310
|
-
exit 0
|
|
311
424
|
when 'verify'
|
|
312
425
|
run_verify
|
|
313
|
-
exit 0
|
|
314
426
|
when 'stats'
|
|
315
427
|
run_stats
|
|
316
|
-
|
|
428
|
+
when 'config'
|
|
429
|
+
output_default_config
|
|
317
430
|
when 'server', 'stdio', nil
|
|
318
431
|
# Return false to indicate server should start
|
|
319
432
|
# 'stdio' is accepted for compatibility with MCP clients that pass it as an argument
|
|
320
|
-
false
|
|
433
|
+
return false
|
|
321
434
|
when /^-/
|
|
322
|
-
|
|
323
|
-
|
|
435
|
+
$stderr.puts "Unknown option: #{args[0]}"
|
|
436
|
+
$stderr.puts "Run 'htm_mcp help' for usage."
|
|
324
437
|
exit 1
|
|
325
438
|
else
|
|
326
|
-
|
|
327
|
-
|
|
439
|
+
$stderr.puts "Unknown command: #{args[0]}"
|
|
440
|
+
$stderr.puts "Run 'htm_mcp help' for usage."
|
|
328
441
|
exit 1
|
|
329
442
|
end
|
|
443
|
+
true
|
|
444
|
+
end
|
|
445
|
+
|
|
446
|
+
# Handle -c / --config option, modifying args in place
|
|
447
|
+
# Returns true if config was loaded, nil otherwise
|
|
448
|
+
def handle_config_option(args)
|
|
449
|
+
config_idx = args.index('-c') || args.index('--config')
|
|
450
|
+
return nil unless config_idx
|
|
451
|
+
|
|
452
|
+
# Remove the -c/--config flag
|
|
453
|
+
args.delete_at(config_idx)
|
|
454
|
+
|
|
455
|
+
# Check if next arg is a path (not another flag or command)
|
|
456
|
+
next_arg = args[config_idx]
|
|
457
|
+
|
|
458
|
+
if next_arg.nil? || next_arg.start_with?('-') || command?(next_arg)
|
|
459
|
+
# No path provided - output default config and exit
|
|
460
|
+
output_default_config
|
|
461
|
+
exit 0
|
|
462
|
+
else
|
|
463
|
+
# Path provided - load config file
|
|
464
|
+
config_path = args.delete_at(config_idx)
|
|
465
|
+
load_config_file(config_path)
|
|
466
|
+
true
|
|
467
|
+
end
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
def command?(arg)
|
|
471
|
+
%w[help version setup init verify stats config server stdio].include?(arg.downcase)
|
|
330
472
|
end
|
|
331
473
|
end
|
|
332
474
|
end
|
data/lib/htm/mcp/server.rb
CHANGED
|
@@ -28,9 +28,9 @@ class HTM
|
|
|
28
28
|
end
|
|
29
29
|
|
|
30
30
|
def check_database_config!
|
|
31
|
-
unless ENV['
|
|
31
|
+
unless ENV['HTM_DATABASE__URL'] || ENV['HTM_DATABASE__NAME']
|
|
32
32
|
warn "Error: Database not configured."
|
|
33
|
-
warn "Set
|
|
33
|
+
warn "Set HTM_DATABASE__URL or HTM_DATABASE__NAME environment variable."
|
|
34
34
|
warn "Run 'htm_mcp help' for details."
|
|
35
35
|
exit 1
|
|
36
36
|
end
|
|
@@ -70,7 +70,7 @@ class HTM
|
|
|
70
70
|
|
|
71
71
|
def configure_htm!
|
|
72
72
|
HTM.configure do |config|
|
|
73
|
-
config.
|
|
73
|
+
config.job.backend = :inline # Synchronous for MCP responses
|
|
74
74
|
config.logger = @silent_logger # Silent logging for MCP
|
|
75
75
|
end
|
|
76
76
|
end
|
|
@@ -67,8 +67,6 @@ class HTM
|
|
|
67
67
|
# @raise [PropositionError] If extraction fails
|
|
68
68
|
#
|
|
69
69
|
def self.extract(content)
|
|
70
|
-
HTM.logger.debug "PropositionService: Extracting propositions from #{content.length} chars"
|
|
71
|
-
|
|
72
70
|
# Use circuit breaker to protect against cascading failures
|
|
73
71
|
raw_propositions = circuit_breaker.call do
|
|
74
72
|
HTM.configuration.proposition_extractor.call(content)
|
|
@@ -78,11 +76,7 @@ class HTM
|
|
|
78
76
|
parsed_propositions = parse_propositions(raw_propositions)
|
|
79
77
|
|
|
80
78
|
# Validate and filter propositions
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
HTM.logger.debug "PropositionService: Extracted #{valid_propositions.length} valid propositions"
|
|
84
|
-
|
|
85
|
-
valid_propositions
|
|
79
|
+
validate_and_filter_propositions(parsed_propositions)
|
|
86
80
|
|
|
87
81
|
rescue HTM::CircuitBreakerOpenError
|
|
88
82
|
# Re-raise circuit breaker errors without wrapping
|
|
@@ -128,10 +122,7 @@ class HTM
|
|
|
128
122
|
|
|
129
123
|
propositions.each do |proposition|
|
|
130
124
|
# Check minimum length
|
|
131
|
-
if proposition.length < MIN_PROPOSITION_LENGTH
|
|
132
|
-
HTM.logger.debug "PropositionService: Proposition too short, skipping: #{proposition}"
|
|
133
|
-
next
|
|
134
|
-
end
|
|
125
|
+
next if proposition.length < MIN_PROPOSITION_LENGTH
|
|
135
126
|
|
|
136
127
|
# Check maximum length
|
|
137
128
|
if proposition.length > MAX_PROPOSITION_LENGTH
|
|
@@ -141,7 +132,6 @@ class HTM
|
|
|
141
132
|
|
|
142
133
|
# Check for actual content (not just punctuation/whitespace)
|
|
143
134
|
unless proposition.match?(/[a-zA-Z]{3,}/)
|
|
144
|
-
HTM.logger.debug "PropositionService: Proposition lacks content, skipping: #{proposition}"
|
|
145
135
|
next
|
|
146
136
|
end
|
|
147
137
|
|
data/lib/htm/railtie.rb
CHANGED
|
@@ -32,14 +32,13 @@ class HTM
|
|
|
32
32
|
config.logger = Rails.logger
|
|
33
33
|
|
|
34
34
|
# Use ActiveJob for background jobs in Rails
|
|
35
|
-
config.
|
|
35
|
+
config.job.backend = :active_job unless Rails.env.test?
|
|
36
36
|
|
|
37
37
|
# Use inline execution in test environment for synchronous behavior
|
|
38
|
-
config.
|
|
38
|
+
config.job.backend = :inline if Rails.env.test?
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
HTM.logger.info "HTM initialized for Rails application"
|
|
42
|
-
HTM.logger.debug "HTM job backend: #{HTM.configuration.job_backend}"
|
|
43
42
|
end
|
|
44
43
|
|
|
45
44
|
# Load Rake tasks
|
|
@@ -63,7 +62,7 @@ class HTM
|
|
|
63
62
|
HTM.logger.info "HTM database connection verified"
|
|
64
63
|
rescue StandardError => e
|
|
65
64
|
HTM.logger.warn "HTM database connection check failed: #{e.message}"
|
|
66
|
-
HTM.logger.warn "Set
|
|
65
|
+
HTM.logger.warn "Set HTM_DATABASE__URL environment variable"
|
|
67
66
|
end
|
|
68
67
|
end
|
|
69
68
|
end
|
data/lib/htm/tag_service.rb
CHANGED
|
@@ -65,9 +65,6 @@ class HTM
|
|
|
65
65
|
# @raise [CircuitBreakerOpenError] If circuit breaker is open
|
|
66
66
|
#
|
|
67
67
|
def self.extract(content, existing_ontology: [])
|
|
68
|
-
HTM.logger.debug "TagService: Extracting tags from #{content.length} chars"
|
|
69
|
-
HTM.logger.debug "TagService: Using ontology with #{existing_ontology.size} existing tags"
|
|
70
|
-
|
|
71
68
|
# Use circuit breaker to protect against cascading failures
|
|
72
69
|
raw_tags = circuit_breaker.call do
|
|
73
70
|
HTM.configuration.tag_extractor.call(content, existing_ontology)
|
|
@@ -77,11 +74,7 @@ class HTM
|
|
|
77
74
|
parsed_tags = parse_tags(raw_tags)
|
|
78
75
|
|
|
79
76
|
# Validate and filter tags
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
HTM.logger.debug "TagService: Extracted #{valid_tags.length} valid tags: #{valid_tags.join(', ')}"
|
|
83
|
-
|
|
84
|
-
valid_tags
|
|
77
|
+
validate_and_filter_tags(parsed_tags)
|
|
85
78
|
|
|
86
79
|
rescue HTM::CircuitBreakerOpenError
|
|
87
80
|
# Re-raise circuit breaker errors without wrapping
|
data/lib/htm/version.rb
CHANGED
data/lib/htm.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "htm/version"
|
|
4
4
|
require_relative "htm/errors"
|
|
5
|
-
require_relative "htm/
|
|
5
|
+
require_relative "htm/config"
|
|
6
6
|
require_relative "htm/circuit_breaker"
|
|
7
7
|
require_relative "htm/active_record_config"
|
|
8
8
|
require_relative "htm/database"
|
|
@@ -70,7 +70,7 @@ class HTM
|
|
|
70
70
|
#
|
|
71
71
|
# @param working_memory_size [Integer] Maximum tokens for working memory (default: 128,000)
|
|
72
72
|
# @param robot_name [String] Human-readable name for this robot (auto-generated if not provided)
|
|
73
|
-
# @param db_config [Hash] Database configuration (uses ENV['
|
|
73
|
+
# @param db_config [Hash] Database configuration (uses ENV['HTM_DATABASE__URL'] if not provided)
|
|
74
74
|
# @param db_pool_size [Integer] Database connection pool size (default: 5)
|
|
75
75
|
# @param db_query_timeout [Integer] Database query timeout in milliseconds (default: 30000)
|
|
76
76
|
# @param db_cache_size [Integer] Number of database query results to cache (default: 1000, use 0 to disable)
|
|
@@ -170,7 +170,7 @@ class HTM
|
|
|
170
170
|
enqueue_tags_job(node_id, manual_tags: tags)
|
|
171
171
|
|
|
172
172
|
# Enqueue proposition extraction if enabled and not already a proposition
|
|
173
|
-
if HTM.
|
|
173
|
+
if HTM.config.extract_propositions && !metadata[:is_proposition]
|
|
174
174
|
enqueue_propositions_job(node_id)
|
|
175
175
|
end
|
|
176
176
|
else
|
|
@@ -251,7 +251,6 @@ class HTM
|
|
|
251
251
|
normalized_timeframe = if timeframe == :auto
|
|
252
252
|
result = HTM::Timeframe.normalize(:auto, query: topic)
|
|
253
253
|
search_query = result.query # Use cleaned query for search
|
|
254
|
-
HTM.logger.debug "Auto-extracted timeframe: #{result.extracted.inspect}" if result.extracted
|
|
255
254
|
result.timeframe
|
|
256
255
|
else
|
|
257
256
|
HTM::Timeframe.normalize(timeframe)
|
|
@@ -595,7 +594,6 @@ class HTM
|
|
|
595
594
|
tag = HTM::Models::Tag.find_or_create_by!(name: tag_name)
|
|
596
595
|
HTM::Models::NodeTag.find_or_create_by!(node_id: node_id, tag_id: tag.id)
|
|
597
596
|
end
|
|
598
|
-
HTM.logger.debug "Added #{manual_tags.length} manual tags to node #{node_id}"
|
|
599
597
|
end
|
|
600
598
|
|
|
601
599
|
# Enqueue tag generation using configured job backend
|
|
@@ -679,4 +677,125 @@ class HTM
|
|
|
679
677
|
end
|
|
680
678
|
end
|
|
681
679
|
|
|
680
|
+
# ===========================================================================
|
|
681
|
+
# Class Methods
|
|
682
|
+
# ===========================================================================
|
|
683
|
+
|
|
684
|
+
class << self
|
|
685
|
+
# Get current configuration (singleton)
|
|
686
|
+
#
|
|
687
|
+
# @return [HTM::Config]
|
|
688
|
+
#
|
|
689
|
+
def config
|
|
690
|
+
@config ||= Config.new
|
|
691
|
+
end
|
|
692
|
+
|
|
693
|
+
# Alias for backward compatibility
|
|
694
|
+
alias configuration config
|
|
695
|
+
|
|
696
|
+
# Configure HTM
|
|
697
|
+
#
|
|
698
|
+
# @yield [config] Configuration object
|
|
699
|
+
# @yieldparam config [HTM::Config]
|
|
700
|
+
#
|
|
701
|
+
# @example Custom configuration
|
|
702
|
+
# HTM.configure do |config|
|
|
703
|
+
# config.embedding_generator = ->(text) { MyEmbedder.embed(text) }
|
|
704
|
+
# config.tag_extractor = ->(text, ontology) { MyTagger.extract(text, ontology) }
|
|
705
|
+
# end
|
|
706
|
+
#
|
|
707
|
+
# @example Default configuration
|
|
708
|
+
# HTM.configure # Uses RubyLLM defaults
|
|
709
|
+
#
|
|
710
|
+
def configure
|
|
711
|
+
yield(config) if block_given?
|
|
712
|
+
config.validate!
|
|
713
|
+
config
|
|
714
|
+
end
|
|
715
|
+
|
|
716
|
+
# Reset configuration to defaults
|
|
717
|
+
def reset_configuration!
|
|
718
|
+
@config = nil
|
|
719
|
+
end
|
|
720
|
+
|
|
721
|
+
# Get current environment
|
|
722
|
+
#
|
|
723
|
+
# @return [String] Current environment name
|
|
724
|
+
#
|
|
725
|
+
def env
|
|
726
|
+
Config.env
|
|
727
|
+
end
|
|
728
|
+
|
|
729
|
+
# Check if running in test environment
|
|
730
|
+
#
|
|
731
|
+
# @return [Boolean]
|
|
732
|
+
#
|
|
733
|
+
def test?
|
|
734
|
+
env == 'test'
|
|
735
|
+
end
|
|
736
|
+
|
|
737
|
+
# Check if running in development environment
|
|
738
|
+
#
|
|
739
|
+
# @return [Boolean]
|
|
740
|
+
#
|
|
741
|
+
def development?
|
|
742
|
+
env == 'development'
|
|
743
|
+
end
|
|
744
|
+
|
|
745
|
+
# Check if running in production environment
|
|
746
|
+
#
|
|
747
|
+
# @return [Boolean]
|
|
748
|
+
#
|
|
749
|
+
def production?
|
|
750
|
+
env == 'production'
|
|
751
|
+
end
|
|
752
|
+
|
|
753
|
+
# Generate embedding using EmbeddingService
|
|
754
|
+
#
|
|
755
|
+
# @param text [String] Text to embed
|
|
756
|
+
# @return [Array<Float>] Embedding vector (original, not padded)
|
|
757
|
+
#
|
|
758
|
+
def embed(text)
|
|
759
|
+
result = HTM::EmbeddingService.generate(text)
|
|
760
|
+
result[:embedding]
|
|
761
|
+
end
|
|
762
|
+
|
|
763
|
+
# Extract tags using TagService
|
|
764
|
+
#
|
|
765
|
+
# @param text [String] Text to analyze
|
|
766
|
+
# @param existing_ontology [Array<String>] Sample of existing tags for context
|
|
767
|
+
# @return [Array<String>] Extracted and validated tag names
|
|
768
|
+
#
|
|
769
|
+
def extract_tags(text, existing_ontology: [])
|
|
770
|
+
HTM::TagService.extract(text, existing_ontology: existing_ontology)
|
|
771
|
+
end
|
|
772
|
+
|
|
773
|
+
# Extract propositions using PropositionService
|
|
774
|
+
#
|
|
775
|
+
# @param text [String] Text to analyze
|
|
776
|
+
# @return [Array<String>] Extracted atomic propositions
|
|
777
|
+
#
|
|
778
|
+
def extract_propositions(text)
|
|
779
|
+
HTM::PropositionService.extract(text)
|
|
780
|
+
end
|
|
781
|
+
|
|
782
|
+
# Count tokens using configured counter
|
|
783
|
+
#
|
|
784
|
+
# @param text [String] Text to count tokens for
|
|
785
|
+
# @return [Integer] Token count
|
|
786
|
+
#
|
|
787
|
+
def count_tokens(text)
|
|
788
|
+
config.token_counter.call(text)
|
|
789
|
+
rescue StandardError => e
|
|
790
|
+
raise HTM::ValidationError, "Token counting failed: #{e.message}"
|
|
791
|
+
end
|
|
792
|
+
|
|
793
|
+
# Get configured logger
|
|
794
|
+
#
|
|
795
|
+
# @return [Logger] Configured logger instance
|
|
796
|
+
#
|
|
797
|
+
def logger
|
|
798
|
+
config.logger
|
|
799
|
+
end
|
|
800
|
+
end
|
|
682
801
|
end
|