htm 0.0.20 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +62 -22
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +75 -462
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +111 -85
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- data/lib/htm/loaders/defaults_loader.rb +0 -143
- data/lib/htm/loaders/xdg_config_loader.rb +0 -116
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/config/defaults.yml
CHANGED
|
@@ -49,7 +49,7 @@ defaults:
|
|
|
49
49
|
# Access: HTM.config.embedding.provider, HTM.config.embedding.model, etc.
|
|
50
50
|
# ---------------------------------------------------------------------------
|
|
51
51
|
embedding:
|
|
52
|
-
provider: ollama
|
|
52
|
+
provider: :ollama
|
|
53
53
|
model: nomic-embed-text:latest
|
|
54
54
|
dimensions: 768
|
|
55
55
|
timeout: 120
|
|
@@ -65,7 +65,7 @@ defaults:
|
|
|
65
65
|
# %{taxonomy_context} - existing taxonomy info or new taxonomy message
|
|
66
66
|
# ---------------------------------------------------------------------------
|
|
67
67
|
tag:
|
|
68
|
-
provider: ollama
|
|
68
|
+
provider: :ollama
|
|
69
69
|
model: gemma3:latest
|
|
70
70
|
timeout: 180
|
|
71
71
|
max_depth: 4
|
|
@@ -91,8 +91,9 @@ defaults:
|
|
|
91
91
|
RULES:
|
|
92
92
|
1. Each concept belongs to ONE path only
|
|
93
93
|
2. Use lowercase, hyphens for multi-word terms
|
|
94
|
-
3.
|
|
95
|
-
4.
|
|
94
|
+
3. ALWAYS use SINGULAR forms, never plurals (user NOT users, framework NOT frameworks, model NOT models)
|
|
95
|
+
4. Return 2-5 tags that best classify this text
|
|
96
|
+
5. Match existing taxonomy paths when applicable
|
|
96
97
|
|
|
97
98
|
TEXT: %{text}
|
|
98
99
|
|
|
@@ -109,38 +110,66 @@ defaults:
|
|
|
109
110
|
# %{text} - the content to extract propositions from
|
|
110
111
|
# ---------------------------------------------------------------------------
|
|
111
112
|
proposition:
|
|
112
|
-
provider: ollama
|
|
113
|
+
provider: :ollama
|
|
113
114
|
model: gemma3:latest
|
|
114
115
|
timeout: 180
|
|
115
116
|
enabled: true
|
|
117
|
+
min_length: 10
|
|
118
|
+
max_length: 1000
|
|
119
|
+
min_words: 5
|
|
116
120
|
|
|
117
121
|
system_prompt: |
|
|
118
|
-
You are an atomic fact extraction system.
|
|
119
|
-
Break every statement into its smallest possible factual units.
|
|
122
|
+
You are an atomic fact extraction system. Extract factual propositions from text.
|
|
120
123
|
Output ONLY propositions, one per line, prefixed with a dash (-).
|
|
124
|
+
NEVER ask for clarification or more text - extract what you can from the given text.
|
|
125
|
+
If the text contains no extractable facts, output nothing.
|
|
121
126
|
|
|
122
127
|
user_prompt_template: |
|
|
123
|
-
Extract
|
|
128
|
+
Extract ATOMIC factual propositions from the following text.
|
|
124
129
|
|
|
125
|
-
An atomic proposition
|
|
130
|
+
An atomic proposition:
|
|
131
|
+
- Expresses exactly ONE fact or relationship
|
|
132
|
+
- Is understandable WITHOUT any additional context
|
|
133
|
+
- Uses FULL NAMES, never pronouns (he, she, it, they, this, that)
|
|
134
|
+
- Contains at least 5 words for sufficient context
|
|
126
135
|
|
|
127
|
-
|
|
136
|
+
PRONOUN REPLACEMENT EXAMPLES:
|
|
137
|
+
BAD: "It is important to know the enemy."
|
|
138
|
+
GOOD: "Knowing the enemy is important for spiritual warfare."
|
|
139
|
+
|
|
140
|
+
BAD: "They support thousands of users."
|
|
141
|
+
GOOD: "The Every.to products support thousands of daily users."
|
|
142
|
+
|
|
143
|
+
BAD: "This results in a calculation."
|
|
144
|
+
GOOD: "The age calculation depends on whether the birthday has occurred."
|
|
145
|
+
|
|
146
|
+
CONTEXT ENRICHMENT EXAMPLES:
|
|
147
|
+
BAD: "Wiring costs $1,000."
|
|
148
|
+
GOOD: "Solar panel wiring costs $1,000 for the Oklahoma barndominium project."
|
|
149
|
+
|
|
150
|
+
BAD: "The driveway is gravel."
|
|
151
|
+
GOOD: "Dewayne's barndominium driveway is gravel."
|
|
152
|
+
|
|
153
|
+
RULES:
|
|
128
154
|
1. Split compound statements into separate atomic facts
|
|
129
155
|
2. Each proposition = exactly one fact
|
|
130
|
-
3.
|
|
131
|
-
4.
|
|
156
|
+
3. Replace ALL pronouns with the actual names/nouns they refer to
|
|
157
|
+
4. Include enough context to understand the fact in isolation
|
|
158
|
+
5. If unsure what a pronoun refers to, omit that proposition
|
|
132
159
|
|
|
133
160
|
TEXT: %{text}
|
|
134
161
|
|
|
135
162
|
Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
|
|
163
|
+
If you cannot extract any facts, return nothing. Do NOT ask for more text.
|
|
136
164
|
|
|
137
165
|
# ---------------------------------------------------------------------------
|
|
138
166
|
# Chunking Configuration (for file loading)
|
|
139
|
-
# Access: HTM.config.chunking.
|
|
167
|
+
# Access: HTM.config.chunking.chunk_size, HTM.config.chunking.chunk_overlap
|
|
168
|
+
# Note: Using chunk_size/chunk_overlap to avoid collision with Enumerable#size
|
|
140
169
|
# ---------------------------------------------------------------------------
|
|
141
170
|
chunking:
|
|
142
|
-
|
|
143
|
-
|
|
171
|
+
chunk_size: 1024
|
|
172
|
+
chunk_overlap: 64
|
|
144
173
|
|
|
145
174
|
# ---------------------------------------------------------------------------
|
|
146
175
|
# Circuit Breaker Configuration
|
|
@@ -167,16 +196,16 @@ defaults:
|
|
|
167
196
|
# Access: HTM.config.job.backend
|
|
168
197
|
# ---------------------------------------------------------------------------
|
|
169
198
|
job:
|
|
170
|
-
backend: fiber
|
|
199
|
+
backend: :fiber
|
|
171
200
|
|
|
172
201
|
# ---------------------------------------------------------------------------
|
|
173
202
|
# General Settings
|
|
174
203
|
# Access: HTM.config.week_start, HTM.config.connection_timeout, etc.
|
|
175
204
|
# ---------------------------------------------------------------------------
|
|
176
|
-
week_start: sunday
|
|
205
|
+
week_start: :sunday
|
|
177
206
|
connection_timeout: 60
|
|
178
207
|
telemetry_enabled: false
|
|
179
|
-
log_level: info
|
|
208
|
+
log_level: :info
|
|
180
209
|
|
|
181
210
|
# ---------------------------------------------------------------------------
|
|
182
211
|
# Provider Credentials
|
|
@@ -222,7 +251,7 @@ defaults:
|
|
|
222
251
|
development:
|
|
223
252
|
database:
|
|
224
253
|
name: htm_development
|
|
225
|
-
log_level: debug
|
|
254
|
+
log_level: :debug
|
|
226
255
|
|
|
227
256
|
# =============================================================================
|
|
228
257
|
# Test Environment Overrides
|
|
@@ -231,8 +260,8 @@ test:
|
|
|
231
260
|
database:
|
|
232
261
|
name: htm_test
|
|
233
262
|
job:
|
|
234
|
-
backend: inline
|
|
235
|
-
log_level: warn
|
|
263
|
+
backend: :inline
|
|
264
|
+
log_level: :warn
|
|
236
265
|
telemetry_enabled: false
|
|
237
266
|
|
|
238
267
|
# =============================================================================
|
|
@@ -242,5 +271,16 @@ production:
|
|
|
242
271
|
database:
|
|
243
272
|
pool_size: 25
|
|
244
273
|
sslmode: require
|
|
245
|
-
log_level: warn
|
|
274
|
+
log_level: :warn
|
|
246
275
|
telemetry_enabled: true
|
|
276
|
+
|
|
277
|
+
# =============================================================================
|
|
278
|
+
# Examples Environment Overrides (for running example scripts)
|
|
279
|
+
# =============================================================================
|
|
280
|
+
examples:
|
|
281
|
+
database:
|
|
282
|
+
name: htm_examples
|
|
283
|
+
job:
|
|
284
|
+
backend: :inline
|
|
285
|
+
log_level: :info
|
|
286
|
+
telemetry_enabled: false
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  class Config
    # Validation routines for HTM configuration values.
    #
    # This module defines no state of its own. Its methods read configuration
    # through reader methods (embedding_provider, tag_provider,
    # proposition_provider, job_backend, week_start, relevance_*_weight) and
    # instance variables (@embedding_generator, @tag_extractor,
    # @proposition_extractor, @token_counter, @logger) that must be supplied by
    # the object it is mixed into, together with raise_validation_error.
    # NOTE(review): presumably mixed into HTM::Config - confirm against the
    # including class.
    module Validator
      SUPPORTED_PROVIDERS = %i[
        openai anthropic gemini azure ollama
        huggingface openrouter bedrock deepseek
      ].freeze

      SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
      SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze

      # Runs the provider, job backend, week start and relevance weight checks,
      # in that order. validate_callables and validate_logger are NOT invoked
      # here; callers must run them separately.
      def validate_config
        validate_providers
        validate_job_backend
        validate_week_start
        validate_relevance_weights
      end

      # Checks the embedding, tag and proposition providers in turn.
      def validate_providers
        validate_provider(:embedding_provider, embedding_provider)
        validate_provider(:tag_provider, tag_provider)
        validate_provider(:proposition_provider, proposition_provider)
      end

      # Checks one provider setting against SUPPORTED_PROVIDERS.
      #
      # @param name [Symbol, String] setting name used in the error message
      # @param value [Object, nil] configured provider; nil is accepted as "unset"
      def validate_provider(name, value)
        return if value.nil?

        validate_supported_value(name, value, SUPPORTED_PROVIDERS)
      end

      # Checks job_backend against SUPPORTED_JOB_BACKENDS. A nil or false
      # backend is treated as unset and skipped.
      def validate_job_backend
        backend = job_backend
        return unless backend

        validate_supported_value('job.backend', backend, SUPPORTED_JOB_BACKENDS)
      end

      # Checks week_start against SUPPORTED_WEEK_STARTS. Unlike the provider and
      # job backend checks there is no nil guard, so a nil week_start is
      # reported as invalid.
      def validate_week_start
        validate_supported_value('week_start', week_start, SUPPORTED_WEEK_STARTS)
      end

      # Checks that the four relevance weights are numeric and add up to 1.0,
      # accepting totals within 0.99..1.01 inclusive.
      #
      # A nil or otherwise non-numeric weight is reported through
      # raise_validation_error instead of surfacing as a NoMethodError or
      # TypeError from the addition.
      def validate_relevance_weights
        weights = {
          relevance_semantic_weight: relevance_semantic_weight,
          relevance_tag_weight: relevance_tag_weight,
          relevance_recency_weight: relevance_recency_weight,
          relevance_access_weight: relevance_access_weight
        }

        non_numeric = weights.reject { |_name, weight| weight.is_a?(Numeric) }
        unless non_numeric.empty?
          details = non_numeric.map { |name, weight| "#{name}=#{weight.inspect}" }.join(', ')
          # Explicit return so the sum below is never attempted on bad input,
          # whatever raise_validation_error does.
          return raise_validation_error("relevance weights must be numeric (got #{details})")
        end

        # Left-to-right fold keeps the same floating-point result as a + b + c + d.
        total = weights.values.inject(:+)
        return if (0.99..1.01).cover?(total)

        raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
      end

      # Checks that each configured callable responds to #call.
      #
      # @raise [HTM::ValidationError] naming the first attribute that is not callable
      def validate_callables
        %i[embedding_generator tag_extractor proposition_extractor token_counter].each do |attribute|
          next if instance_variable_get("@#{attribute}").respond_to?(:call)

          raise HTM::ValidationError, "#{attribute} must be callable"
        end
      end

      # Checks that @logger responds to the three logging methods checked here.
      #
      # @raise [HTM::ValidationError] when :info, :warn or :error is missing
      def validate_logger
        return if %i[info warn error].all? { |level| @logger.respond_to?(level) }

        raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
      end

      private

      # Shared membership check behind the provider, job backend and week start
      # validations; reports a failure through raise_validation_error.
      def validate_supported_value(label, value, allowed)
        return if allowed.include?(value)

        raise_validation_error("#{label} must be one of: #{allowed.join(', ')} (got #{value.inspect})")
      end
    end
  end
end
|