htm 0.0.18 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +119 -1
- data/README.md +12 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +34 -34
- data/docs/api/embedding-service.md +140 -110
- data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
- data/docs/api/yard/HTM/Config.md +173 -0
- data/docs/api/yard/HTM/ConfigSection.md +28 -0
- data/docs/api/yard/HTM/Database.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/api/yard/HTM.md +0 -57
- data/docs/api/yard/index.csv +76 -61
- data/docs/api/yard-reference.md +2 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
- data/docs/architecture/adrs/004-hive-mind.md +1 -1
- data/docs/architecture/adrs/008-robot-identification.md +1 -1
- data/docs/architecture/index.md +11 -9
- data/docs/architecture/overview.md +11 -7
- data/docs/assets/images/balanced-strategy-decay.svg +41 -0
- data/docs/assets/images/class-hierarchy.svg +1 -1
- data/docs/assets/images/eviction-priority.svg +43 -0
- data/docs/assets/images/exception-hierarchy.svg +2 -2
- data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
- data/docs/assets/images/htm-architecture-overview.svg +3 -3
- data/docs/assets/images/htm-core-components.svg +4 -4
- data/docs/assets/images/htm-layered-architecture.svg +1 -1
- data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
- data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
- data/docs/assets/images/memory-topology.svg +53 -0
- data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/development/setup.md +76 -44
- data/docs/examples/basic-usage.md +133 -0
- data/docs/examples/config-files.md +170 -0
- data/docs/examples/file-loading.md +208 -0
- data/docs/examples/index.md +116 -0
- data/docs/examples/llm-configuration.md +168 -0
- data/docs/examples/mcp-client.md +172 -0
- data/docs/examples/rails-integration.md +173 -0
- data/docs/examples/robot-groups.md +210 -0
- data/docs/examples/sinatra-integration.md +218 -0
- data/docs/examples/standalone-app.md +216 -0
- data/docs/examples/telemetry.md +224 -0
- data/docs/examples/timeframes.md +143 -0
- data/docs/getting-started/installation.md +97 -40
- data/docs/getting-started/quick-start.md +28 -11
- data/docs/guides/configuration.md +515 -0
- data/docs/guides/file-loading.md +322 -0
- data/docs/guides/getting-started.md +40 -9
- data/docs/guides/index.md +3 -3
- data/docs/guides/mcp-server.md +100 -13
- data/docs/guides/propositions.md +264 -0
- data/docs/guides/recalling-memories.md +4 -4
- data/docs/guides/search-strategies.md +3 -3
- data/docs/guides/tags.md +318 -0
- data/docs/guides/telemetry.md +229 -0
- data/docs/index.md +8 -16
- data/docs/{architecture → robots}/hive-mind.md +8 -111
- data/docs/robots/index.md +73 -0
- data/docs/{guides → robots}/multi-robot.md +3 -3
- data/docs/{guides → robots}/robot-groups.md +8 -7
- data/docs/{architecture → robots}/two-tier-memory.md +13 -149
- data/docs/robots/why-robots.md +85 -0
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +41 -13
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +65 -361
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/job_adapter.rb +75 -1
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +213 -0
- data/lib/htm.rb +27 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/mkdocs.yml +33 -8
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +168 -86
- data/docs/api/yard/HTM/Configuration.md +0 -240
- data/docs/telemetry.md +0 -391
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/htm/job_adapter.rb
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'async'
|
|
4
|
+
require 'async/barrier'
|
|
5
|
+
|
|
3
6
|
class HTM
|
|
4
7
|
# Job adapter for pluggable background job backends
|
|
5
8
|
#
|
|
@@ -11,6 +14,7 @@ class HTM
|
|
|
11
14
|
# - :sidekiq - Direct Sidekiq integration (recommended for Sinatra apps)
|
|
12
15
|
# - :inline - Synchronous execution (recommended for CLI and tests)
|
|
13
16
|
# - :thread - Background thread (legacy, for standalone apps)
|
|
17
|
+
# - :fiber - Fiber-based concurrency using async gem (recommended for I/O-bound jobs)
|
|
14
18
|
#
|
|
15
19
|
# @example Configure job backend
|
|
16
20
|
# HTM.configure do |config|
|
|
@@ -44,8 +48,10 @@ class HTM
|
|
|
44
48
|
enqueue_inline(job_class, **params)
|
|
45
49
|
when :thread
|
|
46
50
|
enqueue_thread(job_class, **params)
|
|
51
|
+
when :fiber
|
|
52
|
+
enqueue_fiber(job_class, **params)
|
|
47
53
|
else
|
|
48
|
-
raise HTM::Error, "Unknown job backend: #{backend}. Supported backends: :active_job, :sidekiq, :inline, :thread"
|
|
54
|
+
raise HTM::Error, "Unknown job backend: #{backend}. Supported backends: :active_job, :sidekiq, :inline, :thread, :fiber"
|
|
49
55
|
end
|
|
50
56
|
end
|
|
51
57
|
|
|
@@ -98,6 +104,74 @@ class HTM
|
|
|
98
104
|
HTM.logger.error "Failed to start thread for #{job_class.name}: #{e.message}"
|
|
99
105
|
end
|
|
100
106
|
|
|
107
|
+
# Execute job using async gem (fiber-based concurrency)
|
|
108
|
+
# Non-blocking for I/O-bound operations like LLM API calls when invoked inside a running Async reactor (a top-level Async call runs until the job finishes)
|
|
109
|
+
def enqueue_fiber(job_class, **params)
|
|
110
|
+
Async do
|
|
111
|
+
begin
|
|
112
|
+
job_class.perform(**params)
|
|
113
|
+
rescue StandardError => e
|
|
114
|
+
HTM.logger.error "Fiber job #{job_class.name} failed: #{e.class.name} - #{e.message}"
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
rescue StandardError => e
|
|
118
|
+
HTM.logger.error "Failed to start fiber for #{job_class.name}: #{e.message}"
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
public
|
|
122
|
+
|
|
123
|
+
# Execute multiple jobs in parallel using fibers
|
|
124
|
+
# Best for I/O-bound jobs like LLM API calls
|
|
125
|
+
#
|
|
126
|
+
# @param jobs [Array<Array>] Array of [job_class, params] pairs
|
|
127
|
+
# @return [void]
|
|
128
|
+
#
|
|
129
|
+
# @example Run embedding and tags jobs in parallel
|
|
130
|
+
# JobAdapter.enqueue_parallel([
|
|
131
|
+
# [GenerateEmbeddingJob, { node_id: 123 }],
|
|
132
|
+
# [GenerateTagsJob, { node_id: 123 }]
|
|
133
|
+
# ])
|
|
134
|
+
#
|
|
135
|
+
def enqueue_parallel(jobs)
|
|
136
|
+
return if jobs.empty?
|
|
137
|
+
|
|
138
|
+
backend = HTM.configuration.job_backend
|
|
139
|
+
|
|
140
|
+
case backend
|
|
141
|
+
when :fiber
|
|
142
|
+
enqueue_parallel_fiber(jobs)
|
|
143
|
+
when :inline
|
|
144
|
+
# Run sequentially for inline backend
|
|
145
|
+
jobs.each { |job_class, params| enqueue_inline(job_class, **params) }
|
|
146
|
+
else
|
|
147
|
+
# For other backends, enqueue each job separately
|
|
148
|
+
jobs.each { |job_class, params| enqueue(job_class, **params) }
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
private
|
|
153
|
+
|
|
154
|
+
# Execute multiple jobs in parallel using async fibers
|
|
155
|
+
def enqueue_parallel_fiber(jobs)
|
|
156
|
+
Async do |task|
|
|
157
|
+
barrier = Async::Barrier.new
|
|
158
|
+
|
|
159
|
+
jobs.each do |job_class, params|
|
|
160
|
+
barrier.async do
|
|
161
|
+
begin
|
|
162
|
+
job_class.perform(**params)
|
|
163
|
+
rescue StandardError => e
|
|
164
|
+
HTM.logger.error "Parallel fiber job #{job_class.name} failed: #{e.class.name} - #{e.message}"
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
barrier.wait
|
|
170
|
+
end
|
|
171
|
+
rescue StandardError => e
|
|
172
|
+
HTM.logger.error "Failed to start parallel fibers: #{e.message}"
|
|
173
|
+
end
|
|
174
|
+
|
|
101
175
|
# Convert HTM job class to ActiveJob class
|
|
102
176
|
def to_active_job_class(job_class)
|
|
103
177
|
# If it's already an ActiveJob, return it
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative '../errors'
|
|
4
|
-
require_relative '../models/node'
|
|
5
4
|
require_relative '../embedding_service'
|
|
6
5
|
|
|
7
6
|
class HTM
|
|
@@ -23,7 +22,7 @@ class HTM
|
|
|
23
22
|
# @param node_id [Integer] ID of the node to process
|
|
24
23
|
#
|
|
25
24
|
def self.perform(node_id:)
|
|
26
|
-
node = HTM::Models::Node.
|
|
25
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
27
26
|
|
|
28
27
|
unless node
|
|
29
28
|
HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found"
|
|
@@ -31,7 +30,7 @@ class HTM
|
|
|
31
30
|
end
|
|
32
31
|
|
|
33
32
|
# Skip if already has embedding
|
|
34
|
-
return if node.embedding
|
|
33
|
+
return if node.embedding
|
|
35
34
|
|
|
36
35
|
provider = HTM.configuration.embedding_provider.to_s
|
|
37
36
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
@@ -41,7 +40,7 @@ class HTM
|
|
|
41
40
|
result = HTM::EmbeddingService.generate(node.content)
|
|
42
41
|
|
|
43
42
|
# Update node with processed embedding
|
|
44
|
-
node.update
|
|
43
|
+
node.update(embedding: result[:storage_embedding])
|
|
45
44
|
|
|
46
45
|
# Record success metrics
|
|
47
46
|
elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative '../errors'
|
|
4
|
-
require_relative '../models/node'
|
|
5
4
|
require_relative '../proposition_service'
|
|
6
5
|
|
|
7
6
|
class HTM
|
|
@@ -25,7 +24,7 @@ class HTM
|
|
|
25
24
|
# @param robot_id [Integer] ID of the robot that owns this node
|
|
26
25
|
#
|
|
27
26
|
def self.perform(node_id:, robot_id:)
|
|
28
|
-
node = HTM::Models::Node.
|
|
27
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
29
28
|
|
|
30
29
|
unless node
|
|
31
30
|
HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
|
|
@@ -49,14 +48,14 @@ class HTM
|
|
|
49
48
|
token_count = HTM.count_tokens(proposition_text)
|
|
50
49
|
|
|
51
50
|
# Create proposition node with is_proposition marker
|
|
52
|
-
proposition_node = HTM::Models::Node.create
|
|
51
|
+
proposition_node = HTM::Models::Node.create(
|
|
53
52
|
content: proposition_text,
|
|
54
53
|
token_count: token_count,
|
|
55
54
|
metadata: { is_proposition: true, source_node_id: node_id }
|
|
56
55
|
)
|
|
57
56
|
|
|
58
57
|
# Link to robot via RobotNode
|
|
59
|
-
HTM::Models::RobotNode.
|
|
58
|
+
HTM::Models::RobotNode.find_or_create(
|
|
60
59
|
robot_id: robot_id,
|
|
61
60
|
node_id: proposition_node.id
|
|
62
61
|
)
|
|
@@ -79,7 +78,7 @@ class HTM
|
|
|
79
78
|
# Log proposition-specific errors
|
|
80
79
|
HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
|
|
81
80
|
|
|
82
|
-
rescue
|
|
81
|
+
rescue Sequel::ValidationFailed => e
|
|
83
82
|
# Log validation errors
|
|
84
83
|
HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
|
|
85
84
|
|
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative '../errors'
|
|
4
|
-
require_relative '../models/node'
|
|
5
|
-
require_relative '../models/tag'
|
|
6
|
-
require_relative '../models/node_tag'
|
|
7
4
|
require_relative '../tag_service'
|
|
8
5
|
|
|
9
6
|
class HTM
|
|
@@ -26,7 +23,7 @@ class HTM
|
|
|
26
23
|
# @param node_id [Integer] ID of the node to process
|
|
27
24
|
#
|
|
28
25
|
def self.perform(node_id:)
|
|
29
|
-
node = HTM::Models::Node.
|
|
26
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
30
27
|
|
|
31
28
|
unless node
|
|
32
29
|
HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found"
|
|
@@ -39,23 +36,27 @@ class HTM
|
|
|
39
36
|
begin
|
|
40
37
|
# Get existing ontology for context (sample of recent tags)
|
|
41
38
|
existing_ontology = HTM::Models::Tag
|
|
42
|
-
.order(created_at
|
|
39
|
+
.order(Sequel.desc(:created_at))
|
|
43
40
|
.limit(100)
|
|
44
|
-
.
|
|
41
|
+
.select_map(:name)
|
|
45
42
|
|
|
46
43
|
# Extract and validate tags using TagService
|
|
47
44
|
tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
|
|
48
45
|
return if tag_names.empty?
|
|
49
46
|
|
|
50
|
-
# Create or find tags and associate with node
|
|
47
|
+
# Create or find tags (including all parent tags) and associate with node
|
|
48
|
+
# For "database:postgresql:extensions", this creates and associates:
|
|
49
|
+
# - "database"
|
|
50
|
+
# - "database:postgresql"
|
|
51
|
+
# - "database:postgresql:extensions"
|
|
51
52
|
tag_names.each do |tag_name|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
53
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
54
|
+
# Create association if it doesn't exist
|
|
55
|
+
HTM::Models::NodeTag.find_or_create(
|
|
56
|
+
node_id: node.id,
|
|
57
|
+
tag_id: tag.id
|
|
58
|
+
)
|
|
59
|
+
end
|
|
59
60
|
end
|
|
60
61
|
|
|
61
62
|
# Record success metrics
|
|
@@ -79,7 +80,7 @@ class HTM
|
|
|
79
80
|
# Log tag-specific errors
|
|
80
81
|
HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"
|
|
81
82
|
|
|
82
|
-
rescue
|
|
83
|
+
rescue Sequel::ValidationFailed => e
|
|
83
84
|
# Record failure metrics
|
|
84
85
|
elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
85
86
|
HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
|
|
@@ -79,6 +79,29 @@ class HTM
|
|
|
79
79
|
raw = load_raw_yaml
|
|
80
80
|
raw[:defaults] || {}
|
|
81
81
|
end
|
|
82
|
+
|
|
83
|
+
# Returns valid environment names from the config file
|
|
84
|
+
#
|
|
85
|
+
# Valid environments are top-level keys in defaults.yml excluding 'defaults'.
|
|
86
|
+
# For example, if defaults.yml has keys: defaults, development, test, production
|
|
87
|
+
# this returns [:development, :test, :production]
|
|
88
|
+
#
|
|
89
|
+
# @return [Array<Symbol>] list of valid environment names
|
|
90
|
+
def valid_environments
|
|
91
|
+
raw = load_raw_yaml
|
|
92
|
+
raw.keys.reject { |k| k == :defaults }.sort
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Check if a given environment name is valid
|
|
96
|
+
#
|
|
97
|
+
# @param env [String, Symbol] environment name to check
|
|
98
|
+
# @return [Boolean] true if environment is valid
|
|
99
|
+
def valid_environment?(env)
|
|
100
|
+
return false if env.nil? || env.to_s.empty?
|
|
101
|
+
return false if env.to_s == 'defaults'
|
|
102
|
+
|
|
103
|
+
valid_environments.include?(env.to_sym)
|
|
104
|
+
end
|
|
82
105
|
end
|
|
83
106
|
|
|
84
107
|
def call(name:, **_options)
|
|
@@ -78,10 +78,12 @@ class HTM
|
|
|
78
78
|
file_hash = Digest::SHA256.hexdigest(content)
|
|
79
79
|
|
|
80
80
|
# Find or create source record
|
|
81
|
-
source = HTM::Models::FileSource.
|
|
81
|
+
source = HTM::Models::FileSource.first(file_path: expanded_path)
|
|
82
|
+
is_new = source.nil?
|
|
83
|
+
source ||= HTM::Models::FileSource.new(file_path: expanded_path)
|
|
82
84
|
|
|
83
85
|
# Check if sync needed
|
|
84
|
-
unless force ||
|
|
86
|
+
unless force || is_new || source.needs_sync?(stat.mtime)
|
|
85
87
|
return {
|
|
86
88
|
file_path: expanded_path,
|
|
87
89
|
chunks_created: 0,
|
|
@@ -104,18 +106,18 @@ class HTM
|
|
|
104
106
|
end
|
|
105
107
|
|
|
106
108
|
# Save source first (need ID for node association)
|
|
107
|
-
source.save
|
|
109
|
+
source.save if is_new
|
|
108
110
|
|
|
109
111
|
# Sync chunks to database (chunks now include cursor positions)
|
|
110
112
|
result = sync_chunks(source, chunks)
|
|
111
113
|
|
|
112
114
|
# Update source record
|
|
113
|
-
source.update
|
|
115
|
+
source.update(
|
|
114
116
|
file_hash: file_hash,
|
|
115
117
|
mtime: stat.mtime,
|
|
116
118
|
file_size: stat.size,
|
|
117
119
|
frontmatter: frontmatter,
|
|
118
|
-
last_synced_at: Time.
|
|
120
|
+
last_synced_at: Time.now
|
|
119
121
|
)
|
|
120
122
|
|
|
121
123
|
result.merge(
|
|
@@ -195,9 +197,9 @@ class HTM
|
|
|
195
197
|
deleted = 0
|
|
196
198
|
|
|
197
199
|
# Get existing nodes for this source (include soft-deleted for potential restore)
|
|
198
|
-
existing_nodes = source.
|
|
199
|
-
HTM::Models::Node.
|
|
200
|
-
existing_by_hash = existing_nodes.
|
|
200
|
+
existing_nodes = source.id ?
|
|
201
|
+
HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
|
|
202
|
+
existing_by_hash = existing_nodes.each_with_object({}) { |n, h| h[n.content_hash] = n }
|
|
201
203
|
|
|
202
204
|
# Track which existing nodes we've matched
|
|
203
205
|
matched_hashes = Set.new
|
|
@@ -217,7 +219,7 @@ class HTM
|
|
|
217
219
|
|
|
218
220
|
changes = {}
|
|
219
221
|
changes[:chunk_position] = position if node.chunk_position != position
|
|
220
|
-
changes[:deleted_at] = nil if node.deleted_at
|
|
222
|
+
changes[:deleted_at] = nil if node.deleted_at
|
|
221
223
|
|
|
222
224
|
# Update cursor in metadata if changed
|
|
223
225
|
current_cursor = node.metadata&.dig('cursor')
|
|
@@ -227,7 +229,7 @@ class HTM
|
|
|
227
229
|
end
|
|
228
230
|
|
|
229
231
|
if changes.any?
|
|
230
|
-
node.update
|
|
232
|
+
node.update(changes)
|
|
231
233
|
updated += 1
|
|
232
234
|
end
|
|
233
235
|
else
|
|
@@ -240,7 +242,7 @@ class HTM
|
|
|
240
242
|
# Soft-delete chunks that no longer exist in file
|
|
241
243
|
existing_by_hash.each do |hash, node|
|
|
242
244
|
next if matched_hashes.include?(hash)
|
|
243
|
-
next if node.deleted_at
|
|
245
|
+
next if node.deleted_at # Already deleted
|
|
244
246
|
|
|
245
247
|
node.soft_delete!
|
|
246
248
|
deleted += 1
|
|
@@ -265,18 +267,18 @@ class HTM
|
|
|
265
267
|
node_id = @htm.remember(content, metadata: chunk_metadata)
|
|
266
268
|
|
|
267
269
|
# Update with source reference
|
|
268
|
-
node = HTM::Models::Node
|
|
269
|
-
node.update
|
|
270
|
+
node = HTM::Models::Node[node_id]
|
|
271
|
+
node.update(source_id: source.id, chunk_position: position)
|
|
270
272
|
|
|
271
273
|
node
|
|
272
|
-
rescue
|
|
274
|
+
rescue Sequel::UniqueConstraintViolation
|
|
273
275
|
# Duplicate content exists (different source or no source)
|
|
274
276
|
# Find and link to this source
|
|
275
277
|
existing = HTM::Models::Node.find_by_content(content)
|
|
276
278
|
if existing && existing.source_id.nil?
|
|
277
279
|
# Merge cursor into existing metadata
|
|
278
280
|
new_metadata = (existing.metadata || {}).merge('cursor' => cursor) if cursor
|
|
279
|
-
existing.update
|
|
281
|
+
existing.update(
|
|
280
282
|
source_id: source.id,
|
|
281
283
|
chunk_position: position,
|
|
282
284
|
metadata: new_metadata || existing.metadata
|
|
@@ -28,24 +28,24 @@ class HTM
|
|
|
28
28
|
class << self
|
|
29
29
|
# Returns all XDG config paths to check, in order of priority (lowest first)
|
|
30
30
|
#
|
|
31
|
+
# Per XDG spec: If $XDG_CONFIG_HOME is set, use it; otherwise use ~/.config
|
|
32
|
+
#
|
|
31
33
|
# @return [Array<String>] list of potential config file paths
|
|
32
34
|
def config_paths
|
|
33
35
|
paths = []
|
|
34
36
|
|
|
35
|
-
# macOS Application Support (lowest priority
|
|
36
|
-
if macos?
|
|
37
|
+
# macOS Application Support (lowest priority, only when XDG_CONFIG_HOME is not set)
|
|
38
|
+
if macos? && (!ENV['XDG_CONFIG_HOME'] || ENV['XDG_CONFIG_HOME'].empty?)
|
|
37
39
|
macos_path = File.expand_path('~/Library/Application Support/htm')
|
|
38
40
|
paths << macos_path if Dir.exist?(File.dirname(macos_path))
|
|
39
41
|
end
|
|
40
42
|
|
|
41
|
-
#
|
|
42
|
-
xdg_default = File.expand_path('~/.config/htm')
|
|
43
|
-
paths << xdg_default
|
|
44
|
-
|
|
45
|
-
# XDG_CONFIG_HOME override (highest priority for XDG loader)
|
|
43
|
+
# XDG_CONFIG_HOME takes precedence over default
|
|
46
44
|
if ENV['XDG_CONFIG_HOME'] && !ENV['XDG_CONFIG_HOME'].empty?
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
paths << File.join(ENV['XDG_CONFIG_HOME'], 'htm')
|
|
46
|
+
else
|
|
47
|
+
# XDG default: ~/.config/htm (only when XDG_CONFIG_HOME is not set)
|
|
48
|
+
paths << File.expand_path('~/.config/htm')
|
|
49
49
|
end
|
|
50
50
|
|
|
51
51
|
paths
|
|
@@ -79,6 +79,7 @@ class HTM
|
|
|
79
79
|
|
|
80
80
|
# Combined tsvector + trigram search
|
|
81
81
|
# tsvector matches get boosted score, trigram provides fuzzy fallback
|
|
82
|
+
# Note: Using ? placeholders for Sequel compatibility
|
|
82
83
|
sql = <<~SQL
|
|
83
84
|
WITH tsvector_matches AS (
|
|
84
85
|
-- Primary: tsvector full-text search (stemmed word matching)
|
|
@@ -114,24 +115,23 @@ class HTM
|
|
|
114
115
|
LIMIT ?
|
|
115
116
|
SQL
|
|
116
117
|
|
|
117
|
-
result =
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
])
|
|
128
|
-
)
|
|
118
|
+
result = HTM.db.fetch(
|
|
119
|
+
sql,
|
|
120
|
+
TSVECTOR_SCORE_BOOST, # boost for tsvector
|
|
121
|
+
query, # query for ts_rank
|
|
122
|
+
query, # query for plainto_tsquery
|
|
123
|
+
query, # query for similarity (trigram)
|
|
124
|
+
query, # query for similarity condition
|
|
125
|
+
TRIGRAM_SIMILARITY_THRESHOLD, # similarity threshold
|
|
126
|
+
limit # limit
|
|
127
|
+
).all
|
|
129
128
|
|
|
130
129
|
# Track access for retrieved nodes
|
|
131
|
-
node_ids = result.map { |r| r[
|
|
130
|
+
node_ids = result.map { |r| r[:id] }
|
|
132
131
|
track_access(node_ids)
|
|
133
132
|
|
|
134
|
-
|
|
133
|
+
# Convert to hash with string keys for compatibility
|
|
134
|
+
result.map { |r| r.transform_keys(&:to_s) }
|
|
135
135
|
end
|
|
136
136
|
end
|
|
137
137
|
end
|