htm 0.0.20 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +37 -9
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +64 -360
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +109 -80
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/tasks/jobs.rake
CHANGED
|
@@ -4,10 +4,9 @@ namespace :htm do
|
|
|
4
4
|
namespace :jobs do
|
|
5
5
|
desc "Show statistics for nodes and async job processing"
|
|
6
6
|
task :stats => :environment do
|
|
7
|
-
require 'htm'
|
|
8
7
|
|
|
9
8
|
# Establish connection
|
|
10
|
-
HTM::
|
|
9
|
+
HTM::SequelConfig.establish_connection!
|
|
11
10
|
|
|
12
11
|
puts "HTM Async Job Statistics"
|
|
13
12
|
puts "=" * 60
|
|
@@ -17,7 +16,7 @@ namespace :htm do
|
|
|
17
16
|
puts "Total nodes: #{total_nodes}"
|
|
18
17
|
|
|
19
18
|
# Nodes with embeddings
|
|
20
|
-
with_embeddings = HTM::Models::Node.
|
|
19
|
+
with_embeddings = HTM::Models::Node.exclude(embedding: nil).count
|
|
21
20
|
puts "Nodes with embeddings: #{with_embeddings} (#{percentage(with_embeddings, total_nodes)}%)"
|
|
22
21
|
|
|
23
22
|
# Nodes without embeddings (pending embedding jobs)
|
|
@@ -26,7 +25,7 @@ namespace :htm do
|
|
|
26
25
|
|
|
27
26
|
# Nodes with tags
|
|
28
27
|
nodes_with_tags = HTM::Models::Node
|
|
29
|
-
.
|
|
28
|
+
.join(:node_tags, node_id: :id)
|
|
30
29
|
.distinct
|
|
31
30
|
.count
|
|
32
31
|
puts "Nodes with tags: #{nodes_with_tags} (#{percentage(nodes_with_tags, total_nodes)}%)"
|
|
@@ -43,7 +42,7 @@ namespace :htm do
|
|
|
43
42
|
if total_tags > 0
|
|
44
43
|
puts "\nTag hierarchy breakdown:"
|
|
45
44
|
depth_counts = Hash.new(0)
|
|
46
|
-
HTM::Models::Tag.
|
|
45
|
+
HTM::Models::Tag.select_map(:name).each do |name|
|
|
47
46
|
depth = name.count(':')
|
|
48
47
|
depth_counts[depth] += 1
|
|
49
48
|
end
|
|
@@ -58,15 +57,15 @@ namespace :htm do
|
|
|
58
57
|
puts "\nAverage tags per node: #{avg_tags.round(2)}"
|
|
59
58
|
end
|
|
60
59
|
|
|
61
|
-
HTM::
|
|
60
|
+
HTM::SequelConfig.disconnect!
|
|
62
61
|
end
|
|
63
62
|
|
|
64
63
|
desc "Process pending embedding jobs for nodes without embeddings"
|
|
65
64
|
task :process_embeddings => :environment do
|
|
66
|
-
require '
|
|
65
|
+
require 'ruby-progressbar'
|
|
67
66
|
|
|
68
67
|
# Establish connection and configure HTM
|
|
69
|
-
HTM::
|
|
68
|
+
HTM::SequelConfig.establish_connection!
|
|
70
69
|
HTM.configure # Use default configuration
|
|
71
70
|
|
|
72
71
|
# Find nodes without embeddings
|
|
@@ -75,95 +74,222 @@ namespace :htm do
|
|
|
75
74
|
|
|
76
75
|
if total.zero?
|
|
77
76
|
puts "No pending embedding jobs"
|
|
78
|
-
HTM::
|
|
77
|
+
HTM::SequelConfig.disconnect!
|
|
79
78
|
exit 0
|
|
80
79
|
end
|
|
81
80
|
|
|
82
|
-
puts "Processing #{total} pending embedding jobs
|
|
81
|
+
puts "Processing #{total} pending embedding jobs...\n"
|
|
82
|
+
|
|
83
|
+
# Create progress bar with ETA
|
|
84
|
+
progressbar = ProgressBar.create(
|
|
85
|
+
total: total,
|
|
86
|
+
format: '%t: |%B| %c/%C (%p%%) %e',
|
|
87
|
+
title: 'Embeddings',
|
|
88
|
+
output: $stdout,
|
|
89
|
+
smoothing: 0.5
|
|
90
|
+
)
|
|
83
91
|
|
|
84
92
|
processed = 0
|
|
85
93
|
failed = 0
|
|
86
94
|
|
|
87
|
-
pending_nodes.
|
|
95
|
+
pending_nodes.paged_each do |node|
|
|
88
96
|
begin
|
|
89
97
|
# Use the service class directly (same as job)
|
|
90
98
|
result = HTM::EmbeddingService.generate(node.content)
|
|
91
|
-
node.update
|
|
99
|
+
node.update(embedding: result[:storage_embedding])
|
|
92
100
|
processed += 1
|
|
93
|
-
print "\rProcessed: #{processed}/#{total}"
|
|
94
101
|
rescue StandardError => e
|
|
95
102
|
failed += 1
|
|
96
|
-
|
|
103
|
+
progressbar.log " Error on node #{node.id}: #{e.message}"
|
|
97
104
|
end
|
|
105
|
+
|
|
106
|
+
progressbar.increment
|
|
98
107
|
end
|
|
99
108
|
|
|
100
|
-
|
|
109
|
+
progressbar.finish
|
|
110
|
+
|
|
111
|
+
puts "\nCompleted:"
|
|
101
112
|
puts " Processed: #{processed}"
|
|
102
113
|
puts " Failed: #{failed}"
|
|
103
114
|
|
|
104
|
-
HTM::
|
|
115
|
+
HTM::SequelConfig.disconnect!
|
|
105
116
|
end
|
|
106
117
|
|
|
107
118
|
desc "Process pending tag extraction jobs for nodes without tags"
|
|
108
119
|
task :process_tags => :environment do
|
|
109
|
-
require '
|
|
120
|
+
require 'ruby-progressbar'
|
|
110
121
|
|
|
111
122
|
# Establish connection and configure HTM
|
|
112
|
-
HTM::
|
|
123
|
+
HTM::SequelConfig.establish_connection!
|
|
113
124
|
HTM.configure # Use default configuration
|
|
114
125
|
|
|
115
|
-
# Find nodes without any tags
|
|
126
|
+
# Find nodes without any tags (using NOT EXISTS subquery)
|
|
116
127
|
nodes_without_tags = HTM::Models::Node
|
|
117
|
-
.
|
|
118
|
-
|
|
128
|
+
.where(Sequel.~(Sequel.exists(
|
|
129
|
+
HTM::Models::NodeTag.where(Sequel[:node_tags][:node_id] => Sequel[:nodes][:id]).select(1)
|
|
130
|
+
)))
|
|
119
131
|
|
|
120
132
|
total = nodes_without_tags.count
|
|
121
133
|
|
|
122
134
|
if total.zero?
|
|
123
135
|
puts "No pending tag extraction jobs"
|
|
124
|
-
HTM::
|
|
136
|
+
HTM::SequelConfig.disconnect!
|
|
125
137
|
exit 0
|
|
126
138
|
end
|
|
127
139
|
|
|
128
|
-
puts "Processing #{total} pending tag extraction jobs
|
|
140
|
+
puts "Processing #{total} pending tag extraction jobs...\n"
|
|
141
|
+
|
|
142
|
+
# Create progress bar with ETA
|
|
143
|
+
progressbar = ProgressBar.create(
|
|
144
|
+
total: total,
|
|
145
|
+
format: '%t: |%B| %c/%C (%p%%) %e',
|
|
146
|
+
title: 'Tags',
|
|
147
|
+
output: $stdout,
|
|
148
|
+
smoothing: 0.5
|
|
149
|
+
)
|
|
129
150
|
|
|
130
151
|
processed = 0
|
|
131
152
|
failed = 0
|
|
132
153
|
|
|
133
|
-
nodes_without_tags.
|
|
154
|
+
nodes_without_tags.paged_each do |node|
|
|
134
155
|
begin
|
|
135
156
|
# Use the service class directly (same as job)
|
|
136
|
-
existing_ontology = HTM::Models::Tag.order(created_at
|
|
157
|
+
existing_ontology = HTM::Models::Tag.order(Sequel.desc(:created_at)).limit(100).select_map(:name)
|
|
137
158
|
tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
|
|
138
159
|
|
|
139
160
|
tag_names.each do |tag_name|
|
|
140
|
-
tag = HTM::Models::Tag.
|
|
141
|
-
HTM::Models::NodeTag.
|
|
161
|
+
tag = HTM::Models::Tag.find_or_create_by_name(tag_name)
|
|
162
|
+
HTM::Models::NodeTag.find_or_create(node_id: node.id, tag_id: tag.id)
|
|
142
163
|
end
|
|
143
164
|
|
|
144
165
|
processed += 1
|
|
145
|
-
print "\rProcessed: #{processed}/#{total}"
|
|
146
166
|
rescue StandardError => e
|
|
147
167
|
failed += 1
|
|
148
|
-
|
|
168
|
+
progressbar.log " Error on node #{node.id}: #{e.message}"
|
|
149
169
|
end
|
|
170
|
+
|
|
171
|
+
progressbar.increment
|
|
150
172
|
end
|
|
151
173
|
|
|
152
|
-
|
|
174
|
+
progressbar.finish
|
|
175
|
+
|
|
176
|
+
puts "\nCompleted:"
|
|
153
177
|
puts " Processed: #{processed}"
|
|
154
178
|
puts " Failed: #{failed}"
|
|
155
179
|
|
|
156
|
-
HTM::
|
|
180
|
+
HTM::SequelConfig.disconnect!
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
desc "Process pending proposition extraction for nodes without propositions"
|
|
184
|
+
task :process_propositions => :environment do
|
|
185
|
+
require 'ruby-progressbar'
|
|
186
|
+
|
|
187
|
+
# Establish connection and configure HTM
|
|
188
|
+
HTM::SequelConfig.establish_connection!
|
|
189
|
+
HTM.configure # Use default configuration
|
|
190
|
+
|
|
191
|
+
# Find non-proposition nodes that haven't been processed yet
|
|
192
|
+
# A node needs processing if:
|
|
193
|
+
# 1. It's not a proposition itself (is_proposition != true)
|
|
194
|
+
# 2. No proposition node references it as source_node_id
|
|
195
|
+
processed_source_ids = HTM::Models::Node
|
|
196
|
+
.where(Sequel.lit("metadata->>'is_proposition' = ?", 'true'))
|
|
197
|
+
.exclude(Sequel.lit("metadata->>'source_node_id' IS NULL"))
|
|
198
|
+
.select_map(Sequel.lit("metadata->>'source_node_id'"))
|
|
199
|
+
.map(&:to_i)
|
|
200
|
+
.uniq
|
|
201
|
+
|
|
202
|
+
pending_nodes = HTM::Models::Node
|
|
203
|
+
.non_propositions
|
|
204
|
+
.exclude(id: processed_source_ids)
|
|
205
|
+
|
|
206
|
+
total = pending_nodes.count
|
|
207
|
+
|
|
208
|
+
if total.zero?
|
|
209
|
+
puts "No pending proposition extraction jobs"
|
|
210
|
+
HTM::SequelConfig.disconnect!
|
|
211
|
+
exit 0
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
puts "Processing #{total} nodes for proposition extraction..."
|
|
215
|
+
puts "(Nodes already processed: #{processed_source_ids.size})\n"
|
|
216
|
+
|
|
217
|
+
# Get a robot for linking proposition nodes
|
|
218
|
+
robot = HTM::Models::Robot.first || HTM::Models::Robot.create(name: 'proposition_extractor')
|
|
219
|
+
|
|
220
|
+
# Create progress bar with ETA
|
|
221
|
+
progressbar = ProgressBar.create(
|
|
222
|
+
total: total,
|
|
223
|
+
format: '%t: |%B| %c/%C (%p%%) %e',
|
|
224
|
+
title: 'Extracting',
|
|
225
|
+
output: $stdout,
|
|
226
|
+
smoothing: 0.5
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
nodes_processed = 0
|
|
230
|
+
propositions_created = 0
|
|
231
|
+
failed = 0
|
|
232
|
+
|
|
233
|
+
pending_nodes.paged_each do |node|
|
|
234
|
+
begin
|
|
235
|
+
# Extract propositions using the service
|
|
236
|
+
propositions = HTM::PropositionService.extract(node.content)
|
|
237
|
+
|
|
238
|
+
if propositions.any?
|
|
239
|
+
propositions.each do |proposition_text|
|
|
240
|
+
token_count = HTM.count_tokens(proposition_text)
|
|
241
|
+
|
|
242
|
+
# Create proposition node
|
|
243
|
+
prop_node = HTM::Models::Node.create(
|
|
244
|
+
content: proposition_text,
|
|
245
|
+
token_count: token_count,
|
|
246
|
+
metadata: { is_proposition: true, source_node_id: node.id }
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Link to robot
|
|
250
|
+
HTM::Models::RobotNode.find_or_create(
|
|
251
|
+
robot_id: robot.id,
|
|
252
|
+
node_id: prop_node.id
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
# Generate embedding for proposition node
|
|
256
|
+
begin
|
|
257
|
+
result = HTM::EmbeddingService.generate(proposition_text)
|
|
258
|
+
prop_node.update(embedding: result[:storage_embedding])
|
|
259
|
+
rescue StandardError => e
|
|
260
|
+
progressbar.log " Warning: Embedding failed for proposition #{prop_node.id}: #{e.message}"
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
propositions_created += 1
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
nodes_processed += 1
|
|
268
|
+
rescue StandardError => e
|
|
269
|
+
failed += 1
|
|
270
|
+
progressbar.log " Error on node #{node.id}: #{e.message}"
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
progressbar.increment
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
progressbar.finish
|
|
277
|
+
|
|
278
|
+
puts "\nCompleted:"
|
|
279
|
+
puts " Nodes processed: #{nodes_processed}"
|
|
280
|
+
puts " Propositions created: #{propositions_created}"
|
|
281
|
+
puts " Failed: #{failed}"
|
|
282
|
+
|
|
283
|
+
HTM::SequelConfig.disconnect!
|
|
157
284
|
end
|
|
158
285
|
|
|
159
|
-
desc "Process all pending jobs (embeddings and
|
|
160
|
-
task :process_all => [:process_embeddings, :process_tags] do
|
|
286
|
+
desc "Process all pending jobs (embeddings, tags, and propositions)"
|
|
287
|
+
task :process_all => [:process_embeddings, :process_tags, :process_propositions] do
|
|
161
288
|
puts "\nAll pending jobs processed!"
|
|
162
289
|
end
|
|
163
290
|
|
|
164
291
|
desc "Reprocess embeddings for all nodes (force regeneration)"
|
|
165
292
|
task :reprocess_embeddings => :environment do
|
|
166
|
-
require 'htm'
|
|
167
293
|
|
|
168
294
|
print "This will regenerate embeddings for ALL nodes. Are you sure? (yes/no): "
|
|
169
295
|
confirmation = $stdin.gets.chomp
|
|
@@ -174,7 +300,7 @@ namespace :htm do
|
|
|
174
300
|
end
|
|
175
301
|
|
|
176
302
|
# Establish connection and configure HTM
|
|
177
|
-
HTM::
|
|
303
|
+
HTM::SequelConfig.establish_connection!
|
|
178
304
|
HTM.configure # Use default configuration
|
|
179
305
|
|
|
180
306
|
total = HTM::Models::Node.count
|
|
@@ -184,11 +310,11 @@ namespace :htm do
|
|
|
184
310
|
processed = 0
|
|
185
311
|
failed = 0
|
|
186
312
|
|
|
187
|
-
HTM::Models::Node.
|
|
313
|
+
HTM::Models::Node.paged_each do |node|
|
|
188
314
|
begin
|
|
189
315
|
# Use the service class directly to regenerate
|
|
190
316
|
result = HTM::EmbeddingService.generate(node.content)
|
|
191
|
-
node.update
|
|
317
|
+
node.update(embedding: result[:storage_embedding])
|
|
192
318
|
processed += 1
|
|
193
319
|
print "\rProcessed: #{processed}/#{total}"
|
|
194
320
|
rescue StandardError => e
|
|
@@ -201,14 +327,13 @@ namespace :htm do
|
|
|
201
327
|
puts " Processed: #{processed}"
|
|
202
328
|
puts " Failed: #{failed}"
|
|
203
329
|
|
|
204
|
-
HTM::
|
|
330
|
+
HTM::SequelConfig.disconnect!
|
|
205
331
|
end
|
|
206
332
|
|
|
207
333
|
desc "Show nodes that failed async processing"
|
|
208
334
|
task :failed => :environment do
|
|
209
|
-
require 'htm'
|
|
210
335
|
|
|
211
|
-
HTM::
|
|
336
|
+
HTM::SequelConfig.establish_connection!
|
|
212
337
|
|
|
213
338
|
puts "Nodes with Processing Issues"
|
|
214
339
|
puts "=" * 60
|
|
@@ -216,9 +341,9 @@ namespace :htm do
|
|
|
216
341
|
# Old nodes without embeddings (created more than 1 hour ago)
|
|
217
342
|
old_without_embeddings = HTM::Models::Node
|
|
218
343
|
.where(embedding: nil)
|
|
219
|
-
.where('created_at < ?',
|
|
344
|
+
.where(Sequel.lit('created_at < ?', Time.now - 3600))
|
|
220
345
|
|
|
221
|
-
if old_without_embeddings.
|
|
346
|
+
if old_without_embeddings.count > 0
|
|
222
347
|
puts "\nNodes without embeddings (>1 hour old):"
|
|
223
348
|
old_without_embeddings.limit(10).each do |node|
|
|
224
349
|
puts " Node #{node.id}: created #{time_ago(node.created_at)}"
|
|
@@ -231,13 +356,14 @@ namespace :htm do
|
|
|
231
356
|
puts "\n✓ No old nodes without embeddings"
|
|
232
357
|
end
|
|
233
358
|
|
|
234
|
-
# Old nodes without tags
|
|
359
|
+
# Old nodes without tags (using NOT EXISTS subquery)
|
|
235
360
|
old_without_tags = HTM::Models::Node
|
|
236
|
-
.
|
|
237
|
-
|
|
238
|
-
|
|
361
|
+
.where(Sequel.~(Sequel.exists(
|
|
362
|
+
HTM::Models::NodeTag.where(Sequel[:node_tags][:node_id] => Sequel[:nodes][:id]).select(1)
|
|
363
|
+
)))
|
|
364
|
+
.where(Sequel.lit('created_at < ?', Time.now - 3600))
|
|
239
365
|
|
|
240
|
-
if old_without_tags.
|
|
366
|
+
if old_without_tags.count > 0
|
|
241
367
|
puts "\nNodes without tags (>1 hour old):"
|
|
242
368
|
old_without_tags.limit(10).each do |node|
|
|
243
369
|
puts " Node #{node.id}: created #{time_ago(node.created_at)}"
|
|
@@ -250,12 +376,11 @@ namespace :htm do
|
|
|
250
376
|
puts "\n✓ No old nodes without tags"
|
|
251
377
|
end
|
|
252
378
|
|
|
253
|
-
HTM::
|
|
379
|
+
HTM::SequelConfig.disconnect!
|
|
254
380
|
end
|
|
255
381
|
|
|
256
382
|
desc "Clear all embeddings and tags (for testing/development)"
|
|
257
383
|
task :clear_all => :environment do
|
|
258
|
-
require 'htm'
|
|
259
384
|
|
|
260
385
|
print "This will clear ALL embeddings and tags. Are you sure? (yes/no): "
|
|
261
386
|
confirmation = $stdin.gets.chomp
|
|
@@ -265,18 +390,18 @@ namespace :htm do
|
|
|
265
390
|
exit 0
|
|
266
391
|
end
|
|
267
392
|
|
|
268
|
-
HTM::
|
|
393
|
+
HTM::SequelConfig.establish_connection!
|
|
269
394
|
|
|
270
395
|
puts "Clearing embeddings..."
|
|
271
|
-
HTM::Models::Node.
|
|
396
|
+
HTM::Models::Node.update(embedding: nil)
|
|
272
397
|
|
|
273
398
|
puts "Clearing tags..."
|
|
274
|
-
HTM::Models::NodeTag.
|
|
275
|
-
HTM::Models::Tag.
|
|
399
|
+
HTM::Models::NodeTag.dataset.delete
|
|
400
|
+
HTM::Models::Tag.dataset.delete
|
|
276
401
|
|
|
277
402
|
puts "Done! All embeddings and tags cleared."
|
|
278
403
|
|
|
279
|
-
HTM::
|
|
404
|
+
HTM::SequelConfig.disconnect!
|
|
280
405
|
end
|
|
281
406
|
|
|
282
407
|
# Helper methods
|
data/lib/tasks/tags.rake
CHANGED
|
@@ -12,10 +12,9 @@ namespace :htm do
|
|
|
12
12
|
namespace :tags do
|
|
13
13
|
desc "Display tags as a hierarchical tree (text format). Optional prefix filter."
|
|
14
14
|
task :tree, [:prefix] do |_t, args|
|
|
15
|
-
require 'htm'
|
|
16
15
|
|
|
17
16
|
# Ensure database connection
|
|
18
|
-
HTM::
|
|
17
|
+
HTM::SequelConfig.establish_connection!
|
|
19
18
|
|
|
20
19
|
tags = args[:prefix] ? HTM::Models::Tag.with_prefix(args[:prefix]) : HTM::Models::Tag.all
|
|
21
20
|
count = tags.count
|
|
@@ -34,10 +33,9 @@ namespace :htm do
|
|
|
34
33
|
|
|
35
34
|
desc "Export tags as Mermaid flowchart to tags.md. Optional prefix filter."
|
|
36
35
|
task :mermaid, [:prefix] do |_t, args|
|
|
37
|
-
require 'htm'
|
|
38
36
|
|
|
39
37
|
# Ensure database connection
|
|
40
|
-
HTM::
|
|
38
|
+
HTM::SequelConfig.establish_connection!
|
|
41
39
|
|
|
42
40
|
tags = args[:prefix] ? HTM::Models::Tag.with_prefix(args[:prefix]) : HTM::Models::Tag.all
|
|
43
41
|
count = tags.count
|
|
@@ -56,10 +54,9 @@ namespace :htm do
|
|
|
56
54
|
|
|
57
55
|
desc "Export tags as SVG visualization to tags.svg. Optional prefix filter."
|
|
58
56
|
task :svg, [:prefix] do |_t, args|
|
|
59
|
-
require 'htm'
|
|
60
57
|
|
|
61
58
|
# Ensure database connection
|
|
62
|
-
HTM::
|
|
59
|
+
HTM::SequelConfig.establish_connection!
|
|
63
60
|
|
|
64
61
|
tags = args[:prefix] ? HTM::Models::Tag.with_prefix(args[:prefix]) : HTM::Models::Tag.all
|
|
65
62
|
count = tags.count
|
|
@@ -79,10 +76,9 @@ namespace :htm do
|
|
|
79
76
|
|
|
80
77
|
desc "Rebuild all tags from node content. Clears existing tags and regenerates using LLM."
|
|
81
78
|
task :rebuild do
|
|
82
|
-
require 'htm'
|
|
83
79
|
|
|
84
80
|
# Ensure database connection
|
|
85
|
-
HTM::
|
|
81
|
+
HTM::SequelConfig.establish_connection!
|
|
86
82
|
|
|
87
83
|
# Node uses default_scope for active (non-deleted) nodes
|
|
88
84
|
node_count = HTM::Models::Node.count
|
|
@@ -107,11 +103,11 @@ namespace :htm do
|
|
|
107
103
|
puts "\nClearing existing tags..."
|
|
108
104
|
|
|
109
105
|
# Clear join table first (foreign key constraint)
|
|
110
|
-
deleted_associations = HTM::Models::NodeTag.
|
|
106
|
+
deleted_associations = HTM::Models::NodeTag.dataset.delete
|
|
111
107
|
puts " Deleted #{deleted_associations} node-tag associations"
|
|
112
108
|
|
|
113
109
|
# Clear tags table
|
|
114
|
-
deleted_tags = HTM::Models::Tag.
|
|
110
|
+
deleted_tags = HTM::Models::Tag.dataset.delete
|
|
115
111
|
puts " Deleted #{deleted_tags} tags"
|
|
116
112
|
|
|
117
113
|
puts "\nRegenerating tags for #{node_count} nodes..."
|
|
@@ -131,7 +127,7 @@ namespace :htm do
|
|
|
131
127
|
# Process each active node (default_scope excludes deleted)
|
|
132
128
|
errors = 0
|
|
133
129
|
|
|
134
|
-
HTM::Models::Node.
|
|
130
|
+
HTM::Models::Node.paged_each do |node|
|
|
135
131
|
begin
|
|
136
132
|
HTM::Jobs::GenerateTagsJob.perform(node_id: node.id)
|
|
137
133
|
rescue StandardError => e
|
|
@@ -157,10 +153,9 @@ namespace :htm do
|
|
|
157
153
|
|
|
158
154
|
desc "Export tags in all formats (tags.txt, tags.md, tags.svg). Optional prefix filter."
|
|
159
155
|
task :export, [:prefix] do |_t, args|
|
|
160
|
-
require 'htm'
|
|
161
156
|
|
|
162
157
|
# Ensure database connection
|
|
163
|
-
HTM::
|
|
158
|
+
HTM::SequelConfig.establish_connection!
|
|
164
159
|
|
|
165
160
|
tags = args[:prefix] ? HTM::Models::Tag.with_prefix(args[:prefix]) : HTM::Models::Tag.all
|
|
166
161
|
count = tags.count
|