htm 0.0.20 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +37 -9
  60. data/lib/htm/config/section.rb +74 -0
  61. data/lib/htm/config/validator.rb +83 -0
  62. data/lib/htm/config.rb +64 -360
  63. data/lib/htm/database.rb +85 -127
  64. data/lib/htm/errors.rb +14 -0
  65. data/lib/htm/integrations/sinatra.rb +13 -44
  66. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  67. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  68. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  69. data/lib/htm/loaders/defaults_loader.rb +23 -0
  70. data/lib/htm/loaders/markdown_loader.rb +17 -15
  71. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  72. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  73. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  74. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  75. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  76. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  77. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  78. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  79. data/lib/htm/long_term_memory.rb +13 -13
  80. data/lib/htm/mcp/cli.rb +115 -8
  81. data/lib/htm/mcp/resources.rb +4 -3
  82. data/lib/htm/mcp/server.rb +5 -4
  83. data/lib/htm/mcp/tools.rb +37 -28
  84. data/lib/htm/migration.rb +72 -0
  85. data/lib/htm/models/file_source.rb +52 -31
  86. data/lib/htm/models/node.rb +224 -108
  87. data/lib/htm/models/node_tag.rb +49 -28
  88. data/lib/htm/models/robot.rb +38 -27
  89. data/lib/htm/models/robot_node.rb +63 -35
  90. data/lib/htm/models/tag.rb +126 -123
  91. data/lib/htm/observability.rb +45 -41
  92. data/lib/htm/proposition_service.rb +76 -7
  93. data/lib/htm/railtie.rb +2 -2
  94. data/lib/htm/robot_group.rb +30 -18
  95. data/lib/htm/sequel_config.rb +215 -0
  96. data/lib/htm/sql_builder.rb +14 -16
  97. data/lib/htm/tag_service.rb +78 -0
  98. data/lib/htm/tasks.rb +3 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/workflows/remember_workflow.rb +6 -5
  101. data/lib/htm.rb +26 -22
  102. data/lib/tasks/db.rake +0 -2
  103. data/lib/tasks/doc.rake +2 -2
  104. data/lib/tasks/files.rake +11 -18
  105. data/lib/tasks/htm.rake +190 -62
  106. data/lib/tasks/jobs.rake +179 -54
  107. data/lib/tasks/tags.rake +8 -13
  108. data/scripts/backfill_parent_tags.rb +376 -0
  109. data/scripts/normalize_plural_tags.rb +335 -0
  110. metadata +109 -80
  111. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  112. data/examples/sinatra_app/Gemfile.lock +0 -166
  113. data/lib/htm/active_record_config.rb +0 -104
  114. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  117. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  118. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  119. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  120. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  122. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  154. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
data/lib/tasks/htm.rake CHANGED
@@ -8,35 +8,29 @@
8
8
  # require 'htm/tasks'
9
9
  #
10
10
 
11
- # Add lib directory to load path for development
12
- # This allows the tasks to work both during gem development and when installed
13
- lib_path = File.expand_path('../../lib', __dir__)
14
- $LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
15
-
16
11
  namespace :htm do
17
12
  namespace :db do
13
+ # Note: Database configuration validation (environment, URL/component reconciliation,
14
+ # naming convention) happens automatically when HTM is required above.
15
+
18
16
  desc "Set up HTM database schema and run migrations (set DUMP_SCHEMA=true to auto-dump schema after)"
19
17
  task :setup do
20
- require 'htm'
21
18
  dump_schema = ENV['DUMP_SCHEMA'] == 'true'
22
19
  HTM::Database.setup(dump_schema: dump_schema)
23
20
  end
24
21
 
25
22
  desc "Run pending database migrations"
26
23
  task :migrate do
27
- require 'htm'
28
24
  HTM::Database.migrate
29
25
  end
30
26
 
31
27
  desc "Show migration status"
32
28
  task :status do
33
- require 'htm'
34
29
  HTM::Database.migration_status
35
30
  end
36
31
 
37
32
  desc "Drop all HTM tables (WARNING: destructive! Set CONFIRM=yes to skip prompt)"
38
33
  task :drop do
39
- require 'htm'
40
34
  if ENV['CONFIRM'] == 'yes'
41
35
  HTM::Database.drop
42
36
  else
@@ -52,7 +46,6 @@ namespace :htm do
52
46
 
53
47
  desc "Drop and recreate database (WARNING: destructive! Set CONFIRM=yes to skip prompt)"
54
48
  task :reset do
55
- require 'htm'
56
49
  if ENV['CONFIRM'] == 'yes'
57
50
  HTM::Database.drop
58
51
  HTM::Database.setup(dump_schema: true)
@@ -70,9 +63,7 @@ namespace :htm do
70
63
 
71
64
  desc "Verify database connection (respects HTM_ENV/RAILS_ENV)"
72
65
  task :verify do
73
- require 'htm'
74
-
75
- config = HTM::ActiveRecordConfig.load_database_config
66
+ config = HTM::SequelConfig.load_database_config
76
67
 
77
68
  puts "Verifying HTM database connection (#{HTM.env})..."
78
69
  puts " Host: #{config[:host]}"
@@ -108,9 +99,7 @@ namespace :htm do
108
99
 
109
100
  desc "Open PostgreSQL console (respects HTM_ENV/RAILS_ENV)"
110
101
  task :console do
111
- require 'htm'
112
-
113
- config = HTM::ActiveRecordConfig.load_database_config
102
+ config = HTM::SequelConfig.load_database_config
114
103
 
115
104
  puts "Connecting to #{config[:database]} (#{HTM.env})..."
116
105
  exec "psql", "-h", config[:host],
@@ -121,22 +110,18 @@ namespace :htm do
121
110
 
122
111
  desc "Seed database with sample data"
123
112
  task :seed do
124
- require 'htm'
125
113
  HTM::Database.seed
126
114
  end
127
115
 
128
116
  desc "Show database info (size, tables, extensions)"
129
117
  task :info do
130
- require 'htm'
131
118
  HTM::Database.info
132
119
  end
133
120
 
134
121
  desc "Show record counts for all HTM tables"
135
122
  task :stats do
136
- require 'htm'
137
-
138
123
  # Ensure database connection
139
- HTM::ActiveRecordConfig.establish_connection!
124
+ HTM::SequelConfig.establish_connection!
140
125
 
141
126
  puts "\nHTM Database Statistics"
142
127
  puts "=" * 50
@@ -148,7 +133,7 @@ namespace :htm do
148
133
  # Node uses default_scope for active nodes, so m.count is active count
149
134
  active = m.count
150
135
  deleted = m.deleted.count
151
- with_embedding = m.where.not(embedding: nil).count
136
+ with_embedding = m.exclude(embedding: nil).count
152
137
  " (active: #{active}, deleted: #{deleted}, with embeddings: #{with_embedding})"
153
138
  }},
154
139
  { name: 'tags', model: HTM::Models::Tag },
@@ -175,11 +160,12 @@ namespace :htm do
175
160
 
176
161
  # Nodes per robot (via robot_nodes join table)
177
162
  robot_counts = HTM::Models::RobotNode
178
- .joins(:node)
179
- .where(nodes: { deleted_at: nil })
180
- .group(:robot_id)
181
- .count
182
- .transform_keys { |id| HTM::Models::Robot.find(id).name rescue "Unknown (#{id})" }
163
+ .join(:nodes, id: :node_id)
164
+ .where(Sequel[:nodes][:deleted_at] => nil)
165
+ .group_and_count(:robot_id)
166
+ .all
167
+ .to_h { |row| [row[:robot_id], row[:count]] }
168
+ .transform_keys { |id| HTM::Models::Robot[id]&.name || "Unknown (#{id})" }
183
169
  .sort_by { |_, count| -count }
184
170
  .first(5)
185
171
 
@@ -191,12 +177,13 @@ namespace :htm do
191
177
  end
192
178
 
193
179
  # Tag distribution
194
- top_root_tags = HTM::Models::Tag
195
- .select("split_part(name, ':', 1) as root, count(*) as cnt")
196
- .group("split_part(name, ':', 1)")
197
- .order("cnt DESC")
180
+ top_root_tags = HTM.db[:tags]
181
+ .select(Sequel.lit("split_part(name, ':', 1) as root"), Sequel.function(:count, Sequel.lit('*')).as(:cnt))
182
+ .group(Sequel.lit("split_part(name, ':', 1)"))
183
+ .order(Sequel.desc(:cnt))
198
184
  .limit(5)
199
- .map { |t| [t.root, t.cnt] }
185
+ .all
186
+ .map { |t| [t[:root], t[:cnt]] }
200
187
 
201
188
  if top_root_tags.any?
202
189
  puts " Top root tag categories:"
@@ -211,15 +198,14 @@ namespace :htm do
211
198
  namespace :rebuild do
212
199
  desc "Rebuild embeddings for all nodes. Clears existing embeddings and regenerates via LLM."
213
200
  task :embeddings do
214
- require 'htm'
215
201
  require 'ruby-progressbar'
216
202
 
217
203
  # Ensure database connection
218
- HTM::ActiveRecordConfig.establish_connection!
204
+ HTM::SequelConfig.establish_connection!
219
205
 
220
206
  # Node uses default_scope for active (non-deleted) nodes
221
207
  node_count = HTM::Models::Node.count
222
- with_embeddings = HTM::Models::Node.where.not(embedding: nil).count
208
+ with_embeddings = HTM::Models::Node.exclude(embedding: nil).count
223
209
  without_embeddings = node_count - with_embeddings
224
210
 
225
211
  puts "\nHTM Embeddings Rebuild"
@@ -239,7 +225,7 @@ namespace :htm do
239
225
  end
240
226
 
241
227
  puts "\nClearing existing embeddings..."
242
- cleared = HTM::Models::Node.where.not(embedding: nil).update_all(embedding: nil)
228
+ cleared = HTM::Models::Node.exclude(embedding: nil).update(embedding: nil)
243
229
  puts " Cleared #{cleared} embeddings"
244
230
 
245
231
  puts "\nGenerating embeddings for #{node_count} nodes..."
@@ -258,12 +244,12 @@ namespace :htm do
258
244
  errors = 0
259
245
  success = 0
260
246
 
261
- HTM::Models::Node.find_each do |node|
247
+ HTM::Models::Node.paged_each do |node|
262
248
  begin
263
249
  # Generate embedding directly (not via job since we cleared them)
264
250
  result = HTM::EmbeddingService.generate(node.content)
265
251
 
266
- node.update!(embedding: result[:storage_embedding])
252
+ node.update(embedding: result[:storage_embedding])
267
253
  success += 1
268
254
  rescue StandardError => e
269
255
  errors += 1
@@ -276,7 +262,7 @@ namespace :htm do
276
262
  progressbar.finish
277
263
 
278
264
  # Final stats
279
- final_with_embeddings = HTM::Models::Node.where.not(embedding: nil).count
265
+ final_with_embeddings = HTM::Models::Node.exclude(embedding: nil).count
280
266
 
281
267
  puts "\nRebuild complete!"
282
268
  puts " Nodes processed: #{node_count}"
@@ -287,11 +273,10 @@ namespace :htm do
287
273
 
288
274
  desc "Rebuild propositions for all non-proposition nodes. Extracts atomic facts and creates new nodes."
289
275
  task :propositions do
290
- require 'htm'
291
276
  require 'ruby-progressbar'
292
277
 
293
278
  # Ensure database connection
294
- HTM::ActiveRecordConfig.establish_connection!
279
+ HTM::SequelConfig.establish_connection!
295
280
 
296
281
  # Find all non-proposition nodes (nodes that haven't been extracted from)
297
282
  source_nodes = HTM::Models::Node.non_propositions
@@ -319,7 +304,7 @@ namespace :htm do
319
304
  # Delete existing proposition nodes
320
305
  if existing_propositions > 0
321
306
  puts "\nDeleting #{existing_propositions} existing proposition nodes..."
322
- deleted = HTM::Models::Node.propositions.delete_all
307
+ deleted = HTM::Models::Node.propositions.delete
323
308
  puts " Deleted #{deleted} proposition nodes"
324
309
  end
325
310
 
@@ -328,7 +313,7 @@ namespace :htm do
328
313
 
329
314
  # Get a robot ID for linking proposition nodes
330
315
  # Use the first robot or create a system robot
331
- robot = HTM::Models::Robot.first || HTM::Models::Robot.create!(name: 'proposition_rebuilder')
316
+ robot = HTM::Models::Robot.first || HTM::Models::Robot.create(name: 'proposition_rebuilder')
332
317
 
333
318
  # Create progress bar with ETA
334
319
  progressbar = ProgressBar.create(
@@ -344,7 +329,7 @@ namespace :htm do
344
329
  nodes_processed = 0
345
330
  propositions_created = 0
346
331
 
347
- source_nodes.find_each do |node|
332
+ source_nodes.paged_each do |node|
348
333
  begin
349
334
  # Extract propositions
350
335
  propositions = HTM::PropositionService.extract(node.content)
@@ -354,14 +339,14 @@ namespace :htm do
354
339
  token_count = HTM.count_tokens(proposition_text)
355
340
 
356
341
  # Create proposition node
357
- prop_node = HTM::Models::Node.create!(
342
+ prop_node = HTM::Models::Node.create(
358
343
  content: proposition_text,
359
344
  token_count: token_count,
360
345
  metadata: { is_proposition: true, source_node_id: node.id }
361
346
  )
362
347
 
363
348
  # Link to robot
364
- HTM::Models::RobotNode.find_or_create_by!(
349
+ HTM::Models::RobotNode.find_or_create(
365
350
  robot_id: robot.id,
366
351
  node_id: prop_node.id
367
352
  )
@@ -369,7 +354,7 @@ namespace :htm do
369
354
  # Generate embedding for proposition node
370
355
  begin
371
356
  result = HTM::EmbeddingService.generate(proposition_text)
372
- prop_node.update!(embedding: result[:storage_embedding])
357
+ prop_node.update(embedding: result[:storage_embedding])
373
358
  rescue StandardError => e
374
359
  progressbar.log " Warning: Embedding failed for proposition: #{e.message}"
375
360
  end
@@ -403,22 +388,18 @@ namespace :htm do
403
388
  namespace :schema do
404
389
  desc "Dump current schema to db/schema.sql"
405
390
  task :dump do
406
- require 'htm'
407
391
  HTM::Database.dump_schema
408
392
  end
409
393
 
410
394
  desc "Load schema from db/schema.sql"
411
395
  task :load do
412
- require 'htm'
413
396
  HTM::Database.load_schema
414
397
  end
415
398
  end
416
399
 
417
400
  desc "Create database if it doesn't exist (respects HTM_ENV/RAILS_ENV)"
418
401
  task :create do
419
- require 'htm'
420
-
421
- config = HTM::ActiveRecordConfig.load_database_config
402
+ config = HTM::SequelConfig.load_database_config
422
403
  db_name = config[:database]
423
404
 
424
405
  puts "Creating database: #{db_name} (#{HTM.env})"
@@ -472,18 +453,16 @@ namespace :htm do
472
453
  namespace :tags do
473
454
  desc "Soft delete orphaned tags and stale node_tags entries"
474
455
  task :cleanup do
475
- require 'htm'
476
-
477
456
  # Ensure database connection
478
- HTM::ActiveRecordConfig.establish_connection!
457
+ HTM::SequelConfig.establish_connection!
479
458
 
480
459
  puts "\nHTM Tag Cleanup"
481
460
  puts "=" * 50
482
461
 
483
462
  # Step 1: Find active node_tags pointing to soft-deleted or missing nodes
484
463
  stale_node_tags = HTM::Models::NodeTag
485
- .joins("LEFT JOIN nodes ON nodes.id = node_tags.node_id")
486
- .where("nodes.id IS NULL OR nodes.deleted_at IS NOT NULL")
464
+ .left_join(:nodes, id: :node_id)
465
+ .where(Sequel.lit("nodes.id IS NULL OR nodes.deleted_at IS NOT NULL"))
487
466
 
488
467
  stale_count = stale_node_tags.count
489
468
 
@@ -504,7 +483,7 @@ namespace :htm do
504
483
 
505
484
  if orphan_count > 0
506
485
  puts "\nOrphaned tags:"
507
- orphaned_tags.limit(20).pluck(:name).each do |name|
486
+ orphaned_tags.limit(20).select_map(:name).each do |name|
508
487
  puts " - #{name}"
509
488
  end
510
489
  puts " ... and #{orphan_count - 20} more" if orphan_count > 20
@@ -518,17 +497,17 @@ namespace :htm do
518
497
  next
519
498
  end
520
499
 
521
- now = Time.current
500
+ now = Time.now
522
501
 
523
502
  # Soft delete stale node_tags first
524
503
  if stale_count > 0
525
- soft_deleted_node_tags = stale_node_tags.update_all(deleted_at: now)
504
+ soft_deleted_node_tags = stale_node_tags.update(deleted_at: now)
526
505
  puts "\nSoft deleted #{soft_deleted_node_tags} stale node_tags entries."
527
506
  end
528
507
 
529
508
  # Then soft delete orphaned tags
530
509
  if orphan_count > 0
531
- soft_deleted_tags = orphaned_tags.update_all(deleted_at: now)
510
+ soft_deleted_tags = orphaned_tags.update(deleted_at: now)
532
511
  puts "Soft deleted #{soft_deleted_tags} orphaned tags."
533
512
  end
534
513
 
@@ -536,6 +515,156 @@ namespace :htm do
536
515
  end
537
516
  end
538
517
 
518
+ desc "Permanently delete all soft-deleted records from all tables (WARNING: irreversible!)"
519
+ task :purge_all do
520
+ # Ensure database connection
521
+ HTM::SequelConfig.establish_connection!
522
+
523
+ puts "\nHTM Purge All Soft-Deleted Records"
524
+ puts "=" * 60
525
+
526
+ # Count soft-deleted records in each table
527
+ deleted_nodes = HTM::Models::Node.deleted.count
528
+ deleted_node_tags = HTM::Models::NodeTag.deleted.count
529
+ deleted_robot_nodes = HTM::Models::RobotNode.deleted.count
530
+
531
+ # Find orphaned propositions (source_node_id no longer exists)
532
+ # Get all source_node_ids from propositions
533
+ proposition_source_ids = HTM::Models::Node
534
+ .where(Sequel.lit("metadata->>'is_proposition' = ?", 'true'))
535
+ .exclude(Sequel.lit("metadata->>'source_node_id' IS NULL"))
536
+ .select_map(Sequel.lit("(metadata->>'source_node_id')::integer"))
537
+ .uniq
538
+
539
+ # Find which source nodes no longer exist (not even soft-deleted)
540
+ existing_node_ids = HTM::Models::Node.with_deleted
541
+ .where(id: proposition_source_ids)
542
+ .select_map(:id)
543
+
544
+ missing_source_ids = proposition_source_ids - existing_node_ids
545
+
546
+ orphaned_propositions = if missing_source_ids.any?
547
+ HTM::Models::Node
548
+ .where(Sequel.lit("metadata->>'is_proposition' = ?", 'true'))
549
+ .where(Sequel.lit("(metadata->>'source_node_id')::integer") => missing_source_ids)
550
+ .count
551
+ else
552
+ 0
553
+ end
554
+
555
+ # Find orphaned join table entries (pointing to non-existent nodes)
556
+ orphaned_node_tags = HTM::Models::NodeTag.with_deleted
557
+ .left_join(:nodes, id: :node_id)
558
+ .where(Sequel[:nodes][:id] => nil)
559
+ .count
560
+
561
+ orphaned_robot_nodes = HTM::Models::RobotNode.with_deleted
562
+ .left_join(:nodes, id: :node_id)
563
+ .where(Sequel[:nodes][:id] => nil)
564
+ .count
565
+
566
+ # Find orphaned robots (no active memory nodes)
567
+ orphaned_robots = HTM::Models::Robot
568
+ .where(Sequel.~(Sequel.exists(
569
+ HTM::Models::RobotNode.where(Sequel[:robot_nodes][:robot_id] => Sequel[:robots][:id]).select(1)
570
+ )))
571
+ .count
572
+
573
+ # Display record counts by table
574
+ puts "\nSoft-deleted records by table:"
575
+ puts " %-20s %8d" % ['nodes', deleted_nodes]
576
+ puts " %-20s %8d" % ['node_tags', deleted_node_tags]
577
+ puts " %-20s %8d" % ['robot_nodes', deleted_robot_nodes]
578
+
579
+ puts "\nOrphaned records:"
580
+ puts " %-20s %8d (source node no longer exists)" % ['propositions', orphaned_propositions]
581
+ puts " %-20s %8d (pointing to missing nodes)" % ['node_tags', orphaned_node_tags]
582
+ puts " %-20s %8d (pointing to missing nodes)" % ['robot_nodes', orphaned_robot_nodes]
583
+ puts " %-20s %8d (no associated memory nodes)" % ['robots', orphaned_robots]
584
+
585
+ total_to_delete = deleted_nodes + deleted_node_tags + deleted_robot_nodes +
586
+ orphaned_propositions + orphaned_node_tags + orphaned_robot_nodes + orphaned_robots
587
+
588
+ puts " " + "-" * 40
589
+ puts " %-20s %8d" % ['Total', total_to_delete]
590
+
591
+ if total_to_delete == 0
592
+ puts "\nNo records to purge."
593
+ next
594
+ end
595
+
596
+ puts "\nWARNING: This permanently deletes records and cannot be undone!"
597
+ print "Type 'yes' to continue with hard delete: "
598
+ confirmation = $stdin.gets&.strip
599
+
600
+ unless confirmation == 'yes'
601
+ puts "Cancelled."
602
+ next
603
+ end
604
+
605
+ puts "\nPurging records..."
606
+
607
+ purged = {}
608
+
609
+ # Delete in correct order to maintain referential integrity:
610
+ # 1. Orphaned propositions first (creates orphaned join table entries)
611
+ # 2. Join tables (node_tags, robot_nodes)
612
+ # 3. Main tables last (nodes, robots)
613
+
614
+ # Step 1: Delete orphaned propositions (source_node_id no longer exists)
615
+ if missing_source_ids.any?
616
+ purged[:orphaned_propositions] = HTM::Models::Node
617
+ .where(Sequel.lit("metadata->>'is_proposition' = ?", 'true'))
618
+ .where(Sequel.lit("(metadata->>'source_node_id')::integer") => missing_source_ids)
619
+ .delete
620
+ else
621
+ purged[:orphaned_propositions] = 0
622
+ end
623
+
624
+ # Step 2: Delete orphaned node_tags (pointing to non-existent nodes)
625
+ # This now includes entries from deleted propositions
626
+ orphaned_nt_ids = HTM::Models::NodeTag.with_deleted
627
+ .left_join(:nodes, id: :node_id)
628
+ .where(Sequel[:nodes][:id] => nil)
629
+ .select_map(Sequel[:node_tags][:id])
630
+ purged[:orphaned_node_tags] = HTM::Models::NodeTag.with_deleted.where(id: orphaned_nt_ids).delete
631
+
632
+ # Step 3: Delete soft-deleted node_tags
633
+ purged[:deleted_node_tags] = HTM::Models::NodeTag.deleted.delete
634
+
635
+ # Step 4: Delete orphaned robot_nodes (pointing to non-existent nodes)
636
+ # This now includes entries from deleted propositions
637
+ orphaned_rn_ids = HTM::Models::RobotNode.with_deleted
638
+ .left_join(:nodes, id: :node_id)
639
+ .where(Sequel[:nodes][:id] => nil)
640
+ .select_map(Sequel[:robot_nodes][:id])
641
+ purged[:orphaned_robot_nodes] = HTM::Models::RobotNode.with_deleted.where(id: orphaned_rn_ids).delete
642
+
643
+ # Step 5: Delete soft-deleted robot_nodes
644
+ purged[:deleted_robot_nodes] = HTM::Models::RobotNode.deleted.delete
645
+
646
+ # Step 6: Delete soft-deleted nodes
647
+ purged[:deleted_nodes] = HTM::Models::Node.deleted.delete
648
+
649
+ # Step 7: Delete orphaned robots (no associated memory nodes)
650
+ purged[:orphaned_robots] = HTM::Models::Robot
651
+ .where(Sequel.~(Sequel.exists(
652
+ HTM::Models::RobotNode.where(Sequel[:robot_nodes][:robot_id] => Sequel[:robots][:id]).select(1)
653
+ )))
654
+ .delete
655
+
656
+ puts "\nPurge complete!"
657
+ puts " Orphaned propositions purged: #{purged[:orphaned_propositions]}"
658
+ puts " Orphaned node_tags purged: #{purged[:orphaned_node_tags]}"
659
+ puts " Deleted node_tags purged: #{purged[:deleted_node_tags]}"
660
+ puts " Orphaned robot_nodes purged: #{purged[:orphaned_robot_nodes]}"
661
+ puts " Deleted robot_nodes purged: #{purged[:deleted_robot_nodes]}"
662
+ puts " Deleted nodes purged: #{purged[:deleted_nodes]}"
663
+ puts " Orphaned robots purged: #{purged[:orphaned_robots]}"
664
+ puts " " + "-" * 40
665
+ puts " Total records purged: #{purged.values.sum}"
666
+ end
667
+
539
668
  end
540
669
 
541
670
  namespace :doc do
@@ -546,7 +675,6 @@ namespace :htm do
546
675
  puts "Install it with: brew install tbls"
547
676
  exit 1
548
677
  end
549
- require 'htm'
550
678
  HTM::Database.generate_docs
551
679
  end
552
680