htm 0.0.18 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +119 -1
  3. data/README.md +12 -0
  4. data/Rakefile +104 -18
  5. data/db/migrate/00001_enable_extensions.rb +9 -5
  6. data/db/migrate/00002_create_robots.rb +18 -6
  7. data/db/migrate/00003_create_file_sources.rb +30 -17
  8. data/db/migrate/00004_create_nodes.rb +60 -48
  9. data/db/migrate/00005_create_tags.rb +24 -12
  10. data/db/migrate/00006_create_node_tags.rb +28 -13
  11. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  12. data/db/schema.sql +17 -1
  13. data/db/seeds.rb +34 -34
  14. data/docs/api/embedding-service.md +140 -110
  15. data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
  16. data/docs/api/yard/HTM/Config.md +173 -0
  17. data/docs/api/yard/HTM/ConfigSection.md +28 -0
  18. data/docs/api/yard/HTM/Database.md +1 -1
  19. data/docs/api/yard/HTM/Railtie.md +2 -2
  20. data/docs/api/yard/HTM.md +0 -57
  21. data/docs/api/yard/index.csv +76 -61
  22. data/docs/api/yard-reference.md +2 -1
  23. data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
  24. data/docs/architecture/adrs/004-hive-mind.md +1 -1
  25. data/docs/architecture/adrs/008-robot-identification.md +1 -1
  26. data/docs/architecture/index.md +11 -9
  27. data/docs/architecture/overview.md +11 -7
  28. data/docs/assets/images/balanced-strategy-decay.svg +41 -0
  29. data/docs/assets/images/class-hierarchy.svg +1 -1
  30. data/docs/assets/images/eviction-priority.svg +43 -0
  31. data/docs/assets/images/exception-hierarchy.svg +2 -2
  32. data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
  33. data/docs/assets/images/htm-architecture-overview.svg +3 -3
  34. data/docs/assets/images/htm-core-components.svg +4 -4
  35. data/docs/assets/images/htm-layered-architecture.svg +1 -1
  36. data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
  37. data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
  38. data/docs/assets/images/memory-topology.svg +53 -0
  39. data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
  40. data/docs/database/naming-convention.md +244 -0
  41. data/docs/database_rake_tasks.md +31 -0
  42. data/docs/development/rake-tasks.md +80 -35
  43. data/docs/development/setup.md +76 -44
  44. data/docs/examples/basic-usage.md +133 -0
  45. data/docs/examples/config-files.md +170 -0
  46. data/docs/examples/file-loading.md +208 -0
  47. data/docs/examples/index.md +116 -0
  48. data/docs/examples/llm-configuration.md +168 -0
  49. data/docs/examples/mcp-client.md +172 -0
  50. data/docs/examples/rails-integration.md +173 -0
  51. data/docs/examples/robot-groups.md +210 -0
  52. data/docs/examples/sinatra-integration.md +218 -0
  53. data/docs/examples/standalone-app.md +216 -0
  54. data/docs/examples/telemetry.md +224 -0
  55. data/docs/examples/timeframes.md +143 -0
  56. data/docs/getting-started/installation.md +97 -40
  57. data/docs/getting-started/quick-start.md +28 -11
  58. data/docs/guides/configuration.md +515 -0
  59. data/docs/guides/file-loading.md +322 -0
  60. data/docs/guides/getting-started.md +40 -9
  61. data/docs/guides/index.md +3 -3
  62. data/docs/guides/mcp-server.md +100 -13
  63. data/docs/guides/propositions.md +264 -0
  64. data/docs/guides/recalling-memories.md +4 -4
  65. data/docs/guides/search-strategies.md +3 -3
  66. data/docs/guides/tags.md +318 -0
  67. data/docs/guides/telemetry.md +229 -0
  68. data/docs/index.md +8 -16
  69. data/docs/{architecture → robots}/hive-mind.md +8 -111
  70. data/docs/robots/index.md +73 -0
  71. data/docs/{guides → robots}/multi-robot.md +3 -3
  72. data/docs/{guides → robots}/robot-groups.md +8 -7
  73. data/docs/{architecture → robots}/two-tier-memory.md +13 -149
  74. data/docs/robots/why-robots.md +85 -0
  75. data/examples/.envrc +6 -0
  76. data/examples/.gitignore +2 -0
  77. data/examples/00_create_examples_db.rb +94 -0
  78. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  79. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  80. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  81. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  82. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  83. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  84. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  85. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  86. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  87. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  88. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  89. data/examples/11_robot_groups/README.md +335 -0
  90. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  91. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  92. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  93. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  94. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  95. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  96. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  97. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  98. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  99. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  100. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  101. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  102. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  103. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  104. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  105. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  106. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  107. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  108. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  109. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  110. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  111. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  112. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  113. data/examples/README.md +230 -211
  114. data/examples/examples_helper.rb +138 -0
  115. data/lib/htm/config/builder.rb +167 -0
  116. data/lib/htm/config/database.rb +317 -0
  117. data/lib/htm/config/defaults.yml +41 -13
  118. data/lib/htm/config/section.rb +74 -0
  119. data/lib/htm/config/validator.rb +83 -0
  120. data/lib/htm/config.rb +65 -361
  121. data/lib/htm/database.rb +85 -127
  122. data/lib/htm/errors.rb +14 -0
  123. data/lib/htm/integrations/sinatra.rb +13 -44
  124. data/lib/htm/job_adapter.rb +75 -1
  125. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  126. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  127. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  128. data/lib/htm/loaders/defaults_loader.rb +23 -0
  129. data/lib/htm/loaders/markdown_loader.rb +17 -15
  130. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  131. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  132. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  133. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  134. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  135. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  136. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  137. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  138. data/lib/htm/long_term_memory.rb +13 -13
  139. data/lib/htm/mcp/cli.rb +115 -8
  140. data/lib/htm/mcp/resources.rb +4 -3
  141. data/lib/htm/mcp/server.rb +5 -4
  142. data/lib/htm/mcp/tools.rb +37 -28
  143. data/lib/htm/migration.rb +72 -0
  144. data/lib/htm/models/file_source.rb +52 -31
  145. data/lib/htm/models/node.rb +224 -108
  146. data/lib/htm/models/node_tag.rb +49 -28
  147. data/lib/htm/models/robot.rb +38 -27
  148. data/lib/htm/models/robot_node.rb +63 -35
  149. data/lib/htm/models/tag.rb +126 -123
  150. data/lib/htm/observability.rb +45 -41
  151. data/lib/htm/proposition_service.rb +76 -7
  152. data/lib/htm/railtie.rb +2 -2
  153. data/lib/htm/robot_group.rb +30 -18
  154. data/lib/htm/sequel_config.rb +215 -0
  155. data/lib/htm/sql_builder.rb +14 -16
  156. data/lib/htm/tag_service.rb +78 -0
  157. data/lib/htm/tasks.rb +3 -0
  158. data/lib/htm/version.rb +1 -1
  159. data/lib/htm/workflows/remember_workflow.rb +213 -0
  160. data/lib/htm.rb +27 -22
  161. data/lib/tasks/db.rake +0 -2
  162. data/lib/tasks/doc.rake +2 -2
  163. data/lib/tasks/files.rake +11 -18
  164. data/lib/tasks/htm.rake +190 -62
  165. data/lib/tasks/jobs.rake +179 -54
  166. data/lib/tasks/tags.rake +8 -13
  167. data/mkdocs.yml +33 -8
  168. data/scripts/backfill_parent_tags.rb +376 -0
  169. data/scripts/normalize_plural_tags.rb +335 -0
  170. metadata +168 -86
  171. data/docs/api/yard/HTM/Configuration.md +0 -240
  172. data/docs/telemetry.md +0 -391
  173. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  174. data/examples/sinatra_app/Gemfile.lock +0 -166
  175. data/lib/htm/active_record_config.rb +0 -104
  176. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  177. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  178. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  179. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  180. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  181. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  182. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  183. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  184. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  185. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  186. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  187. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  188. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  189. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  190. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  191. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  192. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  193. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  194. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  195. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  196. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  197. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  198. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  199. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  200. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  201. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  202. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  203. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  204. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  205. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  206. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  207. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  208. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  209. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  210. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  211. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  212. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  213. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  214. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  215. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  216. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -0,0 +1,376 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Backfill Parent Tags
5
+ #
6
+ # This one-off script scans the existing tags table and creates missing
7
+ # parent tags for hierarchical tag names. It also ensures that nodes
8
+ # associated with child tags are also associated with all parent tags.
9
+ #
10
+ # Run with --help for usage information.
11
+
12
+ require 'optparse'
13
+ require 'ruby-progressbar'
14
+ require_relative '../lib/htm'
15
+
16
# One-off maintenance job that backfills parent tags for hierarchical
# (colon-separated) tag names.
#
# For every tag whose name contains a ':' the job makes sure each ancestor
# path exists as a tag of its own and that every node carrying the child tag
# also carries each ancestor tag. It runs in dry-run mode unless
# --no-dryrun is given.
class ParentTagBackfill
  VERSION = '0.0.2'

  # Matches the "user:password@" part of a URL. Anchoring on "://" keeps the
  # scheme separator and password-less URLs ("scheme://user@host") intact.
  CREDENTIALS_PATTERN = %r{(://[^:@/]*):[^@/]*@}

  attr_reader :options, :stats

  # Command-line entry point. With no arguments the help text is shown and
  # the process exits with status 0.
  #
  # @param argv [Array<String>] command-line arguments (consumed by parsing)
  def self.run(argv = ARGV)
    if argv.empty?
      new(['--help']).run
      exit 0
    end
    new(argv).run
  end

  # @param argv [Array<String>] command-line arguments; parsing may print
  #   usage and exit the process
  def initialize(argv)
    @options = { dryrun: true, verbose: false }
    @stats = {
      tags_scanned: 0,
      parent_tags_created: 0,
      node_tags_created: 0,
      cache_hits: 0,
      errors: []
    }
    # name => tag record (nil for tags that a dry run would create, or whose
    # creation failed)
    @tag_cache = {}
    parse_options(argv)
  end

  # Prints the header, asks for confirmation in live mode, then processes all
  # hierarchical tags and prints the summary.
  def run
    print_header
    return unless confirm_execution

    HTM::Database.setup
    process_tags
    print_summary
  end

  private

  # Parses argv into @options. Any parse failure or leftover positional
  # argument prints the error plus usage and exits with status 1.
  def parse_options(argv)
    parser = OptionParser.new do |opts|
      opts.banner = usage_banner

      opts.separator ""
      opts.separator "Options:"

      opts.on("--[no-]dryrun", "Dry run mode (default: true). Use --no-dryrun to apply changes.") do |v|
        @options[:dryrun] = v
      end

      opts.on("-v", "--verbose", "Show detailed output for each tag processed") do
        @options[:verbose] = true
      end

      opts.on("-h", "--help", "Show this help message") do
        puts opts
        exit 0
      end

      opts.on("--version", "Show version") do
        puts "backfill_parent_tags v#{VERSION}"
        exit 0
      end

      [
        "",
        "Examples:",
        " # Preview what would be done (default)",
        " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --dryrun",
        "",
        " # Preview with detailed output",
        " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --dryrun --verbose",
        "",
        " # Apply changes to database",
        " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --no-dryrun",
        "",
        "Environment Variables:",
        " HTM_DATABASE__URL PostgreSQL connection URL (required)",
        ""
      ].each { |line| opts.separator line }
    end

    remaining = parser.parse!(argv)
    return if remaining.empty?

    # Positional arguments are not supported by this script.
    warn "\033[1;31mError: unexpected argument(s): #{remaining.join(', ')}\033[0m"
    warn "" # bare `warn` prints nothing; pass an empty string for a blank line
    puts parser
    exit 1
  rescue OptionParser::ParseError => e
    # ParseError is the common superclass, so ambiguous options and stray
    # "--flag=value" arguments get the same friendly handling as unknown ones.
    warn "\033[1;31mError: #{e.message}\033[0m"
    warn ""
    puts parser
    exit 1
  end

  # @return [String] banner text shown at the top of --help
  def usage_banner
    <<~BANNER
      Usage: ruby scripts/backfill_parent_tags.rb [options]

      Backfills missing parent tags for hierarchical tag names in the HTM database.

      For a tag like "database:postgresql:extensions", this script:
      1. Creates parent tags: "database", "database:postgresql" (if missing)
      2. Associates nodes with all parent tags via node_tags records

      By default, runs in dry-run mode (no changes made). Use --no-dryrun to apply.
    BANNER
  end

  # Prints the run configuration (mode, verbosity, masked database URL).
  def print_header
    rule = "=" * 70
    puts rule
    puts "Parent Tag Backfill Script v#{VERSION}"
    puts rule
    puts "Mode: #{options[:dryrun] ? 'DRY RUN (no changes will be made)' : 'LIVE (will modify database)'}"
    puts "Verbose: #{options[:verbose] ? 'Yes' : 'No'}"
    puts "Database: #{masked_database_url}"
    puts rule
    puts
  end

  # @return [String] the configured database URL with any password replaced
  #   by "***", or "(not configured)" when no URL is set
  def masked_database_url
    HTM.config.database.url&.gsub(CREDENTIALS_PATTERN, '\1:***@') || '(not configured)'
  end

  # Dry runs never need confirmation; live runs require an explicit y/yes on
  # stdin.
  #
  # @return [Boolean] true when processing should continue
  def confirm_execution
    return true if options[:dryrun]

    puts "\033[1;33m⚠️ WARNING: This will modify the database!\033[0m"
    puts
    puts "This script will:"
    puts " • Create new tag records for missing parent tags"
    puts " • Create new node_tag records to associate nodes with parent tags"
    puts
    print "Are you sure you want to continue? [y/N] "

    response = $stdin.gets&.strip&.downcase
    if %w[y yes].include?(response)
      puts
      return true
    end

    puts
    puts "Aborted. No changes were made."
    false
  end

  # Iterates over every tag whose name contains ':' (ordered by name) and
  # processes each one while advancing a progress bar.
  def process_tags
    hierarchical_tags = HTM::Models::Tag.where(Sequel.like(:name, "%:%")).order(:name)
    total_count = hierarchical_tags.count

    puts "Found #{total_count} hierarchical tags to process"
    puts

    if total_count.zero?
      puts "No hierarchical tags found. Nothing to do."
      return
    end

    progressbar = ProgressBar.create(
      title: options[:dryrun] ? "Analyzing" : "Processing",
      total: total_count,
      format: "%t: |%B| %c/%C (%P%%) %e",
      output: $stdout
    )

    hierarchical_tags.paged_each do |tag|
      process_tag(tag)
      progressbar.increment
    end

    puts
  end

  # Ensures all ancestors of +tag+ exist and that every node tagged with
  # +tag+ is also linked to each ancestor.
  def process_tag(tag)
    @stats[:tags_scanned] += 1

    # Ancestors only; the tag itself already exists.
    parent_names = parent_tag_names(tag.name)
    return if parent_names.empty?

    log_verbose "Processing: #{tag.name}"
    log_verbose " Parents needed: #{parent_names.join(', ')}"

    parent_tags = find_or_create_parent_tags_batch(parent_names)
    node_ids = HTM::Models::NodeTag.where(tag_id: tag.id).select_map(:node_id)

    if node_ids.any?
      log_verbose " Nodes with this tag: #{node_ids.count}"

      # nil entries are parents that were not created (dry run or failure).
      parent_tags.compact.each do |parent_tag|
        create_missing_node_tags(parent_tag, node_ids)
      end
    end

    log_verbose ""
  end

  # Ancestor paths of a hierarchical tag name, shortest first.
  # For "a:b:c:d" returns ["a", "a:b", "a:b:c"].
  #
  # @param tag_name [String]
  # @return [Array<String>] empty when the name has a single level
  def parent_tag_names(tag_name)
    levels = tag_name.split(':')
    return [] if levels.size <= 1

    (1...levels.size).map { |depth| levels.first(depth).join(':') }
  end

  # Resolves +names+ to tag records, using the in-memory cache first and a
  # single query for everything not yet cached; missing tags are created
  # (or only counted, in dry-run mode).
  #
  # @param names [Array<String>]
  # @return [Array] tag records in the same order as +names+; an entry is nil
  #   when the tag was not actually created
  def find_or_create_parent_tags_batch(names)
    return [] if names.empty?

    uncached_names, cached_names = names.partition { |name| !@tag_cache.key?(name) }

    cached_names.each do |name|
      @stats[:cache_hits] += 1
      log_verbose " Tag '#{name}' (cached, id: #{@tag_cache[name]&.id || 'pending'})"
    end

    if uncached_names.any?
      # Build the name => tag lookup with plain Enumerable so this does not
      # depend on ActiveSupport's index_by being loaded.
      existing_tags = HTM::Models::Tag.where(name: uncached_names)
                                      .each_with_object({}) { |tag, found| found[tag.name] = tag }

      uncached_names.each do |name|
        existing = existing_tags[name]
        if existing
          @tag_cache[name] = existing
          log_verbose " Tag '#{name}' already exists (id: #{existing.id})"
        else
          @tag_cache[name] = create_parent_tag(name)
        end
      end
    end

    names.map { |name| @tag_cache[name] }
  end

  # Creates the tag +name+, or only counts it in dry-run mode.
  #
  # @return [Object, nil] the created (or concurrently created) tag record;
  #   nil in dry-run mode or when validation fails
  def create_parent_tag(name)
    if options[:dryrun]
      log_verbose " [DRY RUN] Would create tag: '#{name}'"
      @stats[:parent_tags_created] += 1
      return nil
    end

    tag = HTM::Models::Tag.create(name: name)
    log_verbose " Created tag: '#{name}' (id: #{tag.id})"
    @stats[:parent_tags_created] += 1
    tag
  rescue Sequel::ValidationFailed => e
    error_msg = "Failed to create tag '#{name}': #{e.message}"
    log_verbose " ERROR: #{error_msg}"
    @stats[:errors] << error_msg
    nil
  rescue Sequel::UniqueConstraintViolation
    # Another process created the tag between our lookup and insert.
    tag = HTM::Models::Tag.first(name: name)
    log_verbose " Tag '#{name}' created by concurrent process (id: #{tag&.id})"
    tag
  end

  # Links every node in +node_ids+ to +parent_tag+ unless already linked.
  #
  # @param parent_tag [Object] tag record responding to #id and #name
  # @param node_ids [Array] ids of nodes carrying the child tag
  def create_missing_node_tags(parent_tag, node_ids)
    existing_node_ids = HTM::Models::NodeTag
                        .where(tag_id: parent_tag.id, node_id: node_ids)
                        .select_map(:node_id)

    missing_node_ids = node_ids - existing_node_ids
    return if missing_node_ids.empty?

    if options[:dryrun]
      log_verbose " [DRY RUN] Would create #{missing_node_ids.count} node_tags for '#{parent_tag.name}'"
      @stats[:node_tags_created] += missing_node_ids.count
      return
    end

    records = missing_node_ids.map { |node_id| { node_id: node_id, tag_id: parent_tag.id } }

    begin
      # multi_insert does NOT skip duplicates: a row inserted concurrently
      # surfaces as a constraint violation, handled by the fallback below.
      HTM::Models::NodeTag.dataset.multi_insert(records)
      @stats[:node_tags_created] += records.size
      log_verbose " Created #{records.size} node_tags for '#{parent_tag.name}'"
    rescue Sequel::ValidationFailed, Sequel::UniqueConstraintViolation
      insert_node_tags_individually(parent_tag, missing_node_ids)
    end
  end

  # Fallback for a failed batch insert: inserts rows one at a time, skipping
  # rows that already exist and recording validation failures.
  def insert_node_tags_individually(parent_tag, node_ids)
    created_count = 0

    node_ids.each do |node_id|
      begin
        HTM::Models::NodeTag.create(node_id: node_id, tag_id: parent_tag.id)
        created_count += 1
        @stats[:node_tags_created] += 1
      rescue Sequel::UniqueConstraintViolation
        # Already exists, skip
      rescue Sequel::ValidationFailed => e
        error_msg = "Failed to create node_tag (node: #{node_id}, tag: #{parent_tag.id}): #{e.message}"
        log_verbose " ERROR: #{error_msg}"
        @stats[:errors] << error_msg
      end
    end

    return unless created_count.positive?

    log_verbose " Created #{created_count} node_tags for '#{parent_tag.name}' (fallback)"
  end

  # Prints +message+ only when --verbose was given.
  def log_verbose(message)
    puts message if options[:verbose]
  end

  # Prints the collected counters, up to ten recorded errors, and a closing
  # line that depends on the run mode.
  def print_summary
    rule = "=" * 70
    puts rule
    puts "Summary"
    puts rule
    puts "Tags scanned: #{@stats[:tags_scanned]}"
    puts "Parent tags created: #{@stats[:parent_tags_created]}"
    puts "Node tags created: #{@stats[:node_tags_created]}"
    puts "Cache hits: #{@stats[:cache_hits]}"

    errors = @stats[:errors]
    if errors.any?
      puts
      puts "\033[1;31mErrors (#{errors.count}):\033[0m"
      errors.first(10).each { |e| puts " • #{e}" }
      puts " ... and #{errors.count - 10} more" if errors.count > 10
    end

    puts
    if options[:dryrun]
      puts "\033[1;36mThis was a DRY RUN. No changes were made.\033[0m"
      puts "Run with --no-dryrun to apply changes."
    else
      puts "\033[1;32m✓ Backfill complete!\033[0m"
    end
  end
end
363
+
364
+ # Run the script
365
# Only run when executed directly (not when the file is required).
# Ctrl-C exits with status 130; any other StandardError is reported as fatal
# (with the first ten backtrace lines when DEBUG is set) and exits with 1.
if $PROGRAM_NAME == __FILE__
  begin
    ParentTagBackfill.run
  rescue Interrupt
    puts "\n\nAborted by user."
    exit(130)
  rescue StandardError => error
    warn "\033[1;31mFATAL ERROR: #{error.class.name} - #{error.message}\033[0m"
    warn error.backtrace.first(10).join("\n") if ENV['DEBUG']
    exit(1)
  end
end
@@ -0,0 +1,335 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Normalize Plural Tags
5
+ #
6
+ # This one-off script scans the existing tags table and normalizes
7
+ # plural tag level names to their singular forms. It merges plural
8
+ # tags into existing singular tags when both exist.
9
+ #
10
+ # Run with --help for usage information.
11
+
12
+ require 'optparse'
13
+ require 'ruby-progressbar'
14
+ require_relative '../lib/htm'
15
+
16
# One-off maintenance job that normalizes plural tag level names to their
# singular forms.
#
# Each tag name is singularized via HTM::TagService.singularize_tag_levels.
# When the singular tag already exists the plural tag is merged into it;
# otherwise the plural tag is renamed. It runs in dry-run mode unless
# --no-dryrun is given.
class PluralTagNormalizer
  VERSION = '0.0.1'

  # Matches the "user:password@" part of a URL. Anchoring on "://" keeps the
  # scheme separator and password-less URLs ("scheme://user@host") intact.
  CREDENTIALS_PATTERN = %r{(://[^:@/]*):[^@/]*@}

  attr_reader :options, :stats

  # Command-line entry point. With no arguments the help text is shown and
  # the process exits with status 0.
  #
  # @param argv [Array<String>] command-line arguments (consumed by parsing)
  def self.run(argv = ARGV)
    if argv.empty?
      new(['--help']).run
      exit 0
    end
    new(argv).run
  end

  # @param argv [Array<String>] command-line arguments; parsing may print
  #   usage and exit the process
  def initialize(argv)
    @options = { dryrun: true, verbose: false }
    @stats = {
      tags_scanned: 0,
      plural_tags_found: 0,
      tags_renamed: 0,
      tags_merged: 0,
      node_tags_reassigned: 0,
      errors: []
    }
    parse_options(argv)
  end

  # Prints the header, asks for confirmation in live mode, then scans all
  # tags and prints the summary.
  def run
    print_header
    return unless confirm_execution

    HTM::Database.setup
    process_tags
    print_summary
  end

  private

  # Parses argv into @options. Any parse failure or leftover positional
  # argument prints the error plus usage and exits with status 1.
  def parse_options(argv)
    parser = OptionParser.new do |opts|
      opts.banner = usage_banner

      opts.separator ""
      opts.separator "Options:"

      opts.on("--[no-]dryrun", "Dry run mode (default: true). Use --no-dryrun to apply changes.") do |v|
        @options[:dryrun] = v
      end

      opts.on("-v", "--verbose", "Show detailed output for each tag processed") do
        @options[:verbose] = true
      end

      opts.on("-h", "--help", "Show this help message") do
        puts opts
        exit 0
      end

      opts.on("--version", "Show version") do
        puts "normalize_plural_tags v#{VERSION}"
        exit 0
      end

      [
        "",
        "Examples:",
        " # Preview what would be done (default)",
        " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --dryrun",
        "",
        " # Preview with detailed output",
        " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --dryrun --verbose",
        "",
        " # Apply changes to database",
        " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --no-dryrun",
        "",
        "Environment Variables:",
        " HTM_DATABASE__URL PostgreSQL connection URL (required)",
        ""
      ].each { |line| opts.separator line }
    end

    remaining = parser.parse!(argv)
    return if remaining.empty?

    # Positional arguments are not supported by this script.
    warn "\033[1;31mError: unexpected argument(s): #{remaining.join(', ')}\033[0m"
    warn "" # bare `warn` prints nothing; pass an empty string for a blank line
    puts parser
    exit 1
  rescue OptionParser::ParseError => e
    # ParseError is the common superclass, so ambiguous options and stray
    # "--flag=value" arguments get the same friendly handling as unknown ones.
    warn "\033[1;31mError: #{e.message}\033[0m"
    warn ""
    puts parser
    exit 1
  end

  # @return [String] banner text shown at the top of --help
  def usage_banner
    <<~BANNER
      Usage: ruby scripts/normalize_plural_tags.rb [options]

      Normalizes plural tag level names to singular forms in the HTM database.

      For a tag like "users:frameworks:models", this script:
      1. Singularizes each level: "user:framework:model"
      2. If singular tag exists, merges node associations
      3. If singular tag doesn't exist, renames the plural tag

      By default, runs in dry-run mode (no changes made). Use --no-dryrun to apply.
    BANNER
  end

  # Prints the run configuration (mode, verbosity, masked database URL).
  def print_header
    rule = "=" * 70
    puts rule
    puts "Plural Tag Normalizer v#{VERSION}"
    puts rule
    puts "Mode: #{options[:dryrun] ? 'DRY RUN (no changes will be made)' : 'LIVE (will modify database)'}"
    puts "Verbose: #{options[:verbose] ? 'Yes' : 'No'}"
    puts "Database: #{masked_database_url}"
    puts rule
    puts
  end

  # @return [String] the configured database URL with any password replaced
  #   by "***", or "(not configured)" when no URL is set
  def masked_database_url
    HTM.config.database.url&.gsub(CREDENTIALS_PATTERN, '\1:***@') || '(not configured)'
  end

  # Dry runs never need confirmation; live runs require an explicit y/yes on
  # stdin.
  #
  # @return [Boolean] true when processing should continue
  def confirm_execution
    return true if options[:dryrun]

    puts "\033[1;33m⚠️ WARNING: This will modify the database!\033[0m"
    puts
    puts "This script will:"
    puts " • Rename plural tags to singular forms"
    puts " • Merge node associations when both plural and singular exist"
    puts " • Delete redundant plural tags after merging"
    puts
    print "Are you sure you want to continue? [y/N] "

    response = $stdin.gets&.strip&.downcase
    if %w[y yes].include?(response)
      puts
      return true
    end

    puts
    puts "Aborted. No changes were made."
    false
  end

  # Iterates over every tag (ordered by name) and processes each one while
  # advancing a progress bar.
  def process_tags
    all_tags = HTM::Models::Tag.order(:name)
    total_count = all_tags.count

    puts "Found #{total_count} tags to scan"
    puts

    if total_count.zero?
      puts "No tags found. Nothing to do."
      return
    end

    progressbar = ProgressBar.create(
      title: options[:dryrun] ? "Analyzing" : "Processing",
      total: total_count,
      format: "%t: |%B| %c/%C (%P%%) %e",
      output: $stdout
    )

    all_tags.paged_each do |tag|
      process_tag(tag)
      progressbar.increment
    end

    puts
  end

  # Singularizes +tag+'s name; when that changes the name, either merges the
  # tag into the existing singular tag or renames it.
  def process_tag(tag)
    @stats[:tags_scanned] += 1

    singular_name = singularize_tag(tag.name)
    return if singular_name == tag.name

    @stats[:plural_tags_found] += 1
    log_verbose "Found plural tag: '#{tag.name}' -> '#{singular_name}'"

    existing_singular = HTM::Models::Tag.first(name: singular_name)

    if existing_singular
      merge_tags(tag, existing_singular)
    else
      rename_tag(tag, singular_name)
    end
  end

  # Delegates to HTM::TagService so the script uses the same singularization
  # as the rest of the library.
  #
  # @param tag_name [String]
  # @return [String] the singularized tag name
  def singularize_tag(tag_name)
    HTM::TagService.singularize_tag_levels(tag_name)
  end

  # Folds +plural_tag+ into +singular_tag+: node links are moved over (links
  # the singular tag already has are dropped) and the plural tag is deleted,
  # including when it has no node links at all.
  def merge_tags(plural_tag, singular_tag)
    log_verbose " Merging '#{plural_tag.name}' into '#{singular_tag.name}'"

    plural_node_ids = HTM::Models::NodeTag.where(tag_id: plural_tag.id).select_map(:node_id)

    # Nodes that already carry the singular tag; their plural links are
    # duplicates to delete rather than reassign.
    duplicate_node_ids =
      if plural_node_ids.empty?
        log_verbose " No nodes to reassign"
        []
      else
        HTM::Models::NodeTag
          .where(tag_id: singular_tag.id, node_id: plural_node_ids)
          .select_map(:node_id)
      end
    new_node_ids = plural_node_ids - duplicate_node_ids

    if options[:dryrun]
      if plural_node_ids.any?
        log_verbose " [DRY RUN] Would reassign #{new_node_ids.count} nodes from plural to singular"
        log_verbose " [DRY RUN] Would delete #{duplicate_node_ids.count} duplicate node_tags"
      end
      log_verbose " [DRY RUN] Would delete plural tag '#{plural_tag.name}'"
      @stats[:node_tags_reassigned] += new_node_ids.count
      @stats[:tags_merged] += 1
    else
      apply_merge(plural_tag, singular_tag, new_node_ids, duplicate_node_ids)
    end
  end

  # Performs the merge inside one transaction. Counters are only updated
  # after the transaction block returns, so a rolled-back merge is not
  # reported as done.
  def apply_merge(plural_tag, singular_tag, new_node_ids, duplicate_node_ids)
    HTM.db.transaction do
      if new_node_ids.any?
        HTM::Models::NodeTag.where(tag_id: plural_tag.id, node_id: new_node_ids)
                            .update(tag_id: singular_tag.id)
        log_verbose " Reassigned #{new_node_ids.count} nodes to '#{singular_tag.name}'"
      end

      if duplicate_node_ids.any?
        HTM::Models::NodeTag.where(tag_id: plural_tag.id, node_id: duplicate_node_ids).delete
        log_verbose " Deleted #{duplicate_node_ids.count} duplicate node_tags"
      end

      plural_tag.destroy
      log_verbose " Deleted plural tag '#{plural_tag.name}'"
    end

    @stats[:node_tags_reassigned] += new_node_ids.count
    @stats[:tags_merged] += 1
  rescue Sequel::Error => e
    error_msg = "Failed to merge '#{plural_tag.name}' into '#{singular_tag.name}': #{e.message}"
    log_verbose " ERROR: #{error_msg}"
    @stats[:errors] << error_msg
  end

  # Renames +tag+ to +new_name+ (or only counts it in dry-run mode).
  def rename_tag(tag, new_name)
    # Capture the name first: after #update the record reports the new name,
    # which would make the log lines read "Renamed 'x' to 'x'".
    old_name = tag.name

    if options[:dryrun]
      log_verbose " [DRY RUN] Would rename '#{old_name}' to '#{new_name}'"
      @stats[:tags_renamed] += 1
      return
    end

    tag.update(name: new_name)
    log_verbose " Renamed '#{old_name}' to '#{new_name}'"
    @stats[:tags_renamed] += 1
  rescue Sequel::Error => e
    error_msg = "Failed to rename '#{old_name}' to '#{new_name}': #{e.message}"
    log_verbose " ERROR: #{error_msg}"
    @stats[:errors] << error_msg
  end

  # Prints +message+ only when --verbose was given.
  def log_verbose(message)
    puts message if options[:verbose]
  end

  # Prints the collected counters, up to ten recorded errors, and a closing
  # line that depends on the run mode.
  def print_summary
    rule = "=" * 70
    puts rule
    puts "Summary"
    puts rule
    puts "Tags scanned: #{@stats[:tags_scanned]}"
    puts "Plural tags found: #{@stats[:plural_tags_found]}"
    puts "Tags renamed: #{@stats[:tags_renamed]}"
    puts "Tags merged: #{@stats[:tags_merged]}"
    puts "Node tags reassigned: #{@stats[:node_tags_reassigned]}"

    errors = @stats[:errors]
    if errors.any?
      puts
      puts "\033[1;31mErrors (#{errors.count}):\033[0m"
      errors.first(10).each { |e| puts " • #{e}" }
      puts " ... and #{errors.count - 10} more" if errors.count > 10
    end

    puts
    if options[:dryrun]
      puts "\033[1;36mThis was a DRY RUN. No changes were made.\033[0m"
      puts "Run with --no-dryrun to apply changes."
    else
      puts "\033[1;32m✓ Normalization complete!\033[0m"
    end
  end
end
+
323
+ # Run the script
324
# Only run when executed directly (not when the file is required).
# Ctrl-C exits with status 130; any other StandardError is reported as fatal
# (with the first ten backtrace lines when DEBUG is set) and exits with 1.
if $PROGRAM_NAME == __FILE__
  begin
    PluralTagNormalizer.run
  rescue Interrupt
    puts "\n\nAborted by user."
    exit(130)
  rescue StandardError => error
    warn "\033[1;31mFATAL ERROR: #{error.class.name} - #{error.message}\033[0m"
    warn error.backtrace.first(10).join("\n") if ENV['DEBUG']
    exit(1)
  end
end