htm 0.0.20 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +33 -33
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/guides/mcp-server.md +70 -1
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +37 -9
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +64 -360
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +6 -5
- data/lib/htm.rb +26 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +109 -80
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Backfill Parent Tags
|
|
5
|
+
#
|
|
6
|
+
# This one-off script scans the existing tags table and creates missing
|
|
7
|
+
# parent tags for hierarchical tag names. It also ensures that nodes
|
|
8
|
+
# associated with child tags are also associated with all parent tags.
|
|
9
|
+
#
|
|
10
|
+
# Run with --help for usage information.
|
|
11
|
+
|
|
12
|
+
require 'optparse'
|
|
13
|
+
require 'ruby-progressbar'
|
|
14
|
+
require_relative '../lib/htm'
|
|
15
|
+
|
|
16
|
+
class ParentTagBackfill
|
|
17
|
+
VERSION = '0.0.2'
|
|
18
|
+
|
|
19
|
+
attr_reader :options, :stats
|
|
20
|
+
|
|
21
|
+
def self.run(argv = ARGV)
|
|
22
|
+
if argv.empty?
|
|
23
|
+
new(['--help']).run
|
|
24
|
+
exit 0
|
|
25
|
+
end
|
|
26
|
+
new(argv).run
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def initialize(argv)
|
|
30
|
+
@options = {
|
|
31
|
+
dryrun: true,
|
|
32
|
+
verbose: false
|
|
33
|
+
}
|
|
34
|
+
@stats = {
|
|
35
|
+
tags_scanned: 0,
|
|
36
|
+
parent_tags_created: 0,
|
|
37
|
+
node_tags_created: 0,
|
|
38
|
+
cache_hits: 0,
|
|
39
|
+
errors: []
|
|
40
|
+
}
|
|
41
|
+
@tag_cache = {} # Cache for tags we've already found/created
|
|
42
|
+
parse_options(argv)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def run
|
|
46
|
+
print_header
|
|
47
|
+
return unless confirm_execution
|
|
48
|
+
|
|
49
|
+
HTM::Database.setup
|
|
50
|
+
process_tags
|
|
51
|
+
print_summary
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Parses command-line switches from +argv+ into @options.
#
# Recognised switches: --[no-]dryrun, -v/--verbose, -h/--help, --version.
# --help and --version print and exit 0. Any parse failure (unknown,
# ambiguous, or malformed switch) or leftover positional argument prints an
# error plus the usage text and exits 1.
#
# @param argv [Array<String>] raw arguments; consumed in place by parse!
# @return [void]
def parse_options(argv)
  parser = OptionParser.new do |opts|
    opts.banner = usage_banner

    opts.separator ""
    opts.separator "Options:"

    opts.on("--[no-]dryrun", "Dry run mode (default: true). Use --no-dryrun to apply changes.") do |v|
      @options[:dryrun] = v
    end

    opts.on("-v", "--verbose", "Show detailed output for each tag processed") do
      @options[:verbose] = true
    end

    opts.on("-h", "--help", "Show this help message") do
      puts opts
      exit 0
    end

    opts.on("--version", "Show version") do
      puts "backfill_parent_tags v#{VERSION}"
      exit 0
    end

    opts.separator ""
    opts.separator "Examples:"
    opts.separator " # Preview what would be done (default)"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --dryrun"
    opts.separator ""
    opts.separator " # Preview with detailed output"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --dryrun --verbose"
    opts.separator ""
    opts.separator " # Apply changes to database"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/backfill_parent_tags.rb --no-dryrun"
    opts.separator ""
    opts.separator "Environment Variables:"
    opts.separator " HTM_DATABASE__URL PostgreSQL connection URL (required)"
    opts.separator ""
  end

  remaining = parser.parse!(argv)

  # Check for unexpected positional arguments
  if remaining.any?
    warn "\033[1;31mError: unexpected argument(s): #{remaining.join(', ')}\033[0m"
    warn
    puts parser
    exit 1
  end
rescue OptionParser::ParseError => e
  # ParseError is the common parent of InvalidOption, AmbiguousOption,
  # NeedlessArgument, etc., so every bad command line gets the usage text
  # instead of falling through to the script's generic FATAL ERROR handler.
  warn "\033[1;31mError: #{e.message}\033[0m"
  warn
  puts parser
  exit 1
end
|
|
112
|
+
|
|
113
|
+
def usage_banner
|
|
114
|
+
<<~BANNER
|
|
115
|
+
Usage: ruby scripts/backfill_parent_tags.rb [options]
|
|
116
|
+
|
|
117
|
+
Backfills missing parent tags for hierarchical tag names in the HTM database.
|
|
118
|
+
|
|
119
|
+
For a tag like "database:postgresql:extensions", this script:
|
|
120
|
+
1. Creates parent tags: "database", "database:postgresql" (if missing)
|
|
121
|
+
2. Associates nodes with all parent tags via node_tags records
|
|
122
|
+
|
|
123
|
+
By default, runs in dry-run mode (no changes made). Use --no-dryrun to apply.
|
|
124
|
+
BANNER
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def print_header
|
|
128
|
+
puts "=" * 70
|
|
129
|
+
puts "Parent Tag Backfill Script v#{VERSION}"
|
|
130
|
+
puts "=" * 70
|
|
131
|
+
puts "Mode: #{options[:dryrun] ? 'DRY RUN (no changes will be made)' : 'LIVE (will modify database)'}"
|
|
132
|
+
puts "Verbose: #{options[:verbose] ? 'Yes' : 'No'}"
|
|
133
|
+
puts "Database: #{masked_database_url}"
|
|
134
|
+
puts "=" * 70
|
|
135
|
+
puts
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# Returns the configured database URL with its password replaced by "***",
# or "(not configured)" when no URL is set.
#
# Everything between the first ":" of the userinfo part and the last "@"
# before the path is masked, so passwords containing ":" or "@" are hidden
# completely. URLs without a password are returned unchanged.
#
# @return [String]
def masked_database_url
  url = HTM.config.database.url
  return '(not configured)' unless url

  url.sub(%r{\A((?:[a-z][a-z0-9+.\-]*://)?[^:/@]*):[^/]*@}i, '\1:***@')
end
|
|
141
|
+
|
|
142
|
+
def confirm_execution
|
|
143
|
+
return true if options[:dryrun]
|
|
144
|
+
|
|
145
|
+
puts "\033[1;33m⚠️ WARNING: This will modify the database!\033[0m"
|
|
146
|
+
puts
|
|
147
|
+
puts "This script will:"
|
|
148
|
+
puts " • Create new tag records for missing parent tags"
|
|
149
|
+
puts " • Create new node_tag records to associate nodes with parent tags"
|
|
150
|
+
puts
|
|
151
|
+
print "Are you sure you want to continue? [y/N] "
|
|
152
|
+
|
|
153
|
+
response = $stdin.gets&.strip&.downcase
|
|
154
|
+
unless response == 'y' || response == 'yes'
|
|
155
|
+
puts
|
|
156
|
+
puts "Aborted. No changes were made."
|
|
157
|
+
return false
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
puts
|
|
161
|
+
true
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def process_tags
|
|
165
|
+
hierarchical_tags = HTM::Models::Tag.where(Sequel.like(:name, "%:%")).order(:name)
|
|
166
|
+
total_count = hierarchical_tags.count
|
|
167
|
+
|
|
168
|
+
puts "Found #{total_count} hierarchical tags to process"
|
|
169
|
+
puts
|
|
170
|
+
|
|
171
|
+
if total_count == 0
|
|
172
|
+
puts "No hierarchical tags found. Nothing to do."
|
|
173
|
+
return
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
progressbar = ProgressBar.create(
|
|
177
|
+
title: options[:dryrun] ? "Analyzing" : "Processing",
|
|
178
|
+
total: total_count,
|
|
179
|
+
format: "%t: |%B| %c/%C (%P%%) %e",
|
|
180
|
+
output: $stdout
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
hierarchical_tags.paged_each do |tag|
|
|
184
|
+
process_tag(tag)
|
|
185
|
+
progressbar.increment
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
puts
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def process_tag(tag)
|
|
192
|
+
@stats[:tags_scanned] += 1
|
|
193
|
+
|
|
194
|
+
# Get parent names only (excludes the tag itself since it already exists)
|
|
195
|
+
parent_names = parent_tag_names(tag.name)
|
|
196
|
+
return if parent_names.empty?
|
|
197
|
+
|
|
198
|
+
log_verbose "Processing: #{tag.name}"
|
|
199
|
+
log_verbose " Parents needed: #{parent_names.join(', ')}"
|
|
200
|
+
|
|
201
|
+
# OPTIMIZATION: Batch lookup - find all existing parents in one query
|
|
202
|
+
parent_tags = find_or_create_parent_tags_batch(parent_names)
|
|
203
|
+
|
|
204
|
+
# Get nodes associated with this tag
|
|
205
|
+
node_ids = HTM::Models::NodeTag.where(tag_id: tag.id).select_map(:node_id)
|
|
206
|
+
|
|
207
|
+
if node_ids.any?
|
|
208
|
+
log_verbose " Nodes with this tag: #{node_ids.count}"
|
|
209
|
+
|
|
210
|
+
# Associate nodes with all parent tags
|
|
211
|
+
parent_tags.each do |parent_tag|
|
|
212
|
+
next unless parent_tag
|
|
213
|
+
create_missing_node_tags(parent_tag, node_ids)
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
log_verbose "" if options[:verbose]
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# Extract the ancestor tag names of a colon-separated hierarchical tag.
#
# For "a:b:c:d" returns ["a", "a:b", "a:b:c"]; the full name is excluded
# because that tag already exists. Prefixes containing an empty level
# (from names such as ":a" or "a::b") are dropped so that no blank or
# colon-terminated parent name is ever produced.
#
# @param tag_name [String] hierarchical tag name
# @return [Array<String>] parent names, shallowest first
def parent_tag_names(tag_name)
  levels = tag_name.split(':')
  return [] if levels.size <= 1

  (1...levels.size).map do |depth|
    prefix = levels.first(depth)
    prefix.join(':') unless prefix.any?(&:empty?)
  end.compact
end
|
|
229
|
+
|
|
230
|
+
# Resolve a list of parent tag names to tag records, creating missing ones.
#
# Names already in @tag_cache are served from it (counted as cache hits).
# The remaining names are looked up with one query; any still missing are
# passed to create_parent_tag. A cached value may be nil (dry-run "would
# create", or a failed create), so callers must tolerate nil entries.
#
# @param names [Array<String>] parent tag names
# @return [Array] one entry per name, in the same order as +names+
def find_or_create_parent_tags_batch(names)
  return [] if names.empty?

  # Check cache first
  uncached_names = names.reject { |name| @tag_cache.key?(name) }
  cached_names = names - uncached_names

  cached_names.each do |name|
    @stats[:cache_hits] += 1
    log_verbose " Tag '#{name}' (cached, id: #{@tag_cache[name]&.id || 'pending'})"
  end

  if uncached_names.any?
    # Single query to find all existing tags, keyed by name. as_hash is
    # Sequel's own API; index_by only exists when ActiveSupport is loaded.
    existing_tags = HTM::Models::Tag.where(name: uncached_names).as_hash(:name)

    uncached_names.each do |name|
      found = existing_tags[name]
      if found
        @tag_cache[name] = found
        log_verbose " Tag '#{name}' already exists (id: #{found.id})"
      else
        # Tag doesn't exist - create it
        @tag_cache[name] = create_parent_tag(name)
      end
    end
  end

  # Return tags in original order
  names.map { |name| @tag_cache[name] }
end
|
|
262
|
+
|
|
263
|
+
def create_parent_tag(name)
|
|
264
|
+
if options[:dryrun]
|
|
265
|
+
log_verbose " [DRY RUN] Would create tag: '#{name}'"
|
|
266
|
+
@stats[:parent_tags_created] += 1
|
|
267
|
+
return nil
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
begin
|
|
271
|
+
tag = HTM::Models::Tag.create(name: name)
|
|
272
|
+
log_verbose " Created tag: '#{name}' (id: #{tag.id})"
|
|
273
|
+
@stats[:parent_tags_created] += 1
|
|
274
|
+
tag
|
|
275
|
+
rescue Sequel::ValidationFailed => e
|
|
276
|
+
error_msg = "Failed to create tag '#{name}': #{e.message}"
|
|
277
|
+
log_verbose " ERROR: #{error_msg}"
|
|
278
|
+
@stats[:errors] << error_msg
|
|
279
|
+
nil
|
|
280
|
+
rescue Sequel::UniqueConstraintViolation
|
|
281
|
+
# Race condition - tag was created by another process, fetch it
|
|
282
|
+
tag = HTM::Models::Tag.first(name: name)
|
|
283
|
+
log_verbose " Tag '#{name}' created by concurrent process (id: #{tag&.id})"
|
|
284
|
+
tag
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
# Link every node in +node_ids+ to +parent_tag+ unless already linked.
#
# In dry-run mode only the would-be count is recorded. Otherwise the
# missing rows are inserted in one batch. multi_insert does NOT skip
# duplicates, so if the batch raises a database error (e.g. a row inserted
# concurrently) the rows are retried one at a time, skipping duplicates
# and recording other failures in @stats[:errors].
#
# Each insert attempt runs in a savepoint so that a failed statement does
# not poison an enclosing transaction.
#
# @param parent_tag [#id, #name] tag to associate the nodes with
# @param node_ids [Array] node ids carrying the child tag
# @return [void]
def create_missing_node_tags(parent_tag, node_ids)
  node_tags = HTM::Models::NodeTag

  # Find which nodes are NOT already associated with this parent tag
  existing_node_ids = node_tags
    .where(tag_id: parent_tag.id, node_id: node_ids)
    .select_map(:node_id)

  missing_node_ids = (node_ids - existing_node_ids).uniq
  return if missing_node_ids.empty?

  if options[:dryrun]
    log_verbose " [DRY RUN] Would create #{missing_node_ids.count} node_tags for '#{parent_tag.name}'"
    @stats[:node_tags_created] += missing_node_ids.count
    return
  end

  records = missing_node_ids.map do |node_id|
    { node_id: node_id, tag_id: parent_tag.id }
  end

  begin
    # Batch insert; all-or-nothing thanks to the savepoint.
    node_tags.db.transaction(savepoint: true) do
      node_tags.dataset.multi_insert(records)
    end
    created_count = records.size
    @stats[:node_tags_created] += created_count
    log_verbose " Created #{created_count} node_tags for '#{parent_tag.name}'" if created_count > 0
  rescue Sequel::DatabaseError
    # Fallback to individual inserts if batch fails
    created_count = 0
    missing_node_ids.each do |node_id|
      begin
        node_tags.db.transaction(savepoint: true) do
          node_tags.create(node_id: node_id, tag_id: parent_tag.id)
        end
        created_count += 1
        @stats[:node_tags_created] += 1
      rescue Sequel::UniqueConstraintViolation
        # Already exists, skip
      rescue Sequel::Error => e
        error_msg = "Failed to create node_tag (node: #{node_id}, tag: #{parent_tag.id}): #{e.message}"
        log_verbose " ERROR: #{error_msg}"
        @stats[:errors] << error_msg
      end
    end
    log_verbose " Created #{created_count} node_tags for '#{parent_tag.name}' (fallback)" if created_count > 0
  end
end
|
|
333
|
+
|
|
334
|
+
def log_verbose(message)
|
|
335
|
+
puts message if options[:verbose]
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
def print_summary
|
|
339
|
+
puts "=" * 70
|
|
340
|
+
puts "Summary"
|
|
341
|
+
puts "=" * 70
|
|
342
|
+
puts "Tags scanned: #{@stats[:tags_scanned]}"
|
|
343
|
+
puts "Parent tags created: #{@stats[:parent_tags_created]}"
|
|
344
|
+
puts "Node tags created: #{@stats[:node_tags_created]}"
|
|
345
|
+
puts "Cache hits: #{@stats[:cache_hits]}"
|
|
346
|
+
|
|
347
|
+
if @stats[:errors].any?
|
|
348
|
+
puts
|
|
349
|
+
puts "\033[1;31mErrors (#{@stats[:errors].count}):\033[0m"
|
|
350
|
+
@stats[:errors].first(10).each { |e| puts " • #{e}" }
|
|
351
|
+
puts " ... and #{@stats[:errors].count - 10} more" if @stats[:errors].count > 10
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
puts
|
|
355
|
+
if options[:dryrun]
|
|
356
|
+
puts "\033[1;36mThis was a DRY RUN. No changes were made.\033[0m"
|
|
357
|
+
puts "Run with --no-dryrun to apply changes."
|
|
358
|
+
else
|
|
359
|
+
puts "\033[1;32m✓ Backfill complete!\033[0m"
|
|
360
|
+
end
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
# Run the script
|
|
365
|
+
if __FILE__ == $PROGRAM_NAME
|
|
366
|
+
begin
|
|
367
|
+
ParentTagBackfill.run
|
|
368
|
+
rescue Interrupt
|
|
369
|
+
puts "\n\nAborted by user."
|
|
370
|
+
exit 130
|
|
371
|
+
rescue => e
|
|
372
|
+
warn "\033[1;31mFATAL ERROR: #{e.class.name} - #{e.message}\033[0m"
|
|
373
|
+
warn e.backtrace.first(10).join("\n") if ENV['DEBUG']
|
|
374
|
+
exit 1
|
|
375
|
+
end
|
|
376
|
+
end
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Normalize Plural Tags
|
|
5
|
+
#
|
|
6
|
+
# This one-off script scans the existing tags table and normalizes
|
|
7
|
+
# plural tag level names to their singular forms. It merges plural
|
|
8
|
+
# tags into existing singular tags when both exist.
|
|
9
|
+
#
|
|
10
|
+
# Run with --help for usage information.
|
|
11
|
+
|
|
12
|
+
require 'optparse'
|
|
13
|
+
require 'ruby-progressbar'
|
|
14
|
+
require_relative '../lib/htm'
|
|
15
|
+
|
|
16
|
+
class PluralTagNormalizer
|
|
17
|
+
VERSION = '0.0.1'
|
|
18
|
+
|
|
19
|
+
attr_reader :options, :stats
|
|
20
|
+
|
|
21
|
+
def self.run(argv = ARGV)
|
|
22
|
+
if argv.empty?
|
|
23
|
+
new(['--help']).run
|
|
24
|
+
exit 0
|
|
25
|
+
end
|
|
26
|
+
new(argv).run
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def initialize(argv)
|
|
30
|
+
@options = {
|
|
31
|
+
dryrun: true,
|
|
32
|
+
verbose: false
|
|
33
|
+
}
|
|
34
|
+
@stats = {
|
|
35
|
+
tags_scanned: 0,
|
|
36
|
+
plural_tags_found: 0,
|
|
37
|
+
tags_renamed: 0,
|
|
38
|
+
tags_merged: 0,
|
|
39
|
+
node_tags_reassigned: 0,
|
|
40
|
+
errors: []
|
|
41
|
+
}
|
|
42
|
+
parse_options(argv)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def run
|
|
46
|
+
print_header
|
|
47
|
+
return unless confirm_execution
|
|
48
|
+
|
|
49
|
+
HTM::Database.setup
|
|
50
|
+
process_tags
|
|
51
|
+
print_summary
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Parses command-line switches from +argv+ into @options.
#
# Recognised switches: --[no-]dryrun, -v/--verbose, -h/--help, --version.
# --help and --version print and exit 0. Any parse failure (unknown,
# ambiguous, or malformed switch) or leftover positional argument prints an
# error plus the usage text and exits 1.
#
# @param argv [Array<String>] raw arguments; consumed in place by parse!
# @return [void]
def parse_options(argv)
  parser = OptionParser.new do |opts|
    opts.banner = usage_banner

    opts.separator ""
    opts.separator "Options:"

    opts.on("--[no-]dryrun", "Dry run mode (default: true). Use --no-dryrun to apply changes.") do |v|
      @options[:dryrun] = v
    end

    opts.on("-v", "--verbose", "Show detailed output for each tag processed") do
      @options[:verbose] = true
    end

    opts.on("-h", "--help", "Show this help message") do
      puts opts
      exit 0
    end

    opts.on("--version", "Show version") do
      puts "normalize_plural_tags v#{VERSION}"
      exit 0
    end

    opts.separator ""
    opts.separator "Examples:"
    opts.separator " # Preview what would be done (default)"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --dryrun"
    opts.separator ""
    opts.separator " # Preview with detailed output"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --dryrun --verbose"
    opts.separator ""
    opts.separator " # Apply changes to database"
    opts.separator " HTM_DATABASE__URL=\"...\" ruby scripts/normalize_plural_tags.rb --no-dryrun"
    opts.separator ""
    opts.separator "Environment Variables:"
    opts.separator " HTM_DATABASE__URL PostgreSQL connection URL (required)"
    opts.separator ""
  end

  remaining = parser.parse!(argv)

  # Check for unexpected positional arguments
  if remaining.any?
    warn "\033[1;31mError: unexpected argument(s): #{remaining.join(', ')}\033[0m"
    warn
    puts parser
    exit 1
  end
rescue OptionParser::ParseError => e
  # ParseError is the common parent of InvalidOption, AmbiguousOption,
  # NeedlessArgument, etc., so every bad command line gets the usage text
  # instead of falling through to the script's generic FATAL ERROR handler.
  warn "\033[1;31mError: #{e.message}\033[0m"
  warn
  puts parser
  exit 1
end
|
|
112
|
+
|
|
113
|
+
def usage_banner
|
|
114
|
+
<<~BANNER
|
|
115
|
+
Usage: ruby scripts/normalize_plural_tags.rb [options]
|
|
116
|
+
|
|
117
|
+
Normalizes plural tag level names to singular forms in the HTM database.
|
|
118
|
+
|
|
119
|
+
For a tag like "users:frameworks:models", this script:
|
|
120
|
+
1. Singularizes each level: "user:framework:model"
|
|
121
|
+
2. If singular tag exists, merges node associations
|
|
122
|
+
3. If singular tag doesn't exist, renames the plural tag
|
|
123
|
+
|
|
124
|
+
By default, runs in dry-run mode (no changes made). Use --no-dryrun to apply.
|
|
125
|
+
BANNER
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def print_header
|
|
129
|
+
puts "=" * 70
|
|
130
|
+
puts "Plural Tag Normalizer v#{VERSION}"
|
|
131
|
+
puts "=" * 70
|
|
132
|
+
puts "Mode: #{options[:dryrun] ? 'DRY RUN (no changes will be made)' : 'LIVE (will modify database)'}"
|
|
133
|
+
puts "Verbose: #{options[:verbose] ? 'Yes' : 'No'}"
|
|
134
|
+
puts "Database: #{masked_database_url}"
|
|
135
|
+
puts "=" * 70
|
|
136
|
+
puts
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Returns the configured database URL with its password replaced by "***",
# or "(not configured)" when no URL is set.
#
# Everything between the first ":" of the userinfo part and the last "@"
# before the path is masked, so passwords containing ":" or "@" are hidden
# completely. URLs without a password are returned unchanged.
#
# @return [String]
def masked_database_url
  url = HTM.config.database.url
  return '(not configured)' unless url

  url.sub(%r{\A((?:[a-z][a-z0-9+.\-]*://)?[^:/@]*):[^/]*@}i, '\1:***@')
end
|
|
142
|
+
|
|
143
|
+
def confirm_execution
|
|
144
|
+
return true if options[:dryrun]
|
|
145
|
+
|
|
146
|
+
puts "\033[1;33m⚠️ WARNING: This will modify the database!\033[0m"
|
|
147
|
+
puts
|
|
148
|
+
puts "This script will:"
|
|
149
|
+
puts " • Rename plural tags to singular forms"
|
|
150
|
+
puts " • Merge node associations when both plural and singular exist"
|
|
151
|
+
puts " • Delete redundant plural tags after merging"
|
|
152
|
+
puts
|
|
153
|
+
print "Are you sure you want to continue? [y/N] "
|
|
154
|
+
|
|
155
|
+
response = $stdin.gets&.strip&.downcase
|
|
156
|
+
unless response == 'y' || response == 'yes'
|
|
157
|
+
puts
|
|
158
|
+
puts "Aborted. No changes were made."
|
|
159
|
+
return false
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
puts
|
|
163
|
+
true
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def process_tags
|
|
167
|
+
all_tags = HTM::Models::Tag.order(:name)
|
|
168
|
+
total_count = all_tags.count
|
|
169
|
+
|
|
170
|
+
puts "Found #{total_count} tags to scan"
|
|
171
|
+
puts
|
|
172
|
+
|
|
173
|
+
if total_count == 0
|
|
174
|
+
puts "No tags found. Nothing to do."
|
|
175
|
+
return
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
progressbar = ProgressBar.create(
|
|
179
|
+
title: options[:dryrun] ? "Analyzing" : "Processing",
|
|
180
|
+
total: total_count,
|
|
181
|
+
format: "%t: |%B| %c/%C (%P%%) %e",
|
|
182
|
+
output: $stdout
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
all_tags.paged_each do |tag|
|
|
186
|
+
process_tag(tag)
|
|
187
|
+
progressbar.increment
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
puts
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def process_tag(tag)
|
|
194
|
+
@stats[:tags_scanned] += 1
|
|
195
|
+
|
|
196
|
+
# Singularize all levels of the tag
|
|
197
|
+
singular_name = singularize_tag(tag.name)
|
|
198
|
+
|
|
199
|
+
# If no change needed, skip
|
|
200
|
+
return if singular_name == tag.name
|
|
201
|
+
|
|
202
|
+
@stats[:plural_tags_found] += 1
|
|
203
|
+
log_verbose "Found plural tag: '#{tag.name}' -> '#{singular_name}'"
|
|
204
|
+
|
|
205
|
+
# Check if singular version already exists
|
|
206
|
+
existing_singular = HTM::Models::Tag.first(name: singular_name)
|
|
207
|
+
|
|
208
|
+
if existing_singular
|
|
209
|
+
# Merge: reassign node_tags from plural to singular, then delete plural
|
|
210
|
+
merge_tags(tag, existing_singular)
|
|
211
|
+
else
|
|
212
|
+
# Rename: just update the tag name
|
|
213
|
+
rename_tag(tag, singular_name)
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
def singularize_tag(tag_name)
|
|
218
|
+
# Use the TagService's singularization logic for consistency
|
|
219
|
+
HTM::TagService.singularize_tag_levels(tag_name)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Fold +plural_tag+ into the already-existing +singular_tag+.
#
# Node associations that exist only on the plural tag are repointed to the
# singular tag; associations present on both are removed from the plural
# side; then the plural tag itself is destroyed. A plural tag with no
# associations is still destroyed, since it is redundant once the singular
# tag exists.
#
# In dry-run mode nothing is written and only the would-be counts are
# recorded. In live mode all writes happen in one transaction (a savepoint
# when already inside a transaction); stats are updated only after it
# succeeds, and a Sequel error is recorded in @stats[:errors].
#
# @param plural_tag [#id, #name, #destroy] tag to remove
# @param singular_tag [#id, #name] tag that survives
# @return [void]
def merge_tags(plural_tag, singular_tag)
  log_verbose " Merging '#{plural_tag.name}' into '#{singular_tag.name}'"

  node_tags = HTM::Models::NodeTag

  # Get node IDs associated with plural tag
  plural_node_ids = node_tags.where(tag_id: plural_tag.id).select_map(:node_id)

  if plural_node_ids.empty?
    log_verbose " No nodes to reassign"
    existing_node_ids = []
  else
    # Find which nodes already have the singular tag
    existing_node_ids = node_tags
      .where(tag_id: singular_tag.id, node_id: plural_node_ids)
      .select_map(:node_id)
  end

  new_node_ids = plural_node_ids - existing_node_ids

  if options[:dryrun]
    unless plural_node_ids.empty?
      log_verbose " [DRY RUN] Would reassign #{new_node_ids.count} nodes from plural to singular"
      log_verbose " [DRY RUN] Would delete #{existing_node_ids.count} duplicate node_tags"
    end
    log_verbose " [DRY RUN] Would delete plural tag '#{plural_tag.name}'"
    @stats[:node_tags_reassigned] += new_node_ids.count
    @stats[:tags_merged] += 1
    return
  end

  begin
    HTM.db.transaction(savepoint: true) do
      # Reassign new nodes to singular tag
      if new_node_ids.any?
        node_tags.where(tag_id: plural_tag.id, node_id: new_node_ids)
                 .update(tag_id: singular_tag.id)
        log_verbose " Reassigned #{new_node_ids.count} nodes to '#{singular_tag.name}'"
      end

      # Delete duplicate node_tags (nodes that had both tags)
      if existing_node_ids.any?
        node_tags.where(tag_id: plural_tag.id, node_id: existing_node_ids).delete
        log_verbose " Deleted #{existing_node_ids.count} duplicate node_tags"
      end

      # Delete the plural tag
      plural_tag.destroy
      log_verbose " Deleted plural tag '#{plural_tag.name}'"
    end

    # Count only after the transaction has succeeded.
    @stats[:node_tags_reassigned] += new_node_ids.count
    @stats[:tags_merged] += 1
  rescue Sequel::Error => e
    error_msg = "Failed to merge '#{plural_tag.name}' into '#{singular_tag.name}': #{e.message}"
    log_verbose " ERROR: #{error_msg}"
    @stats[:errors] << error_msg
  end
end
|
|
274
|
+
|
|
275
|
+
# Rename +tag+ to +new_name+ (used when no singular tag exists yet).
#
# In dry-run mode only the would-be rename is logged and counted. In live
# mode the tag is updated; a Sequel error is recorded in @stats[:errors]
# rather than raised.
#
# The original name is captured up front because the update assigns the
# new name to the tag object, so reading tag.name afterwards (even after a
# failed save) would report the new name on both sides of the message.
#
# @param tag [#name, #update] tag to rename
# @param new_name [String] singularized name
# @return [void]
def rename_tag(tag, new_name)
  old_name = tag.name

  if options[:dryrun]
    log_verbose " [DRY RUN] Would rename '#{old_name}' to '#{new_name}'"
    @stats[:tags_renamed] += 1
    return
  end

  begin
    tag.update(name: new_name)
    log_verbose " Renamed '#{old_name}' to '#{new_name}'"
    @stats[:tags_renamed] += 1
  rescue Sequel::Error => e
    error_msg = "Failed to rename '#{old_name}' to '#{new_name}': #{e.message}"
    log_verbose " ERROR: #{error_msg}"
    @stats[:errors] << error_msg
  end
end
|
|
291
|
+
|
|
292
|
+
def log_verbose(message)
|
|
293
|
+
puts message if options[:verbose]
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
def print_summary
|
|
297
|
+
puts "=" * 70
|
|
298
|
+
puts "Summary"
|
|
299
|
+
puts "=" * 70
|
|
300
|
+
puts "Tags scanned: #{@stats[:tags_scanned]}"
|
|
301
|
+
puts "Plural tags found: #{@stats[:plural_tags_found]}"
|
|
302
|
+
puts "Tags renamed: #{@stats[:tags_renamed]}"
|
|
303
|
+
puts "Tags merged: #{@stats[:tags_merged]}"
|
|
304
|
+
puts "Node tags reassigned: #{@stats[:node_tags_reassigned]}"
|
|
305
|
+
|
|
306
|
+
if @stats[:errors].any?
|
|
307
|
+
puts
|
|
308
|
+
puts "\033[1;31mErrors (#{@stats[:errors].count}):\033[0m"
|
|
309
|
+
@stats[:errors].first(10).each { |e| puts " • #{e}" }
|
|
310
|
+
puts " ... and #{@stats[:errors].count - 10} more" if @stats[:errors].count > 10
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
puts
|
|
314
|
+
if options[:dryrun]
|
|
315
|
+
puts "\033[1;36mThis was a DRY RUN. No changes were made.\033[0m"
|
|
316
|
+
puts "Run with --no-dryrun to apply changes."
|
|
317
|
+
else
|
|
318
|
+
puts "\033[1;32m✓ Normalization complete!\033[0m"
|
|
319
|
+
end
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
# Run the script
|
|
324
|
+
if __FILE__ == $PROGRAM_NAME
|
|
325
|
+
begin
|
|
326
|
+
PluralTagNormalizer.run
|
|
327
|
+
rescue Interrupt
|
|
328
|
+
puts "\n\nAborted by user."
|
|
329
|
+
exit 130
|
|
330
|
+
rescue => e
|
|
331
|
+
warn "\033[1;31mFATAL ERROR: #{e.class.name} - #{e.message}\033[0m"
|
|
332
|
+
warn e.backtrace.first(10).join("\n") if ENV['DEBUG']
|
|
333
|
+
exit 1
|
|
334
|
+
end
|
|
335
|
+
end
|