ast-merge 1.1.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +198 -7
  4. data/README.md +208 -39
  5. data/exe/ast-merge-recipe +366 -0
  6. data/lib/ast/merge/conflict_resolver_base.rb +8 -1
  7. data/lib/ast/merge/content_match_refiner.rb +278 -0
  8. data/lib/ast/merge/debug_logger.rb +2 -1
  9. data/lib/ast/merge/detector/base.rb +193 -0
  10. data/lib/ast/merge/detector/fenced_code_block.rb +227 -0
  11. data/lib/ast/merge/detector/mergeable.rb +369 -0
  12. data/lib/ast/merge/detector/toml_frontmatter.rb +82 -0
  13. data/lib/ast/merge/detector/yaml_frontmatter.rb +82 -0
  14. data/lib/ast/merge/merge_result_base.rb +4 -1
  15. data/lib/ast/merge/navigable_statement.rb +630 -0
  16. data/lib/ast/merge/partial_template_merger.rb +432 -0
  17. data/lib/ast/merge/recipe/config.rb +198 -0
  18. data/lib/ast/merge/recipe/preset.rb +171 -0
  19. data/lib/ast/merge/recipe/runner.rb +254 -0
  20. data/lib/ast/merge/recipe/script_loader.rb +181 -0
  21. data/lib/ast/merge/recipe.rb +26 -0
  22. data/lib/ast/merge/rspec/dependency_tags.rb +252 -0
  23. data/lib/ast/merge/rspec/shared_examples/reproducible_merge.rb +3 -2
  24. data/lib/ast/merge/rspec.rb +33 -2
  25. data/lib/ast/merge/smart_merger_base.rb +86 -3
  26. data/lib/ast/merge/version.rb +1 -1
  27. data/lib/ast/merge.rb +10 -6
  28. data/sig/ast/merge.rbs +389 -2
  29. data.tar.gz.sig +0 -0
  30. metadata +60 -16
  31. metadata.gz.sig +0 -0
  32. data/lib/ast/merge/fenced_code_block_detector.rb +0 -313
  33. data/lib/ast/merge/region.rb +0 -124
  34. data/lib/ast/merge/region_detector_base.rb +0 -114
  35. data/lib/ast/merge/region_mergeable.rb +0 -364
  36. data/lib/ast/merge/toml_frontmatter_detector.rb +0 -88
  37. data/lib/ast/merge/yaml_frontmatter_detector.rb +0 -88
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # AST Merge Recipe Runner
5
+ #
6
+ # Run YAML-based merge recipes against target files.
7
+ # This is a shipped executable that can be used after installing the ast-merge gem.
8
+ #
9
+ # Usage:
10
+ # ast-merge-recipe RECIPE_FILE [options]
11
+ #
12
+ # Examples:
13
+ # ast-merge-recipe .merge-recipes/gem_family_section.yml --dry-run
14
+ # ast-merge-recipe .merge-recipes/gem_family_section.yml --verbose --parser=commonmarker
15
+
16
+ require "bundler/inline"
17
+ require "optparse"
18
+ require "yaml"
19
+
20
# Parse options first to get merge_gems before bundler/inline.
# These defaults only drive the bootstrap below; the CLI class keeps its own
# option hash and re-parses ARGV with OptionParser afterwards.
options = {
  dry_run: false,
  verbose: false,
  parser: :markly,
  base_dir: Dir.pwd,
  recipe_file: nil,
  merge_gems: [],
  dev_mode: ENV.fetch("KETTLE_RB_DEV", "false").casecmp?("true"),
  dev_root: nil,
}

# Pre-parse ARGV by hand: the recipe file and --dev-root have to be known
# before bundler/inline runs, i.e. before OptionParser sees the arguments.
ARGV.each do |arg|
  if (dev_root_flag = arg.match(/\A--dev-root=(.+)\z/))
    options[:dev_root] = File.expand_path(dev_root_flag[1])
    # The --help text documents this flag as "implies dev mode"; without this
    # assignment the flag had no effect unless KETTLE_RB_DEV=true was also set.
    options[:dev_mode] = true
  elsif arg.start_with?("-")
    # Every other option is handled later by OptionParser.
  else
    # First bare argument is taken as the recipe file.
    options[:recipe_file] ||= arg
  end
end

# If a recipe file was named and exists, collect the extra merge gems it lists.
recipe_merge_gems = []
if options[:recipe_file] && File.exist?(options[:recipe_file])
  begin
    recipe_config = YAML.safe_load_file(options[:recipe_file], permitted_classes: [Symbol])
    if recipe_config.is_a?(Hash) && recipe_config["merge_gems"]
      recipe_merge_gems = Array(recipe_config["merge_gems"])
    end
  rescue
    # Deliberately best-effort: the recipe is loaded again by the CLI, which
    # reports any error to the user.
  end
end

# Determine dev root for local gems: explicit flag first, then environment.
dev_root = options[:dev_root] || ENV["AST_MERGE_DEV_ROOT"]
if options[:dev_mode] && dev_root.nil?
  # Fall back to the first candidate directory that contains ast-merge.gemspec.
  possible_roots = [
    File.expand_path("../..", __FILE__),
    File.expand_path("../../..", __FILE__),
    Dir.pwd,
  ]
  dev_root = possible_roots.find { |p| File.exist?(File.join(p, "ast-merge.gemspec")) }
end
69
+
70
# Load dependencies via bundler/inline.
#
# Reads the locals set up earlier in this script: +options+, +dev_root+ and
# +recipe_merge_gems+.
gemfile do
  source "https://gem.coop"

  if options[:dev_mode] && dev_root
    # Development mode - use local gems
    gem "ast-merge", path: dev_root
    gem "tree_haver", path: File.join(dev_root, "vendor/tree_haver")
    gem "markdown-merge", path: File.join(dev_root, "vendor/markdown-merge")
    gem "markly-merge", path: File.join(dev_root, "vendor/markly-merge")
    gem "commonmarker-merge", path: File.join(dev_root, "vendor/commonmarker-merge")
    gem "prism-merge", path: File.join(dev_root, "vendor/prism-merge")
    gem "psych-merge", path: File.join(dev_root, "vendor/psych-merge")
  else
    # Production mode - use released gems
    gem "ast-merge"
    gem "tree_haver"
    gem "markdown-merge"
    gem "markly-merge"
  end

  # Load additional merge gems specified in recipe. An entry is either a bare
  # gem name or a hash with name/version/path/git/branch/require keys
  # (string keys win over symbol keys).
  recipe_merge_gems.each do |gem_spec|
    case gem_spec
    when String
      gem(gem_spec)
    when Hash
      value_of = ->(key) { gem_spec[key] || gem_spec[key.to_sym] }

      gem_opts = {}
      %w[path git branch].each do |key|
        value = value_of.call(key)
        gem_opts[key.to_sym] = value if value
      end
      # `require` may legitimately be false, so test for the key, not the value.
      if gem_spec.key?("require")
        gem_opts[:require] = gem_spec["require"]
      elsif gem_spec.key?(:require)
        gem_opts[:require] = gem_spec[:require]
      end

      # Bundler's `gem` takes version requirements as positional arguments,
      # so the version must not be passed as a `version:` option.
      requirements = Array(value_of.call("version"))
      gem(value_of.call("name"), *requirements, **gem_opts)
    end
  end

  # Make table_tennis available for nice output; it is required separately.
  gem "table_tennis", require: false
end
117
+
118
+ # Now load the actual libraries
119
+ require "ast-merge"
120
+
121
# Optional table rendering: remember whether table_tennis could be required.
# Only LoadError is tolerated; any other failure propagates.
HAS_TABLE_TENNIS =
  begin
    require "table_tennis"
    true
  rescue LoadError
    false
  end
128
+
129
# ANSI color helpers.
#
# Each method wraps +str+ in an escape sequence and appends the reset code
# ("\e[0m"), returning a new String.
module Colors
  def self.green(str)
    "\e[32m#{str}\e[0m"
  end

  def self.red(str)
    "\e[31m#{str}\e[0m"
  end

  def self.yellow(str)
    "\e[33m#{str}\e[0m"
  end

  def self.cyan(str)
    "\e[36m#{str}\e[0m"
  end

  def self.bold(str)
    "\e[1m#{str}\e[0m"
  end

  def self.dim(str)
    "\e[2m#{str}\e[0m"
  end
end
140
+
141
+ # Main runner class
142
+ class AstMergeRecipeCLI
143
+ VERSION = Ast::Merge::VERSION
144
+
145
# Seed the option hash with the CLI defaults; #parse_options overrides
# individual entries later.
def initialize
  @options = {}
  @options[:dry_run] = false
  @options[:verbose] = false
  @options[:parser] = :markly
  @options[:base_dir] = Dir.pwd
  @options[:recipe_file] = nil
end
154
+
155
# Entry point: parse +argv+, validate the result, then execute the recipe.
#
# Command-line parsing errors are reported together with the usage text;
# any other StandardError is reported with its message (plus the first five
# backtrace lines when verbose). Both paths exit with status 1.
#
# @param argv [Array<String>] command-line arguments (defaults to ARGV)
# @return [void]
def run(argv = ARGV)
  parse_options(argv)
  validate_options!
  execute_recipe
rescue OptionParser::ParseError => e
  # ParseError is the common ancestor of InvalidOption, MissingArgument,
  # InvalidArgument, AmbiguousOption, ... so every malformed command line
  # gets the usage text, not only the first two.
  $stderr.puts Colors.red("ERROR: #{e.message}")
  $stderr.puts
  $stderr.puts @option_parser
  exit(1)
rescue => e
  $stderr.puts Colors.red("ERROR: #{e.message}")
  $stderr.puts e.backtrace.first(5).join("\n") if @options[:verbose]
  exit(1)
end
169
+
170
+ private
171
+
172
# Build the OptionParser (kept in @option_parser so usage can be printed
# later), apply it to +argv+ and record the results in @options.
#
# The first positional argument becomes the recipe file; any further
# positional arguments only trigger a warning on stderr.
#
# @param argv [Array<String>] command-line arguments
# @return [nil]
def parse_options(argv)
  program = File.basename($0)

  @option_parser = OptionParser.new do |parser|
    parser.banner = "Usage: #{program} RECIPE_FILE [options]"
    parser.separator("")
    parser.separator("Run a YAML-based merge recipe against target files.")
    parser.separator("")
    parser.separator("Options:")

    parser.on("-n", "--dry-run", "Show what would change without modifying files") { @options[:dry_run] = true }
    parser.on("-v", "--verbose", "Show detailed output") { @options[:verbose] = true }

    parser.on("-p", "--parser=PARSER", String, "Parser to use (markly, commonmarker, prism, psych)", "Default: markly") do |name|
      @options[:parser] = name.to_sym
    end

    parser.on("-d", "--base-dir=DIR", String, "Base directory for path resolution", "Default: current directory") do |path|
      @options[:base_dir] = File.expand_path(path)
    end

    parser.on("--dev-root=DIR", String, "Root directory for development gems (implies dev mode)") do |_path|
      # Already handled in pre-parse
    end

    parser.on("-V", "--version", "Show version") do
      puts "ast-merge-recipe #{VERSION}"
      exit(0)
    end

    parser.on("-h", "--help", "Show this help message") do
      puts parser
      puts
      puts "Examples:"
      puts " #{program} .merge-recipes/gem_family_section.yml --dry-run"
      puts " #{program} recipe.yml --verbose --parser=commonmarker"
      puts
      puts "Recipe YAML format:"
      puts " See lib/ast/merge/recipe/README.md for full documentation"
      exit(0)
    end
  end

  # Non-destructive parse: what comes back are the positional arguments.
  recipe, *extras = @option_parser.parse(argv)
  @options[:recipe_file] = recipe
  return if extras.empty?

  $stderr.puts Colors.yellow("WARNING: Ignoring extra arguments: #{extras.join(", ")}")
end
245
+
246
# Ensure a recipe file was given and exists, then store its absolute path
# back into @options[:recipe_file]. Exits with status 1 (after printing an
# error to stderr) when either check fails.
#
# @return [String] the expanded recipe path
def validate_options!
  requested = @options[:recipe_file]

  if !requested
    $stderr.puts Colors.red("ERROR: No recipe file specified")
    $stderr.puts
    $stderr.puts @option_parser
    exit(1)
  end

  absolute = File.expand_path(requested)
  if File.exist?(absolute)
    @options[:recipe_file] = absolute
  else
    $stderr.puts Colors.red("ERROR: Recipe file not found: #{absolute}")
    exit(1)
  end
end
262
+
263
# Load the recipe, run it through Ast::Merge::Recipe::Runner and print
# per-file results followed by a summary. Exits with status 1 when the
# runner's summary reports any errors.
#
# @return [nil]
def execute_recipe
  print_header

  recipe = Ast::Merge::Recipe::Config.load(@options[:recipe_file])
  print_recipe_info(recipe)

  # Forward exactly these CLI options to the runner as keyword arguments.
  runner_settings = %i[dry_run base_dir parser verbose].to_h { |key| [key, @options[key]] }
  runner = Ast::Merge::Recipe::Runner.new(recipe, **runner_settings)

  puts Colors.cyan("Processing files...")
  puts

  runner.run { |result| print_result(result) }

  print_summary(runner)

  # Exit with error if there were failures
  exit(1) if runner.summary[:errors] > 0
end
292
+
293
# Print the banner: a 70-character rule, the title, another rule and a
# blank line.
def print_header
  rule = "=" * 70
  puts Colors.bold(rule)
  puts Colors.bold("AST Merge Recipe Runner")
  puts Colors.bold(rule)
  puts
end
299
+
300
# Print the recipe name, its description when present, the run mode
# (DRY RUN or LIVE) and the selected parser.
#
# @param recipe [#name, #description] the loaded recipe
def print_recipe_info(recipe)
  mode = @options[:dry_run] ? "DRY RUN" : "LIVE"

  puts Colors.cyan("Recipe: #{recipe.name}")
  if recipe.description
    puts Colors.dim(" #{recipe.description}")
  end
  puts
  puts Colors.yellow("Mode: #{mode}")
  puts Colors.dim("Parser: #{@options[:parser]}")
  puts
end
308
+
309
# Print one result line (status symbol plus relative path).
#
# The message is shown in verbose mode or when the status is :error; stats
# and the error class/message/first three backtrace lines are shown in
# verbose mode only.
#
# @param result [Object] responds to status, relative_path, message, stats, error
# @return [nil]
def print_result(result)
  symbol = status_symbol(result.status)
  puts " #{symbol} #{result.relative_path}"

  if (@options[:verbose] || result.status == :error) && result.message
    puts Colors.dim(" #{result.message}")
  end

  puts Colors.dim(" Stats: #{result.stats.inspect}") if @options[:verbose] && result.stats

  return unless result.error && @options[:verbose]

  puts Colors.red(" #{result.error.class}: #{result.error.message}")
  puts Colors.dim(" #{result.error.backtrace&.first(3)&.join("\n ")}")
end
326
+
327
# Print the summary section: a table rendered by TableTennis when it was
# loaded, otherwise plain counter lines taken from runner.summary.
#
# @param runner [#summary, #summary_table] the runner that just finished
def print_summary(runner)
  rule = "=" * 70
  puts
  puts Colors.bold(rule)
  puts Colors.bold("Summary")
  puts Colors.bold(rule)
  puts

  summary = runner.summary

  if HAS_TABLE_TENNIS
    puts TableTennis.new(runner.summary_table)
  else
    puts " Total files: #{summary[:total]}"
    # Dry runs report what would change; live runs report what did change.
    puts(@options[:dry_run] ? " Would update: #{summary[:would_update]}" : " Updated: #{summary[:updated]}")
    puts " Unchanged: #{summary[:unchanged]}"
    puts " Skipped (no anchor):#{summary[:skipped]}"
    # The error count is only printed when it is non-zero.
    if summary[:errors] > 0
      puts " Errors: #{summary[:errors]}"
    end
  end

  puts
end
352
+
353
# Map a result status to its colored one-character marker; unknown statuses
# map to a plain "?".
#
# @param status [Symbol] :updated, :would_update, :unchanged, :skipped or :error
# @return [String]
def status_symbol(status)
  color, glyph = {
    updated: [:green, "✓"],
    would_update: [:yellow, "~"],
    unchanged: [:dim, "○"],
    skipped: [:dim, "-"],
    error: [:red, "✗"],
  }[status]

  color ? Colors.public_send(color, glyph) : "?"
end
363
+ end
364
+
365
+ # Run the CLI
366
+ AstMergeRecipeCLI.new.run
@@ -118,6 +118,9 @@ module Ast
118
118
  # @return [Boolean] Whether to add template-only nodes (batch strategy)
119
119
  attr_reader :add_template_only_nodes
120
120
 
121
+ # @return [Object, nil] Match refiner for fuzzy matching
122
+ attr_reader :match_refiner
123
+
121
124
  # Initialize the conflict resolver
122
125
  #
123
126
  # @param strategy [Symbol] Resolution strategy (:node, :batch, or :boundary)
@@ -129,7 +132,9 @@ module Ast
129
132
  # @param template_analysis [Object] Analysis of the template file
130
133
  # @param dest_analysis [Object] Analysis of the destination file
131
134
  # @param add_template_only_nodes [Boolean] Whether to add nodes only in template (batch/boundary strategy)
132
- def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false)
135
+ # @param match_refiner [#call, nil] Optional match refiner for fuzzy matching
136
+ # @param options [Hash] Additional options for forward compatibility
137
+ def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false, match_refiner: nil, **options)
133
138
  unless %i[node batch boundary].include?(strategy)
134
139
  raise ArgumentError, "Invalid strategy: #{strategy}. Must be :node, :batch, or :boundary"
135
140
  end
@@ -141,6 +146,8 @@ module Ast
141
146
  @template_analysis = template_analysis
142
147
  @dest_analysis = dest_analysis
143
148
  @add_template_only_nodes = add_template_only_nodes
149
+ @match_refiner = match_refiner
150
+ # **options captured for forward compatibility - subclasses may use additional options
144
151
  end
145
152
 
146
153
  # Resolve conflicts using the configured strategy
@@ -0,0 +1,278 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ast
4
+ module Merge
5
+ # Match refiner for text content-based fuzzy matching.
6
+ #
7
+ # This refiner uses Levenshtein distance to pair nodes that have similar
8
+ # but not identical text content. It's useful for matching nodes where
9
+ # the content has been slightly modified (typos, rewording, etc.).
10
+ #
11
+ # Unlike signature-based matching which requires exact content hashes,
12
+ # this refiner allows fuzzy matching based on text similarity. This is
13
+ # particularly useful for:
14
+ # - Paragraphs with minor edits
15
+ # - Headings with slight rewording
16
+ # - Comments with updated text
17
+ # - Any text-based node type
18
+ #
19
+ # @example Basic usage
20
+ # refiner = ContentMatchRefiner.new(threshold: 0.7)
21
+ # matches = refiner.call(template_nodes, dest_nodes)
22
+ #
23
+ # @example With specific node types
24
+ # # Only match paragraphs and headings
25
+ # refiner = ContentMatchRefiner.new(
26
+ # threshold: 0.6,
27
+ # node_types: [:paragraph, :heading]
28
+ # )
29
+ #
30
+ # @example With custom content extractor
31
+ # refiner = ContentMatchRefiner.new(
32
+ # threshold: 0.7,
33
+ # content_extractor: ->(node) { node.text_content.downcase.strip }
34
+ # )
35
+ #
36
+ # @example Combined with other refiners
37
+ # merger = SmartMerger.new(
38
+ # template,
39
+ # destination,
40
+ # match_refiner: [
41
+ # ContentMatchRefiner.new(threshold: 0.7, node_types: [:paragraph]),
42
+ # TableMatchRefiner.new(threshold: 0.5)
43
+ # ]
44
+ # )
45
+ #
46
+ # @see MatchRefinerBase Base class
47
+ class ContentMatchRefiner < MatchRefinerBase
48
+ # Default weights for content similarity scoring
49
+ DEFAULT_WEIGHTS = {
50
+ content: 0.7, # Text content similarity (Levenshtein)
51
+ length: 0.15, # Length similarity
52
+ position: 0.15, # Position similarity in document
53
+ }.freeze
54
+
55
+ # @return [Hash] Scoring weights
56
+ attr_reader :weights
57
+
58
+ # @return [Proc, nil] Custom content extraction function
59
+ attr_reader :content_extractor
60
+
61
# Build a content match refiner.
#
# threshold, node_types and any extra options are handed to the superclass;
# the given weights are merged over DEFAULT_WEIGHTS.
#
# @param threshold [Float] Minimum score to accept a match (defaults to DEFAULT_THRESHOLD)
# @param node_types [Array<Symbol>] Node types to process (empty = all)
# @param weights [Hash] Overrides for individual DEFAULT_WEIGHTS entries
# @param content_extractor [Proc, nil] Custom function to extract text from nodes;
#   should accept a node and return a String
# @param options [Hash] Additional options for forward compatibility
def initialize(threshold: DEFAULT_THRESHOLD, node_types: [], weights: {}, content_extractor: nil, **options)
  super(threshold: threshold, node_types: node_types, **options)
  @content_extractor = content_extractor
  @weights = DEFAULT_WEIGHTS.merge(weights)
end
80
+
81
# Pair up unmatched template and destination nodes by content similarity.
#
# Both lists are first narrowed with #filter_nodes; scoring and pairing are
# delegated to greedy_match, which is given a block that scores one
# candidate pair using each node's index within its filtered list.
#
# @param template_nodes [Array] Unmatched nodes from template
# @param dest_nodes [Array] Unmatched nodes from destination
# @param context [Hash] Additional context; accepted but not used here
# @return [Array<MatchResult>] Array of content-based matches
def call(template_nodes, dest_nodes, context = {})
  t_candidates = filter_nodes(template_nodes)
  d_candidates = filter_nodes(dest_nodes)
  return [] if t_candidates.empty? || d_candidates.empty?

  # Sizes are captured up front and used for position scoring.
  t_count = t_candidates.size
  d_count = d_candidates.size

  greedy_match(t_candidates, d_candidates) do |t_node, d_node|
    compute_content_similarity(
      t_node,
      d_node,
      t_candidates.index(t_node) || 0,
      d_candidates.index(d_node) || 0,
      t_count,
      d_count,
    )
  end
end
111
+
112
+ protected
113
+
114
# Restrict +nodes+ to the configured node types.
#
# @param nodes [Array] Nodes to filter
# @return [Array] +nodes+ itself when no node types are configured,
#   otherwise a new array of the nodes whose type is handled
def filter_nodes(nodes)
  if node_types.empty?
    nodes
  else
    nodes.select { |candidate| handles_type?(extract_node_type(candidate)) }
  end
end
123
+
124
# Determine a node's type.
#
# Checked in order: nodes recognised by NodeTyping, nodes exposing a truthy
# merge_type, then nodes exposing type (converted to a Symbol if needed).
#
# @param node [Object] The node
# @return [Symbol, nil] The node type, or nil when none of the above applies
def extract_node_type(node)
  return NodeTyping.merge_type_for(node) if NodeTyping.typed_node?(node)
  return node.merge_type if node.respond_to?(:merge_type) && node.merge_type
  return unless node.respond_to?(:type)

  raw_type = node.type
  raw_type.is_a?(Symbol) ? raw_type : raw_type.to_s.to_sym
end
140
+
141
# Extract text content from a node.
#
# Uses the custom content_extractor if one was provided. Otherwise the first
# of text_content, string_content, content or text that the node responds to
# is used, falling back to the node's own to_s.
#
# The result is always coerced with to_s, so an extractor that returns nil
# (or another non-String) cannot break the string arithmetic in the scoring
# methods.
#
# @param node [Object] The node
# @return [String] The text content
def extract_content(node)
  return @content_extractor.call(node).to_s if @content_extractor

  reader = %i[text_content string_content content text].find { |name| node.respond_to?(name) }
  reader ? node.public_send(reader).to_s : node.to_s
end
166
+
167
# Score how similar two nodes are.
#
# The score is the weighted sum of three components, each scaled by the
# matching entry in #weights: text similarity, length similarity and
# relative-position similarity.
#
# @param t_node [Object] Template node
# @param d_node [Object] Destination node
# @param t_idx [Integer] Template node index
# @param d_idx [Integer] Destination node index
# @param total_t [Integer] Total template nodes
# @param total_d [Integer] Total destination nodes
# @return [Float] Weighted similarity score
def compute_content_similarity(t_node, d_node, t_idx, d_idx, total_t, total_d)
  template_text = extract_content(t_node)
  dest_text = extract_content(d_node)

  by_content = weights[:content] * string_similarity(template_text, dest_text)
  by_length = weights[:length] * length_similarity(template_text, dest_text)
  by_position = weights[:position] * position_similarity(t_idx, d_idx, total_t, total_d)

  by_content + by_length + by_position
end
190
+
191
# Similarity of two strings derived from their Levenshtein distance.
#
# Identical strings score 1.0; if exactly one side is empty the score is
# 0.0; otherwise it is 1 - distance / length of the longer string.
#
# @param str1 [String] First string
# @param str2 [String] Second string
# @return [Float] Similarity score (0.0-1.0)
def string_similarity(str1, str2)
  if str1 == str2
    1.0
  elsif str1.empty? || str2.empty?
    0.0
  else
    edits = levenshtein_distance(str1, str2)
    longest = str1.length > str2.length ? str1.length : str2.length
    1.0 - (edits.to_f / longest)
  end
end
204
+
205
# Calculate length similarity between two strings.
#
# Strings of equal length (including two empty strings) score 1.0;
# otherwise the score is shorter length / longer length, which is 0.0 when
# exactly one string is empty.
#
# @param str1 [String] First string
# @param str2 [String] Second string
# @return [Float] Similarity score (0.0-1.0)
def length_similarity(str1, str2)
  shorter, longer = [str1.length, str2.length].minmax
  # This guard also covers the both-empty case, so the division below can
  # never be 0/0.
  return 1.0 if shorter == longer

  shorter.to_f / longer
end
218
+
219
# Calculate position similarity in document.
#
# Each index is normalised to a 0.0-1.0 relative position within its
# collection (0.5 when the collection has at most one element); the score
# is 1 minus the absolute difference of the two relative positions.
#
# @param idx1 [Integer] First node index
# @param idx2 [Integer] Second node index
# @param total1 [Integer] Total nodes in first collection
# @param total2 [Integer] Total nodes in second collection
# @return [Float] Similarity score (0.0-1.0)
def position_similarity(idx1, idx2, total1, total2)
  relative = lambda do |index, total|
    total > 1 ? index.to_f / (total - 1) : 0.5
  end

  1.0 - (relative.call(idx1, total1) - relative.call(idx2, total2)).abs
end
235
+
236
# Calculate Levenshtein distance between two strings.
#
# Row-by-row dynamic programme that keeps only the previous and current
# rows, sized by the shorter string.
#
# @param str1 [String] First string
# @param str2 [String] Second string
# @return [Integer] Edit distance
def levenshtein_distance(str1, str2)
  return str2.length if str1.empty?
  return str1.length if str2.empty?

  # Split into characters once: indexing a String by integer inside the
  # nested loop can require a scan for multibyte (non-ASCII) text.
  columns = str1.chars
  rows = str2.chars
  # Use the shorter string as columns so the two rows stay small.
  columns, rows = rows, columns if columns.length > rows.length

  width = columns.length
  previous = (0..width).to_a
  current = Array.new(width + 1)

  rows.each_with_index do |row_char, j|
    current[0] = j + 1

    columns.each_with_index do |column_char, i|
      cost = column_char == row_char ? 0 : 1
      current[i + 1] = [
        current[i] + 1,       # insertion
        previous[i + 1] + 1,  # deletion
        previous[i] + cost,   # substitution
      ].min
    end

    # The finished row becomes the previous row; the old one is reused.
    previous, current = current, previous
  end

  previous[width]
end
276
+ end
277
+ end
278
+ end
@@ -71,8 +71,9 @@ module Ast
71
71
  # @note Shared examples require +silent_stream+ and +rspec-stubbed_env+ gems.
72
72
  module DebugLogger
73
73
  # Benchmark is optional - gracefully degrade if not available
74
+ # Use autoload to defer loading until actually needed
74
75
  BENCHMARK_AVAILABLE = begin
75
- require "benchmark"
76
+ autoload(:Benchmark, "benchmark")
76
77
  true
77
78
  rescue LoadError
78
79
  # :nocov: