ast-merge 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +194 -1
  4. data/README.md +235 -53
  5. data/exe/ast-merge-recipe +366 -0
  6. data/lib/ast/merge/ast_node.rb +224 -24
  7. data/lib/ast/merge/comment/block.rb +6 -0
  8. data/lib/ast/merge/comment/empty.rb +6 -0
  9. data/lib/ast/merge/comment/line.rb +6 -0
  10. data/lib/ast/merge/comment/parser.rb +9 -7
  11. data/lib/ast/merge/conflict_resolver_base.rb +8 -1
  12. data/lib/ast/merge/content_match_refiner.rb +278 -0
  13. data/lib/ast/merge/debug_logger.rb +6 -1
  14. data/lib/ast/merge/detector/base.rb +193 -0
  15. data/lib/ast/merge/detector/fenced_code_block.rb +227 -0
  16. data/lib/ast/merge/detector/mergeable.rb +369 -0
  17. data/lib/ast/merge/detector/toml_frontmatter.rb +82 -0
  18. data/lib/ast/merge/detector/yaml_frontmatter.rb +82 -0
  19. data/lib/ast/merge/file_analyzable.rb +5 -3
  20. data/lib/ast/merge/freeze_node_base.rb +1 -1
  21. data/lib/ast/merge/match_refiner_base.rb +1 -1
  22. data/lib/ast/merge/match_score_base.rb +1 -1
  23. data/lib/ast/merge/merge_result_base.rb +4 -1
  24. data/lib/ast/merge/merger_config.rb +33 -31
  25. data/lib/ast/merge/navigable_statement.rb +630 -0
  26. data/lib/ast/merge/partial_template_merger.rb +432 -0
  27. data/lib/ast/merge/recipe/config.rb +198 -0
  28. data/lib/ast/merge/recipe/preset.rb +171 -0
  29. data/lib/ast/merge/recipe/runner.rb +254 -0
  30. data/lib/ast/merge/recipe/script_loader.rb +181 -0
  31. data/lib/ast/merge/recipe.rb +26 -0
  32. data/lib/ast/merge/rspec/dependency_tags.rb +252 -0
  33. data/lib/ast/merge/rspec/shared_examples/reproducible_merge.rb +3 -2
  34. data/lib/ast/merge/rspec.rb +33 -2
  35. data/lib/ast/merge/section_typing.rb +52 -50
  36. data/lib/ast/merge/smart_merger_base.rb +86 -3
  37. data/lib/ast/merge/text/line_node.rb +42 -9
  38. data/lib/ast/merge/text/section_splitter.rb +12 -10
  39. data/lib/ast/merge/text/word_node.rb +47 -14
  40. data/lib/ast/merge/version.rb +1 -1
  41. data/lib/ast/merge.rb +10 -6
  42. data/sig/ast/merge.rbs +389 -2
  43. data.tar.gz.sig +0 -0
  44. metadata +76 -12
  45. metadata.gz.sig +0 -0
  46. data/lib/ast/merge/fenced_code_block_detector.rb +0 -211
  47. data/lib/ast/merge/region.rb +0 -124
  48. data/lib/ast/merge/region_detector_base.rb +0 -114
  49. data/lib/ast/merge/region_mergeable.rb +0 -364
  50. data/lib/ast/merge/toml_frontmatter_detector.rb +0 -88
  51. data/lib/ast/merge/yaml_frontmatter_detector.rb +0 -108
@@ -69,13 +69,15 @@ module Ast
69
69
  end
70
70
  end
71
71
 
72
- # Class method for convenient one-shot parsing.
73
- #
74
- # @param lines [Array<String>] Source lines
75
- # @param style [Style, Symbol, nil] Comment style
76
- # @return [Array<AstNode>] Parsed nodes
77
- def self.parse(lines, style: nil)
78
- new(lines, style: style).parse
72
+ class << self
73
+ # Parse lines as comments.
74
+ #
75
+ # @param lines [Array<String>] Source lines
76
+ # @param style [Style, Symbol, nil] Comment style
77
+ # @return [Array<AstNode>] Parsed nodes
78
+ def parse(lines, style: nil)
79
+ new(lines, style: style).parse
80
+ end
79
81
  end
80
82
 
81
83
  private
@@ -118,6 +118,9 @@ module Ast
118
118
  # @return [Boolean] Whether to add template-only nodes (batch strategy)
119
119
  attr_reader :add_template_only_nodes
120
120
 
121
+ # @return [Object, nil] Match refiner for fuzzy matching
122
+ attr_reader :match_refiner
123
+
121
124
  # Initialize the conflict resolver
122
125
  #
123
126
  # @param strategy [Symbol] Resolution strategy (:node, :batch, or :boundary)
@@ -129,7 +132,9 @@ module Ast
129
132
  # @param template_analysis [Object] Analysis of the template file
130
133
  # @param dest_analysis [Object] Analysis of the destination file
131
134
  # @param add_template_only_nodes [Boolean] Whether to add nodes only in template (batch/boundary strategy)
132
- def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false)
135
+ # @param match_refiner [#call, nil] Optional match refiner for fuzzy matching
136
+ # @param options [Hash] Additional options for forward compatibility
137
+ def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false, match_refiner: nil, **options)
133
138
  unless %i[node batch boundary].include?(strategy)
134
139
  raise ArgumentError, "Invalid strategy: #{strategy}. Must be :node, :batch, or :boundary"
135
140
  end
@@ -141,6 +146,8 @@ module Ast
141
146
  @template_analysis = template_analysis
142
147
  @dest_analysis = dest_analysis
143
148
  @add_template_only_nodes = add_template_only_nodes
149
+ @match_refiner = match_refiner
150
+ # **options captured for forward compatibility - subclasses may use additional options
144
151
  end
145
152
 
146
153
  # Resolve conflicts using the configured strategy
@@ -0,0 +1,278 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ast
4
+ module Merge
5
+ # Match refiner for text content-based fuzzy matching.
6
+ #
7
+ # This refiner uses Levenshtein distance to pair nodes that have similar
8
+ # but not identical text content. It's useful for matching nodes where
9
+ # the content has been slightly modified (typos, rewording, etc.).
10
+ #
11
+ # Unlike signature-based matching which requires exact content hashes,
12
+ # this refiner allows fuzzy matching based on text similarity. This is
13
+ # particularly useful for:
14
+ # - Paragraphs with minor edits
15
+ # - Headings with slight rewording
16
+ # - Comments with updated text
17
+ # - Any text-based node type
18
+ #
19
+ # @example Basic usage
20
+ # refiner = ContentMatchRefiner.new(threshold: 0.7)
21
+ # matches = refiner.call(template_nodes, dest_nodes)
22
+ #
23
+ # @example With specific node types
24
+ # # Only match paragraphs and headings
25
+ # refiner = ContentMatchRefiner.new(
26
+ # threshold: 0.6,
27
+ # node_types: [:paragraph, :heading]
28
+ # )
29
+ #
30
+ # @example With custom content extractor
31
+ # refiner = ContentMatchRefiner.new(
32
+ # threshold: 0.7,
33
+ # content_extractor: ->(node) { node.text_content.downcase.strip }
34
+ # )
35
+ #
36
+ # @example Combined with other refiners
37
+ # merger = SmartMerger.new(
38
+ # template,
39
+ # destination,
40
+ # match_refiner: [
41
+ # ContentMatchRefiner.new(threshold: 0.7, node_types: [:paragraph]),
42
+ # TableMatchRefiner.new(threshold: 0.5)
43
+ # ]
44
+ # )
45
+ #
46
+ # @see MatchRefinerBase Base class
47
+ class ContentMatchRefiner < MatchRefinerBase
48
+ # Default weights for content similarity scoring
49
+ DEFAULT_WEIGHTS = {
50
+ content: 0.7, # Text content similarity (Levenshtein)
51
+ length: 0.15, # Length similarity
52
+ position: 0.15, # Position similarity in document
53
+ }.freeze
54
+
55
+ # @return [Hash] Scoring weights
56
+ attr_reader :weights
57
+
58
+ # @return [Proc, nil] Custom content extraction function
59
+ attr_reader :content_extractor
60
+
61
+ # Initialize a content match refiner.
62
+ #
63
+ # @param threshold [Float] Minimum score to accept a match (default: 0.5)
64
+ # @param node_types [Array<Symbol>] Node types to process (empty = all)
65
+ # @param weights [Hash] Custom scoring weights
66
+ # @param content_extractor [Proc, nil] Custom function to extract text from nodes
67
+ # Should accept a node and return a String
68
+ # @param options [Hash] Additional options for forward compatibility
69
+ def initialize(
70
+ threshold: DEFAULT_THRESHOLD,
71
+ node_types: [],
72
+ weights: {},
73
+ content_extractor: nil,
74
+ **options
75
+ )
76
+ super(threshold: threshold, node_types: node_types, **options)
77
+ @weights = DEFAULT_WEIGHTS.merge(weights)
78
+ @content_extractor = content_extractor
79
+ end
80
+
81
+ # Find matches between unmatched nodes based on content similarity.
82
+ #
83
+ # @param template_nodes [Array] Unmatched nodes from template
84
+ # @param dest_nodes [Array] Unmatched nodes from destination
85
+ # @param context [Hash] Additional context (may contain :template_analysis, :dest_analysis)
86
+ # @return [Array<MatchResult>] Array of content-based matches
87
+ def call(template_nodes, dest_nodes, context = {})
88
+ template_filtered = filter_nodes(template_nodes)
89
+ dest_filtered = filter_nodes(dest_nodes)
90
+
91
+ return [] if template_filtered.empty? || dest_filtered.empty?
92
+
93
+ # Build position information for scoring
94
+ total_template = template_filtered.size
95
+ total_dest = dest_filtered.size
96
+
97
+ greedy_match(template_filtered, dest_filtered) do |t_node, d_node|
98
+ t_idx = template_filtered.index(t_node) || 0
99
+ d_idx = dest_filtered.index(d_node) || 0
100
+
101
+ compute_content_similarity(
102
+ t_node,
103
+ d_node,
104
+ t_idx,
105
+ d_idx,
106
+ total_template,
107
+ total_dest,
108
+ )
109
+ end
110
+ end
111
+
112
+ protected
113
+
114
+ # Filter nodes by configured node types.
115
+ #
116
+ # @param nodes [Array] Nodes to filter
117
+ # @return [Array] Filtered nodes (matching node_types, or all if empty)
118
+ def filter_nodes(nodes)
119
+ return nodes if node_types.empty?
120
+
121
+ nodes.select { |n| handles_type?(extract_node_type(n)) }
122
+ end
123
+
124
+ # Extract the type from a node.
125
+ #
126
+ # Handles wrapped nodes (merge_type) and raw nodes (type).
127
+ #
128
+ # @param node [Object] The node
129
+ # @return [Symbol, nil] The node type
130
+ def extract_node_type(node)
131
+ if NodeTyping.typed_node?(node)
132
+ NodeTyping.merge_type_for(node)
133
+ elsif node.respond_to?(:merge_type) && node.merge_type
134
+ node.merge_type
135
+ elsif node.respond_to?(:type)
136
+ type = node.type
137
+ type.is_a?(Symbol) ? type : type.to_s.to_sym
138
+ end
139
+ end
140
+
141
+ # Extract text content from a node.
142
+ #
143
+ # Uses the custom content_extractor if provided, otherwise tries
144
+ # common methods for getting text content.
145
+ #
146
+ # @param node [Object] The node
147
+ # @return [String] The text content
148
+ def extract_content(node)
149
+ return @content_extractor.call(node) if @content_extractor
150
+
151
+ # Try common content extraction methods
152
+ if node.respond_to?(:text_content)
153
+ node.text_content.to_s
154
+ elsif node.respond_to?(:string_content)
155
+ node.string_content.to_s
156
+ elsif node.respond_to?(:content)
157
+ node.content.to_s
158
+ elsif node.respond_to?(:text)
159
+ node.text.to_s
160
+ elsif node.respond_to?(:to_s)
161
+ node.to_s
162
+ else
163
+ ""
164
+ end
165
+ end
166
+
167
+ # Compute similarity score between two nodes based on content.
168
+ #
169
+ # @param t_node [Object] Template node
170
+ # @param d_node [Object] Destination node
171
+ # @param t_idx [Integer] Template node index
172
+ # @param d_idx [Integer] Destination node index
173
+ # @param total_t [Integer] Total template nodes
174
+ # @param total_d [Integer] Total destination nodes
175
+ # @return [Float] Similarity score (0.0-1.0)
176
+ def compute_content_similarity(t_node, d_node, t_idx, d_idx, total_t, total_d)
177
+ t_content = extract_content(t_node)
178
+ d_content = extract_content(d_node)
179
+
180
+ # Calculate component scores
181
+ content_score = string_similarity(t_content, d_content)
182
+ length_score = length_similarity(t_content, d_content)
183
+ position_score = position_similarity(t_idx, d_idx, total_t, total_d)
184
+
185
+ # Weighted combination
186
+ weights[:content] * content_score +
187
+ weights[:length] * length_score +
188
+ weights[:position] * position_score
189
+ end
190
+
191
+ # Calculate string similarity using Levenshtein distance.
192
+ #
193
+ # @param str1 [String] First string
194
+ # @param str2 [String] Second string
195
+ # @return [Float] Similarity score (0.0-1.0)
196
+ def string_similarity(str1, str2)
197
+ return 1.0 if str1 == str2
198
+ return 0.0 if str1.empty? || str2.empty?
199
+
200
+ distance = levenshtein_distance(str1, str2)
201
+ max_len = [str1.length, str2.length].max
202
+ 1.0 - (distance.to_f / max_len)
203
+ end
204
+
205
+ # Calculate length similarity between two strings.
206
+ #
207
+ # @param str1 [String] First string
208
+ # @param str2 [String] Second string
209
+ # @return [Float] Similarity score (0.0-1.0)
210
+ def length_similarity(str1, str2)
211
+ return 1.0 if str1.length == str2.length
212
+ return 0.0 if str1.empty? && str2.empty?
213
+
214
+ min_len = [str1.length, str2.length].min.to_f
215
+ max_len = [str1.length, str2.length].max.to_f
216
+ min_len / max_len
217
+ end
218
+
219
+ # Calculate position similarity in document.
220
+ #
221
+ # Nodes at similar relative positions score higher.
222
+ #
223
+ # @param idx1 [Integer] First node index
224
+ # @param idx2 [Integer] Second node index
225
+ # @param total1 [Integer] Total nodes in first collection
226
+ # @param total2 [Integer] Total nodes in second collection
227
+ # @return [Float] Similarity score (0.0-1.0)
228
+ def position_similarity(idx1, idx2, total1, total2)
229
+ # Normalize positions to 0.0-1.0 range
230
+ pos1 = (total1 > 1) ? idx1.to_f / (total1 - 1) : 0.5
231
+ pos2 = (total2 > 1) ? idx2.to_f / (total2 - 1) : 0.5
232
+
233
+ 1.0 - (pos1 - pos2).abs
234
+ end
235
+
236
+ # Calculate Levenshtein distance between two strings.
237
+ #
238
+ # Uses Wagner-Fischer algorithm with space optimization.
239
+ #
240
+ # @param str1 [String] First string
241
+ # @param str2 [String] Second string
242
+ # @return [Integer] Edit distance
243
+ def levenshtein_distance(str1, str2)
244
+ return str2.length if str1.empty?
245
+ return str1.length if str2.empty?
246
+
247
+ # Use shorter string as columns for space efficiency
248
+ if str1.length > str2.length
249
+ str1, str2 = str2, str1
250
+ end
251
+
252
+ m = str1.length
253
+ n = str2.length
254
+
255
+ # Only need two rows at a time
256
+ prev_row = (0..m).to_a
257
+ curr_row = Array.new(m + 1)
258
+
259
+ (1..n).each do |j|
260
+ curr_row[0] = j
261
+
262
+ (1..m).each do |i|
263
+ cost = (str1[i - 1] == str2[j - 1]) ? 0 : 1
264
+ curr_row[i] = [
265
+ curr_row[i - 1] + 1, # insertion
266
+ prev_row[i] + 1, # deletion
267
+ prev_row[i - 1] + cost, # substitution
268
+ ].min
269
+ end
270
+
271
+ prev_row, curr_row = curr_row, prev_row
272
+ end
273
+
274
+ prev_row[m]
275
+ end
276
+ end
277
+ end
278
+ end
@@ -71,8 +71,9 @@ module Ast
71
71
  # @note Shared examples require +silent_stream+ and +rspec-stubbed_env+ gems.
72
72
  module DebugLogger
73
73
  # Benchmark is optional - gracefully degrade if not available
74
+ # Use autoload to defer loading until actually needed
74
75
  BENCHMARK_AVAILABLE = begin
75
- require "benchmark"
76
+ autoload(:Benchmark, "benchmark")
76
77
  true
77
78
  rescue LoadError
78
79
  # :nocov:
@@ -83,10 +84,14 @@ module Ast
83
84
 
84
85
  class << self
85
86
  # @return [String] Environment variable name to check for debug mode
87
+ # rubocop:disable ThreadSafety/ClassAndModuleAttributes - Configuration attribute, set once at load time
86
88
  attr_accessor :env_var_name
89
+ # rubocop:enable ThreadSafety/ClassAndModuleAttributes
87
90
 
88
91
  # @return [String] Prefix for log messages
92
+ # rubocop:disable ThreadSafety/ClassAndModuleAttributes - Configuration attribute, set once at load time
89
93
  attr_accessor :log_prefix
94
+ # rubocop:enable ThreadSafety/ClassAndModuleAttributes
90
95
 
91
96
  # Hook called when a module extends Ast::Merge::DebugLogger.
92
97
  # Sets up attr_accessor for env_var_name and log_prefix on the extending module,
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ast
4
+ module Merge
5
+ # Detector namespace for region detection and merging functionality.
6
+ #
7
+ # Regions are portions of a document that can be handled by a specialized
8
+ # merger. For example, YAML frontmatter in a Markdown file, or Ruby code
9
+ # blocks that should be merged with Prism.
10
+ #
11
+ # @example Detecting regions
12
+ # detector = Ast::Merge::Detector::FencedCodeBlock.ruby
13
+ # regions = detector.detect_all(markdown_content)
14
+ # regions.each do |region|
15
+ # puts "Found #{region.type} at lines #{region.start_line}-#{region.end_line}"
16
+ # end
17
+ #
18
+ # @see Detector::Region Data struct for detected regions
19
+ # @see Detector::Base Base class for detectors
20
+ # @see Detector::Mergeable Mixin for region-aware merging
21
+ #
22
+ module Detector
23
+ # Represents a detected region within a document.
24
+ #
25
+ # Regions are portions of a document that can be handled by a specialized
26
+ # merger. For example, YAML frontmatter in a Markdown file, or a Ruby code
27
+ # block that should be merged using a Ruby-aware merger.
28
+ #
29
+ # @example Creating a region for YAML frontmatter
30
+ # Region.new(
31
+ # type: :yaml_frontmatter,
32
+ # content: "title: My Doc\nversion: 1.0\n",
33
+ # start_line: 1,
34
+ # end_line: 4,
35
+ # delimiters: ["---", "---"],
36
+ # metadata: { format: :yaml }
37
+ # )
38
+ #
39
+ # @api public
40
+ Region = Struct.new(
41
+ # @return [Symbol] The type of region (e.g., :yaml_frontmatter, :ruby_code_block)
42
+ :type,
43
+
44
+ # @return [String] The raw string content of this region (inner content, without delimiters)
45
+ :content,
46
+
47
+ # @return [Integer] 1-indexed start line in the original document
48
+ :start_line,
49
+
50
+ # @return [Integer] 1-indexed end line in the original document
51
+ :end_line,
52
+
53
+ # @return [Array<String>, nil] Delimiter strings to reconstruct the region
54
+ :delimiters,
55
+
56
+ # @return [Hash, nil] Optional metadata for detector-specific information
57
+ :metadata,
58
+ keyword_init: true,
59
+ ) do
60
+ # Returns the line range covered by this region.
61
+ # @return [Range]
62
+ def line_range
63
+ start_line..end_line
64
+ end
65
+
66
+ # Returns the number of lines this region spans.
67
+ # @return [Integer]
68
+ def line_count
69
+ end_line - start_line + 1
70
+ end
71
+
72
+ # Reconstructs the full region text including delimiters.
73
+ # @return [String]
74
+ def full_text
75
+ return content if delimiters.nil? || delimiters.empty?
76
+
77
+ opening = delimiters[0] || ""
78
+ closing = delimiters[1] || ""
79
+ "#{opening}\n#{content}#{closing}"
80
+ end
81
+
82
+ # Checks if this region contains the given line number.
83
+ # @param line [Integer] The line number to check (1-indexed)
84
+ # @return [Boolean]
85
+ def contains_line?(line)
86
+ line_range.cover?(line)
87
+ end
88
+
89
+ # Checks if this region overlaps with another region.
90
+ # @param other [Region] Another region
91
+ # @return [Boolean]
92
+ def overlaps?(other)
93
+ line_range.cover?(other.start_line) ||
94
+ line_range.cover?(other.end_line) ||
95
+ other.line_range.cover?(start_line)
96
+ end
97
+
98
+ # @return [String]
99
+ def to_s
100
+ "Region<#{type}:#{start_line}-#{end_line}>"
101
+ end
102
+
103
+ # @return [String]
104
+ def inspect
105
+ truncated = if content && content.length > 30
106
+ "#{content[0, 30]}..."
107
+ else
108
+ content.inspect
109
+ end
110
+ "#{self} #{truncated}"
111
+ end
112
+ end
113
+
114
+ # Base class for region detection.
115
+ #
116
+ # Region detectors identify portions of a document that should be handled
117
+ # by a specialized merger.
118
+ #
119
+ # Subclasses must implement:
120
+ # - {#region_type} - Returns the type symbol for detected regions
121
+ # - {#detect_all} - Finds all regions of this type in a document
122
+ #
123
+ # @example Implementing a custom detector
124
+ # class MyBlockDetector < Ast::Merge::Detector::Base
125
+ # def region_type
126
+ # :my_block
127
+ # end
128
+ #
129
+ # def detect_all(source)
130
+ # # Return array of Region structs
131
+ # []
132
+ # end
133
+ # end
134
+ #
135
+ # @abstract Subclass and implement {#region_type} and {#detect_all}
136
+ # @api public
137
+ #
138
+ class Base
139
+ # Returns the type symbol for regions detected by this detector.
140
+ # @return [Symbol]
141
+ # @abstract
142
+ def region_type
143
+ raise NotImplementedError, "#{self.class}#region_type must be implemented"
144
+ end
145
+
146
+ # Detects all regions of this type in the given source.
147
+ # @param _source [String] The full document content to scan
148
+ # @return [Array<Region>] All detected regions, sorted by start_line
149
+ # @abstract
150
+ def detect_all(_source)
151
+ raise NotImplementedError, "#{self.class}#detect_all must be implemented"
152
+ end
153
+
154
+ # Whether to strip delimiters from content before passing to merger.
155
+ # @return [Boolean]
156
+ def strip_delimiters?
157
+ true
158
+ end
159
+
160
+ # A human-readable name for this detector.
161
+ # @return [String]
162
+ def name
163
+ self.class.name || "AnonymousDetector"
164
+ end
165
+
166
+ # @return [String]
167
+ def inspect
168
+ "#<#{name} region_type=#{region_type}>"
169
+ end
170
+
171
+ protected
172
+
173
+ # Helper to build a Region struct.
174
+ # @return [Region]
175
+ def build_region(type:, content:, start_line:, end_line:, delimiters: nil, metadata: nil)
176
+ Region.new(
177
+ type: type,
178
+ content: content,
179
+ start_line: start_line,
180
+ end_line: end_line,
181
+ delimiters: delimiters,
182
+ metadata: metadata || {},
183
+ )
184
+ end
185
+ end
186
+
187
+ autoload :FencedCodeBlock, "ast/merge/detector/fenced_code_block"
188
+ autoload :YamlFrontmatter, "ast/merge/detector/yaml_frontmatter"
189
+ autoload :TomlFrontmatter, "ast/merge/detector/toml_frontmatter"
190
+ autoload :Mergeable, "ast/merge/detector/mergeable"
191
+ end
192
+ end
193
+ end