ast-merge 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +165 -7
  4. data/README.md +208 -39
  5. data/exe/ast-merge-recipe +366 -0
  6. data/lib/ast/merge/conflict_resolver_base.rb +8 -1
  7. data/lib/ast/merge/content_match_refiner.rb +278 -0
  8. data/lib/ast/merge/debug_logger.rb +2 -1
  9. data/lib/ast/merge/detector/base.rb +193 -0
  10. data/lib/ast/merge/detector/fenced_code_block.rb +227 -0
  11. data/lib/ast/merge/detector/mergeable.rb +369 -0
  12. data/lib/ast/merge/detector/toml_frontmatter.rb +82 -0
  13. data/lib/ast/merge/detector/yaml_frontmatter.rb +82 -0
  14. data/lib/ast/merge/merge_result_base.rb +4 -1
  15. data/lib/ast/merge/navigable_statement.rb +630 -0
  16. data/lib/ast/merge/partial_template_merger.rb +432 -0
  17. data/lib/ast/merge/recipe/config.rb +198 -0
  18. data/lib/ast/merge/recipe/preset.rb +171 -0
  19. data/lib/ast/merge/recipe/runner.rb +254 -0
  20. data/lib/ast/merge/recipe/script_loader.rb +181 -0
  21. data/lib/ast/merge/recipe.rb +26 -0
  22. data/lib/ast/merge/rspec/dependency_tags.rb +252 -0
  23. data/lib/ast/merge/rspec/shared_examples/reproducible_merge.rb +3 -2
  24. data/lib/ast/merge/rspec.rb +33 -2
  25. data/lib/ast/merge/smart_merger_base.rb +86 -3
  26. data/lib/ast/merge/version.rb +1 -1
  27. data/lib/ast/merge.rb +10 -6
  28. data/sig/ast/merge.rbs +389 -2
  29. data.tar.gz.sig +0 -0
  30. metadata +58 -14
  31. metadata.gz.sig +0 -0
  32. data/lib/ast/merge/fenced_code_block_detector.rb +0 -313
  33. data/lib/ast/merge/region.rb +0 -124
  34. data/lib/ast/merge/region_detector_base.rb +0 -114
  35. data/lib/ast/merge/region_mergeable.rb +0 -364
  36. data/lib/ast/merge/toml_frontmatter_detector.rb +0 -88
  37. data/lib/ast/merge/yaml_frontmatter_detector.rb +0 -88
@@ -1,313 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Ast
4
- module Merge
5
- # Detects fenced code blocks with a specific language identifier.
6
- #
7
- # This detector finds Markdown-style fenced code blocks (using ``` or ~~~)
8
- # that have a specific language identifier. It can be configured for any
9
- # language: ruby, json, yaml, mermaid, etc.
10
- #
11
- # ## When to Use This Detector
12
- #
13
- # **Use FencedCodeBlockDetector when:**
14
- # - Working with raw Markdown text without parsing to AST
15
- # - Quick extraction from strings without parser dependencies
16
- # - Custom text processing requiring line-level precision
17
- # - Operating on source text directly (e.g., linters, formatters)
18
- #
19
- # **Do NOT use FencedCodeBlockDetector when:**
20
- # - Working with parsed Markdown AST (use native code block nodes instead)
21
- # - Integrating with markdown-merge's CodeBlockMerger (it uses native nodes)
22
- # - Using tree_haver's unified Markdown backend API
23
- #
24
- # ## Comparison: FencedCodeBlockDetector vs Native AST Nodes
25
- #
26
- # ### Native AST Approach (Preferred for AST-based Tools)
27
- #
28
- # When working with parsed Markdown AST via tree_haver (commonmarker/markly backends):
29
- #
30
- # ```ruby
31
- # # markdown-merge's CodeBlockMerger uses this approach:
32
- # language = node.fence_info.split(/\s+/).first # e.g., "ruby"
33
- # content = node.string_content # Raw code inside block
34
- #
35
- # # Then delegate to language-specific parser:
36
- # case language
37
- # when "ruby"
38
- # merger = Prism::Merge::SmartMerger.new(template, dest, preference: :destination)
39
- # merged_content = merger.merge # Prism parses Ruby code into full AST!
40
- # when "yaml"
41
- # merger = Psych::Merge::SmartMerger.new(template, dest, preference: :destination)
42
- # merged_content = merger.merge # Psych parses YAML into AST!
43
- # when "json"
44
- # merger = Json::Merge::SmartMerger.new(template, dest, preference: :destination)
45
- # merged_content = merger.merge # JSON parser creates AST!
46
- # when "bash"
47
- # merger = Bash::Merge::SmartMerger.new(template, dest, preference: :destination)
48
- # merged_content = merger.merge # tree-sitter parses bash into AST!
49
- # end
50
- # ```
51
- #
52
- # **Advantages of Native AST approach:**
53
- # - ✓ Parser handles all edge cases (nested backticks, indentation, etc.)
54
- # - ✓ Respects node boundaries from authoritative source
55
- # - ✓ No regex brittleness
56
- # - ✓ Automatic handling of ``` and ~~~ fence styles
57
- # - ✓ Enables TRUE language-aware merging (not just text replacement)
58
- # - ✓ Language-specific parsers create full ASTs of embedded code
59
- # - ✓ Smart merging at semantic level (method definitions, YAML keys, JSON properties)
60
- #
61
- # ### Text-Based Approach (This Class)
62
- #
63
- # When working with raw text:
64
- #
65
- # ```ruby
66
- # detector = FencedCodeBlockDetector.ruby
67
- # regions = detector.detect_all(markdown_text)
68
- # regions.each do |region|
69
- # puts "Ruby code at lines #{region.start_line}-#{region.end_line}"
70
- # # region.content is just a string - NO parsing happens
71
- # end
72
- # ```
73
- #
74
- # **Limitations of text-based approach:**
75
- # - • Uses regex to find blocks (may miss edge cases)
76
- # - • Returns strings, not parsed structures
77
- # - • Cannot perform semantic merging
78
- # - • Manual handling of fence variations
79
- # - • No language-specific intelligence
80
- #
81
- # ## Real-World Example: markdown-merge Inner Code Block Merging
82
- #
83
- # When `inner_merge_code_blocks: true` is enabled in markdown-merge:
84
- #
85
- # 1. **Markdown Parser** (commonmarker/markly) parses markdown into AST
86
- # - Creates code_block nodes with `fence_info` and `string_content`
87
- #
88
- # 2. **CodeBlockMerger** extracts code using native node properties:
89
- # ```ruby
90
- # language = node.fence_info.split(/\s+/).first
91
- # template_code = template_node.string_content
92
- # dest_code = dest_node.string_content
93
- # ```
94
- #
95
- # 3. **Language-Specific Parser** creates FULL AST of the embedded code:
96
- # - `Prism::Merge` → Prism parses Ruby into complete AST (ClassNode, DefNode, etc.)
97
- # - `Psych::Merge` → Psych parses YAML into document structure
98
- # - `Json::Merge` → JSON parser creates object/array tree
99
- # - `Bash::Merge` → tree-sitter creates bash statement AST
100
- #
101
- # 4. **Smart Merger** performs SEMANTIC merging at AST level:
102
- # - Ruby: Merges class definitions, preserves custom methods
103
- # - YAML: Merges keys, preserves custom configuration values
104
- # - JSON: Merges objects, destination values win on conflicts
105
- # - Bash: Merges statements, preserves custom exports
106
- #
107
- # 5. **Result** is intelligently merged code, not simple text concatenation!
108
- #
109
- # **This means:** The embedded code is FULLY PARSED by its native language parser,
110
- # enabling true semantic-level merging. FencedCodeBlockDetector would only find
111
- # the text boundaries - it cannot perform this semantic merging.
112
- #
113
- # @example Detecting Ruby code blocks
114
- # detector = FencedCodeBlockDetector.new("ruby", aliases: ["rb"])
115
- # regions = detector.detect_all(markdown_source)
116
- #
117
- # @example Using factory methods
118
- # detector = FencedCodeBlockDetector.ruby
119
- # detector = FencedCodeBlockDetector.yaml
120
- # detector = FencedCodeBlockDetector.json
121
- #
122
- # @api public
123
- class FencedCodeBlockDetector < RegionDetectorBase
124
- # @return [String] The primary language identifier
125
- attr_reader :language
126
-
127
- # @return [Array<String>] Alternative language identifiers
128
- attr_reader :aliases
129
-
130
- # Creates a new detector for the specified language.
131
- #
132
- # @param language [String, Symbol] The language identifier (e.g., "ruby", "json")
133
- # @param aliases [Array<String, Symbol>] Alternative identifiers (e.g., ["rb"] for ruby)
134
- def initialize(language, aliases: [])
135
- super()
136
- @language = language.to_s.downcase
137
- @aliases = aliases.map { |a| a.to_s.downcase }
138
- @all_identifiers = [@language] + @aliases
139
- end
140
-
141
- # @return [Symbol] The region type (e.g., :ruby_code_block)
142
- def region_type
143
- :"#{@language}_code_block"
144
- end
145
-
146
- # Check if a language identifier matches this detector.
147
- #
148
- # @param lang [String] The language identifier to check
149
- # @return [Boolean] true if the language matches
150
- def matches_language?(lang)
151
- @all_identifiers.include?(lang.to_s.downcase)
152
- end
153
-
154
- # Detects all fenced code blocks with the configured language.
155
- #
156
- # @param source [String] The full document content
157
- # @return [Array<Region>] All detected code blocks, sorted by start_line
158
- def detect_all(source)
159
- return [] if source.nil? || source.empty?
160
-
161
- regions = []
162
- lines = source.lines
163
- in_block = false
164
- start_line = nil
165
- content_lines = []
166
- current_language = nil
167
- fence_char = nil
168
- fence_length = nil
169
- indent = ""
170
-
171
- lines.each_with_index do |line, idx|
172
- line_num = idx + 1
173
-
174
- if !in_block
175
- # Match opening fence: ```lang or ~~~lang (optionally indented)
176
- match = line.match(/^(\s*)(`{3,}|~{3,})(\w*)\s*$/)
177
- if match
178
- indent = match[1] || ""
179
- fence = match[2]
180
- lang = match[3].downcase
181
-
182
- if @all_identifiers.include?(lang)
183
- in_block = true
184
- start_line = line_num
185
- content_lines = []
186
- current_language = lang
187
- fence_char = fence[0]
188
- fence_length = fence.length
189
- end
190
- end
191
- elsif line.match?(/^#{Regexp.escape(indent)}#{Regexp.escape(fence_char)}{#{fence_length},}\s*$/)
192
- # Match closing fence (must use same char, same indent, and at least same length)
193
- opening_fence = "#{fence_char * fence_length}#{current_language}"
194
- closing_fence = fence_char * fence_length
195
-
196
- regions << build_region(
197
- type: region_type,
198
- content: content_lines.join,
199
- start_line: start_line,
200
- end_line: line_num,
201
- delimiters: [opening_fence, closing_fence],
202
- metadata: {language: current_language, indent: indent.empty? ? nil : indent},
203
- )
204
- in_block = false
205
- start_line = nil
206
- content_lines = []
207
- current_language = nil
208
- fence_char = nil
209
- fence_length = nil
210
- indent = ""
211
- else
212
- # Accumulate content lines (strip the indent if present)
213
- content_lines << if indent.empty?
214
- line
215
- else
216
- # Strip the common indent from content lines
217
- line.sub(/^#{Regexp.escape(indent)}/, "")
218
- end
219
- end
220
- end
221
-
222
- # Note: Unclosed blocks are ignored (no region created)
223
- regions
224
- end
225
-
226
- # @return [String] A description of this detector
227
- def inspect
228
- aliases_str = @aliases.empty? ? "" : " aliases=#{@aliases.inspect}"
229
- "#<#{self.class.name} language=#{@language}#{aliases_str}>"
230
- end
231
-
232
- class << self
233
- # Creates a detector for Ruby code blocks.
234
- # @return [FencedCodeBlockDetector]
235
- def ruby
236
- new("ruby", aliases: ["rb"])
237
- end
238
-
239
- # Creates a detector for JSON code blocks.
240
- # @return [FencedCodeBlockDetector]
241
- def json
242
- new("json")
243
- end
244
-
245
- # Creates a detector for YAML code blocks.
246
- # @return [FencedCodeBlockDetector]
247
- def yaml
248
- new("yaml", aliases: ["yml"])
249
- end
250
-
251
- # Creates a detector for TOML code blocks.
252
- # @return [FencedCodeBlockDetector]
253
- def toml
254
- new("toml")
255
- end
256
-
257
- # Creates a detector for Mermaid diagram blocks.
258
- # @return [FencedCodeBlockDetector]
259
- def mermaid
260
- new("mermaid")
261
- end
262
-
263
- # Creates a detector for JavaScript code blocks.
264
- # @return [FencedCodeBlockDetector]
265
- def javascript
266
- new("javascript", aliases: ["js"])
267
- end
268
-
269
- # Creates a detector for TypeScript code blocks.
270
- # @return [FencedCodeBlockDetector]
271
- def typescript
272
- new("typescript", aliases: ["ts"])
273
- end
274
-
275
- # Creates a detector for Python code blocks.
276
- # @return [FencedCodeBlockDetector]
277
- def python
278
- new("python", aliases: ["py"])
279
- end
280
-
281
- # Creates a detector for Bash/Shell code blocks.
282
- # @return [FencedCodeBlockDetector]
283
- def bash
284
- new("bash", aliases: ["sh", "shell", "zsh"])
285
- end
286
-
287
- # Creates a detector for SQL code blocks.
288
- # @return [FencedCodeBlockDetector]
289
- def sql
290
- new("sql")
291
- end
292
-
293
- # Creates a detector for HTML code blocks.
294
- # @return [FencedCodeBlockDetector]
295
- def html
296
- new("html")
297
- end
298
-
299
- # Creates a detector for CSS code blocks.
300
- # @return [FencedCodeBlockDetector]
301
- def css
302
- new("css")
303
- end
304
-
305
- # Creates a detector for Markdown code blocks (nested markdown).
306
- # @return [FencedCodeBlockDetector]
307
- def markdown
308
- new("markdown", aliases: ["md"])
309
- end
310
- end
311
- end
312
- end
313
- end
@@ -1,124 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Ast
4
- module Merge
5
- # Represents a detected region within a document.
6
- #
7
- # Regions are portions of a document that can be handled by a specialized
8
- # merger. For example, YAML frontmatter in a Markdown file, or a Ruby code
9
- # block that should be merged using a Ruby-aware merger.
10
- #
11
- # @example Creating a region for YAML frontmatter
12
- # Region.new(
13
- # type: :yaml_frontmatter,
14
- # content: "title: My Doc\nversion: 1.0\n",
15
- # start_line: 1,
16
- # end_line: 4,
17
- # delimiters: ["---", "---"],
18
- # metadata: { format: :yaml }
19
- # )
20
- #
21
- # @example Creating a region for a Ruby code block
22
- # Region.new(
23
- # type: :ruby_code_block,
24
- # content: "def hello\n puts 'world'\nend\n",
25
- # start_line: 5,
26
- # end_line: 9,
27
- # delimiters: ["```ruby", "```"],
28
- # metadata: { language: "ruby" }
29
- # )
30
- #
31
- # @api public
32
- Region = Struct.new(
33
- # @return [Symbol] The type of region (e.g., :yaml_frontmatter, :ruby_code_block)
34
- :type,
35
-
36
- # @return [String] The raw string content of this region (inner content, without delimiters)
37
- :content,
38
-
39
- # @return [Integer] 1-indexed start line in the original document
40
- :start_line,
41
-
42
- # @return [Integer] 1-indexed end line in the original document
43
- :end_line,
44
-
45
- # @return [Array<String>, nil] Delimiter strings to reconstruct the region
46
- # ["```ruby", "```"] - [opening_delimiter, closing_delimiter]
47
- :delimiters,
48
-
49
- # @return [Hash, nil] Optional metadata for detector-specific information
50
- # (e.g., { language: "ruby" }, { format: :yaml })
51
- :metadata,
52
- keyword_init: true,
53
- ) do
54
- # Returns the line range covered by this region.
55
- #
56
- # @return [Range] The range from start_line to end_line (inclusive)
57
- # @example
58
- # region.line_range # => 1..4
59
- def line_range
60
- start_line..end_line
61
- end
62
-
63
- # Returns the number of lines this region spans.
64
- #
65
- # @return [Integer] The number of lines
66
- # @example
67
- # region.line_count # => 4
68
- def line_count
69
- end_line - start_line + 1
70
- end
71
-
72
- # Reconstructs the full region text including delimiters.
73
- #
74
- # @return [String] The complete region with start and end delimiters
75
- # @example
76
- # region.full_text
77
- # # => "```ruby\ndef hello\n puts 'world'\nend\n```"
78
- def full_text
79
- return content if delimiters.nil? || delimiters.empty?
80
-
81
- opening = delimiters[0] || ""
82
- closing = delimiters[1] || ""
83
- "#{opening}\n#{content}#{closing}"
84
- end
85
-
86
- # Checks if this region overlaps with the given line number.
87
- #
88
- # @param line [Integer] The line number to check (1-indexed)
89
- # @return [Boolean] true if the line is within this region
90
- def contains_line?(line)
91
- line_range.cover?(line)
92
- end
93
-
94
- # Checks if this region overlaps with another region.
95
- #
96
- # @param other [Region] Another region to check for overlap
97
- # @return [Boolean] true if the regions overlap
98
- def overlaps?(other)
99
- line_range.cover?(other.start_line) ||
100
- line_range.cover?(other.end_line) ||
101
- other.line_range.cover?(start_line)
102
- end
103
-
104
- # Returns a short string representation of the region.
105
- #
106
- # @return [String] A concise string describing the region
107
- def to_s
108
- "Region<#{type}:#{start_line}-#{end_line}>"
109
- end
110
-
111
- # Returns a detailed human-readable representation of the region.
112
- #
113
- # @return [String] A string describing the region with truncated content
114
- def inspect
115
- truncated = if content && content.length > 30
116
- "#{content[0, 30]}..."
117
- else
118
- content.inspect
119
- end
120
- "#{self} #{truncated}"
121
- end
122
- end
123
- end
124
- end
@@ -1,114 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Ast
4
- module Merge
5
- # Base class for region detection.
6
- #
7
- # Region detectors identify portions of a document that should be handled
8
- # by a specialized merger. For example, detecting YAML frontmatter in a
9
- # Markdown file, or Ruby code blocks that should be merged with Prism.
10
- #
11
- # Subclasses must implement:
12
- # - {#region_type} - Returns the type symbol for detected regions
13
- # - {#detect_all} - Finds all regions of this type in a document
14
- #
15
- # @example Implementing a custom detector
16
- # class MyBlockDetector < Ast::Merge::RegionDetectorBase
17
- # def region_type
18
- # :my_block
19
- # end
20
- #
21
- # def detect_all(source)
22
- # # Return array of Region structs
23
- # []
24
- # end
25
- # end
26
- #
27
- # @abstract Subclass and implement {#region_type} and {#detect_all}
28
- # @api public
29
- class RegionDetectorBase
30
- # Returns the type symbol for regions detected by this detector.
31
- #
32
- # This symbol is used to identify the region type in the Region struct
33
- # and for matching regions between template and destination documents.
34
- #
35
- # @return [Symbol] The region type (e.g., :yaml_frontmatter, :ruby_code_block)
36
- # @abstract Subclasses must implement this method
37
- def region_type
38
- raise NotImplementedError, "#{self.class}#region_type must be implemented"
39
- end
40
-
41
- # Detects all regions of this type in the given source.
42
- #
43
- # @param source [String] The full document content to scan
44
- # @return [Array<Region>] All detected regions, sorted by start_line
45
- # @abstract Subclasses must implement this method
46
- #
47
- # @example Return value structure
48
- # [
49
- # Region.new(
50
- # type: :yaml_frontmatter,
51
- # content: "title: My Doc\n",
52
- # start_line: 1,
53
- # end_line: 3,
54
- # delimiters: { start: "---\n", end: "---\n" },
55
- # metadata: { format: :yaml }
56
- # )
57
- # ]
58
- def detect_all(source)
59
- raise NotImplementedError, "#{self.class}#detect_all must be implemented"
60
- end
61
-
62
- # Whether to strip delimiters from content before passing to merger.
63
- #
64
- # When true (default), only the inner content is passed to the region's
65
- # merger. The delimiters are stored separately and reattached after merging.
66
- #
67
- # When false, the full content including delimiters is passed to the merger,
68
- # which must then handle the delimiters itself.
69
- #
70
- # @return [Boolean] true if delimiters should be stripped (default: true)
71
- def strip_delimiters?
72
- true
73
- end
74
-
75
- # A human-readable name for this detector.
76
- #
77
- # Used in error messages and debugging output.
78
- #
79
- # @return [String] The detector name
80
- def name
81
- self.class.name || "AnonymousDetector"
82
- end
83
-
84
- # Returns a string representation of this detector.
85
- #
86
- # @return [String] A description of the detector
87
- def inspect
88
- "#<#{name} region_type=#{region_type}>"
89
- end
90
-
91
- protected
92
-
93
- # Helper to build a Region struct with common defaults.
94
- #
95
- # @param type [Symbol] The region type
96
- # @param content [String] The inner content (without delimiters)
97
- # @param start_line [Integer] 1-indexed start line
98
- # @param end_line [Integer] 1-indexed end line
99
- # @param delimiters [Hash, nil] { start: String, end: String }
100
- # @param metadata [Hash, nil] Additional metadata
101
- # @return [Region] A new Region struct
102
- def build_region(type:, content:, start_line:, end_line:, delimiters: nil, metadata: nil)
103
- Region.new(
104
- type: type,
105
- content: content,
106
- start_line: start_line,
107
- end_line: end_line,
108
- delimiters: delimiters,
109
- metadata: metadata || {},
110
- )
111
- end
112
- end
113
- end
114
- end