ast-merge 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +194 -1
- data/README.md +235 -53
- data/exe/ast-merge-recipe +366 -0
- data/lib/ast/merge/ast_node.rb +224 -24
- data/lib/ast/merge/comment/block.rb +6 -0
- data/lib/ast/merge/comment/empty.rb +6 -0
- data/lib/ast/merge/comment/line.rb +6 -0
- data/lib/ast/merge/comment/parser.rb +9 -7
- data/lib/ast/merge/conflict_resolver_base.rb +8 -1
- data/lib/ast/merge/content_match_refiner.rb +278 -0
- data/lib/ast/merge/debug_logger.rb +6 -1
- data/lib/ast/merge/detector/base.rb +193 -0
- data/lib/ast/merge/detector/fenced_code_block.rb +227 -0
- data/lib/ast/merge/detector/mergeable.rb +369 -0
- data/lib/ast/merge/detector/toml_frontmatter.rb +82 -0
- data/lib/ast/merge/detector/yaml_frontmatter.rb +82 -0
- data/lib/ast/merge/file_analyzable.rb +5 -3
- data/lib/ast/merge/freeze_node_base.rb +1 -1
- data/lib/ast/merge/match_refiner_base.rb +1 -1
- data/lib/ast/merge/match_score_base.rb +1 -1
- data/lib/ast/merge/merge_result_base.rb +4 -1
- data/lib/ast/merge/merger_config.rb +33 -31
- data/lib/ast/merge/navigable_statement.rb +630 -0
- data/lib/ast/merge/partial_template_merger.rb +432 -0
- data/lib/ast/merge/recipe/config.rb +198 -0
- data/lib/ast/merge/recipe/preset.rb +171 -0
- data/lib/ast/merge/recipe/runner.rb +254 -0
- data/lib/ast/merge/recipe/script_loader.rb +181 -0
- data/lib/ast/merge/recipe.rb +26 -0
- data/lib/ast/merge/rspec/dependency_tags.rb +252 -0
- data/lib/ast/merge/rspec/shared_examples/reproducible_merge.rb +3 -2
- data/lib/ast/merge/rspec.rb +33 -2
- data/lib/ast/merge/section_typing.rb +52 -50
- data/lib/ast/merge/smart_merger_base.rb +86 -3
- data/lib/ast/merge/text/line_node.rb +42 -9
- data/lib/ast/merge/text/section_splitter.rb +12 -10
- data/lib/ast/merge/text/word_node.rb +47 -14
- data/lib/ast/merge/version.rb +1 -1
- data/lib/ast/merge.rb +10 -6
- data/sig/ast/merge.rbs +389 -2
- data.tar.gz.sig +0 -0
- metadata +76 -12
- metadata.gz.sig +0 -0
- data/lib/ast/merge/fenced_code_block_detector.rb +0 -211
- data/lib/ast/merge/region.rb +0 -124
- data/lib/ast/merge/region_detector_base.rb +0 -114
- data/lib/ast/merge/region_mergeable.rb +0 -364
- data/lib/ast/merge/toml_frontmatter_detector.rb +0 -88
- data/lib/ast/merge/yaml_frontmatter_detector.rb +0 -108
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ast
|
|
4
|
+
module Merge
|
|
5
|
+
module Detector
|
|
6
|
+
# Detects fenced code blocks with a specific language identifier.
|
|
7
|
+
#
|
|
8
|
+
# This detector finds Markdown-style fenced code blocks (using ``` or ~~~)
|
|
9
|
+
# that have a specific language identifier. It can be configured for any
|
|
10
|
+
# language: ruby, json, yaml, mermaid, etc.
|
|
11
|
+
#
|
|
12
|
+
# ## When to Use This Detector
|
|
13
|
+
#
|
|
14
|
+
# **Use FencedCodeBlock when:**
|
|
15
|
+
# - Working with raw Markdown text without parsing to AST
|
|
16
|
+
# - Quick extraction from strings without parser dependencies
|
|
17
|
+
# - Custom text processing requiring line-level precision
|
|
18
|
+
# - Operating on source text directly (e.g., linters, formatters)
|
|
19
|
+
#
|
|
20
|
+
# **Do NOT use FencedCodeBlock when:**
|
|
21
|
+
# - Working with parsed Markdown AST (use native code block nodes instead)
|
|
22
|
+
# - Integrating with markdown-merge's CodeBlockMerger (it uses native nodes)
|
|
23
|
+
# - Using tree_haver's unified Markdown backend API
|
|
24
|
+
#
|
|
25
|
+
# @example Detecting Ruby code blocks
|
|
26
|
+
# detector = FencedCodeBlock.new("ruby", aliases: ["rb"])
|
|
27
|
+
# regions = detector.detect_all(markdown_source)
|
|
28
|
+
#
|
|
29
|
+
# @example Using factory methods
|
|
30
|
+
# detector = FencedCodeBlock.ruby
|
|
31
|
+
# detector = FencedCodeBlock.yaml
|
|
32
|
+
# detector = FencedCodeBlock.json
|
|
33
|
+
#
|
|
34
|
+
# @api public
|
|
35
|
+
#
|
|
36
|
+
class FencedCodeBlock < Base
|
|
37
|
+
# @return [String] The primary language identifier
|
|
38
|
+
attr_reader :language
|
|
39
|
+
|
|
40
|
+
# @return [Array<String>] Alternative language identifiers
|
|
41
|
+
attr_reader :aliases
|
|
42
|
+
|
|
43
|
+
# Creates a new detector for the specified language.
#
# The language and every alias are normalized with `to_s.downcase`, so
# Strings and Symbols in any letter case are accepted.
#
# @param language [String, Symbol] The language identifier (e.g., "ruby", "json")
# @param aliases [Array<String, Symbol>, String, Symbol, nil] Alternative
#   identifiers (e.g., ["rb"] for ruby). A single identifier or nil is
#   also accepted and treated as a one-element or empty list.
def initialize(language, aliases: [])
  super()
  @language = language.to_s.downcase
  # Array() tolerates nil and a bare scalar, both of which would raise
  # NoMethodError if #map were called on them directly.
  @aliases = Array(aliases).map { |a| a.to_s.downcase }
  @all_identifiers = [@language] + @aliases
end
|
|
53
|
+
|
|
54
|
+
# Region type symbol derived from the primary language.
#
# @return [Symbol] The language followed by "_code_block" (e.g., :ruby_code_block)
def region_type
  "#{@language}_code_block".to_sym
end
|
|
58
|
+
|
|
59
|
+
# Tests whether an identifier is this detector's language or one of its aliases.
#
# The argument is converted with `to_s.downcase` before it is compared
# with the detector's identifiers.
#
# @param lang [String] The language identifier to check
# @return [Boolean] true if the language matches
def matches_language?(lang)
  candidate = lang.to_s.downcase
  @all_identifiers.any? { |identifier| identifier == candidate }
end
|
|
66
|
+
|
|
67
|
+
# Detects all fenced code blocks with the configured language.
#
# The source is scanned line by line. An opening fence is a line made of
# optional leading whitespace, three or more backticks or tildes, and an
# optional language token. Only fences whose (downcased) token is one of
# this detector's identifiers start a block; any other line outside a
# block is ignored. A block is closed by a line with the same indent, the
# same fence character and at least as many fence characters as the
# opening fence.
#
# Language tokens may contain word characters plus `+`, `#`, `.` and `-`,
# so identifiers such as "c++" or "objective-c" can be detected.
#
# @param source [String, nil] The full document content
# @return [Array<Region>] All detected code blocks, in document order.
#   Unclosed blocks produce no region.
def detect_all(source)
  return [] if source.nil? || source.empty?

  regions = []
  current = nil # state of the open matching block; nil while outside a block

  source.lines.each_with_index do |line, idx|
    line_num = idx + 1

    if current.nil?
      # Opening fence: ```lang or ~~~lang (optionally indented)
      match = line.match(/^(\s*)(`{3,}|~{3,})([\w+.#-]*)\s*$/)
      next unless match

      lang = match[3].downcase
      next unless @all_identifiers.include?(lang)

      indent = match[1]
      fence = match[2]
      current = {
        indent: indent,
        fence: fence,
        language: lang,
        start_line: line_num,
        content_lines: [],
        # Built once per block instead of once per scanned line.
        closing_pattern: /^#{Regexp.escape(indent)}#{Regexp.escape(fence[0])}{#{fence.length},}\s*$/,
      }
    elsif line.match?(current[:closing_pattern])
      block_indent = current[:indent]

      regions << build_region(
        type: region_type,
        content: current[:content_lines].join,
        start_line: current[:start_line],
        end_line: line_num,
        # The closing delimiter is reported with the opening fence's length,
        # even when the closing line in the source is longer.
        delimiters: ["#{current[:fence]}#{current[:language]}", current[:fence]],
        metadata: {language: current[:language], indent: block_indent.empty? ? nil : block_indent},
      )
      current = nil
    else
      # Content line: drop the fence's indent when the line starts with it.
      current[:content_lines] << line.delete_prefix(current[:indent])
    end
  end

  regions
end
|
|
138
|
+
|
|
139
|
+
# Short human-readable description of the detector.
#
# @return [String] The class name and language, followed by the aliases
#   when any are configured
def inspect
  details = ["language=#{@language}"]
  details << "aliases=#{@aliases.inspect}" unless @aliases.empty?
  "#<#{self.class.name} #{details.join(" ")}>"
end
|
|
144
|
+
|
|
145
|
+
class << self
  # Factory methods for common languages.
  #
  # Each entry maps a primary language identifier to its aliases and
  # defines a class method of the same name (`ruby`, `json`, `yaml`,
  # `toml`, `mermaid`, `javascript`, `typescript`, `python`, `bash`,
  # `sql`, `html`, `css`, `markdown`). Every such method takes no
  # arguments and returns `new(language, aliases: [...])`.
  {
    "ruby" => ["rb"],
    "json" => [],
    "yaml" => ["yml"],
    "toml" => [],
    "mermaid" => [],
    "javascript" => ["js"],
    "typescript" => ["ts"],
    "python" => ["py"],
    "bash" => ["sh", "shell", "zsh"],
    "sql" => [],
    "html" => [],
    "css" => [],
    "markdown" => ["md"],
  }.each do |language, aliases|
    # A fresh alias array is handed to every new detector.
    define_method(language) { new(language, aliases: aliases.dup) }
  end
end
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ast
|
|
4
|
+
module Merge
|
|
5
|
+
module Detector
|
|
6
|
+
##
|
|
7
|
+
# Mixin for adding region support to SmartMerger classes.
|
|
8
|
+
#
|
|
9
|
+
# This module provides functionality for detecting and handling regions
|
|
10
|
+
# within documents that should be merged with different strategies.
|
|
11
|
+
# Regions are portions of a document (like YAML frontmatter or fenced
|
|
12
|
+
# code blocks) that may require specialized merging.
|
|
13
|
+
#
|
|
14
|
+
# @example Basic region configuration
|
|
15
|
+
# class SmartMerger
|
|
16
|
+
# include Detector::Mergeable
|
|
17
|
+
#
|
|
18
|
+
# def initialize(template, dest, regions: [], region_placeholder: nil)
|
|
19
|
+
# @template_content = template
|
|
20
|
+
# @dest_content = dest
|
|
21
|
+
# setup_regions(regions: regions, region_placeholder: region_placeholder)
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# @example With YAML frontmatter regions
|
|
26
|
+
# merger = SmartMerger.new(
|
|
27
|
+
# template,
|
|
28
|
+
# dest,
|
|
29
|
+
# regions: [
|
|
30
|
+
# {
|
|
31
|
+
# detector: Detector::YamlFrontmatter.new,
|
|
32
|
+
# merger_class: SomeYamlMerger,
|
|
33
|
+
# merger_options: { preserve_order: true }
|
|
34
|
+
# }
|
|
35
|
+
# ]
|
|
36
|
+
# )
|
|
37
|
+
#
|
|
38
|
+
# @example With nested regions (code blocks in markdown)
|
|
39
|
+
# merger = SmartMerger.new(
|
|
40
|
+
# template,
|
|
41
|
+
# dest,
|
|
42
|
+
# regions: [
|
|
43
|
+
# {
|
|
44
|
+
# detector: Detector::FencedCodeBlock.ruby,
|
|
45
|
+
# merger_class: Prism::Merge::SmartMerger,
|
|
46
|
+
# regions: [...] # Nested regions!
|
|
47
|
+
# }
|
|
48
|
+
# ]
|
|
49
|
+
# )
|
|
50
|
+
#
|
|
51
|
+
# @see Base For implementing custom detectors
|
|
52
|
+
# @see Region The data struct for detected regions
|
|
53
|
+
#
|
|
54
|
+
module Mergeable
|
|
55
|
+
# Default placeholder prefix for extracted regions
|
|
56
|
+
DEFAULT_PLACEHOLDER_PREFIX = "<<<AST_MERGE_REGION_"
|
|
57
|
+
DEFAULT_PLACEHOLDER_SUFFIX = ">>>"
|
|
58
|
+
|
|
59
|
+
##
# Configuration for a single region type.
#
# Built with keyword arguments. `merger_options` and `regions` fall back
# to an empty Hash / Array when they are omitted or given as nil.
#
# @attr detector [Base] Detector instance for finding regions
# @attr merger_class [Class, nil] Merger class for merging region content (nil to skip merging)
# @attr merger_options [Hash] Options to pass to the region merger
# @attr regions [Array<Hash>] Nested region configurations (recursive)
#
Config = Struct.new(:detector, :merger_class, :merger_options, :regions, keyword_init: true) do
  def initialize(detector:, merger_class: nil, merger_options: {}, regions: [])
    merger_options ||= {}
    regions ||= []
    super(detector: detector, merger_class: merger_class, merger_options: merger_options, regions: regions)
  end
end
|
|
77
|
+
|
|
78
|
+
##
# Extracted region with its content and placeholder.
#
# Instances are built with keyword arguments (the struct is declared
# with `keyword_init: true`).
#
# @attr region [Region] The detected region
# @attr config [Config] The configuration that matched this region
# @attr placeholder [String] The placeholder used in the document
# @attr merged_content [String, nil] The merged content (set after merging)
#
ExtractedRegion = Struct.new(:region, :config, :placeholder, :merged_content, keyword_init: true)
|
|
87
|
+
|
|
88
|
+
##
# Initialize the region state of this merger instance.
#
# Stores the normalized region configurations and the placeholder
# prefix, and resets the lists of regions extracted from the template
# and from the destination to empty arrays.
#
# @param regions [Array<Hash>] Array of region configurations
# @param region_placeholder [String, nil] Custom placeholder prefix; the
#   default prefix is used when this is nil
# @raise [ArgumentError] if regions configuration is invalid
#
def setup_regions(regions:, region_placeholder: nil)
  @region_configs = build_region_configs(regions)
  @region_placeholder_prefix = region_placeholder ? region_placeholder : DEFAULT_PLACEHOLDER_PREFIX
  @extracted_template_regions = Array.new
  @extracted_dest_regions = Array.new
end
|
|
101
|
+
|
|
102
|
+
##
# Check if this merger has region configurations.
#
# Always returns a strict boolean, including when the region
# configurations have never been set on this instance.
#
# @return [Boolean] true if at least one region is configured
#
def regions_configured?
  @region_configs ? !@region_configs.empty? : false
end
|
|
110
|
+
|
|
111
|
+
##
# Replace the regions found in the template content with placeholders.
#
# The extracted regions are recorded in the template region list. When
# no regions are configured the content is returned unchanged.
#
# @param content [String] Template content
# @return [String] Content with regions replaced by placeholders
# @raise [PlaceholderCollisionError] if content contains placeholder text
#
def extract_template_regions(content)
  if regions_configured?
    extract_regions(content, @extracted_template_regions)
  else
    content
  end
end
|
|
123
|
+
|
|
124
|
+
##
# Replace the regions found in the destination content with placeholders.
#
# The extracted regions are recorded in the destination region list.
# When no regions are configured the content is returned unchanged.
#
# @param content [String] Destination content
# @return [String] Content with regions replaced by placeholders
# @raise [PlaceholderCollisionError] if content contains placeholder text
#
def extract_dest_regions(content)
  if regions_configured?
    extract_regions(content, @extracted_dest_regions)
  else
    content
  end
end
|
|
136
|
+
|
|
137
|
+
##
# Merge the extracted regions and put them back in place of their placeholders.
#
# When no regions are configured the content is returned unchanged.
#
# @param merged_content [String] The merged content with placeholders
# @return [String] Content with placeholders replaced by merged regions
#
def substitute_merged_regions(merged_content)
  if regions_configured?
    merge_and_substitute_regions(merged_content)
  else
    merged_content
  end
end
|
|
152
|
+
|
|
153
|
+
private
|
|
154
|
+
|
|
155
|
+
##
# Normalize a list of region configurations into Config objects.
#
# Config instances are passed through as they are. Hashes are read by
# the symbol keys :detector, :merger_class, :merger_options and :regions;
# missing options and nested regions default to {} and [].
#
# @param configs [Array<Hash>] Array of configuration hashes
# @return [Array<Config>] Array of Config objects
# @raise [ArgumentError] if an entry is neither a Config nor a Hash
#
def build_region_configs(configs)
  return [] if configs.nil? || configs.empty?

  configs.map do |entry|
    next entry if entry.is_a?(Config)
    raise ArgumentError, "Invalid region config: #{entry.inspect}" unless entry.is_a?(Hash)

    Config.new(
      detector: entry[:detector],
      merger_class: entry[:merger_class],
      merger_options: entry[:merger_options] || {},
      regions: entry[:regions] || [],
    )
  end
end
|
|
180
|
+
|
|
181
|
+
##
# Replace every detected region in the content with a placeholder line.
#
# Each configured detector is run in turn against the content as it
# stands after the previous replacements. Within one detector, regions
# are replaced from the highest start_line to the lowest so that the
# line numbers of the regions still to be replaced stay valid.
# Placeholder indices continue from the current size of +storage+, and
# +storage+ ends up sorted by placeholder index.
#
# @param content [String] Content to process
# @param storage [Array<ExtractedRegion>] Array to store extracted regions
# @return [String] Content with placeholders
# @raise [PlaceholderCollisionError] if content contains placeholder text
#
def extract_regions(content, storage)
  validate_no_placeholder_collision!(content)

  remaining = content
  next_index = storage.size

  @region_configs.each do |config|
    detected = config.detector.detect_all(remaining)

    detected.sort_by { |candidate| -candidate.start_line }.each do |region|
      marker = build_placeholder(next_index)
      next_index += 1

      storage.unshift(
        ExtractedRegion.new(region: region, config: config, placeholder: marker, merged_content: nil),
      )
      remaining = replace_region_with_placeholder(remaining, region, marker)
    end
  end

  storage.sort_by! { |extracted| placeholder_index(extracted.placeholder) }
  remaining
end
|
|
218
|
+
|
|
219
|
+
##
# Ensure the content does not already contain the placeholder prefix.
#
# Nil and empty content are always accepted.
#
# @param content [String] Content to validate
# @raise [PlaceholderCollisionError] if the placeholder prefix is found
#
def validate_no_placeholder_collision!(content)
  collides = !content.nil? && !content.empty? && content.include?(@region_placeholder_prefix)
  raise PlaceholderCollisionError, @region_placeholder_prefix if collides
end
|
|
232
|
+
|
|
233
|
+
##
# Compose the placeholder text for a region index.
#
# The placeholder is the configured prefix, the index, and the default
# suffix, concatenated in that order.
#
# @param index [Integer] The region index
# @return [String] The placeholder string
#
def build_placeholder(index)
  format("%s%s%s", @region_placeholder_prefix, index, DEFAULT_PLACEHOLDER_SUFFIX)
end
|
|
242
|
+
|
|
243
|
+
##
# Read the numeric index back out of a placeholder string.
#
# The index is the run of digits that directly follows the configured
# placeholder prefix.
#
# @param placeholder [String] The placeholder string
# @return [Integer] The extracted index
#
def placeholder_index(placeholder)
  index_pattern = /#{Regexp.escape(@region_placeholder_prefix)}(\d+)/
  found = index_pattern.match(placeholder)
  found[1].to_i
end
|
|
252
|
+
|
|
253
|
+
##
# Swap the lines covered by a region for a single placeholder line.
#
# The region's start_line and end_line are 1-indexed and inclusive. The
# placeholder line ends with "\r\n" when the content contains "\r\n"
# anywhere, and with "\n" otherwise.
#
# @param content [String] The content
# @param region [Region] The region to replace
# @param placeholder [String] The placeholder to insert
# @return [String] Content with region replaced
#
def replace_region_with_placeholder(content, region, placeholder)
  all_lines = content.lines

  # Lines before the region, and lines after it (nil when the region
  # runs past the last line).
  leading = all_lines[0...(region.start_line - 1)]
  trailing = all_lines[region.end_line..] || []

  line_ending = content.include?("\r\n") ? "\r\n" : "\n"

  [*leading, "#{placeholder}#{line_ending}", *trailing].join
end
|
|
277
|
+
|
|
278
|
+
##
# Merge and substitute regions back into the merged content.
#
# Template and destination regions are paired by placeholder index. The
# indices known from either side are processed in ascending order; for
# each one the pair is merged and every occurrence of its placeholder is
# replaced by the merged text. A placeholder whose merge yields nil is
# left in the content.
#
# @param content [String] Merged content with placeholders
# @return [String] Content with merged regions substituted
#
def merge_and_substitute_regions(content)
  template_by_idx = @extracted_template_regions.to_h { |e| [placeholder_index(e.placeholder), e] }
  dest_by_idx = @extracted_dest_regions.to_h { |e| [placeholder_index(e.placeholder), e] }

  indices = (template_by_idx.keys | dest_by_idx.keys).sort

  indices.reduce(content) do |result, idx|
    merged_region_content = merge_region(template_by_idx[idx], dest_by_idx[idx])
    next result unless merged_region_content

    # Block form on purpose: a replacement *string* has its backslash
    # sequences (\\, \0, \1, ...) interpreted by gsub, which would corrupt
    # region text that contains backslashes. A block's result is inserted
    # verbatim.
    result.gsub(build_placeholder(idx)) { merged_region_content }
  end
end
|
|
309
|
+
|
|
310
|
+
##
# Produce the merged text for one template/destination region pair.
#
# The config is taken from the template side, falling back to the
# destination side. Without a merger class the destination's full text
# wins unless it is empty, in which case the template's full text is
# used. With a merger class, the two region contents (without
# delimiters) are merged by an instance of that class and the result is
# wrapped in delimiters again, using the template region if present and
# otherwise the destination region.
#
# @param template_extracted [ExtractedRegion, nil] Template region
# @param dest_extracted [ExtractedRegion, nil] Destination region
# @return [String, nil] Merged region content, or nil if neither side has a config
#
def merge_region(template_extracted, dest_extracted)
  config = template_extracted&.config || dest_extracted&.config
  return unless config

  template_region, dest_region = [template_extracted, dest_extracted].map { |extracted| extracted&.region }

  # Full text includes the delimiters.
  template_text, dest_text = [template_region, dest_region].map { |region| region&.full_text || "" }
  preferred_text = dest_text.empty? ? template_text : dest_text
  return preferred_text unless config.merger_class

  # Content excludes the delimiters.
  contents = [template_region, dest_region].map { |region| region&.content || "" }

  # Work on a copy so the configured options hash is never modified.
  options = config.merger_options.dup
  options[:regions] = config.regions unless config.regions.empty?

  merged = config.merger_class.new(*contents, **options).merge
  reconstruct_region_with_delimiters(template_region || dest_region, merged)
end
|
|
348
|
+
|
|
349
|
+
##
# Wrap merged content in the opening and closing delimiters of a region.
#
# The content is returned as it is when there is no region or the region
# has no delimiters. Otherwise the opening delimiter, the content (with a
# trailing "\n" appended if it lacks one) and the closing delimiter are
# emitted, each on its own line.
#
# @param region [Region] The original region (for delimiter info)
# @param content [String] The merged content
# @return [String] Full region text with delimiters
#
def reconstruct_region_with_delimiters(region, content)
  delimiters = region&.delimiters
  return content unless delimiters

  opening, closing = delimiters
  body = content.end_with?("\n") ? content : "#{content}\n"

  format("%s\n%s%s\n", opening, body, closing)
end
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
end
|
|
369
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ast
|
|
4
|
+
module Merge
|
|
5
|
+
module Detector
|
|
6
|
+
##
|
|
7
|
+
# Detects TOML frontmatter at the beginning of a document.
|
|
8
|
+
#
|
|
9
|
+
# TOML frontmatter is delimited by `+++` at the start and end,
|
|
10
|
+
# and must begin on the first line of the document (optionally
|
|
11
|
+
# preceded by a UTF-8 BOM). This format is commonly used by
|
|
12
|
+
# Hugo and other static site generators.
|
|
13
|
+
#
|
|
14
|
+
# @example TOML frontmatter
|
|
15
|
+
# +++
|
|
16
|
+
# title = "My Document"
|
|
17
|
+
# author = "Jane Doe"
|
|
18
|
+
# +++
|
|
19
|
+
#
|
|
20
|
+
# @example Usage
|
|
21
|
+
# detector = TomlFrontmatter.new
|
|
22
|
+
# regions = detector.detect_all(markdown_source)
|
|
23
|
+
# # => [#<Region type=:toml_frontmatter content="title = \"My Document\"\n...">]
|
|
24
|
+
#
|
|
25
|
+
# @see YamlFrontmatter For YAML frontmatter detection
|
|
26
|
+
#
|
|
27
|
+
class TomlFrontmatter < Base
|
|
28
|
+
##
|
|
29
|
+
# Pattern for detecting TOML frontmatter.
|
|
30
|
+
# - Must start at beginning of document (or after BOM)
|
|
31
|
+
# - Opening delimiter is `+++` followed by optional whitespace and newline
|
|
32
|
+
# - Content is captured (non-greedy)
|
|
33
|
+
# - Closing delimiter is `+++` at start of line, followed by optional whitespace and newline/EOF
|
|
34
|
+
#
|
|
35
|
+
FRONTMATTER_PATTERN = /\A(?:\xEF\xBB\xBF)?(\+\+\+[ \t]*\r?\n)(.*?)(^\+\+\+[ \t]*(?:\r?\n|\z))/m
|
|
36
|
+
|
|
37
|
+
##
# Type identifier reported for every region this detector produces.
#
# @return [Symbol] always :toml_frontmatter
#
def region_type
  :toml_frontmatter
end
|
|
43
|
+
|
|
44
|
+
##
# Finds the TOML frontmatter block at the very start of the document.
#
# The region always starts on line 1. Its end line is the number of
# lines spanned by the matched frontmatter, closing delimiter included.
# The delimiters are reported without surrounding whitespace.
#
# @param source [String] the source document to scan
# @return [Array<Region>] array containing at most one Region for frontmatter
#
def detect_all(source)
  return [] if source.nil? || source.empty?

  found = FRONTMATTER_PATTERN.match(source)
  return [] unless found

  opening, body, closing = found.captures

  frontmatter = Region.new(
    type: region_type,
    content: body,
    start_line: 1,
    # One line per "\n" in the match, plus one if the match ends without a newline.
    end_line: found[0].lines.size,
    delimiters: [opening, closing].map(&:strip),
    metadata: {format: :toml},
  )

  [frontmatter]
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|