markdown-merge 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +251 -0
  4. data/CITATION.cff +20 -0
  5. data/CODE_OF_CONDUCT.md +134 -0
  6. data/CONTRIBUTING.md +227 -0
  7. data/FUNDING.md +74 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +1087 -0
  10. data/REEK +0 -0
  11. data/RUBOCOP.md +71 -0
  12. data/SECURITY.md +21 -0
  13. data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
  14. data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
  15. data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
  16. data/lib/markdown/merge/cleanse.rb +42 -0
  17. data/lib/markdown/merge/code_block_merger.rb +300 -0
  18. data/lib/markdown/merge/conflict_resolver.rb +128 -0
  19. data/lib/markdown/merge/debug_logger.rb +26 -0
  20. data/lib/markdown/merge/document_problems.rb +190 -0
  21. data/lib/markdown/merge/file_aligner.rb +196 -0
  22. data/lib/markdown/merge/file_analysis.rb +353 -0
  23. data/lib/markdown/merge/file_analysis_base.rb +629 -0
  24. data/lib/markdown/merge/freeze_node.rb +93 -0
  25. data/lib/markdown/merge/gap_line_node.rb +136 -0
  26. data/lib/markdown/merge/link_definition_formatter.rb +49 -0
  27. data/lib/markdown/merge/link_definition_node.rb +157 -0
  28. data/lib/markdown/merge/link_parser.rb +421 -0
  29. data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
  30. data/lib/markdown/merge/markdown_structure.rb +123 -0
  31. data/lib/markdown/merge/merge_result.rb +166 -0
  32. data/lib/markdown/merge/node_type_normalizer.rb +126 -0
  33. data/lib/markdown/merge/output_builder.rb +166 -0
  34. data/lib/markdown/merge/partial_template_merger.rb +334 -0
  35. data/lib/markdown/merge/smart_merger.rb +221 -0
  36. data/lib/markdown/merge/smart_merger_base.rb +621 -0
  37. data/lib/markdown/merge/table_match_algorithm.rb +504 -0
  38. data/lib/markdown/merge/table_match_refiner.rb +136 -0
  39. data/lib/markdown/merge/version.rb +12 -0
  40. data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
  41. data/lib/markdown/merge.rb +149 -0
  42. data/lib/markdown-merge.rb +4 -0
  43. data/sig/markdown/merge.rbs +341 -0
  44. data.tar.gz.sig +0 -0
  45. metadata +365 -0
  46. metadata.gz.sig +0 -0
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Rehydrates inline links and images to use link reference definitions.
    #
    # When markdown is processed through `to_commonmark`, reference-style links
    # `[text][label]` are converted to inline links `[text](url)`.
    # This class reverses that transformation by:
    # 1. Parsing link reference definitions from content using {LinkParser}
    # 2. Finding inline links/images using {LinkParser}
    # 3. Replacing inline URLs with reference labels where a definition exists
    #
    # When several labels are defined for the same URL, the shortest label is
    # used (the earliest definition wins ties) and a warning is recorded in
    # {#problems}. Links and images that carry a title are left inline.
    #
    # @example Standalone usage
    #   content = <<~MD
    #     Check out [Example](https://example.com) for more info.
    #
    #     [example]: https://example.com
    #   MD
    #   result = LinkReferenceRehydrator.rehydrate(content)
    #   # => "Check out [Example][example] for more info.\n\n[example]: https://example.com\n"
    #
    class LinkReferenceRehydrator
      # @return [String] The original content
      attr_reader :content

      # @return [DocumentProblems] Problems found during rehydration
      attr_reader :problems

      # @return [Integer] Number of links/images rehydrated (nested constructs
      #   count once per construct that was rewritten)
      attr_reader :rehydration_count

      class << self
        # Rehydrate inline links/images to reference style (class method).
        #
        # @param content [String] Content to rehydrate
        # @return [String] Rehydrated content
        def rehydrate(content)
          new(content).rehydrate
        end
      end

      # Initialize a new rehydrator.
      #
      # @param content [String] Content to process
      def initialize(content)
        @content = content
        @problems = DocumentProblems.new
        @link_definitions = nil
        @duplicate_definitions = nil
        @url_to_label = nil
        @parser = LinkParser.new
        @rehydration_count = 0
        @rehydrated = nil
      end

      # Get the map of URLs to their preferred label.
      #
      # @return [Hash<String, String>] URL => label mapping
      def link_definitions
        build_definition_maps unless @link_definitions
        @link_definitions
      end

      # Get duplicate definitions (multiple labels for same URL).
      #
      # @return [Hash<String, Array<String>>] URL => [labels] for duplicates only
      def duplicate_definitions
        build_definition_maps unless @duplicate_definitions
        @duplicate_definitions
      end

      # Rehydrate inline links and images to use reference definitions.
      #
      # The parser returns a tree of link constructs so that nested structures
      # such as linked images `[![alt](img-url)](link-url)` can be handled:
      # children are processed before their parent, and the parent's text is
      # rebuilt from the children's replacements.
      #
      # The work is performed once per instance. Later calls return the same
      # result without recording problems again or inflating
      # {#rehydration_count}.
      #
      # @return [String] Rehydrated content (the original content object when
      #   the document has no link definitions)
      def rehydrate
        @rehydrated ||= perform_rehydration
      end

      # Check if rehydration made any changes.
      #
      # @return [Boolean] true if any links were rehydrated
      def changed?
        @rehydration_count.positive?
      end

      private

      # Run the actual rehydration pass (see {#rehydrate}).
      #
      # @return [String] Rehydrated content
      def perform_rehydration
        build_definition_maps unless @link_definitions
        record_duplicate_problems

        return content if @url_to_label.empty?

        tree = @parser.find_all_link_constructs(content)
        replacements = collect_nested_replacements(tree, content)

        # Splice from the end of the document backwards so that earlier
        # positions stay valid while later spans change length.
        result = content.dup
        replacements.sort_by { |r| -r[:start_pos] }.each do |replacement|
          result[replacement[:start_pos]...replacement[:end_pos]] = replacement[:replacement]
        end

        result
      end

      # Collect replacements from tree structure, processing children first.
      #
      # When a parent can be rewritten, only the parent's replacement is kept
      # (it already contains the rewritten children). Otherwise the children's
      # replacements are kept on their own.
      #
      # @param items [Array<Hash>] Tree items from find_all_link_constructs
      # @param text [String] The document text (only forwarded to recursive calls)
      # @return [Array<Hash>] Replacements with :start_pos, :end_pos, :replacement
      def collect_nested_replacements(items, text)
        replacements = []

        items.each do |item|
          if item[:children]&.any?
            child_replacements = collect_nested_replacements(item[:children], text)
            parent_replacement = process_parent_with_children(item, child_replacements)

            if parent_replacement
              replacements << parent_replacement
            else
              replacements.concat(child_replacements)
            end
          else
            replacement = (item[:type] == :image) ? process_image(item) : process_link(item)
            replacements << replacement if replacement
          end
        end

        replacements
      end

      # Process a parent item that has children, accounting for child replacements.
      #
      # For a linked image like `[![alt](img-url)](link-url)`:
      # 1. The child image was already processed: `![alt](img-url)` → `![alt][img-label]`
      # 2. The parent becomes `[![alt][img-label]][link-label]`
      #
      # An image parent (an image whose alt text contains a link) keeps its
      # `!` prefix and uses its alt text.
      # NOTE(review): whether LinkParser ever reports an image with children
      # is not visible here; this branch is defensive.
      #
      # @param item [Hash] Parent item with :children
      # @param child_replacements [Array<Hash>] Replacements made by children
      # @return [Hash, nil] Replacement for the parent, or nil if not applicable
      def process_parent_with_children(item, child_replacements)
        label = @url_to_label[item[:url]]
        return unless label

        image = item[:type] == :image
        title = item[:title]

        # A titled parent is left inline; record why it was skipped.
        if title && !title.empty?
          if image
            @problems.add(:image_has_title, severity: :info, alt: item[:alt], url: item[:url], title: title)
          else
            @problems.add(:link_has_title, severity: :info, text: item[:text], url: item[:url], title: title)
          end
          return
        end

        inner = image ? (item[:alt] || item[:text]) : item[:text]
        new_text = (inner || "").dup

        # Child positions are document offsets; the parent's text begins after
        # the opening "[" (link) or "![" (image).
        text_start = item[:start_pos] + (image ? 2 : 1)

        # Apply from the last child backwards so earlier offsets stay valid.
        child_replacements.sort_by { |r| -r[:start_pos] }.each do |child|
          from = child[:start_pos] - text_start
          to = child[:end_pos] - text_start
          next unless from >= 0 && to <= new_text.length

          new_text = new_text[0...from] + child[:replacement] + new_text[to..]
        end

        @rehydration_count += 1
        {
          start_pos: item[:start_pos],
          end_pos: item[:end_pos],
          replacement: "#{image ? "![" : "["}#{new_text}][#{label}]",
        }
      end

      # Parse the definitions in the content and build the URL => label maps.
      #
      # For every URL the shortest label is preferred; among labels of equal
      # length the one defined first wins. URLs with more than one label are
      # also stored in @duplicate_definitions.
      #
      # @return [void]
      def build_definition_maps
        @link_definitions = {}
        @duplicate_definitions = {}
        @url_to_label = {}
        url_to_all_labels = Hash.new { |h, k| h[k] = [] }

        @parser.parse_definitions(content).each do |defn|
          url_to_all_labels[defn[:url]] << defn[:label]
        end

        url_to_all_labels.each do |url, labels|
          best_label = labels.each_with_index.min_by { |label, index| [label.length, index] }.first

          @link_definitions[url] = best_label
          @url_to_label[url] = best_label

          @duplicate_definitions[url] = labels if labels.size > 1
        end
      end

      # Record one warning per URL that has more than one label.
      #
      # @return [void]
      def record_duplicate_problems
        @duplicate_definitions.each do |url, labels|
          @problems.add(
            :duplicate_link_definition,
            severity: :warning,
            url: url,
            labels: labels,
            selected_label: @url_to_label[url],
          )
        end
      end

      # Build the replacement for a leaf inline link.
      #
      # A titled link records a :link_has_title problem and is skipped, even
      # when no definition exists for its URL.
      #
      # @param link [Hash] Link item with :url, :title, :text, :start_pos, :end_pos
      # @return [Hash, nil] Replacement, or nil when the link is left inline
      def process_link(link)
        url = link[:url]
        title = link[:title]
        link_text = link[:text]

        if title && !title.empty?
          @problems.add(:link_has_title, severity: :info, text: link_text, url: url, title: title)
          return
        end

        label = @url_to_label[url]
        return unless label

        @rehydration_count += 1
        {
          start_pos: link[:start_pos],
          end_pos: link[:end_pos],
          replacement: "[#{link_text}][#{label}]",
        }
      end

      # Build the replacement for a leaf inline image.
      #
      # A titled image records an :image_has_title problem and is skipped,
      # even when no definition exists for its URL.
      #
      # @param image [Hash] Image item with :url, :title, :alt, :start_pos, :end_pos
      # @return [Hash, nil] Replacement, or nil when the image is left inline
      def process_image(image)
        url = image[:url]
        title = image[:title]
        alt_text = image[:alt]

        if title && !title.empty?
          @problems.add(:image_has_title, severity: :info, alt: alt_text, url: url, title: title)
          return
        end

        label = @url_to_label[url]
        return unless label

        @rehydration_count += 1
        {
          start_pos: image[:start_pos],
          end_pos: image[:end_pos],
          replacement: "![#{alt_text}][#{label}]",
        }
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Defines structural spacing rules for markdown elements.
    #
    # When merging markdown from different sources, gap lines from the original
    # sources may not exist at transition points (e.g., when a dest-only table
    # is followed by a template-only table). This module defines which node types
    # require spacing before/after them for proper markdown formatting.
    #
    # Node types are categorized by their spacing needs:
    # - NEEDS_BLANK_BEFORE: Nodes that need a blank line before them (headings, tables, etc.)
    # - NEEDS_BLANK_AFTER: Nodes that need a blank line after them
    # - CONTIGUOUS_TYPES: Nodes that should NOT have blank lines between consecutive instances
    #   (e.g., link_definition blocks should be together)
    #
    # All predicates accept a Symbol or String and answer false for nil, so the
    # result of {.node_type} can be passed to them directly.
    #
    # @example
    #   MarkdownStructure.needs_blank_before?(:table)        # => true
    #   MarkdownStructure.needs_blank_after?(:heading)       # => true
    #   MarkdownStructure.contiguous_type?(:link_definition) # => true
    module MarkdownStructure
      # Node types that should have a blank line BEFORE them
      # (when preceded by other content)
      NEEDS_BLANK_BEFORE = %i[
        heading
        table
        code_block
        thematic_break
        list
        block_quote
      ].freeze

      # Node types that should have a blank line AFTER them
      # (when followed by other content)
      NEEDS_BLANK_AFTER = %i[
        heading
        table
        code_block
        thematic_break
        list
        block_quote
        link_definition
      ].freeze

      # Node types that should be contiguous (no blank lines between consecutive
      # nodes of the same type). These form "blocks" that should stay together.
      CONTIGUOUS_TYPES = %i[
        link_definition
      ].freeze

      class << self
        # Check if a node type needs a blank line before it
        #
        # @param node_type [Symbol, String, nil] Node type to check
        # @return [Boolean] false for nil or unlisted types
        def needs_blank_before?(node_type)
          listed?(NEEDS_BLANK_BEFORE, node_type)
        end

        # Check if a node type needs a blank line after it
        #
        # @param node_type [Symbol, String, nil] Node type to check
        # @return [Boolean] false for nil or unlisted types
        def needs_blank_after?(node_type)
          listed?(NEEDS_BLANK_AFTER, node_type)
        end

        # Check if a node type is a contiguous type (should not have blank lines
        # between consecutive nodes of the same type).
        #
        # @param node_type [Symbol, String, nil] Node type to check
        # @return [Boolean] false for nil or unlisted types
        def contiguous_type?(node_type)
          listed?(CONTIGUOUS_TYPES, node_type)
        end

        # Check if we should insert a blank line between two node types
        #
        # Rules:
        # 1. If either type is nil, NO blank line
        # 2. If both types are the same contiguous type, NO blank line
        # 3. If previous node needs blank after, YES blank line
        # 4. If next node needs blank before, YES blank line
        #
        # @param prev_type [Symbol, String, nil] Previous node type
        # @param next_type [Symbol, String, nil] Next node type
        # @return [Boolean]
        def needs_blank_between?(prev_type, next_type)
          return false if prev_type.nil? || next_type.nil?

          prev_sym = prev_type.to_sym
          next_sym = next_type.to_sym

          # Same contiguous type - no blank line between them
          return false if prev_sym == next_sym && contiguous_type?(prev_sym)

          needs_blank_after?(prev_sym) || needs_blank_before?(next_sym)
        end

        # Get the node type from a node object
        #
        # Priority order:
        # 1. merge_type - Explicit merge behavior classification (preferred)
        # 2. type - Parser-specific type fallback, also used when the node
        #    responds to merge_type but has none assigned (returns nil)
        #
        # @param node [Object, nil] Node to get type from
        # @return [Symbol, nil] Node type, or nil when none can be determined
        def node_type(node)
          return unless node

          raw_type = node.merge_type if node.respond_to?(:merge_type)
          raw_type = node.type if raw_type.nil? && node.respond_to?(:type)
          raw_type&.to_sym
        end

        private

        # Membership test shared by the predicates; nil is never a member.
        #
        # @param types [Array<Symbol>] One of the type lists above
        # @param node_type [Symbol, String, nil] Node type to look up
        # @return [Boolean]
        def listed?(types, node_type)
          return false if node_type.nil?

          types.include?(node_type.to_sym)
        end
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Outcome of a Markdown merge: the merged text plus bookkeeping about
    # conflicts, preserved frozen blocks, node statistics and document problems.
    #
    # Subclasses Ast::Merge::MergeResultBase; conflicts, frozen blocks, stats
    # and any extra options are handed to the base class, while the merged
    # content and the problems collection are held here.
    #
    # @example Successful merge
    #   result = SmartMerger.merge(source_a, source_b)
    #   if result.success?
    #     File.write("merged.md", result.content)
    #   end
    #
    # @example Handling conflicts
    #   result = SmartMerger.merge(source_a, source_b)
    #   if result.conflicts?
    #     result.conflicts.each do |conflict|
    #       puts "Conflict at: #{conflict[:location]}"
    #     end
    #   end
    #
    # @example Checking for document problems
    #   result = SmartMerger.merge(source_a, source_b, normalize_whitespace: true)
    #   result.problems.by_category(:excessive_whitespace).each do |problem|
    #     puts "Whitespace issue at line #{problem.details[:line]}"
    #   end
    #
    # @see Ast::Merge::MergeResultBase Base class
    # @see DocumentProblems For problem tracking
    class MergeResult < Ast::Merge::MergeResultBase
      # @return [DocumentProblems] Problems found during merge
      attr_reader :problems

      # Build a result.
      #
      # Caller-supplied stats are layered over the zeroed defaults, so every
      # default key is present even when only some are provided.
      #
      # @param content [String, nil] Merged content (nil if merge failed)
      # @param conflicts [Array<Hash>] Conflict descriptions
      # @param frozen_blocks [Array<Hash>] Preserved frozen block info
      # @param stats [Hash] Merge statistics
      # @param problems [DocumentProblems, nil] Document problems found; a new
      #   empty collection is created when omitted
      # @param options [Hash] Additional options passed through to the base class
      def initialize(content:, conflicts: [], frozen_blocks: [], stats: {}, problems: nil, **options)
        combined_stats = default_stats.merge(stats)
        super(conflicts: conflicts, frozen_blocks: frozen_blocks, stats: combined_stats, **options)

        @content_raw = content
        @problems = problems || DocumentProblems.new
      end

      # Merged content exactly as it was given to the constructor.
      # Overrides the base class accessor.
      #
      # @return [String, nil] The merged Markdown content
      def content
        @content_raw
      end

      # Whether merged content is present (anything other than nil).
      # Overrides the base class predicate.
      #
      # @return [Boolean]
      def content?
        !@content_raw.nil?
      end

      # Merged content as a string; here identical to {#content}.
      #
      # @return [String, nil] The merged content
      def content_string
        @content_raw
      end

      # A merge succeeded when there are no conflicts and content is present.
      #
      # @return [Boolean] True if merge succeeded
      def success?
        return false unless conflicts.empty?

        content?
      end

      # Whether any conflicts were reported.
      #
      # @return [Boolean] True if conflicts exist
      def conflicts?
        !conflicts.empty?
      end

      # Whether any frozen blocks were preserved.
      #
      # @return [Boolean] True if frozen blocks were preserved
      def has_frozen_blocks?
        !frozen_blocks.empty?
      end

      # Number of nodes added during the merge.
      #
      # @return [Integer] stats[:nodes_added], or 0 when unset
      def nodes_added
        stat_or_zero(:nodes_added)
      end

      # Number of nodes removed during the merge.
      #
      # @return [Integer] stats[:nodes_removed], or 0 when unset
      def nodes_removed
        stat_or_zero(:nodes_removed)
      end

      # Number of nodes modified during the merge.
      #
      # @return [Integer] stats[:nodes_modified], or 0 when unset
      def nodes_modified
        stat_or_zero(:nodes_modified)
      end

      # Merge duration in milliseconds as stored in the stats.
      #
      # @return [Numeric, nil] stats[:merge_time_ms]; the constructor defaults
      #   this to 0 when the caller supplies no value
      def merge_time_ms
        stats[:merge_time_ms]
      end

      # Number of frozen blocks preserved.
      #
      # @return [Integer] Number of frozen blocks
      def frozen_count
        frozen_blocks.size
      end

      # Compact one-line summary for debugging.
      #
      # @return [String] Debug representation
      def inspect
        outcome = success? ? "success" : "failed"
        fields = [
          "conflicts=#{conflicts.size}",
          "frozen=#{frozen_blocks.size}",
          "added=#{nodes_added}",
          "removed=#{nodes_removed}",
          "modified=#{nodes_modified}",
        ]
        "#<#{self.class.name} #{outcome} #{fields.join(" ")}>"
      end

      # The merged content, or an empty string when there is none.
      #
      # @return [String] The merged content or empty string
      def to_s
        content || ""
      end

      private

      # Zeroed statistics used as the base for caller-supplied stats.
      #
      # @return [Hash] Default stats hash
      def default_stats
        %i[nodes_added nodes_removed nodes_modified merge_time_ms].to_h { |key| [key, 0] }
      end

      # Read a counter from the stats, treating a missing/nil value as zero.
      #
      # @param key [Symbol] Stats key
      # @return [Object] The stored value, or 0
      def stat_or_zero(key)
        stats[key] || 0
      end
    end
  end
end