markdown-merge 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +251 -0
  4. data/CITATION.cff +20 -0
  5. data/CODE_OF_CONDUCT.md +134 -0
  6. data/CONTRIBUTING.md +227 -0
  7. data/FUNDING.md +74 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +1087 -0
  10. data/REEK +0 -0
  11. data/RUBOCOP.md +71 -0
  12. data/SECURITY.md +21 -0
  13. data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
  14. data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
  15. data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
  16. data/lib/markdown/merge/cleanse.rb +42 -0
  17. data/lib/markdown/merge/code_block_merger.rb +300 -0
  18. data/lib/markdown/merge/conflict_resolver.rb +128 -0
  19. data/lib/markdown/merge/debug_logger.rb +26 -0
  20. data/lib/markdown/merge/document_problems.rb +190 -0
  21. data/lib/markdown/merge/file_aligner.rb +196 -0
  22. data/lib/markdown/merge/file_analysis.rb +353 -0
  23. data/lib/markdown/merge/file_analysis_base.rb +629 -0
  24. data/lib/markdown/merge/freeze_node.rb +93 -0
  25. data/lib/markdown/merge/gap_line_node.rb +136 -0
  26. data/lib/markdown/merge/link_definition_formatter.rb +49 -0
  27. data/lib/markdown/merge/link_definition_node.rb +157 -0
  28. data/lib/markdown/merge/link_parser.rb +421 -0
  29. data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
  30. data/lib/markdown/merge/markdown_structure.rb +123 -0
  31. data/lib/markdown/merge/merge_result.rb +166 -0
  32. data/lib/markdown/merge/node_type_normalizer.rb +126 -0
  33. data/lib/markdown/merge/output_builder.rb +166 -0
  34. data/lib/markdown/merge/partial_template_merger.rb +334 -0
  35. data/lib/markdown/merge/smart_merger.rb +221 -0
  36. data/lib/markdown/merge/smart_merger_base.rb +621 -0
  37. data/lib/markdown/merge/table_match_algorithm.rb +504 -0
  38. data/lib/markdown/merge/table_match_refiner.rb +136 -0
  39. data/lib/markdown/merge/version.rb +12 -0
  40. data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
  41. data/lib/markdown/merge.rb +149 -0
  42. data/lib/markdown-merge.rb +4 -0
  43. data/sig/markdown/merge.rbs +341 -0
  44. data.tar.gz.sig +0 -0
  45. metadata +365 -0
  46. metadata.gz.sig +0 -0
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # A region of a Markdown document that merge operations must leave untouched.
    #
    # The region is delimited by a pair of HTML comments:
    #   <!-- markdown-merge:freeze -->
    #   ... frozen content ...
    #   <!-- markdown-merge:unfreeze -->
    #
    # Everything between the two markers is carried through a merge verbatim,
    # which protects hand-maintained sections from automated rewrites.
    #
    # @example Basic freeze block
    #   <!-- markdown-merge:freeze -->
    #   ## Custom Section
    #   This content will not be modified by merge operations.
    #   <!-- markdown-merge:unfreeze -->
    #
    # @example Freeze block with reason
    #   <!-- markdown-merge:freeze Manual TOC -->
    #   ## Table of Contents
    #   - [Introduction](#introduction)
    #   - [Usage](#usage)
    #   <!-- markdown-merge:unfreeze -->
    #
    # @see Ast::Merge::FreezeNodeBase Base class
    class FreezeNode < Ast::Merge::FreezeNodeBase
      # Build a freeze node; every argument is forwarded to the base class
      # together with the fixed +:html_comment+ pattern type.
      #
      # @param start_line [Integer] Starting line number (1-indexed)
      # @param end_line [Integer] Ending line number (1-indexed)
      # @param content [String] Raw Markdown content within the block
      # @param start_marker [String] The freeze marker comment
      # @param end_marker [String] The unfreeze marker comment
      # @param nodes [Array] Parsed nodes within the block
      # @param reason [String, nil] Optional reason extracted from marker
      def initialize(start_line:, end_line:, content:, start_marker:, end_marker:, nodes: [], reason: nil)
        # Reason extraction (via pattern_for) is left to the base class.
        base_options = {
          start_line: start_line,
          end_line: end_line,
          content: content,
          nodes: nodes,
          start_marker: start_marker,
          end_marker: end_marker,
          pattern_type: :html_comment,
          reason: reason,
        }
        super(**base_options)
      end

      # Matching key for this freeze block.
      #
      # The key is derived from the whitespace-stripped content, so two freeze
      # blocks holding the same text produce the same signature regardless of
      # where they sit in their files.
      #
      # @return [Array<Symbol, String>] +[:freeze_block, digest]+ where digest is
      #   the first 16 hex characters of the content's SHA-256
      def signature
        normalized = content.strip
        digest_prefix = Digest::SHA256.hexdigest(normalized).slice(0, 16)
        [:freeze_block, digest_prefix]
      end

      # The block as it appears in the document: start marker, content and
      # end marker, each separated by a single newline.
      #
      # @return [String] Complete freeze block with markers
      def full_text
        opening = "#{start_marker}\n"
        closing = "\n#{end_marker}"
        "#{opening}#{content}#{closing}"
      end

      # Number of lines covered by the block (both boundary lines included).
      #
      # @return [Integer] Number of lines
      def line_count
        span = end_line - start_line
        span + 1
      end

      # Whether any node inside the block reports the given type.
      #
      # @param type [Symbol] Node type to check for (e.g., :heading, :paragraph)
      # @return [Boolean] True if block contains the node type
      def contains_type?(type)
        nodes.each { |child| return true if child.type == type }
        false
      end

      # Compact debug representation: line span, node count and reason.
      #
      # @return [String] Debug representation
      def inspect
        span = "#{start_line}..#{end_line}"
        details = "lines=#{span} nodes=#{nodes.size} reason=#{reason.inspect}"
        "#<#{self.class.name} #{details}>"
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Represents a "gap" line that exists between parsed Markdown nodes.
    #
    # Markdown parsers like Markly consume certain content during parsing (like
    # link reference definitions) and don't preserve blank lines between nodes
    # in the AST. This class represents lines that fall into "gaps" between
    # parsed nodes, allowing them to be preserved during merge operations.
    #
    # Gap lines include:
    # - Blank lines between sections
    # - Link reference definitions (handled specially by LinkDefinitionNode)
    # - Any other content consumed by the parser
    #
    # @example
    #   node = GapLineNode.new("", line_number: 5)
    #   node.type       # => :gap_line
    #   node.blank?     # => true
    #   node.signature  # => [:gap_line, 5, ""]
    class GapLineNode < Ast::Merge::AstNode
      # @return [String] The line content (may be empty for blank lines)
      attr_reader :content

      # @return [Integer] 1-based line number
      attr_reader :line_number

      # @return [Object, nil] The preceding structural node (for context-aware signatures).
      #   This is set after integration to avoid circular dependencies during creation.
      attr_accessor :preceding_node

      # Initialize a new GapLineNode
      #
      # @param content [String] The line content; a trailing line terminator is removed
      # @param line_number [Integer] 1-based line number
      def initialize(content, line_number:)
        @content = content.chomp
        @line_number = line_number
        @preceding_node = nil # Set later during integration

        location = Ast::Merge::AstNode::Location.new(
          start_line: line_number,
          end_line: line_number,
          start_column: 0,
          end_column: @content.length,
        )

        super(slice: @content, location: location)
      end

      # TreeHaver::Node protocol: type
      # @return [Symbol] :gap_line
      def type
        :gap_line
      end

      # Alias for compatibility with wrapped nodes that have merge_type
      # @return [Symbol] :gap_line
      alias_method :merge_type, :type

      # Generate a signature for matching gap lines.
      #
      # When the preceding structural node can report where it ends, the gap line
      # is identified by that node's type plus the line offset from its end. A
      # blank line after a heading in the template then matches a blank line after
      # a heading in the destination even when absolute line numbers differ.
      #
      # Otherwise (no preceding node, or its position is unavailable) the absolute
      # line number is used, which covers gaps at the very start of the document.
      #
      # @return [Array] +[:gap_line_after, preceding_type, offset, content]+ or
      #   +[:gap_line, line_number, content]+
      def signature
        anchor_line = preceding_end_line
        return [:gap_line, @line_number, @content] unless anchor_line

        # The preceding node's type is enough context; a full signature is not needed.
        preceding_type = @preceding_node.respond_to?(:type) ? @preceding_node.type : :unknown

        # e.g. heading ends on line 1, gap is line 2 => offset 1
        [:gap_line_after, preceding_type, @line_number - anchor_line, @content]
      end

      # TreeHaver::Node protocol: source_position
      # @return [Hash] Position info for source extraction
      def source_position
        {
          start_line: @line_number,
          end_line: @line_number,
          start_column: 0,
          end_column: @content.length,
        }
      end

      # TreeHaver::Node protocol: children (none)
      # @return [Array] Empty array
      def children
        []
      end

      # TreeHaver::Node protocol: text
      # @return [String] The line content
      def text
        @content
      end

      # Check if this is a blank line
      # @return [Boolean] true if line is empty or whitespace only
      def blank?
        @content.strip.empty?
      end

      # Convert to commonmark format
      # @return [String] The line with trailing newline
      def to_commonmark
        "#{@content}\n"
      end

      # For debugging
      # @return [String] Class name, line number and inspected content
      def inspect
        "#<#{self.class.name} line=#{@line_number} content=#{@content.inspect}>"
      end

      private

      # End line of the preceding node, when one is set and exposes a position.
      #
      # +nil+ itself answers +respond_to?+, so no safe navigation is required here.
      #
      # @return [Integer, nil] The end line, or a falsy value when unavailable
      def preceding_end_line
        return unless @preceding_node.respond_to?(:source_position)

        position = @preceding_node.source_position
        position && position[:end_line]
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Formats link reference definitions for output.
    #
    # Markdown parsers (especially cmark-based ones like Markly/Commonmarker)
    # consume link reference definitions during parsing and resolve them into
    # inline links. This means they don't appear as nodes in the AST.
    #
    # This formatter reconstructs the markdown syntax from LinkDefinitionNode
    # instances so they can be included in the merged output.
    #
    # @example
    #   node = LinkDefinitionNode.new(
    #     "[ref]: https://example.com \"Title\"",
    #     line_number: 1,
    #     label: "ref",
    #     url: "https://example.com",
    #     title: "Title"
    #   )
    #   LinkDefinitionFormatter.format(node)
    #   # => "[ref]: https://example.com \"Title\""
    module LinkDefinitionFormatter
      class << self
        # Format a link definition node.
        #
        # The node's original +content+ is returned untouched whenever it is
        # present. Only when it is nil or empty is the definition rebuilt from
        # +label+, +url+ and +title+. In that case the pieces are made safe for
        # Markdown: a URL containing whitespace is wrapped in angle brackets and
        # unescaped double quotes inside the title are backslash-escaped.
        #
        # @param node [LinkDefinitionNode] The link definition node (anything
        #   responding to +content+, +label+, +url+ and +title+)
        # @return [String] Formatted link definition
        def format(node)
          original = node.content
          return original if original && !original.empty?

          # Reconstruct from components
          definition = "[#{node.label}]: #{format_destination(node.url)}"
          title = node.title
          return definition if title.nil? || title.empty?

          "#{definition} \"#{escape_title(title)}\""
        end

        # Format multiple link definitions
        #
        # @param nodes [Array<LinkDefinitionNode>] Link definition nodes
        # @param separator [String] Separator between definitions
        # @return [String] Formatted link definitions
        def format_all(nodes, separator: "\n")
          nodes.map { |node| format(node) }.join(separator)
        end

        private

        # Render a link destination, using the +<...>+ form when the bare form
        # would be cut short at the first whitespace character.
        #
        # @param url [String, nil] The link destination
        # @return [String] Destination text ready to follow "[label]: "
        def format_destination(url)
          destination = url.to_s
          return destination unless destination.match?(/\s/)
          # Already in pointy-bracket form; do not wrap twice.
          return destination if destination.start_with?("<") && destination.end_with?(">")

          "<#{destination}>"
        end

        # Backslash-escape double quotes so the title can sit inside "...".
        # Quotes that already carry a backslash are left alone.
        #
        # @param title [String] Raw title text
        # @return [String] Title safe to embed between double quotes
        def escape_title(title)
          title.gsub(/(?<!\\)"/) { '\\"' }
        end
      end
    end
  end
end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
module Markdown
  module Merge
    # Represents a link reference definition that was consumed by the Markdown parser.
    #
    # Markdown parsers like Markly (libcmark-gfm) consume link reference definitions
    # during parsing and resolve them into inline links. This means they don't appear
    # as nodes in the AST. This class represents these "consumed" definitions so they
    # can be preserved during merge operations.
    #
    # Link reference definitions have the form:
    #   [label]: url "optional title"
    #   [label]: url 'optional title'
    #   [label]: url (optional title)
    #   [label]: <url> "optional title"
    #
    # Uses {LinkParser} for robust parsing that handles:
    # - Emoji in labels (e.g., `[🖼️galtzo-discord]`)
    # - Multi-byte UTF-8 characters
    # - Nested brackets in labels
    #
    # @example
    #   node = LinkDefinitionNode.new(
    #     "[ref]: https://example.com",
    #     line_number: 10,
    #     label: "ref",
    #     url: "https://example.com"
    #   )
    #   node.type       # => :link_definition
    #   node.label      # => "ref"
    #   node.url        # => "https://example.com"
    #   node.signature  # => [:link_definition, "ref"]
    class LinkDefinitionNode < Ast::Merge::AstNode
      # @return [String] The link label (reference name)
      attr_reader :label

      # @return [String] The URL
      attr_reader :url

      # @return [String, nil] Optional title
      attr_reader :title

      # @return [String] The full original line content (without line terminator)
      attr_reader :content

      # Initialize a new LinkDefinitionNode
      #
      # @param content [String] The full line content; a trailing line terminator
      #   is removed so that {#to_commonmark} never emits a doubled newline and
      #   the end column reflects the visible text (same treatment GapLineNode
      #   gives its content)
      # @param line_number [Integer] 1-based line number
      # @param label [String] The link label
      # @param url [String] The URL
      # @param title [String, nil] Optional title
      def initialize(content, line_number:, label:, url:, title: nil)
        @content = content.chomp
        @label = label
        @url = url
        @title = title

        location = Ast::Merge::AstNode::Location.new(
          start_line: line_number,
          end_line: line_number,
          start_column: 0,
          end_column: @content.length,
        )

        super(slice: @content, location: location)
      end

      class << self
        # Shared parser instance for parsing link definitions
        # @return [LinkParser]
        def parser
          @parser ||= LinkParser.new # rubocop:disable ThreadSafety/ClassInstanceVariable
        end

        # Parse a line and create a LinkDefinitionNode if it's a link definition.
        #
        # @param line [String] The line content
        # @param line_number [Integer] 1-based line number
        # @return [LinkDefinitionNode, nil] Node if line is a link definition, nil otherwise
        def parse(line, line_number:)
          stripped_line = line.chomp
          result = parser.parse_definition_line(stripped_line)
          return unless result

          new(
            stripped_line,
            line_number: line_number,
            label: result[:label],
            url: result[:url],
            title: result[:title],
          )
        end

        # Check if a line looks like a link reference definition.
        #
        # NOTE(review): this strips leading and trailing whitespace before asking
        # the parser, whereas {.parse} only chomps the line terminator. Whether the
        # two can disagree for indented lines depends on LinkParser — confirm.
        #
        # @param line [String] The line to check
        # @return [Boolean] true if line matches link definition pattern
        def link_definition?(line)
          !parser.parse_definition_line(line.strip).nil?
        end
      end

      # TreeHaver::Node protocol: type
      # @return [Symbol] :link_definition
      def type
        :link_definition
      end

      # Alias for compatibility with wrapped nodes that have merge_type
      # @return [Symbol] :link_definition
      alias_method :merge_type, :type

      # Generate a signature for matching link definitions.
      #
      # Labels are compared the way CommonMark matches them: surrounding
      # whitespace is dropped, internal whitespace runs collapse to one space,
      # and the text is Unicode case-folded. "[Foo  Bar]" and "[foo bar]"
      # therefore yield the same signature.
      #
      # @return [Array] Signature array [:link_definition, normalized_label]
      def signature
        normalized_label = @label.strip.gsub(/\s+/, " ").downcase(:fold)
        [:link_definition, normalized_label]
      end

      # TreeHaver::Node protocol: source_position
      # @return [Hash] Position info for source extraction
      def source_position
        {
          start_line: @location.start_line,
          end_line: @location.end_line,
          start_column: @location.start_column,
          end_column: @location.end_column,
        }
      end

      # TreeHaver::Node protocol: children (none for link definitions)
      # @return [Array] Empty array
      def children
        []
      end

      # TreeHaver::Node protocol: text
      # @return [String] The full line content
      def text
        @content
      end

      # Convert to commonmark format (the original content plus one newline)
      # @return [String] The link definition line
      def to_commonmark
        "#{@content}\n"
      end

      # For debugging
      # @return [String] Class name with label and URL
      def inspect
        "#<#{self.class.name} [#{@label}]: #{@url}>"
      end
    end
  end
end