RubyGems - markdown-merge - Versions diffs - 1.0.0 - Mend

markdown-merge 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data/CHANGELOG.md +251 -0
data/CITATION.cff +20 -0
data/CODE_OF_CONDUCT.md +134 -0
data/CONTRIBUTING.md +227 -0
data/FUNDING.md +74 -0
data/LICENSE.txt +21 -0
data/README.md +1087 -0
data/REEK +0 -0
data/RUBOCOP.md +71 -0
data/SECURITY.md +21 -0
data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
data/lib/markdown/merge/cleanse.rb +42 -0
data/lib/markdown/merge/code_block_merger.rb +300 -0
data/lib/markdown/merge/conflict_resolver.rb +128 -0
data/lib/markdown/merge/debug_logger.rb +26 -0
data/lib/markdown/merge/document_problems.rb +190 -0
data/lib/markdown/merge/file_aligner.rb +196 -0
data/lib/markdown/merge/file_analysis.rb +353 -0
data/lib/markdown/merge/file_analysis_base.rb +629 -0
data/lib/markdown/merge/freeze_node.rb +93 -0
data/lib/markdown/merge/gap_line_node.rb +136 -0
data/lib/markdown/merge/link_definition_formatter.rb +49 -0
data/lib/markdown/merge/link_definition_node.rb +157 -0
data/lib/markdown/merge/link_parser.rb +421 -0
data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
data/lib/markdown/merge/markdown_structure.rb +123 -0
data/lib/markdown/merge/merge_result.rb +166 -0
data/lib/markdown/merge/node_type_normalizer.rb +126 -0
data/lib/markdown/merge/output_builder.rb +166 -0
data/lib/markdown/merge/partial_template_merger.rb +334 -0
data/lib/markdown/merge/smart_merger.rb +221 -0
data/lib/markdown/merge/smart_merger_base.rb +621 -0
data/lib/markdown/merge/table_match_algorithm.rb +504 -0
data/lib/markdown/merge/table_match_refiner.rb +136 -0
data/lib/markdown/merge/version.rb +12 -0
data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
data/lib/markdown/merge.rb +149 -0
data/lib/markdown-merge.rb +4 -0
data/sig/markdown/merge.rbs +341 -0
data.tar.gz.sig +0 -0
metadata +365 -0
metadata.gz.sig +0 -0

data/lib/markdown/merge/link_reference_rehydrator.rb ADDED Viewed

@@ -0,0 +1,320 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Rehydrates inline links and images to use link reference definitions.
+    #
+    # When markdown is processed through `to_commonmark`, reference-style links
+    # `[text][label]` are converted to inline links `[text](url)`.
+    # This class reverses that transformation by:
+    # 1. Parsing link reference definitions from content using {LinkParser}
+    # 2. Finding inline links/images using {LinkParser}'s PEG-based parsing
+    # 3. Replacing inline URLs with reference labels where a definition exists
+    #
+    # Uses Parslet-based parsing for robust handling of:
+    # - Emoji in labels (e.g., `[🖼️galtzo-discord]`)
+    # - Nested brackets (for linked images like `[![alt][ref]](url)`)
+    # - Multi-byte UTF-8 characters
+    #
+    # @example Standalone usage
+    #   content = <<~MD
+    #     Check out [Example](https://example.com) for more info.
+    #
+    #     [example]: https://example.com
+    #   MD
+    #   result = LinkReferenceRehydrator.rehydrate(content)
+    #   # => "Check out [Example][example] for more info.\n\n[example]: https://example.com\n"
+    #
+    class LinkReferenceRehydrator
+      # @return [String] The original content
+      attr_reader :content
+      # @return [DocumentProblems] Problems found during rehydration
+      attr_reader :problems
+      class << self
+        # Rehydrate inline links/images to reference style (class method).
+        #
+        # Convenience wrapper around `new(content).rehydrate`. The instance is
+        # discarded, so use an explicit instance when the recorded problems or
+        # the rehydration count are needed afterwards.
+        #
+        # @param content [String] Content to rehydrate
+        # @return [String] Rehydrated content
+        def rehydrate(content)
+          new(content).rehydrate
+        end
+      end
+      # Initialize a new rehydrator.
+      #
+      # The definition maps are built lazily, on first access or on the first
+      # call to {#rehydrate}.
+      #
+      # @param content [String] Content to process
+      def initialize(content)
+        @content = content
+        @problems = DocumentProblems.new
+        @link_definitions = nil
+        @duplicate_definitions = nil
+        @url_to_label = nil
+        @parser = LinkParser.new
+        @rehydration_count = 0
+      end
+      # Get the map of URLs to their preferred label.
+      #
+      # When several labels define the same URL, the shortest label wins and
+      # ties go to the label defined first.
+      #
+      # @return [Hash<String, String>] URL => label mapping
+      def link_definitions
+        build_definition_maps unless @link_definitions
+        @link_definitions
+      end
+      # Get duplicate definitions (multiple labels for same URL).
+      #
+      # @return [Hash<String, Array<String>>] URL => [labels] for duplicates only
+      def duplicate_definitions
+        build_definition_maps unless @duplicate_definitions
+        @duplicate_definitions
+      end
+      # Rehydrate inline links and images to use reference definitions.
+      #
+      # Uses a tree-based approach to handle nested structures like linked images
+      # `[![alt](img-url)](link-url)`. The parser builds a tree of link constructs,
+      # which is processed leaf-first (post-order) so that inner replacements are
+      # folded into the outer construct before the outer one is rewritten:
+      # 1. The inner image `![alt](img-url)` becomes `![alt][img-label]`
+      # 2. The outer link text is rebuilt around the replaced image
+      # 3. The outer link becomes `[![alt][img-label]][link-label]`
+      #
+      # The original content object is returned untouched when no link
+      # definitions were found.
+      #
+      # NOTE: every call records problems again and keeps adding to
+      # {#rehydration_count}; call this once per instance.
+      #
+      # @return [String] Rehydrated content
+      def rehydrate
+        build_definition_maps unless @link_definitions
+        record_duplicate_problems
+        return content if @url_to_label.empty?
+        # 1. Find all link constructs with proper nesting detection
+        tree = @parser.find_all_link_constructs(content)
+        # 2. Collect replacements; nested constructs collapse into a single
+        #    replacement for the outermost construct that could be rewritten
+        replacements = collect_nested_replacements(tree, content)
+        # 3. Apply from the end of the document backwards so that earlier
+        #    positions stay valid while later spans change length
+        result = content.dup
+        replacements.sort_by { |r| -r[:start_pos] }.each do |replacement|
+          result = result[0...replacement[:start_pos]] +
+            replacement[:replacement] +
+            result[replacement[:end_pos]..]
+        end
+        result
+      end
+      # Check if rehydration made any changes.
+      #
+      # @return [Boolean] true if any links were rehydrated
+      def changed?
+        @rehydration_count.positive?
+      end
+      # Get count of links/images rehydrated.
+      #
+      # Nested constructs count once per rewritten construct (a linked image
+      # whose image and link are both rewritten counts as two).
+      #
+      # @return [Integer] Number of rehydrations performed
+      attr_reader :rehydration_count
+      private
+      # Collect replacements from tree structure, processing children first.
+      #
+      # Walks the tree in post-order. When a parent can be rewritten, its single
+      # replacement already contains the rewritten children; otherwise the
+      # children's own replacements are kept.
+      #
+      # @param items [Array<Hash>] Tree items from find_all_link_constructs
+      # @param text [String] The document text (only threaded through the recursion)
+      # @return [Array<Hash>] Replacements with :start_pos, :end_pos, :replacement
+      def collect_nested_replacements(items, text)
+        replacements = []
+        items.each do |item|
+          if item[:children]&.any?
+            # Process children first and collect their replacements
+            child_replacements = collect_nested_replacements(item[:children], text)
+            parent_replacement = process_parent_with_children(item, child_replacements)
+            if parent_replacement
+              # The parent replacement spans (and already embeds) the children
+              replacements << parent_replacement
+            else
+              # Parent stays inline (no matching label, has a title, ...), so
+              # the children are replaced on their own
+              replacements.concat(child_replacements)
+            end
+          else
+            # Leaf node - process directly
+            replacement = if item[:type] == :image
+              process_image(item)
+            else
+              process_link(item)
+            end
+            replacements << replacement if replacement
+          end
+        end
+        replacements
+      end
+      # Process a parent item that has children, accounting for child replacements.
+      #
+      # For a linked image like `[![alt](img-url)](link-url)`:
+      # 1. The child image was already processed: `![alt](img-url)` → `![alt][img-label]`
+      # 2. The new parent text is built around it: `[![alt][img-label]][link-label]`
+      #
+      # The parent may itself be an image (alt text containing a nested
+      # construct). In that case the `!` marker is kept, the bracketed text is
+      # read from `:alt`, and the text offset accounts for the extra character.
+      #
+      # @param item [Hash] Parent item with :children
+      # @param child_replacements [Array<Hash>] Replacements made by children
+      # @return [Hash, nil] Replacement for the parent, or nil if not applicable
+      def process_parent_with_children(item, child_replacements)
+        # Nothing to do unless a definition exists for the parent's URL
+        label = @url_to_label[item[:url]]
+        return unless label
+        image = item[:type] == :image
+        # Check if parent has a title (can't rehydrate if it does)
+        if item[:title] && !item[:title].empty?
+          if image
+            @problems.add(
+              :image_has_title,
+              severity: :info,
+              alt: item[:alt],
+              url: item[:url],
+              title: item[:title],
+            )
+          else
+            @problems.add(
+              :link_has_title,
+              severity: :info,
+              text: item[:text],
+              url: item[:url],
+              title: item[:title],
+            )
+          end
+          return
+        end
+        # Images keep their bracketed text under :alt, links under :text
+        # (falling back to :text for images in case the parser only sets that key)
+        original_text = if image
+          item[:alt] || item[:text] || ""
+        else
+          item[:text] || ""
+        end
+        marker = image ? "!" : ""
+        # Child positions are relative to the document; the bracketed text
+        # starts right after the optional `!` and the opening `[`
+        text_start = item[:start_pos] + marker.length + 1
+        new_text = original_text.dup
+        # Apply last-to-first so earlier offsets are unaffected by length changes
+        child_replacements.sort_by { |r| -r[:start_pos] }.each do |child_rep|
+          relative_start = child_rep[:start_pos] - text_start
+          relative_end = child_rep[:end_pos] - text_start
+          # Only apply if the child is within the text portion
+          next unless relative_start >= 0 && relative_end <= new_text.length
+          new_text = new_text[0...relative_start] + child_rep[:replacement] + new_text[relative_end..]
+        end
+        @rehydration_count += 1
+        {
+          start_pos: item[:start_pos],
+          end_pos: item[:end_pos],
+          replacement: "#{marker}[#{new_text}][#{label}]",
+        }
+      end
+      # Parse the link reference definitions and build the lookup maps.
+      #
+      # For every URL the preferred label is the shortest one, with ties going
+      # to the label defined first. URLs with more than one label are also
+      # recorded in @duplicate_definitions.
+      #
+      # @return [void]
+      def build_definition_maps
+        @link_definitions = {}
+        @duplicate_definitions = {}
+        @url_to_label = {}
+        url_to_all_labels = Hash.new { |h, k| h[k] = [] }
+        definitions = @parser.parse_definitions(content)
+        definitions.each do |defn|
+          url_to_all_labels[defn[:url]] << defn[:label]
+        end
+        url_to_all_labels.each do |url, labels|
+          # Sort by [length, original index] to make the tie-break explicit
+          sorted = labels.sort_by.with_index { |l, i| [l.length, i] }
+          best_label = sorted.first
+          @link_definitions[url] = best_label
+          @url_to_label[url] = best_label
+          @duplicate_definitions[url] = labels if labels.size > 1
+        end
+      end
+      # Record one warning per URL that is defined under several labels.
+      #
+      # @return [void]
+      def record_duplicate_problems
+        @duplicate_definitions.each do |url, labels|
+          @problems.add(
+            :duplicate_link_definition,
+            severity: :warning,
+            url: url,
+            labels: labels,
+            selected_label: @url_to_label[url],
+          )
+        end
+      end
+      # Build the replacement for a leaf inline link.
+      #
+      # The title check runs before the label lookup, so a titled link is
+      # reported as :link_has_title even when no definition matches its URL.
+      #
+      # @param link [Hash] Link item with :url, :title, :text, :start_pos, :end_pos
+      # @return [Hash, nil] Replacement, or nil when the link must stay inline
+      def process_link(link)
+        url = link[:url]
+        title = link[:title]
+        link_text = link[:text]
+        if title && !title.empty?
+          @problems.add(
+            :link_has_title,
+            severity: :info,
+            text: link_text,
+            url: url,
+            title: title,
+          )
+          return
+        end
+        label = @url_to_label[url]
+        return unless label
+        @rehydration_count += 1
+        {
+          start_pos: link[:start_pos],
+          end_pos: link[:end_pos],
+          replacement: "[#{link_text}][#{label}]",
+        }
+      end
+      # Build the replacement for a leaf inline image.
+      #
+      # The title check runs before the label lookup, so a titled image is
+      # reported as :image_has_title even when no definition matches its URL.
+      #
+      # @param image [Hash] Image item with :url, :title, :alt, :start_pos, :end_pos
+      # @return [Hash, nil] Replacement, or nil when the image must stay inline
+      def process_image(image)
+        url = image[:url]
+        title = image[:title]
+        alt_text = image[:alt]
+        if title && !title.empty?
+          @problems.add(
+            :image_has_title,
+            severity: :info,
+            alt: alt_text,
+            url: url,
+            title: title,
+          )
+          return
+        end
+        label = @url_to_label[url]
+        return unless label
+        @rehydration_count += 1
+        {
+          start_pos: image[:start_pos],
+          end_pos: image[:end_pos],
+          replacement: "![#{alt_text}][#{label}]",
+        }
+      end
+    end
+  end
+end

data/lib/markdown/merge/markdown_structure.rb ADDED Viewed

@@ -0,0 +1,123 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Defines structural spacing rules for markdown elements.
+    #
+    # When merging markdown from different sources, gap lines from the original
+    # sources may not exist at transition points (e.g., when a dest-only table
+    # is followed by a template-only table). This module defines which node types
+    # require spacing before/after them for proper markdown formatting.
+    #
+    # Node types are categorized by their spacing needs:
+    # - NEEDS_BLANK_BEFORE: Nodes that need a blank line before them (headings, tables, etc.)
+    # - NEEDS_BLANK_AFTER: Nodes that need a blank line after them
+    # - CONTIGUOUS_TYPES: Nodes that should NOT have blank lines between consecutive instances
+    #   (e.g., link_definition blocks should be together)
+    #
+    # @example
+    #   MarkdownStructure.needs_blank_before?(:table)  # => true
+    #   MarkdownStructure.needs_blank_after?(:heading) # => true
+    #   MarkdownStructure.contiguous_type?(:link_definition) # => true
+    module MarkdownStructure
+      # Node types that should have a blank line BEFORE them
+      # (when preceded by other content)
+      NEEDS_BLANK_BEFORE = %i[
+        heading
+        table
+        code_block
+        thematic_break
+        list
+        block_quote
+      ].freeze
+      # Node types that should have a blank line AFTER them
+      # (when followed by other content)
+      NEEDS_BLANK_AFTER = %i[
+        heading
+        table
+        code_block
+        thematic_break
+        list
+        block_quote
+        link_definition
+      ].freeze
+      # Node types that should be contiguous (no blank lines between consecutive
+      # nodes of the same type). These form "blocks" that should stay together.
+      CONTIGUOUS_TYPES = %i[
+        link_definition
+      ].freeze
+      class << self
+        # Check if a node type needs a blank line before it
+        #
+        # @param node_type [Symbol, String, nil] Node type to check (nil => false)
+        # @return [Boolean]
+        def needs_blank_before?(node_type)
+          NEEDS_BLANK_BEFORE.include?(node_type&.to_sym)
+        end
+        # Check if a node type needs a blank line after it
+        #
+        # @param node_type [Symbol, String, nil] Node type to check (nil => false)
+        # @return [Boolean]
+        def needs_blank_after?(node_type)
+          NEEDS_BLANK_AFTER.include?(node_type&.to_sym)
+        end
+        # Check if a node type is a contiguous type (should not have blank lines
+        # between consecutive nodes of the same type).
+        #
+        # @param node_type [Symbol, String, nil] Node type to check (nil => false)
+        # @return [Boolean]
+        def contiguous_type?(node_type)
+          CONTIGUOUS_TYPES.include?(node_type&.to_sym)
+        end
+        # Check if we should insert a blank line between two node types
+        #
+        # Rules, in order:
+        # 1. If either type is unknown (nil), NO blank line
+        # 2. If both types are the same contiguous type, NO blank line
+        # 3. If previous node needs blank after, YES blank line
+        # 4. If next node needs blank before, YES blank line
+        #
+        # @param prev_type [Symbol, String, nil] Previous node type
+        # @param next_type [Symbol, String, nil] Next node type
+        # @return [Boolean]
+        def needs_blank_between?(prev_type, next_type)
+          return false if prev_type.nil? || next_type.nil?
+          prev_sym = prev_type.to_sym
+          next_sym = next_type.to_sym
+          # Same contiguous type - no blank line between them
+          return false if prev_sym == next_sym && contiguous_type?(prev_sym)
+          needs_blank_after?(prev_sym) || needs_blank_before?(next_sym)
+        end
+        # Get the node type from a node object
+        #
+        # Priority order:
+        # 1. merge_type - Explicit merge behavior classification (preferred)
+        # 2. type - Parser-specific type fallback, also used when the node
+        #    responds to merge_type but returns nil
+        #
+        # @param node [Object] Node to get type from
+        # @return [Symbol, nil] Node type, or nil when the node exposes none
+        def node_type(node)
+          return unless node
+          merge_type = node.merge_type if node.respond_to?(:merge_type)
+          # A nil merge_type means "not classified": fall through to type
+          return merge_type.to_sym if merge_type
+          node.type&.to_sym if node.respond_to?(:type)
+        end
+      end
+    end
+  end
+end

data/lib/markdown/merge/merge_result.rb ADDED Viewed

@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Represents the result of a Markdown merge operation.
+    #
+    # Inherits from Ast::Merge::MergeResultBase to provide consistent result
+    # handling across all merge gems. Contains the merged content along
+    # with metadata about conflicts, frozen sections, and changes made.
+    #
+    # @example Successful merge
+    #   result = SmartMerger.merge(source_a, source_b)
+    #   if result.success?
+    #     File.write("merged.md", result.content)
+    #   end
+    #
+    # @example Handling conflicts
+    #   result = SmartMerger.merge(source_a, source_b)
+    #   if result.conflicts?
+    #     result.conflicts.each do |conflict|
+    #       puts "Conflict at: #{conflict[:location]}"
+    #     end
+    #   end
+    #
+    # @example Checking for document problems
+    #   result = SmartMerger.merge(source_a, source_b, normalize_whitespace: true)
+    #   result.problems.by_category(:excessive_whitespace).each do |problem|
+    #     puts "Whitespace issue at line #{problem.details[:line]}"
+    #   end
+    #
+    # @see Ast::Merge::MergeResultBase Base class
+    # @see DocumentProblems For problem tracking
+    class MergeResult < Ast::Merge::MergeResultBase
+      # @return [DocumentProblems] Problems found during merge
+      attr_reader :problems
+      # Build a result object for a finished (or failed) merge.
+      #
+      # Caller-supplied stats are layered over the zeroed defaults before
+      # being handed to the base class.
+      #
+      # @param content [String, nil] Merged content (nil if merge failed)
+      # @param conflicts [Array<Hash>] Conflict descriptions
+      # @param frozen_blocks [Array<Hash>] Preserved frozen block info
+      # @param stats [Hash] Merge statistics
+      # @param problems [DocumentProblems, nil] Document problems found
+      # @param options [Hash] Additional options for forward compatibility
+      def initialize(content:, conflicts: [], frozen_blocks: [], stats: {}, problems: nil, **options)
+        combined_stats = default_stats.merge(stats)
+        super(conflicts: conflicts, frozen_blocks: frozen_blocks, stats: combined_stats, **options)
+        @content_raw = content
+        @problems = problems
+        @problems ||= DocumentProblems.new
+      end
+      # The merged Markdown, exactly as it was passed to the constructor.
+      # Overrides the base class to return the string content directly.
+      #
+      # @return [String, nil] The merged Markdown content
+      def content
+        @content_raw
+      end
+      # Whether any content was supplied (i.e. it is not nil).
+      # Overrides the base class for string-based content.
+      #
+      # @return [Boolean]
+      def content?
+        @content_raw.nil? ? false : true
+      end
+      # Same value as {#content}; in this class the content already is a string.
+      #
+      # @return [String, nil] The merged content
+      def content_string
+        @content_raw
+      end
+      # A merge succeeded when nothing conflicted and content is present.
+      #
+      # @return [Boolean] True if merge succeeded
+      def success?
+        return false unless conflicts.empty?
+        content?
+      end
+      # Whether unresolved conflicts were recorded.
+      #
+      # @return [Boolean] True if conflicts exist
+      def conflicts?
+        conflicts.empty? ? false : true
+      end
+      # Whether at least one frozen block was preserved.
+      #
+      # @return [Boolean] True if frozen blocks were preserved
+      def has_frozen_blocks?
+        frozen_blocks.empty? ? false : true
+      end
+      # Number of nodes added during the merge (0 when not recorded).
+      #
+      # @return [Integer] Number of nodes added
+      def nodes_added
+        stat_or_zero(:nodes_added)
+      end
+      # Number of nodes removed during the merge (0 when not recorded).
+      #
+      # @return [Integer] Number of nodes removed
+      def nodes_removed
+        stat_or_zero(:nodes_removed)
+      end
+      # Number of nodes modified during the merge (0 when not recorded).
+      #
+      # @return [Integer] Number of nodes modified
+      def nodes_modified
+        stat_or_zero(:nodes_modified)
+      end
+      # Merge duration in milliseconds, as stored in the stats.
+      #
+      # @return [Float, nil] Merge time in milliseconds
+      def merge_time_ms
+        stats[:merge_time_ms]
+      end
+      # How many frozen blocks were preserved.
+      #
+      # @return [Integer] Number of frozen blocks
+      def frozen_count
+        frozen_blocks.size
+      end
+      # Compact one-line summary for debugging output.
+      #
+      # @return [String] Debug representation
+      def inspect
+        outcome = if success?
+          "success"
+        else
+          "failed"
+        end
+        fields = [
+          "#<#{self.class.name}",
+          outcome,
+          "conflicts=#{conflicts.size}",
+          "frozen=#{frozen_blocks.size}",
+          "added=#{nodes_added}",
+          "removed=#{nodes_removed}",
+          "modified=#{nodes_modified}",
+        ]
+        "#{fields.join(" ")}>"
+      end
+      # The merged content, or an empty string when there is none.
+      #
+      # @return [String] The merged content or empty string
+      def to_s
+        merged = content
+        merged || ""
+      end
+      private
+      # Zeroed statistics used as the base for caller-supplied stats.
+      #
+      # @return [Hash] Default stats hash
+      def default_stats
+        {
+          nodes_added: 0,
+          nodes_removed: 0,
+          nodes_modified: 0,
+          merge_time_ms: 0,
+        }
+      end
+      # Look up a counter in the stats, treating a missing/nil entry as zero.
+      #
+      # @param key [Symbol] Stats key to read
+      # @return [Integer] Stored value, or 0
+      def stat_or_zero(key)
+        stats[key] || 0
+      end
+    end
+  end
+end