markdown-merge 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +251 -0
  4. data/CITATION.cff +20 -0
  5. data/CODE_OF_CONDUCT.md +134 -0
  6. data/CONTRIBUTING.md +227 -0
  7. data/FUNDING.md +74 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +1087 -0
  10. data/REEK +0 -0
  11. data/RUBOCOP.md +71 -0
  12. data/SECURITY.md +21 -0
  13. data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
  14. data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
  15. data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
  16. data/lib/markdown/merge/cleanse.rb +42 -0
  17. data/lib/markdown/merge/code_block_merger.rb +300 -0
  18. data/lib/markdown/merge/conflict_resolver.rb +128 -0
  19. data/lib/markdown/merge/debug_logger.rb +26 -0
  20. data/lib/markdown/merge/document_problems.rb +190 -0
  21. data/lib/markdown/merge/file_aligner.rb +196 -0
  22. data/lib/markdown/merge/file_analysis.rb +353 -0
  23. data/lib/markdown/merge/file_analysis_base.rb +629 -0
  24. data/lib/markdown/merge/freeze_node.rb +93 -0
  25. data/lib/markdown/merge/gap_line_node.rb +136 -0
  26. data/lib/markdown/merge/link_definition_formatter.rb +49 -0
  27. data/lib/markdown/merge/link_definition_node.rb +157 -0
  28. data/lib/markdown/merge/link_parser.rb +421 -0
  29. data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
  30. data/lib/markdown/merge/markdown_structure.rb +123 -0
  31. data/lib/markdown/merge/merge_result.rb +166 -0
  32. data/lib/markdown/merge/node_type_normalizer.rb +126 -0
  33. data/lib/markdown/merge/output_builder.rb +166 -0
  34. data/lib/markdown/merge/partial_template_merger.rb +334 -0
  35. data/lib/markdown/merge/smart_merger.rb +221 -0
  36. data/lib/markdown/merge/smart_merger_base.rb +621 -0
  37. data/lib/markdown/merge/table_match_algorithm.rb +504 -0
  38. data/lib/markdown/merge/table_match_refiner.rb +136 -0
  39. data/lib/markdown/merge/version.rb +12 -0
  40. data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
  41. data/lib/markdown/merge.rb +149 -0
  42. data/lib/markdown-merge.rb +4 -0
  43. data/sig/markdown/merge.rbs +341 -0
  44. data.tar.gz.sig +0 -0
  45. metadata +365 -0
  46. metadata.gz.sig +0 -0
data/lib/markdown/merge/file_aligner.rb
@@ -0,0 +1,196 @@
+# frozen_string_literal: true
+
+module Markdown
+  module Merge
+    # Aligns Markdown block elements between template and destination files.
+    #
+    # Uses structural signatures to match headings, paragraphs, lists, code blocks,
+    # and other block elements. The alignment is then used by SmartMerger to
+    # determine how to combine the files.
+    #
+    # @example Basic usage
+    #   aligner = FileAligner.new(template_analysis, dest_analysis)
+    #   alignment = aligner.align
+    #   alignment.each do |entry|
+    #     case entry[:type]
+    #     when :match
+    #       # Both files have this element
+    #     when :template_only
+    #       # Only in template
+    #     when :dest_only
+    #       # Only in destination
+    #     end
+    #   end
+    #
+    # @see FileAnalysisBase
+    # @see SmartMergerBase
+    class FileAligner
+      # @return [FileAnalysisBase] Template file analysis
+      attr_reader :template_analysis
+
+      # @return [FileAnalysisBase] Destination file analysis
+      attr_reader :dest_analysis
+
+      # @return [#call, nil] Optional match refiner for fuzzy matching
+      attr_reader :match_refiner
+
+      # Initialize a file aligner
+      #
+      # @param template_analysis [FileAnalysisBase] Analysis of the template file
+      # @param dest_analysis [FileAnalysisBase] Analysis of the destination file
+      # @param match_refiner [#call, nil] Optional match refiner for fuzzy matching
+      def initialize(template_analysis, dest_analysis, match_refiner: nil)
+        @template_analysis = template_analysis
+        @dest_analysis = dest_analysis
+        @match_refiner = match_refiner
+      end
+
+      # Perform alignment between template and destination statements
+      #
+      # @return [Array<Hash>] Alignment entries with type, indices, and nodes
+      def align
+        template_statements = @template_analysis.statements
+        dest_statements = @dest_analysis.statements
+
+        # Build signature maps
+        template_by_sig = build_signature_map(template_statements, @template_analysis)
+        dest_by_sig = build_signature_map(dest_statements, @dest_analysis)
+
+        # Track which indices have been matched
+        matched_template = Set.new
+        matched_dest = Set.new
+        alignment = []
+
+        # First pass: find matches by signature
+        template_by_sig.each do |sig, template_indices|
+          next unless dest_by_sig.key?(sig)
+
+          dest_indices = dest_by_sig[sig]
+
+          # Match indices pairwise (first template with first dest, etc.)
+          template_indices.zip(dest_indices).each do |t_idx, d_idx|
+            next unless t_idx && d_idx
+
+            alignment << {
+              type: :match,
+              template_index: t_idx,
+              dest_index: d_idx,
+              signature: sig,
+              template_node: template_statements[t_idx],
+              dest_node: dest_statements[d_idx],
+            }
+
+            matched_template << t_idx
+            matched_dest << d_idx
+          end
+        end
+
+        # Apply match refiner to find additional fuzzy matches
+        if @match_refiner
+          unmatched_t_nodes = template_statements.each_with_index.reject { |_, i| matched_template.include?(i) }.map(&:first)
+          unmatched_d_nodes = dest_statements.each_with_index.reject { |_, i| matched_dest.include?(i) }.map(&:first)
+
+          unless unmatched_t_nodes.empty? || unmatched_d_nodes.empty?
+            refiner_matches = @match_refiner.call(unmatched_t_nodes, unmatched_d_nodes, {
+              template_analysis: @template_analysis,
+              dest_analysis: @dest_analysis,
+            })
+
+            refiner_matches.each do |match|
+              t_idx = template_statements.index(match.template_node)
+              d_idx = dest_statements.index(match.dest_node)
+
+              next unless t_idx && d_idx
+              next if matched_template.include?(t_idx) || matched_dest.include?(d_idx)
+
+              alignment << {
+                type: :match,
+                template_index: t_idx,
+                dest_index: d_idx,
+                signature: [:refined_match, match.score],
+                template_node: match.template_node,
+                dest_node: match.dest_node,
+              }
+
+              matched_template << t_idx
+              matched_dest << d_idx
+            end
+          end
+        end
+
+        # Second pass: add template-only entries
+        template_statements.each_with_index do |stmt, idx|
+          next if matched_template.include?(idx)
+
+          alignment << {
+            type: :template_only,
+            template_index: idx,
+            dest_index: nil,
+            signature: @template_analysis.signature_at(idx),
+            template_node: stmt,
+            dest_node: nil,
+          }
+        end
+
+        # Third pass: add dest-only entries
+        dest_statements.each_with_index do |stmt, idx|
+          next if matched_dest.include?(idx)
+
+          alignment << {
+            type: :dest_only,
+            template_index: nil,
+            dest_index: idx,
+            signature: @dest_analysis.signature_at(idx),
+            template_node: nil,
+            dest_node: stmt,
+          }
+        end
+
+        # Sort by appearance order (destination order for matched/dest-only, then template-only)
+        alignment.sort_by! do |entry|
+          case entry[:type]
+          when :match
+            [0, entry[:dest_index]]
+          when :dest_only
+            [0, entry[:dest_index]]
+          when :template_only
+            [1, entry[:template_index]]
+          else
+            # :nocov: defensive - only :match, :dest_only, :template_only types are created
+            [2, 0] # Unknown types sort last
+            # :nocov:
+          end
+        end
+
+        DebugLogger.debug("Alignment complete", {
+          total: alignment.size,
+          matches: alignment.count { |e| e[:type] == :match },
+          template_only: alignment.count { |e| e[:type] == :template_only },
+          dest_only: alignment.count { |e| e[:type] == :dest_only },
+        })
+
+        alignment
+      end
+
+      private
+
+      # Build a map from signatures to statement indices
+      #
+      # @param statements [Array] List of statements
+      # @param analysis [FileAnalysisBase] Analysis for signature generation
+      # @return [Hash<Array, Array<Integer>>] Map from signature to indices
+      def build_signature_map(statements, analysis)
+        map = Hash.new { |h, k| h[k] = [] }
+
+        statements.each_with_index do |_stmt, idx|
+          sig = analysis.signature_at(idx)
+          # :nocov: defensive - signature_at always returns a value for valid indices
+          map[sig] << idx if sig
+          # :nocov:
+        end
+
+        map
+      end
+    end
+  end
+end
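
FileAligner documents its `match_refiner` only as a duck type: a callable that receives the unmatched template nodes, the unmatched destination nodes, and a context hash, and returns match objects responding to `template_node`, `dest_node`, and `score`. The package ships `table_match_refiner.rb` for this role; the sketch below merely illustrates the documented protocol. The `RefinedMatch` Struct and the same-merge-type heuristic are invented for illustration, and it assumes the statements passed in respond to `merge_type` as shown in the FileAnalysis example below.

```ruby
# Hypothetical refiner: not part of markdown-merge, just the documented duck type.
RefinedMatch = Struct.new(:template_node, :dest_node, :score, keyword_init: true)

type_refiner = lambda do |template_nodes, dest_nodes, _context|
  remaining = dest_nodes.dup

  template_nodes.filter_map do |t_node|
    # Deliberately naive heuristic: pair each leftover template node with the
    # first leftover destination node of the same canonical merge type.
    d_node = remaining.find { |d| d.merge_type == t_node.merge_type }
    next unless d_node

    remaining.delete(d_node)
    RefinedMatch.new(template_node: t_node, dest_node: d_node, score: 0.5)
  end
end

aligner = Markdown::Merge::FileAligner.new(
  template_analysis,
  dest_analysis,
  match_refiner: type_refiner,
)
alignment = aligner.align
```

Any matches such a refiner returns are folded into the alignment with a `[:refined_match, score]` signature, as the `align` method above shows.
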
data/lib/markdown/merge/file_analysis.rb
@@ -0,0 +1,353 @@
+# frozen_string_literal: true
+
+require "digest"
+
+module Markdown
+  module Merge
+    # File analysis for Markdown files using tree_haver backends.
+    #
+    # Extends FileAnalysisBase with backend-agnostic parsing via tree_haver.
+    # Supports both Commonmarker and Markly backends through tree_haver's
+    # unified API.
+    #
+    # Parses Markdown source code and extracts:
+    # - Top-level block elements (headings, paragraphs, lists, code blocks, etc.)
+    # - Freeze blocks marked with HTML comments
+    # - Structural signatures for matching elements between files
+    #
+    # All nodes are wrapped with canonical types via NodeTypeNormalizer,
+    # enabling portable merge rules across backends.
+    #
+    # Freeze blocks are marked with HTML comments:
+    #   <!-- markdown-merge:freeze -->
+    #   ... content to preserve ...
+    #   <!-- markdown-merge:unfreeze -->
+    #
+    # @example Basic usage with auto backend
+    #   analysis = FileAnalysis.new(markdown_source)
+    #   analysis.statements.each do |node|
+    #     puts "#{node.merge_type}: #{node.type}"
+    #   end
+    #
+    # @example With specific backend
+    #   analysis = FileAnalysis.new(markdown_source, backend: :markly)
+    #
+    # @example With custom freeze token
+    #   analysis = FileAnalysis.new(source, freeze_token: "my-merge")
+    #   # Looks for: <!-- my-merge:freeze --> / <!-- my-merge:unfreeze -->
+    #
+    # @see FileAnalysisBase Base class
+    # @see NodeTypeNormalizer Type normalization
+    class FileAnalysis < FileAnalysisBase
+      # Default freeze token for identifying freeze blocks
+      # @return [String]
+      DEFAULT_FREEZE_TOKEN = "markdown-merge"
+
+      # @return [Symbol] The backend being used (:commonmarker, :markly)
+      attr_reader :backend
+
+      # @return [Hash] Parser-specific options
+      attr_reader :parser_options
+
+      # Initialize file analysis with tree_haver backend.
+      #
+      # @param source [String] Markdown source code to analyze
+      # @param backend [Symbol] Backend to use (:commonmarker, :markly, :auto)
+      # @param freeze_token [String] Token for freeze block markers
+      # @param signature_generator [Proc, nil] Custom signature generator
+      # @param parser_options [Hash] Backend-specific parser options
+      #   For commonmarker: { options: {} }
+      #   For markly: { flags: Markly::DEFAULT, extensions: [:table] }
+      def initialize(
+        source,
+        backend: :auto,
+        freeze_token: DEFAULT_FREEZE_TOKEN,
+        signature_generator: nil,
+        **parser_options
+      )
+        @requested_backend = backend
+        @parser_options = parser_options
+
+        # Resolve and initialize the backend
+        @backend = resolve_backend(backend)
+        @parser = create_parser
+
+        super(source, freeze_token: freeze_token, signature_generator: signature_generator)
+      end
+
+      # Parse the source document using tree_haver backend.
+      #
+      # Error handling follows the same pattern as other *-merge gems:
+      # - TreeHaver::Error (which inherits from Exception, not StandardError) is caught
+      # - TreeHaver::NotAvailable is a subclass of TreeHaver::Error, so it's also caught
+      # - When an error occurs, the error is stored in @errors and nil is returned
+      # - SmartMergerBase#parse_and_analyze checks valid? and raises the appropriate parse error
+      #
+      # @param source [String] Markdown source to parse
+      # @return [Object, nil] Root document node from tree_haver, or nil on error
+      def parse_document(source)
+        tree = @parser.parse(source)
+        tree.root_node
+      rescue TreeHaver::Error => e
+        # TreeHaver::Error inherits from Exception, not StandardError.
+        # This also catches TreeHaver::NotAvailable (subclass of Error).
+        @errors << e.message
+        nil
+      end
+
+      # Get the next sibling of a node.
+      #
+      # Handles differences between backends:
+      # - Commonmarker: node.next_sibling
+      # - Markly: node.next
+      #
+      # @param node [Object] Current node
+      # @return [Object, nil] Next sibling or nil
+      def next_sibling(node)
+        # tree_haver normalizes this, but handle both patterns for safety
+        if node.respond_to?(:next_sibling)
+          node.next_sibling
+        elsif node.respond_to?(:next)
+          node.next
+        end
+      end
+
+      # Returns the FreezeNode class to use.
+      #
+      # @return [Class] Markdown::Merge::FreezeNode
+      def freeze_node_class
+        FreezeNode
+      end
+
+      # Check if value is a tree_haver node.
+      #
+      # @param value [Object] Value to check
+      # @return [Boolean] true if this is a parser node
+      def parser_node?(value)
+        # Check for tree_haver node or wrapped node
+        return true if value.respond_to?(:type) && value.respond_to?(:source_position)
+        return true if Ast::Merge::NodeTyping.typed_node?(value)
+
+        false
+      end
+
+      # Override to detect tree_haver nodes for signature generator fallthrough
+      # @param value [Object] The value to check
+      # @return [Boolean] true if this is a fallthrough node
+      def fallthrough_node?(value)
+        Ast::Merge::NodeTyping.typed_node?(value) ||
+          value.is_a?(Ast::Merge::FreezeNodeBase) ||
+          parser_node?(value) ||
+          super
+      end
+
+      # Compute signature for a tree_haver node.
+      #
+      # Uses canonical types from NodeTypeNormalizer for portable signatures.
+      #
+      # @param node [Object] The node (may be wrapped)
+      # @return [Array, nil] Signature array
+      def compute_parser_signature(node)
+        # Get canonical type from wrapper or normalize raw type
+        canonical_type = if Ast::Merge::NodeTyping.typed_node?(node)
+          Ast::Merge::NodeTyping.merge_type_for(node)
+        else
+          NodeTypeNormalizer.canonical_type(node.type, @backend)
+        end
+
+        # Unwrap to access underlying node methods
+        raw_node = Ast::Merge::NodeTyping.unwrap(node)
+
+        case canonical_type
+        when :heading
+          # Content-based: Match headings by level and text content
+          [:heading, raw_node.header_level, extract_text_content(raw_node)]
+        when :paragraph
+          # Content-based: Match paragraphs by content hash (first 32 chars of digest)
+          text = extract_text_content(raw_node)
+          [:paragraph, Digest::SHA256.hexdigest(text)[0, 32]]
+        when :code_block
+          # Content-based: Match code blocks by fence info and content hash
+          content = safe_string_content(raw_node)
+          fence_info = raw_node.respond_to?(:fence_info) ? raw_node.fence_info : nil
+          [:code_block, fence_info, Digest::SHA256.hexdigest(content)[0, 16]]
+        when :list
+          # Structure-based: Match lists by type and item count (content may differ)
+          list_type = raw_node.respond_to?(:list_type) ? raw_node.list_type : nil
+          [:list, list_type, count_children(raw_node)]
+        when :block_quote
+          # Content-based: Match block quotes by content hash
+          text = extract_text_content(raw_node)
+          [:block_quote, Digest::SHA256.hexdigest(text)[0, 16]]
+        when :thematic_break
+          # Structure-based: All thematic breaks are equivalent
+          [:thematic_break]
+        when :html_block
+          # Content-based: Match HTML blocks by content hash
+          content = safe_string_content(raw_node)
+          [:html_block, Digest::SHA256.hexdigest(content)[0, 16]]
+        when :table
+          # Content-based: Match tables by structure and header content
+          header_content = extract_table_header_content(raw_node)
+          [:table, count_children(raw_node), Digest::SHA256.hexdigest(header_content)[0, 16]]
+        when :footnote_definition
+          # Name/label-based: Match footnotes by name or label
+          label = raw_node.respond_to?(:name) ? raw_node.name : safe_string_content(raw_node)
+          [:footnote_definition, label]
+        when :custom_block
+          # Content-based: Match custom blocks by content hash
+          text = extract_text_content(raw_node)
+          [:custom_block, Digest::SHA256.hexdigest(text)[0, 16]]
+        else
+          # Unknown type - use canonical type and position
+          pos = raw_node.source_position
+          [:unknown, canonical_type, pos&.dig(:start_line)]
+        end
+      end
+
+      # Extract all text content from a node and its children.
+      #
+      # Override for tree_haver nodes which don't have a `walk` method.
+      # Uses recursive traversal via `children` instead.
+      #
+      # @param node [Object] The node
+      # @return [String] Concatenated text content
+      def extract_text_content(node)
+        text_parts = []
+        collect_text_recursive(node, text_parts)
+        text_parts.join
+      end
+
+      # Safely get string content from a node.
+      #
+      # Override for tree_haver nodes which use `text` instead of `string_content`.
+      #
+      # @param node [Object] The node
+      # @return [String] String content or empty string
+      def safe_string_content(node)
+        if node.respond_to?(:string_content)
+          node.string_content.to_s
+        elsif node.respond_to?(:text)
+          node.text.to_s
+        else
+          extract_text_content(node)
+        end
+      rescue TypeError, NoMethodError
+        extract_text_content(node)
+      end
+
+      # Collect top-level nodes from document, wrapping with canonical types.
+      #
+      # @return [Array<Object>] Wrapped nodes
+      def collect_top_level_nodes
+        nodes = []
+        child = @document.first_child
+        while child
+          # Wrap each node with its canonical type
+          wrapped = NodeTypeNormalizer.wrap(child, @backend)
+          nodes << wrapped
+          child = next_sibling(child)
+        end
+        nodes
+      end
+
+      private
+
+      # Recursively collect text content from a node and its descendants.
+      #
+      # Uses NodeTypeNormalizer to map backend-specific types to canonical types,
+      # enabling portable type checking across different markdown parsers.
+      #
+      # NOTE: We use `type` here instead of `merge_type` because this method operates
+      # on child nodes (text, code), not top-level statements.
+      # Only top-level statements are wrapped by NodeTypeNormalizer with `merge_type`.
+      # However, we use NodeTypeNormalizer.canonical_type to normalize the raw type.
+      #
+      # @param node [Object] The node to traverse
+      # @param text_parts [Array<String>] Array to accumulate text into
+      # @return [void]
+      def collect_text_recursive(node, text_parts)
+        # Normalize the type using NodeTypeNormalizer for backend portability
+        canonical_type = NodeTypeNormalizer.canonical_type(node.type, @backend)
+
+        # Collect text from text and code nodes
+        if canonical_type == :text || canonical_type == :code
+          content = if node.respond_to?(:string_content)
+            node.string_content.to_s
+          elsif node.respond_to?(:text)
+            node.text.to_s
+          else
+            ""
+          end
+          text_parts << content unless content.empty?
+        end
+
+        # Recurse into children
+        node.children.each do |child|
+          collect_text_recursive(child, text_parts)
+        end
+      end
+
+      # Resolve the backend to use.
+      #
+      # For :auto, attempts commonmarker first, then markly.
+      # tree_haver handles the actual availability checking.
+      #
+      # @param backend [Symbol] Requested backend
+      # @return [Symbol] Resolved backend (:commonmarker or :markly)
+      def resolve_backend(backend)
+        return backend unless backend == :auto
+
+        # Try commonmarker first, then markly
+        if TreeHaver::BackendRegistry.available?(:commonmarker)
+          :commonmarker
+        elsif TreeHaver::BackendRegistry.available?(:markly)
+          :markly
+        else
+          # Let tree_haver raise the appropriate error
+          :commonmarker
+        end
+      end
+
+      # Create a parser for the resolved backend.
+      #
+      # @return [Object] tree_haver parser instance
+      def create_parser
+        case @backend
+        when :commonmarker
+          create_commonmarker_parser
+        when :markly
+          create_markly_parser
+        else
+          raise ArgumentError, "Unknown backend: #{@backend}"
+        end
+      end
+
+      # Create a Commonmarker parser via commonmarker-merge backend.
+      #
+      # @return [Commonmarker::Merge::Backend::Parser]
+      def create_commonmarker_parser
+        parser = Commonmarker::Merge::Backend::Parser.new
+        # Default options enable table extension for GFM compatibility
+        default_options = {extension: {table: true}}
+        options = default_options.merge(@parser_options[:options] || {})
+        parser.language = Commonmarker::Merge::Backend::Language.markdown(options: options)
+        parser
+      end
+
+      # Create a Markly parser via markly-merge backend.
+      #
+      # @return [Markly::Merge::Backend::Parser]
+      def create_markly_parser
+        parser = Markly::Merge::Backend::Parser.new
+        flags = @parser_options[:flags]
+        extensions = @parser_options[:extensions] || [:table]
+        parser.language = Markly::Merge::Backend::Language.markdown(
+          flags: flags,
+          extensions: extensions,
+        )
+        parser
+      end
+    end
+  end
+end
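
To make the freeze-block convention concrete, here is a minimal sketch that assumes only what this file documents: the default "markdown-merge" freeze token, the FileAnalysis constructor shown above, and the statements/merge_type iteration from its own @example. The sample README content and the require path (the gem ships both lib/markdown-merge.rb and lib/markdown/merge.rb) are illustrative.

```ruby
require "markdown-merge"

destination = <<~MARKDOWN
  # My Project

  <!-- markdown-merge:freeze -->
  Hand-written badges and links the template must not overwrite.
  <!-- markdown-merge:unfreeze -->

  ## Usage
MARKDOWN

# :auto picks Commonmarker if available, otherwise Markly.
analysis = Markdown::Merge::FileAnalysis.new(destination, backend: :auto)

analysis.statements.each do |node|
  puts "#{node.merge_type}: #{node.type}"
end
```

The same constructor accepts a custom freeze_token (e.g. "my-merge") when a project wants its own marker comments, as the @example in the class documentation shows.
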