RubyGems - markdown-merge - Versions diffs - 1.0.0 - Mend

markdown-merge 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data/CHANGELOG.md +251 -0
data/CITATION.cff +20 -0
data/CODE_OF_CONDUCT.md +134 -0
data/CONTRIBUTING.md +227 -0
data/FUNDING.md +74 -0
data/LICENSE.txt +21 -0
data/README.md +1087 -0
data/REEK +0 -0
data/RUBOCOP.md +71 -0
data/SECURITY.md +21 -0
data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
data/lib/markdown/merge/cleanse.rb +42 -0
data/lib/markdown/merge/code_block_merger.rb +300 -0
data/lib/markdown/merge/conflict_resolver.rb +128 -0
data/lib/markdown/merge/debug_logger.rb +26 -0
data/lib/markdown/merge/document_problems.rb +190 -0
data/lib/markdown/merge/file_aligner.rb +196 -0
data/lib/markdown/merge/file_analysis.rb +353 -0
data/lib/markdown/merge/file_analysis_base.rb +629 -0
data/lib/markdown/merge/freeze_node.rb +93 -0
data/lib/markdown/merge/gap_line_node.rb +136 -0
data/lib/markdown/merge/link_definition_formatter.rb +49 -0
data/lib/markdown/merge/link_definition_node.rb +157 -0
data/lib/markdown/merge/link_parser.rb +421 -0
data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
data/lib/markdown/merge/markdown_structure.rb +123 -0
data/lib/markdown/merge/merge_result.rb +166 -0
data/lib/markdown/merge/node_type_normalizer.rb +126 -0
data/lib/markdown/merge/output_builder.rb +166 -0
data/lib/markdown/merge/partial_template_merger.rb +334 -0
data/lib/markdown/merge/smart_merger.rb +221 -0
data/lib/markdown/merge/smart_merger_base.rb +621 -0
data/lib/markdown/merge/table_match_algorithm.rb +504 -0
data/lib/markdown/merge/table_match_refiner.rb +136 -0
data/lib/markdown/merge/version.rb +12 -0
data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
data/lib/markdown/merge.rb +149 -0
data/lib/markdown-merge.rb +4 -0
data/sig/markdown/merge.rbs +341 -0
data.tar.gz.sig +0 -0
metadata +365 -0
metadata.gz.sig +0 -0

data/lib/markdown/merge/node_type_normalizer.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Alias for the shared normalizer module from ast-merge
+    NodeTypingNormalizer = Ast::Merge::NodeTyping::Normalizer
+    # Normalizes backend-specific node types to canonical markdown types.
+    #
+    # Uses Ast::Merge::NodeTyping::Wrapper to wrap nodes with canonical
+    # merge_type, allowing portable merge rules across backends.
+    #
+    # ## Thread Safety
+    #
+    # All backend registration and lookup operations are thread-safe via
+    # the shared Ast::Merge::NodeTyping::Normalizer module.
+    #
+    # ## Extensibility
+    #
+    # New backends can be registered at runtime:
+    #
+    # @example Registering a new backend
+    #   NodeTypeNormalizer.register_backend(:tree_sitter_markdown, {
+    #     atx_heading: :heading,
+    #     setext_heading: :heading,
+    #     fenced_code_block: :code_block,
+    #     indented_code_block: :code_block,
+    #     paragraph: :paragraph,
+    #     bullet_list: :list,
+    #     ordered_list: :list,
+    #     block_quote: :block_quote,
+    #     thematic_break: :thematic_break,
+    #     html_block: :html_block,
+    #     pipe_table: :table,
+    #   })
+    #
+    # ## Canonical Types
+    #
+    # The following canonical types are used for portable merge rules:
+    # - `:heading` - Headers/headings (H1-H6)
+    # - `:paragraph` - Text paragraphs
+    # - `:code_block` - Fenced or indented code blocks
+    # - `:list` - Ordered or unordered lists
+    # - `:block_quote` - Block quotations
+    # - `:thematic_break` - Horizontal rules
+    # - `:html_block` - Raw HTML blocks
+    # - `:table` - Tables (GFM extension)
+    # - `:footnote_definition` - Footnote definitions
+    # - `:custom_block` - Custom/extension blocks
+    #
+    # @see Ast::Merge::NodeTyping::Wrapper
+    # @see Ast::Merge::NodeTyping::Normalizer
+    module NodeTypeNormalizer
+      extend NodeTypingNormalizer
+      # Configure default backend mappings.
+      # Maps backend-specific type symbols to canonical type symbols.
+      #
+      # Includes both top-level block types and child node types (table rows, cells, etc.)
+      # to enable consistent type checking across the entire AST.
+      configure_normalizer(
+        commonmarker: {
+          # Block types (top-level statements)
+          heading: :heading,
+          paragraph: :paragraph,
+          code_block: :code_block,
+          list: :list,
+          block_quote: :block_quote,
+          thematic_break: :thematic_break,
+          html_block: :html_block,
+          table: :table,
+          footnote_definition: :footnote_definition,
+          # Table child types
+          table_row: :table_row,
+          table_cell: :table_cell,
+          table_header: :table_header,  # Some parsers distinguish header rows
+          # List child types
+          list_item: :list_item,
+          item: :list_item,             # Alias
+          # Inline types (usually not top-level, but map them anyway)
+          text: :text,
+          softbreak: :softbreak,
+          linebreak: :linebreak,
+          code: :code,
+          code_inline: :code,           # Alias used by some parsers
+          html_inline: :html_inline,
+          emph: :emph,
+          strong: :strong,
+          link: :link,
+          image: :image,
+        }.freeze,
+        markly: {
+          # Block types - note different names from commonmarker
+          header: :heading,           # markly uses :header, not :heading
+          paragraph: :paragraph,
+          code_block: :code_block,
+          list: :list,
+          blockquote: :block_quote,   # markly uses :blockquote, not :block_quote
+          hrule: :thematic_break,     # markly uses :hrule, not :thematic_break
+          html: :html_block,          # markly uses :html, not :html_block
+          table: :table,
+          footnote_definition: :footnote_definition,
+          custom_block: :custom_block,
+          # Table child types
+          table_row: :table_row,
+          table_cell: :table_cell,
+          table_header: :table_header,
+          # List child types
+          list_item: :list_item,
+          item: :list_item,
+          # Inline types
+          text: :text,
+          softbreak: :softbreak,
+          linebreak: :linebreak,
+          code: :code,
+          code_inline: :code,
+          html_inline: :html_inline,
+          emph: :emph,
+          strong: :strong,
+          link: :link,
+          image: :image,
+        }.freeze,
+      )
+    end
+  end
+end

data/lib/markdown/merge/output_builder.rb ADDED Viewed

@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Builds markdown output from merge operations.
+    #
+    # Handles markdown-specific concerns like:
+    # - Extracting source from original nodes
+    # - Reconstructing consumed link reference definitions
+    # - Preserving gap lines (blank line spacing)
+    # - Automatic structural spacing (blank lines between tables, headings, etc.)
+    # - Assembling final merged content
+    #
+    # Unlike Emitter classes used in JSON/YAML/etc, OutputBuilder focuses on
+    # source preservation and reconstruction rather than generation from scratch.
+    #
+    # @example Basic usage
+    #   builder = OutputBuilder.new
+    #   builder.add_node_source(node, analysis)
+    #   builder.add_link_definition(link_def_node)
+    #   builder.add_gap_line(count: 2)
+    #   content = builder.to_s
+    class OutputBuilder
+      # Initialize a new OutputBuilder
+      #
+      # @param preserve_formatting [Boolean] Whether to preserve original formatting
+      # @param auto_spacing [Boolean] Whether to automatically insert blank lines between structural elements
+      def initialize(preserve_formatting: true, auto_spacing: true)
+        @parts = []
+        @preserve_formatting = preserve_formatting
+        @auto_spacing = auto_spacing
+        @last_node_type = nil  # Track previous node type for spacing decisions
+      end
+      # Add a node's source content
+      #
+      # Automatically inserts structural blank lines when transitioning between
+      # certain node types (tables, headings, code blocks, etc.) if auto_spacing is enabled.
+      #
+      # @param node [Object] Node to add (can be parser node, FreezeNode, LinkDefinitionNode, etc.)
+      # @param analysis [FileAnalysisBase] Analysis for accessing source
+      def add_node_source(node, analysis)
+        # Determine node type for spacing decisions
+        current_type = MarkdownStructure.node_type(node)
+        # Auto-spacing logic:
+        # - Skip for gap_line and freeze_block (they handle their own spacing)
+        # - Skip if last node was a gap_line (we already have spacing)
+        # - Otherwise, check MarkdownStructure.needs_blank_between? which handles
+        #   contiguous types (like link_definitions that shouldn't have blanks between them)
+        unless [:gap_line, :freeze_block].include?(current_type) ||
+            @last_node_type == :gap_line
+          if @auto_spacing && @last_node_type && current_type
+            if MarkdownStructure.needs_blank_between?(@last_node_type, current_type)
+              # Only add spacing if we don't already have adequate blank lines
+              # Check the last part to see if it already ends with blank line(s)
+              unless @parts.empty? || @parts.last&.end_with?("\n\n")
+                add_gap_line(count: 1)
+              end
+            end
+          end
+        end
+        content = extract_source(node, analysis)
+        if content && !content.empty?
+          @parts << content
+          # Update last node type (track all node types for proper spacing)
+          @last_node_type = current_type
+        end
+      end
+      # Add a reconstructed link definition
+      #
+      # @param node [LinkDefinitionNode] Link definition node
+      def add_link_definition(node)
+        formatted = LinkDefinitionFormatter.format(node)
+        @parts << formatted if formatted && !formatted.empty?
+      end
+      # Add gap lines (blank line preservation)
+      #
+      # @param count [Integer] Number of blank lines to add
+      def add_gap_line(count: 1)
+        @parts << ("\n" * count) if count > 0
+      end
+      # Add raw text content
+      #
+      # @param text [String] Raw text to add
+      def add_raw(text)
+        @parts << text if text && !text.empty?
+      end
+      # Get final content
+      #
+      # @return [String] Assembled markdown content
+      def to_s
+        @parts.join
+      end
+      # Check if builder has any content
+      #
+      # @return [Boolean]
+      def empty?
+        @parts.empty?
+      end
+      # Clear all content
+      def clear
+        @parts.clear
+      end
+      private
+      # Extract source content from a node
+      #
+      # @param node [Object] Node to extract from
+      # @param analysis [FileAnalysisBase] Analysis for source access
+      # @return [String, nil] Extracted content
+      def extract_source(node, analysis)
+        case node
+        when LinkDefinitionNode
+          # Link definitions need reconstruction with trailing newline
+          "#{LinkDefinitionFormatter.format(node)}\n"
+        when GapLineNode
+          # Gap lines are single blank lines
+          "\n"
+        when Ast::Merge::FreezeNodeBase
+          # Freeze blocks have their full text
+          node.full_text
+        else
+          # Regular nodes - extract from source
+          extract_parser_node_source(node, analysis)
+        end
+      end
+      # Extract source from a parser-specific node
+      #
+      # @param node [Object] Parser node
+      # @param analysis [FileAnalysisBase] Analysis for source access
+      # @return [String, nil] Extracted content
+      def extract_parser_node_source(node, analysis)
+        # Try source_position method first (used by some nodes)
+        if node.respond_to?(:source_position)
+          pos = node.source_position
+          start_line = pos&.dig(:start_line)
+          end_line = pos&.dig(:end_line)
+          if start_line && end_line
+            return analysis.source_range(start_line, end_line)
+          elsif node.respond_to?(:to_commonmark)
+            # Fallback to commonmark rendering
+            return node.to_commonmark
+          end
+        end
+        # Try direct start_line/end_line attributes
+        return unless node.respond_to?(:start_line) && node.respond_to?(:end_line)
+        return unless node.start_line && node.end_line
+        # Extract source range (formatting preservation handled elsewhere)
+        analysis.source_range(node.start_line, node.end_line)
+      end
+    end
+  end
+end

data/lib/markdown/merge/partial_template_merger.rb ADDED Viewed

@@ -0,0 +1,334 @@
+# frozen_string_literal: true
+module Markdown
+  module Merge
+    # Markdown-specific implementation of PartialTemplateMerger.
+    #
+    # Merges a partial template into a specific section of a destination markdown document.
+    # This class extends the parser-agnostic base with markdown-specific logic for:
+    # - Heading-level-aware section boundaries
+    # - Source-based text extraction to preserve link references and table formatting
+    # - Backend-specific parser initialization (Markly, Commonmarker)
+    #
+    # @example Basic usage
+    #   merger = Markdown::Merge::PartialTemplateMerger.new(
+    #     template: template_content,
+    #     destination: destination_content,
+    #     anchor: { type: :heading, text: /Gem Family/ },
+    #     backend: :markly
+    #   )
+    #   result = merger.merge
+    #   puts result.content
+    #
+    # @example With boundary
+    #   merger = Markdown::Merge::PartialTemplateMerger.new(
+    #     template: template_content,
+    #     destination: destination_content,
+    #     anchor: { type: :heading, text: /Installation/ },
+    #     boundary: { type: :heading },  # Stop at next heading
+    #     backend: :markly
+    #   )
+    #
+    class PartialTemplateMerger < Ast::Merge::PartialTemplateMergerBase
+      # Re-export Result class from base for convenience
+      Result = Ast::Merge::PartialTemplateMergerBase::Result
+      # @return [Symbol] Backend to use (:markly, :commonmarker)
+      attr_reader :backend
+      # Initialize a markdown PartialTemplateMerger.
+      #
+      # @param template [String] The template content (the section to merge in)
+      # @param destination [String] The destination content
+      # @param anchor [Hash] Anchor matcher: { type: :heading, text: /pattern/ }
+      # @param boundary [Hash, nil] Boundary matcher (defaults to same type as anchor)
+      # @param backend [Symbol] Backend to use (:markly, :commonmarker)
+      # @param preference [Symbol, Hash] Which content wins (:template, :destination, or per-type hash)
+      # @param add_missing [Boolean, Proc] Whether to add template nodes not in destination
+      # @param when_missing [Symbol] What to do if section not found (:skip, :append, :prepend)
+      # @param replace_mode [Boolean] If true, template replaces section entirely (no merge)
+      # @param signature_generator [Proc, nil] Custom signature generator for SmartMerger
+      # @param node_typing [Hash, nil] Node typing configuration for per-type preferences
+      # @param match_refiner [Object, nil] Match refiner for fuzzy matching (e.g., ContentMatchRefiner)
+      # @param normalize_whitespace [Boolean] If true, collapse excessive blank lines. Default: false
+      # @param rehydrate_link_references [Boolean] If true, convert inline links to reference style. Default: false
+      def initialize(
+        template:,
+        destination:,
+        anchor:,
+        boundary: nil,
+        backend: :markly,
+        preference: :template,
+        add_missing: true,
+        when_missing: :skip,
+        replace_mode: false,
+        signature_generator: nil,
+        node_typing: nil,
+        match_refiner: nil,
+        normalize_whitespace: false,
+        rehydrate_link_references: false
+      )
+        validate_backend!(backend)
+        @backend = backend
+        @normalize_whitespace = normalize_whitespace
+        @rehydrate_link_references = rehydrate_link_references
+        super(
+          template: template,
+          destination: destination,
+          anchor: anchor,
+          boundary: boundary,
+          preference: preference,
+          add_missing: add_missing,
+          when_missing: when_missing,
+          replace_mode: replace_mode,
+          signature_generator: signature_generator,
+          node_typing: node_typing,
+          match_refiner: match_refiner,
+        )
+      end
+      # Perform the partial template merge with post-processing.
+      #
+      # @return [Result] The merge result
+      def merge
+        result = super
+        # Apply post-processing if enabled
+        if result.changed && (@normalize_whitespace || @rehydrate_link_references)
+          content = result.content
+          problems = DocumentProblems.new
+          if @normalize_whitespace
+            normalizer = WhitespaceNormalizer.new(content)
+            content = normalizer.normalize
+            problems.merge!(normalizer.problems)
+          end
+          if @rehydrate_link_references
+            rehydrator = LinkReferenceRehydrator.new(content)
+            content = rehydrator.rehydrate
+            problems.merge!(rehydrator.problems)
+          end
+          # Return new result with transformed content and problems
+          Result.new(
+            content: content,
+            has_section: result.has_section,
+            changed: result.changed,
+            stats: result.stats.merge(problems: problems.all),
+            injection_point: result.injection_point,
+            message: result.message,
+          )
+        else
+          result
+        end
+      end
+      protected
+      # Validate the backend parameter.
+      #
+      # @param backend [Symbol] The backend to validate
+      # @raise [ArgumentError] If backend is not supported
+      def validate_backend!(backend)
+        valid_backends = [:markly, :commonmarker]
+        return if valid_backends.include?(backend.to_sym)
+        raise ArgumentError, "Unknown backend: #{backend}. Supported: #{valid_backends.join(", ")}"
+      end
+      # Create a FileAnalysis for the given content.
+      #
+      # @param content [String] The content to analyze
+      # @return [FileAnalysis] A FileAnalysis instance
+      def create_analysis(content)
+        FileAnalysis.new(content, backend: backend)
+      end
+      # Create a SmartMerger for merging the section.
+      #
+      # @param template_content [String] The template content
+      # @param destination_content [String] The destination section content
+      # @return [SmartMerger] A SmartMerger instance
+      def create_smart_merger(template_content, destination_content)
+        # Build options hash, only including non-nil values
+        options = {
+          preference: preference,
+          add_template_only_nodes: add_missing,
+          backend: backend,
+        }
+        # Use custom signature generator if provided, otherwise use position-based
+        # table matching to ensure tables with different structures still match
+        # within a section merge context.
+        options[:signature_generator] = signature_generator || build_position_based_signature_generator
+        options[:node_typing] = node_typing if node_typing
+        options[:match_refiner] = match_refiner if match_refiner
+        SmartMerger.new(template_content, destination_content, **options)
+      end
+      # Build a signature generator that uses type-based matching for tables.
+      #
+      # This ensures that tables within a section are matched by type alone,
+      # allowing template tables to replace destination tables regardless of
+      # their exact structure (different headers, columns, etc.).
+      #
+      # In the context of partial template merging, this is the desired behavior:
+      # - Sections typically contain one table of each logical role
+      # - Template table should replace the destination table
+      # - Different table structures should still match by ordinal position
+      #
+      # The algorithm uses a stateless approach that assigns the same signature
+      # to all tables. Since PartialTemplateMerger merges **one section at a time**,
+      # each section typically has few tables, and the first table in template
+      # will match and replace the first table in destination.
+      #
+      # For more precise control over multiple tables within a section, provide
+      # a custom signature_generator.
+      #
+      # @return [Proc] A signature generator proc
+      def build_position_based_signature_generator
+        # Simple stateless approach: all tables get the same base signature.
+        # When preference is :template, this causes template table to replace
+        # destination table, which is the desired behavior.
+        #
+        # NOTE: If a section has multiple tables, they will ALL match each other,
+        # potentially causing unexpected behavior. For such cases, users should
+        # provide a custom signature_generator.
+        lambda do |node|
+          type_str = node.type.to_s
+          if type_str == "table"
+            # All tables within a section merge get the same signature.
+            # This ensures template table replaces destination table.
+            [:table, :section_table]
+          else
+            # Return node for default signature computation
+            node
+          end
+        end
+      end
+      # Find where the section ends.
+      #
+      # For headings, finds the next heading of same or higher level.
+      # For other node types, finds the next node of the same type.
+      #
+      # NOTE: For headings, we ALWAYS use heading-level-aware logic, ignoring
+      # any boundary from InjectionPointFinder. This is because InjectionPointFinder
+      # uses tree_depth for boundary detection, but in Markdown all headings are
+      # siblings at the same tree depth regardless of their level (H2, H3, H4 etc).
+      # Heading level semantics require comparing the actual heading level numbers.
+      #
+      # @param statements [Array<Navigable::Statement>] All statements
+      # @param injection_point [Navigable::InjectionPoint] The injection point
+      # @return [Integer] Index of the last statement in the section
+      def find_section_end(statements, injection_point)
+        anchor = injection_point.anchor
+        anchor_type = anchor.type
+        # For headings, ALWAYS use heading-level-aware logic
+        # This overrides any boundary from InjectionPointFinder because tree_depth
+        # doesn't reflect heading level semantics in Markdown
+        if heading_type?(anchor_type)
+          anchor_level = get_heading_level(anchor)
+          ((anchor.index + 1)...statements.length).each do |idx|
+            stmt = statements[idx]
+            if heading_type?(stmt.type)
+              stmt_level = get_heading_level(stmt)
+              if stmt_level && anchor_level && stmt_level <= anchor_level
+                # Found next heading of same or higher level - section ends before it
+                return idx - 1
+              end
+            end
+          end
+          # No boundary heading found - section extends to end of document
+          return statements.length - 1
+        end
+        # For non-headings, use boundary if specified and found
+        if injection_point.boundary
+          return injection_point.boundary.index - 1
+        end
+        # Otherwise, find next node of same type
+        ((anchor.index + 1)...statements.length).each do |idx|
+          stmt = statements[idx]
+          if stmt.type == anchor_type
+            return idx - 1
+          end
+        end
+        # Section extends to end of document
+        statements.length - 1
+      end
+      # Convert a node to its source text.
+      #
+      # Prefers source-based extraction to preserve original formatting
+      # (link references, table padding, etc.). Falls back to to_commonmark.
+      #
+      # @param node [Object] The node to convert
+      # @param analysis [FileAnalysis, nil] The analysis object for source lookup
+      # @return [String] The source text
+      def node_to_text(node, analysis = nil)
+        # Unwrap if needed
+        inner = node
+        while inner.respond_to?(:inner_node) && inner.inner_node != inner
+          inner = inner.inner_node
+        end
+        # Prefer source-based extraction to preserve original formatting
+        # (link references, table padding, etc.)
+        if analysis&.respond_to?(:source_range)
+          pos = inner.source_position if inner.respond_to?(:source_position)
+          if pos
+            start_line = pos[:start_line]
+            end_line = pos[:end_line]
+            if start_line && end_line && start_line > 0
+              source_text = analysis.source_range(start_line, end_line)
+              # source_range already adds trailing newlines, don't add another
+              return source_text unless source_text.empty?
+            end
+          end
+        end
+        # Fallback to to_commonmark (for nodes without source position)
+        if inner.respond_to?(:to_commonmark)
+          inner.to_commonmark.to_s
+        elsif inner.respond_to?(:to_s)
+          inner.to_s
+        else
+          ""
+        end
+      end
+      private
+      # Check if a type represents a heading node.
+      #
+      # @param type [Symbol, String] The node type
+      # @return [Boolean] true if this is a heading type
+      def heading_type?(type)
+        type.to_s == "heading" || type == :heading || type == :header
+      end
+      # Get the heading level from a statement.
+      #
+      # @param stmt [NavigableStatement] The statement
+      # @return [Integer, nil] The heading level (1-6) or nil
+      def get_heading_level(stmt)
+        inner = stmt.respond_to?(:unwrapped_node) ? stmt.unwrapped_node : stmt.node
+        if inner.respond_to?(:header_level)
+          inner.header_level
+        elsif inner.respond_to?(:level)
+          inner.level
+        end
+      end
+    end
+  end
+end