RubyGems - ast-merge - Versions diffs - 1.0.0 → 2.0.0 - Mend

ast-merge 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/CHANGELOG.md +194 -1
data/README.md +235 -53
data/exe/ast-merge-recipe +366 -0
data/lib/ast/merge/ast_node.rb +224 -24
data/lib/ast/merge/comment/block.rb +6 -0
data/lib/ast/merge/comment/empty.rb +6 -0
data/lib/ast/merge/comment/line.rb +6 -0
data/lib/ast/merge/comment/parser.rb +9 -7
data/lib/ast/merge/conflict_resolver_base.rb +8 -1
data/lib/ast/merge/content_match_refiner.rb +278 -0
data/lib/ast/merge/debug_logger.rb +6 -1
data/lib/ast/merge/detector/base.rb +193 -0
data/lib/ast/merge/detector/fenced_code_block.rb +227 -0
data/lib/ast/merge/detector/mergeable.rb +369 -0
data/lib/ast/merge/detector/toml_frontmatter.rb +82 -0
data/lib/ast/merge/detector/yaml_frontmatter.rb +82 -0
data/lib/ast/merge/file_analyzable.rb +5 -3
data/lib/ast/merge/freeze_node_base.rb +1 -1
data/lib/ast/merge/match_refiner_base.rb +1 -1
data/lib/ast/merge/match_score_base.rb +1 -1
data/lib/ast/merge/merge_result_base.rb +4 -1
data/lib/ast/merge/merger_config.rb +33 -31
data/lib/ast/merge/navigable_statement.rb +630 -0
data/lib/ast/merge/partial_template_merger.rb +432 -0
data/lib/ast/merge/recipe/config.rb +198 -0
data/lib/ast/merge/recipe/preset.rb +171 -0
data/lib/ast/merge/recipe/runner.rb +254 -0
data/lib/ast/merge/recipe/script_loader.rb +181 -0
data/lib/ast/merge/recipe.rb +26 -0
data/lib/ast/merge/rspec/dependency_tags.rb +252 -0
data/lib/ast/merge/rspec/shared_examples/reproducible_merge.rb +3 -2
data/lib/ast/merge/rspec.rb +33 -2
data/lib/ast/merge/section_typing.rb +52 -50
data/lib/ast/merge/smart_merger_base.rb +86 -3
data/lib/ast/merge/text/line_node.rb +42 -9
data/lib/ast/merge/text/section_splitter.rb +12 -10
data/lib/ast/merge/text/word_node.rb +47 -14
data/lib/ast/merge/version.rb +1 -1
data/lib/ast/merge.rb +10 -6
data/sig/ast/merge.rbs +389 -2
data.tar.gz.sig +0 -0
metadata +76 -12
metadata.gz.sig +0 -0
data/lib/ast/merge/fenced_code_block_detector.rb +0 -211
data/lib/ast/merge/region.rb +0 -124
data/lib/ast/merge/region_detector_base.rb +0 -114
data/lib/ast/merge/region_mergeable.rb +0 -364
data/lib/ast/merge/toml_frontmatter_detector.rb +0 -88
data/lib/ast/merge/yaml_frontmatter_detector.rb +0 -108

data/lib/ast/merge/comment/parser.rb CHANGED Viewed

@@ -69,13 +69,15 @@ module Ast
           end
         end
-        # Class method for convenient one-shot parsing.
-        #
-        # @param lines [Array<String>] Source lines
-        # @param style [Style, Symbol, nil] Comment style
-        # @return [Array<AstNode>] Parsed nodes
-        def self.parse(lines, style: nil)
-          new(lines, style: style).parse
+        class << self
+          # Parse lines as comments.
+          #
+          # @param lines [Array<String>] Source lines
+          # @param style [Style, Symbol, nil] Comment style
+          # @return [Array<AstNode>] Parsed nodes
+          def parse(lines, style: nil)
+            new(lines, style: style).parse
+          end
         end
         private

data/lib/ast/merge/conflict_resolver_base.rb CHANGED Viewed

@@ -118,6 +118,9 @@ module Ast
       # @return [Boolean] Whether to add template-only nodes (batch strategy)
       attr_reader :add_template_only_nodes
+      # @return [Object, nil] Match refiner for fuzzy matching
+      attr_reader :match_refiner
       # Initialize the conflict resolver
       #
       # @param strategy [Symbol] Resolution strategy (:node, :batch, or :boundary)
@@ -129,7 +132,9 @@ module Ast
       # @param template_analysis [Object] Analysis of the template file
       # @param dest_analysis [Object] Analysis of the destination file
       # @param add_template_only_nodes [Boolean] Whether to add nodes only in template (batch/boundary strategy)
-      def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false)
+      # @param match_refiner [#call, nil] Optional match refiner for fuzzy matching
+      # @param options [Hash] Additional options for forward compatibility
+      def initialize(strategy:, preference:, template_analysis:, dest_analysis:, add_template_only_nodes: false, match_refiner: nil, **options)
         unless %i[node batch boundary].include?(strategy)
           raise ArgumentError, "Invalid strategy: #{strategy}. Must be :node, :batch, or :boundary"
         end
@@ -141,6 +146,8 @@ module Ast
         @template_analysis = template_analysis
         @dest_analysis = dest_analysis
         @add_template_only_nodes = add_template_only_nodes
+        @match_refiner = match_refiner
+        # **options captured for forward compatibility - subclasses may use additional options
       end
       # Resolve conflicts using the configured strategy

data/lib/ast/merge/content_match_refiner.rb ADDED Viewed

@@ -0,0 +1,278 @@
+# frozen_string_literal: true
+module Ast
+  module Merge
+    # Match refiner for text content-based fuzzy matching.
+    #
+    # This refiner uses Levenshtein distance to pair nodes that have similar
+    # but not identical text content. It's useful for matching nodes where
+    # the content has been slightly modified (typos, rewording, etc.).
+    #
+    # Unlike signature-based matching which requires exact content hashes,
+    # this refiner allows fuzzy matching based on text similarity. This is
+    # particularly useful for:
+    # - Paragraphs with minor edits
+    # - Headings with slight rewording
+    # - Comments with updated text
+    # - Any text-based node type
+    #
+    # @example Basic usage
+    #   refiner = ContentMatchRefiner.new(threshold: 0.7)
+    #   matches = refiner.call(template_nodes, dest_nodes)
+    #
+    # @example With specific node types
+    #   # Only match paragraphs and headings
+    #   refiner = ContentMatchRefiner.new(
+    #     threshold: 0.6,
+    #     node_types: [:paragraph, :heading]
+    #   )
+    #
+    # @example With custom content extractor
+    #   refiner = ContentMatchRefiner.new(
+    #     threshold: 0.7,
+    #     content_extractor: ->(node) { node.text_content.downcase.strip }
+    #   )
+    #
+    # @example Combined with other refiners
+    #   merger = SmartMerger.new(
+    #     template,
+    #     destination,
+    #     match_refiner: [
+    #       ContentMatchRefiner.new(threshold: 0.7, node_types: [:paragraph]),
+    #       TableMatchRefiner.new(threshold: 0.5)
+    #     ]
+    #   )
+    #
+    # @see MatchRefinerBase Base class
+    class ContentMatchRefiner < MatchRefinerBase
+      # Default weights for content similarity scoring.
+      # Any subset can be overridden through the +weights+ option of {#initialize}.
+      DEFAULT_WEIGHTS = {
+        content: 0.7,   # Text content similarity (Levenshtein)
+        length: 0.15,   # Length similarity
+        position: 0.15, # Position similarity in document
+      }.freeze
+      # Accessors tried, in this order, to read a node's text when no custom
+      # content extractor has been configured.
+      CONTENT_READERS = %i[text_content string_content content text].freeze
+      # @return [Hash] Scoring weights (DEFAULT_WEIGHTS merged with caller overrides)
+      attr_reader :weights
+      # @return [Proc, nil] Custom content extraction function
+      attr_reader :content_extractor
+      # Initialize a content match refiner.
+      #
+      # @param threshold [Float] Minimum score to accept a match (defaults to DEFAULT_THRESHOLD)
+      # @param node_types [Array<Symbol>] Node types to process (empty = all)
+      # @param weights [Hash] Custom scoring weights, merged over DEFAULT_WEIGHTS
+      # @param content_extractor [Proc, nil] Custom function to extract text from nodes
+      #   Should accept a node and return a String
+      # @param options [Hash] Additional options, forwarded to the superclass
+      def initialize(
+        threshold: DEFAULT_THRESHOLD,
+        node_types: [],
+        weights: {},
+        content_extractor: nil,
+        **options
+      )
+        super(threshold: threshold, node_types: node_types, **options)
+        @weights = DEFAULT_WEIGHTS.merge(weights)
+        @content_extractor = content_extractor
+      end
+      # Find matches between unmatched nodes based on content similarity.
+      #
+      # @param template_nodes [Array] Unmatched nodes from template
+      # @param dest_nodes [Array] Unmatched nodes from destination
+      # @param context [Hash] Additional context (may contain :template_analysis, :dest_analysis);
+      #   not read by this refiner
+      # @return [Array<MatchResult>] Array of content-based matches
+      def call(template_nodes, dest_nodes, context = {})
+        template_filtered = filter_nodes(template_nodes)
+        dest_filtered = filter_nodes(dest_nodes)
+        return [] if template_filtered.empty? || dest_filtered.empty?
+        total_template = template_filtered.size
+        total_dest = dest_filtered.size
+        # Index every node once, by object identity. Scanning the arrays with
+        # Array#index for each candidate pair costs a linear search per pair and,
+        # because it compares with ==, gives equal-but-distinct nodes the position
+        # of the first one.
+        template_positions = index_by_identity(template_filtered)
+        dest_positions = index_by_identity(dest_filtered)
+        greedy_match(template_filtered, dest_filtered) do |t_node, d_node|
+          # Fall back to the equality-based lookup if the yielded object is not
+          # one of the objects in the filtered list.
+          t_idx = template_positions.fetch(t_node) { template_filtered.index(t_node) || 0 }
+          d_idx = dest_positions.fetch(d_node) { dest_filtered.index(d_node) || 0 }
+          compute_content_similarity(
+            t_node,
+            d_node,
+            t_idx,
+            d_idx,
+            total_template,
+            total_dest,
+          )
+        end
+      end
+      protected
+      # Build an identity-keyed lookup from node to its first index in +nodes+.
+      #
+      # @param nodes [Array] Nodes to index
+      # @return [Hash{Object => Integer}] Hash comparing keys by identity
+      def index_by_identity(nodes)
+        positions = {}.compare_by_identity
+        nodes.each_with_index do |node, idx|
+          # Keep the first occurrence, mirroring Array#index
+          positions[node] = idx unless positions.key?(node)
+        end
+        positions
+      end
+      # Filter nodes by configured node types.
+      #
+      # @param nodes [Array] Nodes to filter
+      # @return [Array] Filtered nodes (matching node_types, or all if empty)
+      def filter_nodes(nodes)
+        return nodes if node_types.empty?
+        nodes.select { |n| handles_type?(extract_node_type(n)) }
+      end
+      # Extract the type from a node.
+      #
+      # Handles wrapped nodes (merge_type) and raw nodes (type).
+      #
+      # @param node [Object] The node
+      # @return [Symbol, nil] The node type, or nil when the node exposes none
+      def extract_node_type(node)
+        if NodeTyping.typed_node?(node)
+          NodeTyping.merge_type_for(node)
+        elsif node.respond_to?(:merge_type) && node.merge_type
+          node.merge_type
+        elsif node.respond_to?(:type)
+          type = node.type
+          type.is_a?(Symbol) ? type : type.to_s.to_sym
+        end
+      end
+      # Extract text content from a node.
+      #
+      # Uses the custom content_extractor if provided, otherwise the first
+      # accessor in CONTENT_READERS the node responds to, then +to_s+.
+      #
+      # @param node [Object] The node
+      # @return [String] The text content
+      def extract_content(node)
+        return @content_extractor.call(node) if @content_extractor
+        reader = CONTENT_READERS.find { |name| node.respond_to?(name) }
+        return node.public_send(reader).to_s if reader
+        node.respond_to?(:to_s) ? node.to_s : ""
+      end
+      # Compute similarity score between two nodes based on content.
+      #
+      # The result is the weighted sum of the content, length and position
+      # scores, using {#weights}.
+      #
+      # @param t_node [Object] Template node
+      # @param d_node [Object] Destination node
+      # @param t_idx [Integer] Template node index
+      # @param d_idx [Integer] Destination node index
+      # @param total_t [Integer] Total template nodes
+      # @param total_d [Integer] Total destination nodes
+      # @return [Float] Similarity score (0.0-1.0 with the default weights)
+      def compute_content_similarity(t_node, d_node, t_idx, d_idx, total_t, total_d)
+        t_content = extract_content(t_node)
+        d_content = extract_content(d_node)
+        # Calculate component scores
+        content_score = string_similarity(t_content, d_content)
+        length_score = length_similarity(t_content, d_content)
+        position_score = position_similarity(t_idx, d_idx, total_t, total_d)
+        # Weighted combination
+        weights[:content] * content_score +
+          weights[:length] * length_score +
+          weights[:position] * position_score
+      end
+      # Calculate string similarity using Levenshtein distance.
+      #
+      # @param str1 [String] First string
+      # @param str2 [String] Second string
+      # @return [Float] Similarity score (0.0-1.0)
+      def string_similarity(str1, str2)
+        return 1.0 if str1 == str2
+        return 0.0 if str1.empty? || str2.empty?
+        distance = levenshtein_distance(str1, str2)
+        max_len = [str1.length, str2.length].max
+        1.0 - (distance.to_f / max_len)
+      end
+      # Calculate length similarity between two strings.
+      #
+      # @param str1 [String] First string
+      # @param str2 [String] Second string
+      # @return [Float] Ratio of the shorter length to the longer one (0.0-1.0)
+      def length_similarity(str1, str2)
+        shorter, longer = [str1.length, str2.length].minmax
+        # Equal lengths, including two empty strings, are a perfect match.
+        # Returning here also keeps the division below away from 0/0.
+        return 1.0 if shorter == longer
+        shorter.to_f / longer
+      end
+      # Calculate position similarity in document.
+      #
+      # Nodes at similar relative positions score higher.
+      #
+      # @param idx1 [Integer] First node index
+      # @param idx2 [Integer] Second node index
+      # @param total1 [Integer] Total nodes in first collection
+      # @param total2 [Integer] Total nodes in second collection
+      # @return [Float] Similarity score (0.0-1.0)
+      def position_similarity(idx1, idx2, total1, total2)
+        # Normalize positions to 0.0-1.0 range; a single-node collection sits at 0.5
+        pos1 = (total1 > 1) ? idx1.to_f / (total1 - 1) : 0.5
+        pos2 = (total2 > 1) ? idx2.to_f / (total2 - 1) : 0.5
+        1.0 - (pos1 - pos2).abs
+      end
+      # Calculate Levenshtein distance between two strings.
+      #
+      # Uses Wagner-Fischer algorithm with space optimization.
+      #
+      # @param str1 [String] First string
+      # @param str2 [String] Second string
+      # @return [Integer] Edit distance
+      def levenshtein_distance(str1, str2)
+        return str2.length if str1.empty?
+        return str1.length if str2.empty?
+        # Use shorter string as columns for space efficiency
+        str1, str2 = str2, str1 if str1.length > str2.length
+        # Split into characters once; indexing a String by character position
+        # inside the inner loop can require a scan for multibyte text.
+        cols = str1.chars
+        rows = str2.chars
+        m = cols.length
+        # Only need two rows at a time
+        prev_row = (0..m).to_a
+        curr_row = Array.new(m + 1)
+        rows.each_with_index do |row_char, j|
+          curr_row[0] = j + 1
+          cols.each_with_index do |col_char, i|
+            cost = (col_char == row_char) ? 0 : 1
+            curr_row[i + 1] = [
+              curr_row[i] + 1,      # insertion
+              prev_row[i + 1] + 1,  # deletion
+              prev_row[i] + cost,   # substitution
+            ].min
+          end
+          prev_row, curr_row = curr_row, prev_row
+        end
+        prev_row[m]
+      end
+    end
+  end
+end

data/lib/ast/merge/debug_logger.rb CHANGED Viewed

@@ -71,8 +71,9 @@ module Ast
     # @note Shared examples require +silent_stream+ and +rspec-stubbed_env+ gems.
     module DebugLogger
       # Benchmark is optional - gracefully degrade if not available
+      # Use autoload to defer loading until actually needed
       BENCHMARK_AVAILABLE = begin
-        require "benchmark"
+        autoload(:Benchmark, "benchmark")
         true
       rescue LoadError
         # :nocov:
@@ -83,10 +84,14 @@ module Ast
       class << self
         # @return [String] Environment variable name to check for debug mode
+        # rubocop:disable ThreadSafety/ClassAndModuleAttributes - Configuration attribute, set once at load time
         attr_accessor :env_var_name
+        # rubocop:enable ThreadSafety/ClassAndModuleAttributes
         # @return [String] Prefix for log messages
+        # rubocop:disable ThreadSafety/ClassAndModuleAttributes - Configuration attribute, set once at load time
         attr_accessor :log_prefix
+        # rubocop:enable ThreadSafety/ClassAndModuleAttributes
         # Hook called when a module extends Ast::Merge::DebugLogger.
         # Sets up attr_accessor for env_var_name and log_prefix on the extending module,

data/lib/ast/merge/detector/base.rb ADDED Viewed

@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+module Ast
+  module Merge
+    # Detector namespace for region detection and merging functionality.
+    #
+    # Regions are portions of a document that can be handled by a specialized
+    # merger. For example, YAML frontmatter in a Markdown file, or Ruby code
+    # blocks that should be merged with Prism.
+    #
+    # @example Detecting regions
+    #   detector = Ast::Merge::Detector::FencedCodeBlock.ruby
+    #   regions = detector.detect_all(markdown_content)
+    #   regions.each do |region|
+    #     puts "Found #{region.type} at lines #{region.start_line}-#{region.end_line}"
+    #   end
+    #
+    # @see Detector::Region Data struct for detected regions
+    # @see Detector::Base Base class for detectors
+    # @see Detector::Mergeable Mixin for region-aware merging
+    #
+    module Detector
+      # Represents a detected region within a document.
+      #
+      # Regions are portions of a document that can be handled by a specialized
+      # merger. For example, YAML frontmatter in a Markdown file, or a Ruby code
+      # block that should be merged using a Ruby-aware merger.
+      #
+      # @example Creating a region for YAML frontmatter
+      #   Region.new(
+      #     type: :yaml_frontmatter,
+      #     content: "title: My Doc\nversion: 1.0\n",
+      #     start_line: 1,
+      #     end_line: 4,
+      #     delimiters: ["---", "---"],
+      #     metadata: { format: :yaml }
+      #   )
+      #
+      # @api public
+      Region = Struct.new(
+        # @return [Symbol] The type of region (e.g., :yaml_frontmatter, :ruby_code_block)
+        :type,
+        # @return [String] The raw string content of this region (inner content, without delimiters)
+        :content,
+        # @return [Integer] 1-indexed start line in the original document
+        :start_line,
+        # @return [Integer] 1-indexed end line in the original document
+        :end_line,
+        # @return [Array<String>, nil] Delimiter strings to reconstruct the region
+        :delimiters,
+        # @return [Hash, nil] Optional metadata for detector-specific information
+        :metadata,
+        keyword_init: true,
+      ) do
+        # Returns the line range covered by this region.
+        # @return [Range] Inclusive range from start_line to end_line
+        def line_range
+          start_line..end_line
+        end
+        # Returns the number of lines this region spans.
+        # @return [Integer] Inclusive count, so a single-line region is 1
+        def line_count
+          end_line - start_line + 1
+        end
+        # Reconstructs the full region text including delimiters.
+        #
+        # The opening delimiter is followed by a newline; the closing delimiter
+        # is appended directly after the content, with no separator inserted.
+        # @return [String] The content alone when there are no delimiters
+        def full_text
+          return content if delimiters.nil? || delimiters.empty?
+          opening = delimiters[0] || ""
+          closing = delimiters[1] || ""
+          "#{opening}\n#{content}#{closing}"
+        end
+        # Checks if this region contains the given line number.
+        # @param line [Integer] The line number to check (1-indexed)
+        # @return [Boolean]
+        def contains_line?(line)
+          line_range.cover?(line)
+        end
+        # Checks if this region overlaps with another region.
+        # @param other [Region] Another region
+        # @return [Boolean] true when the two line ranges share at least one line
+        def overlaps?(other)
+          line_range.cover?(other.start_line) ||
+            line_range.cover?(other.end_line) ||
+            other.line_range.cover?(start_line)
+        end
+        # @return [String] Short form: type and line span
+        def to_s
+          "Region<#{type}:#{start_line}-#{end_line}>"
+        end
+        # One-line debug representation: {#to_s} followed by a content preview.
+        #
+        # Content longer than 30 characters is cut to its first 30 and marked
+        # with a trailing "...". The preview always goes through String#inspect
+        # so embedded newlines are escaped and the output stays on a single line.
+        # @return [String]
+        def inspect
+          preview = if content && content.length > 30
+            "#{content[0, 30].inspect}..."
+          else
+            content.inspect
+          end
+          "#{self} #{preview}"
+        end
+      end
+      # Base class for region detection.
+      #
+      # Region detectors identify portions of a document that should be handled
+      # by a specialized merger.
+      #
+      # Subclasses must implement:
+      # - {#region_type} - Returns the type symbol for detected regions
+      # - {#detect_all} - Finds all regions of this type in a document
+      #
+      # @example Implementing a custom detector
+      #   class MyBlockDetector < Ast::Merge::Detector::Base
+      #     def region_type
+      #       :my_block
+      #     end
+      #
+      #     def detect_all(source)
+      #       # Return array of Region structs
+      #       []
+      #     end
+      #   end
+      #
+      # @abstract Subclass and implement {#region_type} and {#detect_all}
+      # @api public
+      #
+      class Base
+        # Returns the type symbol for regions detected by this detector.
+        # @return [Symbol]
+        # @raise [NotImplementedError] unless overridden by a subclass
+        # @abstract
+        def region_type
+          raise NotImplementedError, "#{self.class}#region_type must be implemented"
+        end
+        # Detects all regions of this type in the given source.
+        # @param _source [String] The full document content to scan
+        # @return [Array<Region>] All detected regions, sorted by start_line
+        # @raise [NotImplementedError] unless overridden by a subclass
+        # @abstract
+        def detect_all(_source)
+          raise NotImplementedError, "#{self.class}#detect_all must be implemented"
+        end
+        # Whether to strip delimiters from content before passing to merger.
+        # @return [Boolean] true by default; subclasses may override
+        def strip_delimiters?
+          true
+        end
+        # A human-readable name for this detector.
+        # @return [String] The class name, or "AnonymousDetector" for unnamed classes
+        def name
+          self.class.name || "AnonymousDetector"
+        end
+        # Debug representation showing the detector name and its region type.
+        #
+        # Safe to call on a detector that has not implemented {#region_type}:
+        # the abstract method's NotImplementedError is caught here and replaced
+        # with a placeholder, so inspecting an instance never raises because of it.
+        # @return [String]
+        def inspect
+          type_label = begin
+            region_type
+          rescue NotImplementedError
+            "(not implemented)"
+          end
+          "#<#{name} region_type=#{type_label}>"
+        end
+        protected
+        # Helper to build a Region struct.
+        #
+        # @param type [Symbol] Region type
+        # @param content [String] Inner content of the region
+        # @param start_line [Integer] Start line of the region
+        # @param end_line [Integer] End line of the region
+        # @param delimiters [Array<String>, nil] Opening and closing delimiters
+        # @param metadata [Hash, nil] Detector-specific metadata; nil becomes an empty Hash
+        # @return [Region]
+        def build_region(type:, content:, start_line:, end_line:, delimiters: nil, metadata: nil)
+          Region.new(
+            type: type,
+            content: content,
+            start_line: start_line,
+            end_line: end_line,
+            delimiters: delimiters,
+            metadata: metadata || {},
+          )
+        end
+      end
+      autoload :FencedCodeBlock, "ast/merge/detector/fenced_code_block"
+      autoload :YamlFrontmatter, "ast/merge/detector/yaml_frontmatter"
+      autoload :TomlFrontmatter, "ast/merge/detector/toml_frontmatter"
+      autoload :Mergeable, "ast/merge/detector/mergeable"
+    end
+  end
+end