RubyGems - tree_haver - Versions diffs - 1.0.0 → 2.0.0 - Mend

tree_haver 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/CHANGELOG.md +47 -3
data/README.md +130 -76
data/lib/tree_haver/backends/citrus.rb +302 -0
data/lib/tree_haver/backends/ffi.rb +75 -17
data/lib/tree_haver/backends/java.rb +11 -7
data/lib/tree_haver/backends/mri.rb +10 -20
data/lib/tree_haver/backends/rust.rb +8 -20
data/lib/tree_haver/grammar_finder.rb +1 -1
data/lib/tree_haver/node.rb +376 -0
data/lib/tree_haver/path_validator.rb +18 -3
data/lib/tree_haver/tree.rb +205 -0
data/lib/tree_haver/version.rb +2 -2
data/lib/tree_haver.rb +44 -229
data/sig/tree_haver/backends.rbs +68 -1
data/sig/tree_haver/path_validator.rbs +1 -0
data/sig/tree_haver.rbs +95 -9
data.tar.gz.sig +0 -0
metadata +11 -8
metadata.gz.sig +0 -0

data/lib/tree_haver/node.rb ADDED Viewed

@@ -0,0 +1,376 @@
+# frozen_string_literal: true
+module TreeHaver
+  # Source position (row/column) offering both method-style and Hash-style reads
+  #
+  # This provides compatibility with code expecting either:
+  # - Hash access: point[:row], point[:column] (String keys "row"/"column" also work)
+  # - Method access: point.row, point.column
+  #
+  # Only `[]` and `to_h` are provided from the Hash side; this is not a full
+  # Hash replacement.
+  #
+  # Points are value objects: two points holding the same row and column are
+  # `==`, `eql?` and hash identically, so positions can be compared directly
+  # or used as Hash keys.
+  class Point
+    # @return [Object] row and column exactly as passed to the constructor
+    attr_reader :row, :column
+    # @param row [Object] row component of the position
+    # @param column [Object] column component of the position
+    def initialize(row, column)
+      @row = row
+      @column = column
+    end
+    # Hash-like access for compatibility
+    #
+    # @param key [Symbol, String] :row / "row" or :column / "column"
+    # @return [Object, nil] the requested component, or nil for any other key
+    def [](key)
+      case key
+      when :row, "row" then @row
+      when :column, "column" then @column
+      end
+    end
+    # @return [Hash] `{row:, column:}` with Symbol keys
+    def to_h
+      {row: @row, column: @column}
+    end
+    # @return [String] "(row, column)"
+    def to_s
+      "(#{@row}, #{@column})"
+    end
+    # @return [String] debugging representation
+    def inspect
+      "#<TreeHaver::Point row=#{@row} column=#{@column}>"
+    end
+    # Value equality: another Point with equal row and column
+    #
+    # @param other [Object] object to compare against
+    # @return [Boolean] false for anything that is not a Point
+    def ==(other)
+      other.is_a?(Point) && @row == other.row && @column == other.column
+    end
+    # Strict equality used for Hash key lookup; kept consistent with {#hash}
+    #
+    # @param other [Object] object to compare against
+    # @return [Boolean]
+    def eql?(other)
+      other.is_a?(Point) && @row.eql?(other.row) && @column.eql?(other.column)
+    end
+    # @return [Integer] hash code derived from row and column
+    def hash
+      [@row, @column].hash
+    end
+  end
+  # Unified Node wrapper providing a consistent API across all backends
+  #
+  # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
+  # and provides a unified interface so code works identically regardless of which backend
+  # is being used.
+  #
+  # The wrapper automatically maps backend differences:
+  # - TreeStump uses `node.kind` → mapped to `node.type`
+  # - TreeStump uses `node.is_named?` → mapped to `node.named?`
+  # - Position methods always return TreeHaver::Point objects
+  #
+  # @example Basic node traversal
+  #   tree = parser.parse(source)
+  #   root = tree.root_node
+  #
+  #   puts root.type        # => "document"
+  #   puts root.start_byte  # => 0
+  #   puts root.text        # => full source text
+  #
+  #   root.children.each do |child|
+  #     puts "#{child.type} at line #{child.start_point.row + 1}"
+  #   end
+  #
+  # @example Position information
+  #   node = tree.root_node.children.first
+  #
+  #   # Point objects work as both objects and hashes
+  #   point = node.start_point
+  #   point.row              # => 0 (method access)
+  #   point[:row]            # => 0 (hash access)
+  #   point.column           # => 0
+  #
+  #   # Byte offsets
+  #   node.start_byte        # => 0
+  #   node.end_byte          # => 23
+  #
+  # @example Error detection
+  #   if node.has_error?
+  #     puts "Parse error in subtree"
+  #   end
+  #
+  #   if node.missing?
+  #     puts "This node was inserted by error recovery"
+  #   end
+  #
+  # @example Accessing backend-specific features
+  #   # Via passthrough (method_missing delegates to inner_node)
+  #   node.grammar_name  # TreeStump-specific, automatically delegated
+  #
+  #   # Or explicitly via inner_node
+  #   node.inner_node.grammar_name  # Same result
+  #
+  #   # Check if backend supports a feature
+  #   if node.inner_node.respond_to?(:some_feature)
+  #     node.some_feature
+  #   end
+  #
+  # @note This is the key to tree_haver's "write once, run anywhere" promise
+  class Node
+    # The wrapped backend-specific node object
+    #
+    # This provides direct access to the underlying backend node for advanced usage
+    # when you need backend-specific features not exposed by the unified API.
+    #
+    # @return [Object] The underlying node (TreeSitter::Node, TreeStump::Node, etc.)
+    # @example Accessing backend-specific methods
+    #   # TreeStump-specific: grammar information
+    #   if node.inner_node.respond_to?(:grammar_name)
+    #     puts node.inner_node.grammar_name  # => "toml"
+    #     puts node.inner_node.grammar_id    # => Integer
+    #   end
+    #
+    #   # Check backend type
+    #   case node.inner_node.class.name
+    #   when /TreeStump/
+    #     # TreeStump-specific code
+    #   when /TreeSitter/
+    #     # ruby_tree_sitter-specific code
+    #   end
+    attr_reader :inner_node
+    # The source text for text extraction
+    # @return [String, nil] nil when no source was supplied
+    attr_reader :source
+    # @param node [Object] Backend-specific node object
+    # @param source [String, nil] Source text for text extraction
+    def initialize(node, source: nil)
+      @inner_node = node
+      @source = source
+    end
+    # Get the node's type/kind as a string
+    #
+    # Maps backend-specific methods to a unified API:
+    # - ruby_tree_sitter: node.type
+    # - tree_stump: node.kind
+    # - FFI: node.type
+    #
+    # @return [String] The node type
+    # @raise [TreeHaver::Error] if the backend node has neither `type` nor `kind`
+    def type
+      if @inner_node.respond_to?(:type)
+        @inner_node.type.to_s
+      elsif @inner_node.respond_to?(:kind)
+        @inner_node.kind.to_s
+      else
+        raise TreeHaver::Error, "Backend node does not support type/kind"
+      end
+    end
+    # Get the node's start byte offset
+    # @return [Integer]
+    def start_byte
+      @inner_node.start_byte
+    end
+    # Get the node's end byte offset
+    # @return [Integer]
+    def end_byte
+      @inner_node.end_byte
+    end
+    # Get the node's start position (row, column)
+    #
+    # Uses the backend's `start_point`, falling back to `start_position`.
+    #
+    # @return [Point] with row and column accessors (also works as Hash)
+    # @raise [TreeHaver::Error] if the backend node offers neither method
+    def start_point
+      if @inner_node.respond_to?(:start_point)
+        point = @inner_node.start_point
+        Point.new(point.row, point.column)
+      elsif @inner_node.respond_to?(:start_position)
+        point = @inner_node.start_position
+        Point.new(point.row, point.column)
+      else
+        raise TreeHaver::Error, "Backend node does not support start_point/start_position"
+      end
+    end
+    # Get the node's end position (row, column)
+    #
+    # Uses the backend's `end_point`, falling back to `end_position`.
+    #
+    # @return [Point] with row and column accessors (also works as Hash)
+    # @raise [TreeHaver::Error] if the backend node offers neither method
+    def end_point
+      if @inner_node.respond_to?(:end_point)
+        point = @inner_node.end_point
+        Point.new(point.row, point.column)
+      elsif @inner_node.respond_to?(:end_position)
+        point = @inner_node.end_position
+        Point.new(point.row, point.column)
+      else
+        raise TreeHaver::Error, "Backend node does not support end_point/end_position"
+      end
+    end
+    # Get the node's text content
+    #
+    # Prefers the backend's own `text`; otherwise slices the stored source
+    # between {#start_byte} and {#end_byte}.
+    #
+    # @return [String] the node text ("" when the byte range lies outside the source)
+    # @raise [TreeHaver::Error] if the backend has no `text` and no source was given
+    def text
+      if @inner_node.respond_to?(:text)
+        @inner_node.text
+      elsif @source
+        # Fallback: extract from source using byte positions.
+        # The offsets are BYTE offsets, so slice by bytes: String#[] indexes by
+        # character and returns the wrong span once the source contains
+        # multibyte characters.
+        @source.byteslice(start_byte...end_byte) || ""
+      else
+        raise TreeHaver::Error, "Cannot extract text: node has no text method and no source provided"
+      end
+    end
+    # Check if the node has an error
+    # @return [Boolean]
+    def has_error?
+      @inner_node.has_error?
+    end
+    # Check if the node is missing
+    # @return [Boolean] false when the backend does not implement `missing?`
+    def missing?
+      return false unless @inner_node.respond_to?(:missing?)
+      @inner_node.missing?
+    end
+    # Check if the node is named
+    #
+    # Uses the backend's `named?`, falling back to `is_named?`.
+    #
+    # @return [Boolean]
+    def named?
+      if @inner_node.respond_to?(:named?)
+        @inner_node.named?
+      elsif @inner_node.respond_to?(:is_named?)
+        @inner_node.is_named?
+      else
+        true # Default to true if not supported
+      end
+    end
+    # Get the number of children
+    # @return [Integer]
+    def child_count
+      @inner_node.child_count
+    end
+    # Get a child by index
+    #
+    # @param index [Integer] Child index
+    # @return [Node, nil] Wrapped child node, or nil if the backend returns nil
+    def child(index)
+      child_node = @inner_node.child(index)
+      return if child_node.nil?
+      Node.new(child_node, source: @source)
+    end
+    # Get all children as wrapped nodes
+    #
+    # Indexes for which the backend returns nil are dropped.
+    #
+    # @return [Array<Node>] Array of wrapped child nodes
+    def children
+      (0...child_count).map { |i| child(i) }.compact
+    end
+    # Get named children only
+    #
+    # @return [Array<Node>] Array of named child nodes
+    def named_children
+      children.select(&:named?)
+    end
+    # Iterate over children
+    #
+    # @yield [Node] Each child node
+    # @return [Enumerator, Array<Node>] an Enumerator without a block,
+    #   otherwise the array of children that was iterated
+    def each(&block)
+      return to_enum(__method__) unless block_given?
+      children.each(&block)
+    end
+    # Get a child by field name
+    #
+    # @param name [String, Symbol] Field name (passed to the backend as a String)
+    # @return [Node, nil] The child node for that field; nil when the field is
+    #   absent or the backend does not support field names
+    def child_by_field_name(name)
+      if @inner_node.respond_to?(:child_by_field_name)
+        child_node = @inner_node.child_by_field_name(name.to_s)
+        return if child_node.nil?
+        Node.new(child_node, source: @source)
+      else
+        # Not all backends support field names
+        nil
+      end
+    end
+    # Alias for child_by_field_name
+    alias_method :field, :child_by_field_name
+    # Get the parent node
+    #
+    # @return [Node, nil] The parent node; nil when there is none or the
+    #   backend does not implement `parent`
+    def parent
+      return unless @inner_node.respond_to?(:parent)
+      parent_node = @inner_node.parent
+      return if parent_node.nil?
+      Node.new(parent_node, source: @source)
+    end
+    # Get next sibling
+    #
+    # @return [Node, nil] nil when there is none or the backend does not
+    #   implement `next_sibling`
+    def next_sibling
+      return unless @inner_node.respond_to?(:next_sibling)
+      sibling = @inner_node.next_sibling
+      return if sibling.nil?
+      Node.new(sibling, source: @source)
+    end
+    # Get previous sibling
+    #
+    # @return [Node, nil] nil when there is none or the backend does not
+    #   implement `prev_sibling`
+    def prev_sibling
+      return unless @inner_node.respond_to?(:prev_sibling)
+      sibling = @inner_node.prev_sibling
+      return if sibling.nil?
+      Node.new(sibling, source: @source)
+    end
+    # String representation for debugging
+    # @return [String]
+    def inspect
+      "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
+    end
+    # String representation
+    # @return [String] same as {#text}
+    def to_s
+      text
+    end
+    # Check if node responds to a method (includes delegation to inner_node)
+    #
+    # @param method_name [Symbol] method to check
+    # @param include_private [Boolean] include private methods
+    # @return [Boolean]
+    def respond_to_missing?(method_name, include_private = false)
+      @inner_node.respond_to?(method_name, include_private) || super
+    end
+    # Delegate unknown methods to the underlying backend-specific node
+    #
+    # This provides passthrough access for advanced usage when you need
+    # backend-specific features not exposed by TreeHaver's unified API.
+    #
+    # The delegation is automatic and transparent - you can call backend-specific
+    # methods directly on the TreeHaver::Node and they'll be forwarded to the
+    # underlying node implementation. Only public methods of the inner node
+    # are reachable (forwarding uses public_send).
+    #
+    # @param method_name [Symbol] method to call
+    # @param args [Array] positional arguments to pass
+    # @param kwargs [Hash] keyword arguments to pass
+    # @param block [Proc] block to pass
+    # @return [Object] result from the underlying node
+    # @raise [NoMethodError] if the inner node does not respond to the method
+    #
+    # @example Using TreeStump-specific methods
+    #   # These methods don't exist in the unified API but are in TreeStump
+    #   node.grammar_name      # => "toml" (delegated to inner_node)
+    #   node.grammar_id        # => Integer (delegated to inner_node)
+    #   node.kind_id           # => Integer (delegated to inner_node)
+    #
+    # @example Safe usage with respond_to? check
+    #   if node.respond_to?(:grammar_name)
+    #     puts "Using #{node.grammar_name} grammar"
+    #   end
+    #
+    # @example Equivalent explicit access
+    #   node.grammar_name              # Via passthrough (method_missing)
+    #   node.inner_node.grammar_name   # Explicit access (same result)
+    #
+    # @note This maintains backward compatibility with code written for
+    #   specific backends while providing the benefits of the unified API
+    def method_missing(method_name, *args, **kwargs, &block)
+      if @inner_node.respond_to?(method_name)
+        @inner_node.public_send(method_name, *args, **kwargs, &block)
+      else
+        super
+      end
+    end
+  end
+end

data/lib/tree_haver/path_validator.rb CHANGED Viewed

@@ -186,7 +186,8 @@ module TreeHaver
       return false if path.include?("/./") || path.end_with?("/.")
       # Validate extension
-      return false unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
+      # Allow versioned .so files like .so.0, .so.14, etc. (common on Linux)
+      return false unless has_valid_extension?(path)
       # Validate filename portion
       filename = File.basename(path)
@@ -312,8 +313,8 @@ module TreeHaver
       errors << "Path contains traversal sequence (/../)" if path.include?("/../") || path.end_with?("/..")
       errors << "Path contains traversal sequence (/./)" if path.include?("/./") || path.end_with?("/.")
-      unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
-        errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
+      unless has_valid_extension?(path)
+        errors << "Path does not have allowed extension (.so, .so.X, .dylib, .dll)"
       end
       filename = File.basename(path)
@@ -329,5 +330,19 @@ module TreeHaver
       # Match Windows absolute paths like C:\path or D:/path
       path.match?(/\A[A-Za-z]:[\\\/]/)
     end
+    # @api private
+    # Check if path has a valid library extension
+    #
+    # Allows: .so, .dylib, .dll (via ALLOWED_EXTENSIONS), and versioned .so
+    # files with one or more purely numeric components, like .so.0, .so.14,
+    # .so.0.24 or .so.1.2.3
+    #
+    # @param path [String] path to check
+    # @return [Boolean] true if the extension is allowed
+    def has_valid_extension?(path)
+      # Check for exact matches first (.so, .dylib, .dll)
+      return true if ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
+      # Check for versioned .so files (Linux convention): the real name of a
+      # shared library is usually libNAME.so.MAJOR[.MINOR[.PATCH]], so more
+      # than one numeric component must be accepted.
+      # \z anchors at the true end of the string, so a trailing newline or a
+      # non-numeric suffix (e.g. ".so.1.evil") is still rejected.
+      path.match?(/\.so(?:\.\d+)+\z/)
+    end
   end
 end

data/lib/tree_haver/tree.rb ADDED Viewed

@@ -0,0 +1,205 @@
+# frozen_string_literal: true
+module TreeHaver
+  # Backend-agnostic wrapper around a parsed syntax tree
+  #
+  # Holds the backend's own tree object together with the source text, and
+  # hands out TreeHaver::Node wrappers that carry that source along so node
+  # text can be extracted.
+  #
+  # @example Basic usage
+  #   parser = TreeHaver::Parser.new
+  #   parser.language = TreeHaver::Language.toml
+  #   tree = parser.parse(source)
+  #   root = tree.root_node
+  #   puts root.type
+  #
+  # @example Incremental parsing (if backend supports it)
+  #   tree = parser.parse("x = 1")
+  #   # Edit the source: "x = 1" → "x = 42"
+  #   tree.edit(
+  #     start_byte: 4,
+  #     old_end_byte: 5,
+  #     new_end_byte: 6,
+  #     start_point: { row: 0, column: 4 },
+  #     old_end_point: { row: 0, column: 5 },
+  #     new_end_point: { row: 0, column: 6 }
+  #   )
+  #   new_tree = parser.parse_string(tree, "x = 42")
+  #
+  # @example Accessing backend-specific features
+  #   # Unknown methods are forwarded to inner_tree by method_missing
+  #   tree.some_backend_specific_method
+  #
+  #   # The same call, made explicitly
+  #   tree.inner_tree.some_backend_specific_method
+  class Tree
+    # The backend-specific tree object being wrapped
+    #
+    # Use this for backend features the unified API does not expose.
+    #
+    # @return [Object] The underlying tree (TreeSitter::Tree, TreeStump::Tree, etc.)
+    # @example Accessing backend-specific methods
+    #   # Print DOT graph (TreeStump-specific)
+    #   if tree.inner_tree.respond_to?(:print_dot_graph)
+    #     File.open("tree.dot", "w") do |f|
+    #       tree.inner_tree.print_dot_graph(f)
+    #     end
+    #   end
+    attr_reader :inner_tree
+    # The source text that was parsed
+    #
+    # Passed on to every Node created from this tree.
+    #
+    # @return [String, nil] The original source code, nil if none was given
+    attr_reader :source
+    # @param tree [Object] Backend-specific tree object
+    # @param source [String, nil] Source text for node text extraction
+    def initialize(tree, source: nil)
+      @inner_tree, @source = tree, source
+    end
+    # Get the root node of the tree
+    #
+    # @return [Node, nil] Wrapped root node, or nil if the backend returns nil
+    def root_node
+      backend_root = @inner_tree.root_node
+      backend_root.nil? ? nil : Node.new(backend_root, source: @source)
+    end
+    # Mark the tree as edited for incremental re-parsing
+    #
+    # Call this after the source has changed and before re-parsing, so the
+    # backend knows which region of the old tree is stale. All six arguments
+    # are forwarded unchanged to the inner tree's `edit`.
+    #
+    # Not every backend supports this; check {#supports_editing?} first.
+    #
+    # @param start_byte [Integer] byte offset where the edit starts
+    # @param old_end_byte [Integer] byte offset where the old text ended
+    # @param new_end_byte [Integer] byte offset where the new text ends
+    # @param start_point [Hash] starting position as `{ row:, column: }`
+    # @param old_end_point [Hash] old ending position as `{ row:, column: }`
+    # @param new_end_point [Hash] new ending position as `{ row:, column: }`
+    # @return [Object] whatever the backend's `edit` returns
+    # @raise [TreeHaver::NotAvailable] if the backend doesn't support incremental parsing
+    # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing
+    #
+    # @example Incremental parsing workflow
+    #   # Original source: "x = 1"
+    #   tree = parser.parse("x = 1")
+    #
+    #   # Edit the source: replace "1" with "42" at byte offset 4
+    #   tree.edit(
+    #     start_byte: 4,
+    #     old_end_byte: 5,     # "1" ends at byte 5
+    #     new_end_byte: 6,     # "42" ends at byte 6
+    #     start_point: { row: 0, column: 4 },
+    #     old_end_point: { row: 0, column: 5 },
+    #     new_end_point: { row: 0, column: 6 }
+    #   )
+    #
+    #   # Re-parse with the edited tree for incremental parsing
+    #   new_tree = parser.parse_string(tree, "x = 42")
+    def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
+      edit_args = {
+        start_byte: start_byte,
+        old_end_byte: old_end_byte,
+        new_end_byte: new_end_byte,
+        start_point: start_point,
+        old_end_point: old_end_point,
+        new_end_point: new_end_point,
+      }
+      @inner_tree.edit(**edit_args)
+    rescue NoMethodError => error
+      # Only a NoMethodError that concerns `edit` is translated; anything else
+      # propagates untouched.
+      # NOTE(review): the message check also matches unrelated errors whose
+      # text merely contains "edit" - confirm whether that breadth is intended.
+      unrelated = error.name != :edit && !error.message.include?("edit")
+      raise if unrelated
+      raise TreeHaver::NotAvailable,
+        "Incremental parsing not supported by current backend. " \
+          "Use MRI (ruby_tree_sitter), Rust (tree_stump), or Java (java-tree-sitter) backend."
+    end
+    # Check if the current backend supports incremental parsing
+    #
+    # Reports whether the inner tree exposes an `edit` method.
+    #
+    # @return [Boolean] true if {#edit} can be called on this tree
+    # @example
+    #   if tree.supports_editing?
+    #     tree.edit(...)
+    #     new_tree = parser.parse_string(tree, edited_source)
+    #   else
+    #     # Fall back to full re-parse
+    #     new_tree = parser.parse(edited_source)
+    #   end
+    def supports_editing?
+      begin
+        # Looking the method up is used instead of respond_to?; the lookup
+        # raises when `edit` does not exist.
+        @inner_tree.method(:edit)
+      rescue NameError
+        # NoMethodError is a subclass of NameError, so both end up here
+        return false
+      end
+      true
+    end
+    # String representation
+    # @return [String] class name plus the source size in bytes ("unknown" without source)
+    def inspect
+      length = @source&.bytesize || "unknown"
+      "#<#{self.class} source_length=#{length}>"
+    end
+    # Check if tree responds to a method (includes delegation to inner_tree)
+    #
+    # @param method_name [Symbol] method to check
+    # @param include_private [Boolean] include private methods
+    # @return [Boolean]
+    def respond_to_missing?(method_name, include_private = false)
+      forwarded = @inner_tree.respond_to?(method_name, include_private)
+      forwarded || super
+    end
+    # Forward unknown methods to the underlying backend-specific tree
+    #
+    # Any public method of the inner tree can be called directly on the
+    # TreeHaver::Tree; calls the inner tree does not respond to fall through
+    # to the default NoMethodError.
+    #
+    # @param method_name [Symbol] method to call
+    # @param args [Array] positional arguments to pass
+    # @param kwargs [Hash] keyword arguments to pass
+    # @param block [Proc] block to pass
+    # @return [Object] result from the underlying tree
+    #
+    # @example Using TreeStump-specific methods
+    #   # print_dot_graph is TreeStump-specific
+    #   File.open("tree.dot", "w") do |f|
+    #     tree.print_dot_graph(f)  # Delegated to inner_tree
+    #   end
+    #
+    # @example Safe usage with respond_to? check
+    #   if tree.respond_to?(:print_dot_graph)
+    #     File.open("tree.dot", "w") { |f| tree.print_dot_graph(f) }
+    #   end
+    #
+    # @example Equivalent explicit access
+    #   tree.print_dot_graph(file)              # Via passthrough (method_missing)
+    #   tree.inner_tree.print_dot_graph(file)   # Explicit access (same result)
+    #
+    # @note This maintains backward compatibility with code written for
+    #   specific backends while providing the benefits of the unified API
+    def method_missing(method_name, *args, **kwargs, &block)
+      return super unless @inner_tree.respond_to?(method_name)
+      @inner_tree.public_send(method_name, *args, **kwargs, &block)
+    end
+  end
+end

data/lib/tree_haver/version.rb CHANGED Viewed

@@ -9,8 +9,8 @@ module TreeHaver
   module Version
     # Current version of the tree_haver gem
     #
-    # @return [String] the version string (e.g., "1.0.0")
-    VERSION = "1.0.0"
+    # @return [String] the version string (e.g., "2.0.0")
+    VERSION = "2.0.0"
   end
   # Traditional location for VERSION constant