RubyGems - tree_haver - Versions diffs - 1.0.0 → 3.0.0 - Mend

tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/CHANGELOG.md +236 -3
data/CONTRIBUTING.md +100 -0
data/README.md +470 -85
data/lib/tree_haver/backends/citrus.rb +423 -0
data/lib/tree_haver/backends/ffi.rb +405 -150
data/lib/tree_haver/backends/java.rb +63 -10
data/lib/tree_haver/backends/mri.rb +154 -27
data/lib/tree_haver/backends/rust.rb +58 -27
data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
data/lib/tree_haver/grammar_finder.rb +42 -7
data/lib/tree_haver/language_registry.rb +62 -71
data/lib/tree_haver/node.rb +526 -0
data/lib/tree_haver/path_validator.rb +47 -27
data/lib/tree_haver/tree.rb +259 -0
data/lib/tree_haver/version.rb +2 -2
data/lib/tree_haver.rb +741 -285
data/sig/tree_haver/backends.rbs +68 -1
data/sig/tree_haver/path_validator.rbs +1 -0
data/sig/tree_haver.rbs +95 -9
data.tar.gz.sig +0 -0
metadata +12 -8
metadata.gz.sig +0 -0

data/lib/tree_haver/node.rb ADDED Viewed

@@ -0,0 +1,526 @@
+# frozen_string_literal: true
+module TreeHaver
+  # Point class that works as both a Hash and an object with row/column accessors
+  #
+  # This provides compatibility with code expecting either:
+  # - Hash access: point[:row], point[:column]
+  # - Method access: point.row, point.column
+  #
+  # Points are value objects: two points with the same row and column are
+  # equal and hash identically, so positions obtained from separate calls can
+  # be compared with == or used as Hash keys.
+  class Point
+    # @return [Integer] row of the position
+    # @return [Integer] column of the position
+    attr_reader :row, :column
+    # @param row [Integer] row of the position
+    # @param column [Integer] column of the position
+    def initialize(row, column)
+      @row = row
+      @column = column
+    end
+    # Hash-like access for compatibility
+    #
+    # @param key [Symbol, String] :row / "row" or :column / "column"
+    # @return [Integer, nil] the requested coordinate, or nil for any other key
+    def [](key)
+      case key
+      when :row, "row" then @row
+      when :column, "column" then @column
+      end
+    end
+    # @return [Hash{Symbol => Integer}] the point as {row:, column:}
+    def to_h
+      {row: @row, column: @column}
+    end
+    # @return [String] the point formatted as "(row, column)"
+    def to_s
+      "(#{@row}, #{@column})"
+    end
+    # @return [String] debugging representation
+    def inspect
+      "#<TreeHaver::Point row=#{@row} column=#{@column}>"
+    end
+    # Value equality: points are equal when both coordinates are equal
+    #
+    # @param other [Object] object to compare with
+    # @return [Boolean]
+    def ==(other)
+      other.is_a?(Point) && @row == other.row && @column == other.column
+    end
+    # Strict equality used for Hash key lookup
+    #
+    # Uses eql? on the coordinates so that it always agrees with #hash.
+    #
+    # @param other [Object] object to compare with
+    # @return [Boolean]
+    def eql?(other)
+      other.is_a?(Point) && @row.eql?(other.row) && @column.eql?(other.column)
+    end
+    # @return [Integer] hash value derived from both coordinates
+    def hash
+      [@row, @column].hash
+    end
+  end
+  # Unified Node wrapper providing a consistent API across all backends
+  #
+  # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
+  # and provides a unified interface so code works identically regardless of which backend
+  # is being used.
+  #
+  # The wrapper automatically maps backend differences:
+  # - TreeStump uses `node.kind` → mapped to `node.type`
+  # - TreeStump uses `node.is_named?` → mapped to `node.named?`
+  # - All backends return consistent Point objects from position methods
+  #
+  # @example Basic node traversal
+  #   tree = parser.parse(source)
+  #   root = tree.root_node
+  #
+  #   puts root.type        # => "document"
+  #   puts root.start_byte  # => 0
+  #   puts root.text        # => full source text
+  #
+  #   root.children.each do |child|
+  #     puts "#{child.type} at line #{child.start_point.row + 1}"
+  #   end
+  #
+  # @example Position information
+  #   node = tree.root_node.children.first
+  #
+  #   # Point objects work as both objects and hashes
+  #   point = node.start_point
+  #   point.row              # => 0 (method access)
+  #   point[:row]            # => 0 (hash access)
+  #   point.column           # => 0
+  #
+  #   # Byte offsets
+  #   node.start_byte        # => 0
+  #   node.end_byte          # => 23
+  #
+  # @example Error detection
+  #   if node.has_error?
+  #     puts "Parse error in subtree"
+  #   end
+  #
+  #   if node.missing?
+  #     puts "This node was inserted by error recovery"
+  #   end
+  #
+  # @example Accessing backend-specific features
+  #   # Via passthrough (method_missing delegates to inner_node)
+  #   node.grammar_name  # TreeStump-specific, automatically delegated
+  #
+  #   # Or explicitly via inner_node
+  #   node.inner_node.grammar_name  # Same result
+  #
+  #   # Check if backend supports a feature
+  #   if node.inner_node.respond_to?(:some_feature)
+  #     node.some_feature
+  #   end
+  #
+  # @note This is the key to tree_haver's "write once, run anywhere" promise
+  class Node
+    include Comparable
+    # The wrapped backend-specific node object
+    #
+    # This provides direct access to the underlying backend node for advanced usage
+    # when you need backend-specific features not exposed by the unified API.
+    #
+    # @return [Object] The underlying node (TreeSitter::Node, TreeStump::Node, etc.)
+    # @example Accessing backend-specific methods
+    #   # TreeStump-specific: grammar information
+    #   if node.inner_node.respond_to?(:grammar_name)
+    #     puts node.inner_node.grammar_name  # => "toml"
+    #     puts node.inner_node.grammar_id    # => Integer
+    #   end
+    #
+    #   # Check backend type
+    #   case node.inner_node.class.name
+    #   when /TreeStump/
+    #     # TreeStump-specific code
+    #   when /TreeSitter/
+    #     # ruby_tree_sitter-specific code
+    #   end
+    attr_reader :inner_node
+    # The source text for text extraction
+    # @return [String, nil]
+    attr_reader :source
+    # @param node [Object] Backend-specific node object
+    # @param source [String, nil] Source text for text extraction
+    def initialize(node, source: nil)
+      @inner_node = node
+      @source = source
+    end
+    # Get the node's type/kind as a string
+    #
+    # Maps backend-specific methods to a unified API:
+    # - ruby_tree_sitter: node.type
+    # - tree_stump: node.kind
+    # - FFI: node.type
+    #
+    # @return [String] The node type
+    # @raise [TreeHaver::Error] if the backend node has neither type nor kind
+    def type
+      if @inner_node.respond_to?(:type)
+        @inner_node.type.to_s
+      elsif @inner_node.respond_to?(:kind)
+        @inner_node.kind.to_s
+      else
+        raise TreeHaver::Error, "Backend node does not support type/kind"
+      end
+    end
+    # Get the node's start byte offset
+    # @return [Integer]
+    def start_byte
+      @inner_node.start_byte
+    end
+    # Get the node's end byte offset
+    # @return [Integer]
+    def end_byte
+      @inner_node.end_byte
+    end
+    # Get the node's start position (row, column)
+    #
+    # @return [Point] with row and column accessors (also works as Hash)
+    # @raise [TreeHaver::Error] if the backend node has neither start_point nor start_position
+    def start_point
+      if @inner_node.respond_to?(:start_point)
+        point = @inner_node.start_point
+        Point.new(point.row, point.column)
+      elsif @inner_node.respond_to?(:start_position)
+        point = @inner_node.start_position
+        Point.new(point.row, point.column)
+      else
+        raise TreeHaver::Error, "Backend node does not support start_point/start_position"
+      end
+    end
+    # Get the node's end position (row, column)
+    #
+    # @return [Point] with row and column accessors (also works as Hash)
+    # @raise [TreeHaver::Error] if the backend node has neither end_point nor end_position
+    def end_point
+      if @inner_node.respond_to?(:end_point)
+        point = @inner_node.end_point
+        Point.new(point.row, point.column)
+      elsif @inner_node.respond_to?(:end_position)
+        point = @inner_node.end_position
+        Point.new(point.row, point.column)
+      else
+        raise TreeHaver::Error, "Backend node does not support end_point/end_position"
+      end
+    end
+    # Get the node's text content
+    #
+    # Prefers the backend node's own text method. When the backend has none,
+    # the text is cut out of the source using the node's byte offsets.
+    #
+    # @return [String] the node text ("" if the offsets fall outside the source)
+    # @raise [TreeHaver::Error] if the backend node has no text method and no source was provided
+    def text
+      if @inner_node.respond_to?(:text)
+        @inner_node.text
+      elsif @source
+        # Fallback: extract from source using byte positions.
+        # start_byte/end_byte are byte offsets, so slice by bytes; String#[]
+        # indexes by character and returns the wrong span as soon as the
+        # source contains a multibyte character.
+        @source.byteslice(start_byte...end_byte) || ""
+      else
+        raise TreeHaver::Error, "Cannot extract text: node has no text method and no source provided"
+      end
+    end
+    # Check if the node has an error
+    # @return [Boolean]
+    def has_error?
+      @inner_node.has_error?
+    end
+    # Check if the node is missing
+    # @return [Boolean] false when the backend does not support missing?
+    def missing?
+      return false unless @inner_node.respond_to?(:missing?)
+      @inner_node.missing?
+    end
+    # Check if the node is named
+    # @return [Boolean] true when the backend cannot tell
+    def named?
+      backend_named?(@inner_node)
+    end
+    # Check if the node is structural (non-terminal)
+    #
+    # In tree-sitter, this is equivalent to being a "named" node.
+    # Named nodes represent actual syntactic constructs (e.g., table, keyvalue, string)
+    # while anonymous nodes are syntax/punctuation (e.g., [, =, whitespace).
+    #
+    # For Citrus backends, this checks if the node is a non-terminal rule.
+    #
+    # @return [Boolean] true if this is a structural (non-terminal) node
+    def structural?
+      # Delegate to inner_node if it has its own structural? method (e.g., Citrus)
+      if @inner_node.respond_to?(:structural?)
+        @inner_node.structural?
+      else
+        # For tree-sitter backends, named? is equivalent to structural?
+        # Named nodes are syntactic constructs; anonymous nodes are punctuation
+        named?
+      end
+    end
+    # Get the number of children
+    # @return [Integer]
+    def child_count
+      @inner_node.child_count
+    end
+    # Get a child by index
+    #
+    # @param index [Integer] Child index
+    # @return [Node, nil] Wrapped child node
+    def child(index)
+      wrap_node(@inner_node.child(index))
+    end
+    # Get a named child by index
+    #
+    # Returns the nth named child (skipping unnamed children).
+    # Uses backend's native named_child if available, otherwise provides fallback.
+    #
+    # @param index [Integer] Named child index (0-based)
+    # @return [Node, nil] Wrapped named child node, or nil if index out of bounds
+    def named_child(index)
+      # Try native implementation first
+      if @inner_node.respond_to?(:named_child)
+        return wrap_node(@inner_node.named_child(index))
+      end
+      # Fallback: manually iterate through children and count named ones
+      named_seen = 0
+      child_count.times do |i|
+        raw = @inner_node.child(i)
+        next if raw.nil? || !backend_named?(raw)
+        return wrap_node(raw) if named_seen == index
+        named_seen += 1
+      end
+      nil  # Index out of bounds
+    end
+    # Get the count of named children
+    #
+    # Uses backend's native named_child_count if available, otherwise provides fallback.
+    #
+    # @return [Integer] Number of named children
+    def named_child_count
+      # Try native implementation first
+      if @inner_node.respond_to?(:named_child_count)
+        return @inner_node.named_child_count
+      end
+      # Fallback: count named children manually
+      child_count.times.count do |i|
+        raw = @inner_node.child(i)
+        !raw.nil? && backend_named?(raw)
+      end
+    end
+    # Get all children as wrapped nodes
+    #
+    # @return [Array<Node>] Array of wrapped child nodes
+    def children
+      (0...child_count).map { |i| child(i) }.compact
+    end
+    # Get named children only
+    #
+    # @return [Array<Node>] Array of named child nodes
+    def named_children
+      children.select(&:named?)
+    end
+    # Iterate over children
+    #
+    # @yield [Node] Each child node
+    # @return [Enumerator, Array<Node>] an Enumerator when no block is given,
+    #   otherwise the array of children that was iterated
+    def each(&block)
+      return to_enum(__method__) unless block_given?
+      children.each(&block)
+    end
+    # Get a child by field name
+    #
+    # @param name [String, Symbol] Field name
+    # @return [Node, nil] The child node for that field
+    def child_by_field_name(name)
+      # Not all backends support field names
+      return unless @inner_node.respond_to?(:child_by_field_name)
+      wrap_node(@inner_node.child_by_field_name(name.to_s))
+    end
+    # Alias for child_by_field_name
+    alias_method :field, :child_by_field_name
+    # Get the parent node
+    #
+    # @return [Node, nil] The parent node
+    def parent
+      return unless @inner_node.respond_to?(:parent)
+      wrap_node(@inner_node.parent)
+    end
+    # Get next sibling
+    #
+    # @return [Node, nil]
+    def next_sibling
+      return unless @inner_node.respond_to?(:next_sibling)
+      wrap_node(@inner_node.next_sibling)
+    end
+    # Get previous sibling
+    #
+    # @return [Node, nil]
+    def prev_sibling
+      return unless @inner_node.respond_to?(:prev_sibling)
+      wrap_node(@inner_node.prev_sibling)
+    end
+    # String representation for debugging
+    # @return [String]
+    def inspect
+      "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
+    end
+    # String representation
+    # @return [String]
+    def to_s
+      text
+    end
+    # Compare nodes for ordering (used by Comparable module)
+    #
+    # Nodes are ordered by their position in the source:
+    # 1. First by start_byte (earlier nodes come first)
+    # 2. Then by end_byte for tie-breaking (shorter spans come first)
+    # 3. Then by type for deterministic ordering
+    #
+    # This allows nodes to be sorted by position and used in sorted collections.
+    # The Comparable module provides <, <=, >=, >, and between? based on this
+    # (== is overridden below with identity-based equality).
+    #
+    # @param other [Node] node to compare with
+    # @return [Integer, nil] -1, 0, 1, or nil if not comparable
+    def <=>(other)
+      return unless other.is_a?(Node)
+      # Compare by position first (start_byte, then end_byte)
+      cmp = start_byte <=> other.start_byte
+      return cmp unless cmp.zero?
+      cmp = end_byte <=> other.end_byte
+      return cmp unless cmp.zero?
+      # For nodes at the same position with same span, compare by type
+      type <=> other.type
+    end
+    # Check equality based on inner_node identity
+    #
+    # Two nodes are equal if they wrap the same backend node object.
+    # This is separate from the <=> comparison which orders by position.
+    # Nodes at the same position but wrapping different backend nodes are
+    # equal according to <=> (positional equality) but not equal according to == (identity equality).
+    #
+    # Note: We override Comparable's default == behavior to check inner_node identity
+    # rather than just relying on <=> returning 0, because we want identity-based
+    # equality for testing and collection membership, not position-based equality.
+    #
+    # @param other [Object] object to compare with
+    # @return [Boolean] true if both nodes wrap the same inner_node
+    def ==(other)
+      return false unless other.is_a?(Node)
+      @inner_node == other.inner_node
+    end
+    # Alias for == to support both styles
+    alias_method :eql?, :==
+    # Generate hash value for this node
+    #
+    # Uses the hash of the inner_node to ensure nodes wrapping the same
+    # backend node have the same hash value.
+    #
+    # @return [Integer] hash value
+    def hash
+      @inner_node.hash
+    end
+    # Check if node responds to a method (includes delegation to inner_node)
+    #
+    # Only public methods of the inner node count: method_missing forwards
+    # with public_send, so a private backend method can never be reached
+    # through this wrapper, whatever include_private says.
+    #
+    # @param method_name [Symbol] method to check
+    # @param include_private [Boolean] include private methods (of this wrapper only)
+    # @return [Boolean]
+    def respond_to_missing?(method_name, include_private = false)
+      @inner_node.respond_to?(method_name) || super
+    end
+    # Delegate unknown methods to the underlying backend-specific node
+    #
+    # This provides passthrough access for advanced usage when you need
+    # backend-specific features not exposed by TreeHaver's unified API.
+    #
+    # The delegation is automatic and transparent - you can call backend-specific
+    # methods directly on the TreeHaver::Node and they'll be forwarded to the
+    # underlying node implementation.
+    #
+    # @param method_name [Symbol] method to call
+    # @param args [Array] arguments to pass
+    # @param kwargs [Hash] keyword arguments to pass
+    # @param block [Proc] block to pass
+    # @return [Object] result from the underlying node
+    # @raise [NoMethodError] if the underlying node does not respond to the method
+    #
+    # @example Using TreeStump-specific methods
+    #   # These methods don't exist in the unified API but are in TreeStump
+    #   node.grammar_name      # => "toml" (delegated to inner_node)
+    #   node.grammar_id        # => Integer (delegated to inner_node)
+    #   node.kind_id           # => Integer (delegated to inner_node)
+    #
+    # @example Safe usage with respond_to? check
+    #   if node.respond_to?(:grammar_name)
+    #     puts "Using #{node.grammar_name} grammar"
+    #   end
+    #
+    # @example Equivalent explicit access
+    #   node.grammar_name              # Via passthrough (method_missing)
+    #   node.inner_node.grammar_name   # Explicit access (same result)
+    #
+    # @note This maintains backward compatibility with code written for
+    #   specific backends while providing the benefits of the unified API
+    def method_missing(method_name, *args, **kwargs, &block)
+      if @inner_node.respond_to?(method_name)
+        @inner_node.public_send(method_name, *args, **kwargs, &block)
+      else
+        super
+      end
+    end
+    private
+    # Wrap a raw backend node, sharing this node's source
+    #
+    # @param raw [Object, nil] backend node, or nil when the backend found none
+    # @return [Node, nil] wrapped node, or nil when raw is nil
+    def wrap_node(raw)
+      return if raw.nil?
+      Node.new(raw, source: @source)
+    end
+    # Determine whether a raw (unwrapped) backend node is named
+    #
+    # Maps backend differences: named? where available, otherwise is_named?.
+    #
+    # @param raw [Object] backend node
+    # @return [Boolean] true when the backend cannot tell
+    def backend_named?(raw)
+      if raw.respond_to?(:named?)
+        raw.named?
+      elsif raw.respond_to?(:is_named?)
+        raw.is_named?
+      else
+        true # Default to true if not supported
+      end
+    end
+  end
+end

data/lib/tree_haver/path_validator.rb CHANGED Viewed

@@ -60,7 +60,7 @@ module TreeHaver
     # Pattern for valid symbol names (C identifier format)
     VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
-    @custom_trusted_directories = []
+    @custom_trusted_directories = [] # rubocop:disable ThreadSafety/MutableClassInstanceVariable
     @mutex = Mutex.new
     module_function
@@ -75,18 +75,15 @@ module TreeHaver
       @mutex.synchronize { dirs.concat(@custom_trusted_directories) }
       # Add directories from environment variable
-      env_dirs = ENV[TRUSTED_DIRS_ENV_VAR]
-      if env_dirs
-        env_dirs.split(",").each do |dir|
-          expanded = File.expand_path(dir.strip)
-          # :nocov:
-          # File.expand_path always returns absolute paths on Unix/macOS.
-          # This guard exists for defensive programming on exotic platforms
-          # where expand_path might behave differently, but cannot be tested
-          # in standard CI environments.
-          dirs << expanded if expanded.start_with?("/")
-          # :nocov:
-        end
+      ENV[TRUSTED_DIRS_ENV_VAR]&.split(",")&.each do |dir|
+        expanded = File.expand_path(dir.strip)
+        # :nocov:
+        # File.expand_path always returns absolute paths on Unix/macOS.
+        # This guard exists for defensive programming on exotic platforms
+        # where expand_path might behave differently, but cannot be tested
+        # in standard CI environments.
+        dirs << expanded if expanded.start_with?("/")
+        # :nocov:
       end
       dirs.uniq
@@ -186,7 +183,8 @@ module TreeHaver
       return false if path.include?("/./") || path.end_with?("/.")
       # Validate extension
-      return false unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
+      # Allow versioned .so files like .so.0, .so.14, etc. (common on Linux)
+      return false unless has_valid_extension?(path)
       # Validate filename portion
       filename = File.basename(path)
@@ -211,21 +209,29 @@ module TreeHaver
       return false if path.nil?
       # Resolve the real path to handle symlinks
-      check_path = begin
-        File.realpath(path)
-      rescue Errno::ENOENT
-        # File doesn't exist yet, check the directory
-        dir = File.dirname(path)
-        begin
-          File.realpath(dir)
-        rescue Errno::ENOENT
-          return false
-        end
-      end
+      check_path = resolve_check_path(path)
+      return false if check_path.nil?
       trusted_directories.any? { |trusted| check_path.start_with?(trusted) }
     end
+    # Resolve a path to its real (symlink-free) form for trust checking
+    #
+    # When the path itself does not exist, its parent directory is resolved
+    # instead, so a not-yet-created file can still be checked by location.
+    #
+    # @param path [String] the path to resolve
+    # @return [String, nil] the resolved path (or resolved parent directory),
+    #   or nil if neither exists
+    # @api private
+    def resolve_check_path(path)
+      begin
+        return File.realpath(path)
+      rescue Errno::ENOENT
+        # File doesn't exist yet, fall through and check the directory
+      end
+      File.realpath(File.dirname(path))
+    rescue Errno::ENOENT
+      nil
+    end
     # Validate a language name is safe
     #
     # Language names are used to construct:
@@ -312,8 +318,8 @@ module TreeHaver
       errors << "Path contains traversal sequence (/../)" if path.include?("/../") || path.end_with?("/..")
       errors << "Path contains traversal sequence (/./)" if path.include?("/./") || path.end_with?("/.")
-      unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
-        errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
+      unless has_valid_extension?(path)
+        errors << "Path does not have allowed extension (.so, .so.X, .dylib, .dll)"
       end
       filename = File.basename(path)
@@ -329,5 +335,19 @@ module TreeHaver
       # Match Windows absolute paths like C:\path or D:/path
       path.match?(/\A[A-Za-z]:[\\\/]/)
     end
+    # @api private
+    # Check if path has a valid library extension
+    #
+    # Allows: the extensions in ALLOWED_EXTENSIONS (.so, .dylib, .dll) and
+    # versioned .so files with one or more numeric components, e.g.
+    # .so.0, .so.14, .so.0.22, .so.0.22.6
+    #
+    # @param path [String] the library path to check
+    # @return [Boolean] true if the path ends with an accepted extension
+    def has_valid_extension?(path)
+      # Check for exact matches first (.so, .dylib, .dll)
+      return true if ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
+      # Check for versioned .so files (Linux convention). Shared libraries are
+      # commonly installed with multi-part versions (libtree-sitter.so.0.22),
+      # so accept any number of dot-separated numeric components. \z keeps the
+      # match anchored to the very end of the string.
+      path.match?(/\.so(?:\.\d+)+\z/)
+    end
   end
 end