tree_haver 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Point class that works as both a Hash and an object with row/column accessors
5
+ #
6
+ # This provides compatibility with code expecting either:
7
+ # - Hash access: point[:row], point[:column]
8
+ # - Method access: point.row, point.column
9
class Point
  # @return [Object] the row and column this point was constructed with
  attr_reader :row, :column

  # @param row [Object] row component of the position
  # @param column [Object] column component of the position
  def initialize(row, column)
    @row = row
    @column = column
  end

  # Hash-style lookup so callers written against Hash points keep working.
  #
  # @param key [Symbol, String] `:row`/`"row"` or `:column`/`"column"`
  # @return [Object, nil] the matching component, or nil for any other key
  def [](key)
    return @row if :row == key || "row" == key
    return @column if :column == key || "column" == key

    nil
  end

  # @return [Hash] the point as `{row:, column:}`
  def to_h
    {row: @row, column: @column}
  end

  # @return [String] compact "(row, column)" form
  def to_s
    format("(%s, %s)", @row, @column)
  end

  # @return [String] debugging representation
  def inspect
    format("#<TreeHaver::Point row=%s column=%s>", @row, @column)
  end
end
37
+
38
+ # Unified Node wrapper providing a consistent API across all backends
39
+ #
40
+ # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
41
+ # and provides a unified interface so code works identically regardless of which backend
42
+ # is being used.
43
+ #
44
+ # The wrapper automatically maps backend differences:
45
+ # - TreeStump uses `node.kind` → mapped to `node.type`
46
+ # - TreeStump uses `node.is_named?` → mapped to `node.named?`
47
+ # - All backends return consistent Point objects from position methods
48
+ #
49
+ # @example Basic node traversal
50
+ # tree = parser.parse(source)
51
+ # root = tree.root_node
52
+ #
53
+ # puts root.type # => "document"
54
+ # puts root.start_byte # => 0
55
+ # puts root.text # => full source text
56
+ #
57
+ # root.children.each do |child|
58
+ # puts "#{child.type} at line #{child.start_point.row + 1}"
59
+ # end
60
+ #
61
+ # @example Position information
62
+ # node = tree.root_node.children.first
63
+ #
64
+ # # Point objects work as both objects and hashes
65
+ # point = node.start_point
66
+ # point.row # => 0 (method access)
67
+ # point[:row] # => 0 (hash access)
68
+ # point.column # => 0
69
+ #
70
+ # # Byte offsets
71
+ # node.start_byte # => 0
72
+ # node.end_byte # => 23
73
+ #
74
+ # @example Error detection
75
+ # if node.has_error?
76
+ # puts "Parse error in subtree"
77
+ # end
78
+ #
79
+ # if node.missing?
80
+ # puts "This node was inserted by error recovery"
81
+ # end
82
+ #
83
+ # @example Accessing backend-specific features
84
+ # # Via passthrough (method_missing delegates to inner_node)
85
+ # node.grammar_name # TreeStump-specific, automatically delegated
86
+ #
87
+ # # Or explicitly via inner_node
88
+ # node.inner_node.grammar_name # Same result
89
+ #
90
+ # # Check if backend supports a feature
91
+ # if node.inner_node.respond_to?(:some_feature)
92
+ # node.some_feature
93
+ # end
94
+ #
95
+ # @note This is the key to tree_haver's "write once, run anywhere" promise
96
class Node
  # The wrapped backend-specific node object
  #
  # Gives direct access to the underlying backend node for advanced usage
  # when a backend-specific feature is not exposed by the unified API.
  #
  # @return [Object] The underlying node (TreeSitter::Node, TreeStump::Node, etc.)
  # @example Accessing backend-specific methods
  #   if node.inner_node.respond_to?(:grammar_name)
  #     puts node.inner_node.grammar_name # => "toml"
  #   end
  attr_reader :inner_node

  # The source text, used as a fallback for text extraction
  # @return [String, nil]
  attr_reader :source

  # @param node [Object] Backend-specific node object
  # @param source [String, nil] Source text for text extraction
  def initialize(node, source: nil)
    @inner_node = node
    @source = source
  end

  # Get the node's type/kind as a string
  #
  # Uses the backend's `type` when available, otherwise its `kind`.
  #
  # @return [String] The node type
  # @raise [TreeHaver::Error] if the backend node exposes neither method
  def type
    if @inner_node.respond_to?(:type)
      @inner_node.type.to_s
    elsif @inner_node.respond_to?(:kind)
      @inner_node.kind.to_s
    else
      raise TreeHaver::Error, "Backend node does not support type/kind"
    end
  end

  # Get the node's start byte offset
  # @return [Integer]
  def start_byte
    @inner_node.start_byte
  end

  # Get the node's end byte offset
  # @return [Integer]
  def end_byte
    @inner_node.end_byte
  end

  # Get the node's start position (row, column)
  #
  # @return [Point] with row and column accessors (also works as Hash)
  # @raise [TreeHaver::Error] if the backend exposes neither
  #   `start_point` nor `start_position`
  def start_point
    point_from(:start_point, :start_position)
  end

  # Get the node's end position (row, column)
  #
  # @return [Point] with row and column accessors (also works as Hash)
  # @raise [TreeHaver::Error] if the backend exposes neither
  #   `end_point` nor `end_position`
  def end_point
    point_from(:end_point, :end_position)
  end

  # Get the node's text content
  #
  # Prefers the backend's own `text`; otherwise slices the stored source
  # between {#start_byte} and {#end_byte}.
  #
  # @return [String]
  # @raise [TreeHaver::Error] if the backend has no `text` and no source was given
  def text
    return @inner_node.text if @inner_node.respond_to?(:text)

    unless @source
      raise TreeHaver::Error, "Cannot extract text: node has no text method and no source provided"
    end

    # The offsets are BYTE offsets. String#[] indexes by character, which
    # returns the wrong span as soon as the source contains a multibyte
    # character before this node, so slice by bytes instead.
    @source.byteslice(start_byte...end_byte) || ""
  end

  # Check if the node has an error
  # @return [Boolean]
  def has_error?
    @inner_node.has_error?
  end

  # Check if the node is missing
  #
  # @return [Boolean] false when the backend does not expose `missing?`
  def missing?
    return false unless @inner_node.respond_to?(:missing?)
    @inner_node.missing?
  end

  # Check if the node is named
  #
  # Uses the backend's `named?` when available, otherwise `is_named?`.
  #
  # @return [Boolean] true when the backend exposes neither method
  def named?
    if @inner_node.respond_to?(:named?)
      @inner_node.named?
    elsif @inner_node.respond_to?(:is_named?)
      @inner_node.is_named?
    else
      true # Default to true if not supported
    end
  end

  # Get the number of children
  # @return [Integer]
  def child_count
    @inner_node.child_count
  end

  # Get a child by index
  #
  # @param index [Integer] Child index
  # @return [Node, nil] Wrapped child node, or nil if the backend returned nil
  def child(index)
    wrap_node(@inner_node.child(index))
  end

  # Get all children as wrapped nodes
  #
  # @return [Array<Node>] Array of wrapped child nodes (nil children are dropped)
  def children
    (0...child_count).map { |i| child(i) }.compact
  end

  # Get named children only
  #
  # @return [Array<Node>] Array of named child nodes
  def named_children
    children.select(&:named?)
  end

  # Iterate over children
  #
  # @yield [Node] Each child node
  # @return [Enumerator, Array<Node>] an Enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block_given?
    children.each(&block)
  end

  # Get a child by field name
  #
  # @param name [String, Symbol] Field name
  # @return [Node, nil] The child node for that field; nil when the backend
  #   has no field support or no such child
  def child_by_field_name(name)
    # Not all backends support field names
    return unless @inner_node.respond_to?(:child_by_field_name)
    wrap_node(@inner_node.child_by_field_name(name.to_s))
  end

  # Alias for child_by_field_name
  alias_method :field, :child_by_field_name

  # Get the parent node
  #
  # @return [Node, nil] The parent node
  def parent
    return unless @inner_node.respond_to?(:parent)
    wrap_node(@inner_node.parent)
  end

  # Get next sibling
  #
  # @return [Node, nil]
  def next_sibling
    return unless @inner_node.respond_to?(:next_sibling)
    wrap_node(@inner_node.next_sibling)
  end

  # Get previous sibling
  #
  # @return [Node, nil]
  def prev_sibling
    return unless @inner_node.respond_to?(:prev_sibling)
    wrap_node(@inner_node.prev_sibling)
  end

  # String representation for debugging
  # @return [String]
  def inspect
    "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
  end

  # String representation
  # @return [String] same as {#text}
  def to_s
    text
  end

  # Check if node responds to a method (includes delegation to inner_node)
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    @inner_node.respond_to?(method_name, include_private) || super
  end

  # Delegate unknown methods to the underlying backend-specific node
  #
  # Any method not defined here is forwarded to {#inner_node} when the
  # backend node responds to it; otherwise the usual NoMethodError is raised.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] positional arguments to pass
  # @param kwargs [Hash] keyword arguments to pass
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying node
  #
  # @example Safe usage with respond_to? check
  #   if node.respond_to?(:grammar_name)
  #     puts "Using #{node.grammar_name} grammar"
  #   end
  def method_missing(method_name, *args, **kwargs, &block)
    if @inner_node.respond_to?(method_name)
      @inner_node.public_send(method_name, *args, **kwargs, &block)
    else
      super
    end
  end

  private

  # Wrap a backend node in a Node sharing this node's source; nil stays nil.
  def wrap_node(raw_node)
    return if raw_node.nil?
    Node.new(raw_node, source: @source)
  end

  # Build a Point from whichever of the two position accessors the backend
  # exposes, preferring +primary+ over +fallback+.
  def point_from(primary, fallback)
    accessor = [primary, fallback].find { |name| @inner_node.respond_to?(name) }
    unless accessor
      raise TreeHaver::Error, "Backend node does not support #{primary}/#{fallback}"
    end

    raw_point = @inner_node.public_send(accessor)
    Point.new(raw_point.row, raw_point.column)
  end
end
376
+ end
@@ -186,7 +186,8 @@ module TreeHaver
186
186
  return false if path.include?("/./") || path.end_with?("/.")
187
187
 
188
188
  # Validate extension
189
- return false unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
189
+ # Allow versioned .so files like .so.0, .so.14, etc. (common on Linux)
190
+ return false unless has_valid_extension?(path)
190
191
 
191
192
  # Validate filename portion
192
193
  filename = File.basename(path)
@@ -312,8 +313,8 @@ module TreeHaver
312
313
  errors << "Path contains traversal sequence (/../)" if path.include?("/../") || path.end_with?("/..")
313
314
  errors << "Path contains traversal sequence (/./)" if path.include?("/./") || path.end_with?("/.")
314
315
 
315
- unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
316
- errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
316
+ unless has_valid_extension?(path)
317
+ errors << "Path does not have allowed extension (.so, .so.X, .dylib, .dll)"
317
318
  end
318
319
 
319
320
  filename = File.basename(path)
@@ -329,5 +330,19 @@ module TreeHaver
329
330
  # Match Windows absolute paths like C:\path or D:/path
330
331
  path.match?(/\A[A-Za-z]:[\\\/]/)
331
332
  end
333
+
334
# @api private
# Check if path has a valid library extension
#
# Accepts any suffix listed in ALLOWED_EXTENSIONS, plus Linux-style versioned
# shared objects where `.so` is followed by one or more numeric components,
# e.g. `.so.0`, `.so.14`, `.so.0.22.6`.
#
# @param path [String] library path to check
# @return [Boolean] true when the path ends in an accepted extension
def has_valid_extension?(path)
  # Exact suffix matches first
  return true if path.end_with?(*ALLOWED_EXTENSIONS)

  # Versioned .so files (Linux convention): libfoo.so.MAJOR[.MINOR[.PATCH]].
  # \z (not $) so a trailing newline cannot sneak past the check.
  path.match?(/\.so(?:\.\d+)+\z/)
end
332
347
  end
333
348
  end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Unified Tree wrapper providing a consistent API across all backends
5
+ #
6
+ # This class wraps backend-specific tree objects and provides a unified interface.
7
+ # It stores the source text to enable text extraction from nodes.
8
+ #
9
+ # @example Basic usage
10
+ # parser = TreeHaver::Parser.new
11
+ # parser.language = TreeHaver::Language.toml
12
+ # tree = parser.parse(source)
13
+ # root = tree.root_node
14
+ # puts root.type
15
+ #
16
+ # @example Incremental parsing (if backend supports it)
17
+ # tree = parser.parse("x = 1")
18
+ # # Edit the source: "x = 1" → "x = 42"
19
+ # tree.edit(
20
+ # start_byte: 4,
21
+ # old_end_byte: 5,
22
+ # new_end_byte: 6,
23
+ # start_point: { row: 0, column: 4 },
24
+ # old_end_point: { row: 0, column: 5 },
25
+ # new_end_point: { row: 0, column: 6 }
26
+ # )
27
+ # new_tree = parser.parse_string(tree, "x = 42")
28
+ #
29
+ # @example Accessing backend-specific features
30
+ # # Via passthrough (method_missing delegates to inner_tree)
31
+ # tree.some_backend_specific_method # Automatically delegated
32
+ #
33
+ # # Or explicitly via inner_tree
34
+ # tree.inner_tree.some_backend_specific_method
35
class Tree
  # The wrapped backend-specific tree object
  #
  # Use this for backend-specific features the unified API does not expose.
  #
  # @return [Object] The underlying tree (TreeSitter::Tree, TreeStump::Tree, etc.)
  # @example Accessing backend-specific methods
  #   if tree.inner_tree.respond_to?(:print_dot_graph)
  #     File.open("tree.dot", "w") { |f| tree.inner_tree.print_dot_graph(f) }
  #   end
  attr_reader :inner_tree

  # The source text, kept so nodes can extract their text via byte offsets
  #
  # @return [String, nil] The original source code
  attr_reader :source

  # @param tree [Object] Backend-specific tree object
  # @param source [String, nil] Source text for node text extraction
  def initialize(tree, source: nil)
    @inner_tree = tree
    @source = source
  end

  # Get the root node of the tree
  #
  # @return [Node, nil] Wrapped root node, or nil if the backend has none
  def root_node
    backend_root = @inner_tree.root_node
    backend_root.nil? ? nil : Node.new(backend_root, source: @source)
  end

  # Mark the tree as edited for incremental re-parsing
  #
  # Forwards the edit description to the backend tree. Use
  # {#supports_editing?} to check for support before calling.
  #
  # @param start_byte [Integer] byte offset where the edit starts
  # @param old_end_byte [Integer] byte offset where the old text ended
  # @param new_end_byte [Integer] byte offset where the new text ends
  # @param start_point [Hash] starting position as `{ row:, column: }`
  # @param old_end_point [Hash] old ending position as `{ row:, column: }`
  # @param new_end_point [Hash] new ending position as `{ row:, column: }`
  # @return [void]
  # @raise [TreeHaver::NotAvailable] if the backend doesn't support incremental parsing
  #
  # @example Replace "1" with "42" in "x = 1"
  #   tree.edit(
  #     start_byte: 4, old_end_byte: 5, new_end_byte: 6,
  #     start_point: { row: 0, column: 4 },
  #     old_end_point: { row: 0, column: 5 },
  #     new_end_point: { row: 0, column: 6 }
  #   )
  def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
    edit_description = {
      start_byte: start_byte,
      old_end_byte: old_end_byte,
      new_end_byte: new_end_byte,
      start_point: start_point,
      old_end_point: old_end_point,
      new_end_point: new_end_point,
    }
    @inner_tree.edit(**edit_description)
  rescue NoMethodError => error
    # Only translate failures that are about the edit method itself;
    # anything else propagates untouched.
    about_edit = error.name == :edit || error.message.include?("edit")
    raise unless about_edit

    raise TreeHaver::NotAvailable,
      "Incremental parsing not supported by current backend. " \
        "Use MRI (ruby_tree_sitter), Rust (tree_stump), or Java (java-tree-sitter) backend."
  end

  # Check if the current backend supports incremental parsing
  #
  # @return [Boolean] true if {#edit} can be called on this tree
  # @example
  #   new_tree =
  #     if tree.supports_editing?
  #       tree.edit(...)
  #       parser.parse_string(tree, edited_source)
  #     else
  #       parser.parse(edited_source)
  #     end
  def supports_editing?
    begin
      # Looking the method up directly rather than asking respond_to?
      @inner_tree.method(:edit)
    rescue NameError
      # NameError is the parent class of NoMethodError, so this catches both
      return false
    end
    true
  end

  # String representation
  # @return [String]
  def inspect
    byte_length = @source&.bytesize
    "#<#{self.class} source_length=#{byte_length || "unknown"}>"
  end

  # Check if tree responds to a method (includes delegation to inner_tree)
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    return true if @inner_tree.respond_to?(method_name, include_private)

    super
  end

  # Delegate unknown methods to the underlying backend-specific tree
  #
  # Any method not defined here is forwarded to {#inner_tree} when the
  # backend tree responds to it; otherwise the usual NoMethodError is raised.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] positional arguments to pass
  # @param kwargs [Hash] keyword arguments to pass
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying tree
  #
  # @example Safe usage with respond_to? check
  #   if tree.respond_to?(:print_dot_graph)
  #     File.open("tree.dot", "w") { |f| tree.print_dot_graph(f) }
  #   end
  def method_missing(method_name, *args, **kwargs, &block)
    return super unless @inner_tree.respond_to?(method_name)

    @inner_tree.public_send(method_name, *args, **kwargs, &block)
  end
end
205
+ end
@@ -9,8 +9,8 @@ module TreeHaver
9
9
  module Version
10
10
  # Current version of the tree_haver gem
11
11
  #
12
- # @return [String] the version string (e.g., "1.0.0")
13
- VERSION = "1.0.0"
12
+ # @return [String] the version string (e.g., "2.0.0")
13
+ VERSION = "2.0.0"
14
14
  end
15
15
 
16
16
  # Traditional location for VERSION constant