tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Point class that works as both a Hash and an object with row/column accessors
5
+ #
6
+ # This provides compatibility with code expecting either:
7
+ # - Hash access: point[:row], point[:column]
8
+ # - Method access: point.row, point.column
9
class Point
  # @return [Object] the row value given at construction
  # @return [Object] the column value given at construction
  attr_reader :row, :column

  # @param row [Integer] row of the position
  # @param column [Integer] column of the position
  def initialize(row, column)
    @row = row
    @column = column
  end

  # Hash-like access for compatibility with code that expects a Hash.
  #
  # @param key [Symbol, String] :row / "row" or :column / "column"
  # @return [Object, nil] the matching coordinate, or nil for any other key
  def [](key)
    case key
    when :row, "row" then @row
    when :column, "column" then @column
    end
  end

  # @return [Hash{Symbol => Object}] the point as {row:, column:}
  def to_h
    {row: @row, column: @column}
  end

  # Value equality: two points are equal when both coordinates match.
  #
  # Without this, two separately constructed points describing the same
  # position would never compare equal (default identity comparison).
  #
  # @param other [Object] object to compare with
  # @return [Boolean] true if other is a Point with the same row and column
  def ==(other)
    other.is_a?(Point) && @row == other.row && @column == other.column
  end

  # Keep eql? consistent with == so points behave as Hash keys / in uniq
  alias_method :eql?, :==

  # Hash value derived from the coordinates, consistent with #eql?
  #
  # @return [Integer]
  def hash
    [Point, @row, @column].hash
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{@row}, #{@column})"
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Point row=#{@row} column=#{@column}>"
  end
end
37
+
38
+ # Unified Node wrapper providing a consistent API across all backends
39
+ #
40
+ # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
41
+ # and provides a unified interface so code works identically regardless of which backend
42
+ # is being used.
43
+ #
44
+ # The wrapper automatically maps backend differences:
45
+ # - TreeStump uses `node.kind` → mapped to `node.type`
46
+ # - TreeStump uses `node.is_named?` → mapped to `node.named?`
47
+ # - All backends return consistent Point objects from position methods
48
+ #
49
+ # @example Basic node traversal
50
+ # tree = parser.parse(source)
51
+ # root = tree.root_node
52
+ #
53
+ # puts root.type # => "document"
54
+ # puts root.start_byte # => 0
55
+ # puts root.text # => full source text
56
+ #
57
+ # root.children.each do |child|
58
+ # puts "#{child.type} at line #{child.start_point.row + 1}"
59
+ # end
60
+ #
61
+ # @example Position information
62
+ # node = tree.root_node.children.first
63
+ #
64
+ # # Point objects work as both objects and hashes
65
+ # point = node.start_point
66
+ # point.row # => 0 (method access)
67
+ # point[:row] # => 0 (hash access)
68
+ # point.column # => 0
69
+ #
70
+ # # Byte offsets
71
+ # node.start_byte # => 0
72
+ # node.end_byte # => 23
73
+ #
74
+ # @example Error detection
75
+ # if node.has_error?
76
+ # puts "Parse error in subtree"
77
+ # end
78
+ #
79
+ # if node.missing?
80
+ # puts "This node was inserted by error recovery"
81
+ # end
82
+ #
83
+ # @example Accessing backend-specific features
84
+ # # Via passthrough (method_missing delegates to inner_node)
85
+ # node.grammar_name # TreeStump-specific, automatically delegated
86
+ #
87
+ # # Or explicitly via inner_node
88
+ # node.inner_node.grammar_name # Same result
89
+ #
90
+ # # Check if backend supports a feature
91
+ # if node.inner_node.respond_to?(:some_feature)
92
+ # node.some_feature
93
+ # end
94
+ #
95
+ # @note This is the key to tree_haver's "write once, run anywhere" promise
96
class Node
  include Comparable

  # The wrapped backend-specific node object.
  #
  # Gives direct access to the underlying backend node for features that
  # the unified API does not expose.
  #
  # @return [Object] The underlying node (TreeSitter::Node, TreeStump::Node, etc.)
  # @example Accessing backend-specific methods
  #   if node.inner_node.respond_to?(:grammar_name)
  #     puts node.inner_node.grammar_name
  #   end
  attr_reader :inner_node

  # The source text used for text extraction when the backend node has no
  # text method of its own.
  # @return [String, nil]
  attr_reader :source

  # @param node [Object] Backend-specific node object
  # @param source [String, nil] Source text for text extraction
  def initialize(node, source: nil)
    @inner_node = node
    @source = source
  end

  # Get the node's type/kind as a string.
  #
  # Uses the backend's `type` when available, otherwise its `kind`.
  #
  # @return [String] The node type
  # @raise [TreeHaver::Error] if the backend node supports neither method
  def type
    if @inner_node.respond_to?(:type)
      @inner_node.type.to_s
    elsif @inner_node.respond_to?(:kind)
      @inner_node.kind.to_s
    else
      raise TreeHaver::Error, "Backend node does not support type/kind"
    end
  end

  # Get the node's start byte offset
  # @return [Integer]
  def start_byte
    @inner_node.start_byte
  end

  # Get the node's end byte offset
  # @return [Integer]
  def end_byte
    @inner_node.end_byte
  end

  # Get the node's start position (row, column)
  #
  # @return [Point] with row and column accessors (also works as Hash)
  # @raise [TreeHaver::Error] if the backend exposes neither start_point nor start_position
  def start_point
    backend_point(:start_point, :start_position)
  end

  # Get the node's end position (row, column)
  #
  # @return [Point] with row and column accessors (also works as Hash)
  # @raise [TreeHaver::Error] if the backend exposes neither end_point nor end_position
  def end_point
    backend_point(:end_point, :end_position)
  end

  # Get the node's text content.
  #
  # Prefers the backend's own `text`; otherwise slices the source text
  # between start_byte and end_byte.
  #
  # @return [String]
  # @raise [TreeHaver::Error] if the backend has no text method and no source was given
  def text
    return @inner_node.text if @inner_node.respond_to?(:text)

    unless @source
      raise TreeHaver::Error, "Cannot extract text: node has no text method and no source provided"
    end

    # The offsets are byte offsets, so slice by bytes. String#[] indexes by
    # character and returns the wrong span once the source contains any
    # multibyte character before or inside the node.
    @source.byteslice(start_byte...end_byte) || ""
  end

  # Check if the node has an error
  # @return [Boolean]
  def has_error?
    @inner_node.has_error?
  end

  # Check if the node is missing
  # @return [Boolean] false when the backend does not support missing?
  def missing?
    return false unless @inner_node.respond_to?(:missing?)
    @inner_node.missing?
  end

  # Check if the node is named
  #
  # Uses the backend's `named?` or `is_named?`; defaults to true when the
  # backend supports neither.
  #
  # @return [Boolean]
  def named?
    backend_named?(@inner_node)
  end

  # Check if the node is structural (non-terminal)
  #
  # Delegates to the backend's own structural? when it has one; otherwise
  # falls back to named?.
  #
  # @return [Boolean] true if this is a structural (non-terminal) node
  def structural?
    return @inner_node.structural? if @inner_node.respond_to?(:structural?)
    named?
  end

  # Get the number of children
  # @return [Integer]
  def child_count
    @inner_node.child_count
  end

  # Get a child by index
  #
  # @param index [Integer] Child index
  # @return [Node, nil] Wrapped child node, or nil if the backend returned nil
  def child(index)
    wrap_node(@inner_node.child(index))
  end

  # Get a named child by index
  #
  # Returns the nth named child (skipping unnamed children).
  # Uses the backend's native named_child if available, otherwise walks
  # the children and counts the named ones.
  #
  # @param index [Integer] Named child index (0-based)
  # @return [Node, nil] Wrapped named child node, or nil if index out of bounds
  def named_child(index)
    if @inner_node.respond_to?(:named_child)
      return wrap_node(@inner_node.named_child(index))
    end

    seen = 0
    each_backend_child do |raw_child|
      next unless backend_named?(raw_child)
      return wrap_node(raw_child) if seen == index
      seen += 1
    end

    nil # Index out of bounds
  end

  # Get the count of named children
  #
  # Uses the backend's native named_child_count if available, otherwise
  # counts the named children manually.
  #
  # @return [Integer] Number of named children
  def named_child_count
    if @inner_node.respond_to?(:named_child_count)
      return @inner_node.named_child_count
    end

    total = 0
    each_backend_child { |raw_child| total += 1 if backend_named?(raw_child) }
    total
  end

  # Get all children as wrapped nodes
  #
  # @return [Array<Node>] Array of wrapped child nodes (nil children are dropped)
  def children
    (0...child_count).map { |i| child(i) }.compact
  end

  # Get named children only
  #
  # @return [Array<Node>] Array of named child nodes
  def named_children
    children.select(&:named?)
  end

  # Iterate over children
  #
  # @yield [Node] Each child node
  # @return [Enumerator, Array<Node>] an Enumerator when no block is given,
  #   otherwise the array of children that was iterated
  def each(&block)
    return to_enum(__method__) unless block_given?
    children.each(&block)
  end

  # Get a child by field name
  #
  # @param name [String, Symbol] Field name (converted to String for the backend)
  # @return [Node, nil] The child node for that field; nil when the field is
  #   absent or the backend does not support field names
  def child_by_field_name(name)
    return unless @inner_node.respond_to?(:child_by_field_name)
    wrap_node(@inner_node.child_by_field_name(name.to_s))
  end

  # Alias for child_by_field_name
  alias_method :field, :child_by_field_name

  # Get the parent node
  #
  # @return [Node, nil] The parent node; nil if absent or unsupported by the backend
  def parent
    return unless @inner_node.respond_to?(:parent)
    wrap_node(@inner_node.parent)
  end

  # Get next sibling
  #
  # @return [Node, nil]
  def next_sibling
    return unless @inner_node.respond_to?(:next_sibling)
    wrap_node(@inner_node.next_sibling)
  end

  # Get previous sibling
  #
  # @return [Node, nil]
  def prev_sibling
    return unless @inner_node.respond_to?(:prev_sibling)
    wrap_node(@inner_node.prev_sibling)
  end

  # String representation for debugging
  # @return [String]
  def inspect
    "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
  end

  # String representation (the node's text)
  # @return [String]
  def to_s
    text
  end

  # Compare nodes for ordering (used by Comparable)
  #
  # Nodes are ordered by:
  # 1. start_byte (earlier nodes come first)
  # 2. end_byte for tie-breaking (shorter spans come first)
  # 3. type, for deterministic ordering
  #
  # @param other [Node] node to compare with
  # @return [Integer, nil] -1, 0, 1, or nil if not comparable
  def <=>(other)
    return unless other.is_a?(Node)

    # `&.zero?` lets a nil from an inner <=> (incomparable offsets)
    # propagate as nil instead of raising NoMethodError.
    by_start = start_byte <=> other.start_byte
    return by_start unless by_start&.zero?

    by_end = end_byte <=> other.end_byte
    return by_end unless by_end&.zero?

    # Same span: fall back to the type name
    type <=> other.type
  end

  # Check equality based on the wrapped inner_node
  #
  # Overrides Comparable's <=>-based == so that two wrappers are equal only
  # when their inner nodes are equal, not merely when they occupy the same
  # position with the same type.
  #
  # @param other [Object] object to compare with
  # @return [Boolean] true if both nodes wrap equal inner nodes
  def ==(other)
    return false unless other.is_a?(Node)
    @inner_node == other.inner_node
  end

  # Alias for == to support both styles
  alias_method :eql?, :==

  # Generate hash value for this node
  #
  # Uses the hash of the inner_node so that it is consistent with #eql?.
  #
  # @return [Integer] hash value
  def hash
    @inner_node.hash
  end

  # Check if node responds to a method (includes delegation to inner_node)
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    @inner_node.respond_to?(method_name, include_private) || super
  end

  # Delegate unknown methods to the underlying backend-specific node
  #
  # Anything the inner node publicly responds to is forwarded with the given
  # arguments and block; everything else falls through to the default
  # NoMethodError behaviour.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] positional arguments to pass
  # @param kwargs [Hash] keyword arguments to pass
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying node
  #
  # @example Safe usage with respond_to? check
  #   if node.respond_to?(:grammar_name)
  #     puts "Using #{node.grammar_name} grammar"
  #   end
  def method_missing(method_name, *args, **kwargs, &block)
    if @inner_node.respond_to?(method_name)
      @inner_node.public_send(method_name, *args, **kwargs, &block)
    else
      super
    end
  end

  private

  # Wrap a backend node in a Node sharing this node's source; nil stays nil.
  def wrap_node(raw_node)
    return if raw_node.nil?
    Node.new(raw_node, source: @source)
  end

  # Ask a raw backend node whether it is named (named? / is_named? / default true).
  def backend_named?(raw_node)
    if raw_node.respond_to?(:named?)
      raw_node.named?
    elsif raw_node.respond_to?(:is_named?)
      raw_node.is_named?
    else
      true # Assume named if we can't determine
    end
  end

  # Yield each non-nil raw backend child in index order.
  def each_backend_child
    (0...child_count).each do |i|
      raw_child = @inner_node.child(i)
      yield raw_child unless raw_child.nil?
    end
  end

  # Build a Point from whichever of the two position accessors the backend supports.
  def backend_point(point_method, position_method)
    accessor = [point_method, position_method].find { |name| @inner_node.respond_to?(name) }
    unless accessor
      raise TreeHaver::Error, "Backend node does not support #{point_method}/#{position_method}"
    end

    raw_point = @inner_node.public_send(accessor)
    Point.new(raw_point.row, raw_point.column)
  end
end
526
+ end
@@ -60,7 +60,7 @@ module TreeHaver
60
60
  # Pattern for valid symbol names (C identifier format)
61
61
  VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
62
62
 
63
- @custom_trusted_directories = []
63
+ @custom_trusted_directories = [] # rubocop:disable ThreadSafety/MutableClassInstanceVariable
64
64
  @mutex = Mutex.new
65
65
 
66
66
  module_function
@@ -75,18 +75,15 @@ module TreeHaver
75
75
  @mutex.synchronize { dirs.concat(@custom_trusted_directories) }
76
76
 
77
77
  # Add directories from environment variable
78
- env_dirs = ENV[TRUSTED_DIRS_ENV_VAR]
79
- if env_dirs
80
- env_dirs.split(",").each do |dir|
81
- expanded = File.expand_path(dir.strip)
82
- # :nocov:
83
- # File.expand_path always returns absolute paths on Unix/macOS.
84
- # This guard exists for defensive programming on exotic platforms
85
- # where expand_path might behave differently, but cannot be tested
86
- # in standard CI environments.
87
- dirs << expanded if expanded.start_with?("/")
88
- # :nocov:
89
- end
78
+ ENV[TRUSTED_DIRS_ENV_VAR]&.split(",")&.each do |dir|
79
+ expanded = File.expand_path(dir.strip)
80
+ # :nocov:
81
+ # File.expand_path always returns absolute paths on Unix/macOS.
82
+ # This guard exists for defensive programming on exotic platforms
83
+ # where expand_path might behave differently, but cannot be tested
84
+ # in standard CI environments.
85
+ dirs << expanded if expanded.start_with?("/")
86
+ # :nocov:
90
87
  end
91
88
 
92
89
  dirs.uniq
@@ -186,7 +183,8 @@ module TreeHaver
186
183
  return false if path.include?("/./") || path.end_with?("/.")
187
184
 
188
185
  # Validate extension
189
- return false unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
186
+ # Allow versioned .so files like .so.0, .so.14, etc. (common on Linux)
187
+ return false unless has_valid_extension?(path)
190
188
 
191
189
  # Validate filename portion
192
190
  filename = File.basename(path)
@@ -211,21 +209,29 @@ module TreeHaver
211
209
  return false if path.nil?
212
210
 
213
211
  # Resolve the real path to handle symlinks
214
- check_path = begin
215
- File.realpath(path)
216
- rescue Errno::ENOENT
217
- # File doesn't exist yet, check the directory
218
- dir = File.dirname(path)
219
- begin
220
- File.realpath(dir)
221
- rescue Errno::ENOENT
222
- return false
223
- end
224
- end
212
+ check_path = resolve_check_path(path)
213
+ return false if check_path.nil?
225
214
 
226
215
  trusted_directories.any? { |trusted| check_path.start_with?(trusted) }
227
216
  end
228
217
 
218
# Resolve a path to its real path for trust checking
#
# Symlinks are resolved via File.realpath. When the file itself does not
# exist yet, the real path of its parent directory is returned instead.
# Any other resolution failure (for example a path component that is not
# a directory, a permission error, or a symlink loop) is reported as nil
# rather than raised, so a trust check treats the path as unresolvable
# instead of crashing.
#
# @param path [String] the path to resolve
# @return [String, nil] the resolved path or nil if unresolvable
# @api private
def resolve_check_path(path)
  File.realpath(path)
rescue Errno::ENOENT
  # File doesn't exist yet, check the directory
  begin
    File.realpath(File.dirname(path))
  rescue SystemCallError
    nil
  end
rescue SystemCallError
  # ENOTDIR, EACCES, ELOOP, ...: the path cannot be resolved at all
  nil
end
234
+
229
235
  # Validate a language name is safe
230
236
  #
231
237
  # Language names are used to construct:
@@ -312,8 +318,8 @@ module TreeHaver
312
318
  errors << "Path contains traversal sequence (/../)" if path.include?("/../") || path.end_with?("/..")
313
319
  errors << "Path contains traversal sequence (/./)" if path.include?("/./") || path.end_with?("/.")
314
320
 
315
- unless ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
316
- errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
321
+ unless has_valid_extension?(path)
322
+ errors << "Path does not have allowed extension (.so, .so.X, .dylib, .dll)"
317
323
  end
318
324
 
319
325
  filename = File.basename(path)
@@ -329,5 +335,19 @@ module TreeHaver
329
335
  # Match Windows absolute paths like C:\path or D:/path
330
336
  path.match?(/\A[A-Za-z]:[\\\/]/)
331
337
  end
338
+
339
# @api private
# Decide whether a path ends in an accepted shared-library extension.
#
# Accepted endings are any entry of ALLOWED_EXTENSIONS, plus the versioned
# Linux form ".so.<digits>" (e.g. libtree-sitter.so.0, libtree-sitter.so.14).
#
# @param path [String] path to inspect
# @return [Boolean] true when the ending is accepted
def has_valid_extension?(path)
  plain_suffix = path.end_with?(*ALLOWED_EXTENSIONS)
  plain_suffix || path.match?(/\.so\.\d+\z/)
end
332
352
  end
333
353
  end