tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,6 +94,8 @@ module TreeHaver
94
94
  #
95
95
  # @note This is the key to tree_haver's "write once, run anywhere" promise
96
96
  class Node
97
+ include Comparable
98
+
97
99
  # The wrapped backend-specific node object
98
100
  #
99
101
  # This provides direct access to the underlying backend node for advanced usage
@@ -163,10 +165,20 @@ module TreeHaver
163
165
  def start_point
164
166
  if @inner_node.respond_to?(:start_point)
165
167
  point = @inner_node.start_point
166
- Point.new(point.row, point.column)
168
+ # Handle both Point objects and hashes
169
+ if point.is_a?(Hash)
170
+ Point.new(point[:row], point[:column])
171
+ else
172
+ Point.new(point.row, point.column)
173
+ end
167
174
  elsif @inner_node.respond_to?(:start_position)
168
175
  point = @inner_node.start_position
169
- Point.new(point.row, point.column)
176
+ # Handle both Point objects and hashes
177
+ if point.is_a?(Hash)
178
+ Point.new(point[:row], point[:column])
179
+ else
180
+ Point.new(point.row, point.column)
181
+ end
170
182
  else
171
183
  raise TreeHaver::Error, "Backend node does not support start_point/start_position"
172
184
  end
@@ -178,15 +190,71 @@ module TreeHaver
178
190
  def end_point
179
191
  if @inner_node.respond_to?(:end_point)
180
192
  point = @inner_node.end_point
181
- Point.new(point.row, point.column)
193
+ # Handle both Point objects and hashes
194
+ if point.is_a?(Hash)
195
+ Point.new(point[:row], point[:column])
196
+ else
197
+ Point.new(point.row, point.column)
198
+ end
182
199
  elsif @inner_node.respond_to?(:end_position)
183
200
  point = @inner_node.end_position
184
- Point.new(point.row, point.column)
201
+ # Handle both Point objects and hashes
202
+ if point.is_a?(Hash)
203
+ Point.new(point[:row], point[:column])
204
+ else
205
+ Point.new(point.row, point.column)
206
+ end
185
207
  else
186
208
  raise TreeHaver::Error, "Backend node does not support end_point/end_position"
187
209
  end
188
210
  end
189
211
 
212
+ # Get the 1-based line number where this node starts
213
+ #
214
+ # Convenience method that converts 0-based row to 1-based line number.
215
+ # This is useful for error messages and matching with editor line numbers.
216
+ #
217
+ # @return [Integer] 1-based line number
218
+ def start_line
219
+ start_point.row + 1
220
+ end
221
+
222
+ # Get the 1-based line number where this node ends
223
+ #
224
+ # Convenience method that converts 0-based row to 1-based line number.
225
+ #
226
+ # @return [Integer] 1-based line number
227
+ def end_line
228
+ end_point.row + 1
229
+ end
230
+
231
+ # Get position information as a hash
232
+ #
233
+ # Returns a hash with 1-based line numbers and 0-based columns.
234
+ # This format is compatible with *-merge gems' FileAnalysisBase.
235
+ #
236
+ # @return [Hash{Symbol => Integer}] Position hash
237
+ # @example
238
+ # node.source_position
239
+ # # => { start_line: 1, end_line: 3, start_column: 0, end_column: 10 }
240
+ def source_position
241
+ {
242
+ start_line: start_line,
243
+ end_line: end_line,
244
+ start_column: start_point.column,
245
+ end_column: end_point.column,
246
+ }
247
+ end
248
+
249
+ # Get the first child node
250
+ #
251
+ # Convenience method for iteration patterns that expect first_child.
252
+ #
253
+ # @return [Node, nil] First child node or nil if no children
254
+ def first_child
255
+ child(0)
256
+ end
257
+
190
258
  # Get the node's text content
191
259
  #
192
260
  # @return [String]
@@ -226,6 +294,26 @@ module TreeHaver
226
294
  end
227
295
  end
228
296
 
297
+ # Check if the node is structural (non-terminal)
298
+ #
299
+ # In tree-sitter, this is equivalent to being a "named" node.
300
+ # Named nodes represent actual syntactic constructs (e.g., table, keyvalue, string)
301
+ # while anonymous nodes are syntax/punctuation (e.g., [, =, whitespace).
302
+ #
303
+ # For Citrus backends, this checks if the node is a non-terminal rule.
304
+ #
305
+ # @return [Boolean] true if this is a structural (non-terminal) node
306
+ def structural?
307
+ # Delegate to inner_node if it has its own structural? method (e.g., Citrus)
308
+ if @inner_node.respond_to?(:structural?)
309
+ @inner_node.structural?
310
+ else
311
+ # For tree-sitter backends, named? is equivalent to structural?
312
+ # Named nodes are syntactic constructs; anonymous nodes are punctuation
313
+ named?
314
+ end
315
+ end
316
+
229
317
  # Get the number of children
230
318
  # @return [Integer]
231
319
  def child_count
@@ -242,6 +330,77 @@ module TreeHaver
242
330
  Node.new(child_node, source: @source)
243
331
  end
244
332
 
333
+ # Get a named child by index
334
+ #
335
+ # Returns the nth named child (skipping unnamed children).
336
+ # Uses backend's native named_child if available, otherwise provides fallback.
337
+ #
338
+ # @param index [Integer] Named child index (0-based)
339
+ # @return [Node, nil] Wrapped named child node, or nil if index out of bounds
340
+ def named_child(index)
341
+ # Try native implementation first
342
+ if @inner_node.respond_to?(:named_child)
343
+ child_node = @inner_node.named_child(index)
344
+ return if child_node.nil?
345
+ return Node.new(child_node, source: @source)
346
+ end
347
+
348
+ # Fallback: manually iterate through children and count named ones
349
+ named_count = 0
350
+ (0...child_count).each do |i|
351
+ child_node = @inner_node.child(i)
352
+ next if child_node.nil?
353
+
354
+ # Check if this child is named
355
+ is_named = if child_node.respond_to?(:named?)
356
+ child_node.named?
357
+ elsif child_node.respond_to?(:is_named?)
358
+ child_node.is_named?
359
+ else
360
+ true # Assume named if we can't determine
361
+ end
362
+
363
+ if is_named
364
+ return Node.new(child_node, source: @source) if named_count == index
365
+ named_count += 1
366
+ end
367
+ end
368
+
369
+ nil # Index out of bounds
370
+ end
371
+
372
+ # Get the count of named children
373
+ #
374
+ # Uses backend's native named_child_count if available, otherwise provides fallback.
375
+ #
376
+ # @return [Integer] Number of named children
377
+ def named_child_count
378
+ # Try native implementation first
379
+ if @inner_node.respond_to?(:named_child_count)
380
+ return @inner_node.named_child_count
381
+ end
382
+
383
+ # Fallback: count named children manually
384
+ count = 0
385
+ (0...child_count).each do |i|
386
+ child_node = @inner_node.child(i)
387
+ next if child_node.nil?
388
+
389
+ # Check if this child is named
390
+ is_named = if child_node.respond_to?(:named?)
391
+ child_node.named?
392
+ elsif child_node.respond_to?(:is_named?)
393
+ child_node.is_named?
394
+ else
395
+ true # Assume named if we can't determine
396
+ end
397
+
398
+ count += 1 if is_named
399
+ end
400
+
401
+ count
402
+ end
403
+
245
404
  # Get all children as wrapped nodes
246
405
  #
247
406
  # @return [Array<Node>] Array of wrapped child nodes
@@ -325,6 +484,63 @@ module TreeHaver
325
484
  text
326
485
  end
327
486
 
487
+ # Compare nodes for ordering (used by Comparable module)
488
+ #
489
+ # Nodes are ordered by their position in the source:
490
+ # 1. First by start_byte (earlier nodes come first)
491
+ # 2. Then by end_byte for tie-breaking (shorter spans come first)
492
+ # 3. Then by type for deterministic ordering
493
+ #
494
+ # This allows nodes to be sorted by position and used in sorted collections.
495
+ # The Comparable module provides <, <=, >=, >, between?, and clamp based on this (== is defined separately on this class).
496
+ #
497
+ # @param other [Node] node to compare with
498
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
499
+ def <=>(other)
500
+ return unless other.is_a?(Node)
501
+
502
+ # Compare by position first (start_byte, then end_byte)
503
+ cmp = start_byte <=> other.start_byte
504
+ return cmp if cmp.nonzero?
505
+
506
+ cmp = end_byte <=> other.end_byte
507
+ return cmp if cmp.nonzero?
508
+
509
+ # For nodes at the same position with same span, compare by type
510
+ type <=> other.type
511
+ end
512
+
513
+ # Check equality based on inner_node identity
514
+ #
515
+ # Two nodes are equal if they wrap the same backend node object.
516
+ # This is separate from the <=> comparison which orders by position.
517
+ # Nodes with the same byte span and type but wrapping different backend nodes
518
+ # compare as 0 under <=> (positional ordering) yet are not equal according to == (inner_node equality).
519
+ #
520
+ # Note: We override Comparable's default == behavior to check inner_node identity
521
+ # rather than just relying on <=> returning 0, because we want identity-based
522
+ # equality for testing and collection membership, not position-based equality.
523
+ #
524
+ # @param other [Object] object to compare with
525
+ # @return [Boolean] true if both nodes wrap the same inner_node
526
+ def ==(other)
527
+ return false unless other.is_a?(Node)
528
+ @inner_node == other.inner_node
529
+ end
530
+
531
+ # Alias eql? to == so that Hash and Set lookups (which use eql? together with hash) agree with ==
532
+ alias_method :eql?, :==
533
+
534
+ # Generate hash value for this node
535
+ #
536
+ # Uses the hash of the inner_node to ensure nodes wrapping the same
537
+ # backend node have the same hash value.
538
+ #
539
+ # @return [Integer] hash value
540
+ def hash
541
+ @inner_node.hash
542
+ end
543
+
328
544
  # Check if node responds to a method (includes delegation to inner_node)
329
545
  #
330
546
  # @param method_name [Symbol] method to check
@@ -60,7 +60,7 @@ module TreeHaver
60
60
  # Pattern for valid symbol names (C identifier format)
61
61
  VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
62
62
 
63
- @custom_trusted_directories = []
63
+ @custom_trusted_directories = [] # rubocop:disable ThreadSafety/MutableClassInstanceVariable
64
64
  @mutex = Mutex.new
65
65
 
66
66
  module_function
@@ -75,18 +75,15 @@ module TreeHaver
75
75
  @mutex.synchronize { dirs.concat(@custom_trusted_directories) }
76
76
 
77
77
  # Add directories from environment variable
78
- env_dirs = ENV[TRUSTED_DIRS_ENV_VAR]
79
- if env_dirs
80
- env_dirs.split(",").each do |dir|
81
- expanded = File.expand_path(dir.strip)
82
- # :nocov:
83
- # File.expand_path always returns absolute paths on Unix/macOS.
84
- # This guard exists for defensive programming on exotic platforms
85
- # where expand_path might behave differently, but cannot be tested
86
- # in standard CI environments.
87
- dirs << expanded if expanded.start_with?("/")
88
- # :nocov:
89
- end
78
+ ENV[TRUSTED_DIRS_ENV_VAR]&.split(",")&.each do |dir|
79
+ expanded = File.expand_path(dir.strip)
80
+ # :nocov:
81
+ # File.expand_path always returns absolute paths on Unix/macOS.
82
+ # This guard exists for defensive programming on exotic platforms
83
+ # where expand_path might behave differently, but cannot be tested
84
+ # in standard CI environments.
85
+ dirs << expanded if expanded.start_with?("/")
86
+ # :nocov:
90
87
  end
91
88
 
92
89
  dirs.uniq
@@ -212,21 +209,29 @@ module TreeHaver
212
209
  return false if path.nil?
213
210
 
214
211
  # Resolve the real path to handle symlinks
215
- check_path = begin
216
- File.realpath(path)
217
- rescue Errno::ENOENT
218
- # File doesn't exist yet, check the directory
219
- dir = File.dirname(path)
220
- begin
221
- File.realpath(dir)
222
- rescue Errno::ENOENT
223
- return false
224
- end
225
- end
212
+ check_path = resolve_check_path(path)
213
+ return false if check_path.nil?
226
214
 
227
215
  trusted_directories.any? { |trusted| check_path.start_with?(trusted) }
228
216
  end
229
217
 
218
+ # Resolve a path to its real path for trust checking
219
+ #
220
+ # @param path [String] the path to resolve
221
+ # @return [String, nil] the resolved path or nil if unresolvable
222
+ # @api private
223
+ def resolve_check_path(path)
224
+ File.realpath(path)
225
+ rescue Errno::ENOENT
226
+ # File doesn't exist yet, check the directory
227
+ dir = File.dirname(path)
228
+ begin
229
+ File.realpath(dir)
230
+ rescue Errno::ENOENT
231
+ nil
232
+ end
233
+ end
234
+
230
235
  # Validate a language name is safe
231
236
  #
232
237
  # Language names are used to construct:
@@ -6,6 +6,26 @@ module TreeHaver
6
6
  # This class wraps backend-specific tree objects and provides a unified interface.
7
7
  # It stores the source text to enable text extraction from nodes.
8
8
  #
9
+ # == Wrapping/Unwrapping Contract
10
+ #
11
+ # TreeHaver follows a consistent pattern for object wrapping:
12
+ #
13
+ # 1. **TreeHaver::Parser** (top level) handles ALL wrapping/unwrapping
14
+ # 2. **Backends** work exclusively with raw backend objects
15
+ # 3. **User-facing API** uses only TreeHaver wrapper classes
16
+ #
17
+ # Specifically for trees:
18
+ # - Backend Parser#parse returns raw backend tree (TreeSitter::Tree, TreeStump::Tree, etc.)
19
+ # - TreeHaver::Parser#parse wraps it in TreeHaver::Tree
20
+ # - TreeHaver::Parser#parse_string unwraps old_tree before passing to backend
21
+ # - Backend Parser#parse_string receives raw backend tree, returns raw backend tree
22
+ # - TreeHaver::Parser#parse_string wraps the returned tree
23
+ #
24
+ # This ensures:
25
+ # - Backends are simple and consistent
26
+ # - All complexity is in one place (TreeHaver top level)
27
+ # - Users always work with TreeHaver wrapper classes
28
+ #
9
29
  # @example Basic usage
10
30
  # parser = TreeHaver::Parser.new
11
31
  # parser.language = TreeHaver::Language.toml
@@ -107,14 +127,30 @@ module TreeHaver
107
127
  # # Re-parse with the edited tree for incremental parsing
108
128
  # new_tree = parser.parse_string(tree, "x = 42")
109
129
  def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
110
- @inner_tree.edit(
111
- start_byte: start_byte,
112
- old_end_byte: old_end_byte,
113
- new_end_byte: new_end_byte,
114
- start_point: start_point,
115
- old_end_point: old_end_point,
116
- new_end_point: new_end_point,
117
- )
130
+ # MRI backend (ruby_tree_sitter) requires an InputEdit object
131
+ if defined?(::TreeSitter::InputEdit) && @inner_tree.is_a?(::TreeSitter::Tree)
132
+ input_edit = ::TreeSitter::InputEdit.new
133
+ input_edit.start_byte = start_byte
134
+ input_edit.old_end_byte = old_end_byte
135
+ input_edit.new_end_byte = new_end_byte
136
+
137
+ # Convert hash points to Point objects if needed
138
+ input_edit.start_point = make_point(start_point)
139
+ input_edit.old_end_point = make_point(old_end_point)
140
+ input_edit.new_end_point = make_point(new_end_point)
141
+
142
+ @inner_tree.edit(input_edit)
143
+ else
144
+ # Other backends may accept keyword arguments directly
145
+ @inner_tree.edit(
146
+ start_byte: start_byte,
147
+ old_end_byte: old_end_byte,
148
+ new_end_byte: new_end_byte,
149
+ start_point: start_point,
150
+ old_end_point: old_end_point,
151
+ new_end_point: new_end_point,
152
+ )
153
+ end
118
154
  rescue NoMethodError => e
119
155
  # Re-raise as NotAvailable if it's about the edit method
120
156
  raise unless e.name == :edit || e.message.include?("edit")
@@ -123,6 +159,23 @@ module TreeHaver
123
159
  "Use MRI (ruby_tree_sitter), Rust (tree_stump), or Java (java-tree-sitter) backend."
124
160
  end
125
161
 
162
+ private
163
+
164
+ # Convert a point hash to a TreeSitter::Point if available
165
+ # @api private
166
+ def make_point(point_hash)
167
+ if defined?(::TreeSitter::Point)
168
+ pt = ::TreeSitter::Point.new
169
+ pt.row = point_hash[:row]
170
+ pt.column = point_hash[:column]
171
+ pt
172
+ else
173
+ point_hash
174
+ end
175
+ end
176
+
177
+ public
178
+
126
179
  # Check if the current backend supports incremental parsing
127
180
  #
128
181
  # Incremental parsing allows tree-sitter to reuse unchanged nodes when
@@ -151,7 +204,8 @@ module TreeHaver
151
204
  # String representation
152
205
  # @return [String]
153
206
  def inspect
154
- "#<#{self.class} source_length=#{@source&.bytesize || "unknown"}>"
207
+ inner_class = @inner_tree ? @inner_tree.class.name : "nil"
208
+ "#<#{self.class} source_length=#{@source&.bytesize || "unknown"} inner_tree=#{inner_class}>"
155
209
  end
156
210
 
157
211
  # Check if tree responds to a method (includes delegation to inner_tree)
@@ -9,8 +9,8 @@ module TreeHaver
9
9
  module Version
10
10
  # Current version of the tree_haver gem
11
11
  #
12
- # @return [String] the version string (e.g., "2.0.0")
13
- VERSION = "2.0.0"
12
+ # @return [String] the version string (e.g., "3.1.0")
13
+ VERSION = "3.1.0"
14
14
  end
15
15
 
16
16
  # Traditional location for VERSION constant