tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,579 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Unified Node wrapper providing a consistent API across all backends
5
- #
6
- # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
7
- # and provides a unified interface so code works identically regardless of which backend
8
- # is being used.
9
- #
10
- # The wrapper automatically maps backend differences:
11
- # - TreeStump uses `node.kind` → mapped to `node.type`
12
- # - TreeStump uses `node.is_named?` → mapped to `node.named?`
13
- # - All backends return consistent Point objects from position methods
14
- #
15
- # @example Basic node traversal
16
- # tree = parser.parse(source)
17
- # root = tree.root_node
18
- #
19
- # puts root.type # => "document"
20
- # puts root.start_byte # => 0
21
- # puts root.text # => full source text
22
- #
23
- # root.children.each do |child|
24
- # puts "#{child.type} at line #{child.start_point.row + 1}"
25
- # end
26
- #
27
- # @example Position information
28
- # node = tree.root_node.children.first
29
- #
30
- # # Point objects work as both objects and hashes
31
- # point = node.start_point
32
- # point.row # => 0 (method access)
33
- # point[:row] # => 0 (hash access)
34
- # point.column # => 0
35
- #
36
- # # Byte offsets
37
- # node.start_byte # => 0
38
- # node.end_byte # => 23
39
- #
40
- # @example Error detection
41
- # if node.has_error?
42
- # puts "Parse error in subtree"
43
- # end
44
- #
45
- # if node.missing?
46
- # puts "This node was inserted by error recovery"
47
- # end
48
- #
49
- # @example Accessing backend-specific features
50
- # # Via passthrough (method_missing delegates to inner_node)
51
- # node.grammar_name # TreeStump-specific, automatically delegated
52
- #
53
- # # Or explicitly via inner_node
54
- # node.inner_node.grammar_name # Same result
55
- #
56
- # # Check if backend supports a feature
57
- # if node.inner_node.respond_to?(:some_feature)
58
- # node.some_feature
59
- # end
60
- #
61
- # @note This is the key to tree_haver's "write once, run anywhere" promise
62
- class Node < Base::Node
63
- # The wrapped backend-specific node object
64
- #
65
- # This provides direct access to the underlying backend node for advanced usage
66
- # when you need backend-specific features not exposed by the unified API.
67
- #
68
- # @return [Object] The underlying node (TreeSitter::Node, TreeStump::Node, etc.)
69
- # @example Accessing backend-specific methods
70
- # # TreeStump-specific: grammar information
71
- # if node.inner_node.respond_to?(:grammar_name)
72
- # puts node.inner_node.grammar_name # => "toml"
73
- # puts node.inner_node.grammar_id # => Integer
74
- # end
75
- #
76
- # # Check backend type
77
- # case node.inner_node.class.name
78
- # when /TreeStump/
79
- # # TreeStump-specific code
80
- # when /TreeSitter/
81
- # # ruby_tree_sitter-specific code
82
- # end
83
- # NOTE: inner_node is inherited from Base::Node
84
-
85
- # The source text for text extraction
86
- # @return [String]
87
- # NOTE: source is inherited from Base::Node
88
-
89
# Build a wrapper around a backend node.
#
# Adds no behavior of its own: both arguments are forwarded unchanged to the
# superclass initializer.
#
# @param node [Object] backend-specific node object to wrap
# @param source [String, nil] source text used for text extraction
def initialize(node, source: nil)
  # Bare `super` re-sends `node` and `source:` exactly as received.
  super
end
94
-
95
# Report the node's type as a String, whichever name the backend uses for it.
#
# A backend node answering to `type` is asked first; one that only answers to
# `kind` is asked second. The value is always converted with `to_s`.
#
# @return [String] the node type
# @raise [TreeHaver::Error] when the wrapped node answers to neither method
def type
  return @inner_node.type.to_s if @inner_node.respond_to?(:type)
  return @inner_node.kind.to_s if @inner_node.respond_to?(:kind)

  raise TreeHaver::Error, "Backend node does not support type/kind"
end
112
-
113
# The node type under the name tree_stump uses.
#
# Simply forwards to {#type}, so callers may use either spelling.
#
# @return [String] the node type
def kind
  type
end
122
-
123
# Get the node's start byte offset, as reported by the wrapped backend node.
#
# @return [Integer]
def start_byte
  @inner_node.start_byte
end
126
-
127
# Get the node's end byte offset, as reported by the wrapped backend node.
#
# @return [Integer]
def end_byte
  @inner_node.end_byte
end
132
-
133
# Where the node begins, as a Point (row, column).
#
# The backend node is asked via `start_point` when it answers to that name,
# otherwise via `start_position`. Whatever comes back is re-wrapped in a
# fresh Point.
#
# @return [Point] with row and column accessors
# @raise [TreeHaver::Error] when the backend node offers neither accessor
def start_point
  reader = %i[start_point start_position].find { |name| @inner_node.respond_to?(name) }
  raise TreeHaver::Error, "Backend node does not support start_point/start_position" unless reader

  raw = @inner_node.public_send(reader)
  # Backends hand back either a Hash keyed by :row/:column or an object
  # exposing row/column readers.
  return Point.new(raw[:row], raw[:column]) if raw.is_a?(Hash)

  Point.new(raw.row, raw.column)
end
157
-
158
# Where the node ends, as a Point (row, column).
#
# The backend node is asked via `end_point` when it answers to that name,
# otherwise via `end_position`. Whatever comes back is re-wrapped in a
# fresh Point.
#
# @return [Point] with row and column accessors
# @raise [TreeHaver::Error] when the backend node offers neither accessor
def end_point
  reader = %i[end_point end_position].find { |name| @inner_node.respond_to?(name) }
  raise TreeHaver::Error, "Backend node does not support end_point/end_position" unless reader

  raw = @inner_node.public_send(reader)
  # Backends hand back either a Hash keyed by :row/:column or an object
  # exposing row/column readers.
  return Point.new(raw[:row], raw[:column]) if raw.is_a?(Hash)

  Point.new(raw.row, raw.column)
end
182
-
183
# Line on which the node starts, counted from 1.
#
# {#start_point} rows count from 0; this adds one so the number matches what
# editors and error messages show.
#
# @return [Integer] 1-based line number
def start_line
  zero_based_row = start_point.row
  zero_based_row + 1
end
192
-
193
# Line on which the node ends, counted from 1.
#
# {#end_point} rows count from 0; this adds one to convert.
#
# @return [Integer] 1-based line number
def end_line
  zero_based_row = end_point.row
  zero_based_row + 1
end
201
-
202
# Summarize the node's extent as a Hash.
#
# Lines come from {#start_line}/{#end_line} (1-based); columns are taken
# as-is from {#start_point}/{#end_point}.
#
# @return [Hash{Symbol => Integer}] position hash
# @example
#   node.source_position
#   # => { start_line: 1, end_line: 3, start_column: 0, end_column: 10 }
def source_position
  first_line = start_line
  last_line = end_line
  first_column = start_point.column
  last_column = end_point.column

  {
    start_line: first_line,
    end_line: last_line,
    start_column: first_column,
    end_column: last_column,
  }
end
219
-
220
# Shortcut for the child at index 0.
#
# Provided for iteration code that expects a `first_child` method.
#
# @return [Node, nil] whatever {#child} returns for index 0
def first_child
  child(0)
end
228
-
229
# Get the node's text content.
#
# Resolution order:
# 1. The backend node's own `text`, called without arguments when it takes
#    none (arity 0) or only optional ones (arity -1).
# 2. The backend node's `text(source)` when it has required parameters and a
#    source string is available.
# 3. A slice of the source string between {#start_byte} and {#end_byte}.
#
# The slice is taken with `String#byteslice`: the offsets are byte offsets,
# and indexing with `String#[]` counts characters, which returns the wrong
# span as soon as the source contains multibyte characters.
#
# @return [String] the node text ("" when the byte range lies outside the source)
# @raise [TreeHaver::Error] when no source is available and the backend node
#   cannot produce text without one
def text
  if @inner_node.respond_to?(:text)
    arity = @inner_node.method(:text).arity
    # 0 => no parameters; -1 => optional/splat parameters only.
    return @inner_node.text if arity.zero? || arity == -1
    # Required parameter(s): the backend wants the source handed in.
    return @inner_node.text(@source) if arity >= 1 && @source

    # Remaining arities (-2 and below) fall back to slicing the source.
    raise TreeHaver::Error, "Cannot extract text: backend requires source but none provided" unless @source
  elsif !@source
    raise TreeHaver::Error, "Cannot extract text: node has no text method and no source provided"
  end

  @source.byteslice(start_byte...end_byte) || ""
end
256
-
257
# Ask the wrapped backend node whether it has an error.
#
# @return [Boolean] whatever the backend node's `has_error?` returns
def has_error?
  @inner_node.has_error?
end
262
-
263
# Ask the wrapped backend node whether it is a missing node.
#
# @return [Boolean] whatever the backend node's `missing?` returns
def missing?
  @inner_node.missing?
end
268
-
269
# Whether the node is a named node.
#
# Uses the backend node's `named?` when it has one, otherwise its
# `is_named?`. A backend node offering neither is treated as named.
#
# @return [Boolean]
def named?
  return @inner_node.named? if @inner_node.respond_to?(:named?)
  return @inner_node.is_named? if @inner_node.respond_to?(:is_named?)

  true
end
280
-
281
# Whether the node is structural (non-terminal).
#
# A backend node that defines its own `structural?` (e.g. Citrus) decides for
# itself. For every other backend the answer is {#named?}: in tree-sitter,
# named nodes are the syntactic constructs and anonymous nodes are the
# punctuation.
#
# @return [Boolean] true if this is a structural (non-terminal) node
def structural?
  @inner_node.respond_to?(:structural?) ? @inner_node.structural? : named?
end
300
-
301
# How many children the wrapped backend node reports.
#
# @return [Integer]
def child_count
  @inner_node.child_count
end
306
-
307
# Fetch the child at +index+, wrapped in a Node that shares this node's source.
#
# @param index [Integer] child index
# @return [Node, nil] wrapped child, or nil when the backend returns nil or
#   raises IndexError for the index
def child(index)
  raw_child = @inner_node.child(index)
  Node.new(raw_child, source: @source) unless raw_child.nil?
rescue IndexError
  # Some backends (e.g., MRI w/ ruby_tree_sitter) raise instead of returning nil.
  nil
end
319
-
320
# Get a named child by index.
#
# Returns the nth named child, skipping unnamed children. The backend's own
# `named_child` is used when available; otherwise the children are walked
# and the named ones counted.
#
# A child counts as named when its `named?` (or, failing that, `is_named?`)
# says so; a child offering neither is assumed to be named.
#
# Like {#child}, an IndexError raised by the backend for an out-of-range
# index is turned into nil, so the return contract holds for every backend.
#
# @param index [Integer] named child index (0-based)
# @return [Node, nil] wrapped named child, or nil if index is out of bounds
def named_child(index)
  if @inner_node.respond_to?(:named_child)
    native = @inner_node.named_child(index)
    return if native.nil?

    return Node.new(native, source: @source)
  end

  # Fallback: walk all children, counting only the named ones.
  seen = 0
  (0...child_count).each do |position|
    candidate = @inner_node.child(position)
    next if candidate.nil?

    named =
      if candidate.respond_to?(:named?)
        candidate.named?
      elsif candidate.respond_to?(:is_named?)
        candidate.is_named?
      else
        true # Assume named if we can't determine
      end
    next unless named

    return Node.new(candidate, source: @source) if seen == index

    seen += 1
  end

  nil
rescue IndexError
  # Some backends raise for out-of-range indexes instead of returning nil.
  nil
end
358
-
359
# Number of named children.
#
# Answers with the backend's own `named_child_count` when it has one.
# Otherwise every child is inspected: nil children are skipped, and a child
# is counted when its `named?` (or, failing that, `is_named?`) is truthy.
# A child offering neither method is counted as named.
#
# @return [Integer] number of named children
def named_child_count
  return @inner_node.named_child_count if @inner_node.respond_to?(:named_child_count)

  (0...child_count).count do |position|
    candidate = @inner_node.child(position)
    next false if candidate.nil?

    if candidate.respond_to?(:named?)
      candidate.named?
    elsif candidate.respond_to?(:is_named?)
      candidate.is_named?
    else
      true # Assume named if we can't determine
    end
  end
end
390
-
391
# Collect every child as a wrapped node.
#
# Calls {#child} for each index below {#child_count} and leaves out the
# indexes for which it returns nil.
#
# @return [Array<Node>] wrapped child nodes, in index order
def children
  (0...child_count).each_with_object([]) do |position, wrapped|
    node = child(position)
    wrapped << node unless node.nil?
  end
end
397
-
398
# The subset of {#children} whose `named?` is truthy.
#
# @return [Array<Node>] named child nodes, in their original order
def named_children
  children.select { |node| node.named? }
end
404
-
405
# Iterate over {#children}.
#
# @yield [Node] each child node, in order
# @return [Array<Node>, Enumerator] the children array when a block is
#   given; an Enumerator over this method when it is not
def each(&block)
  return children.each(&block) if block

  to_enum(:each)
end
413
-
414
# Look up a child by its field name.
#
# The name is converted with `to_s` before it reaches the backend. Backends
# without `child_by_field_name` yield nil, since not all of them support
# field names.
#
# @param name [String, Symbol] field name
# @return [Node, nil] wrapped child for that field, or nil when there is none
def child_by_field_name(name)
  return unless @inner_node.respond_to?(:child_by_field_name)

  found = @inner_node.child_by_field_name(name.to_s)
  Node.new(found, source: @source) unless found.nil?
end
428
-
429
- # Alias for child_by_field_name
430
- alias_method :field, :child_by_field_name
431
-
432
# The enclosing node, wrapped with this node's source.
#
# @return [Node, nil] the parent, or nil when the backend node has no
#   `parent` method or reports no parent
def parent
  if @inner_node.respond_to?(:parent)
    raw_parent = @inner_node.parent
    Node.new(raw_parent, source: @source) unless raw_parent.nil?
  end
end
441
-
442
# The sibling that follows this node, wrapped with this node's source.
#
# @return [Node, nil] the next sibling, or nil when the backend node has no
#   `next_sibling` method or reports none
def next_sibling
  if @inner_node.respond_to?(:next_sibling)
    raw_sibling = @inner_node.next_sibling
    Node.new(raw_sibling, source: @source) unless raw_sibling.nil?
  end
end
451
-
452
# The sibling that precedes this node, wrapped with this node's source.
#
# @return [Node, nil] the previous sibling, or nil when the backend node has
#   no `prev_sibling` method or reports none
def prev_sibling
  if @inner_node.respond_to?(:prev_sibling)
    raw_sibling = @inner_node.prev_sibling
    Node.new(raw_sibling, source: @source) unless raw_sibling.nil?
  end
end
461
-
462
# Compact debugging representation: class name, node type and byte range.
#
# @return [String] e.g. "#<TreeHaver::Node type=document bytes=0..23>"
def inspect
  format("#<%s type=%s bytes=%s..%s>", self.class, type, start_byte, end_byte)
end
467
-
468
# String form of the node: its {#text}.
#
# @return [String]
def to_s
  text
end
473
-
474
# Order nodes by where they sit in the source (used by Comparable).
#
# Comparison keys, in order:
# 1. start_byte (earlier nodes come first)
# 2. end_byte (shorter spans come first)
# 3. type (for a deterministic result when both offsets match)
#
# Each later key is consulted only when all earlier ones compare equal.
#
# @param other [Node] node to compare with
# @return [Integer, nil] -1, 0, 1, or nil when +other+ is not a Node
def <=>(other)
  return unless other.is_a?(Node)

  ordering = start_byte <=> other.start_byte
  ordering = end_byte <=> other.end_byte if ordering.zero?
  ordering = type <=> other.type if ordering.zero?
  ordering
end
499
-
500
# Equality by wrapped node, not by position.
#
# Two wrappers are equal when +other+ is a Node and the wrapped backend
# nodes compare equal with `==`. This deliberately differs from {#<=>},
# which orders by position and can return 0 for wrappers around different
# backend nodes; Comparable's default `==` is overridden so that equality
# tests and collection membership follow the wrapped node instead.
#
# @param other [Object] object to compare with
# @return [Boolean] false unless +other+ is a Node; otherwise the result of
#   comparing the two inner nodes
def ==(other)
  other.is_a?(Node) && @inner_node == other.inner_node
end
517
-
518
- # Alias for == to support both styles
519
- alias_method :eql?, :==
520
-
521
# Hash value for this wrapper.
#
# Taken directly from the wrapped backend node, so two wrappers whose inner
# nodes hash alike also hash alike.
#
# @return [Integer] hash value of the inner node
def hash
  @inner_node.hash
end
530
-
531
# Report methods of the wrapped backend node as methods of this wrapper.
#
# Keeps `respond_to?` in step with the delegation done by method_missing.
#
# @param method_name [Symbol] method to check
# @param include_private [Boolean] include private methods
# @return [Boolean] true when the inner node responds to the method;
#   otherwise whatever the superclass reports
def respond_to_missing?(method_name, include_private = false)
  delegated = @inner_node.respond_to?(method_name, include_private)
  delegated || super
end
539
-
540
# Forward unknown methods to the wrapped backend node.
#
# Gives passthrough access to backend-specific features that the unified API
# does not expose. When the inner node responds to the method, the call is
# sent on with `public_send`, carrying all positional arguments, keyword
# arguments and the block; otherwise the call goes to `super`.
#
# @param method_name [Symbol] method to call
# @param args [Array] positional arguments to pass
# @param kwargs [Hash] keyword arguments to pass
# @param block [Proc] block to pass
# @return [Object] result from the underlying node
#
# @example Using TreeStump-specific methods
#   node.grammar_name  # delegated to inner_node
#   node.kind_id       # delegated to inner_node
#
# @example Safe usage with respond_to? check
#   if node.respond_to?(:grammar_name)
#     puts "Using #{node.grammar_name} grammar"
#   end
#
# @example Equivalent explicit access
#   node.inner_node.grammar_name
def method_missing(method_name, *args, **kwargs, &block)
  return super unless @inner_node.respond_to?(method_name)

  @inner_node.public_send(method_name, *args, **kwargs, &block)
end
578
- end
579
- end