tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,1009 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Backends
5
- # FFI-based backend for calling libtree-sitter directly
6
- #
7
- # This backend uses Ruby FFI (JNR-FFI on JRuby) to call the native tree-sitter
8
- # C library without requiring MRI C extensions.
9
- #
10
- # The FFI backend currently supports:
11
- # - Parsing source code
12
- # - AST node traversal
13
- # - Accessing node types and children
14
- #
15
- # Not yet supported:
16
- # - Query API (tree-sitter queries/patterns)
17
- #
18
- # == Tree/Node Architecture
19
- #
20
- # This backend defines raw `FFI::Tree` and `FFI::Node` wrapper classes that
21
- # provide minimal FFI bindings to the tree-sitter C structs. These are **not**
22
- # intended for direct use by application code.
23
- #
24
- # The wrapping hierarchy is:
25
- # FFI::Tree/Node (raw FFI wrappers) → TreeHaver::Tree/Node → Base::Tree/Node
26
- #
27
- # When you use `TreeHaver::Parser#parse`:
28
- # 1. `FFI::Parser#parse` returns an `FFI::Tree` (raw pointer wrapper)
29
- # 2. `TreeHaver::Parser` wraps it in `TreeHaver::Tree` (adds source storage)
30
- # 3. `TreeHaver::Tree#root_node` wraps `FFI::Node` in `TreeHaver::Node`
31
- #
32
- # The `TreeHaver::Tree` and `TreeHaver::Node` wrappers provide the full unified
33
- # API including `#children`, `#text`, `#source`, `#source_position`, etc.
34
- #
35
- # This differs from pure-Ruby backends (Citrus, Parslet, Prism, Psych) which
36
- # define Tree/Node classes that directly inherit from Base::Tree/Base::Node.
37
- #
38
- # @see TreeHaver::Tree The wrapper class users should interact with
39
- # @see TreeHaver::Node The wrapper class users should interact with
40
- # @see TreeHaver::Base::Tree Base class documenting the Tree API contract
41
- # @see TreeHaver::Base::Node Base class documenting the Node API contract
42
- #
43
- # == Platform Compatibility
44
- #
45
- # - MRI Ruby: ✓ Full support
46
- # - JRuby: ✓ Full support (uses JNR-FFI)
47
- # - TruffleRuby: ✗ TruffleRuby's FFI doesn't support STRUCT_BY_VALUE return types
48
- # (used by ts_tree_root_node, ts_node_child, ts_node_start_point, etc.)
49
- #
50
- # @note Requires the `ffi` gem and libtree-sitter shared library to be installed
51
- # @see https://github.com/ffi/ffi Ruby FFI
52
- # @see https://tree-sitter.github.io/tree-sitter/ tree-sitter
53
- module FFI
54
# Availability and capability queries exposed on the backend module itself,
# mirroring the module-level API offered by the other backends.
class << self
  # Report whether this backend can be used in the current process.
  #
  # Usable only when the ffi gem loads (never on TruffleRuby) and the MRI
  # backend's ::TreeSitter::Parser constant has not been defined.
  #
  # @return [Boolean] true if FFI backend can be used
  # @example
  #   puts "FFI backend is ready" if TreeHaver::Backends::FFI.available?
  def available?
    if ffi_gem_available?
      # A loaded ruby_tree_sitter (MRI backend) blocks the FFI backend.
      !defined?(::TreeSitter::Parser)
    else
      false
    end
  end

  # Try (once) to load the ffi gem and remember the outcome.
  #
  # TruffleRuby is rejected without attempting the require because its FFI
  # lacks STRUCT_BY_VALUE return types (ts_tree_root_node, ts_node_child, ...).
  #
  # @return [Boolean] true if the ffi gem was required successfully
  # @api private
  def ffi_gem_available?
    unless @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
      @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable

      @loaded = begin # rubocop:disable ThreadSafety/ClassInstanceVariable
        case RUBY_ENGINE
        # :nocov: TruffleRuby returns false early - subsequent FFI code paths unreachable on TruffleRuby
        when "truffleruby"
          false
        # :nocov:
        else
          require "ffi"
          true
        end
      rescue LoadError
        false
      # :nocov: defensive code - StandardError during require is extremely rare
      rescue StandardError
        false
      # :nocov:
      end
    end

    @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Forget the memoized load result so the next query probes again
  # (primarily for testing).
  #
  # @return [void]
  # @api private
  def reset!
    @loaded = @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Describe what this backend supports.
  #
  # @return [Hash{Symbol => Object}] capability map, empty when unavailable
  # @example
  #   TreeHaver::Backends::FFI.capabilities
  #   # => { backend: :ffi, parse: true, query: false, bytes_field: true, incremental: false }
  def capabilities
    if available?
      {
        backend: :ffi,
        parse: true,
        query: false, # Query API not yet implemented in FFI backend
        bytes_field: true,
        incremental: false,
      }
    else
      {}
    end
  end
end
134
-
135
# Native FFI bindings to libtree-sitter
#
# Loads the tree-sitter runtime library and attaches the core tree-sitter C
# functions. Nothing is loaded at require time; every step here runs lazily
# the first time it is needed.
#
# @api private
module Native
  class << self
    # Lazily extend this module with FFI::Library and define the struct types.
    #
    # @return [Boolean] true once the module has been extended
    # @raise [TreeHaver::NotAvailable] if the ffi gem cannot be used
    def ensure_ffi_extended!
      return true if @ffi_extended

      unless FFI.ffi_gem_available?
        raise TreeHaver::NotAvailable, "FFI gem is not available"
      end

      extend(::FFI::Library)

      define_ts_point_struct!
      define_ts_node_struct!
      @ffi_extended = true
    end

    # Define the TSPoint struct (and its :ts_point by-value typedef) once.
    #
    # Mirrors the C layout: struct { uint32_t row; uint32_t column; }
    # @api private
    def define_ts_point_struct!
      return if const_defined?(:TSPoint, false)

      ts_point_class = Class.new(::FFI::Struct) do
        layout :row,
          :uint32,
          :column,
          :uint32
      end
      const_set(:TSPoint, ts_point_class)
      typedef(ts_point_class.by_value, :ts_point)
    end

    # Define the TSNode struct (and its :ts_node by-value typedef) once.
    #
    # Layout: four uint32 context words followed by the id and tree pointers.
    # @api private
    def define_ts_node_struct!
      return if const_defined?(:TSNode, false)

      ts_node_class = Class.new(::FFI::Struct) do
        layout :context,
          [:uint32, 4],
          :id,
          :pointer,
          :tree,
          :pointer
      end
      const_set(:TSNode, ts_node_class)
      typedef(ts_node_class.by_value, :ts_node)
    end

    # Get the TSNode class, ensuring it's defined
    #
    # @return [Class] the TSNode FFI struct class
    def ts_node_class
      ensure_ffi_extended!
      const_get(:TSNode)
    end

    # Candidate library names for loading libtree-sitter, in priority order.
    #
    # Built on every call so that changes to the environment at runtime are
    # respected. A non-blank TREE_SITTER_RUNTIME_LIB is tried first; a blank
    # (empty or whitespace-only) value is treated as unset.
    #
    # @note TREE_SITTER_LIB is intentionally NOT supported
    # @return [Array<String>] list of library names to try
    def lib_candidates
      [
        runtime_lib_override,
        "tree-sitter",
        "libtree-sitter.so.0",
        "libtree-sitter.so",
        "libtree-sitter.dylib",
        "libtree-sitter.dll",
      ].compact
    end

    # Load the tree-sitter runtime library and attach the C API functions.
    #
    # Tries each candidate from {#lib_candidates} in order until one loads.
    # The module is only marked as loaded after every attach_function call
    # has succeeded.
    #
    # @raise [TreeHaver::NotAvailable] if no library can be loaded
    # @return [void]
    def try_load!
      return if @loaded

      ensure_ffi_extended!

      # Warn about potential conflicts with MRI backend
      if $VERBOSE && defined?(::TreeSitter::Parser)
        warn("TreeHaver: FFI backend loading after ruby_tree_sitter (MRI backend). " \
          "This may cause symbol conflicts due to different libtree-sitter versions. " \
          "Consider using only one backend per process, or set TREE_SITTER_RUNTIME_LIB " \
          "to match the version used by ruby_tree_sitter.")
      end

      last_error = nil
      candidates = lib_candidates
      lib_loaded = candidates.any? do |lib_name|
        ffi_lib(lib_name)
        true
      rescue LoadError => e
        # Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
        last_error = e
        false
      end

      unless lib_loaded
        # :nocov:
        tried = candidates.join(", ")
        override = runtime_lib_override
        env_hint = override ? " TREE_SITTER_RUNTIME_LIB=#{override}." : ""
        msg = if last_error
          "Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
        else
          "Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
        end
        raise TreeHaver::NotAvailable, msg
        # :nocov:
      end

      # Attach functions after lib is selected
      # Note: TruffleRuby's FFI doesn't support STRUCT_BY_VALUE return types,
      # so these attach_function calls will fail on TruffleRuby.
      attach_function(:ts_parser_new, [], :pointer)
      attach_function(:ts_parser_delete, [:pointer], :void)
      attach_function(:ts_parser_set_language, [:pointer, :pointer], :bool)
      attach_function(:ts_parser_parse_string, [:pointer, :pointer, :string, :uint32], :pointer)

      attach_function(:ts_tree_delete, [:pointer], :void)
      attach_function(:ts_tree_root_node, [:pointer], :ts_node)

      attach_function(:ts_node_type, [:ts_node], :string)
      attach_function(:ts_node_child_count, [:ts_node], :uint32)
      attach_function(:ts_node_child, [:ts_node, :uint32], :ts_node)
      attach_function(:ts_node_child_by_field_name, [:ts_node, :string, :uint32], :ts_node)
      attach_function(:ts_node_start_byte, [:ts_node], :uint32)
      attach_function(:ts_node_end_byte, [:ts_node], :uint32)
      attach_function(:ts_node_start_point, [:ts_node], :ts_point)
      attach_function(:ts_node_end_point, [:ts_node], :ts_point)
      attach_function(:ts_node_is_null, [:ts_node], :bool)
      attach_function(:ts_node_is_named, [:ts_node], :bool)
      attach_function(:ts_node_is_missing, [:ts_node], :bool)
      attach_function(:ts_node_has_error, [:ts_node], :bool)

      # Node navigation functions
      attach_function(:ts_node_parent, [:ts_node], :ts_node)
      attach_function(:ts_node_next_sibling, [:ts_node], :ts_node)
      attach_function(:ts_node_prev_sibling, [:ts_node], :ts_node)
      attach_function(:ts_node_next_named_sibling, [:ts_node], :ts_node)
      attach_function(:ts_node_prev_named_sibling, [:ts_node], :ts_node)
      attach_function(:ts_node_named_child, [:ts_node, :uint32], :ts_node)
      attach_function(:ts_node_named_child_count, [:ts_node], :uint32)

      # Descendant lookup functions
      attach_function(:ts_node_descendant_for_byte_range, [:ts_node, :uint32, :uint32], :ts_node)
      attach_function(:ts_node_descendant_for_point_range, [:ts_node, :ts_point, :ts_point], :ts_node)
      attach_function(:ts_node_named_descendant_for_byte_range, [:ts_node, :uint32, :uint32], :ts_node)
      attach_function(:ts_node_named_descendant_for_point_range, [:ts_node, :ts_point, :ts_point], :ts_node)

      # Only mark as fully loaded after all attach_function calls succeed
      @loaded = true
    end

    # @return [Boolean] true once try_load! has completed successfully
    def loaded?
      !!@loaded
    end

    private

    # The TREE_SITTER_RUNTIME_LIB override, or nil when unset or blank.
    #
    # @return [String, nil]
    def runtime_lib_override
      value = ENV["TREE_SITTER_RUNTIME_LIB"]
      return if value.nil? || value.strip.empty?

      value
    end
  end
end
317
-
318
# Represents a tree-sitter language loaded via FFI
#
# Holds a pointer to a TSLanguage struct obtained from a loaded shared
# library, together with the library handle that keeps the pointer valid.
class Language
  include Comparable

  # The FFI pointer to the TSLanguage struct
  # @return [FFI::Pointer]
  attr_reader :pointer

  # The backend this language is for (always :ffi)
  # @return [Symbol]
  attr_reader :backend

  # The path this language was loaded from (if known)
  # @return [String, nil]
  attr_reader :path

  # The symbol name (if known)
  # @return [String, nil]
  attr_reader :symbol

  # @api private
  # @param ptr [FFI::Pointer] pointer to TSLanguage
  # @param lib [FFI::DynamicLibrary, nil] the opened dynamic library
  # @param path [String, nil] path language was loaded from
  # @param symbol [String, nil] symbol name
  def initialize(ptr, lib = nil, path: nil, symbol: nil)
    @pointer = ptr
    @backend = :ffi
    @path = path
    @symbol = symbol
    # Hold on to the DynamicLibrary that produced the pointer so it is not
    # garbage-collected (and unloaded) while the pointer is still in use;
    # an unloaded library would leave the pointer dangling and can segfault
    # once it is handed to native functions.
    @library = lib
  end

  # Order languages by path, then by symbol.
  #
  # Two FFI languages are equal when backend, path and symbol all match
  # (nil path/symbol compare as the empty string).
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] -1, 0, 1, or nil if not comparable
  def <=>(other)
    return unless other.is_a?(Language)
    return unless other.backend == @backend

    by_path = (@path || "") <=> (other.path || "")
    return by_path if by_path.nonzero?

    (@symbol || "") <=> (other.symbol || "")
  end

  # Hash value for this language (for use in Sets/Hashes)
  # @return [Integer]
  def hash
    [@backend, @path, @symbol].hash
  end

  # eql? shares the Comparable-provided == so Hash lookups agree with ==.
  alias_method :eql?, :==

  # Derive a language name from the symbol, falling back to the path.
  #
  # @return [Symbol] language name, or :unknown when neither source helps
  def language_name
    # e.g. "tree_sitter_toml" -> :toml
    return @symbol.to_s.delete_prefix("tree_sitter_").to_sym if @symbol

    # e.g. "/path/to/libtree-sitter-toml.so" -> :toml
    if @path
      derived = LibraryPathUtils.derive_language_name_from_path(@path)
      return derived.to_sym if derived
    end

    :unknown
  end

  # Alias for language_name (API compatibility)
  alias_method :name, :language_name

  # Convert to FFI pointer for passing to native functions
  #
  # @return [FFI::Pointer]
  def to_ptr
    @pointer
  end

  class << self
    # Load a language from a shared library
    #
    # The library must export a function that returns a pointer to a
    # TSLanguage struct. An explicit +symbol:+ or, failing that,
    # ENV["TREE_SITTER_LANG_SYMBOL"] is treated as strict (no fallback).
    # Without an override the candidates are:
    #   1. The symbol guessed from the filename
    #      (e.g., "libtree-sitter-toml.so" → "tree_sitter_toml")
    #   2. The default fallback ("tree_sitter_toml")
    #
    # @param path [String] absolute path to the language shared library
    # @param symbol [String, nil] explicit exported function name (highest precedence)
    # @param name [String, nil] optional logical name (accepted for compatibility, not used)
    # @return [Language] loaded language handle
    # @raise [TreeHaver::NotAvailable] if FFI is not available, the library
    #   cannot be opened, no candidate symbol resolves, or the factory returns NULL
    # @example
    #   lang = TreeHaver::Backends::FFI::Language.from_library(
    #     "/usr/local/lib/libtree-sitter-toml.so",
    #     symbol: "tree_sitter_toml"
    #   )
    def from_library(path, symbol: nil, name: nil)
      raise TreeHaver::NotAvailable, "FFI not available" unless Backends::FFI.available?

      # If ruby_tree_sitter (MRI backend) already loaded this grammar file, the
      # dynamic linker hands back the cached library whose symbols are bound
      # to MRI's statically-linked tree-sitter, which can segfault once the
      # pointer is used with FFI's dynamically-linked libtree-sitter. Which
      # grammars MRI loaded cannot be detected reliably, so only warn.
      if $VERBOSE && defined?(::TreeSitter::Language)
        warn("TreeHaver: FFI backend loading grammar after ruby_tree_sitter (MRI backend). " \
          "This may cause segfaults due to tree-sitter symbol conflicts. " \
          "For reliable operation, use only one backend per process.")
      end

      # Load the core libtree-sitter runtime first so the language library
      # resolves its symbols against the same runtime instance the parser uses.
      Native.try_load!

      begin
        # Prefer RTLD_NOW | RTLD_GLOBAL so the language library links to the
        # already-loaded libtree-sitter; fall back to RTLD_LAZY when those
        # constants are not present.
        flags = if defined?(::FFI::DynamicLibrary::RTLD_NOW) && defined?(::FFI::DynamicLibrary::RTLD_GLOBAL)
          ::FFI::DynamicLibrary::RTLD_NOW | ::FFI::DynamicLibrary::RTLD_GLOBAL
        else
          ::FFI::DynamicLibrary::RTLD_LAZY
        end

        dl = ::FFI::DynamicLibrary.open(path, flags)
      rescue LoadError, RuntimeError => e
        # TruffleRuby raises RuntimeError instead of LoadError when a shared library cannot be opened
        raise TreeHaver::NotAvailable, "Could not open language library at #{path}: #{e.message}"
      end

      requested = symbol || ENV["TREE_SITTER_LANG_SYMBOL"]
      # Use shared utility for consistent symbol derivation across backends
      guessed_symbol = LibraryPathUtils.derive_symbol_from_path(path)
      # An override (arg or ENV) is strict; guessed/default candidates are
      # only attempted when no override was given.
      candidates = if requested && !requested.to_s.empty?
        [requested]
      else
        [guessed_symbol, "tree_sitter_toml"].compact.uniq
      end

      func = nil
      failure = nil
      candidates.each do |candidate|
        addr = dl.find_function(candidate)
        # find_function returns nil for an unknown symbol; report that
        # directly instead of passing nil on to FFI::Function.new.
        if addr.nil?
          failure = "symbol #{candidate} not found"
          next
        end

        func = ::FFI::Function.new(:pointer, [], addr)
        break
      rescue StandardError => e
        failure = e.message
      end

      unless func
        env_used = []
        env_used << "TREE_SITTER_LANG_SYMBOL=#{ENV["TREE_SITTER_LANG_SYMBOL"]}" if ENV["TREE_SITTER_LANG_SYMBOL"]
        detail = env_used.empty? ? "" : " Env overrides: #{env_used.join(", ")}."
        raise TreeHaver::NotAvailable, "Could not resolve language symbol in #{path} (tried: #{candidates.join(", ")}).#{detail} #{failure}"
      end

      # Call the exported factory function to obtain the TSLanguage pointer.
      ptr = func.call
      raise TreeHaver::NotAvailable, "Language factory returned NULL for #{path}" if ptr.null?

      # Hand the opened DynamicLibrary to the instance so the library handle
      # stays alive for the lifetime of the Language.
      # NOTE(review): this records the caller-supplied symbol (possibly nil),
      # not the candidate that actually resolved - confirm that is intended,
      # since symbol feeds #<=>, #hash and #language_name.
      new(ptr, dl, path: path, symbol: symbol)
    end

    # Backward-compatible alias
    alias_method :from_path, :from_library
  end
end
516
-
517
# FFI-based tree-sitter parser
#
# Wraps a TSParser pointer. No finalizer is registered for the pointer (see
# #initialize); the parser is never deleted by this class.
class Parser
  # Create a new parser instance
  #
  # @raise [TreeHaver::NotAvailable] if FFI not available or parser creation fails
  def initialize
    raise TreeHaver::NotAvailable, "FFI not available" unless Backends::FFI.available?

    Native.try_load!
    @parser = Native.ts_parser_new
    raise TreeHaver::NotAvailable, "Failed to create ts_parser" if @parser.null?

    # Note: We intentionally do NOT register a finalizer here because:
    # 1. ts_parser_delete can segfault if called during certain GC scenarios
    # 2. The native library may be unloaded before finalizers run
    # 3. Parser cleanup happens automatically on process exit
    # 4. Long-running processes should explicitly manage parser lifecycle
  end

  # Set the language for this parser
  #
  # The FFI backend receives the wrapped Language object (not an unwrapped
  # handle) because it needs #to_ptr to obtain the FFI pointer.
  #
  # @param lang [Language] the FFI language wrapper (not unwrapped)
  # @return [Language] the language that was set
  # @raise [TreeHaver::NotAvailable] if the language is not a usable FFI
  #   language or tree-sitter refuses it
  def language=(lang)
    ptr = checked_language_pointer(lang)

    # Note: MRI backend conflict is now handled by TreeHaver::BackendConflict
    # at a higher level (in TreeHaver.resolve_backend_module)
    ok = Native.ts_parser_set_language(@parser, ptr)
    raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok

    lang # rubocop:disable Lint/Void (intentional return value)
  end

  # Parse source code into a syntax tree
  #
  # @param source [String] the source code to parse (should be UTF-8)
  # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
  # @raise [TreeHaver::NotAvailable] if parsing fails
  def parse(source)
    src = String(source)
    # The length passed to tree-sitter is the byte size, not the character count.
    tree_ptr = Native.ts_parser_parse_string(@parser, ::FFI::Pointer::NULL, src, src.bytesize)
    raise TreeHaver::NotAvailable, "Parse returned NULL" if tree_ptr.null?

    # Return raw FFI::Tree - TreeHaver::Parser will wrap it
    Tree.new(tree_ptr)
  end

  private

  # Validate +lang+ and return the pointer to pass to ts_parser_set_language.
  #
  # @param lang [Object] candidate language
  # @return [FFI::Pointer] a non-NULL pointer whose address is >= 4096
  # @raise [TreeHaver::NotAvailable] on any failed check
  def checked_language_pointer(lang)
    # Defensive check: ensure we received an FFI Language wrapper
    unless lang.is_a?(Language)
      raise TreeHaver::NotAvailable,
        "FFI backend expected FFI::Language wrapper, got #{lang.class}. " \
          "This usually means TreeHaver::Parser#unwrap_language passed the wrong type. " \
          "Check that language caching respects backend boundaries."
    end

    # Additional check: verify the language is actually for FFI backend
    if lang.respond_to?(:backend) && lang.backend != :ffi
      raise TreeHaver::NotAvailable,
        "FFI backend received Language for wrong backend: #{lang.backend}. " \
          "Expected :ffi backend. Class: #{lang.class}. " \
          "Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
    end

    # The Language keeps its DynamicLibrary in @library; refuse a language
    # that carries no library reference.
    if lang.instance_variable_get(:@library).nil?
      raise TreeHaver::NotAvailable,
        "FFI Language has no library reference. The dynamic library may have been unloaded. " \
          "Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
    end

    ptr = lang.to_ptr
    unless ptr.is_a?(::FFI::Pointer)
      raise TreeHaver::NotAvailable,
        "FFI Language#to_ptr returned #{ptr.class}, expected FFI::Pointer. " \
          "Language class: #{lang.class}. " \
          "Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
    end

    ptr_address = ptr.address

    # Check for NULL (0x0); ptr itself is known to be an FFI::Pointer here.
    if ptr_address.zero?
      raise TreeHaver::NotAvailable,
        "FFI Language has NULL pointer. Language may not have loaded correctly. " \
          "Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
    end

    # Addresses below 4KB are typically unmapped memory; values such as 0x40
    # indicate a corrupted or uninitialized pointer.
    if ptr_address < 4096
      raise TreeHaver::NotAvailable,
        "FFI Language has invalid pointer (address 0x#{ptr_address.to_s(16)}). " \
          "This usually indicates the language library was unloaded or never loaded correctly. " \
          "Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
    end

    ptr
  end
end
630
-
631
# FFI-based tree-sitter tree
#
# Owns a TSTree pointer and registers a finalizer that deletes it.
#
# Unlike Parser, Tree does register a finalizer because:
# 1. Trees are typically short-lived and numerous (one per parse)
# 2. ts_tree_delete is safer than ts_parser_delete during GC
# 3. Memory leaks from accumulated trees are more problematic
# 4. The finalizer silently ignores errors for safety
class Tree
  # Build the proc that releases a tree pointer.
  #
  # Exposed for testing; a finalizer is registered automatically for every
  # Tree instance. The proc is built here, on the class, from the pointer only.
  #
  # @param ptr [FFI::Pointer] pointer to TSTree
  # @return [Proc] finalizer that deletes the tree
  def self.finalizer(ptr)
    proc do
      Native.ts_tree_delete(ptr)
    rescue StandardError
      # Never let a failing delete (library unloaded, invalid pointer)
      # crash the process while the GC is running finalizers.
      nil
    end
  end

  # @api private
  # @param ptr [FFI::Pointer] pointer to TSTree
  def initialize(ptr)
    @ptr = ptr
    release = self.class.finalizer(@ptr)
    ObjectSpace.define_finalizer(self, release)
  end

  # Get the root node of the syntax tree
  #
  # @return [Node] the root node
  def root_node
    Node.new(Native.ts_tree_root_node(@ptr))
  end
end
680
-
681
- # FFI-based tree-sitter node (raw backend node)
682
- #
683
- # This is a **raw backend node** that wraps a TSNode by-value struct from the
684
- # tree-sitter C API. It provides the minimal interface needed for tree-sitter
685
- # operations but is NOT intended for direct use by application code.
686
- #
687
- # == Architecture Note
688
- #
689
- # Unlike pure-Ruby backends (Citrus, Parslet, Prism, Psych) which define Node
690
- # classes that inherit from `TreeHaver::Base::Node`, tree-sitter backends (MRI,
691
- # Rust, FFI, Java) define raw wrapper classes that get wrapped by `TreeHaver::Node`.
692
- #
693
- # The wrapping hierarchy is:
694
- # FFI::Node (this class) → TreeHaver::Node → Base::Node
695
- #
696
- # When you use `TreeHaver::Parser#parse`, the returned tree's nodes are already
697
- # wrapped in `TreeHaver::Node`, which provides the full unified API including:
698
- # - `#children` - Array of child nodes
699
- # - `#text` - Extract text from source
700
- # - `#first_child`, `#last_child` - Convenience accessors
701
- # - `#start_line`, `#end_line` - 1-based line numbers
702
- # - `#source_position` - Hash with position info
703
- # - `#each`, `#map`, etc. - Enumerable methods
704
- # - `#to_s`, `#inspect` - String representations
705
- #
706
- # This raw class only implements methods that require direct FFI calls to the
707
- # tree-sitter C library. The wrapper adds Ruby-level conveniences.
708
- #
709
- # @api private
710
- # @see TreeHaver::Node The wrapper class users should interact with
711
- # @see TreeHaver::Base::Node The base class documenting the full Node API
712
- class Node
713
- include Enumerable
714
-
715
# Wrap a TSNode struct value.
#
# @api private
# @param ts_node_value [Native::TSNode] the TSNode struct (by value)
def initialize(ts_node_value)
  # Kept as the by-value struct; every accessor hands it back to the
  # native functions by value.
  @val = ts_node_value
end
721
-
722
# Type name of this node, as reported by ts_node_type.
#
# @return [String] the node type (e.g., "document", "table", "pair")
def type
  node = @val
  Native.ts_node_type(node)
end
728
-
729
# Number of children of this node, as reported by ts_node_child_count.
#
# @return [Integer] child count
def child_count
  node = @val
  Native.ts_node_child_count(node)
end
735
-
736
- # Get a child by index
737
- #
738
- # @param index [Integer] child index
739
- # @return [Node, nil] child node or nil if index out of bounds
740
- def child(index)
741
- return if index >= child_count || index < 0
742
- child_node = Native.ts_node_child(@val, index)
743
- Node.new(child_node)
744
- end
745
-
746
- # Get a child node by field name
747
- #
748
- # Tree-sitter grammars define named fields for certain child positions.
749
- # For example, in JSON, a "pair" node has "key" and "value" fields.
750
- #
751
- # @param field_name [String] the field name to look up
752
- # @return [Node, nil] the child node, or nil if no child has that field
753
- # @example Get the key from a JSON pair
754
- # pair.child_by_field_name("key") #=> Node (type: "string")
755
- # pair.child_by_field_name("value") #=> Node (type: "string" or "number", etc.)
756
- def child_by_field_name(field_name)
757
- name = String(field_name)
758
- child_node = Native.ts_node_child_by_field_name(@val, name, name.bytesize)
759
- # ts_node_child_by_field_name returns a null node if field not found
760
- return if Native.ts_node_is_null(child_node)
761
-
762
- Node.new(child_node)
763
- end
764
-
765
- # Get start byte offset
766
- #
767
- # @return [Integer]
768
- def start_byte
769
- Native.ts_node_start_byte(@val)
770
- end
771
-
772
- # Get end byte offset
773
- #
774
- # @return [Integer]
775
- def end_byte
776
- Native.ts_node_end_byte(@val)
777
- end
778
-
779
- # Get start point
780
- #
781
- # @return [TreeHaver::Point] with row and column
782
- def start_point
783
- point = Native.ts_node_start_point(@val)
784
- # TSPoint is returned by value as an FFI::Struct with :row and :column fields
785
- TreeHaver::Point.new(point[:row], point[:column])
786
- end
787
-
788
- # Get end point
789
- #
790
- # @return [TreeHaver::Point] with row and column
791
- def end_point
792
- point = Native.ts_node_end_point(@val)
793
- # TSPoint is returned by value as an FFI::Struct with :row and :column fields
794
- TreeHaver::Point.new(point[:row], point[:column])
795
- end
796
-
797
- # Check if node has error
798
- #
799
- # Returns true if this node or any of its descendants have a syntax error.
800
- # This is the FFI equivalent of tree-sitter's ts_node_has_error.
801
- #
802
- # @return [Boolean] true if node subtree contains errors
803
- def has_error?
804
- # Explicit boolean conversion ensures consistent behavior across Ruby versions
805
- # FFI :bool return type may behave differently on some platforms
806
- !!Native.ts_node_has_error(@val)
807
- end
808
-
809
- # Check if this is a MISSING node
810
- #
811
- # A MISSING node represents a token that was expected by the grammar
812
- # but was not found in the source. Tree-sitter inserts MISSING nodes
813
- # to allow parsing to continue despite syntax errors.
814
- #
815
- # @return [Boolean] true if this is a MISSING node
816
- def missing?
817
- !!Native.ts_node_is_missing(@val)
818
- end
819
-
820
- # Check if this is a named node
821
- #
822
- # Named nodes represent syntactic constructs (e.g., "pair", "object").
823
- # Anonymous nodes represent syntax/punctuation (e.g., "{", ",").
824
- #
825
- # @return [Boolean] true if this is a named node
826
- def named?
827
- !!Native.ts_node_is_named(@val)
828
- end
829
-
830
- # Get the parent node
831
- #
832
- # @return [Node, nil] parent node or nil if this is the root
833
- def parent
834
- parent_node = Native.ts_node_parent(@val)
835
- return if Native.ts_node_is_null(parent_node)
836
-
837
- Node.new(parent_node)
838
- end
839
-
840
- # Get the next sibling node
841
- #
842
- # @return [Node, nil] next sibling or nil if none
843
- def next_sibling
844
- sibling = Native.ts_node_next_sibling(@val)
845
- return if Native.ts_node_is_null(sibling)
846
-
847
- Node.new(sibling)
848
- end
849
-
850
- # Get the previous sibling node
851
- #
852
- # @return [Node, nil] previous sibling or nil if none
853
- def prev_sibling
854
- sibling = Native.ts_node_prev_sibling(@val)
855
- return if Native.ts_node_is_null(sibling)
856
-
857
- Node.new(sibling)
858
- end
859
-
860
- # Get the next named sibling node
861
- #
862
- # @return [Node, nil] next named sibling or nil if none
863
- def next_named_sibling
864
- sibling = Native.ts_node_next_named_sibling(@val)
865
- return if Native.ts_node_is_null(sibling)
866
-
867
- Node.new(sibling)
868
- end
869
-
870
- # Get the previous named sibling node
871
- #
872
- # @return [Node, nil] previous named sibling or nil if none
873
- def prev_named_sibling
874
- sibling = Native.ts_node_prev_named_sibling(@val)
875
- return if Native.ts_node_is_null(sibling)
876
-
877
- Node.new(sibling)
878
- end
879
-
880
- # Get a named child by index
881
- #
882
- # @param index [Integer] named child index (0-based)
883
- # @return [Node, nil] named child or nil if index out of bounds
884
- def named_child(index)
885
- return if index < 0 || index >= named_child_count
886
-
887
- child_node = Native.ts_node_named_child(@val, index)
888
- return if Native.ts_node_is_null(child_node)
889
-
890
- Node.new(child_node)
891
- end
892
-
893
- # Get the count of named children
894
- #
895
- # @return [Integer] number of named children
896
- def named_child_count
897
- Native.ts_node_named_child_count(@val)
898
- end
899
-
900
- # Find the smallest descendant that spans the given byte range
901
- #
902
- # @param start_byte [Integer] start byte offset
903
- # @param end_byte [Integer] end byte offset
904
- # @return [Node, nil] descendant node or nil if not found
905
- def descendant_for_byte_range(start_byte, end_byte)
906
- node = Native.ts_node_descendant_for_byte_range(@val, start_byte, end_byte)
907
- return if Native.ts_node_is_null(node)
908
-
909
- Node.new(node)
910
- end
911
-
912
- # Find the smallest named descendant that spans the given byte range
913
- #
914
- # @param start_byte [Integer] start byte offset
915
- # @param end_byte [Integer] end byte offset
916
- # @return [Node, nil] named descendant node or nil if not found
917
- def named_descendant_for_byte_range(start_byte, end_byte)
918
- node = Native.ts_node_named_descendant_for_byte_range(@val, start_byte, end_byte)
919
- return if Native.ts_node_is_null(node)
920
-
921
- Node.new(node)
922
- end
923
-
924
- # Find the smallest descendant that spans the given point range
925
- #
926
- # @param start_point [TreeHaver::Point, Hash] start point with :row and :column
927
- # @param end_point [TreeHaver::Point, Hash] end point with :row and :column
928
- # @return [Node, nil] descendant node or nil if not found
929
- def descendant_for_point_range(start_point, end_point)
930
- start_pt = Native::TSPoint.new
931
- start_pt[:row] = start_point.respond_to?(:row) ? start_point.row : start_point[:row]
932
- start_pt[:column] = start_point.respond_to?(:column) ? start_point.column : start_point[:column]
933
-
934
- end_pt = Native::TSPoint.new
935
- end_pt[:row] = end_point.respond_to?(:row) ? end_point.row : end_point[:row]
936
- end_pt[:column] = end_point.respond_to?(:column) ? end_point.column : end_point[:column]
937
-
938
- node = Native.ts_node_descendant_for_point_range(@val, start_pt, end_pt)
939
- return if Native.ts_node_is_null(node)
940
-
941
- Node.new(node)
942
- end
943
-
944
- # Find the smallest named descendant that spans the given point range
945
- #
946
- # @param start_point [TreeHaver::Point, Hash] start point with :row and :column
947
- # @param end_point [TreeHaver::Point, Hash] end point with :row and :column
948
- # @return [Node, nil] named descendant node or nil if not found
949
- def named_descendant_for_point_range(start_point, end_point)
950
- start_pt = Native::TSPoint.new
951
- start_pt[:row] = start_point.respond_to?(:row) ? start_point.row : start_point[:row]
952
- start_pt[:column] = start_point.respond_to?(:column) ? start_point.column : start_point[:column]
953
-
954
- end_pt = Native::TSPoint.new
955
- end_pt[:row] = end_point.respond_to?(:row) ? end_point.row : end_point[:row]
956
- end_pt[:column] = end_point.respond_to?(:column) ? end_point.column : end_point[:column]
957
-
958
- node = Native.ts_node_named_descendant_for_point_range(@val, start_pt, end_pt)
959
- return if Native.ts_node_is_null(node)
960
-
961
- Node.new(node)
962
- end
963
-
964
- # Iterate over child nodes
965
- #
966
- # @yieldparam child [Node] each child node
967
- # @return [Enumerator, nil] an enumerator if no block given, nil otherwise
968
- def each
969
- return enum_for(:each) unless block_given?
970
-
971
- count = child_count
972
- i = 0
973
- while i < count
974
- child = Native.ts_node_child(@val, i)
975
- yield Node.new(child)
976
- i += 1
977
- end
978
- nil
979
- end
980
-
981
- # Compare nodes for ordering (used by Comparable module)
982
- #
983
- # Nodes are ordered by their position in the source:
984
- # 1. First by start_byte (earlier nodes come first)
985
- # 2. Then by end_byte for tie-breaking (shorter spans come first)
986
- # 3. Then by type for deterministic ordering
987
- #
988
- # @param other [Node] node to compare with
989
- # @return [Integer, nil] -1, 0, 1, or nil if not comparable
990
- def <=>(other)
991
- return unless other.is_a?(Node)
992
-
993
- cmp = start_byte <=> other.start_byte
994
- return cmp if cmp.nonzero?
995
-
996
- cmp = end_byte <=> other.end_byte
997
- return cmp if cmp.nonzero?
998
-
999
- type <=> other.type
1000
- end
1001
- end
1002
-
1003
# Hand the registry a block that reports this backend's availability
# (consulted for RSpec dependency tags).
TreeHaver::BackendRegistry.register_availability_checker(:ffi) { available? }
1007
- end
1008
- end
1009
- end