tree_haver 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +190 -1
- data/CONTRIBUTING.md +100 -0
- data/README.md +342 -11
- data/lib/tree_haver/backends/citrus.rb +141 -20
- data/lib/tree_haver/backends/ffi.rb +338 -141
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/mri.rb +154 -17
- data/lib/tree_haver/backends/rust.rb +59 -16
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +150 -0
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +697 -56
- data.tar.gz.sig +0 -0
- metadata +5 -4
- metadata.gz.sig +0 -0
|
@@ -1,16 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
# The load condition isn't really worth testing, so :nocov:
|
|
5
|
-
# :nocov:
|
|
6
|
-
begin
|
|
7
|
-
require "ffi"
|
|
8
|
-
FFI_AVAILABLE = true
|
|
9
|
-
rescue LoadError
|
|
10
|
-
FFI_AVAILABLE = false
|
|
11
|
-
end
|
|
12
|
-
# :nocov:
|
|
13
|
-
|
|
14
4
|
module Backends
|
|
15
5
|
# FFI-based backend for calling libtree-sitter directly
|
|
16
6
|
#
|
|
@@ -30,146 +20,196 @@ module TreeHaver
|
|
|
30
20
|
# @see https://github.com/ffi/ffi Ruby FFI
|
|
31
21
|
# @see https://tree-sitter.github.io/tree-sitter/ tree-sitter
|
|
32
22
|
module FFI
|
|
23
|
+
# Check if the FFI gem is available (lazy evaluation)
|
|
24
|
+
#
|
|
25
|
+
# This method lazily checks for FFI gem availability to avoid
|
|
26
|
+
# polluting the environment at load time.
|
|
27
|
+
#
|
|
28
|
+
# @return [Boolean] true if FFI gem can be loaded
|
|
29
|
+
# @api private
|
|
30
|
+
def self.ffi_gem_available?
|
|
31
|
+
return @ffi_gem_available if defined?(@ffi_gem_available)
|
|
32
|
+
|
|
33
|
+
@ffi_gem_available = begin
|
|
34
|
+
require "ffi"
|
|
35
|
+
true
|
|
36
|
+
rescue LoadError
|
|
37
|
+
false
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
33
41
|
# Native FFI bindings to libtree-sitter
|
|
34
42
|
#
|
|
35
43
|
# This module handles loading the tree-sitter runtime library and defining
|
|
36
44
|
# FFI function attachments for the core tree-sitter API.
|
|
37
45
|
#
|
|
46
|
+
# All FFI operations are lazy - nothing is loaded until actually needed.
|
|
47
|
+
# This prevents polluting the Ruby environment at require time.
|
|
48
|
+
#
|
|
38
49
|
# @api private
|
|
39
50
|
module Native
|
|
40
|
-
|
|
41
|
-
extend
|
|
42
|
-
|
|
43
|
-
# FFI struct representation of TSNode
|
|
44
|
-
#
|
|
45
|
-
# Mirrors the C struct layout used by tree-sitter. TSNode is passed
|
|
46
|
-
# by value in the tree-sitter C API.
|
|
51
|
+
class << self
|
|
52
|
+
# Lazily extend with FFI::Library only when needed
|
|
47
53
|
#
|
|
48
|
-
# @
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
# @return [Boolean] true if FFI was successfully extended
|
|
55
|
+
def ensure_ffi_extended!
|
|
56
|
+
return true if @ffi_extended
|
|
57
|
+
|
|
58
|
+
unless FFI.ffi_gem_available?
|
|
59
|
+
raise TreeHaver::NotAvailable, "FFI gem is not available"
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
extend(::FFI::Library)
|
|
63
|
+
|
|
64
|
+
define_ts_node_struct!
|
|
65
|
+
@ffi_extended = true
|
|
56
66
|
end
|
|
57
67
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
[
|
|
73
|
-
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
74
|
-
"tree-sitter",
|
|
75
|
-
"libtree-sitter.so.0",
|
|
76
|
-
"libtree-sitter.so",
|
|
77
|
-
"libtree-sitter.dylib",
|
|
78
|
-
"libtree-sitter.dll",
|
|
79
|
-
].compact
|
|
68
|
+
# Define the TSNode struct lazily
|
|
69
|
+
# @api private
|
|
70
|
+
def define_ts_node_struct!
|
|
71
|
+
return if const_defined?(:TSNode, false)
|
|
72
|
+
|
|
73
|
+
# FFI struct representation of TSNode
|
|
74
|
+
# Mirrors the C struct layout used by tree-sitter
|
|
75
|
+
ts_node_class = Class.new(::FFI::Struct) do
|
|
76
|
+
layout :context,
|
|
77
|
+
[:uint32, 4],
|
|
78
|
+
:id,
|
|
79
|
+
:pointer,
|
|
80
|
+
:tree,
|
|
81
|
+
:pointer
|
|
80
82
|
end
|
|
83
|
+
const_set(:TSNode, ts_node_class)
|
|
84
|
+
typedef(ts_node_class.by_value, :ts_node)
|
|
85
|
+
end
|
|
81
86
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# @return [void]
|
|
89
|
-
# @example
|
|
90
|
-
# TreeHaver::Backends::FFI::Native.try_load!
|
|
91
|
-
def try_load!
|
|
92
|
-
return if @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
93
|
-
last_error = nil
|
|
94
|
-
candidates = lib_candidates
|
|
95
|
-
candidates.each do |name|
|
|
96
|
-
ffi_lib(name)
|
|
97
|
-
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
98
|
-
break
|
|
99
|
-
rescue ::FFI::NotFoundError, LoadError => e
|
|
100
|
-
last_error = e
|
|
101
|
-
end
|
|
102
|
-
unless @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
103
|
-
# :nocov:
|
|
104
|
-
# This failure path cannot be tested in a shared test suite because:
|
|
105
|
-
# 1. Once FFI loads a library via ffi_lib, it cannot be unloaded
|
|
106
|
-
# 2. Other tests may load the library first (test order is randomized)
|
|
107
|
-
# 3. The @loaded flag can be reset, but ffi_lib state persists
|
|
108
|
-
# ENV precedence is tested implicitly by parsing tests that work when
|
|
109
|
-
# TREE_SITTER_RUNTIME_LIB is set correctly in the environment.
|
|
110
|
-
tried = candidates.join(", ")
|
|
111
|
-
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
112
|
-
msg = if last_error
|
|
113
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
114
|
-
else
|
|
115
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
116
|
-
end
|
|
117
|
-
raise TreeHaver::NotAvailable, msg
|
|
118
|
-
# :nocov:
|
|
119
|
-
end
|
|
87
|
+
# Get the TSNode class, ensuring it's defined
|
|
88
|
+
# @return [Class] the TSNode FFI struct class
|
|
89
|
+
def ts_node_class
|
|
90
|
+
ensure_ffi_extended!
|
|
91
|
+
const_get(:TSNode)
|
|
92
|
+
end
|
|
120
93
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
94
|
+
# Get list of candidate library names for loading libtree-sitter
|
|
95
|
+
#
|
|
96
|
+
# The list is built dynamically to respect environment variables set at runtime.
|
|
97
|
+
# If TREE_SITTER_RUNTIME_LIB is set, it is tried first.
|
|
98
|
+
#
|
|
99
|
+
# @note TREE_SITTER_LIB is intentionally NOT supported
|
|
100
|
+
# @return [Array<String>] list of library names to try
|
|
101
|
+
def lib_candidates
|
|
102
|
+
[
|
|
103
|
+
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
104
|
+
"tree-sitter",
|
|
105
|
+
"libtree-sitter.so.0",
|
|
106
|
+
"libtree-sitter.so",
|
|
107
|
+
"libtree-sitter.dylib",
|
|
108
|
+
"libtree-sitter.dll",
|
|
109
|
+
].compact
|
|
110
|
+
end
|
|
126
111
|
|
|
127
|
-
|
|
128
|
-
|
|
112
|
+
# Load the tree-sitter runtime library
|
|
113
|
+
#
|
|
114
|
+
# Tries each candidate library name in order until one succeeds.
|
|
115
|
+
# After loading, attaches FFI function definitions for the tree-sitter API.
|
|
116
|
+
#
|
|
117
|
+
# @raise [TreeHaver::NotAvailable] if no library can be loaded
|
|
118
|
+
# @return [void]
|
|
119
|
+
def try_load!
|
|
120
|
+
return if @loaded
|
|
121
|
+
|
|
122
|
+
ensure_ffi_extended!
|
|
123
|
+
|
|
124
|
+
# Warn about potential conflicts with MRI backend
|
|
125
|
+
if defined?(::TreeSitter) && defined?(::TreeSitter::Parser)
|
|
126
|
+
warn("TreeHaver: FFI backend loading after ruby_tree_sitter (MRI backend). " \
|
|
127
|
+
"This may cause symbol conflicts due to different libtree-sitter versions. " \
|
|
128
|
+
"Consider using only one backend per process, or set TREE_SITTER_RUNTIME_LIB " \
|
|
129
|
+
"to match the version used by ruby_tree_sitter.") if $VERBOSE
|
|
130
|
+
end
|
|
129
131
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
132
|
+
last_error = nil
|
|
133
|
+
candidates = lib_candidates
|
|
134
|
+
candidates.each do |name|
|
|
135
|
+
ffi_lib(name)
|
|
136
|
+
@loaded = true
|
|
137
|
+
break
|
|
138
|
+
rescue ::FFI::NotFoundError, LoadError => e
|
|
139
|
+
last_error = e
|
|
133
140
|
end
|
|
134
141
|
|
|
135
|
-
|
|
136
|
-
|
|
142
|
+
unless @loaded
|
|
143
|
+
# :nocov:
|
|
144
|
+
tried = candidates.join(", ")
|
|
145
|
+
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
146
|
+
msg = if last_error
|
|
147
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
148
|
+
else
|
|
149
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
150
|
+
end
|
|
151
|
+
raise TreeHaver::NotAvailable, msg
|
|
152
|
+
# :nocov:
|
|
137
153
|
end
|
|
154
|
+
|
|
155
|
+
# Attach functions after lib is selected
|
|
156
|
+
attach_function(:ts_parser_new, [], :pointer)
|
|
157
|
+
attach_function(:ts_parser_delete, [:pointer], :void)
|
|
158
|
+
attach_function(:ts_parser_set_language, [:pointer, :pointer], :bool)
|
|
159
|
+
attach_function(:ts_parser_parse_string, [:pointer, :pointer, :string, :uint32], :pointer)
|
|
160
|
+
|
|
161
|
+
attach_function(:ts_tree_delete, [:pointer], :void)
|
|
162
|
+
attach_function(:ts_tree_root_node, [:pointer], :ts_node)
|
|
163
|
+
|
|
164
|
+
attach_function(:ts_node_type, [:ts_node], :string)
|
|
165
|
+
attach_function(:ts_node_child_count, [:ts_node], :uint32)
|
|
166
|
+
attach_function(:ts_node_child, [:ts_node, :uint32], :ts_node)
|
|
167
|
+
attach_function(:ts_node_start_byte, [:ts_node], :uint32)
|
|
168
|
+
attach_function(:ts_node_end_byte, [:ts_node], :uint32)
|
|
169
|
+
attach_function(:ts_node_start_point, [:ts_node], :pointer)
|
|
170
|
+
attach_function(:ts_node_end_point, [:ts_node], :pointer)
|
|
171
|
+
attach_function(:ts_node_is_null, [:ts_node], :bool)
|
|
172
|
+
attach_function(:ts_node_is_named, [:ts_node], :bool)
|
|
138
173
|
end
|
|
139
|
-
else
|
|
140
|
-
# :nocov:
|
|
141
|
-
# Fallback stubs when FFI gem is not installed.
|
|
142
|
-
# These paths cannot be tested in a test suite where FFI is a dependency,
|
|
143
|
-
# since the gem is always available. They provide graceful degradation
|
|
144
|
-
# for environments where FFI cannot be installed.
|
|
145
|
-
class << self
|
|
146
|
-
def try_load!
|
|
147
|
-
raise TreeHaver::NotAvailable, "FFI not available"
|
|
148
|
-
end
|
|
149
174
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
end
|
|
175
|
+
def loaded?
|
|
176
|
+
!!@loaded
|
|
153
177
|
end
|
|
154
|
-
# :nocov:
|
|
155
178
|
end
|
|
156
179
|
end
|
|
157
180
|
|
|
158
181
|
class << self
|
|
159
182
|
# Check if the FFI backend is available
|
|
160
183
|
#
|
|
161
|
-
# Returns true if
|
|
162
|
-
#
|
|
184
|
+
# Returns true if:
|
|
185
|
+
# 1. The `ffi` gem is present
|
|
186
|
+
# 2. MRI backend (ruby_tree_sitter) has NOT been loaded
|
|
187
|
+
#
|
|
188
|
+
# FFI and MRI backends conflict at the libtree-sitter level.
|
|
189
|
+
# Once MRI loads, using FFI will cause segfaults.
|
|
163
190
|
#
|
|
164
|
-
# @return [Boolean] true if FFI
|
|
191
|
+
# @return [Boolean] true if FFI backend can be used
|
|
165
192
|
# @example
|
|
166
193
|
# if TreeHaver::Backends::FFI.available?
|
|
167
194
|
# puts "FFI backend is ready"
|
|
168
195
|
# end
|
|
169
196
|
def available?
|
|
170
|
-
return false unless
|
|
171
|
-
|
|
172
|
-
|
|
197
|
+
return false unless TreeHaver::Backends::FFI.ffi_gem_available?
|
|
198
|
+
|
|
199
|
+
# Check if MRI backend has been loaded (which blocks FFI)
|
|
200
|
+
!defined?(::TreeSitter::Parser)
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Reset the load state (primarily for testing)
|
|
204
|
+
#
|
|
205
|
+
# Note: FFI backend doesn't maintain load state like other backends,
|
|
206
|
+
# but this method is provided for API consistency.
|
|
207
|
+
#
|
|
208
|
+
# @return [void]
|
|
209
|
+
# @api private
|
|
210
|
+
def reset!
|
|
211
|
+
# FFI backend uses constant-time availability check, no state to reset
|
|
212
|
+
nil
|
|
173
213
|
end
|
|
174
214
|
|
|
175
215
|
# Get capabilities supported by this backend
|
|
@@ -193,16 +233,70 @@ module TreeHaver
|
|
|
193
233
|
#
|
|
194
234
|
# Holds a pointer to a TSLanguage struct from a loaded shared library.
|
|
195
235
|
class Language
|
|
236
|
+
include Comparable
|
|
237
|
+
|
|
196
238
|
# The FFI pointer to the TSLanguage struct
|
|
197
239
|
# @return [FFI::Pointer]
|
|
198
240
|
attr_reader :pointer
|
|
199
241
|
|
|
242
|
+
# The backend this language is for
|
|
243
|
+
# @return [Symbol]
|
|
244
|
+
attr_reader :backend
|
|
245
|
+
|
|
246
|
+
# The path this language was loaded from (if known)
|
|
247
|
+
# @return [String, nil]
|
|
248
|
+
attr_reader :path
|
|
249
|
+
|
|
250
|
+
# The symbol name (if known)
|
|
251
|
+
# @return [String, nil]
|
|
252
|
+
attr_reader :symbol
|
|
253
|
+
|
|
200
254
|
# @api private
|
|
201
255
|
# @param ptr [FFI::Pointer] pointer to TSLanguage
|
|
202
|
-
|
|
256
|
+
# @param lib [FFI::DynamicLibrary, nil] the opened dynamic library
|
|
257
|
+
# (kept as an instance variable to prevent it being GC'd/unloaded)
|
|
258
|
+
# @param path [String, nil] path language was loaded from
|
|
259
|
+
# @param symbol [String, nil] symbol name
|
|
260
|
+
def initialize(ptr, lib = nil, path: nil, symbol: nil)
|
|
203
261
|
@pointer = ptr
|
|
262
|
+
@backend = :ffi
|
|
263
|
+
@path = path
|
|
264
|
+
@symbol = symbol
|
|
265
|
+
# Keep a reference to the DynamicLibrary that produced the language
|
|
266
|
+
# pointer so it isn't garbage-collected and unloaded while the
|
|
267
|
+
# pointer is still in use by the parser. Not keeping this reference
|
|
268
|
+
# can lead to the language pointer becoming invalid and causing
|
|
269
|
+
# segmentation faults when passed to native functions.
|
|
270
|
+
@library = lib
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Compare languages for equality
|
|
274
|
+
#
|
|
275
|
+
# FFI languages are equal if they have the same backend, path, and symbol.
|
|
276
|
+
# Path and symbol uniquely identify a loaded language.
|
|
277
|
+
#
|
|
278
|
+
# @param other [Object] object to compare with
|
|
279
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
280
|
+
def <=>(other)
|
|
281
|
+
return unless other.is_a?(Language)
|
|
282
|
+
return unless other.backend == @backend
|
|
283
|
+
|
|
284
|
+
# Compare by path first, then symbol
|
|
285
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
286
|
+
return cmp unless cmp.zero?
|
|
287
|
+
|
|
288
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
204
289
|
end
|
|
205
290
|
|
|
291
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
292
|
+
# @return [Integer]
|
|
293
|
+
def hash
|
|
294
|
+
[@backend, @path, @symbol].hash
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Alias eql? to ==
|
|
298
|
+
alias_method :eql?, :==
|
|
299
|
+
|
|
206
300
|
# Convert to FFI pointer for passing to native functions
|
|
207
301
|
#
|
|
208
302
|
# @return [FFI::Pointer]
|
|
@@ -231,8 +325,39 @@ module TreeHaver
|
|
|
231
325
|
class << self
|
|
232
326
|
def from_library(path, symbol: nil, name: nil)
|
|
233
327
|
raise TreeHaver::NotAvailable, "FFI not available" unless Backends::FFI.available?
|
|
328
|
+
|
|
329
|
+
# Check for MRI backend conflict BEFORE loading the grammar
|
|
330
|
+
# If ruby_tree_sitter has already loaded this grammar file, the dynamic
|
|
331
|
+
# linker will return the cached library with symbols resolved against
|
|
332
|
+
# MRI's statically-linked tree-sitter, causing segfaults when FFI
|
|
333
|
+
# tries to use the pointer with its dynamically-linked libtree-sitter.
|
|
334
|
+
if defined?(::TreeSitter::Language)
|
|
335
|
+
# MRI backend has been loaded - check if it might have loaded this grammar
|
|
336
|
+
# We can't reliably detect which grammars MRI loaded, so we warn and
|
|
337
|
+
# attempt to proceed. The segfault will occur when setting language on parser.
|
|
338
|
+
warn("TreeHaver: FFI backend loading grammar after ruby_tree_sitter (MRI backend). " \
|
|
339
|
+
"This may cause segfaults due to tree-sitter symbol conflicts. " \
|
|
340
|
+
"For reliable operation, use only one backend per process.") if $VERBOSE
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
# Ensure the core libtree-sitter runtime is loaded first so
|
|
344
|
+
# the language shared library resolves its symbols against the
|
|
345
|
+
# same runtime. This prevents cases where the language pointer
|
|
346
|
+
# is incompatible with the parser (different lib instances).
|
|
347
|
+
Native.try_load!
|
|
348
|
+
|
|
234
349
|
begin
|
|
235
|
-
|
|
350
|
+
# Prefer resolving symbols immediately and globally so the
|
|
351
|
+
# language library links to the already-loaded libtree-sitter
|
|
352
|
+
# (RTLD_NOW | RTLD_GLOBAL). If those constants are not present
|
|
353
|
+
# fall back to RTLD_LAZY for maximum compatibility.
|
|
354
|
+
flags = if defined?(::FFI::DynamicLibrary::RTLD_NOW) && defined?(::FFI::DynamicLibrary::RTLD_GLOBAL)
|
|
355
|
+
::FFI::DynamicLibrary::RTLD_NOW | ::FFI::DynamicLibrary::RTLD_GLOBAL
|
|
356
|
+
else
|
|
357
|
+
::FFI::DynamicLibrary::RTLD_LAZY
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
dl = ::FFI::DynamicLibrary.open(path, flags)
|
|
236
361
|
rescue LoadError => e
|
|
237
362
|
raise TreeHaver::NotAvailable, "Could not open language library at #{path}: #{e.message}"
|
|
238
363
|
end
|
|
@@ -268,7 +393,9 @@ module TreeHaver
|
|
|
268
393
|
# (e.g., during parsing). Creating the Language handle does not require core to be loaded.
|
|
269
394
|
ptr = func.call
|
|
270
395
|
raise TreeHaver::NotAvailable, "Language factory returned NULL for #{path}" if ptr.null?
|
|
271
|
-
|
|
396
|
+
# Pass the opened DynamicLibrary into the Language instance so the
|
|
397
|
+
# library handle remains alive for the lifetime of the Language.
|
|
398
|
+
new(ptr, dl, path: path, symbol: symbol)
|
|
272
399
|
end
|
|
273
400
|
|
|
274
401
|
# Backward-compatible alias
|
|
@@ -290,31 +417,86 @@ module TreeHaver
|
|
|
290
417
|
@parser = Native.ts_parser_new
|
|
291
418
|
raise TreeHaver::NotAvailable, "Failed to create ts_parser" if @parser.null?
|
|
292
419
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
#
|
|
298
|
-
#
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
begin
|
|
303
|
-
Native.ts_parser_delete(ptr)
|
|
304
|
-
rescue StandardError
|
|
305
|
-
nil
|
|
306
|
-
end
|
|
307
|
-
}
|
|
308
|
-
end
|
|
420
|
+
# Note: We intentionally do NOT register a finalizer here because:
|
|
421
|
+
# 1. ts_parser_delete can segfault if called during certain GC scenarios
|
|
422
|
+
# 2. The native library may be unloaded before finalizers run
|
|
423
|
+
# 3. Parser cleanup happens automatically on process exit
|
|
424
|
+
# 4. Long-running processes should explicitly manage parser lifecycle
|
|
425
|
+
#
|
|
426
|
+
# If you need explicit cleanup in long-running processes, store the
|
|
427
|
+
# parser in an instance variable and call a cleanup method explicitly
|
|
428
|
+
# when done, rather than relying on GC finalizers.
|
|
309
429
|
end
|
|
310
430
|
|
|
311
431
|
# Set the language for this parser
|
|
312
432
|
#
|
|
313
|
-
#
|
|
433
|
+
# Note: FFI backend is special - it receives the wrapped Language object
|
|
434
|
+
# because it needs to call to_ptr to get the FFI pointer. TreeHaver::Parser
|
|
435
|
+
# detects FFI Language wrappers (respond_to?(:to_ptr)) and passes them through.
|
|
436
|
+
#
|
|
437
|
+
# @param lang [Language] the FFI language wrapper (not unwrapped)
|
|
314
438
|
# @return [Language] the language that was set
|
|
315
439
|
# @raise [TreeHaver::NotAvailable] if setting the language fails
|
|
316
440
|
def language=(lang)
|
|
317
|
-
|
|
441
|
+
# Defensive check: ensure we received an FFI Language wrapper
|
|
442
|
+
unless lang.is_a?(Language)
|
|
443
|
+
raise TreeHaver::NotAvailable,
|
|
444
|
+
"FFI backend expected FFI::Language wrapper, got #{lang.class}. " \
|
|
445
|
+
"This usually means TreeHaver::Parser#unwrap_language passed the wrong type. " \
|
|
446
|
+
"Check that language caching respects backend boundaries."
|
|
447
|
+
end
|
|
448
|
+
|
|
449
|
+
# Additional check: verify the language is actually for FFI backend
|
|
450
|
+
if lang.respond_to?(:backend) && lang.backend != :ffi
|
|
451
|
+
raise TreeHaver::NotAvailable,
|
|
452
|
+
"FFI backend received Language for wrong backend: #{lang.backend}. " \
|
|
453
|
+
"Expected :ffi backend. Class: #{lang.class}. " \
|
|
454
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
455
|
+
end
|
|
456
|
+
|
|
457
|
+
# Verify the DynamicLibrary is still valid (not GC'd)
|
|
458
|
+
# The Language stores @library to prevent this, but let's verify
|
|
459
|
+
lib = lang.instance_variable_get(:@library)
|
|
460
|
+
if lib.nil?
|
|
461
|
+
raise TreeHaver::NotAvailable,
|
|
462
|
+
"FFI Language has no library reference. The dynamic library may have been unloaded. " \
|
|
463
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
# Verify the language has a valid pointer
|
|
467
|
+
ptr = lang.to_ptr
|
|
468
|
+
|
|
469
|
+
# Check ptr is actually an FFI::Pointer
|
|
470
|
+
unless ptr.is_a?(::FFI::Pointer)
|
|
471
|
+
raise TreeHaver::NotAvailable,
|
|
472
|
+
"FFI Language#to_ptr returned #{ptr.class}, expected FFI::Pointer. " \
|
|
473
|
+
"Language class: #{lang.class}. " \
|
|
474
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
475
|
+
end
|
|
476
|
+
|
|
477
|
+
ptr_address = ptr.address
|
|
478
|
+
|
|
479
|
+
# Check for NULL (0x0)
|
|
480
|
+
if ptr.nil? || ptr_address.zero?
|
|
481
|
+
raise TreeHaver::NotAvailable,
|
|
482
|
+
"FFI Language has NULL pointer. Language may not have loaded correctly. " \
|
|
483
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
# Check for small invalid addresses (< 4KB are typically unmapped memory)
|
|
487
|
+
# Common invalid addresses like 0x40 (64) indicate corrupted or uninitialized pointers
|
|
488
|
+
if ptr_address < 4096
|
|
489
|
+
raise TreeHaver::NotAvailable,
|
|
490
|
+
"FFI Language has invalid pointer (address 0x#{ptr_address.to_s(16)}). " \
|
|
491
|
+
"This usually indicates the language library was unloaded or never loaded correctly. " \
|
|
492
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
# Note: MRI backend conflict is now handled by TreeHaver::BackendConflict
|
|
496
|
+
# at a higher level (in TreeHaver.resolve_backend_module)
|
|
497
|
+
|
|
498
|
+
# lang is a wrapped FFI::Language that has to_ptr method
|
|
499
|
+
ok = Native.ts_parser_set_language(@parser, ptr)
|
|
318
500
|
raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok
|
|
319
501
|
|
|
320
502
|
lang
|
|
@@ -323,21 +505,27 @@ module TreeHaver
|
|
|
323
505
|
# Parse source code into a syntax tree
|
|
324
506
|
#
|
|
325
507
|
# @param source [String] the source code to parse (should be UTF-8)
|
|
326
|
-
# @return [
|
|
508
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
327
509
|
# @raise [TreeHaver::NotAvailable] if parsing fails
|
|
328
510
|
def parse(source)
|
|
329
511
|
src = String(source)
|
|
330
512
|
tree_ptr = Native.ts_parser_parse_string(@parser, ::FFI::Pointer::NULL, src, src.bytesize)
|
|
331
513
|
raise TreeHaver::NotAvailable, "Parse returned NULL" if tree_ptr.null?
|
|
332
514
|
|
|
333
|
-
|
|
334
|
-
|
|
515
|
+
# Return raw FFI::Tree - TreeHaver::Parser will wrap it
|
|
516
|
+
Tree.new(tree_ptr)
|
|
335
517
|
end
|
|
336
518
|
end
|
|
337
519
|
|
|
338
520
|
# FFI-based tree-sitter tree
|
|
339
521
|
#
|
|
340
522
|
# Wraps a TSTree pointer and manages its lifecycle with a finalizer.
|
|
523
|
+
#
|
|
524
|
+
# Note: Tree objects DO use finalizers (unlike Parser objects) because:
|
|
525
|
+
# 1. Trees are typically short-lived and numerous (one per parse)
|
|
526
|
+
# 2. ts_tree_delete is safer than ts_parser_delete during GC
|
|
527
|
+
# 3. Memory leaks from accumulated trees are more problematic
|
|
528
|
+
# 4. The finalizer silently ignores errors for safety
|
|
341
529
|
class Tree
|
|
342
530
|
# @api private
|
|
343
531
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
@@ -349,12 +537,21 @@ module TreeHaver
|
|
|
349
537
|
# @api private
|
|
350
538
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
351
539
|
class << self
|
|
540
|
+
# Returns a finalizer proc that deletes the tree
|
|
541
|
+
#
|
|
542
|
+
# This is public API for testing purposes, but not intended for
|
|
543
|
+
# direct use. The finalizer is automatically registered when
|
|
544
|
+
# creating a Tree object.
|
|
545
|
+
#
|
|
352
546
|
# @return [Proc] finalizer that deletes the tree
|
|
353
547
|
def finalizer(ptr)
|
|
354
548
|
proc {
|
|
355
549
|
begin
|
|
356
550
|
Native.ts_tree_delete(ptr)
|
|
357
551
|
rescue StandardError
|
|
552
|
+
# Silently ignore errors during finalization to prevent crashes
|
|
553
|
+
# during GC. If the library is unloaded or ptr is invalid, we
|
|
554
|
+
# don't want to crash the entire process.
|
|
358
555
|
nil
|
|
359
556
|
end
|
|
360
557
|
}
|