tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
|
@@ -1,16 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
# The load condition isn't really worth testing, so :nocov:
|
|
5
|
-
# :nocov:
|
|
6
|
-
begin
|
|
7
|
-
require "ffi"
|
|
8
|
-
FFI_AVAILABLE = true
|
|
9
|
-
rescue LoadError
|
|
10
|
-
FFI_AVAILABLE = false
|
|
11
|
-
end
|
|
12
|
-
# :nocov:
|
|
13
|
-
|
|
14
4
|
module Backends
|
|
15
5
|
# FFI-based backend for calling libtree-sitter directly
|
|
16
6
|
#
|
|
@@ -30,146 +20,198 @@ module TreeHaver
|
|
|
30
20
|
# @see https://github.com/ffi/ffi Ruby FFI
|
|
31
21
|
# @see https://tree-sitter.github.io/tree-sitter/ tree-sitter
|
|
32
22
|
module FFI
|
|
23
|
+
# Check if the FFI gem is available (lazy evaluation)
|
|
24
|
+
#
|
|
25
|
+
# This method lazily checks for FFI gem availability to avoid
|
|
26
|
+
# polluting the environment at load time.
|
|
27
|
+
class << self
|
|
28
|
+
# Check if the FFI gem can be loaded
|
|
29
|
+
# @return [Boolean] true if FFI gem can be loaded
|
|
30
|
+
# @api private
|
|
31
|
+
def ffi_gem_available?
|
|
32
|
+
return @ffi_gem_available if defined?(@ffi_gem_available)
|
|
33
|
+
|
|
34
|
+
@ffi_gem_available = begin
|
|
35
|
+
require "ffi"
|
|
36
|
+
true
|
|
37
|
+
rescue LoadError
|
|
38
|
+
false
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
33
43
|
# Native FFI bindings to libtree-sitter
|
|
34
44
|
#
|
|
35
45
|
# This module handles loading the tree-sitter runtime library and defining
|
|
36
46
|
# FFI function attachments for the core tree-sitter API.
|
|
37
47
|
#
|
|
48
|
+
# All FFI operations are lazy - nothing is loaded until actually needed.
|
|
49
|
+
# This prevents polluting the Ruby environment at require time.
|
|
50
|
+
#
|
|
38
51
|
# @api private
|
|
39
52
|
module Native
|
|
40
|
-
|
|
41
|
-
extend
|
|
42
|
-
|
|
43
|
-
# FFI struct representation of TSNode
|
|
44
|
-
#
|
|
45
|
-
# Mirrors the C struct layout used by tree-sitter. TSNode is passed
|
|
46
|
-
# by value in the tree-sitter C API.
|
|
53
|
+
class << self
|
|
54
|
+
# Lazily extend with FFI::Library only when needed
|
|
47
55
|
#
|
|
48
|
-
# @
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
# @return [Boolean] true if FFI was successfully extended
|
|
57
|
+
def ensure_ffi_extended!
|
|
58
|
+
return true if @ffi_extended
|
|
59
|
+
|
|
60
|
+
unless FFI.ffi_gem_available?
|
|
61
|
+
raise TreeHaver::NotAvailable, "FFI gem is not available"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
extend(::FFI::Library)
|
|
65
|
+
|
|
66
|
+
define_ts_node_struct!
|
|
67
|
+
@ffi_extended = true
|
|
56
68
|
end
|
|
57
69
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
[
|
|
73
|
-
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
74
|
-
"tree-sitter",
|
|
75
|
-
"libtree-sitter.so.0",
|
|
76
|
-
"libtree-sitter.so",
|
|
77
|
-
"libtree-sitter.dylib",
|
|
78
|
-
"libtree-sitter.dll",
|
|
79
|
-
].compact
|
|
70
|
+
# Define the TSNode struct lazily
|
|
71
|
+
# @api private
|
|
72
|
+
def define_ts_node_struct!
|
|
73
|
+
return if const_defined?(:TSNode, false)
|
|
74
|
+
|
|
75
|
+
# FFI struct representation of TSNode
|
|
76
|
+
# Mirrors the C struct layout used by tree-sitter
|
|
77
|
+
ts_node_class = Class.new(::FFI::Struct) do
|
|
78
|
+
layout :context,
|
|
79
|
+
[:uint32, 4],
|
|
80
|
+
:id,
|
|
81
|
+
:pointer,
|
|
82
|
+
:tree,
|
|
83
|
+
:pointer
|
|
80
84
|
end
|
|
85
|
+
const_set(:TSNode, ts_node_class)
|
|
86
|
+
typedef(ts_node_class.by_value, :ts_node)
|
|
87
|
+
end
|
|
81
88
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# @return [void]
|
|
89
|
-
# @example
|
|
90
|
-
# TreeHaver::Backends::FFI::Native.try_load!
|
|
91
|
-
def try_load!
|
|
92
|
-
return if @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
93
|
-
last_error = nil
|
|
94
|
-
candidates = lib_candidates
|
|
95
|
-
candidates.each do |name|
|
|
96
|
-
ffi_lib(name)
|
|
97
|
-
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
98
|
-
break
|
|
99
|
-
rescue ::FFI::NotFoundError, LoadError => e
|
|
100
|
-
last_error = e
|
|
101
|
-
end
|
|
102
|
-
unless @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
103
|
-
# :nocov:
|
|
104
|
-
# This failure path cannot be tested in a shared test suite because:
|
|
105
|
-
# 1. Once FFI loads a library via ffi_lib, it cannot be unloaded
|
|
106
|
-
# 2. Other tests may load the library first (test order is randomized)
|
|
107
|
-
# 3. The @loaded flag can be reset, but ffi_lib state persists
|
|
108
|
-
# ENV precedence is tested implicitly by parsing tests that work when
|
|
109
|
-
# TREE_SITTER_RUNTIME_LIB is set correctly in the environment.
|
|
110
|
-
tried = candidates.join(", ")
|
|
111
|
-
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
112
|
-
msg = if last_error
|
|
113
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
114
|
-
else
|
|
115
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
116
|
-
end
|
|
117
|
-
raise TreeHaver::NotAvailable, msg
|
|
118
|
-
# :nocov:
|
|
119
|
-
end
|
|
89
|
+
# Get the TSNode class, ensuring it's defined
|
|
90
|
+
# @return [Class] the TSNode FFI struct class
|
|
91
|
+
def ts_node_class
|
|
92
|
+
ensure_ffi_extended!
|
|
93
|
+
const_get(:TSNode)
|
|
94
|
+
end
|
|
120
95
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
96
|
+
# Get list of candidate library names for loading libtree-sitter
|
|
97
|
+
#
|
|
98
|
+
# The list is built dynamically to respect environment variables set at runtime.
|
|
99
|
+
# If TREE_SITTER_RUNTIME_LIB is set, it is tried first.
|
|
100
|
+
#
|
|
101
|
+
# @note TREE_SITTER_LIB is intentionally NOT supported
|
|
102
|
+
# @return [Array<String>] list of library names to try
|
|
103
|
+
def lib_candidates
|
|
104
|
+
[
|
|
105
|
+
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
106
|
+
"tree-sitter",
|
|
107
|
+
"libtree-sitter.so.0",
|
|
108
|
+
"libtree-sitter.so",
|
|
109
|
+
"libtree-sitter.dylib",
|
|
110
|
+
"libtree-sitter.dll",
|
|
111
|
+
].compact
|
|
112
|
+
end
|
|
126
113
|
|
|
127
|
-
|
|
128
|
-
|
|
114
|
+
# Load the tree-sitter runtime library
|
|
115
|
+
#
|
|
116
|
+
# Tries each candidate library name in order until one succeeds.
|
|
117
|
+
# After loading, attaches FFI function definitions for the tree-sitter API.
|
|
118
|
+
#
|
|
119
|
+
# @raise [TreeHaver::NotAvailable] if no library can be loaded
|
|
120
|
+
# @return [void]
|
|
121
|
+
def try_load!
|
|
122
|
+
return if @loaded
|
|
123
|
+
|
|
124
|
+
ensure_ffi_extended!
|
|
125
|
+
|
|
126
|
+
# Warn about potential conflicts with MRI backend
|
|
127
|
+
if defined?(::TreeSitter) && defined?(::TreeSitter::Parser)
|
|
128
|
+
warn("TreeHaver: FFI backend loading after ruby_tree_sitter (MRI backend). " \
|
|
129
|
+
"This may cause symbol conflicts due to different libtree-sitter versions. " \
|
|
130
|
+
"Consider using only one backend per process, or set TREE_SITTER_RUNTIME_LIB " \
|
|
131
|
+
"to match the version used by ruby_tree_sitter.") if $VERBOSE
|
|
132
|
+
end
|
|
129
133
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
134
|
+
last_error = nil
|
|
135
|
+
candidates = lib_candidates
|
|
136
|
+
candidates.each do |name|
|
|
137
|
+
ffi_lib(name)
|
|
138
|
+
@loaded = true
|
|
139
|
+
break
|
|
140
|
+
rescue ::FFI::NotFoundError, LoadError => e
|
|
141
|
+
last_error = e
|
|
133
142
|
end
|
|
134
143
|
|
|
135
|
-
|
|
136
|
-
|
|
144
|
+
unless @loaded
|
|
145
|
+
# :nocov:
|
|
146
|
+
tried = candidates.join(", ")
|
|
147
|
+
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
148
|
+
msg = if last_error
|
|
149
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
150
|
+
else
|
|
151
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
152
|
+
end
|
|
153
|
+
raise TreeHaver::NotAvailable, msg
|
|
154
|
+
# :nocov:
|
|
137
155
|
end
|
|
156
|
+
|
|
157
|
+
# Attach functions after lib is selected
|
|
158
|
+
attach_function(:ts_parser_new, [], :pointer)
|
|
159
|
+
attach_function(:ts_parser_delete, [:pointer], :void)
|
|
160
|
+
attach_function(:ts_parser_set_language, [:pointer, :pointer], :bool)
|
|
161
|
+
attach_function(:ts_parser_parse_string, [:pointer, :pointer, :string, :uint32], :pointer)
|
|
162
|
+
|
|
163
|
+
attach_function(:ts_tree_delete, [:pointer], :void)
|
|
164
|
+
attach_function(:ts_tree_root_node, [:pointer], :ts_node)
|
|
165
|
+
|
|
166
|
+
attach_function(:ts_node_type, [:ts_node], :string)
|
|
167
|
+
attach_function(:ts_node_child_count, [:ts_node], :uint32)
|
|
168
|
+
attach_function(:ts_node_child, [:ts_node, :uint32], :ts_node)
|
|
169
|
+
attach_function(:ts_node_start_byte, [:ts_node], :uint32)
|
|
170
|
+
attach_function(:ts_node_end_byte, [:ts_node], :uint32)
|
|
171
|
+
attach_function(:ts_node_start_point, [:ts_node], :pointer)
|
|
172
|
+
attach_function(:ts_node_end_point, [:ts_node], :pointer)
|
|
173
|
+
attach_function(:ts_node_is_null, [:ts_node], :bool)
|
|
174
|
+
attach_function(:ts_node_is_named, [:ts_node], :bool)
|
|
138
175
|
end
|
|
139
|
-
else
|
|
140
|
-
# :nocov:
|
|
141
|
-
# Fallback stubs when FFI gem is not installed.
|
|
142
|
-
# These paths cannot be tested in a test suite where FFI is a dependency,
|
|
143
|
-
# since the gem is always available. They provide graceful degradation
|
|
144
|
-
# for environments where FFI cannot be installed.
|
|
145
|
-
class << self
|
|
146
|
-
def try_load!
|
|
147
|
-
raise TreeHaver::NotAvailable, "FFI not available"
|
|
148
|
-
end
|
|
149
176
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
end
|
|
177
|
+
def loaded?
|
|
178
|
+
!!@loaded
|
|
153
179
|
end
|
|
154
|
-
# :nocov:
|
|
155
180
|
end
|
|
156
181
|
end
|
|
157
182
|
|
|
158
183
|
class << self
|
|
159
184
|
# Check if the FFI backend is available
|
|
160
185
|
#
|
|
161
|
-
# Returns true if
|
|
162
|
-
#
|
|
186
|
+
# Returns true if:
|
|
187
|
+
# 1. The `ffi` gem is present
|
|
188
|
+
# 2. MRI backend (ruby_tree_sitter) has NOT been loaded
|
|
189
|
+
#
|
|
190
|
+
# FFI and MRI backends conflict at the libtree-sitter level.
|
|
191
|
+
# Once MRI loads, using FFI will cause segfaults.
|
|
163
192
|
#
|
|
164
|
-
# @return [Boolean] true if FFI
|
|
193
|
+
# @return [Boolean] true if FFI backend can be used
|
|
165
194
|
# @example
|
|
166
195
|
# if TreeHaver::Backends::FFI.available?
|
|
167
196
|
# puts "FFI backend is ready"
|
|
168
197
|
# end
|
|
169
198
|
def available?
|
|
170
|
-
return false unless
|
|
171
|
-
|
|
172
|
-
|
|
199
|
+
return false unless TreeHaver::Backends::FFI.ffi_gem_available?
|
|
200
|
+
|
|
201
|
+
# Check if MRI backend has been loaded (which blocks FFI)
|
|
202
|
+
!defined?(::TreeSitter::Parser)
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# Reset the load state (primarily for testing)
|
|
206
|
+
#
|
|
207
|
+
# Note: FFI backend doesn't maintain load state like other backends,
|
|
208
|
+
# but this method is provided for API consistency.
|
|
209
|
+
#
|
|
210
|
+
# @return [void]
|
|
211
|
+
# @api private
|
|
212
|
+
def reset!
|
|
213
|
+
# Intentional no-op: available? relies on the memoized gem check and defined?(::TreeSitter::Parser), so there is nothing to reset here
|
|
214
|
+
nil
|
|
173
215
|
end
|
|
174
216
|
|
|
175
217
|
# Get capabilities supported by this backend
|
|
@@ -193,16 +235,70 @@ module TreeHaver
|
|
|
193
235
|
#
|
|
194
236
|
# Holds a pointer to a TSLanguage struct from a loaded shared library.
|
|
195
237
|
class Language
|
|
238
|
+
include Comparable
|
|
239
|
+
|
|
196
240
|
# The FFI pointer to the TSLanguage struct
|
|
197
241
|
# @return [FFI::Pointer]
|
|
198
242
|
attr_reader :pointer
|
|
199
243
|
|
|
244
|
+
# The backend this language is for
|
|
245
|
+
# @return [Symbol]
|
|
246
|
+
attr_reader :backend
|
|
247
|
+
|
|
248
|
+
# The path this language was loaded from (if known)
|
|
249
|
+
# @return [String, nil]
|
|
250
|
+
attr_reader :path
|
|
251
|
+
|
|
252
|
+
# The symbol name (if known)
|
|
253
|
+
# @return [String, nil]
|
|
254
|
+
attr_reader :symbol
|
|
255
|
+
|
|
200
256
|
# @api private
|
|
201
257
|
# @param ptr [FFI::Pointer] pointer to TSLanguage
|
|
202
|
-
|
|
258
|
+
# @param lib [FFI::DynamicLibrary, nil] the opened dynamic library
|
|
259
|
+
# (kept as an instance variable to prevent it being GC'd/unloaded)
|
|
260
|
+
# @param path [String, nil] path language was loaded from
|
|
261
|
+
# @param symbol [String, nil] symbol name
|
|
262
|
+
def initialize(ptr, lib = nil, path: nil, symbol: nil)
|
|
203
263
|
@pointer = ptr
|
|
264
|
+
@backend = :ffi
|
|
265
|
+
@path = path
|
|
266
|
+
@symbol = symbol
|
|
267
|
+
# Keep a reference to the DynamicLibrary that produced the language
|
|
268
|
+
# pointer so it isn't garbage-collected and unloaded while the
|
|
269
|
+
# pointer is still in use by the parser. Not keeping this reference
|
|
270
|
+
# can lead to the language pointer becoming invalid and causing
|
|
271
|
+
# segmentation faults when passed to native functions.
|
|
272
|
+
@library = lib
|
|
204
273
|
end
|
|
205
274
|
|
|
275
|
+
# Compare languages for equality
|
|
276
|
+
#
|
|
277
|
+
# FFI languages are equal if they have the same backend, path, and symbol.
|
|
278
|
+
# Path and symbol uniquely identify a loaded language.
|
|
279
|
+
#
|
|
280
|
+
# @param other [Object] object to compare with
|
|
281
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
282
|
+
def <=>(other)
|
|
283
|
+
return unless other.is_a?(Language)
|
|
284
|
+
return unless other.backend == @backend
|
|
285
|
+
|
|
286
|
+
# Compare by path first, then symbol
|
|
287
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
288
|
+
return cmp if cmp.nonzero?
|
|
289
|
+
|
|
290
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
294
|
+
# @return [Integer]
|
|
295
|
+
def hash
|
|
296
|
+
[@backend, @path, @symbol].hash
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
# Alias eql? to ==
|
|
300
|
+
alias_method :eql?, :==
|
|
301
|
+
|
|
206
302
|
# Convert to FFI pointer for passing to native functions
|
|
207
303
|
#
|
|
208
304
|
# @return [FFI::Pointer]
|
|
@@ -231,8 +327,39 @@ module TreeHaver
|
|
|
231
327
|
class << self
|
|
232
328
|
def from_library(path, symbol: nil, name: nil)
|
|
233
329
|
raise TreeHaver::NotAvailable, "FFI not available" unless Backends::FFI.available?
|
|
330
|
+
|
|
331
|
+
# Check for MRI backend conflict BEFORE loading the grammar
|
|
332
|
+
# If ruby_tree_sitter has already loaded this grammar file, the dynamic
|
|
333
|
+
# linker will return the cached library with symbols resolved against
|
|
334
|
+
# MRI's statically-linked tree-sitter, causing segfaults when FFI
|
|
335
|
+
# tries to use the pointer with its dynamically-linked libtree-sitter.
|
|
336
|
+
if defined?(::TreeSitter::Language)
|
|
337
|
+
# MRI backend has been loaded - check if it might have loaded this grammar
|
|
338
|
+
# We can't reliably detect which grammars MRI loaded, so we warn and
|
|
339
|
+
# attempt to proceed. If the conflict is real, the segfault occurs later, when setting the language on the parser.
|
|
340
|
+
warn("TreeHaver: FFI backend loading grammar after ruby_tree_sitter (MRI backend). " \
|
|
341
|
+
"This may cause segfaults due to tree-sitter symbol conflicts. " \
|
|
342
|
+
"For reliable operation, use only one backend per process.") if $VERBOSE
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
# Ensure the core libtree-sitter runtime is loaded first so
|
|
346
|
+
# the language shared library resolves its symbols against the
|
|
347
|
+
# same runtime. This prevents cases where the language pointer
|
|
348
|
+
# is incompatible with the parser (different lib instances).
|
|
349
|
+
Native.try_load!
|
|
350
|
+
|
|
234
351
|
begin
|
|
235
|
-
|
|
352
|
+
# Prefer resolving symbols immediately and globally so the
|
|
353
|
+
# language library links to the already-loaded libtree-sitter
|
|
354
|
+
# (RTLD_NOW | RTLD_GLOBAL). If those constants are not present
|
|
355
|
+
# fall back to RTLD_LAZY for maximum compatibility.
|
|
356
|
+
flags = if defined?(::FFI::DynamicLibrary::RTLD_NOW) && defined?(::FFI::DynamicLibrary::RTLD_GLOBAL)
|
|
357
|
+
::FFI::DynamicLibrary::RTLD_NOW | ::FFI::DynamicLibrary::RTLD_GLOBAL
|
|
358
|
+
else
|
|
359
|
+
::FFI::DynamicLibrary::RTLD_LAZY
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
dl = ::FFI::DynamicLibrary.open(path, flags)
|
|
236
363
|
rescue LoadError => e
|
|
237
364
|
raise TreeHaver::NotAvailable, "Could not open language library at #{path}: #{e.message}"
|
|
238
365
|
end
|
|
@@ -268,7 +395,9 @@ module TreeHaver
|
|
|
268
395
|
# (e.g., during parsing). Creating the Language handle does not require core to be loaded.
|
|
269
396
|
ptr = func.call
|
|
270
397
|
raise TreeHaver::NotAvailable, "Language factory returned NULL for #{path}" if ptr.null?
|
|
271
|
-
|
|
398
|
+
# Pass the opened DynamicLibrary into the Language instance so the
|
|
399
|
+
# library handle remains alive for the lifetime of the Language.
|
|
400
|
+
new(ptr, dl, path: path, symbol: symbol)
|
|
272
401
|
end
|
|
273
402
|
|
|
274
403
|
# Backward-compatible alias
|
|
@@ -290,54 +419,115 @@ module TreeHaver
|
|
|
290
419
|
@parser = Native.ts_parser_new
|
|
291
420
|
raise TreeHaver::NotAvailable, "Failed to create ts_parser" if @parser.null?
|
|
292
421
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
#
|
|
298
|
-
#
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
begin
|
|
303
|
-
Native.ts_parser_delete(ptr)
|
|
304
|
-
rescue StandardError
|
|
305
|
-
nil
|
|
306
|
-
end
|
|
307
|
-
}
|
|
308
|
-
end
|
|
422
|
+
# Note: We intentionally do NOT register a finalizer here because:
|
|
423
|
+
# 1. ts_parser_delete can segfault if called during certain GC scenarios
|
|
424
|
+
# 2. The native library may be unloaded before finalizers run
|
|
425
|
+
# 3. Parser cleanup happens automatically on process exit
|
|
426
|
+
# 4. Long-running processes should explicitly manage parser lifecycle
|
|
427
|
+
#
|
|
428
|
+
# If you need explicit cleanup in long-running processes, store the
|
|
429
|
+
# parser in an instance variable and call a cleanup method explicitly
|
|
430
|
+
# when done, rather than relying on GC finalizers.
|
|
309
431
|
end
|
|
310
432
|
|
|
311
433
|
# Set the language for this parser
|
|
312
434
|
#
|
|
313
|
-
#
|
|
435
|
+
# Note: FFI backend is special - it receives the wrapped Language object
|
|
436
|
+
# because it needs to call to_ptr to get the FFI pointer. TreeHaver::Parser
|
|
437
|
+
# detects FFI Language wrappers (respond_to?(:to_ptr)) and passes them through.
|
|
438
|
+
#
|
|
439
|
+
# @param lang [Language] the FFI language wrapper (not unwrapped)
|
|
314
440
|
# @return [Language] the language that was set
|
|
315
441
|
# @raise [TreeHaver::NotAvailable] if setting the language fails
|
|
316
442
|
def language=(lang)
|
|
317
|
-
|
|
443
|
+
# Defensive check: ensure we received an FFI Language wrapper
|
|
444
|
+
unless lang.is_a?(Language)
|
|
445
|
+
raise TreeHaver::NotAvailable,
|
|
446
|
+
"FFI backend expected FFI::Language wrapper, got #{lang.class}. " \
|
|
447
|
+
"This usually means TreeHaver::Parser#unwrap_language passed the wrong type. " \
|
|
448
|
+
"Check that language caching respects backend boundaries."
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
# Additional check: verify the language is actually for FFI backend
|
|
452
|
+
if lang.respond_to?(:backend) && lang.backend != :ffi
|
|
453
|
+
raise TreeHaver::NotAvailable,
|
|
454
|
+
"FFI backend received Language for wrong backend: #{lang.backend}. " \
|
|
455
|
+
"Expected :ffi backend. Class: #{lang.class}. " \
|
|
456
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
# Verify the DynamicLibrary reference is still present (a nil reference means the library could be GC'd and unloaded)
|
|
460
|
+
# The Language stores @library to prevent this, but let's verify
|
|
461
|
+
lib = lang.instance_variable_get(:@library)
|
|
462
|
+
if lib.nil?
|
|
463
|
+
raise TreeHaver::NotAvailable,
|
|
464
|
+
"FFI Language has no library reference. The dynamic library may have been unloaded. " \
|
|
465
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
# Verify the language has a valid pointer
|
|
469
|
+
ptr = lang.to_ptr
|
|
470
|
+
|
|
471
|
+
# Check ptr is actually an FFI::Pointer
|
|
472
|
+
unless ptr.is_a?(::FFI::Pointer)
|
|
473
|
+
raise TreeHaver::NotAvailable,
|
|
474
|
+
"FFI Language#to_ptr returned #{ptr.class}, expected FFI::Pointer. " \
|
|
475
|
+
"Language class: #{lang.class}. " \
|
|
476
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
477
|
+
end
|
|
478
|
+
|
|
479
|
+
ptr_address = ptr.address
|
|
480
|
+
|
|
481
|
+
# Check for NULL (0x0)
|
|
482
|
+
if ptr.nil? || ptr_address.zero?
|
|
483
|
+
raise TreeHaver::NotAvailable,
|
|
484
|
+
"FFI Language has NULL pointer. Language may not have loaded correctly. " \
|
|
485
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
# Check for small invalid addresses (addresses below 4 KB are typically unmapped memory)
|
|
489
|
+
# Common invalid addresses like 0x40 (64) indicate corrupted or uninitialized pointers
|
|
490
|
+
if ptr_address < 4096
|
|
491
|
+
raise TreeHaver::NotAvailable,
|
|
492
|
+
"FFI Language has invalid pointer (address 0x#{ptr_address.to_s(16)}). " \
|
|
493
|
+
"This usually indicates the language library was unloaded or never loaded correctly. " \
|
|
494
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
# Note: MRI backend conflict is now handled by TreeHaver::BackendConflict
|
|
498
|
+
# at a higher level (in TreeHaver.resolve_backend_module)
|
|
499
|
+
|
|
500
|
+
# lang is a wrapped FFI::Language that has to_ptr method
|
|
501
|
+
ok = Native.ts_parser_set_language(@parser, ptr)
|
|
318
502
|
raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok
|
|
319
503
|
|
|
320
|
-
lang
|
|
504
|
+
lang # rubocop:disable Lint/Void (intentional return value)
|
|
321
505
|
end
|
|
322
506
|
|
|
323
507
|
# Parse source code into a syntax tree
|
|
324
508
|
#
|
|
325
509
|
# @param source [String] the source code to parse (should be UTF-8)
|
|
326
|
-
# @return [
|
|
510
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
327
511
|
# @raise [TreeHaver::NotAvailable] if parsing fails
|
|
328
512
|
def parse(source)
|
|
329
513
|
src = String(source)
|
|
330
514
|
tree_ptr = Native.ts_parser_parse_string(@parser, ::FFI::Pointer::NULL, src, src.bytesize)
|
|
331
515
|
raise TreeHaver::NotAvailable, "Parse returned NULL" if tree_ptr.null?
|
|
332
516
|
|
|
333
|
-
|
|
334
|
-
|
|
517
|
+
# Return raw FFI::Tree - TreeHaver::Parser will wrap it
|
|
518
|
+
Tree.new(tree_ptr)
|
|
335
519
|
end
|
|
336
520
|
end
|
|
337
521
|
|
|
338
522
|
# FFI-based tree-sitter tree
|
|
339
523
|
#
|
|
340
524
|
# Wraps a TSTree pointer and manages its lifecycle with a finalizer.
|
|
525
|
+
#
|
|
526
|
+
# Note: Tree objects DO use finalizers (unlike Parser objects) because:
|
|
527
|
+
# 1. Trees are typically short-lived and numerous (one per parse)
|
|
528
|
+
# 2. ts_tree_delete is safer than ts_parser_delete during GC
|
|
529
|
+
# 3. Memory leaks from accumulated trees are more problematic
|
|
530
|
+
# 4. The finalizer silently ignores errors for safety
|
|
341
531
|
class Tree
|
|
342
532
|
# @api private
|
|
343
533
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
@@ -349,12 +539,21 @@ module TreeHaver
|
|
|
349
539
|
# @api private
|
|
350
540
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
351
541
|
class << self
|
|
542
|
+
# Returns a finalizer proc that deletes the tree
|
|
543
|
+
#
|
|
544
|
+
# This is public API for testing purposes, but not intended for
|
|
545
|
+
# direct use. The finalizer is automatically registered when
|
|
546
|
+
# creating a Tree object.
|
|
547
|
+
#
|
|
352
548
|
# @return [Proc] finalizer that deletes the tree
|
|
353
549
|
def finalizer(ptr)
|
|
354
550
|
proc {
|
|
355
551
|
begin
|
|
356
552
|
Native.ts_tree_delete(ptr)
|
|
357
553
|
rescue StandardError
|
|
554
|
+
# Silently ignore errors during finalization to prevent crashes
|
|
555
|
+
# during GC. If the library is unloaded or ptr is invalid, we
|
|
556
|
+
# don't want to crash the entire process.
|
|
358
557
|
nil
|
|
359
558
|
end
|
|
360
559
|
}
|