tree_haver 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +236 -3
- data/CONTRIBUTING.md +100 -0
- data/README.md +470 -85
- data/lib/tree_haver/backends/citrus.rb +423 -0
- data/lib/tree_haver/backends/ffi.rb +405 -150
- data/lib/tree_haver/backends/java.rb +63 -10
- data/lib/tree_haver/backends/mri.rb +154 -27
- data/lib/tree_haver/backends/rust.rb +58 -27
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +526 -0
- data/lib/tree_haver/path_validator.rb +47 -27
- data/lib/tree_haver/tree.rb +259 -0
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +741 -285
- data/sig/tree_haver/backends.rbs +68 -1
- data/sig/tree_haver/path_validator.rbs +1 -0
- data/sig/tree_haver.rbs +95 -9
- data.tar.gz.sig +0 -0
- metadata +12 -8
- metadata.gz.sig +0 -0
|
@@ -1,20 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
# The load condition isn't really worth testing, so :nocov:
|
|
5
|
-
# :nocov:
|
|
6
|
-
begin
|
|
7
|
-
require "ffi"
|
|
8
|
-
FFI_AVAILABLE = true
|
|
9
|
-
rescue LoadError
|
|
10
|
-
FFI_AVAILABLE = false
|
|
11
|
-
end
|
|
12
|
-
# :nocov:
|
|
13
|
-
|
|
14
4
|
module Backends
|
|
15
5
|
# FFI-based backend for calling libtree-sitter directly
|
|
16
6
|
#
|
|
17
|
-
# This backend uses Ruby FFI (JNR-FFI on JRuby) to call the native
|
|
7
|
+
# This backend uses Ruby FFI (JNR-FFI on JRuby) to call the native tree-sitter
|
|
18
8
|
# C library without requiring MRI C extensions. This makes it compatible with
|
|
19
9
|
# JRuby, TruffleRuby, and other Ruby implementations that support FFI.
|
|
20
10
|
#
|
|
@@ -24,152 +14,202 @@ module TreeHaver
|
|
|
24
14
|
# - Accessing node types and children
|
|
25
15
|
#
|
|
26
16
|
# Not yet supported:
|
|
27
|
-
# - Query API (
|
|
17
|
+
# - Query API (tree-sitter queries/patterns)
|
|
28
18
|
#
|
|
29
19
|
# @note Requires the `ffi` gem and libtree-sitter shared library to be installed
|
|
30
20
|
# @see https://github.com/ffi/ffi Ruby FFI
|
|
31
|
-
# @see https://tree-sitter.github.io/tree-sitter/
|
|
21
|
+
# @see https://tree-sitter.github.io/tree-sitter/ tree-sitter
|
|
32
22
|
module FFI
|
|
23
|
+
# Check if the FFI gem is available (lazy evaluation)
|
|
24
|
+
#
|
|
25
|
+
# This method lazily checks for FFI gem availability to avoid
|
|
26
|
+
# polluting the environment at load time.
|
|
27
|
+
#
|
|
28
|
+
# @return [Boolean] true if FFI gem can be loaded
|
|
29
|
+
# @api private
|
|
30
|
+
def self.ffi_gem_available?
|
|
31
|
+
return @ffi_gem_available if defined?(@ffi_gem_available)
|
|
32
|
+
|
|
33
|
+
@ffi_gem_available = begin
|
|
34
|
+
require "ffi"
|
|
35
|
+
true
|
|
36
|
+
rescue LoadError
|
|
37
|
+
false
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
33
41
|
# Native FFI bindings to libtree-sitter
|
|
34
42
|
#
|
|
35
|
-
# This module handles loading the
|
|
36
|
-
# FFI function attachments for the core
|
|
43
|
+
# This module handles loading the tree-sitter runtime library and defining
|
|
44
|
+
# FFI function attachments for the core tree-sitter API.
|
|
45
|
+
#
|
|
46
|
+
# All FFI operations are lazy - nothing is loaded until actually needed.
|
|
47
|
+
# This prevents polluting the Ruby environment at require time.
|
|
37
48
|
#
|
|
38
49
|
# @api private
|
|
39
50
|
module Native
|
|
40
|
-
|
|
41
|
-
extend
|
|
42
|
-
|
|
43
|
-
# FFI struct representation of TSNode
|
|
44
|
-
#
|
|
45
|
-
# Mirrors the C struct layout used by Tree-sitter. TSNode is passed
|
|
46
|
-
# by value in the Tree-sitter C API.
|
|
51
|
+
class << self
|
|
52
|
+
# Lazily extend with FFI::Library only when needed
|
|
47
53
|
#
|
|
48
|
-
# @
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
# @return [Boolean] true if FFI was successfully extended
|
|
55
|
+
def ensure_ffi_extended!
|
|
56
|
+
return true if @ffi_extended
|
|
57
|
+
|
|
58
|
+
unless FFI.ffi_gem_available?
|
|
59
|
+
raise TreeHaver::NotAvailable, "FFI gem is not available"
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
extend(::FFI::Library)
|
|
63
|
+
|
|
64
|
+
define_ts_node_struct!
|
|
65
|
+
@ffi_extended = true
|
|
56
66
|
end
|
|
57
67
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
[
|
|
73
|
-
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
74
|
-
"tree-sitter",
|
|
75
|
-
"libtree-sitter.so.0",
|
|
76
|
-
"libtree-sitter.so",
|
|
77
|
-
"libtree-sitter.dylib",
|
|
78
|
-
"libtree-sitter.dll",
|
|
79
|
-
].compact
|
|
68
|
+
# Define the TSNode struct lazily
|
|
69
|
+
# @api private
|
|
70
|
+
def define_ts_node_struct!
|
|
71
|
+
return if const_defined?(:TSNode, false)
|
|
72
|
+
|
|
73
|
+
# FFI struct representation of TSNode
|
|
74
|
+
# Mirrors the C struct layout used by tree-sitter
|
|
75
|
+
ts_node_class = Class.new(::FFI::Struct) do
|
|
76
|
+
layout :context,
|
|
77
|
+
[:uint32, 4],
|
|
78
|
+
:id,
|
|
79
|
+
:pointer,
|
|
80
|
+
:tree,
|
|
81
|
+
:pointer
|
|
80
82
|
end
|
|
83
|
+
const_set(:TSNode, ts_node_class)
|
|
84
|
+
typedef(ts_node_class.by_value, :ts_node)
|
|
85
|
+
end
|
|
81
86
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# @return [void]
|
|
89
|
-
# @example
|
|
90
|
-
# TreeHaver::Backends::FFI::Native.try_load!
|
|
91
|
-
def try_load!
|
|
92
|
-
return if @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
93
|
-
last_error = nil
|
|
94
|
-
candidates = lib_candidates
|
|
95
|
-
candidates.each do |name|
|
|
96
|
-
ffi_lib(name)
|
|
97
|
-
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
98
|
-
break
|
|
99
|
-
rescue ::FFI::NotFoundError, LoadError => e
|
|
100
|
-
last_error = e
|
|
101
|
-
end
|
|
102
|
-
unless @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
103
|
-
# :nocov:
|
|
104
|
-
# This failure path cannot be tested in a shared test suite because:
|
|
105
|
-
# 1. Once FFI loads a library via ffi_lib, it cannot be unloaded
|
|
106
|
-
# 2. Other tests may load the library first (test order is randomized)
|
|
107
|
-
# 3. The @loaded flag can be reset, but ffi_lib state persists
|
|
108
|
-
# ENV precedence is tested implicitly by parsing tests that work when
|
|
109
|
-
# TREE_SITTER_RUNTIME_LIB is set correctly in the environment.
|
|
110
|
-
tried = candidates.join(", ")
|
|
111
|
-
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
112
|
-
msg = if last_error
|
|
113
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
114
|
-
else
|
|
115
|
-
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
116
|
-
end
|
|
117
|
-
raise TreeHaver::NotAvailable, msg
|
|
118
|
-
# :nocov:
|
|
119
|
-
end
|
|
87
|
+
# Get the TSNode class, ensuring it's defined
|
|
88
|
+
# @return [Class] the TSNode FFI struct class
|
|
89
|
+
def ts_node_class
|
|
90
|
+
ensure_ffi_extended!
|
|
91
|
+
const_get(:TSNode)
|
|
92
|
+
end
|
|
120
93
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
94
|
+
# Get list of candidate library names for loading libtree-sitter
|
|
95
|
+
#
|
|
96
|
+
# The list is built dynamically to respect environment variables set at runtime.
|
|
97
|
+
# If TREE_SITTER_RUNTIME_LIB is set, it is tried first.
|
|
98
|
+
#
|
|
99
|
+
# @note TREE_SITTER_LIB is intentionally NOT supported
|
|
100
|
+
# @return [Array<String>] list of library names to try
|
|
101
|
+
def lib_candidates
|
|
102
|
+
[
|
|
103
|
+
ENV["TREE_SITTER_RUNTIME_LIB"],
|
|
104
|
+
"tree-sitter",
|
|
105
|
+
"libtree-sitter.so.0",
|
|
106
|
+
"libtree-sitter.so",
|
|
107
|
+
"libtree-sitter.dylib",
|
|
108
|
+
"libtree-sitter.dll",
|
|
109
|
+
].compact
|
|
110
|
+
end
|
|
126
111
|
|
|
127
|
-
|
|
128
|
-
|
|
112
|
+
# Load the tree-sitter runtime library
|
|
113
|
+
#
|
|
114
|
+
# Tries each candidate library name in order until one succeeds.
|
|
115
|
+
# After loading, attaches FFI function definitions for the tree-sitter API.
|
|
116
|
+
#
|
|
117
|
+
# @raise [TreeHaver::NotAvailable] if no library can be loaded
|
|
118
|
+
# @return [void]
|
|
119
|
+
def try_load!
|
|
120
|
+
return if @loaded
|
|
121
|
+
|
|
122
|
+
ensure_ffi_extended!
|
|
123
|
+
|
|
124
|
+
# Warn about potential conflicts with MRI backend
|
|
125
|
+
if defined?(::TreeSitter) && defined?(::TreeSitter::Parser)
|
|
126
|
+
warn("TreeHaver: FFI backend loading after ruby_tree_sitter (MRI backend). " \
|
|
127
|
+
"This may cause symbol conflicts due to different libtree-sitter versions. " \
|
|
128
|
+
"Consider using only one backend per process, or set TREE_SITTER_RUNTIME_LIB " \
|
|
129
|
+
"to match the version used by ruby_tree_sitter.") if $VERBOSE
|
|
130
|
+
end
|
|
129
131
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
132
|
+
last_error = nil
|
|
133
|
+
candidates = lib_candidates
|
|
134
|
+
candidates.each do |name|
|
|
135
|
+
ffi_lib(name)
|
|
136
|
+
@loaded = true
|
|
137
|
+
break
|
|
138
|
+
rescue ::FFI::NotFoundError, LoadError => e
|
|
139
|
+
last_error = e
|
|
133
140
|
end
|
|
134
141
|
|
|
135
|
-
|
|
136
|
-
|
|
142
|
+
unless @loaded
|
|
143
|
+
# :nocov:
|
|
144
|
+
tried = candidates.join(", ")
|
|
145
|
+
env_hint = ENV["TREE_SITTER_RUNTIME_LIB"] ? " TREE_SITTER_RUNTIME_LIB=#{ENV["TREE_SITTER_RUNTIME_LIB"]}." : ""
|
|
146
|
+
msg = if last_error
|
|
147
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint} #{last_error.class}: #{last_error.message}"
|
|
148
|
+
else
|
|
149
|
+
"Could not load libtree-sitter (tried: #{tried}).#{env_hint}"
|
|
150
|
+
end
|
|
151
|
+
raise TreeHaver::NotAvailable, msg
|
|
152
|
+
# :nocov:
|
|
137
153
|
end
|
|
154
|
+
|
|
155
|
+
# Attach functions after lib is selected
|
|
156
|
+
attach_function(:ts_parser_new, [], :pointer)
|
|
157
|
+
attach_function(:ts_parser_delete, [:pointer], :void)
|
|
158
|
+
attach_function(:ts_parser_set_language, [:pointer, :pointer], :bool)
|
|
159
|
+
attach_function(:ts_parser_parse_string, [:pointer, :pointer, :string, :uint32], :pointer)
|
|
160
|
+
|
|
161
|
+
attach_function(:ts_tree_delete, [:pointer], :void)
|
|
162
|
+
attach_function(:ts_tree_root_node, [:pointer], :ts_node)
|
|
163
|
+
|
|
164
|
+
attach_function(:ts_node_type, [:ts_node], :string)
|
|
165
|
+
attach_function(:ts_node_child_count, [:ts_node], :uint32)
|
|
166
|
+
attach_function(:ts_node_child, [:ts_node, :uint32], :ts_node)
|
|
167
|
+
attach_function(:ts_node_start_byte, [:ts_node], :uint32)
|
|
168
|
+
attach_function(:ts_node_end_byte, [:ts_node], :uint32)
|
|
169
|
+
attach_function(:ts_node_start_point, [:ts_node], :pointer)
|
|
170
|
+
attach_function(:ts_node_end_point, [:ts_node], :pointer)
|
|
171
|
+
attach_function(:ts_node_is_null, [:ts_node], :bool)
|
|
172
|
+
attach_function(:ts_node_is_named, [:ts_node], :bool)
|
|
138
173
|
end
|
|
139
|
-
else
|
|
140
|
-
# :nocov:
|
|
141
|
-
# Fallback stubs when FFI gem is not installed.
|
|
142
|
-
# These paths cannot be tested in a test suite where FFI is a dependency,
|
|
143
|
-
# since the gem is always available. They provide graceful degradation
|
|
144
|
-
# for environments where FFI cannot be installed.
|
|
145
|
-
class << self
|
|
146
|
-
def try_load!
|
|
147
|
-
raise TreeHaver::NotAvailable, "FFI not available"
|
|
148
|
-
end
|
|
149
174
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
end
|
|
175
|
+
def loaded?
|
|
176
|
+
!!@loaded
|
|
153
177
|
end
|
|
154
|
-
# :nocov:
|
|
155
178
|
end
|
|
156
179
|
end
|
|
157
180
|
|
|
158
181
|
class << self
|
|
159
182
|
# Check if the FFI backend is available
|
|
160
183
|
#
|
|
161
|
-
# Returns true if
|
|
162
|
-
#
|
|
184
|
+
# Returns true if:
|
|
185
|
+
# 1. The `ffi` gem is present
|
|
186
|
+
# 2. MRI backend (ruby_tree_sitter) has NOT been loaded
|
|
163
187
|
#
|
|
164
|
-
#
|
|
188
|
+
# FFI and MRI backends conflict at the libtree-sitter level.
|
|
189
|
+
# Once MRI loads, using FFI will cause segfaults.
|
|
190
|
+
#
|
|
191
|
+
# @return [Boolean] true if FFI backend can be used
|
|
165
192
|
# @example
|
|
166
193
|
# if TreeHaver::Backends::FFI.available?
|
|
167
194
|
# puts "FFI backend is ready"
|
|
168
195
|
# end
|
|
169
196
|
def available?
|
|
170
|
-
return false unless
|
|
171
|
-
|
|
172
|
-
|
|
197
|
+
return false unless TreeHaver::Backends::FFI.ffi_gem_available?
|
|
198
|
+
|
|
199
|
+
# Check if MRI backend has been loaded (which blocks FFI)
|
|
200
|
+
!defined?(::TreeSitter::Parser)
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Reset the load state (primarily for testing)
|
|
204
|
+
#
|
|
205
|
+
# Note: this is currently a no-op (it does not clear any memoized load state),
|
|
206
|
+
# but this method is provided for API consistency.
|
|
207
|
+
#
|
|
208
|
+
# @return [void]
|
|
209
|
+
# @api private
|
|
210
|
+
def reset!
|
|
211
|
+
# No-op: the memoized ffi_gem_available? result and Native's load flags are not cleared
|
|
212
|
+
nil
|
|
173
213
|
end
|
|
174
214
|
|
|
175
215
|
# Get capabilities supported by this backend
|
|
@@ -189,20 +229,74 @@ module TreeHaver
|
|
|
189
229
|
end
|
|
190
230
|
end
|
|
191
231
|
|
|
192
|
-
# Represents a
|
|
232
|
+
# Represents a tree-sitter language loaded via FFI
|
|
193
233
|
#
|
|
194
234
|
# Holds a pointer to a TSLanguage struct from a loaded shared library.
|
|
195
235
|
class Language
|
|
236
|
+
include Comparable
|
|
237
|
+
|
|
196
238
|
# The FFI pointer to the TSLanguage struct
|
|
197
239
|
# @return [FFI::Pointer]
|
|
198
240
|
attr_reader :pointer
|
|
199
241
|
|
|
242
|
+
# The backend this language is for
|
|
243
|
+
# @return [Symbol]
|
|
244
|
+
attr_reader :backend
|
|
245
|
+
|
|
246
|
+
# The path this language was loaded from (if known)
|
|
247
|
+
# @return [String, nil]
|
|
248
|
+
attr_reader :path
|
|
249
|
+
|
|
250
|
+
# The symbol name (if known)
|
|
251
|
+
# @return [String, nil]
|
|
252
|
+
attr_reader :symbol
|
|
253
|
+
|
|
200
254
|
# @api private
|
|
201
255
|
# @param ptr [FFI::Pointer] pointer to TSLanguage
|
|
202
|
-
|
|
256
|
+
# @param lib [FFI::DynamicLibrary, nil] the opened dynamic library
|
|
257
|
+
# (kept as an instance variable to prevent it being GC'd/unloaded)
|
|
258
|
+
# @param path [String, nil] path language was loaded from
|
|
259
|
+
# @param symbol [String, nil] symbol name
|
|
260
|
+
def initialize(ptr, lib = nil, path: nil, symbol: nil)
|
|
203
261
|
@pointer = ptr
|
|
262
|
+
@backend = :ffi
|
|
263
|
+
@path = path
|
|
264
|
+
@symbol = symbol
|
|
265
|
+
# Keep a reference to the DynamicLibrary that produced the language
|
|
266
|
+
# pointer so it isn't garbage-collected and unloaded while the
|
|
267
|
+
# pointer is still in use by the parser. Not keeping this reference
|
|
268
|
+
# can lead to the language pointer becoming invalid and causing
|
|
269
|
+
# segmentation faults when passed to native functions.
|
|
270
|
+
@library = lib
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Compare languages for ordering and equality (used by Comparable)
|
|
274
|
+
#
|
|
275
|
+
# FFI languages are equal if they have the same backend, path, and symbol.
|
|
276
|
+
# Path and symbol uniquely identify a loaded language.
|
|
277
|
+
#
|
|
278
|
+
# @param other [Object] object to compare with
|
|
279
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
280
|
+
def <=>(other)
|
|
281
|
+
return unless other.is_a?(Language)
|
|
282
|
+
return unless other.backend == @backend
|
|
283
|
+
|
|
284
|
+
# Compare by path first, then symbol
|
|
285
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
286
|
+
return cmp unless cmp.zero?
|
|
287
|
+
|
|
288
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
204
289
|
end
|
|
205
290
|
|
|
291
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
292
|
+
# @return [Integer]
|
|
293
|
+
def hash
|
|
294
|
+
[@backend, @path, @symbol].hash
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Alias eql? to ==
|
|
298
|
+
alias_method :eql?, :==
|
|
299
|
+
|
|
206
300
|
# Convert to FFI pointer for passing to native functions
|
|
207
301
|
#
|
|
208
302
|
# @return [FFI::Pointer]
|
|
@@ -231,8 +325,39 @@ module TreeHaver
|
|
|
231
325
|
class << self
|
|
232
326
|
def from_library(path, symbol: nil, name: nil)
|
|
233
327
|
raise TreeHaver::NotAvailable, "FFI not available" unless Backends::FFI.available?
|
|
328
|
+
|
|
329
|
+
# Check for MRI backend conflict BEFORE loading the grammar
|
|
330
|
+
# If ruby_tree_sitter has already loaded this grammar file, the dynamic
|
|
331
|
+
# linker will return the cached library with symbols resolved against
|
|
332
|
+
# MRI's statically-linked tree-sitter, causing segfaults when FFI
|
|
333
|
+
# tries to use the pointer with its dynamically-linked libtree-sitter.
|
|
334
|
+
if defined?(::TreeSitter::Language)
|
|
335
|
+
# MRI backend has been loaded - check if it might have loaded this grammar
|
|
336
|
+
# We can't reliably detect which grammars MRI loaded, so we warn and
|
|
337
|
+
# attempt to proceed. The segfault will occur when setting language on parser.
|
|
338
|
+
warn("TreeHaver: FFI backend loading grammar after ruby_tree_sitter (MRI backend). " \
|
|
339
|
+
"This may cause segfaults due to tree-sitter symbol conflicts. " \
|
|
340
|
+
"For reliable operation, use only one backend per process.") if $VERBOSE
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
# Ensure the core libtree-sitter runtime is loaded first so
|
|
344
|
+
# the language shared library resolves its symbols against the
|
|
345
|
+
# same runtime. This prevents cases where the language pointer
|
|
346
|
+
# is incompatible with the parser (different lib instances).
|
|
347
|
+
Native.try_load!
|
|
348
|
+
|
|
234
349
|
begin
|
|
235
|
-
|
|
350
|
+
# Prefer resolving symbols immediately and globally so the
|
|
351
|
+
# language library links to the already-loaded libtree-sitter
|
|
352
|
+
# (RTLD_NOW | RTLD_GLOBAL). If those constants are not present
|
|
353
|
+
# fall back to RTLD_LAZY for maximum compatibility.
|
|
354
|
+
flags = if defined?(::FFI::DynamicLibrary::RTLD_NOW) && defined?(::FFI::DynamicLibrary::RTLD_GLOBAL)
|
|
355
|
+
::FFI::DynamicLibrary::RTLD_NOW | ::FFI::DynamicLibrary::RTLD_GLOBAL
|
|
356
|
+
else
|
|
357
|
+
::FFI::DynamicLibrary::RTLD_LAZY
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
dl = ::FFI::DynamicLibrary.open(path, flags)
|
|
236
361
|
rescue LoadError => e
|
|
237
362
|
raise TreeHaver::NotAvailable, "Could not open language library at #{path}: #{e.message}"
|
|
238
363
|
end
|
|
@@ -268,7 +393,9 @@ module TreeHaver
|
|
|
268
393
|
# (e.g., during parsing). Creating the Language handle does not require core to be loaded.
|
|
269
394
|
ptr = func.call
|
|
270
395
|
raise TreeHaver::NotAvailable, "Language factory returned NULL for #{path}" if ptr.null?
|
|
271
|
-
|
|
396
|
+
# Pass the opened DynamicLibrary into the Language instance so the
|
|
397
|
+
# library handle remains alive for the lifetime of the Language.
|
|
398
|
+
new(ptr, dl, path: path, symbol: symbol)
|
|
272
399
|
end
|
|
273
400
|
|
|
274
401
|
# Backward-compatible alias
|
|
@@ -276,7 +403,7 @@ module TreeHaver
|
|
|
276
403
|
end
|
|
277
404
|
end
|
|
278
405
|
|
|
279
|
-
# FFI-based
|
|
406
|
+
# FFI-based tree-sitter parser
|
|
280
407
|
#
|
|
281
408
|
# Wraps a TSParser pointer. No finalizer is registered, so the parser is not deleted when garbage-collected.
|
|
282
409
|
class Parser
|
|
@@ -290,31 +417,86 @@ module TreeHaver
|
|
|
290
417
|
@parser = Native.ts_parser_new
|
|
291
418
|
raise TreeHaver::NotAvailable, "Failed to create ts_parser" if @parser.null?
|
|
292
419
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
#
|
|
298
|
-
#
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
begin
|
|
303
|
-
Native.ts_parser_delete(ptr)
|
|
304
|
-
rescue StandardError
|
|
305
|
-
nil
|
|
306
|
-
end
|
|
307
|
-
}
|
|
308
|
-
end
|
|
420
|
+
# Note: We intentionally do NOT register a finalizer here because:
|
|
421
|
+
# 1. ts_parser_delete can segfault if called during certain GC scenarios
|
|
422
|
+
# 2. The native library may be unloaded before finalizers run
|
|
423
|
+
# 3. Parser cleanup happens automatically on process exit
|
|
424
|
+
# 4. Long-running processes should explicitly manage parser lifecycle
|
|
425
|
+
#
|
|
426
|
+
# If you need explicit cleanup in long-running processes, store the
|
|
427
|
+
# parser in an instance variable and call a cleanup method explicitly
|
|
428
|
+
# when done, rather than relying on GC finalizers.
|
|
309
429
|
end
|
|
310
430
|
|
|
311
431
|
# Set the language for this parser
|
|
312
432
|
#
|
|
313
|
-
#
|
|
433
|
+
# Note: FFI backend is special - it receives the wrapped Language object
|
|
434
|
+
# because it needs to call to_ptr to get the FFI pointer. TreeHaver::Parser
|
|
435
|
+
# detects FFI Language wrappers (respond_to?(:to_ptr)) and passes them through.
|
|
436
|
+
#
|
|
437
|
+
# @param lang [Language] the FFI language wrapper (not unwrapped)
|
|
314
438
|
# @return [Language] the language that was set
|
|
315
439
|
# @raise [TreeHaver::NotAvailable] if setting the language fails
|
|
316
440
|
def language=(lang)
|
|
317
|
-
|
|
441
|
+
# Defensive check: ensure we received an FFI Language wrapper
|
|
442
|
+
unless lang.is_a?(Language)
|
|
443
|
+
raise TreeHaver::NotAvailable,
|
|
444
|
+
"FFI backend expected FFI::Language wrapper, got #{lang.class}. " \
|
|
445
|
+
"This usually means TreeHaver::Parser#unwrap_language passed the wrong type. " \
|
|
446
|
+
"Check that language caching respects backend boundaries."
|
|
447
|
+
end
|
|
448
|
+
|
|
449
|
+
# Additional check: verify the language is actually for FFI backend
|
|
450
|
+
if lang.respond_to?(:backend) && lang.backend != :ffi
|
|
451
|
+
raise TreeHaver::NotAvailable,
|
|
452
|
+
"FFI backend received Language for wrong backend: #{lang.backend}. " \
|
|
453
|
+
"Expected :ffi backend. Class: #{lang.class}. " \
|
|
454
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
455
|
+
end
|
|
456
|
+
|
|
457
|
+
# Verify the Language carries its DynamicLibrary reference (Language stores it in
|
|
458
|
+
# @library to keep the library loaded). Only a nil reference is detected here.
|
|
459
|
+
lib = lang.instance_variable_get(:@library)
|
|
460
|
+
if lib.nil?
|
|
461
|
+
raise TreeHaver::NotAvailable,
|
|
462
|
+
"FFI Language has no library reference. The dynamic library may have been unloaded. " \
|
|
463
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
# Verify the language has a valid pointer
|
|
467
|
+
ptr = lang.to_ptr
|
|
468
|
+
|
|
469
|
+
# Check ptr is actually an FFI::Pointer
|
|
470
|
+
unless ptr.is_a?(::FFI::Pointer)
|
|
471
|
+
raise TreeHaver::NotAvailable,
|
|
472
|
+
"FFI Language#to_ptr returned #{ptr.class}, expected FFI::Pointer. " \
|
|
473
|
+
"Language class: #{lang.class}. " \
|
|
474
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
475
|
+
end
|
|
476
|
+
|
|
477
|
+
ptr_address = ptr.address
|
|
478
|
+
|
|
479
|
+
# Check for NULL (0x0)
|
|
480
|
+
if ptr.nil? || ptr_address.zero?
|
|
481
|
+
raise TreeHaver::NotAvailable,
|
|
482
|
+
"FFI Language has NULL pointer. Language may not have loaded correctly. " \
|
|
483
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
# Check for small invalid addresses (< 4KB are typically unmapped memory)
|
|
487
|
+
# Common invalid addresses like 0x40 (64) indicate corrupted or uninitialized pointers
|
|
488
|
+
if ptr_address < 4096
|
|
489
|
+
raise TreeHaver::NotAvailable,
|
|
490
|
+
"FFI Language has invalid pointer (address 0x#{ptr_address.to_s(16)}). " \
|
|
491
|
+
"This usually indicates the language library was unloaded or never loaded correctly. " \
|
|
492
|
+
"Path: #{lang.path.inspect}, Symbol: #{lang.symbol.inspect}"
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
# Note: MRI backend conflict is now handled by TreeHaver::BackendConflict
|
|
496
|
+
# at a higher level (in TreeHaver.resolve_backend_module)
|
|
497
|
+
|
|
498
|
+
# lang is a wrapped FFI::Language that has to_ptr method
|
|
499
|
+
ok = Native.ts_parser_set_language(@parser, ptr)
|
|
318
500
|
raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok
|
|
319
501
|
|
|
320
502
|
lang
|
|
@@ -323,20 +505,27 @@ module TreeHaver
|
|
|
323
505
|
# Parse source code into a syntax tree
|
|
324
506
|
#
|
|
325
507
|
# @param source [String] the source code to parse (should be UTF-8)
|
|
326
|
-
# @return [Tree]
|
|
508
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
327
509
|
# @raise [TreeHaver::NotAvailable] if parsing fails
|
|
328
510
|
def parse(source)
|
|
329
511
|
src = String(source)
|
|
330
512
|
tree_ptr = Native.ts_parser_parse_string(@parser, ::FFI::Pointer::NULL, src, src.bytesize)
|
|
331
513
|
raise TreeHaver::NotAvailable, "Parse returned NULL" if tree_ptr.null?
|
|
332
514
|
|
|
515
|
+
# Return raw FFI::Tree - TreeHaver::Parser will wrap it
|
|
333
516
|
Tree.new(tree_ptr)
|
|
334
517
|
end
|
|
335
518
|
end
|
|
336
519
|
|
|
337
|
-
# FFI-based
|
|
520
|
+
# FFI-based tree-sitter tree
|
|
338
521
|
#
|
|
339
522
|
# Wraps a TSTree pointer and manages its lifecycle with a finalizer.
|
|
523
|
+
#
|
|
524
|
+
# Note: Tree objects DO use finalizers (unlike Parser objects) because:
|
|
525
|
+
# 1. Trees are typically short-lived and numerous (one per parse)
|
|
526
|
+
# 2. ts_tree_delete is safer than ts_parser_delete during GC
|
|
527
|
+
# 3. Memory leaks from accumulated trees are more problematic
|
|
528
|
+
# 4. The finalizer silently ignores errors for safety
|
|
340
529
|
class Tree
|
|
341
530
|
# @api private
|
|
342
531
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
@@ -348,12 +537,21 @@ module TreeHaver
|
|
|
348
537
|
# @api private
|
|
349
538
|
# @param ptr [FFI::Pointer] pointer to TSTree
|
|
350
539
|
class << self
|
|
540
|
+
# Returns a finalizer proc that deletes the tree
|
|
541
|
+
#
|
|
542
|
+
# This is public API for testing purposes, but not intended for
|
|
543
|
+
# direct use. The finalizer is automatically registered when
|
|
544
|
+
# creating a Tree object.
|
|
545
|
+
#
|
|
351
546
|
# @return [Proc] finalizer that deletes the tree
|
|
352
547
|
def finalizer(ptr)
|
|
353
548
|
proc {
|
|
354
549
|
begin
|
|
355
550
|
Native.ts_tree_delete(ptr)
|
|
356
551
|
rescue StandardError
|
|
552
|
+
# Silently ignore errors during finalization to prevent crashes
|
|
553
|
+
# during GC. If the library is unloaded or ptr is invalid, we
|
|
554
|
+
# don't want to crash the entire process.
|
|
357
555
|
nil
|
|
358
556
|
end
|
|
359
557
|
}
|
|
@@ -369,10 +567,10 @@ module TreeHaver
|
|
|
369
567
|
end
|
|
370
568
|
end
|
|
371
569
|
|
|
372
|
-
# FFI-based
|
|
570
|
+
# FFI-based tree-sitter node
|
|
373
571
|
#
|
|
374
572
|
# Wraps a TSNode by-value struct. TSNode is passed by value in the
|
|
375
|
-
#
|
|
573
|
+
# tree-sitter C API, so we store the struct value directly.
|
|
376
574
|
class Node
|
|
377
575
|
# @api private
|
|
378
576
|
# @param ts_node_value [Native::TSNode] the TSNode struct (by value)
|
|
@@ -388,6 +586,63 @@ module TreeHaver
|
|
|
388
586
|
Native.ts_node_type(@val)
|
|
389
587
|
end
|
|
390
588
|
|
|
589
|
+
# Get the number of children
|
|
590
|
+
#
|
|
591
|
+
# @return [Integer] child count
|
|
592
|
+
def child_count
|
|
593
|
+
Native.ts_node_child_count(@val)
|
|
594
|
+
end
|
|
595
|
+
|
|
596
|
+
# Get a child by index
|
|
597
|
+
#
|
|
598
|
+
# @param index [Integer] child index
|
|
599
|
+
# @return [Node, nil] child node or nil if index out of bounds
|
|
600
|
+
def child(index)
|
|
601
|
+
return if index >= child_count || index < 0
|
|
602
|
+
child_node = Native.ts_node_child(@val, index)
|
|
603
|
+
Node.new(child_node)
|
|
604
|
+
end
|
|
605
|
+
|
|
606
|
+
# Get start byte offset
|
|
607
|
+
#
|
|
608
|
+
# @return [Integer]
|
|
609
|
+
def start_byte
|
|
610
|
+
Native.ts_node_start_byte(@val)
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
# Get end byte offset
|
|
614
|
+
#
|
|
615
|
+
# @return [Integer]
|
|
616
|
+
def end_byte
|
|
617
|
+
Native.ts_node_end_byte(@val)
|
|
618
|
+
end
|
|
619
|
+
|
|
620
|
+
# Get start point
|
|
621
|
+
#
|
|
622
|
+
# @return [Object] with row and column
|
|
623
|
+
def start_point
|
|
624
|
+
# FFI backend would need to implement ts_node_start_point
|
|
625
|
+
# For now, return a placeholder struct: row is always 0, column is the start byte offset
|
|
626
|
+
Struct.new(:row, :column).new(0, Native.ts_node_start_byte(@val))
|
|
627
|
+
end
|
|
628
|
+
|
|
629
|
+
# Get end point
|
|
630
|
+
#
|
|
631
|
+
# @return [Object] with row and column
|
|
632
|
+
def end_point
|
|
633
|
+
# FFI backend would need to implement ts_node_end_point
|
|
634
|
+
# For now, return a placeholder struct: row is always 0, column is the end byte offset
|
|
635
|
+
Struct.new(:row, :column).new(0, Native.ts_node_end_byte(@val))
|
|
636
|
+
end
|
|
637
|
+
|
|
638
|
+
# Check if node has error
|
|
639
|
+
#
|
|
640
|
+
# @return [Boolean]
|
|
641
|
+
def has_error?
|
|
642
|
+
# Placeholder: always returns false; would need ts_node_has_error implementation
|
|
643
|
+
false
|
|
644
|
+
end
|
|
645
|
+
|
|
391
646
|
# Iterate over child nodes
|
|
392
647
|
#
|
|
393
648
|
# @yieldparam child [Node] each child node
|
|
@@ -395,7 +650,7 @@ module TreeHaver
|
|
|
395
650
|
def each
|
|
396
651
|
return enum_for(:each) unless block_given?
|
|
397
652
|
|
|
398
|
-
count =
|
|
653
|
+
count = child_count
|
|
399
654
|
i = 0
|
|
400
655
|
while i < count
|
|
401
656
|
child = Native.ts_node_child(@val, i)
|