tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
data/lib/tree_haver.rb
CHANGED
|
@@ -3,16 +3,24 @@
|
|
|
3
3
|
# External gems
|
|
4
4
|
require "version_gem"
|
|
5
5
|
|
|
6
|
+
# Standard library
|
|
7
|
+
require "set"
|
|
8
|
+
|
|
6
9
|
# This gem
|
|
7
10
|
require_relative "tree_haver/version"
|
|
8
11
|
require_relative "tree_haver/language_registry"
|
|
9
12
|
|
|
10
|
-
# TreeHaver is a cross-Ruby adapter for
|
|
13
|
+
# TreeHaver is a cross-Ruby adapter for code parsing with 9 backends.
|
|
14
|
+
#
|
|
15
|
+
# Provides a unified API for parsing source code across MRI Ruby, JRuby, and TruffleRuby
|
|
16
|
+
# using tree-sitter grammars or language-specific native parsers.
|
|
11
17
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
18
|
+
# Supports 9 backends:
|
|
19
|
+
# - Tree-sitter: MRI (C), Rust, FFI, Java
|
|
20
|
+
# - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
|
|
21
|
+
# - Pure Ruby: Citrus (portable fallback)
|
|
14
22
|
#
|
|
15
|
-
# @example Basic usage with
|
|
23
|
+
# @example Basic usage with tree-sitter
|
|
16
24
|
# # Load a language grammar
|
|
17
25
|
# language = TreeHaver::Language.from_library(
|
|
18
26
|
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
@@ -27,8 +35,28 @@ require_relative "tree_haver/language_registry"
|
|
|
27
35
|
# tree = parser.parse("[package]\nname = \"my-app\"")
|
|
28
36
|
# root = tree.root_node
|
|
29
37
|
#
|
|
30
|
-
# #
|
|
31
|
-
# root.
|
|
38
|
+
# # Use unified Position API (works across all backends)
|
|
39
|
+
# puts root.start_line # => 1 (1-based)
|
|
40
|
+
# puts root.source_position # => {start_line:, end_line:, start_column:, end_column:}
|
|
41
|
+
#
|
|
42
|
+
# @example Using language-specific backends
|
|
43
|
+
# # Parse Ruby with Prism
|
|
44
|
+
# TreeHaver.backend = :prism
|
|
45
|
+
# parser = TreeHaver::Parser.new
|
|
46
|
+
# parser.language = TreeHaver::Backends::Prism::Language.ruby
|
|
47
|
+
# tree = parser.parse("class Example; end")
|
|
48
|
+
#
|
|
49
|
+
# # Parse YAML with Psych
|
|
50
|
+
# TreeHaver.backend = :psych
|
|
51
|
+
# parser = TreeHaver::Parser.new
|
|
52
|
+
# parser.language = TreeHaver::Backends::Psych::Language.yaml
|
|
53
|
+
# tree = parser.parse("key: value")
|
|
54
|
+
#
|
|
55
|
+
# # Parse Markdown with Commonmarker
|
|
56
|
+
# TreeHaver.backend = :commonmarker
|
|
57
|
+
# parser = TreeHaver::Parser.new
|
|
58
|
+
# parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
|
|
59
|
+
# tree = parser.parse("# Heading\nParagraph")
|
|
32
60
|
#
|
|
33
61
|
# @example Using language registration
|
|
34
62
|
# TreeHaver.register_language(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
|
|
@@ -40,29 +68,31 @@ require_relative "tree_haver/language_registry"
|
|
|
40
68
|
# finder.register! if finder.available?
|
|
41
69
|
# language = TreeHaver::Language.toml
|
|
42
70
|
#
|
|
43
|
-
# @example Using GrammarFinder in a *-merge gem
|
|
44
|
-
# # Each merge gem (toml-merge, json-merge, bash-merge) uses the same pattern
|
|
45
|
-
# finder = TreeHaver::GrammarFinder.new(:toml) # or :json, :bash, etc.
|
|
46
|
-
# if finder.available?
|
|
47
|
-
# finder.register!
|
|
48
|
-
# else
|
|
49
|
-
# warn finder.not_found_message
|
|
50
|
-
# end
|
|
51
|
-
#
|
|
52
71
|
# @example Selecting a backend
|
|
53
|
-
# TreeHaver.backend = :
|
|
54
|
-
# TreeHaver.backend = :
|
|
55
|
-
# TreeHaver.backend = :
|
|
72
|
+
# TreeHaver.backend = :mri # Force MRI (ruby_tree_sitter)
|
|
73
|
+
# TreeHaver.backend = :rust # Force Rust (tree_stump)
|
|
74
|
+
# TreeHaver.backend = :ffi # Force FFI
|
|
75
|
+
# TreeHaver.backend = :java # Force Java (JRuby)
|
|
76
|
+
# TreeHaver.backend = :prism # Force Prism (Ruby)
|
|
77
|
+
# TreeHaver.backend = :psych # Force Psych (YAML)
|
|
78
|
+
# TreeHaver.backend = :commonmarker # Force Commonmarker (Markdown)
|
|
79
|
+
# TreeHaver.backend = :markly # Force Markly (GFM)
|
|
80
|
+
# TreeHaver.backend = :citrus # Force Citrus (pure Ruby)
|
|
81
|
+
# TreeHaver.backend = :auto # Auto-select (default)
|
|
56
82
|
#
|
|
57
83
|
# @see https://tree-sitter.github.io/tree-sitter/ tree-sitter documentation
|
|
58
84
|
# @see GrammarFinder For automatic grammar library discovery
|
|
85
|
+
# @see Backends For available parsing backends
|
|
59
86
|
module TreeHaver
|
|
60
87
|
# Base error class for TreeHaver exceptions
|
|
88
|
+
# @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
|
|
61
89
|
#
|
|
62
90
|
# @abstract Subclass to create specific error types
|
|
63
|
-
class Error <
|
|
91
|
+
class Error < Exception; end # rubocop:disable Lint/InheritException
|
|
64
92
|
|
|
65
93
|
# Raised when a requested backend or feature is not available
|
|
94
|
+
# These are serious errors that extend Exception (not StandardError).
|
|
95
|
+
# @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
|
|
66
96
|
#
|
|
67
97
|
# This can occur when:
|
|
68
98
|
# - Required native libraries are not installed
|
|
@@ -77,6 +107,30 @@ module TreeHaver
|
|
|
77
107
|
# end
|
|
78
108
|
class NotAvailable < Error; end
|
|
79
109
|
|
|
110
|
+
# Raised when attempting to use backends that are known to conflict
|
|
111
|
+
#
|
|
112
|
+
# This is a serious error that extends Exception (not StandardError) because
|
|
113
|
+
# it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
|
|
114
|
+
# FFI backend cannot coexist in the same process - once MRI loads, FFI will
|
|
115
|
+
# segfault when trying to set a language on a parser.
|
|
116
|
+
#
|
|
117
|
+
# This protection can be disabled with `TreeHaver.backend_protect = false`
|
|
118
|
+
# but doing so risks segfaults.
|
|
119
|
+
#
|
|
120
|
+
# @example Handling backend conflicts
|
|
121
|
+
# begin
|
|
122
|
+
# # This will raise if MRI was already used
|
|
123
|
+
# TreeHaver.with_backend(:ffi) { parser.language = lang }
|
|
124
|
+
# rescue TreeHaver::BackendConflict => e
|
|
125
|
+
# puts "Backend conflict: #{e.message}"
|
|
126
|
+
# # Fall back to a compatible backend
|
|
127
|
+
# end
|
|
128
|
+
#
|
|
129
|
+
# @example Disabling protection (not recommended)
|
|
130
|
+
# TreeHaver.backend_protect = false
|
|
131
|
+
# # Now you can test backend conflicts (at risk of segfaults)
|
|
132
|
+
class BackendConflict < Error; end
|
|
133
|
+
|
|
80
134
|
# Namespace for backend implementations
|
|
81
135
|
#
|
|
82
136
|
# TreeHaver provides multiple backends to support different Ruby implementations:
|
|
@@ -85,12 +139,36 @@ module TreeHaver
|
|
|
85
139
|
# - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
|
|
86
140
|
# - {Backends::Java} - Uses JRuby's Java integration
|
|
87
141
|
# - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
|
|
142
|
+
# - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
|
|
88
143
|
module Backends
|
|
89
144
|
autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
|
|
90
145
|
autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
|
|
91
146
|
autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
|
|
92
147
|
autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
|
|
93
148
|
autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
|
|
149
|
+
autoload :Prism, File.join(__dir__, "tree_haver", "backends", "prism")
|
|
150
|
+
autoload :Psych, File.join(__dir__, "tree_haver", "backends", "psych")
|
|
151
|
+
autoload :Commonmarker, File.join(__dir__, "tree_haver", "backends", "commonmarker")
|
|
152
|
+
autoload :Markly, File.join(__dir__, "tree_haver", "backends", "markly")
|
|
153
|
+
|
|
154
|
+
# Known backend conflicts
|
|
155
|
+
#
|
|
156
|
+
# Maps each backend to an array of backends that block it from working.
|
|
157
|
+
# For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
|
|
158
|
+
# FFI calls to ts_parser_set_language will segfault.
|
|
159
|
+
#
|
|
160
|
+
# @return [Hash{Symbol => Array<Symbol>}]
|
|
161
|
+
BLOCKED_BY = {
|
|
162
|
+
mri: [],
|
|
163
|
+
rust: [],
|
|
164
|
+
ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
|
|
165
|
+
java: [],
|
|
166
|
+
citrus: [],
|
|
167
|
+
prism: [], # Prism has no conflicts with other backends
|
|
168
|
+
psych: [], # Psych has no conflicts with other backends
|
|
169
|
+
commonmarker: [], # Commonmarker has no conflicts with other backends
|
|
170
|
+
markly: [], # Markly has no conflicts with other backends
|
|
171
|
+
}.freeze
|
|
94
172
|
end
|
|
95
173
|
|
|
96
174
|
# Security utilities for validating paths before loading shared libraries
|
|
@@ -121,6 +199,19 @@ module TreeHaver
|
|
|
121
199
|
# @see PathValidator
|
|
122
200
|
autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
|
|
123
201
|
|
|
202
|
+
# Citrus grammar finder for discovering and registering Citrus-based parsers
|
|
203
|
+
#
|
|
204
|
+
# @example Register toml-rb
|
|
205
|
+
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
206
|
+
# language: :toml,
|
|
207
|
+
# gem_name: "toml-rb",
|
|
208
|
+
# grammar_const: "TomlRB::Document"
|
|
209
|
+
# )
|
|
210
|
+
# finder.register! if finder.available?
|
|
211
|
+
#
|
|
212
|
+
# @see CitrusGrammarFinder
|
|
213
|
+
autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
|
|
214
|
+
|
|
124
215
|
# Unified Node wrapper providing consistent API across backends
|
|
125
216
|
autoload :Node, File.join(__dir__, "tree_haver", "node")
|
|
126
217
|
|
|
@@ -132,6 +223,77 @@ module TreeHaver
|
|
|
132
223
|
# @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, :citrus, :prism, :psych, :commonmarker, or :markly
|
|
133
224
|
# @note Can be set via ENV["TREE_HAVER_BACKEND"]
|
|
134
225
|
class << self
|
|
226
|
+
# Whether backend conflict protection is enabled
|
|
227
|
+
#
|
|
228
|
+
# When true (default), TreeHaver will raise BackendConflict if you try to
|
|
229
|
+
# use a backend that is known to conflict with a previously used backend.
|
|
230
|
+
# For example, FFI will not work after MRI has been used.
|
|
231
|
+
#
|
|
232
|
+
# Set to false to disable protection (useful for testing compatibility).
|
|
233
|
+
#
|
|
234
|
+
# @return [Boolean]
|
|
235
|
+
# @example Disable protection for testing
|
|
236
|
+
# TreeHaver.backend_protect = false
|
|
237
|
+
def backend_protect=(value)
|
|
238
|
+
@backend_protect_mutex ||= Mutex.new
|
|
239
|
+
@backend_protect_mutex.synchronize { @backend_protect = value }
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Check if backend conflict protection is enabled
|
|
243
|
+
#
|
|
244
|
+
# @return [Boolean] true if protection is enabled (default)
|
|
245
|
+
def backend_protect?
|
|
246
|
+
return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
247
|
+
true # Default is protected
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Alias for backend_protect?
|
|
251
|
+
def backend_protect
|
|
252
|
+
backend_protect?
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
# Track which backends have been used in this process
|
|
256
|
+
#
|
|
257
|
+
# @return [Set<Symbol>] set of backend symbols that have been used
|
|
258
|
+
def backends_used
|
|
259
|
+
@backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Record that a backend has been used
|
|
263
|
+
#
|
|
264
|
+
# @param backend [Symbol] the backend that was used
|
|
265
|
+
# @return [void]
|
|
266
|
+
# @api private
|
|
267
|
+
def record_backend_usage(backend)
|
|
268
|
+
backends_used << backend
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Check if a backend would conflict with previously used backends
|
|
272
|
+
#
|
|
273
|
+
# @param backend [Symbol] the backend to check
|
|
274
|
+
# @return [Array<Symbol>] list of previously used backends that block this one
|
|
275
|
+
def conflicting_backends_for(backend)
|
|
276
|
+
blockers = Backends::BLOCKED_BY[backend] || []
|
|
277
|
+
blockers & backends_used.to_a
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
# Check if using a backend would cause a conflict
|
|
281
|
+
#
|
|
282
|
+
# @param backend [Symbol] the backend to check
|
|
283
|
+
# @raise [BackendConflict] if protection is enabled and there's a conflict
|
|
284
|
+
# @return [void]
|
|
285
|
+
def check_backend_conflict!(backend)
|
|
286
|
+
return unless backend_protect?
|
|
287
|
+
|
|
288
|
+
conflicts = conflicting_backends_for(backend)
|
|
289
|
+
return if conflicts.empty?
|
|
290
|
+
|
|
291
|
+
raise BackendConflict,
|
|
292
|
+
"Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
|
|
293
|
+
"The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
|
|
294
|
+
"To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
|
|
295
|
+
end
|
|
296
|
+
|
|
135
297
|
# @example
|
|
136
298
|
# TreeHaver.backend # => :auto
|
|
137
299
|
def backend
|
|
@@ -141,6 +303,10 @@ module TreeHaver
|
|
|
141
303
|
when "ffi" then :ffi
|
|
142
304
|
when "java" then :java
|
|
143
305
|
when "citrus" then :citrus
|
|
306
|
+
when "prism" then :prism
|
|
307
|
+
when "psych" then :psych
|
|
308
|
+
when "commonmarker" then :commonmarker
|
|
309
|
+
when "markly" then :markly
|
|
144
310
|
else :auto
|
|
145
311
|
end
|
|
146
312
|
end
|
|
@@ -171,6 +337,211 @@ module TreeHaver
|
|
|
171
337
|
@backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
172
338
|
end
|
|
173
339
|
|
|
340
|
+
# Thread-local backend context storage
|
|
341
|
+
#
|
|
342
|
+
# Returns a hash containing the thread-local backend context with keys:
|
|
343
|
+
# - :backend - The backend name (Symbol) or nil if using global default
|
|
344
|
+
# - :depth - The nesting depth (Integer) for proper cleanup
|
|
345
|
+
#
|
|
346
|
+
# @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
|
|
347
|
+
# @example
|
|
348
|
+
# ctx = TreeHaver.current_backend_context
|
|
349
|
+
# ctx[:backend] # => nil or :ffi, :mri, etc.
|
|
350
|
+
# ctx[:depth] # => 0, 1, 2, etc.
|
|
351
|
+
def current_backend_context
|
|
352
|
+
Thread.current[:tree_haver_backend_context] ||= {
|
|
353
|
+
backend: nil, # nil means "use global default"
|
|
354
|
+
depth: 0, # Track nesting depth for proper cleanup
|
|
355
|
+
}
|
|
356
|
+
end
|
|
357
|
+
|
|
358
|
+
# Get the effective backend for current context
|
|
359
|
+
#
|
|
360
|
+
# Priority: thread-local context → global @backend → :auto
|
|
361
|
+
#
|
|
362
|
+
# @return [Symbol] the backend to use
|
|
363
|
+
# @example
|
|
364
|
+
# TreeHaver.effective_backend # => :auto (default)
|
|
365
|
+
# @example With thread-local context
|
|
366
|
+
# TreeHaver.with_backend(:ffi) do
|
|
367
|
+
# TreeHaver.effective_backend # => :ffi
|
|
368
|
+
# end
|
|
369
|
+
def effective_backend
|
|
370
|
+
ctx = current_backend_context
|
|
371
|
+
ctx[:backend] || backend || :auto
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
# Execute a block with a specific backend in thread-local context
|
|
375
|
+
#
|
|
376
|
+
# This method provides temporary, thread-safe backend switching for a block of code.
|
|
377
|
+
# The backend setting is automatically restored when the block exits, even if
|
|
378
|
+
# an exception is raised. Supports nesting—inner blocks override outer blocks,
|
|
379
|
+
# and each level is properly unwound.
|
|
380
|
+
#
|
|
381
|
+
# Thread Safety: Each thread maintains its own backend context, so concurrent
|
|
382
|
+
# threads can safely use different backends without interfering with each other.
|
|
383
|
+
#
|
|
384
|
+
# Use Cases:
|
|
385
|
+
# - Testing: Test the same code path with different backends
|
|
386
|
+
# - Performance comparison: Benchmark parsing with different backends
|
|
387
|
+
# - Fallback scenarios: Try one backend, fall back to another on failure
|
|
388
|
+
# - Thread isolation: Different threads can use different backends safely
|
|
389
|
+
#
|
|
390
|
+
# @param name [Symbol, String] backend name (:mri, :rust, :ffi, :java, :citrus, :prism, :psych, :commonmarker, :markly, :auto)
|
|
391
|
+
# @yield block to execute with the specified backend
|
|
392
|
+
# @return [Object] the return value of the block
|
|
393
|
+
# @raise [ArgumentError] if backend name is nil
|
|
394
|
+
# @raise [BackendConflict] if the requested backend conflicts with a previously used backend
|
|
395
|
+
#
|
|
396
|
+
# @example Basic usage
|
|
397
|
+
# TreeHaver.with_backend(:mri) do
|
|
398
|
+
# parser = TreeHaver::Parser.new
|
|
399
|
+
# tree = parser.parse(source)
|
|
400
|
+
# end
|
|
401
|
+
# # Backend is automatically restored here
|
|
402
|
+
#
|
|
403
|
+
# @example Nested blocks (inner overrides outer)
|
|
404
|
+
# TreeHaver.with_backend(:rust) do
|
|
405
|
+
# parser1 = TreeHaver::Parser.new # Uses :rust
|
|
406
|
+
# TreeHaver.with_backend(:citrus) do
|
|
407
|
+
# parser2 = TreeHaver::Parser.new # Uses :citrus
|
|
408
|
+
# end
|
|
409
|
+
# parser3 = TreeHaver::Parser.new # Back to :rust
|
|
410
|
+
# end
|
|
411
|
+
#
|
|
412
|
+
# @example Testing multiple backends
|
|
413
|
+
# [:mri, :rust, :citrus].each do |backend_name|
|
|
414
|
+
# TreeHaver.with_backend(backend_name) do
|
|
415
|
+
# parser = TreeHaver::Parser.new
|
|
416
|
+
# result = parser.parse(source)
|
|
417
|
+
# puts "#{backend_name}: #{result.root_node.type}"
|
|
418
|
+
# end
|
|
419
|
+
# end
|
|
420
|
+
#
|
|
421
|
+
# @example Exception safety (backend restored even on error)
|
|
422
|
+
# TreeHaver.with_backend(:mri) do
|
|
423
|
+
# raise "Something went wrong"
|
|
424
|
+
# rescue
|
|
425
|
+
# # Handle error
|
|
426
|
+
# end
|
|
427
|
+
# # Backend is still restored to its previous value
|
|
428
|
+
#
|
|
429
|
+
# @example Thread isolation
|
|
430
|
+
# threads = [:mri, :rust].map do |backend_name|
|
|
431
|
+
# Thread.new do
|
|
432
|
+
# TreeHaver.with_backend(backend_name) do
|
|
433
|
+
# # Each thread uses its own backend independently
|
|
434
|
+
# TreeHaver::Parser.new
|
|
435
|
+
# end
|
|
436
|
+
# end
|
|
437
|
+
# end
|
|
438
|
+
# threads.each(&:join)
|
|
439
|
+
#
|
|
440
|
+
# @see #effective_backend
|
|
441
|
+
# @see #current_backend_context
|
|
442
|
+
def with_backend(name)
|
|
443
|
+
raise ArgumentError, "Backend name required" if name.nil?
|
|
444
|
+
|
|
445
|
+
# Get context FIRST to ensure it exists
|
|
446
|
+
ctx = current_backend_context
|
|
447
|
+
old_backend = ctx[:backend]
|
|
448
|
+
old_depth = ctx[:depth]
|
|
449
|
+
|
|
450
|
+
begin
|
|
451
|
+
# Set new backend and increment depth
|
|
452
|
+
ctx[:backend] = name.to_sym
|
|
453
|
+
ctx[:depth] += 1
|
|
454
|
+
|
|
455
|
+
# Execute block
|
|
456
|
+
yield
|
|
457
|
+
ensure
|
|
458
|
+
# Restore previous backend and depth
|
|
459
|
+
# This ensures proper unwinding even with exceptions
|
|
460
|
+
ctx[:backend] = old_backend
|
|
461
|
+
ctx[:depth] = old_depth
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
# Resolve the effective backend considering explicit override
|
|
466
|
+
#
|
|
467
|
+
# Priority: explicit > thread context > global > :auto
|
|
468
|
+
#
|
|
469
|
+
# @param explicit_backend [Symbol, String, nil] explicitly requested backend
|
|
470
|
+
# @return [Symbol] the backend to use
|
|
471
|
+
# @example
|
|
472
|
+
# TreeHaver.resolve_effective_backend(:ffi) # => :ffi
|
|
473
|
+
# @example With thread-local context
|
|
474
|
+
# TreeHaver.with_backend(:mri) do
|
|
475
|
+
# TreeHaver.resolve_effective_backend(nil) # => :mri
|
|
476
|
+
# TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
|
|
477
|
+
# end
|
|
478
|
+
def resolve_effective_backend(explicit_backend = nil)
|
|
479
|
+
return explicit_backend.to_sym if explicit_backend
|
|
480
|
+
effective_backend
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
# Get backend module for a specific backend (with explicit override)
|
|
484
|
+
#
|
|
485
|
+
# @param explicit_backend [Symbol, String, nil] explicitly requested backend
|
|
486
|
+
# @return [Module, nil] the backend module or nil if not available
|
|
487
|
+
# @raise [BackendConflict] if the backend conflicts with previously used backends
|
|
488
|
+
# @example
|
|
489
|
+
# mod = TreeHaver.resolve_backend_module(:ffi)
|
|
490
|
+
# mod.capabilities[:backend] # => :ffi
|
|
491
|
+
def resolve_backend_module(explicit_backend = nil)
|
|
492
|
+
# Resolve which backend was requested (an explicit argument wins over the thread-local/global setting)
|
|
493
|
+
requested = resolve_effective_backend(explicit_backend)
|
|
494
|
+
|
|
495
|
+
mod = case requested
|
|
496
|
+
when :mri
|
|
497
|
+
Backends::MRI
|
|
498
|
+
when :rust
|
|
499
|
+
Backends::Rust
|
|
500
|
+
when :ffi
|
|
501
|
+
Backends::FFI
|
|
502
|
+
when :java
|
|
503
|
+
Backends::Java
|
|
504
|
+
when :citrus
|
|
505
|
+
Backends::Citrus
|
|
506
|
+
when :prism
|
|
507
|
+
Backends::Prism
|
|
508
|
+
when :psych
|
|
509
|
+
Backends::Psych
|
|
510
|
+
when :commonmarker
|
|
511
|
+
Backends::Commonmarker
|
|
512
|
+
when :markly
|
|
513
|
+
Backends::Markly
|
|
514
|
+
when :auto
|
|
515
|
+
backend_module # Fall back to normal resolution for :auto
|
|
516
|
+
else
|
|
517
|
+
# Unknown backend name - return nil to trigger error in caller
|
|
518
|
+
nil
|
|
519
|
+
end
|
|
520
|
+
|
|
521
|
+
# Return nil if the module doesn't exist
|
|
522
|
+
return unless mod
|
|
523
|
+
|
|
524
|
+
# Check for backend conflicts FIRST, before checking availability
|
|
525
|
+
# This is critical because the conflict causes the backend to report unavailable
|
|
526
|
+
# We want to raise a clear error explaining WHY it's unavailable
|
|
527
|
+
# Use the requested backend name directly (not capabilities) because
|
|
528
|
+
# capabilities may be empty when the backend is blocked/unavailable
|
|
529
|
+
check_backend_conflict!(requested) if requested && requested != :auto
|
|
530
|
+
|
|
531
|
+
# Now check if the backend is available
|
|
532
|
+
# Why assume modules without available? are available?
|
|
533
|
+
# - Some backends might be mocked in tests without an available? method
|
|
534
|
+
# - This makes the code more defensive and test-friendly
|
|
535
|
+
# - It allows graceful degradation if a backend module is incomplete
|
|
536
|
+
# - Backward compatibility: if a module doesn't declare availability, assume it works
|
|
537
|
+
return if mod.respond_to?(:available?) && !mod.available?
|
|
538
|
+
|
|
539
|
+
# Record that this backend is being used
|
|
540
|
+
record_backend_usage(requested) if requested && requested != :auto
|
|
541
|
+
|
|
542
|
+
mod
|
|
543
|
+
end
|
|
544
|
+
|
|
174
545
|
# Determine the concrete backend module to use
|
|
175
546
|
#
|
|
176
547
|
# This method performs backend auto-selection when backend is :auto.
|
|
@@ -185,7 +556,7 @@ module TreeHaver
|
|
|
185
556
|
# puts "Using #{mod.capabilities[:backend]} backend"
|
|
186
557
|
# end
|
|
187
558
|
def backend_module
|
|
188
|
-
case backend
|
|
559
|
+
case effective_backend # Changed from: backend
|
|
189
560
|
when :mri
|
|
190
561
|
Backends::MRI
|
|
191
562
|
when :rust
|
|
@@ -196,6 +567,14 @@ module TreeHaver
|
|
|
196
567
|
Backends::Java
|
|
197
568
|
when :citrus
|
|
198
569
|
Backends::Citrus
|
|
570
|
+
when :prism
|
|
571
|
+
Backends::Prism
|
|
572
|
+
when :psych
|
|
573
|
+
Backends::Psych
|
|
574
|
+
when :commonmarker
|
|
575
|
+
Backends::Commonmarker
|
|
576
|
+
when :markly
|
|
577
|
+
Backends::Markly
|
|
199
578
|
else
|
|
200
579
|
# auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
|
|
201
580
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && Backends::Java.available?
|
|
@@ -240,44 +619,102 @@ module TreeHaver
|
|
|
240
619
|
# Allows opting-in dynamic helpers like TreeHaver::Language.toml without
|
|
241
620
|
# advertising all names by default.
|
|
242
621
|
|
|
243
|
-
# Register a language helper by name
|
|
622
|
+
# Register a language helper by name (backend-agnostic)
|
|
244
623
|
#
|
|
245
624
|
# After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
|
|
246
|
-
# to load the registered language.
|
|
625
|
+
# to load the registered language. TreeHaver will automatically use the appropriate
|
|
626
|
+
# grammar based on the active backend.
|
|
627
|
+
#
|
|
628
|
+
# The `name` parameter is an arbitrary identifier you choose - it doesn't need to
|
|
629
|
+
# match the actual language name. This is useful for:
|
|
630
|
+
# - Testing: Use unique names like `:toml_test` to avoid collisions
|
|
631
|
+
# - Aliasing: Register the same grammar under multiple names
|
|
632
|
+
# - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
|
|
633
|
+
#
|
|
634
|
+
# The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
|
|
635
|
+
# `grammar_module` (Citrus), not from the name.
|
|
247
636
|
#
|
|
248
|
-
#
|
|
249
|
-
#
|
|
637
|
+
# IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
|
|
638
|
+
# library AND a Citrus grammar for the same language IN A SINGLE CALL.
|
|
639
|
+
# This is achieved by using separate `if` statements (not `elsif`) and no early
|
|
640
|
+
# returns. This design is deliberate and provides significant benefits:
|
|
641
|
+
#
|
|
642
|
+
# Why register both backends for one language?
|
|
643
|
+
# - Backend flexibility: Code works regardless of which backend is active
|
|
644
|
+
# - Performance testing: Compare tree-sitter vs Citrus performance
|
|
645
|
+
# - Gradual migration: Transition between backends without breaking code
|
|
646
|
+
# - Fallback scenarios: Use Citrus when tree-sitter library unavailable
|
|
647
|
+
# - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
|
|
648
|
+
#
|
|
649
|
+
# The active backend determines which registration is used automatically.
|
|
650
|
+
# No code changes needed to switch backends - just change TreeHaver.backend.
|
|
651
|
+
#
|
|
652
|
+
# @param name [Symbol, String] identifier for this registration (can be any name you choose)
|
|
653
|
+
# @param path [String, nil] absolute path to the language shared library (for tree-sitter)
|
|
250
654
|
# @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
|
|
655
|
+
# @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
|
|
656
|
+
# @param gem_name [String, nil] optional gem name for error messages
|
|
251
657
|
# @return [void]
|
|
252
|
-
# @example Register
|
|
658
|
+
# @example Register tree-sitter grammar only
|
|
253
659
|
# TreeHaver.register_language(
|
|
254
660
|
# :toml,
|
|
255
661
|
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
256
662
|
# symbol: "tree_sitter_toml"
|
|
257
663
|
# )
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
664
|
+
# @example Register Citrus grammar only
|
|
665
|
+
# TreeHaver.register_language(
|
|
666
|
+
# :toml,
|
|
667
|
+
# grammar_module: TomlRB::Document,
|
|
668
|
+
# gem_name: "toml-rb"
|
|
669
|
+
# )
|
|
670
|
+
# @example Register BOTH backends in separate calls
|
|
671
|
+
# TreeHaver.register_language(
|
|
672
|
+
# :toml,
|
|
673
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
674
|
+
# symbol: "tree_sitter_toml"
|
|
675
|
+
# )
|
|
676
|
+
# TreeHaver.register_language(
|
|
677
|
+
# :toml,
|
|
678
|
+
# grammar_module: TomlRB::Document,
|
|
679
|
+
# gem_name: "toml-rb"
|
|
680
|
+
# )
|
|
681
|
+
# @example Register BOTH backends in ONE call (recommended for maximum flexibility)
|
|
682
|
+
# TreeHaver.register_language(
|
|
683
|
+
# :toml,
|
|
684
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
685
|
+
# symbol: "tree_sitter_toml",
|
|
686
|
+
# grammar_module: TomlRB::Document,
|
|
687
|
+
# gem_name: "toml-rb"
|
|
688
|
+
# )
|
|
689
|
+
# # Now TreeHaver::Language.toml works with ANY backend!
|
|
690
|
+
def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
|
|
691
|
+
# Register tree-sitter backend if path provided
|
|
692
|
+
# Note: Uses `if` not `elsif` so both backends can be registered in one call
|
|
693
|
+
if path
|
|
694
|
+
LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol)
|
|
695
|
+
end
|
|
261
696
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
LanguageRegistry.unregister(name)
|
|
270
|
-
end
|
|
697
|
+
# Register Citrus backend if grammar_module provided
|
|
698
|
+
# Note: Uses `if` not `elsif` so both backends can be registered in one call
|
|
699
|
+
# This allows maximum flexibility - register once, use with any backend
|
|
700
|
+
if grammar_module
|
|
701
|
+
unless grammar_module.respond_to?(:parse)
|
|
702
|
+
raise ArgumentError, "Grammar module must respond to :parse"
|
|
703
|
+
end
|
|
271
704
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
705
|
+
LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
# Require at least one backend to be registered
|
|
709
|
+
if path.nil? && grammar_module.nil?
|
|
710
|
+
raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
|
|
711
|
+
end
|
|
712
|
+
|
|
713
|
+
# Note: No early return! This method intentionally processes both `if` blocks
|
|
714
|
+
# above to allow registering multiple backends for the same language.
|
|
715
|
+
# Both tree-sitter and Citrus can be registered simultaneously for maximum
|
|
716
|
+
# flexibility. See method documentation for rationale.
|
|
717
|
+
nil
|
|
281
718
|
end
|
|
282
719
|
|
|
283
720
|
# Fetch a registered language entry
|
|
@@ -338,6 +775,7 @@ module TreeHaver
|
|
|
338
775
|
# @param symbol [String, nil] name of the exported function (defaults to auto-detection)
|
|
339
776
|
# @param name [String, nil] logical name for the language (used in caching)
|
|
340
777
|
# @param validate [Boolean] if true, validates path and symbol for safety (default: true)
|
|
778
|
+
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
341
779
|
# @return [Language] loaded language handle
|
|
342
780
|
# @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
|
|
343
781
|
# @raise [ArgumentError] if path or symbol fails security validation
|
|
@@ -347,7 +785,13 @@ module TreeHaver
|
|
|
347
785
|
# symbol: "tree_sitter_toml",
|
|
348
786
|
# name: "toml"
|
|
349
787
|
# )
|
|
350
|
-
|
|
788
|
+
# @example With explicit backend
|
|
789
|
+
# language = TreeHaver::Language.from_library(
|
|
790
|
+
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
791
|
+
# symbol: "tree_sitter_toml",
|
|
792
|
+
# backend: :ffi
|
|
793
|
+
# )
|
|
794
|
+
def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
|
|
351
795
|
if validate
|
|
352
796
|
unless PathValidator.safe_library_path?(path)
|
|
353
797
|
errors = PathValidator.validation_errors(path)
|
|
@@ -360,11 +804,20 @@ module TreeHaver
|
|
|
360
804
|
end
|
|
361
805
|
end
|
|
362
806
|
|
|
363
|
-
mod = TreeHaver.
|
|
364
|
-
|
|
807
|
+
mod = TreeHaver.resolve_backend_module(backend)
|
|
808
|
+
|
|
809
|
+
if mod.nil?
|
|
810
|
+
if backend
|
|
811
|
+
raise NotAvailable, "Requested backend #{backend.inspect} is not available"
|
|
812
|
+
else
|
|
813
|
+
raise NotAvailable, "No TreeHaver backend is available"
|
|
814
|
+
end
|
|
815
|
+
end
|
|
816
|
+
|
|
365
817
|
# Backend must implement .from_library; fallback to .from_path for older impls
|
|
366
|
-
# Include ENV vars in cache key since they affect
|
|
367
|
-
|
|
818
|
+
# Include effective backend AND ENV vars in cache key since they affect loading
|
|
819
|
+
effective_b = TreeHaver.resolve_effective_backend(backend)
|
|
820
|
+
key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
|
|
368
821
|
LanguageRegistry.fetch(key) do
|
|
369
822
|
if mod::Language.respond_to?(:from_library)
|
|
370
823
|
mod::Language.from_library(path, symbol: symbol, name: name)
|
|
@@ -380,31 +833,102 @@ module TreeHaver
|
|
|
380
833
|
# Dynamic helper to load a registered language by name
|
|
381
834
|
#
|
|
382
835
|
# After registering a language with {TreeHaver.register_language},
|
|
383
|
-
# you can load it using a method call
|
|
836
|
+
# you can load it using a method call. The appropriate backend will be
|
|
837
|
+
# used based on registration and current backend.
|
|
384
838
|
#
|
|
385
|
-
# @example
|
|
839
|
+
# @example With tree-sitter
|
|
386
840
|
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
387
841
|
# language = TreeHaver::Language.toml
|
|
388
842
|
#
|
|
389
|
-
# @example With
|
|
390
|
-
#
|
|
843
|
+
# @example With both backends
|
|
844
|
+
# TreeHaver.register_language(:toml,
|
|
845
|
+
# path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
846
|
+
# TreeHaver.register_language(:toml,
|
|
847
|
+
# grammar_module: TomlRB::Document)
|
|
848
|
+
# language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
|
|
391
849
|
#
|
|
392
850
|
# @param method_name [Symbol] the registered language name
|
|
393
|
-
# @param args [Array] positional arguments
|
|
394
|
-
# @param kwargs [Hash] keyword arguments
|
|
851
|
+
# @param args [Array] positional arguments
|
|
852
|
+
# @param kwargs [Hash] keyword arguments
|
|
395
853
|
# @return [Language] loaded language handle
|
|
396
854
|
# @raise [NoMethodError] if the language name is not registered
|
|
397
855
|
def method_missing(method_name, *args, **kwargs, &block)
|
|
398
856
|
# Resolve only if the language name was registered
|
|
399
|
-
|
|
400
|
-
return super unless
|
|
401
|
-
|
|
402
|
-
#
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
857
|
+
all_backends = TreeHaver.registered_language(method_name)
|
|
858
|
+
return super unless all_backends
|
|
859
|
+
|
|
860
|
+
# Check current backend
|
|
861
|
+
current_backend = TreeHaver.backend_module
|
|
862
|
+
|
|
863
|
+
# Determine which backend type to use
|
|
864
|
+
backend_type = if current_backend == Backends::Citrus
|
|
865
|
+
:citrus
|
|
866
|
+
else
|
|
867
|
+
:tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
|
|
868
|
+
end
|
|
869
|
+
|
|
870
|
+
# Get backend-specific registration
|
|
871
|
+
reg = all_backends[backend_type]
|
|
872
|
+
|
|
873
|
+
# If Citrus backend is active
|
|
874
|
+
if backend_type == :citrus
|
|
875
|
+
if reg && reg[:grammar_module]
|
|
876
|
+
return Backends::Citrus::Language.new(reg[:grammar_module])
|
|
877
|
+
end
|
|
878
|
+
|
|
879
|
+
# Fall back to error if no Citrus grammar registered
|
|
880
|
+
raise NotAvailable,
|
|
881
|
+
"Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
|
|
882
|
+
"Either register a Citrus grammar or use a tree-sitter backend. " \
|
|
883
|
+
"Registered backends: #{all_backends.keys.inspect}"
|
|
884
|
+
end
|
|
885
|
+
|
|
886
|
+
# For tree-sitter backends, try to load from path
|
|
887
|
+
# If that fails, fall back to Citrus if available
|
|
888
|
+
if reg && reg[:path]
|
|
889
|
+
path = kwargs[:path] || args.first || reg[:path]
|
|
890
|
+
# Symbol priority: kwargs override > registration > derive from method_name
|
|
891
|
+
symbol = if kwargs.key?(:symbol)
|
|
892
|
+
kwargs[:symbol]
|
|
893
|
+
elsif reg[:symbol]
|
|
894
|
+
reg[:symbol]
|
|
895
|
+
else
|
|
896
|
+
"tree_sitter_#{method_name}"
|
|
897
|
+
end
|
|
898
|
+
# Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
|
|
899
|
+
# Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
|
|
900
|
+
# e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
|
|
901
|
+
name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
|
|
902
|
+
|
|
903
|
+
begin
|
|
904
|
+
return from_library(path, symbol: symbol, name: name)
|
|
905
|
+
rescue NotAvailable, ArgumentError, LoadError, FFI::NotFoundError => _e
|
|
906
|
+
# Tree-sitter failed to load - check for Citrus fallback
|
|
907
|
+
# This handles cases where:
|
|
908
|
+
# - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
|
|
909
|
+
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
|
|
910
|
+
# - Invalid arguments were provided (ArgumentError)
|
|
911
|
+
citrus_reg = all_backends[:citrus]
|
|
912
|
+
if citrus_reg && citrus_reg[:grammar_module]
|
|
913
|
+
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
914
|
+
end
|
|
915
|
+
# No Citrus fallback available, re-raise the original error
|
|
916
|
+
raise
|
|
917
|
+
end
|
|
918
|
+
end
|
|
919
|
+
|
|
920
|
+
# No tree-sitter path registered - check for Citrus fallback
|
|
921
|
+
# This enables auto-fallback when tree-sitter grammar is not installed
|
|
922
|
+
# but a Citrus grammar (pure Ruby) is available
|
|
923
|
+
citrus_reg = all_backends[:citrus]
|
|
924
|
+
if citrus_reg && citrus_reg[:grammar_module]
|
|
925
|
+
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
926
|
+
end
|
|
927
|
+
|
|
928
|
+
# No appropriate registration found
|
|
929
|
+
raise ArgumentError,
|
|
930
|
+
"No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
|
|
931
|
+
"Registered backends: #{all_backends.keys.inspect}"
|
|
408
932
|
end
|
|
409
933
|
|
|
410
934
|
# @api private
|
|
@@ -419,6 +943,29 @@ module TreeHaver
|
|
|
419
943
|
# A Parser is used to parse source code into a syntax tree. You must
|
|
420
944
|
# set a language before parsing.
|
|
421
945
|
#
|
|
946
|
+
# == Wrapping/Unwrapping Responsibility
|
|
947
|
+
#
|
|
948
|
+
# TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
|
|
949
|
+
#
|
|
950
|
+
# **Language objects:**
|
|
951
|
+
# - Unwraps Language wrappers before passing to backend.language=
|
|
952
|
+
# - MRI backend receives ::TreeSitter::Language
|
|
953
|
+
# - Rust backend receives String (language name)
|
|
954
|
+
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
955
|
+
#
|
|
956
|
+
# **Tree objects:**
|
|
957
|
+
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
958
|
+
# - parse_string() unwraps old_tree before passing to backend, wraps returned tree
|
|
959
|
+
# - Backends always work with raw backend trees, never TreeHaver::Tree
|
|
960
|
+
#
|
|
961
|
+
# **Node objects:**
|
|
962
|
+
# - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
|
|
963
|
+
#
|
|
964
|
+
# This design ensures:
|
|
965
|
+
# - Principle of Least Surprise: wrapping happens at boundaries, consistently
|
|
966
|
+
# - Backends are simple: they don't need to know about TreeHaver wrappers
|
|
967
|
+
# - Single Responsibility: wrapping logic is only in TreeHaver::Parser
|
|
968
|
+
#
|
|
422
969
|
# @example Basic parsing
|
|
423
970
|
# parser = TreeHaver::Parser.new
|
|
424
971
|
# parser.language = TreeHaver::Language.toml
|
|
@@ -426,11 +973,76 @@ module TreeHaver
|
|
|
426
973
|
class Parser
|
|
427
974
|
# Create a new parser instance
|
|
428
975
|
#
|
|
429
|
-
# @
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
976
|
+
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
977
|
+
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
|
|
978
|
+
# @example Default (uses context/global)
|
|
979
|
+
# parser = TreeHaver::Parser.new
|
|
980
|
+
# @example Explicit backend
|
|
981
|
+
# parser = TreeHaver::Parser.new(backend: :ffi)
|
|
982
|
+
def initialize(backend: nil)
|
|
983
|
+
# Convert string backend names to symbols for consistency
|
|
984
|
+
backend = backend.to_sym if backend.is_a?(String)
|
|
985
|
+
|
|
986
|
+
mod = TreeHaver.resolve_backend_module(backend)
|
|
987
|
+
|
|
988
|
+
if mod.nil?
|
|
989
|
+
if backend
|
|
990
|
+
raise NotAvailable, "Requested backend #{backend.inspect} is not available"
|
|
991
|
+
else
|
|
992
|
+
raise NotAvailable, "No TreeHaver backend is available"
|
|
993
|
+
end
|
|
994
|
+
end
|
|
995
|
+
|
|
996
|
+
# Try to create the parser, with fallback to Citrus if tree-sitter fails
|
|
997
|
+
# This enables auto-fallback when tree-sitter runtime isn't available
|
|
998
|
+
begin
|
|
999
|
+
@impl = mod::Parser.new
|
|
1000
|
+
@explicit_backend = backend # Remember for introspection (always a Symbol or nil)
|
|
1001
|
+
rescue NoMethodError, FFI::NotFoundError, LoadError => e
|
|
1002
|
+
# Tree-sitter backend failed (likely missing runtime library)
|
|
1003
|
+
# Try Citrus as fallback if we weren't explicitly asked for a specific backend
|
|
1004
|
+
if backend.nil? || backend == :auto
|
|
1005
|
+
if Backends::Citrus.available?
|
|
1006
|
+
@impl = Backends::Citrus::Parser.new
|
|
1007
|
+
@explicit_backend = :citrus
|
|
1008
|
+
else
|
|
1009
|
+
# No fallback available, re-raise original error
|
|
1010
|
+
raise NotAvailable, "Tree-sitter backend failed: #{e.message}. " \
|
|
1011
|
+
"Citrus fallback not available. Install tree-sitter runtime or citrus gem."
|
|
1012
|
+
end
|
|
1013
|
+
else
|
|
1014
|
+
# Explicit backend was requested, don't fallback
|
|
1015
|
+
raise
|
|
1016
|
+
end
|
|
1017
|
+
end
|
|
1018
|
+
end
|
|
1019
|
+
|
|
1020
|
+
# Get the backend this parser is using (for introspection)
|
|
1021
|
+
#
|
|
1022
|
+
# Returns the actual backend in use, resolving :auto to the concrete backend.
|
|
1023
|
+
#
|
|
1024
|
+
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
|
|
1025
|
+
def backend
|
|
1026
|
+
if @explicit_backend && @explicit_backend != :auto
|
|
1027
|
+
@explicit_backend
|
|
1028
|
+
else
|
|
1029
|
+
# Determine actual backend from the implementation class
|
|
1030
|
+
case @impl.class.name
|
|
1031
|
+
when /MRI/
|
|
1032
|
+
:mri
|
|
1033
|
+
when /Rust/
|
|
1034
|
+
:rust
|
|
1035
|
+
when /FFI/
|
|
1036
|
+
:ffi
|
|
1037
|
+
when /Java/
|
|
1038
|
+
:java
|
|
1039
|
+
when /Citrus/
|
|
1040
|
+
:citrus
|
|
1041
|
+
else
|
|
1042
|
+
# Fallback to effective_backend if we can't determine from class name
|
|
1043
|
+
TreeHaver.effective_backend
|
|
1044
|
+
end
|
|
1045
|
+
end
|
|
434
1046
|
end
|
|
435
1047
|
|
|
436
1048
|
# Set the language grammar for this parser
|
|
@@ -440,9 +1052,154 @@ module TreeHaver
|
|
|
440
1052
|
# @example
|
|
441
1053
|
# parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
442
1054
|
def language=(lang)
|
|
443
|
-
|
|
1055
|
+
# Check if this is a Citrus language - if so, we need a Citrus parser
|
|
1056
|
+
# This enables automatic backend switching when tree-sitter fails and
|
|
1057
|
+
# falls back to Citrus
|
|
1058
|
+
if lang.is_a?(Backends::Citrus::Language)
|
|
1059
|
+
unless @impl.is_a?(Backends::Citrus::Parser)
|
|
1060
|
+
# Switch to Citrus parser to match the Citrus language
|
|
1061
|
+
@impl = Backends::Citrus::Parser.new
|
|
1062
|
+
@explicit_backend = :citrus
|
|
1063
|
+
end
|
|
1064
|
+
end
|
|
1065
|
+
|
|
1066
|
+
# Unwrap the language before passing to backend
|
|
1067
|
+
# Backends receive raw language objects, never TreeHaver wrappers
|
|
1068
|
+
inner_lang = unwrap_language(lang)
|
|
1069
|
+
@impl.language = inner_lang
|
|
1070
|
+
# Return the original (possibly wrapped) language for consistency
|
|
1071
|
+
lang # rubocop:disable Lint/Void (intentional return value)
|
|
444
1072
|
end
|
|
445
1073
|
|
|
1074
|
+
private
|
|
1075
|
+
|
|
1076
|
+
# Unwrap a language object to extract the raw backend language
|
|
1077
|
+
#
|
|
1078
|
+
# This method is smart about backend compatibility:
|
|
1079
|
+
# 1. If language has a backend attribute, checks if it matches current backend
|
|
1080
|
+
# 2. If mismatch detected, attempts to reload language for correct backend
|
|
1081
|
+
# 3. If reload successful, uses new language; otherwise continues with original
|
|
1082
|
+
# 4. Unwraps the language wrapper to get raw backend object
|
|
1083
|
+
#
|
|
1084
|
+
# @param lang [Object] wrapped or raw language object
|
|
1085
|
+
# @return [Object] raw backend language object appropriate for current backend
|
|
1086
|
+
# @api private
|
|
1087
|
+
def unwrap_language(lang)
|
|
1088
|
+
# Check if this is a TreeHaver language wrapper with backend info
|
|
1089
|
+
if lang.respond_to?(:backend)
|
|
1090
|
+
# Verify backend compatibility FIRST
|
|
1091
|
+
# This prevents passing languages from wrong backends to native code
|
|
1092
|
+
# Exception: :auto backend is permissive - accepts any language
|
|
1093
|
+
current_backend = backend
|
|
1094
|
+
|
|
1095
|
+
if lang.backend != current_backend && current_backend != :auto
|
|
1096
|
+
# Backend mismatch! Try to reload for correct backend
|
|
1097
|
+
reloaded = try_reload_language_for_backend(lang, current_backend)
|
|
1098
|
+
if reloaded
|
|
1099
|
+
lang = reloaded
|
|
1100
|
+
else
|
|
1101
|
+
# Couldn't reload - this is an error
|
|
1102
|
+
raise TreeHaver::Error,
|
|
1103
|
+
"Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
|
|
1104
|
+
"Cannot reload language for correct backend. " \
|
|
1105
|
+
"Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
|
|
1106
|
+
end
|
|
1107
|
+
end
|
|
1108
|
+
|
|
1109
|
+
# Get the current parser's language (if set)
|
|
1110
|
+
current_lang = @impl.respond_to?(:language) ? @impl.language : nil
|
|
1111
|
+
|
|
1112
|
+
# Language mismatch detected! The parser might have a different language set
|
|
1113
|
+
# Compare the actual language objects using Comparable
|
|
1114
|
+
if current_lang && lang != current_lang
|
|
1115
|
+
# Different language being set (e.g., switching from TOML to JSON)
|
|
1116
|
+
# This is fine, just informational
|
|
1117
|
+
end
|
|
1118
|
+
end
|
|
1119
|
+
|
|
1120
|
+
# Unwrap based on backend type
|
|
1121
|
+
# All TreeHaver Language wrappers have the backend attribute
|
|
1122
|
+
unless lang.respond_to?(:backend)
|
|
1123
|
+
# This shouldn't happen - all our wrappers have backend attribute
|
|
1124
|
+
# If we get here, it's likely a raw backend object that was passed directly
|
|
1125
|
+
raise TreeHaver::Error,
|
|
1126
|
+
"Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
|
|
1127
|
+
"Use TreeHaver::Language.from_library to create language objects."
|
|
1128
|
+
end
|
|
1129
|
+
|
|
1130
|
+
case lang.backend
|
|
1131
|
+
when :mri
|
|
1132
|
+
return lang.to_language if lang.respond_to?(:to_language)
|
|
1133
|
+
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
1134
|
+
when :rust
|
|
1135
|
+
return lang.name if lang.respond_to?(:name)
|
|
1136
|
+
when :ffi
|
|
1137
|
+
return lang # FFI needs wrapper for to_ptr
|
|
1138
|
+
when :java
|
|
1139
|
+
return lang.impl if lang.respond_to?(:impl)
|
|
1140
|
+
when :citrus
|
|
1141
|
+
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
1142
|
+
when :prism
|
|
1143
|
+
return lang # Prism backend expects the Language wrapper
|
|
1144
|
+
when :psych
|
|
1145
|
+
return lang # Psych backend expects the Language wrapper
|
|
1146
|
+
when :commonmarker
|
|
1147
|
+
return lang # Commonmarker backend expects the Language wrapper
|
|
1148
|
+
when :markly
|
|
1149
|
+
return lang # Markly backend expects the Language wrapper
|
|
1150
|
+
else
|
|
1151
|
+
# Unknown backend (e.g., test backend)
|
|
1152
|
+
# Try generic unwrapping methods for flexibility in testing
|
|
1153
|
+
return lang.to_language if lang.respond_to?(:to_language)
|
|
1154
|
+
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
1155
|
+
return lang.impl if lang.respond_to?(:impl)
|
|
1156
|
+
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
1157
|
+
return lang.name if lang.respond_to?(:name)
|
|
1158
|
+
|
|
1159
|
+
# If nothing works, pass through as-is
|
|
1160
|
+
# This allows test languages to be passed directly
|
|
1161
|
+
return lang
|
|
1162
|
+
end
|
|
1163
|
+
|
|
1164
|
+
# Shouldn't reach here, but just in case
|
|
1165
|
+
lang
|
|
1166
|
+
end
|
|
1167
|
+
|
|
1168
|
+
# Try to reload a language for the current backend
|
|
1169
|
+
#
|
|
1170
|
+
# This handles the case where a language was loaded for one backend,
|
|
1171
|
+
# but is now being used with a different backend (e.g., after backend switch).
|
|
1172
|
+
#
|
|
1173
|
+
# @param lang [Object] language object with metadata
|
|
1174
|
+
# @param target_backend [Symbol] backend to reload for
|
|
1175
|
+
# @return [Object, nil] reloaded language or nil if reload not possible
|
|
1176
|
+
# @api private
|
|
1177
|
+
def try_reload_language_for_backend(lang, target_backend)
|
|
1178
|
+
# Can't reload without path information
|
|
1179
|
+
return unless lang.respond_to?(:path) || lang.respond_to?(:grammar_module)
|
|
1180
|
+
|
|
1181
|
+
# For tree-sitter backends, reload from path
|
|
1182
|
+
if lang.respond_to?(:path) && lang.path
|
|
1183
|
+
begin
|
|
1184
|
+
# Use Language.from_library which respects current backend
|
|
1185
|
+
return Language.from_library(
|
|
1186
|
+
lang.path,
|
|
1187
|
+
symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
|
|
1188
|
+
name: lang.respond_to?(:name) ? lang.name : nil,
|
|
1189
|
+
)
|
|
1190
|
+
rescue => e
|
|
1191
|
+
# Reload failed, continue with original
|
|
1192
|
+
warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
|
|
1193
|
+
return
|
|
1194
|
+
end
|
|
1195
|
+
end
|
|
1196
|
+
|
|
1197
|
+
# For Citrus, can't really reload as it's just a module reference
|
|
1198
|
+
nil
|
|
1199
|
+
end
|
|
1200
|
+
|
|
1201
|
+
public
|
|
1202
|
+
|
|
446
1203
|
# Parse source code into a syntax tree
|
|
447
1204
|
#
|
|
448
1205
|
# @param source [String] the source code to parse (should be UTF-8)
|
|
@@ -452,7 +1209,8 @@ module TreeHaver
|
|
|
452
1209
|
# puts tree.root_node.type
|
|
453
1210
|
def parse(source)
|
|
454
1211
|
tree_impl = @impl.parse(source)
|
|
455
|
-
|
|
1212
|
+
# Wrap backend tree with source so Node#text works
|
|
1213
|
+
Tree.new(tree_impl, source: source)
|
|
456
1214
|
end
|
|
457
1215
|
|
|
458
1216
|
# Parse source code into a syntax tree (with optional incremental parsing)
|
|
@@ -501,10 +1259,12 @@ module TreeHaver
|
|
|
501
1259
|
old_tree
|
|
502
1260
|
end
|
|
503
1261
|
tree_impl = @impl.parse_string(old_impl, source)
|
|
504
|
-
|
|
1262
|
+
# Wrap backend tree with source so Node#text works
|
|
1263
|
+
Tree.new(tree_impl, source: source)
|
|
505
1264
|
elsif @impl.respond_to?(:parse_string)
|
|
506
1265
|
tree_impl = @impl.parse_string(nil, source)
|
|
507
|
-
|
|
1266
|
+
# Wrap backend tree with source so Node#text works
|
|
1267
|
+
Tree.new(tree_impl, source: source)
|
|
508
1268
|
else
|
|
509
1269
|
# Fallback for backends that don't support parse_string
|
|
510
1270
|
parse(source)
|