tree_haver 5.0.5 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/tree_haver/backend_context.rb +28 -0
- data/lib/tree_haver/backend_registry.rb +19 -432
- data/lib/tree_haver/contracts.rb +460 -0
- data/lib/tree_haver/kaitai_backend.rb +30 -0
- data/lib/tree_haver/language_pack.rb +190 -0
- data/lib/tree_haver/peg_backends.rb +76 -0
- data/lib/tree_haver/version.rb +1 -12
- data/lib/tree_haver.rb +7 -1316
- data.tar.gz.sig +0 -0
- metadata +34 -251
- metadata.gz.sig +0 -0
- data/CHANGELOG.md +0 -1393
- data/CITATION.cff +0 -20
- data/CODE_OF_CONDUCT.md +0 -134
- data/CONTRIBUTING.md +0 -359
- data/FUNDING.md +0 -74
- data/LICENSE.txt +0 -21
- data/README.md +0 -2320
- data/REEK +0 -0
- data/RUBOCOP.md +0 -71
- data/SECURITY.md +0 -21
- data/lib/tree_haver/backend_api.rb +0 -349
- data/lib/tree_haver/backends/citrus.rb +0 -487
- data/lib/tree_haver/backends/ffi.rb +0 -1009
- data/lib/tree_haver/backends/java.rb +0 -893
- data/lib/tree_haver/backends/mri.rb +0 -362
- data/lib/tree_haver/backends/parslet.rb +0 -560
- data/lib/tree_haver/backends/prism.rb +0 -471
- data/lib/tree_haver/backends/psych.rb +0 -375
- data/lib/tree_haver/backends/rust.rb +0 -239
- data/lib/tree_haver/base/language.rb +0 -98
- data/lib/tree_haver/base/node.rb +0 -322
- data/lib/tree_haver/base/parser.rb +0 -24
- data/lib/tree_haver/base/point.rb +0 -48
- data/lib/tree_haver/base/tree.rb +0 -128
- data/lib/tree_haver/base.rb +0 -12
- data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
- data/lib/tree_haver/compat.rb +0 -43
- data/lib/tree_haver/grammar_finder.rb +0 -374
- data/lib/tree_haver/language.rb +0 -295
- data/lib/tree_haver/language_registry.rb +0 -190
- data/lib/tree_haver/library_path_utils.rb +0 -80
- data/lib/tree_haver/node.rb +0 -579
- data/lib/tree_haver/parser.rb +0 -438
- data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
- data/lib/tree_haver/path_validator.rb +0 -353
- data/lib/tree_haver/point.rb +0 -27
- data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
- data/lib/tree_haver/rspec/testable_node.rb +0 -217
- data/lib/tree_haver/rspec.rb +0 -33
- data/lib/tree_haver/tree.rb +0 -258
- data/sig/tree_haver/backends.rbs +0 -352
- data/sig/tree_haver/grammar_finder.rbs +0 -29
- data/sig/tree_haver/path_validator.rbs +0 -32
- data/sig/tree_haver.rbs +0 -234
data/lib/tree_haver.rb
CHANGED
|
@@ -1,1322 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# External gems
|
|
4
|
-
require "version_gem"
|
|
5
|
-
|
|
6
|
-
# Standard library
|
|
7
|
-
require "set"
|
|
8
|
-
|
|
9
|
-
# This gem - only version can be required (never autoloaded)
|
|
10
3
|
require_relative "tree_haver/version"
|
|
4
|
+
require_relative "tree_haver/backend_registry"
|
|
5
|
+
require_relative "tree_haver/backend_context"
|
|
6
|
+
require_relative "tree_haver/contracts"
|
|
7
|
+
require_relative "tree_haver/peg_backends"
|
|
8
|
+
require_relative "tree_haver/kaitai_backend"
|
|
9
|
+
require_relative "tree_haver/language_pack"
|
|
11
10
|
|
|
12
|
-
# TreeHaver is a cross-Ruby adapter for code parsing with 10 backends.
|
|
13
|
-
#
|
|
14
|
-
# Provides a unified API for parsing source code across MRI Ruby, JRuby, and TruffleRuby
|
|
15
|
-
# using tree-sitter grammars or language-specific native parsers.
|
|
16
|
-
#
|
|
17
|
-
# == Backends
|
|
18
|
-
#
|
|
19
|
-
# Supports 10 backends (the nine listed here plus Parslet, a pure-Ruby PEG parser):
|
|
20
|
-
# - Tree-sitter: MRI (C), Rust, FFI, Java
|
|
21
|
-
# - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
|
|
22
|
-
# - Pure Ruby: Citrus (portable fallback)
|
|
23
|
-
#
|
|
24
|
-
# == Platform Compatibility
|
|
25
|
-
#
|
|
26
|
-
# Not all backends work on all Ruby platforms:
|
|
27
|
-
#
|
|
28
|
-
# | Backend | MRI | JRuby | TruffleRuby |
|
|
29
|
-
# |--------------|-----|-------|-------------|
|
|
30
|
-
# | MRI (C ext) | ✓ | ✗ | ✗ |
|
|
31
|
-
# | Rust | ✓ | ✗ | ✗ |
|
|
32
|
-
# | FFI | ✓ | ✓ | ✗ |
|
|
33
|
-
# | Java | ✗ | ✓ | ✗ |
|
|
34
|
-
# | Prism | ✓ | ✓ | ✓ |
|
|
35
|
-
# | Psych | ✓ | ✓ | ✓ |
|
|
36
|
-
# | Citrus | ✓ | ✓ | ✓ |
|
|
37
|
-
# | Commonmarker | ✓ | ✗ | ? |
|
|
38
|
-
# | Markly | ✓ | ✗ | ? |
|
|
39
|
-
#
|
|
40
|
-
# - JRuby: Cannot load native C/Rust extensions; use FFI, Java, or pure Ruby backends
|
|
41
|
-
# - TruffleRuby: FFI doesn't support STRUCT_BY_VALUE; magnus/rb-sys incompatible with C API;
|
|
42
|
-
# use Prism, Psych, Citrus, or potentially Commonmarker/Markly
|
|
43
|
-
#
|
|
44
|
-
# @example Basic usage with tree-sitter
|
|
45
|
-
# # Load a language grammar
|
|
46
|
-
# language = TreeHaver::Language.from_library(
|
|
47
|
-
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
48
|
-
# symbol: "tree_sitter_toml"
|
|
49
|
-
# )
|
|
50
|
-
#
|
|
51
|
-
# # Create and configure a parser
|
|
52
|
-
# parser = TreeHaver::Parser.new
|
|
53
|
-
# parser.language = language
|
|
54
|
-
#
|
|
55
|
-
# # Parse source code
|
|
56
|
-
# tree = parser.parse("[package]\nname = \"my-app\"")
|
|
57
|
-
# root = tree.root_node
|
|
58
|
-
#
|
|
59
|
-
# # Use unified Position API (works across all backends)
|
|
60
|
-
# puts root.start_line # => 1 (1-based)
|
|
61
|
-
# puts root.source_position # => {start_line:, end_line:, start_column:, end_column:}
|
|
62
|
-
#
|
|
63
|
-
# @example Using language-specific backends
|
|
64
|
-
# # Parse Ruby with Prism
|
|
65
|
-
# TreeHaver.backend = :prism
|
|
66
|
-
# parser = TreeHaver::Parser.new
|
|
67
|
-
# parser.language = TreeHaver::Backends::Prism::Language.ruby
|
|
68
|
-
# tree = parser.parse("class Example; end")
|
|
69
|
-
#
|
|
70
|
-
# # Parse YAML with Psych
|
|
71
|
-
# TreeHaver.backend = :psych
|
|
72
|
-
# parser = TreeHaver::Parser.new
|
|
73
|
-
# parser.language = TreeHaver::Backends::Psych::Language.yaml
|
|
74
|
-
# tree = parser.parse("key: value")
|
|
75
|
-
#
|
|
76
|
-
# # Parse Markdown with Commonmarker
|
|
77
|
-
# TreeHaver.backend = :commonmarker
|
|
78
|
-
# parser = TreeHaver::Parser.new
|
|
79
|
-
# parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
|
|
80
|
-
# tree = parser.parse("# Heading\nParagraph")
|
|
81
|
-
#
|
|
82
|
-
# @example Using language registration
|
|
83
|
-
# TreeHaver.register_language(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
|
|
84
|
-
# language = TreeHaver::Language.toml
|
|
85
|
-
#
|
|
86
|
-
# @example Using GrammarFinder for automatic discovery
|
|
87
|
-
# # GrammarFinder automatically locates grammar libraries on the system
|
|
88
|
-
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
89
|
-
# finder.register! if finder.available?
|
|
90
|
-
# language = TreeHaver::Language.toml
|
|
91
|
-
#
|
|
92
|
-
# @example Selecting a backend
|
|
93
|
-
# TreeHaver.backend = :mri # Force MRI (ruby_tree_sitter)
|
|
94
|
-
# TreeHaver.backend = :rust # Force Rust (tree_stump)
|
|
95
|
-
# TreeHaver.backend = :ffi # Force FFI
|
|
96
|
-
# TreeHaver.backend = :java # Force Java (JRuby)
|
|
97
|
-
# TreeHaver.backend = :prism # Force Prism (Ruby)
|
|
98
|
-
# TreeHaver.backend = :psych # Force Psych (YAML)
|
|
99
|
-
# TreeHaver.backend = :commonmarker # Force Commonmarker (Markdown)
|
|
100
|
-
# TreeHaver.backend = :markly # Force Markly (GFM)
|
|
101
|
-
# TreeHaver.backend = :citrus # Force Citrus (pure Ruby)
|
|
102
|
-
# TreeHaver.backend = :auto # Auto-select (default)
|
|
103
|
-
#
|
|
104
|
-
# @see https://tree-sitter.github.io/tree-sitter/ tree-sitter documentation
|
|
105
|
-
# @see GrammarFinder For automatic grammar library discovery
|
|
106
|
-
# @see Backends For available parsing backends
|
|
107
11
|
module TreeHaver
|
|
108
|
-
|
|
109
|
-
autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
|
|
110
|
-
autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
|
|
111
|
-
autoload :BackendAPI, File.join(__dir__, "tree_haver", "backend_api")
|
|
112
|
-
autoload :BackendRegistry, File.join(__dir__, "tree_haver", "backend_registry")
|
|
113
|
-
|
|
114
|
-
# Base classes for backend implementations
|
|
115
|
-
autoload :Base, File.join(__dir__, "tree_haver", "base")
|
|
116
|
-
|
|
117
|
-
# Base error class for TreeHaver exceptions
|
|
118
|
-
# @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
|
|
119
|
-
#
|
|
120
|
-
# @abstract Subclass to create specific error types
|
|
121
|
-
class Error < Exception; end # rubocop:disable Lint/InheritException
|
|
122
|
-
|
|
123
|
-
# Raised when a requested backend or feature is not available
|
|
124
|
-
# These are serious errors that extend Exception (not StandardError).
|
|
125
|
-
# @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
|
|
126
|
-
#
|
|
127
|
-
# This can occur when:
|
|
128
|
-
# - Required native libraries are not installed
|
|
129
|
-
# - The selected backend is not compatible with the current Ruby implementation
|
|
130
|
-
# - A language grammar cannot be loaded
|
|
131
|
-
#
|
|
132
|
-
# @example Handling unavailable backends
|
|
133
|
-
# begin
|
|
134
|
-
# language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
135
|
-
# rescue TreeHaver::NotAvailable => e
|
|
136
|
-
# puts "Grammar not available: #{e.message}"
|
|
137
|
-
# end
|
|
138
|
-
class NotAvailable < Error; end
|
|
139
|
-
|
|
140
|
-
# Raised when attempting to use backends that are known to conflict
|
|
141
|
-
#
|
|
142
|
-
# This is a serious error that extends Exception (not StandardError) because
|
|
143
|
-
# it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
|
|
144
|
-
# FFI backend cannot coexist in the same process - once MRI loads, FFI will
|
|
145
|
-
# segfault when trying to set a language on a parser.
|
|
146
|
-
#
|
|
147
|
-
# This protection can be disabled with `TreeHaver.backend_protect = false`
|
|
148
|
-
# but doing so risks segfaults.
|
|
149
|
-
#
|
|
150
|
-
# @example Handling backend conflicts
|
|
151
|
-
# begin
|
|
152
|
-
# # This will raise if MRI was already used
|
|
153
|
-
# TreeHaver.with_backend(:ffi) { parser.language = lang }
|
|
154
|
-
# rescue TreeHaver::BackendConflict => e
|
|
155
|
-
# puts "Backend conflict: #{e.message}"
|
|
156
|
-
# # Fall back to a compatible backend
|
|
157
|
-
# end
|
|
158
|
-
#
|
|
159
|
-
# @example Disabling protection (not recommended)
|
|
160
|
-
# TreeHaver.backend_protect = false
|
|
161
|
-
# # Now you can test backend conflicts (at risk of segfaults)
|
|
162
|
-
class BackendConflict < Error; end
|
|
163
|
-
|
|
164
|
-
# Default Citrus configurations for known languages
|
|
165
|
-
#
|
|
166
|
-
# These are used by {TreeHaver.parser_for} when no explicit citrus_config is provided
|
|
167
|
-
# and tree-sitter backends are not available (e.g., on TruffleRuby).
|
|
168
|
-
#
|
|
169
|
-
# @api private
|
|
170
|
-
CITRUS_DEFAULTS = {
|
|
171
|
-
toml: {
|
|
172
|
-
gem_name: "toml-rb",
|
|
173
|
-
grammar_const: "TomlRB::Document",
|
|
174
|
-
require_path: "toml-rb",
|
|
175
|
-
},
|
|
176
|
-
}.freeze
|
|
177
|
-
|
|
178
|
-
# Default Parslet configurations for known languages
|
|
179
|
-
#
|
|
180
|
-
# These are used by {TreeHaver.parser_for} when no explicit parslet_config is provided
|
|
181
|
-
# and tree-sitter backends are not available (e.g., on TruffleRuby).
|
|
182
|
-
#
|
|
183
|
-
# @api private
|
|
184
|
-
PARSLET_DEFAULTS = {
|
|
185
|
-
toml: {
|
|
186
|
-
gem_name: "toml",
|
|
187
|
-
grammar_const: "TOML::Parslet",
|
|
188
|
-
require_path: "toml",
|
|
189
|
-
},
|
|
190
|
-
}.freeze
|
|
191
|
-
|
|
192
|
-
# Namespace for backend implementations
|
|
193
|
-
#
|
|
194
|
-
# TreeHaver provides multiple backends to support different Ruby implementations:
|
|
195
|
-
# - {Backends::MRI} - Uses ruby_tree_sitter (MRI C extension)
|
|
196
|
-
# - {Backends::Rust} - Uses tree_stump (Rust extension with precompiled binaries)
|
|
197
|
-
# - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
|
|
198
|
-
# - {Backends::Java} - Uses JRuby's Java integration
|
|
199
|
-
# - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
|
|
200
|
-
# - {Backends::Parslet} - Uses Parslet PEG parser (pure Ruby, portable)
|
|
201
|
-
# - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
|
|
202
|
-
# - {Backends::Psych} - Uses Ruby's built-in Psych parser (YAML-only, stdlib)
|
|
203
|
-
module Backends
|
|
204
|
-
autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
|
|
205
|
-
autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
|
|
206
|
-
autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
|
|
207
|
-
autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
|
|
208
|
-
autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
|
|
209
|
-
autoload :Parslet, File.join(__dir__, "tree_haver", "backends", "parslet")
|
|
210
|
-
autoload :Prism, File.join(__dir__, "tree_haver", "backends", "prism")
|
|
211
|
-
autoload :Psych, File.join(__dir__, "tree_haver", "backends", "psych")
|
|
212
|
-
|
|
213
|
-
# Maps each backend to an array of backends that block it from working.
|
|
214
|
-
# For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
|
|
215
|
-
# FFI calls to ts_parser_set_language will segfault.
|
|
216
|
-
#
|
|
217
|
-
# @return [Hash{Symbol => Array<Symbol>}]
|
|
218
|
-
BLOCKED_BY = {
|
|
219
|
-
mri: [],
|
|
220
|
-
rust: [],
|
|
221
|
-
ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
|
|
222
|
-
java: [],
|
|
223
|
-
citrus: [],
|
|
224
|
-
parslet: [], # Parslet has no conflicts with other backends
|
|
225
|
-
prism: [], # Prism has no conflicts with other backends
|
|
226
|
-
psych: [], # Psych has no conflicts with other backends
|
|
227
|
-
}.freeze
|
|
228
|
-
|
|
229
|
-
# Pure Ruby backends that parse specific languages
|
|
230
|
-
# These are language-specific and register themselves via LanguageRegistry
|
|
231
|
-
#
|
|
232
|
-
# @return [Hash{Symbol => Hash}] Maps backend name to language and module info
|
|
233
|
-
PURE_RUBY_BACKENDS = {
|
|
234
|
-
prism: {language: :ruby, module_name: "Prism"},
|
|
235
|
-
psych: {language: :yaml, module_name: "Psych"},
|
|
236
|
-
}.freeze
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
# Security utilities for validating paths before loading shared libraries
|
|
240
|
-
#
|
|
241
|
-
# @example Validate a path
|
|
242
|
-
# TreeHaver::PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
|
|
243
|
-
# # => true
|
|
244
|
-
#
|
|
245
|
-
# @see PathValidator
|
|
246
|
-
autoload :PathValidator, File.join(__dir__, "tree_haver", "path_validator")
|
|
247
|
-
|
|
248
|
-
# Generic grammar finder utility with built-in security validations
|
|
249
|
-
#
|
|
250
|
-
# GrammarFinder provides platform-aware discovery of tree-sitter grammar
|
|
251
|
-
# libraries for any language. It validates paths from environment variables
|
|
252
|
-
# to prevent path traversal and other attacks.
|
|
253
|
-
#
|
|
254
|
-
# @example Find and register a language
|
|
255
|
-
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
256
|
-
# finder.register! if finder.available?
|
|
257
|
-
# language = TreeHaver::Language.toml
|
|
258
|
-
#
|
|
259
|
-
# @example Secure mode (trusted directories only)
|
|
260
|
-
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
261
|
-
# path = finder.find_library_path_safe # Ignores ENV, only trusted dirs
|
|
262
|
-
#
|
|
263
|
-
# @see GrammarFinder
|
|
264
|
-
# @see PathValidator
|
|
265
|
-
autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
|
|
266
|
-
|
|
267
|
-
# Citrus grammar finder for discovering and registering Citrus-based parsers
|
|
268
|
-
#
|
|
269
|
-
# @example Register toml-rb
|
|
270
|
-
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
271
|
-
# language: :toml,
|
|
272
|
-
# gem_name: "toml-rb",
|
|
273
|
-
# grammar_const: "TomlRB::Document"
|
|
274
|
-
# )
|
|
275
|
-
# finder.register! if finder.available?
|
|
276
|
-
#
|
|
277
|
-
# @see CitrusGrammarFinder
|
|
278
|
-
autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
|
|
279
|
-
|
|
280
|
-
# Parslet grammar finder for discovering and registering Parslet-based parsers
|
|
281
|
-
#
|
|
282
|
-
# @example Register toml gem
|
|
283
|
-
# finder = TreeHaver::ParsletGrammarFinder.new(
|
|
284
|
-
# language: :toml,
|
|
285
|
-
# gem_name: "toml",
|
|
286
|
-
# grammar_const: "TOML::Parslet"
|
|
287
|
-
# )
|
|
288
|
-
# finder.register! if finder.available?
|
|
289
|
-
#
|
|
290
|
-
# @see ParsletGrammarFinder
|
|
291
|
-
autoload :ParsletGrammarFinder, File.join(__dir__, "tree_haver", "parslet_grammar_finder")
|
|
292
|
-
|
|
293
|
-
# Point class for position information (row, column)
|
|
294
|
-
autoload :Point, File.join(__dir__, "tree_haver", "point")
|
|
295
|
-
|
|
296
|
-
# Unified Node wrapper providing consistent API across backends
|
|
297
|
-
autoload :Node, File.join(__dir__, "tree_haver", "node")
|
|
298
|
-
|
|
299
|
-
# Unified Tree wrapper providing consistent API across backends
|
|
300
|
-
autoload :Tree, File.join(__dir__, "tree_haver", "tree")
|
|
301
|
-
|
|
302
|
-
# Language class for loading grammar shared libraries
|
|
303
|
-
autoload :Language, File.join(__dir__, "tree_haver", "language")
|
|
304
|
-
|
|
305
|
-
# Parser class for parsing source code into syntax trees
|
|
306
|
-
autoload :Parser, File.join(__dir__, "tree_haver", "parser")
|
|
307
|
-
|
|
308
|
-
# Native tree-sitter backends that support loading shared libraries (.so files)
|
|
309
|
-
# These backends wrap the tree-sitter C library via various bindings.
|
|
310
|
-
# Pure Ruby backends (Citrus, Parslet, Prism, Psych, Commonmarker, Markly) are excluded.
|
|
311
|
-
NATIVE_BACKENDS = %i[mri rust ffi java].freeze
|
|
312
|
-
|
|
313
|
-
# Get the current backend selection
|
|
314
|
-
#
|
|
315
|
-
# @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
|
|
316
|
-
# @note Can be set via ENV["TREE_HAVER_BACKEND"]
|
|
317
|
-
class << self
|
|
318
|
-
# Whether backend conflict protection is enabled
|
|
319
|
-
#
|
|
320
|
-
# When true (default), TreeHaver will raise BackendConflict if you try to
|
|
321
|
-
# use a backend that is known to conflict with a previously used backend.
|
|
322
|
-
# For example, FFI will not work after MRI has been used.
|
|
323
|
-
#
|
|
324
|
-
# Set to false to disable protection (useful for testing compatibility).
|
|
325
|
-
#
|
|
326
|
-
# @return [Boolean]
|
|
327
|
-
# @example Disable protection for testing
|
|
328
|
-
# TreeHaver.backend_protect = false
|
|
329
|
-
def backend_protect=(value)
|
|
330
|
-
@backend_protect_mutex ||= Mutex.new
|
|
331
|
-
@backend_protect_mutex.synchronize { @backend_protect = value }
|
|
332
|
-
end
|
|
333
|
-
|
|
334
|
-
# Check if backend conflict protection is enabled
|
|
335
|
-
#
|
|
336
|
-
# @return [Boolean] true if protection is enabled (default)
|
|
337
|
-
def backend_protect?
|
|
338
|
-
return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
339
|
-
true # Default is protected
|
|
340
|
-
end
|
|
341
|
-
|
|
342
|
-
# Alias for backend_protect?
|
|
343
|
-
def backend_protect
|
|
344
|
-
backend_protect?
|
|
345
|
-
end
|
|
346
|
-
|
|
347
|
-
# Track which backends have been used in this process
|
|
348
|
-
#
|
|
349
|
-
# @return [Set<Symbol>] set of backend symbols that have been used
|
|
350
|
-
def backends_used
|
|
351
|
-
@backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
352
|
-
end
|
|
353
|
-
|
|
354
|
-
# Record that a backend has been used
|
|
355
|
-
#
|
|
356
|
-
# @param backend [Symbol] the backend that was used
|
|
357
|
-
# @return [void]
|
|
358
|
-
# @api private
|
|
359
|
-
def record_backend_usage(backend)
|
|
360
|
-
backends_used << backend
|
|
361
|
-
end
|
|
362
|
-
|
|
363
|
-
# Check if a backend would conflict with previously used backends
|
|
364
|
-
#
|
|
365
|
-
# @param backend [Symbol] the backend to check
|
|
366
|
-
# @return [Array<Symbol>] list of previously used backends that block this one
|
|
367
|
-
def conflicting_backends_for(backend)
|
|
368
|
-
blockers = Backends::BLOCKED_BY[backend] || []
|
|
369
|
-
blockers & backends_used.to_a
|
|
370
|
-
end
|
|
371
|
-
|
|
372
|
-
# Check if using a backend would cause a conflict
|
|
373
|
-
#
|
|
374
|
-
# @param backend [Symbol] the backend to check
|
|
375
|
-
# @raise [BackendConflict] if protection is enabled and there's a conflict
|
|
376
|
-
# @return [void]
|
|
377
|
-
def check_backend_conflict!(backend)
|
|
378
|
-
return unless backend_protect?
|
|
379
|
-
|
|
380
|
-
conflicts = conflicting_backends_for(backend)
|
|
381
|
-
return if conflicts.empty?
|
|
382
|
-
|
|
383
|
-
raise BackendConflict,
|
|
384
|
-
"Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
|
|
385
|
-
"The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
|
|
386
|
-
"To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
|
|
387
|
-
end
|
|
388
|
-
|
|
389
|
-
# @example
|
|
390
|
-
# TreeHaver.backend # => :auto
|
|
391
|
-
def backend
|
|
392
|
-
return @backend if defined?(@backend) && @backend # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
393
|
-
|
|
394
|
-
@backend = parse_single_backend_env # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
395
|
-
end
|
|
396
|
-
|
|
397
|
-
# Valid native backend names (require native extensions)
|
|
398
|
-
VALID_NATIVE_BACKENDS = %w[mri rust ffi java].freeze
|
|
399
|
-
|
|
400
|
-
# Valid pure Ruby backend names (no native extensions)
|
|
401
|
-
VALID_RUBY_BACKENDS = %w[citrus parslet prism psych commonmarker markly].freeze
|
|
402
|
-
|
|
403
|
-
# All valid backend names
|
|
404
|
-
VALID_BACKENDS = (VALID_NATIVE_BACKENDS + VALID_RUBY_BACKENDS + %w[auto none]).freeze
|
|
405
|
-
|
|
406
|
-
# Get allowed native backends from TREE_HAVER_NATIVE_BACKEND environment variable
|
|
407
|
-
#
|
|
408
|
-
# Supports comma-separated values like "mri,ffi".
|
|
409
|
-
# Special values:
|
|
410
|
-
# - "auto" or empty/unset: automatically select from available native backends
|
|
411
|
-
# - "none": no native backends allowed (pure Ruby only)
|
|
412
|
-
#
|
|
413
|
-
# @return [Array<Symbol>] list of allowed native backend symbols, or [:auto] or [:none]
|
|
414
|
-
# @example Allow only MRI and FFI
|
|
415
|
-
# # TREE_HAVER_NATIVE_BACKEND=mri,ffi
|
|
416
|
-
# TreeHaver.allowed_native_backends # => [:mri, :ffi]
|
|
417
|
-
# @example Auto-select native backends (default)
|
|
418
|
-
# # TREE_HAVER_NATIVE_BACKEND not set, empty, or "auto"
|
|
419
|
-
# TreeHaver.allowed_native_backends # => [:auto]
|
|
420
|
-
# @example Disable all native backends
|
|
421
|
-
# # TREE_HAVER_NATIVE_BACKEND=none
|
|
422
|
-
# TreeHaver.allowed_native_backends # => [:none]
|
|
423
|
-
def allowed_native_backends
|
|
424
|
-
@allowed_native_backends ||= parse_backend_list_env("TREE_HAVER_NATIVE_BACKEND", VALID_NATIVE_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
425
|
-
end
|
|
426
|
-
|
|
427
|
-
# Get allowed Ruby backends from TREE_HAVER_RUBY_BACKEND environment variable
|
|
428
|
-
#
|
|
429
|
-
# Supports comma-separated values like "citrus,prism".
|
|
430
|
-
# Special values:
|
|
431
|
-
# - "auto" or empty/unset: automatically select from available Ruby backends
|
|
432
|
-
# - "none": no Ruby backends allowed (native only)
|
|
433
|
-
#
|
|
434
|
-
# @return [Array<Symbol>] list of allowed Ruby backend symbols, or [:auto] or [:none]
|
|
435
|
-
# @example Allow only Citrus
|
|
436
|
-
# # TREE_HAVER_RUBY_BACKEND=citrus
|
|
437
|
-
# TreeHaver.allowed_ruby_backends # => [:citrus]
|
|
438
|
-
# @example Auto-select Ruby backends (default)
|
|
439
|
-
# # TREE_HAVER_RUBY_BACKEND not set, empty, or "auto"
|
|
440
|
-
# TreeHaver.allowed_ruby_backends # => [:auto]
|
|
441
|
-
def allowed_ruby_backends
|
|
442
|
-
@allowed_ruby_backends ||= parse_backend_list_env("TREE_HAVER_RUBY_BACKEND", VALID_RUBY_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
443
|
-
end
|
|
444
|
-
|
|
445
|
-
# Check if a specific backend is allowed based on environment variables
|
|
446
|
-
#
|
|
447
|
-
# Checks TREE_HAVER_NATIVE_BACKEND for native backends and
|
|
448
|
-
# TREE_HAVER_RUBY_BACKEND for pure Ruby backends.
|
|
449
|
-
#
|
|
450
|
-
# @param backend_name [Symbol, String] the backend to check
|
|
451
|
-
# @return [Boolean] true if the backend is allowed
|
|
452
|
-
# @example
|
|
453
|
-
# # TREE_HAVER_NATIVE_BACKEND=mri
|
|
454
|
-
# TreeHaver.backend_allowed?(:mri) # => true
|
|
455
|
-
# TreeHaver.backend_allowed?(:ffi) # => false
|
|
456
|
-
# TreeHaver.backend_allowed?(:citrus) # => true (Ruby backends use separate env var)
|
|
457
|
-
def backend_allowed?(backend_name)
|
|
458
|
-
backend_sym = backend_name.to_sym
|
|
459
|
-
|
|
460
|
-
# Check if it's a native backend
|
|
461
|
-
if VALID_NATIVE_BACKENDS.include?(backend_sym.to_s)
|
|
462
|
-
allowed = allowed_native_backends
|
|
463
|
-
return true if allowed == [:auto]
|
|
464
|
-
return false if allowed == [:none]
|
|
465
|
-
return allowed.include?(backend_sym)
|
|
466
|
-
end
|
|
467
|
-
|
|
468
|
-
# Check if it's a Ruby backend
|
|
469
|
-
if VALID_RUBY_BACKENDS.include?(backend_sym.to_s)
|
|
470
|
-
allowed = allowed_ruby_backends
|
|
471
|
-
return true if allowed == [:auto]
|
|
472
|
-
return false if allowed == [:none]
|
|
473
|
-
return allowed.include?(backend_sym)
|
|
474
|
-
end
|
|
475
|
-
|
|
476
|
-
# Unknown backend or :auto - allow
|
|
477
|
-
true
|
|
478
|
-
end
|
|
479
|
-
|
|
480
|
-
# Set the backend to use
|
|
481
|
-
#
|
|
482
|
-
# @param name [Symbol, String, nil] backend name (:auto, :mri, :rust, :ffi, :java, :citrus)
|
|
483
|
-
# @return [Symbol, nil] the backend that was set
|
|
484
|
-
# @example Force FFI backend
|
|
485
|
-
# TreeHaver.backend = :ffi
|
|
486
|
-
# @example Force Rust backend
|
|
487
|
-
# TreeHaver.backend = :rust
|
|
488
|
-
def backend=(name)
|
|
489
|
-
@backend = name&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
490
|
-
end
|
|
491
|
-
|
|
492
|
-
# Reset backend selection memoization
|
|
493
|
-
#
|
|
494
|
-
# Primarily useful in tests to switch backends without cross-example leakage.
|
|
495
|
-
#
|
|
496
|
-
# @param to [Symbol, String, nil] backend name or nil to clear (defaults to :auto)
|
|
497
|
-
# @return [void]
|
|
498
|
-
# @example Reset to auto-selection
|
|
499
|
-
# TreeHaver.reset_backend!
|
|
500
|
-
# @example Reset to specific backend
|
|
501
|
-
# TreeHaver.reset_backend!(to: :ffi)
|
|
502
|
-
def reset_backend!(to: :auto)
|
|
503
|
-
@backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
504
|
-
@allowed_native_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
505
|
-
@allowed_ruby_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
506
|
-
end
|
|
507
|
-
|
|
508
|
-
# Register built-in pure Ruby backends in the LanguageRegistry
|
|
509
|
-
#
|
|
510
|
-
# This registers Prism and Psych (the entries in Backends::PURE_RUBY_BACKENDS) using the same
|
|
511
|
-
# registration API that external backends use. This ensures consistent
|
|
512
|
-
# behavior whether a backend is built-in or provided by an external gem.
|
|
513
|
-
#
|
|
514
|
-
# Called automatically when TreeHaver is first used, but can be called
|
|
515
|
-
# manually in tests or when reset! has cleared the registry.
|
|
516
|
-
#
|
|
517
|
-
# @return [void]
|
|
518
|
-
# @example Manual registration (usually not needed)
|
|
519
|
-
# TreeHaver.register_builtin_backends!
|
|
520
|
-
def register_builtin_backends!
|
|
521
|
-
Backends::PURE_RUBY_BACKENDS.each do |backend_type, info|
|
|
522
|
-
language = info[:language]
|
|
523
|
-
module_name = info[:module_name]
|
|
524
|
-
|
|
525
|
-
# Get the backend module
|
|
526
|
-
backend_mod = Backends.const_get(module_name)
|
|
527
|
-
next unless backend_mod
|
|
528
|
-
|
|
529
|
-
# Register if available (lazy check - doesn't require the gem yet)
|
|
530
|
-
LanguageRegistry.register(
|
|
531
|
-
language,
|
|
532
|
-
backend_type,
|
|
533
|
-
backend_module: backend_mod,
|
|
534
|
-
gem_name: module_name.downcase,
|
|
535
|
-
)
|
|
536
|
-
end
|
|
537
|
-
end
|
|
538
|
-
|
|
539
|
-
# Check if built-in backends have been registered
|
|
540
|
-
#
|
|
541
|
-
# @return [Boolean]
|
|
542
|
-
# @api private
|
|
543
|
-
def builtin_backends_registered?
|
|
544
|
-
@builtin_backends_registered ||= false # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
545
|
-
end
|
|
546
|
-
|
|
547
|
-
# Ensure built-in backends are registered (idempotent)
|
|
548
|
-
#
|
|
549
|
-
# @return [void]
|
|
550
|
-
# @api private
|
|
551
|
-
def ensure_builtin_backends_registered!
|
|
552
|
-
return if builtin_backends_registered?
|
|
553
|
-
register_builtin_backends!
|
|
554
|
-
@builtin_backends_registered = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
555
|
-
end
|
|
556
|
-
|
|
557
|
-
# Parse TREE_HAVER_BACKEND environment variable (single backend)
|
|
558
|
-
#
|
|
559
|
-
# @return [Symbol] the backend symbol (:auto if not set or invalid)
|
|
560
|
-
# @api private
|
|
561
|
-
def parse_single_backend_env
|
|
562
|
-
env_value = ENV["TREE_HAVER_BACKEND"]
|
|
563
|
-
return :auto if env_value.nil? || env_value.strip.empty?
|
|
564
|
-
|
|
565
|
-
name = env_value.strip.downcase
|
|
566
|
-
return :auto unless VALID_BACKENDS.include?(name) && name != "all" && name != "none"
|
|
567
|
-
|
|
568
|
-
name.to_sym
|
|
569
|
-
end
|
|
570
|
-
|
|
571
|
-
# Parse a backend list environment variable
|
|
572
|
-
#
|
|
573
|
-
# @param env_var [String] the environment variable name
|
|
574
|
-
# @param valid_backends [Array<String>] list of valid backend names
|
|
575
|
-
# @return [Array<Symbol>] list of backend symbols, or [:auto] or [:none]
|
|
576
|
-
# @api private
|
|
577
|
-
def parse_backend_list_env(env_var, valid_backends)
|
|
578
|
-
env_value = ENV[env_var]
|
|
579
|
-
|
|
580
|
-
# Empty or unset means "auto"
|
|
581
|
-
return [:auto] if env_value.nil? || env_value.strip.empty?
|
|
582
|
-
|
|
583
|
-
normalized = env_value.strip.downcase
|
|
584
|
-
|
|
585
|
-
# Handle special values
|
|
586
|
-
return [:auto] if normalized == "auto"
|
|
587
|
-
return [:none] if normalized == "none"
|
|
588
|
-
|
|
589
|
-
# Split on comma and parse each backend
|
|
590
|
-
backends = normalized.split(",").map(&:strip).uniq
|
|
591
|
-
|
|
592
|
-
# Convert to symbols, filtering out invalid ones
|
|
593
|
-
parsed = backends.filter_map do |name|
|
|
594
|
-
valid_backends.include?(name) ? name.to_sym : nil
|
|
595
|
-
end
|
|
596
|
-
|
|
597
|
-
# Return :auto if no valid backends found
|
|
598
|
-
parsed.empty? ? [:auto] : parsed
|
|
599
|
-
end
|
|
600
|
-
|
|
601
|
-
# Thread-local backend context storage
|
|
602
|
-
#
|
|
603
|
-
# Returns a hash containing the thread-local backend context with keys:
|
|
604
|
-
# - :backend - The backend name (Symbol) or nil if using global default
|
|
605
|
-
# - :depth - The nesting depth (Integer) for proper cleanup
|
|
606
|
-
#
|
|
607
|
-
# @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
|
|
608
|
-
# @example
|
|
609
|
-
# ctx = TreeHaver.current_backend_context
|
|
610
|
-
# ctx[:backend] # => nil or :ffi, :mri, etc.
|
|
611
|
-
# ctx[:depth] # => 0, 1, 2, etc.
|
|
612
|
-
def current_backend_context
  # NOTE(review): Thread#[] storage is fiber-local in Ruby, so code running in
  # another Fiber of the same thread gets its own freshly created context.
  slot = Thread.current
  existing = slot[:tree_haver_backend_context]
  return existing if existing

  # First access: no override (nil backend) and no active nesting (depth 0).
  slot[:tree_haver_backend_context] = {backend: nil, depth: 0}
end
|
|
618
|
-
|
|
619
|
-
# Get the effective backend for current context
|
|
620
|
-
#
|
|
621
|
-
# Priority: thread-local context → global @backend → :auto
|
|
622
|
-
#
|
|
623
|
-
# @return [Symbol] the backend to use
|
|
624
|
-
# @example
|
|
625
|
-
# TreeHaver.effective_backend # => :auto (default)
|
|
626
|
-
# @example With thread-local context
|
|
627
|
-
# TreeHaver.with_backend(:ffi) do
|
|
628
|
-
# TreeHaver.effective_backend # => :ffi
|
|
629
|
-
# end
|
|
630
|
-
def effective_backend
  # A with_backend override wins over the global setting.
  scoped = current_backend_context[:backend]
  return scoped if scoped

  backend || :auto
end
|
|
634
|
-
|
|
635
|
-
# Execute a block with a specific backend in thread-local context
|
|
636
|
-
#
|
|
637
|
-
# This method provides temporary, thread-safe backend switching for a block of code.
|
|
638
|
-
# The backend setting is automatically restored when the block exits, even if
|
|
639
|
-
# an exception is raised. Supports nesting—inner blocks override outer blocks,
|
|
640
|
-
# and each level is properly unwound.
|
|
641
|
-
#
|
|
642
|
-
# Thread Safety: Each thread maintains its own backend context, so concurrent
|
|
643
|
-
# threads can safely use different backends without interfering with each other.
|
|
644
|
-
#
|
|
645
|
-
# Use Cases:
|
|
646
|
-
# - Testing: Test the same code path with different backends
|
|
647
|
-
# - Performance comparison: Benchmark parsing with different backends
|
|
648
|
-
# - Fallback scenarios: Try one backend, fall back to another on failure
|
|
649
|
-
# - Thread isolation: Different threads can use different backends safely
|
|
650
|
-
#
|
|
651
|
-
# @param name [Symbol, String] backend name (e.g. :mri, :rust, :ffi, :java, :citrus, :parslet, :prism, :psych, or :auto)
|
|
652
|
-
# @yield block to execute with the specified backend
|
|
653
|
-
# @return [Object] the return value of the block
|
|
654
|
-
# @raise [ArgumentError] if backend name is nil
|
|
655
|
-
# @raise [BackendConflict] if the requested backend conflicts with a previously used backend
|
|
656
|
-
#
|
|
657
|
-
# @example Basic usage
|
|
658
|
-
# TreeHaver.with_backend(:mri) do
|
|
659
|
-
# parser = TreeHaver::Parser.new
|
|
660
|
-
# tree = parser.parse(source)
|
|
661
|
-
# end
|
|
662
|
-
# # Backend is automatically restored here
|
|
663
|
-
#
|
|
664
|
-
# @example Nested blocks (inner overrides outer)
|
|
665
|
-
# TreeHaver.with_backend(:rust) do
|
|
666
|
-
# parser1 = TreeHaver::Parser.new # Uses :rust
|
|
667
|
-
# TreeHaver.with_backend(:citrus) do
|
|
668
|
-
# parser2 = TreeHaver::Parser.new # Uses :citrus
|
|
669
|
-
# end
|
|
670
|
-
# parser3 = TreeHaver::Parser.new # Back to :rust
|
|
671
|
-
# end
|
|
672
|
-
#
|
|
673
|
-
# @example Testing multiple backends
|
|
674
|
-
# [:mri, :rust, :citrus].each do |backend_name|
|
|
675
|
-
# TreeHaver.with_backend(backend_name) do
|
|
676
|
-
# parser = TreeHaver::Parser.new
|
|
677
|
-
# result = parser.parse(source)
|
|
678
|
-
# puts "#{backend_name}: #{result.root_node.type}"
|
|
679
|
-
# end
|
|
680
|
-
# end
|
|
681
|
-
#
|
|
682
|
-
# @example Exception safety (backend restored even on error)
|
|
683
|
-
# TreeHaver.with_backend(:mri) do
|
|
684
|
-
# raise "Something went wrong"
|
|
685
|
-
# rescue
|
|
686
|
-
# # Handle error
|
|
687
|
-
# end
|
|
688
|
-
# # Backend is still restored to its previous value
|
|
689
|
-
#
|
|
690
|
-
# @example Thread isolation
|
|
691
|
-
# threads = [:mri, :rust].map do |backend_name|
|
|
692
|
-
# Thread.new do
|
|
693
|
-
# TreeHaver.with_backend(backend_name) do
|
|
694
|
-
# # Each thread uses its own backend independently
|
|
695
|
-
# TreeHaver::Parser.new
|
|
696
|
-
# end
|
|
697
|
-
# end
|
|
698
|
-
# end
|
|
699
|
-
# threads.each(&:join)
|
|
700
|
-
#
|
|
701
|
-
# @see #effective_backend
|
|
702
|
-
# @see #current_backend_context
|
|
703
|
-
def with_backend(name)
  raise ArgumentError, "Backend name required" if name.nil?

  # Snapshot the current state up front so the ensure clause can put it back.
  context = current_backend_context
  prior_backend, prior_depth = context.values_at(:backend, :depth)

  begin
    context[:backend] = name.to_sym
    context[:depth] += 1
    yield
  ensure
    # Runs on normal exit and on exceptions alike, unwinding exactly one level.
    context[:backend], context[:depth] = prior_backend, prior_depth
  end
end
|
|
725
|
-
|
|
726
|
-
# Resolve the effective backend considering explicit override
|
|
727
|
-
#
|
|
728
|
-
# Priority: explicit > thread context > global > :auto
|
|
729
|
-
#
|
|
730
|
-
# @param explicit_backend [Symbol, String, nil] explicitly requested backend
|
|
731
|
-
# @return [Symbol] the backend to use
|
|
732
|
-
# @example
|
|
733
|
-
# TreeHaver.resolve_effective_backend(:ffi) # => :ffi
|
|
734
|
-
# @example With thread-local context
|
|
735
|
-
# TreeHaver.with_backend(:mri) do
|
|
736
|
-
# TreeHaver.resolve_effective_backend(nil) # => :mri
|
|
737
|
-
# TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
|
|
738
|
-
# end
|
|
739
|
-
def resolve_effective_backend(explicit_backend = nil)
  # An explicit request always wins; otherwise defer to context/global resolution.
  if explicit_backend
    explicit_backend.to_sym
  else
    effective_backend
  end
end
|
|
743
|
-
|
|
744
|
-
# Get backend module for a specific backend (with explicit override)
|
|
745
|
-
#
|
|
746
|
-
# @param explicit_backend [Symbol, String, nil] explicitly requested backend
|
|
747
|
-
# @return [Module, nil] the backend module or nil if not available
|
|
748
|
-
# @raise [BackendConflict] if the backend conflicts with previously used backends
|
|
749
|
-
# @example
|
|
750
|
-
# mod = TreeHaver.resolve_backend_module(:ffi)
|
|
751
|
-
# mod.capabilities[:backend] # => :ffi
|
|
752
|
-
def resolve_backend_module(explicit_backend = nil)
  requested = resolve_effective_backend(explicit_backend)
  # The allow / conflict / usage steps below apply only to a concrete backend
  # name, never to :auto.
  concrete = requested && requested != :auto

  candidate =
    case requested
    when :mri then Backends::MRI
    when :rust then Backends::Rust
    when :ffi then Backends::FFI
    when :java then Backends::Java
    when :citrus then Backends::Citrus
    when :parslet then Backends::Parslet
    when :prism then Backends::Prism
    when :psych then Backends::Psych
    when :auto then backend_module # regular auto-selection
    else
      # A registered plugin backend is handed back immediately; it does not go
      # through the allow / conflict / availability steps further down.
      plugin = registered_backend(requested)
      return plugin if plugin

      nil # unknown name: nil lets the caller report the error
    end

  return unless candidate

  # Environment restrictions are enforced first.
  return if concrete && !backend_allowed?(requested)

  # Conflicts are checked before availability, using the requested name itself,
  # so a blocked backend raises an explanatory error instead of merely looking
  # unavailable.
  check_backend_conflict!(requested) if concrete

  # A module that does not define available? is treated as available.
  return if candidate.respond_to?(:available?) && !candidate.available?

  record_backend_usage(requested) if concrete
  candidate
end
|
|
811
|
-
|
|
812
|
-
# Resolve a native tree-sitter backend module (for from_library)
|
|
813
|
-
#
|
|
814
|
-
# This method is similar to resolve_backend_module but ONLY considers
|
|
815
|
-
# backends that support loading shared libraries (.so files):
|
|
816
|
-
# - MRI (ruby_tree_sitter C extension)
|
|
817
|
-
# - Rust (tree_stump)
|
|
818
|
-
# - FFI (ffi gem with libtree-sitter)
|
|
819
|
-
# - Java (jtreesitter on JRuby)
|
|
820
|
-
#
|
|
821
|
-
# Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) are NOT
|
|
822
|
-
# considered because they don't support from_library.
|
|
823
|
-
#
|
|
824
|
-
# @param explicit_backend [Symbol, String, nil] explicitly requested backend
|
|
825
|
-
# @return [Module, nil] the backend module or nil if none available
|
|
826
|
-
# @raise [BackendConflict] if the backend conflicts with previously used backends
|
|
827
|
-
def resolve_native_backend_module(explicit_backend = nil)
  engine = defined?(RUBY_ENGINE) ? RUBY_ENGINE : nil

  # On TruffleRuby auto-selection yields nothing; an explicit request continues
  # so it can fail with a proper error further down.
  return if engine == "truffleruby" && !explicit_backend

  requested = resolve_effective_backend(explicit_backend)
  return resolve_backend_module(requested) if NATIVE_BACKENDS.include?(requested)

  # An explicitly requested non-native backend cannot serve from_library.
  return if explicit_backend

  # Otherwise (:auto, or a non-native backend set via with_backend) walk the
  # native backends in priority order for this engine.
  candidates = engine == "jruby" ? %i[java ffi] : %i[mri rust ffi]
  candidates.each do |candidate|
    begin
      found = resolve_backend_module(candidate)
      return found if found
    rescue BackendConflict
      # Blocked by a previously used backend; move on to the next candidate.
      next
    end
  end

  nil
end
|
|
872
|
-
|
|
873
|
-
# Determine the concrete backend module to use
|
|
874
|
-
#
|
|
875
|
-
# This method performs backend auto-selection when backend is :auto.
|
|
876
|
-
# On JRuby, prefers Java backend if available, then FFI, then Citrus.
|
|
877
|
-
# On MRI, prefers MRI backend if available, then Rust, then FFI, then Citrus.
|
|
878
|
-
# Citrus is the final fallback as it's pure Ruby and works everywhere.
|
|
879
|
-
#
|
|
880
|
-
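# @return [Module, nil] the backend module (auto-selection picks Backends::Java, Backends::MRI, Backends::Rust, Backends::FFI, or Backends::Citrus; an explicitly selected backend such as Backends::Parslet, Backends::Prism, or Backends::Psych is returned as-is), or nil if none available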
|
|
881
|
-
# @example
|
|
882
|
-
# mod = TreeHaver.backend_module
|
|
883
|
-
# if mod
|
|
884
|
-
# puts "Using #{mod.capabilities[:backend]} backend"
|
|
885
|
-
# end
|
|
886
|
-
def backend_module
  choice = effective_backend

  # With protection on, an explicit backend that conflicts with one already in
  # use is dropped in favor of auto-selection.
  if choice != :auto && backend_protect? && !conflicting_backends_for(choice).empty?
    choice = :auto
  end

  # Explicit selections map straight to their module.
  case choice
  when :mri then return Backends::MRI
  when :rust then return Backends::Rust
  when :ffi then return Backends::FFI
  when :java then return Backends::Java
  when :citrus then return Backends::Citrus
  when :parslet then return Backends::Parslet
  when :prism then return Backends::Prism
  when :psych then return Backends::Psych
  end

  # Auto-selection (also reached for any other name): each candidate must be
  # allowed and report itself available.
  engine = defined?(RUBY_ENGINE) ? RUBY_ENGINE : nil
  return Backends::Java if engine == "jruby" && backend_allowed?(:java) && Backends::Java.available?

  if engine == "ruby"
    return Backends::MRI if backend_allowed?(:mri) && Backends::MRI.available?
    return Backends::Rust if backend_allowed?(:rust) && Backends::Rust.available?
  end

  return Backends::FFI if backend_allowed?(:ffi) && Backends::FFI.available?
  return Backends::Citrus if backend_allowed?(:citrus) && Backends::Citrus.available? # last fallback

  nil # no backend available
end
|
|
935
|
-
|
|
936
|
-
# Get capabilities of the current backend
|
|
937
|
-
#
|
|
938
|
-
# Returns a hash describing what features the selected backend supports.
|
|
939
|
-
# Common keys include:
|
|
940
|
-
# - :backend - Symbol identifying the backend (:mri, :rust, :ffi, :java)
|
|
941
|
-
# - :parse - Whether parsing is implemented
|
|
942
|
-
# - :query - Whether the Query API is available
|
|
943
|
-
# - :bytes_field - Whether byte position fields are available
|
|
944
|
-
# - :incremental - Whether incremental parsing is supported
|
|
945
|
-
#
|
|
946
|
-
# @return [Hash{Symbol => Object}] capability map, or empty hash if no backend available
|
|
947
|
-
# @example
|
|
948
|
-
# TreeHaver.capabilities
|
|
949
|
-
# # => { backend: :mri, query: true, bytes_field: true }
|
|
950
|
-
def capabilities
  # No selectable backend means an empty capability map.
  active = backend_module
  active ? active.capabilities : {}
end
|
|
955
|
-
|
|
956
|
-
# -- Language registration API -------------------------------------------------
|
|
957
|
-
# Delegates to LanguageRegistry for thread-safe registration and lookup.
|
|
958
|
-
# Allows opting-in dynamic helpers like TreeHaver::Language.toml without
|
|
959
|
-
# advertising all names by default.
|
|
960
|
-
|
|
961
|
-
# Register a language helper by name (backend-agnostic)
|
|
962
|
-
#
|
|
963
|
-
# After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
|
|
964
|
-
# to load the registered language. TreeHaver will automatically use the appropriate
|
|
965
|
-
# grammar based on the active backend.
|
|
966
|
-
#
|
|
967
|
-
# The `name` parameter is an arbitrary identifier you choose - it doesn't need to
|
|
968
|
-
# match the actual language name. This is useful for:
|
|
969
|
-
# - Testing: Use unique names like `:toml_test` to avoid collisions
|
|
970
|
-
# - Aliasing: Register the same grammar under multiple names
|
|
971
|
-
# - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
|
|
972
|
-
#
|
|
973
|
-
# The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
|
|
974
|
-
# `grammar_module` (Citrus), not from the name.
|
|
975
|
-
#
|
|
976
|
-
# IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
|
|
977
|
-
# library AND a Citrus grammar for the same language IN A SINGLE CALL.
|
|
978
|
-
# This is achieved by using separate `if` statements (not `elsif`) and no early
|
|
979
|
-
# returns. This design is deliberate and provides significant benefits:
|
|
980
|
-
#
|
|
981
|
-
# Why register both backends for one language?
|
|
982
|
-
# - Backend flexibility: Code works regardless of which backend is active
|
|
983
|
-
# - Performance testing: Compare tree-sitter vs Citrus performance
|
|
984
|
-
# - Gradual migration: Transition between backends without breaking code
|
|
985
|
-
# - Fallback scenarios: Use Citrus when tree-sitter library unavailable
|
|
986
|
-
# - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
|
|
987
|
-
#
|
|
988
|
-
# The active backend determines which registration is used automatically.
|
|
989
|
-
# No code changes needed to switch backends - just change TreeHaver.backend.
|
|
990
|
-
#
|
|
991
|
-
# @param name [Symbol, String] identifier for this registration (can be any name you choose)
|
|
992
|
-
# @param path [String, nil] absolute path to the language shared library (for tree-sitter)
|
|
993
|
-
# @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
|
|
994
|
-
# @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
|
|
995
|
-
# @param grammar_class [Class, nil] Parslet grammar class that inherits from Parslet::Parser
|
|
996
|
-
# @param backend_module [Module, nil] pure Ruby backend module with Language/Parser classes
|
|
997
|
-
# @param backend_type [Symbol, nil] backend type for backend_module (defaults to module name)
|
|
998
|
-
# @param gem_name [String, nil] optional gem name for error messages
|
|
999
|
-
# @return [void]
|
|
1000
|
-
# @example Register tree-sitter grammar only
|
|
1001
|
-
# TreeHaver.register_language(
|
|
1002
|
-
# :toml,
|
|
1003
|
-
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
1004
|
-
# symbol: "tree_sitter_toml"
|
|
1005
|
-
# )
|
|
1006
|
-
# @example Register Citrus grammar only
|
|
1007
|
-
# TreeHaver.register_language(
|
|
1008
|
-
# :toml,
|
|
1009
|
-
# grammar_module: TomlRB::Document,
|
|
1010
|
-
# gem_name: "toml-rb"
|
|
1011
|
-
# )
|
|
1012
|
-
# @example Register Parslet grammar only
|
|
1013
|
-
# TreeHaver.register_language(
|
|
1014
|
-
# :toml,
|
|
1015
|
-
# grammar_class: TOML::Parslet,
|
|
1016
|
-
# gem_name: "toml"
|
|
1017
|
-
# )
|
|
1018
|
-
# @example Register pure Ruby backend (external gem like rbs-merge)
|
|
1019
|
-
# TreeHaver.register_language(
|
|
1020
|
-
# :rbs,
|
|
1021
|
-
# backend_module: Rbs::Merge::Backends::RbsBackend,
|
|
1022
|
-
# backend_type: :rbs,
|
|
1023
|
-
# gem_name: "rbs"
|
|
1024
|
-
# )
|
|
1025
|
-
# @example Register BOTH backends in separate calls
|
|
1026
|
-
# TreeHaver.register_language(
|
|
1027
|
-
# :toml,
|
|
1028
|
-
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
1029
|
-
# symbol: "tree_sitter_toml"
|
|
1030
|
-
# )
|
|
1031
|
-
# TreeHaver.register_language(
|
|
1032
|
-
# :toml,
|
|
1033
|
-
# grammar_module: TomlRB::Document,
|
|
1034
|
-
# gem_name: "toml-rb"
|
|
1035
|
-
# )
|
|
1036
|
-
# @example Register BOTH backends in ONE call (recommended for maximum flexibility)
|
|
1037
|
-
# TreeHaver.register_language(
|
|
1038
|
-
# :toml,
|
|
1039
|
-
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
1040
|
-
# symbol: "tree_sitter_toml",
|
|
1041
|
-
# grammar_module: TomlRB::Document,
|
|
1042
|
-
# gem_name: "toml-rb"
|
|
1043
|
-
# )
|
|
1044
|
-
# # Now TreeHaver::Language.toml works with ANY backend!
|
|
1045
|
-
def register_language(name, path: nil, symbol: nil, grammar_module: nil, grammar_class: nil, backend_module: nil, backend_type: nil, gem_name: nil)
  # Validate every argument BEFORE touching the registry, so an invalid argument
  # can never leave a partial registration behind (e.g. the tree-sitter entry
  # stored and then an ArgumentError raised for a bad grammar_module).
  if path.nil? && grammar_module.nil? && grammar_class.nil? && backend_module.nil?
    raise ArgumentError, "Must provide at least one of: path (tree-sitter), grammar_module (Citrus), grammar_class (Parslet), or backend_module (pure Ruby)"
  end

  if grammar_module && !grammar_module.respond_to?(:parse)
    raise ArgumentError, "Grammar module must respond to :parse"
  end

  if grammar_class && !grammar_class.respond_to?(:new)
    raise ArgumentError, "Grammar class must respond to :new"
  end

  # Derive the backend type from the last segment of the module name when it was
  # not given. Anonymous modules have no name, so they need an explicit
  # backend_type.
  type = nil
  if backend_module
    type = backend_type || backend_module.name&.split("::")&.last&.downcase&.to_sym
    raise ArgumentError, "backend_type is required when backend_module has no name" unless type
  end

  # Independent statements (not an if/elsif chain): every backend supplied in
  # this call is registered under the same name.
  LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol) if path
  LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name) if grammar_module
  LanguageRegistry.register(name, :parslet, grammar_class: grammar_class, gem_name: gem_name) if grammar_class
  LanguageRegistry.register(name, type, backend_module: backend_module, gem_name: gem_name) if backend_module

  nil
end
|
|
1092
|
-
|
|
1093
|
-
# Register a backend module
|
|
1094
|
-
#
|
|
1095
|
-
# Allows external gems to register their backend implementation so it can be
|
|
1096
|
-
# found by TreeHaver.backend = :name and other lookup methods.
|
|
1097
|
-
#
|
|
1098
|
-
# @param name [Symbol] backend name (e.g. :rbs, :commonmarker)
|
|
1099
|
-
# @param mod [Module] the backend module
|
|
1100
|
-
# @return [void]
|
|
1101
|
-
def register_backend(name, mod)
  # The registry is created on first use; names are stored as symbols.
  registry = (@backend_registry ||= {})
  registry[name.to_sym] = mod
end
|
|
1105
|
-
|
|
1106
|
-
# Get a registered backend module
|
|
1107
|
-
#
|
|
1108
|
-
# @param name [Symbol] backend name
|
|
1109
|
-
# @return [Module, nil] registered backend module
|
|
1110
|
-
def registered_backend(name)
  # Looking up before anything was registered creates an empty registry and
  # yields nil.
  registry = (@backend_registry ||= {})
  registry[name.to_sym]
end
|
|
1114
|
-
|
|
1115
|
-
# Fetch a registered language entry
|
|
1116
|
-
#
|
|
1117
|
-
# @api private
|
|
1118
|
-
# @param name [Symbol, String] language identifier
|
|
1119
|
-
# @return [Hash, nil] registration hash keyed by backend type (e.g. :tree_sitter, :citrus), each value holding that backend's settings, or nil if not registered
|
|
1120
|
-
def registered_language(name)
  # Pure delegation: LanguageRegistry performs the lookup.
  LanguageRegistry.registered(name)
end
|
|
1123
|
-
|
|
1124
|
-
# Create a parser configured for a specific language
|
|
1125
|
-
#
|
|
1126
|
-
# Respects the effective backend setting (via TREE_HAVER_BACKEND env var,
|
|
1127
|
-
# TreeHaver.backend=, or with_backend block).
|
|
1128
|
-
#
|
|
1129
|
-
# Supports four types of backends:
|
|
1130
|
-
# 1. Tree-sitter native backends (auto-discovered or explicit path)
|
|
1131
|
-
# 2. Citrus grammars (pure Ruby, via CITRUS_DEFAULTS or explicit config)
|
|
1132
|
-
# 3. Parslet grammars (pure Ruby, via PARSLET_DEFAULTS or explicit config)
|
|
1133
|
-
# 4. Pure Ruby backends (registered via backend_module, e.g., Prism, Psych, RBS)
|
|
1134
|
-
#
|
|
1135
|
-
# @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :ruby, :yaml, :rbs)
|
|
1136
|
-
# @param library_path [String, nil] optional explicit path to tree-sitter grammar library
|
|
1137
|
-
# @param symbol [String, nil] optional tree-sitter symbol name (defaults to "tree_sitter_<name>")
|
|
1138
|
-
# @param citrus_config [Hash, nil] optional Citrus fallback configuration
|
|
1139
|
-
# @param parslet_config [Hash, nil] optional Parslet fallback configuration
|
|
1140
|
-
# @return [TreeHaver::Parser] configured parser with language set
|
|
1141
|
-
# @raise [TreeHaver::NotAvailable] if no parser backend is available for the language
|
|
1142
|
-
#
|
|
1143
|
-
# @example Basic usage (auto-discovers grammar)
|
|
1144
|
-
# parser = TreeHaver.parser_for(:toml)
|
|
1145
|
-
#
|
|
1146
|
-
# @example Force Citrus backend
|
|
1147
|
-
# TreeHaver.with_backend(:citrus) { TreeHaver.parser_for(:toml) }
|
|
1148
|
-
#
|
|
1149
|
-
# @example Force Parslet backend
|
|
1150
|
-
# TreeHaver.with_backend(:parslet) { TreeHaver.parser_for(:toml) }
|
|
1151
|
-
#
|
|
1152
|
-
# @example Use registered pure Ruby backend (e.g., RBS)
|
|
1153
|
-
# # First, rbs-merge registers its backend:
|
|
1154
|
-
# # TreeHaver.register_language(:rbs, backend_module: Rbs::Merge::RbsBackend, backend_type: :rbs)
|
|
1155
|
-
# parser = TreeHaver.parser_for(:rbs)
|
|
1156
|
-
def parser_for(language_name, library_path: nil, symbol: nil, citrus_config: nil, parslet_config: nil)
  ensure_builtin_backends_registered!

  name = language_name.to_sym
  symbol ||= "tree_sitter_#{name}"
  requested = effective_backend
  auto = requested == :auto

  # A specific backend request narrows the grammar loaders tried below to that
  # family; :auto tries them all in order.
  try_tree_sitter = ![:citrus, :parslet].include?(requested) && (auto || NATIVE_BACKENDS.include?(requested))
  try_citrus = auto || requested == :citrus
  try_parslet = auto || requested == :parslet

  # Registered backend modules (any registration type other than tree_sitter,
  # citrus, or parslet) are consulted first.
  registered_language(name)&.each do |backend_type, config|
    next if %i[tree_sitter citrus parslet].include?(backend_type)

    backend_mod = config[:backend_module]
    next unless backend_mod
    next unless backend_mod.respond_to?(:available?) && backend_mod.available?
    # With a specific backend requested, only the matching registration counts.
    next unless auto || requested == backend_type
    next unless backend_mod.const_defined?(:Parser)

    parser = backend_mod::Parser.new
    if backend_mod.const_defined?(:Language)
      lang_class = backend_mod::Language
      # Prefer a named factory (Language.<name>); otherwise fall back to from_library.
      if lang_class.respond_to?(name)
        parser.language = lang_class.public_send(name)
      elsif lang_class.respond_to?(:from_library)
        parser.language = lang_class.from_library(nil, name: name)
      end
    end
    return parser
  end

  # Grammar loaders in order; each one runs only while no language was found.
  language = nil
  language = load_tree_sitter_language(name, library_path: library_path, symbol: symbol) if try_tree_sitter
  language ||= load_citrus_language(name, citrus_config: citrus_config) if try_citrus
  language ||= load_parslet_language(name, parslet_config: parslet_config) if try_parslet

  unless language
    raise NotAvailable, "No parser available for #{name}. " \
      "Install tree-sitter-#{name} or configure a Citrus/Parslet grammar."
  end

  Parser.new.tap { |parser| parser.language = language }
end
|
|
1232
|
-
|
|
1233
|
-
private
|
|
1234
|
-
|
|
1235
|
-
# Load a tree-sitter language, either from registry or via auto-discovery
|
|
1236
|
-
# @return [Language, nil]
|
|
1237
|
-
# @raise [NotAvailable] if explicit library_path is provided but doesn't exist or can't load
|
|
1238
|
-
def load_tree_sitter_language(name, library_path: nil, symbol: nil)
  # An explicit path has to work: failures here propagate to the caller.
  if library_path && !library_path.empty?
    unless File.exist?(library_path)
      raise NotAvailable, "Specified parser path does not exist: #{library_path}"
    end

    register_language(name, path: library_path, symbol: symbol)
    return Language.public_send(name)
  end

  # Best effort from here on: the listed errors simply mean "no language" (nil).
  begin
    # Reuse an existing tree-sitter registration only when its file is still on
    # disk, so stale registrations are ignored.
    entry = registered_language(name)&.dig(:tree_sitter)
    registered_path = entry && entry[:path]
    return Language.public_send(name, symbol: symbol) if registered_path && File.exist?(registered_path)

    # Otherwise try auto-discovery via GrammarFinder.
    discovery = GrammarFinder.new(name)
    if discovery.available?
      discovery.register!
      Language.public_send(name)
    end
  rescue NotAvailable, ArgumentError, LoadError
    nil
  end
end
|
|
1268
|
-
|
|
1269
|
-
# Load a Citrus language from configuration or defaults
|
|
1270
|
-
# @return [Language, nil]
|
|
1271
|
-
def load_citrus_language(name, citrus_config: nil)
|
|
1272
|
-
config = citrus_config || CITRUS_DEFAULTS[name] || {}
|
|
1273
|
-
return unless config[:gem_name] && config[:grammar_const]
|
|
1274
|
-
|
|
1275
|
-
finder = CitrusGrammarFinder.new(
|
|
1276
|
-
language: name,
|
|
1277
|
-
gem_name: config[:gem_name],
|
|
1278
|
-
grammar_const: config[:grammar_const],
|
|
1279
|
-
require_path: config[:require_path],
|
|
1280
|
-
)
|
|
1281
|
-
return unless finder.available?
|
|
1282
|
-
|
|
1283
|
-
finder.register!
|
|
1284
|
-
Language.public_send(name)
|
|
1285
|
-
rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
|
|
1286
|
-
nil
|
|
1287
|
-
end
|
|
1288
|
-
|
|
1289
|
-
# Load a Parslet language from configuration or defaults
|
|
1290
|
-
# @return [Language, nil]
|
|
1291
|
-
def load_parslet_language(name, parslet_config: nil)
|
|
1292
|
-
config = parslet_config || PARSLET_DEFAULTS[name] || {}
|
|
1293
|
-
return unless config[:gem_name] && config[:grammar_const]
|
|
1294
|
-
|
|
1295
|
-
finder = ParsletGrammarFinder.new(
|
|
1296
|
-
language: name,
|
|
1297
|
-
gem_name: config[:gem_name],
|
|
1298
|
-
grammar_const: config[:grammar_const],
|
|
1299
|
-
require_path: config[:require_path],
|
|
1300
|
-
)
|
|
1301
|
-
return unless finder.available?
|
|
1302
|
-
|
|
1303
|
-
finder.register!
|
|
1304
|
-
Language.public_send(name)
|
|
1305
|
-
rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
|
|
1306
|
-
nil
|
|
1307
|
-
end
|
|
1308
|
-
end
|
|
1309
|
-
|
|
1310
|
-
# Language and Parser classes have been moved to separate files:
|
|
1311
|
-
# - tree_haver/language.rb: TreeHaver::Language - loads grammar shared libraries
|
|
1312
|
-
# - tree_haver/parser.rb: TreeHaver::Parser - parses source code into syntax trees
|
|
1313
|
-
# - tree_haver/tree.rb: TreeHaver::Tree - unified wrapper providing consistent API
|
|
1314
|
-
# - tree_haver/node.rb: TreeHaver::Node - unified wrapper providing consistent API
|
|
1315
|
-
#
|
|
1316
|
-
# These provide a unified interface across all backends (MRI, Rust, FFI, Java, Citrus).
|
|
1317
|
-
# All backends now return properly wrapped TreeHaver::Tree and TreeHaver::Node objects.
|
|
1318
|
-
end # end module TreeHaver
|
|
1319
|
-
|
|
1320
|
-
TreeHaver::Version.class_eval do
|
|
1321
|
-
extend VersionGem::Basic
|
|
12
|
+
PACKAGE_NAME = "tree_haver"
|
|
1322
13
|
end
|