tree_haver 5.0.4 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/tree_haver/backend_context.rb +28 -0
- data/lib/tree_haver/backend_registry.rb +19 -432
- data/lib/tree_haver/contracts.rb +460 -0
- data/lib/tree_haver/kaitai_backend.rb +30 -0
- data/lib/tree_haver/language_pack.rb +190 -0
- data/lib/tree_haver/peg_backends.rb +76 -0
- data/lib/tree_haver/version.rb +1 -12
- data/lib/tree_haver.rb +7 -1316
- data.tar.gz.sig +0 -0
- metadata +34 -245
- metadata.gz.sig +0 -0
- data/CHANGELOG.md +0 -1366
- data/CITATION.cff +0 -20
- data/CODE_OF_CONDUCT.md +0 -134
- data/CONTRIBUTING.md +0 -359
- data/FUNDING.md +0 -74
- data/LICENSE.txt +0 -21
- data/README.md +0 -2347
- data/REEK +0 -0
- data/RUBOCOP.md +0 -71
- data/SECURITY.md +0 -21
- data/lib/tree_haver/backend_api.rb +0 -349
- data/lib/tree_haver/backends/citrus.rb +0 -487
- data/lib/tree_haver/backends/ffi.rb +0 -1009
- data/lib/tree_haver/backends/java.rb +0 -893
- data/lib/tree_haver/backends/mri.rb +0 -362
- data/lib/tree_haver/backends/parslet.rb +0 -560
- data/lib/tree_haver/backends/prism.rb +0 -471
- data/lib/tree_haver/backends/psych.rb +0 -375
- data/lib/tree_haver/backends/rust.rb +0 -239
- data/lib/tree_haver/base/language.rb +0 -98
- data/lib/tree_haver/base/node.rb +0 -322
- data/lib/tree_haver/base/parser.rb +0 -24
- data/lib/tree_haver/base/point.rb +0 -48
- data/lib/tree_haver/base/tree.rb +0 -128
- data/lib/tree_haver/base.rb +0 -12
- data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
- data/lib/tree_haver/compat.rb +0 -43
- data/lib/tree_haver/grammar_finder.rb +0 -374
- data/lib/tree_haver/language.rb +0 -295
- data/lib/tree_haver/language_registry.rb +0 -190
- data/lib/tree_haver/library_path_utils.rb +0 -80
- data/lib/tree_haver/node.rb +0 -579
- data/lib/tree_haver/parser.rb +0 -438
- data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
- data/lib/tree_haver/path_validator.rb +0 -353
- data/lib/tree_haver/point.rb +0 -27
- data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
- data/lib/tree_haver/rspec/testable_node.rb +0 -217
- data/lib/tree_haver/rspec.rb +0 -33
- data/lib/tree_haver/tree.rb +0 -258
- data/sig/tree_haver/backends.rbs +0 -352
- data/sig/tree_haver/grammar_finder.rbs +0 -29
- data/sig/tree_haver/path_validator.rbs +0 -32
- data/sig/tree_haver.rbs +0 -234
data/lib/tree_haver/parser.rb
DELETED
|
@@ -1,438 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
|
|
4
|
-
# Unified Parser facade providing a consistent API across all backends
|
|
5
|
-
#
|
|
6
|
-
# This class acts as a facade/adapter that delegates to backend-specific
|
|
7
|
-
# parser implementations. It automatically selects the appropriate backend
|
|
8
|
-
# and provides a unified interface regardless of which parser is being used.
|
|
9
|
-
#
|
|
10
|
-
# == Backend Selection
|
|
11
|
-
#
|
|
12
|
-
# The parser automatically selects a backend based on:
|
|
13
|
-
# 1. Explicit `backend:` parameter in constructor
|
|
14
|
-
# 2. `TreeHaver.backend` global setting
|
|
15
|
-
# 3. `TREE_HAVER_BACKEND` environment variable
|
|
16
|
-
# 4. Auto-detection (tries available backends in order)
|
|
17
|
-
#
|
|
18
|
-
# == Supported Backends
|
|
19
|
-
#
|
|
20
|
-
# **Tree-sitter backends** (native, high-performance):
|
|
21
|
-
# - `:mri` - ruby_tree_sitter gem (C extension, MRI only)
|
|
22
|
-
# - `:rust` - tree_stump gem (Rust via magnus, MRI only)
|
|
23
|
-
# - `:ffi` - FFI bindings to libtree-sitter (MRI, JRuby)
|
|
24
|
-
# - `:java` - java-tree-sitter (JRuby only)
|
|
25
|
-
#
|
|
26
|
-
# **Pure Ruby backends** (portable, no native dependencies):
|
|
27
|
-
# - `:citrus` - Citrus PEG parser (e.g., toml-rb)
|
|
28
|
-
# - `:parslet` - Parslet PEG parser (e.g., toml gem)
|
|
29
|
-
# - `:prism` - Ruby's official parser (Ruby only)
|
|
30
|
-
# - `:psych` - YAML parser (stdlib)
|
|
31
|
-
#
|
|
32
|
-
# == Wrapping/Unwrapping Responsibility
|
|
33
|
-
#
|
|
34
|
-
# TreeHaver::Parser handles ALL object wrapping and unwrapping:
|
|
35
|
-
#
|
|
36
|
-
# **Language objects:**
|
|
37
|
-
# - Unwraps Language wrappers before passing to backend.language=
|
|
38
|
-
# - MRI backend receives ::TreeSitter::Language
|
|
39
|
-
# - Rust backend receives String (language name)
|
|
40
|
-
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
41
|
-
# - Citrus backend receives grammar module
|
|
42
|
-
# - Parslet backend receives grammar class
|
|
43
|
-
#
|
|
44
|
-
# **Tree objects:**
|
|
45
|
-
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
46
|
-
# - parse_string() unwraps old_tree before passing to backend, wraps returned tree
|
|
47
|
-
# - Backends always work with raw backend trees, never TreeHaver::Tree
|
|
48
|
-
#
|
|
49
|
-
# **Node objects:**
|
|
50
|
-
# - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
|
|
51
|
-
#
|
|
52
|
-
# This design ensures:
|
|
53
|
-
# - Principle of Least Surprise: wrapping happens at boundaries, consistently
|
|
54
|
-
# - Backends are simple: they don't need to know about TreeHaver wrappers
|
|
55
|
-
# - Single Responsibility: wrapping logic is only in TreeHaver::Parser
|
|
56
|
-
#
|
|
57
|
-
# @example Basic parsing
|
|
58
|
-
# parser = TreeHaver::Parser.new
|
|
59
|
-
# parser.language = TreeHaver::Language.toml
|
|
60
|
-
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
61
|
-
#
|
|
62
|
-
# @example Explicit backend selection
|
|
63
|
-
# parser = TreeHaver::Parser.new(backend: :citrus)
|
|
64
|
-
# parser.language = TreeHaver::Language.toml
|
|
65
|
-
# tree = parser.parse(toml_source)
|
|
66
|
-
#
|
|
67
|
-
# @see Base::Parser The base class defining the parser interface
|
|
68
|
-
# @see Backends::Citrus::Parser Citrus backend implementation
|
|
69
|
-
# @see Backends::Parslet::Parser Parslet backend implementation
|
|
70
|
-
# @see Backends::Prism::Parser Prism backend implementation
|
|
71
|
-
class Parser < Base::Parser
|
|
72
|
-
# Build a parser that delegates to one concrete backend implementation.
#
# A String backend name is normalised to a Symbol, the backend module is
# looked up through TreeHaver.resolve_backend_module, and that module's
# Parser class is instantiated as the delegate (@impl).
#
# @param backend [Symbol, String, nil] backend to use; nil leaves the choice
#   to TreeHaver.resolve_backend_module
#   Valid values: :auto, :mri, :rust, :ffi, :java, :citrus, :parslet, :prism, :psych
# @raise [NotAvailable] if no backend module could be resolved
# @example Default (backend chosen by TreeHaver)
#   parser = TreeHaver::Parser.new
# @example Explicit backend
#   parser = TreeHaver::Parser.new(backend: :citrus)
def initialize(backend: nil)
  super() # Initialize @language from Base::Parser

  # Work with a Symbol (or nil) from here on, whatever the caller passed
  requested = backend.is_a?(String) ? backend.to_sym : backend
  backend_module = TreeHaver.resolve_backend_module(requested)

  if backend_module.nil?
    reason =
      if requested
        "Requested backend #{requested.inspect} is not available"
      else
        "No TreeHaver backend is available"
      end
    raise NotAvailable, reason
  end

  # Instantiating the backend parser can fail when its runtime is missing;
  # hand such failures to the fallback logic instead of crashing outright.
  begin
    @impl = backend_module::Parser.new
    @explicit_backend = requested # Remembered for #backend (always a Symbol or nil)
  rescue NoMethodError, LoadError => failure
    # Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
    handle_parser_creation_failure(failure, requested)
  end
end
|
|
110
|
-
|
|
111
|
-
# Recover from a failed backend parser instantiation.
#
# When the caller did not ask for a specific backend (nil or :auto), the
# Citrus backend is tried first and the Parslet backend second. When a
# specific backend was requested, the original error is re-raised untouched.
#
# @param error [Exception] the error that caused parser creation to fail
# @param backend [Symbol, nil] the requested backend
# @return [Symbol] the fallback backend that was installed (:citrus or :parslet)
# @raise [NotAvailable] if neither fallback backend is available
# @raise [Exception] the original error, when a specific backend was requested
# @api private
def handle_parser_creation_failure(error, backend)
  automatic = backend.nil? || backend == :auto

  # An explicitly requested backend must surface its own failure
  raise error unless automatic

  if Backends::Citrus.available?
    @impl = Backends::Citrus::Parser.new
    @explicit_backend = :citrus
  elsif Backends::Parslet.available?
    @impl = Backends::Parslet::Parser.new
    @explicit_backend = :parslet
  else
    # Nothing left to fall back to: report the original cause
    raise NotAvailable, "Tree-sitter backend failed: #{error.message}. " \
      "Citrus/Parslet fallback not available. Install tree-sitter runtime, citrus gem, or parslet gem."
  end
end
|
|
137
|
-
|
|
138
|
-
# Report which backend this parser is actually using (for introspection).
#
# A recorded backend other than :auto wins. Otherwise the backend is inferred
# from the implementation's class name, and only when the class name is not
# recognised does the answer come from TreeHaver.effective_backend.
#
# Prism and Psych implementations are recognised as well: this class installs
# those parsers itself when a matching language is assigned, so reporting
# anything else for them would make the parser disagree with its own delegate.
#
# @return [Symbol] :mri, :rust, :ffi, :java, :citrus, :parslet, :prism or
#   :psych; or the value of TreeHaver.effective_backend for an unrecognised
#   implementation class
def backend
  return @explicit_backend if @explicit_backend && @explicit_backend != :auto

  # Determine the actual backend from the implementation class name
  case @impl.class.name
  when /MRI/ then :mri
  when /Rust/ then :rust
  when /FFI/ then :ffi
  when /Java/ then :java
  when /Citrus/ then :citrus
  when /Parslet/ then :parslet
  when /Prism/ then :prism
  when /Psych/ then :psych
  else
    # Unknown implementation class: defer to the globally effective backend
    TreeHaver.effective_backend
  end
end
|
|
167
|
-
|
|
168
|
-
# Assign the grammar this parser should use.
#
# The parser first switches its delegate if the language belongs to a
# different backend (see switch_backend_for_language), then hands the
# unwrapped language to the delegate and remembers the wrapper itself.
#
# @param lang [Language] the language to use for parsing
# @return [Language] the language that was set
# @example
#   parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
def language=(lang)
  # Must run before unwrapping: it may replace @impl with a parser from the
  # language's own backend (e.g. a Citrus language on a tree-sitter parser).
  switch_backend_for_language(lang)

  # The delegate receives whatever unwrap_language decides is appropriate
  # for the language's backend.
  @impl.language = unwrap_language(lang)

  # Store on base class for API compatibility
  @language = lang
end
|
|
192
|
-
|
|
193
|
-
# Parse source code and return it as a wrapped syntax tree.
#
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the delegate's tree wrapped together with its source
# @example
#   tree = parser.parse("x = 1")
#   puts tree.root_node.type
def parse(source)
  # The source travels with the tree so Node#text works
  Tree.new(@impl.parse(source), source: source)
end
|
|
205
|
-
|
|
206
|
-
# Parse source code, optionally reusing a previously parsed tree.
#
# Mirrors ruby_tree_sitter's `parse_string(old_tree, source)` signature.
#
# == Incremental Parsing
#
# tree-sitter can re-parse only the changed portions of a document when it is
# given the previous tree plus edit information:
#   1. Parse the initial source: `tree = parser.parse_string(nil, source)`
#   2. The source is edited (e.g., a character is inserted)
#   3. Call `tree.edit(...)` to update the tree's position data
#   4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
#   5. tree-sitter reuses unchanged nodes and re-parses only affected regions
#
# If the delegate responds to `parse_string`, the call is forwarded with the
# old tree unwrapped to its backend object; otherwise the old tree is ignored
# and this behaves like #parse. Check `TreeHaver.capabilities[:incremental]`
# to see whether the current backend supports incremental parsing.
#
# @param old_tree [Tree, nil] previously parsed tree, or nil for a fresh parse
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the parsed syntax tree
# @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
# @see Tree#edit For marking edits before incremental re-parsing
# @example First parse (no old tree)
#   tree = parser.parse_string(nil, "x = 1")
# @example Incremental parse
#   tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
#   new_tree = parser.parse_string(tree, "x = 42")
def parse_string(old_tree, source)
  # Delegates without parse_string get a plain, non-incremental parse
  return parse(source) unless @impl.respond_to?(:parse_string)

  # Delegates work with their own tree objects, never with our wrapper
  previous =
    if !old_tree
      nil
    elsif old_tree.respond_to?(:inner_tree)
      old_tree.inner_tree
    elsif old_tree.respond_to?(:instance_variable_get)
      # Compatibility path for wrappers that expose no reader
      old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
    else
      old_tree
    end

  # The source travels with the tree so Node#text works
  Tree.new(@impl.parse_string(previous, source), source: source)
end
|
|
263
|
-
|
|
264
|
-
private
|
|
265
|
-
|
|
266
|
-
# Replace the delegate parser when the language belongs to another backend.
#
# Only Citrus, Parslet, Prism and Psych languages trigger a switch. For any
# other language backend (including the tree-sitter ones :mri, :rust, :ffi
# and :java) the delegate is left alone, since moving between those would
# require reloading the language from its .so file.
#
# @param lang [Object] the language object; ignored unless it responds to #backend
# @return [Symbol, nil] the backend switched to, or nil when nothing changed
# @api private
def switch_backend_for_language(lang)
  return unless lang.respond_to?(:backend)

  wanted = lang.backend
  # No switch needed if backends match
  return if wanted == backend

  parser_class =
    case wanted
    when :citrus then Backends::Citrus::Parser
    when :parslet then Backends::Parslet::Parser
    when :prism then Backends::Prism::Parser
    when :psych then Backends::Psych::Parser
    end

  # Not a switchable backend, or the delegate already is the right parser
  return if parser_class.nil? || @impl.is_a?(parser_class)

  @impl = parser_class.new
  @explicit_backend = wanted
end
|
|
310
|
-
|
|
311
|
-
# Turn a language wrapper into the object the current delegate expects.
#
# Steps:
# 1. If the language's backend differs from this parser's backend (and the
#    parser's backend is not :auto), try to reload the language for the
#    parser's backend; failing that, raise.
# 2. Reject objects that do not expose #backend at all.
# 3. Unwrap according to the language's backend. Every branch returns the
#    wrapper itself when the expected reader is missing, so the delegate is
#    never handed nil.
#
# @param lang [Object] language wrapper responding to #backend
# @return [Object] the backend-specific language object, or the wrapper itself
#   for backends that consume the wrapper directly
# @raise [TreeHaver::Error] on an unresolvable backend mismatch, or when lang
#   does not respond to #backend
# @api private
def unwrap_language(lang)
  if lang.respond_to?(:backend)
    # Verify backend compatibility FIRST so a language from the wrong backend
    # is never passed on. Exception: an :auto parser backend accepts anything.
    current_backend = backend

    if lang.backend != current_backend && current_backend != :auto
      reloaded = try_reload_language_for_backend(lang, current_backend)
      unless reloaded
        raise TreeHaver::Error,
          "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
            "Cannot reload language for correct backend. " \
            "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
      end
      lang = reloaded
    end
  end

  # Checked after a possible reload, so the reloaded object is validated too
  unless lang.respond_to?(:backend)
    raise TreeHaver::Error,
      "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
        "Use TreeHaver::Language.from_library to create language objects."
  end

  case lang.backend
  when :mri
    return lang.to_language if lang.respond_to?(:to_language)
    return lang.inner_language if lang.respond_to?(:inner_language)

    lang
  when :rust
    lang.respond_to?(:name) ? lang.name : lang
  when :java
    # Falls back to the wrapper, like the other branches, rather than nil
    lang.respond_to?(:impl) ? lang.impl : lang
  when :ffi, :citrus, :parslet, :prism, :psych, :commonmarker, :markly
    # These backends take the wrapper as-is (FFI needs it for to_ptr)
    lang
  else
    # Unknown backend (e.g., test backend): try generic readers in order
    %i[to_language inner_language impl grammar_module grammar_class name].each do |reader|
      return lang.public_send(reader) if lang.respond_to?(reader)
    end

    # If nothing works, pass through as-is
    lang
  end
end
|
|
404
|
-
|
|
405
|
-
# Attempt to load the same grammar again via Language.from_library.
#
# Only languages that expose a truthy #path can be reloaded. Anything else
# (for instance a language that only carries a grammar module) yields nil.
# A failing reload is swallowed and reported through `warn` when $VERBOSE
# is set.
#
# @param lang [Object] language object with metadata
# @param target_backend [Symbol] backend to reload for (used in the warning text)
# @return [Object, nil] reloaded language or nil if reload not possible
# @api private
def try_reload_language_for_backend(lang, target_backend)
  # Can't reload without path information
  return nil unless lang.respond_to?(:path) && lang.path

  begin
    # Use Language.from_library which respects current backend
    Language.from_library(
      lang.path,
      symbol: (lang.symbol if lang.respond_to?(:symbol)),
      name: (lang.name if lang.respond_to?(:name)),
    )
  rescue => failure
    # Reload failed; the caller decides what to do with nil
    warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{failure.message}") if $VERBOSE
    nil
  end
end
|
|
437
|
-
end
|
|
438
|
-
end
|
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
|
|
4
|
-
# Utility for finding and registering Parslet grammar gems.
#
# Given a language name and gem information, the finder requires the gem,
# resolves the grammar constant and, on request, registers the grammar with
# TreeHaver.
#
# Unlike tree-sitter grammars (which are .so files), Parslet grammars are
# Ruby classes that inherit from Parslet::Parser.
#
# @example Basic usage with toml gem
#   finder = TreeHaver::ParsletGrammarFinder.new(
#     language: :toml,
#     gem_name: "toml",
#     grammar_const: "TOML::Parslet"
#   )
#   finder.register! if finder.available?
#
# @example With custom require path
#   finder = TreeHaver::ParsletGrammarFinder.new(
#     language: :json,
#     gem_name: "json-parslet",
#     grammar_const: "JsonParslet::Grammar",
#     require_path: "json/parslet"
#   )
#
# @see GrammarFinder For tree-sitter grammar discovery
# @see CitrusGrammarFinder For Citrus grammar discovery
class ParsletGrammarFinder
  # @return [Symbol] the language identifier
  attr_reader :language_name

  # @return [String] the gem name to require
  attr_reader :gem_name

  # @return [String] the constant path to the grammar class (e.g., "TOML::Parslet")
  attr_reader :grammar_const

  # @return [String, nil] custom require path (defaults to gem_name)
  attr_reader :require_path

  # Initialize a Parslet grammar finder
  #
  # Nothing is loaded here; the gem is required lazily by #available?.
  #
  # @param language [Symbol, String] the language name (e.g., :toml, :json)
  # @param gem_name [String] the gem name (e.g., "toml")
  # @param grammar_const [String] constant path to grammar class
  #   (e.g., "TOML::Parslet"; a leading "::" is accepted)
  # @param require_path [String, nil] custom require path (defaults to gem_name as-is)
  def initialize(language:, gem_name:, grammar_const:, require_path: nil)
    @language_name = language.to_sym
    @gem_name = gem_name
    @grammar_const = grammar_const
    @require_path = require_path || gem_name
    @load_attempted = false
    @available = false
    @grammar_class = nil
  end

  # Check if the Parslet grammar is available
  #
  # Attempts to require the gem and resolve the grammar constant.
  # The result is cached after the first call, whether it succeeded or not.
  # Set the TREE_HAVER_DEBUG environment variable for diagnostics on failure.
  #
  # @return [Boolean] true if grammar is available
  def available?
    return @available if @load_attempted

    @load_attempted = true
    debug = ENV["TREE_HAVER_DEBUG"]

    # Guard against nil require_path (can happen if gem_name was nil)
    if @require_path.nil? || @require_path.empty?
      warn("ParsletGrammarFinder: require_path is nil or empty for #{@language_name}") if debug
      return @available = false
    end

    @available = load_grammar(debug)
  end

  # Get the resolved grammar class
  #
  # @return [Class, nil] the grammar class, or nil when the grammar is not
  #   available (including when the constant resolved but failed validation)
  def grammar_class
    available? # Ensure we've tried to load
    @grammar_class
  end

  # Register this Parslet grammar with TreeHaver
  #
  # After registration, the language can be used via:
  #   TreeHaver::Language.{language_name}
  #
  # @param raise_on_missing [Boolean] if true, raises when grammar not available
  # @return [Boolean] true if registration succeeded
  # @raise [NotAvailable] if grammar not available and raise_on_missing is true
  def register!(raise_on_missing: false)
    unless available?
      raise NotAvailable, not_found_message if raise_on_missing

      return false
    end

    TreeHaver.register_language(
      @language_name,
      grammar_class: @grammar_class,
      gem_name: @gem_name,
    )
    true
  end

  # Get debug information about the search
  #
  # Calling this triggers the load attempt if it has not happened yet.
  #
  # @return [Hash] diagnostic information
  def search_info
    {
      language: @language_name,
      gem_name: @gem_name,
      grammar_const: @grammar_const,
      require_path: @require_path,
      available: available?,
      grammar_class: @grammar_class&.name,
    }
  end

  # Get a human-readable error message when grammar is not found
  #
  # @return [String] error message with installation hints
  def not_found_message
    "Parslet grammar for #{@language_name} not found. " \
      "Install #{@gem_name} gem: gem install #{@gem_name}"
  end

  private

  # Require the gem, resolve the grammar constant and validate it.
  #
  # @grammar_class is only assigned once the constant has passed validation,
  # so #grammar_class never exposes a constant that is not a usable grammar.
  #
  # @param debug [String, nil] truthy to emit diagnostics through `warn`
  # @return [Boolean] true if a valid grammar class was loaded
  def load_grammar(debug)
    require @require_path

    candidate = resolve_constant(@grammar_const)

    # Verify it can create a parser instance with a parse method
    unless valid_grammar_class?(candidate)
      if debug
        warn("ParsletGrammarFinder: #{@grammar_const} is not a valid Parslet grammar class")
        warn("ParsletGrammarFinder: #{@grammar_const}.class = #{candidate.class}")
      end
      return false
    end

    @grammar_class = candidate
    true
  rescue LoadError => e
    # :nocov: defensive - requires gem to not be installed
    if debug
      warn("ParsletGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
      warn_backtrace(e, "LoadError")
    end
    false
    # :nocov:
  rescue NameError => e
    # :nocov: defensive - requires gem with missing constant
    if debug
      warn("ParsletGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
      warn_backtrace(e, "NameError")
    end
    false
    # :nocov:
  rescue TypeError => e
    # :nocov: defensive - TruffleRuby-specific edge case
    warn("ParsletGrammarFinder: TypeError during load of '#{@require_path}': #{e.class}: #{e.message}")
    warn("ParsletGrammarFinder: This may be a TruffleRuby bundled_gems.rb issue")
    warn_backtrace(e, "TypeError") if debug
    false
    # :nocov:
  rescue => e
    # :nocov: defensive - catch-all for unexpected errors
    warn("ParsletGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
    warn_backtrace(e) if debug
    false
    # :nocov:
  end

  # Emit the first ten backtrace frames of an error through `warn`.
  #
  # @param error [Exception] the error whose backtrace is reported
  # @param label [String, nil] error kind shown before the word "backtrace"
  # @return [void]
  def warn_backtrace(error, label = nil)
    heading = label ? "#{label} backtrace" : "backtrace"
    warn("ParsletGrammarFinder: #{heading}:\n #{error.backtrace&.first(10)&.join("\n ")}")
  end

  # Resolve a constant path like "TOML::Parslet"
  #
  # An absolute path such as "::TOML::Parslet" is accepted too.
  #
  # @param const_path [String] constant path
  # @return [Object] the constant
  # @raise [NameError] if constant not found
  def resolve_constant(const_path)
    segments = const_path.split("::")
    # "::Foo::Bar" splits into ["", "Foo", "Bar"]; the empty head only marks
    # the path as absolute, and lookup starts at Object anyway.
    segments.shift if segments.first == ""

    segments.reduce(Object) { |scope, const_name| scope.const_get(const_name) }
  end

  # Check if the class is a valid Parslet grammar
  #
  # A Parslet::Parser subclass is accepted outright (when Parslet is loaded).
  # Anything else is accepted if `new` yields an object responding to `parse`.
  #
  # @param klass [Class] the class to check
  # @return [Boolean] true if valid
  def valid_grammar_class?(klass)
    return false unless klass.respond_to?(:new)

    # `<` is only meaningful on modules/classes; other objects that happen to
    # respond to `new` go straight to the duck-typing check below.
    if defined?(::Parslet::Parser) && klass.is_a?(Module)
      return true if klass < ::Parslet::Parser
    end

    # Fallback: check if it can create an instance that responds to parse
    begin
      klass.new.respond_to?(:parse)
    rescue StandardError
      false
    end
  end
end
|
|
224
|
-
end
|