tree_haver 4.0.5 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +144 -1
- data/README.md +497 -356
- data/lib/tree_haver/backends/citrus.rb +98 -114
- data/lib/tree_haver/backends/ffi.rb +76 -13
- data/lib/tree_haver/backends/java.rb +99 -14
- data/lib/tree_haver/backends/mri.rb +25 -1
- data/lib/tree_haver/backends/parslet.rb +560 -0
- data/lib/tree_haver/backends/prism.rb +1 -1
- data/lib/tree_haver/backends/psych.rb +1 -1
- data/lib/tree_haver/backends/rust.rb +1 -1
- data/lib/tree_haver/base/node.rb +8 -1
- data/lib/tree_haver/language.rb +44 -13
- data/lib/tree_haver/parser.rb +128 -38
- data/lib/tree_haver/parslet_grammar_finder.rb +224 -0
- data/lib/tree_haver/point.rb +6 -44
- data/lib/tree_haver/rspec/dependency_tags.rb +40 -1
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +100 -13
- data.tar.gz.sig +0 -0
- metadata +15 -14
- metadata.gz.sig +0 -0
data/lib/tree_haver/language.rb
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
#
|
|
4
|
+
# Factory module for loading language grammars
|
|
5
5
|
#
|
|
6
6
|
# Language is the entry point for loading and using grammars. It provides
|
|
7
|
-
# a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus).
|
|
7
|
+
# a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus, Parslet).
|
|
8
|
+
#
|
|
9
|
+
# This is a module with only module methods (factory pattern), not a class.
|
|
10
|
+
# Backend-specific Language classes (e.g., Backends::Citrus::Language,
|
|
11
|
+
# Backends::Parslet::Language) inherit from Base::Language.
|
|
8
12
|
#
|
|
9
13
|
# For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
|
|
10
|
-
# For pure-Ruby backends (Citrus, Prism, Psych), languages are built-in or provided by gems.
|
|
14
|
+
# For pure-Ruby backends (Citrus, Parslet, Prism, Psych), languages are built-in or provided by gems.
|
|
11
15
|
#
|
|
12
16
|
# == Loading Languages
|
|
13
17
|
#
|
|
@@ -30,7 +34,9 @@ module TreeHaver
|
|
|
30
34
|
# @example Register and load a language
|
|
31
35
|
# TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
|
|
32
36
|
# language = TreeHaver::Language.toml
|
|
33
|
-
|
|
37
|
+
#
|
|
38
|
+
# @see Base::Language The base class that backend Language classes inherit from
|
|
39
|
+
module Language
|
|
34
40
|
class << self
|
|
35
41
|
# Load a language grammar from a shared library (ruby_tree_sitter compatibility)
|
|
36
42
|
#
|
|
@@ -157,6 +163,8 @@ module TreeHaver
|
|
|
157
163
|
# Determine which backend type to use
|
|
158
164
|
backend_type = if current_backend == Backends::Citrus
|
|
159
165
|
:citrus
|
|
166
|
+
elsif current_backend == Backends::Parslet
|
|
167
|
+
:parslet
|
|
160
168
|
else
|
|
161
169
|
:tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
|
|
162
170
|
end
|
|
@@ -177,6 +185,19 @@ module TreeHaver
|
|
|
177
185
|
"Registered backends: #{all_backends.keys.inspect}"
|
|
178
186
|
end
|
|
179
187
|
|
|
188
|
+
# If Parslet backend is active
|
|
189
|
+
if backend_type == :parslet
|
|
190
|
+
if reg && reg[:grammar_class]
|
|
191
|
+
return Backends::Parslet::Language.new(reg[:grammar_class])
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Fall back to error if no Parslet grammar registered
|
|
195
|
+
raise NotAvailable,
|
|
196
|
+
"Parslet backend is active but no Parslet grammar registered for :#{method_name}. " \
|
|
197
|
+
"Either register a Parslet grammar or use a tree-sitter backend. " \
|
|
198
|
+
"Registered backends: #{all_backends.keys.inspect}"
|
|
199
|
+
end
|
|
200
|
+
|
|
180
201
|
# For tree-sitter backends, try to load from path
|
|
181
202
|
# If that fails, fall back to Citrus if available
|
|
182
203
|
if reg && reg[:path]
|
|
@@ -203,15 +224,20 @@ module TreeHaver
|
|
|
203
224
|
end
|
|
204
225
|
end
|
|
205
226
|
|
|
206
|
-
# No tree-sitter path registered - check for Citrus fallback
|
|
227
|
+
# No tree-sitter path registered - check for Citrus or Parslet fallback
|
|
207
228
|
# This enables auto-fallback when tree-sitter grammar is not installed
|
|
208
|
-
# but a
|
|
229
|
+
# but a pure Ruby grammar (Citrus or Parslet) is available.
|
|
209
230
|
# Only fall back when backend is :auto - explicit native backend requests should fail.
|
|
210
231
|
if TreeHaver.effective_backend == :auto
|
|
211
232
|
citrus_reg = all_backends[:citrus]
|
|
212
233
|
if citrus_reg && citrus_reg[:grammar_module]
|
|
213
234
|
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
214
235
|
end
|
|
236
|
+
|
|
237
|
+
parslet_reg = all_backends[:parslet]
|
|
238
|
+
if parslet_reg && parslet_reg[:grammar_class]
|
|
239
|
+
return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
|
|
240
|
+
end
|
|
215
241
|
end
|
|
216
242
|
|
|
217
243
|
# No appropriate registration found
|
|
@@ -227,27 +253,27 @@ module TreeHaver
|
|
|
227
253
|
|
|
228
254
|
private
|
|
229
255
|
|
|
230
|
-
# Handle tree-sitter load failure with optional Citrus fallback
|
|
256
|
+
# Handle tree-sitter load failure with optional Citrus/Parslet fallback
|
|
231
257
|
#
|
|
232
258
|
# This handles cases where:
|
|
233
259
|
# - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
|
|
234
260
|
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError inherits from LoadError)
|
|
235
261
|
# - Invalid arguments were provided (ArgumentError)
|
|
236
262
|
#
|
|
237
|
-
# Fallback to Citrus ONLY happens when:
|
|
263
|
+
# Fallback to Citrus/Parslet ONLY happens when:
|
|
238
264
|
# - The effective backend is :auto (user didn't explicitly request a native backend)
|
|
239
|
-
# - A Citrus grammar is registered for the language
|
|
265
|
+
# - A Citrus or Parslet grammar is registered for the language
|
|
240
266
|
#
|
|
241
267
|
# If the user explicitly requested a native backend (:mri, :rust, :ffi, :java),
|
|
242
|
-
# we should NOT silently fall back to
|
|
268
|
+
# we should NOT silently fall back to pure Ruby - that would violate the user's intent.
|
|
243
269
|
#
|
|
244
270
|
# @param error [Exception] the original error
|
|
245
271
|
# @param all_backends [Hash] all registered backends for the language
|
|
246
|
-
# @return [Backends::Citrus::Language] if
|
|
272
|
+
# @return [Backends::Citrus::Language, Backends::Parslet::Language] if fallback available and allowed
|
|
247
273
|
# @raise [Exception] re-raises original error if no fallback or fallback not allowed
|
|
248
274
|
# @api private
|
|
249
275
|
def handle_tree_sitter_load_failure(error, all_backends)
|
|
250
|
-
# Only fall back to
|
|
276
|
+
# Only fall back to pure Ruby when backend is :auto
|
|
251
277
|
# If user explicitly requested a native backend, respect that choice
|
|
252
278
|
effective = TreeHaver.effective_backend
|
|
253
279
|
if effective == :auto
|
|
@@ -255,8 +281,13 @@ module TreeHaver
|
|
|
255
281
|
if citrus_reg && citrus_reg[:grammar_module]
|
|
256
282
|
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
257
283
|
end
|
|
284
|
+
|
|
285
|
+
parslet_reg = all_backends[:parslet]
|
|
286
|
+
if parslet_reg && parslet_reg[:grammar_class]
|
|
287
|
+
return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
|
|
288
|
+
end
|
|
258
289
|
end
|
|
259
|
-
# No
|
|
290
|
+
# No pure Ruby fallback allowed or available, re-raise the original error
|
|
260
291
|
raise error
|
|
261
292
|
end
|
|
262
293
|
end
|
data/lib/tree_haver/parser.rb
CHANGED
|
@@ -1,20 +1,45 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
#
|
|
4
|
+
# Unified Parser facade providing a consistent API across all backends
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
6
|
+
# This class acts as a facade/adapter that delegates to backend-specific
|
|
7
|
+
# parser implementations. It automatically selects the appropriate backend
|
|
8
|
+
# and provides a unified interface regardless of which parser is being used.
|
|
9
|
+
#
|
|
10
|
+
# == Backend Selection
|
|
11
|
+
#
|
|
12
|
+
# The parser automatically selects a backend based on:
|
|
13
|
+
# 1. Explicit `backend:` parameter in constructor
|
|
14
|
+
# 2. `TreeHaver.backend` global setting
|
|
15
|
+
# 3. `TREE_HAVER_BACKEND` environment variable
|
|
16
|
+
# 4. Auto-detection (tries available backends in order)
|
|
17
|
+
#
|
|
18
|
+
# == Supported Backends
|
|
19
|
+
#
|
|
20
|
+
# **Tree-sitter backends** (native, high-performance):
|
|
21
|
+
# - `:mri` - ruby_tree_sitter gem (C extension, MRI only)
|
|
22
|
+
# - `:rust` - tree_stump gem (Rust via magnus, MRI only)
|
|
23
|
+
# - `:ffi` - FFI bindings to libtree-sitter (MRI, JRuby)
|
|
24
|
+
# - `:java` - java-tree-sitter (JRuby only)
|
|
25
|
+
#
|
|
26
|
+
# **Pure Ruby backends** (portable, no native dependencies):
|
|
27
|
+
# - `:citrus` - Citrus PEG parser (e.g., toml-rb)
|
|
28
|
+
# - `:parslet` - Parslet PEG parser (e.g., toml gem)
|
|
29
|
+
# - `:prism` - Ruby's official parser (Ruby only)
|
|
30
|
+
# - `:psych` - YAML parser (stdlib)
|
|
8
31
|
#
|
|
9
32
|
# == Wrapping/Unwrapping Responsibility
|
|
10
33
|
#
|
|
11
|
-
# TreeHaver::Parser
|
|
34
|
+
# TreeHaver::Parser handles ALL object wrapping and unwrapping:
|
|
12
35
|
#
|
|
13
36
|
# **Language objects:**
|
|
14
37
|
# - Unwraps Language wrappers before passing to backend.language=
|
|
15
38
|
# - MRI backend receives ::TreeSitter::Language
|
|
16
39
|
# - Rust backend receives String (language name)
|
|
17
40
|
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
41
|
+
# - Citrus backend receives grammar module
|
|
42
|
+
# - Parslet backend receives grammar class
|
|
18
43
|
#
|
|
19
44
|
# **Tree objects:**
|
|
20
45
|
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
@@ -33,16 +58,32 @@ module TreeHaver
|
|
|
33
58
|
# parser = TreeHaver::Parser.new
|
|
34
59
|
# parser.language = TreeHaver::Language.toml
|
|
35
60
|
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
36
|
-
|
|
61
|
+
#
|
|
62
|
+
# @example Explicit backend selection
|
|
63
|
+
# parser = TreeHaver::Parser.new(backend: :citrus)
|
|
64
|
+
# parser.language = TreeHaver::Language.toml
|
|
65
|
+
# tree = parser.parse(toml_source)
|
|
66
|
+
#
|
|
67
|
+
# @see Base::Parser The base class defining the parser interface
|
|
68
|
+
# @see Backends::Citrus::Parser Citrus backend implementation
|
|
69
|
+
# @see Backends::Parslet::Parser Parslet backend implementation
|
|
70
|
+
# @see Backends::Prism::Parser Prism backend implementation
|
|
71
|
+
class Parser < Base::Parser
|
|
37
72
|
# Create a new parser instance
|
|
38
73
|
#
|
|
74
|
+
# The parser automatically selects the best available backend unless
|
|
75
|
+
# explicitly specified. Use the `backend:` parameter to force a specific backend.
|
|
76
|
+
#
|
|
39
77
|
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
78
|
+
# Valid values: :auto, :mri, :rust, :ffi, :java, :citrus, :parslet, :prism, :psych
|
|
40
79
|
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
|
|
41
|
-
# @example Default (
|
|
80
|
+
# @example Default (auto-selects best available backend)
|
|
42
81
|
# parser = TreeHaver::Parser.new
|
|
43
82
|
# @example Explicit backend
|
|
44
|
-
# parser = TreeHaver::Parser.new(backend: :
|
|
83
|
+
# parser = TreeHaver::Parser.new(backend: :citrus)
|
|
45
84
|
def initialize(backend: nil)
|
|
85
|
+
super() # Initialize @language from Base::Parser
|
|
86
|
+
|
|
46
87
|
# Convert string backend names to symbols for consistency
|
|
47
88
|
backend = backend.to_sym if backend.is_a?(String)
|
|
48
89
|
|
|
@@ -56,7 +97,7 @@ module TreeHaver
|
|
|
56
97
|
end
|
|
57
98
|
end
|
|
58
99
|
|
|
59
|
-
# Try to create the parser, with fallback to
|
|
100
|
+
# Try to create the parser, with fallback to pure Ruby if tree-sitter fails
|
|
60
101
|
# This enables auto-fallback when tree-sitter runtime isn't available
|
|
61
102
|
begin
|
|
62
103
|
@impl = mod::Parser.new
|
|
@@ -67,7 +108,7 @@ module TreeHaver
|
|
|
67
108
|
end
|
|
68
109
|
end
|
|
69
110
|
|
|
70
|
-
# Handle parser creation failure with optional Citrus fallback
|
|
111
|
+
# Handle parser creation failure with optional Citrus/Parslet fallback
|
|
71
112
|
#
|
|
72
113
|
# @param error [Exception] the error that caused parser creation to fail
|
|
73
114
|
# @param backend [Symbol, nil] the requested backend
|
|
@@ -75,15 +116,18 @@ module TreeHaver
|
|
|
75
116
|
# @api private
|
|
76
117
|
def handle_parser_creation_failure(error, backend)
|
|
77
118
|
# Tree-sitter backend failed (likely missing runtime library)
|
|
78
|
-
# Try Citrus as fallback if we weren't explicitly asked for a specific backend
|
|
119
|
+
# Try Citrus or Parslet as fallback if we weren't explicitly asked for a specific backend
|
|
79
120
|
if backend.nil? || backend == :auto
|
|
80
121
|
if Backends::Citrus.available?
|
|
81
122
|
@impl = Backends::Citrus::Parser.new
|
|
82
123
|
@explicit_backend = :citrus
|
|
124
|
+
elsif Backends::Parslet.available?
|
|
125
|
+
@impl = Backends::Parslet::Parser.new
|
|
126
|
+
@explicit_backend = :parslet
|
|
83
127
|
else
|
|
84
128
|
# No fallback available, re-raise original error
|
|
85
129
|
raise NotAvailable, "Tree-sitter backend failed: #{error.message}. " \
|
|
86
|
-
"Citrus fallback not available. Install tree-sitter runtime or
|
|
130
|
+
"Citrus/Parslet fallback not available. Install tree-sitter runtime, citrus gem, or parslet gem."
|
|
87
131
|
end
|
|
88
132
|
else
|
|
89
133
|
# Explicit backend was requested, don't fallback
|
|
@@ -95,7 +139,7 @@ module TreeHaver
|
|
|
95
139
|
#
|
|
96
140
|
# Returns the actual backend in use, resolving :auto to the concrete backend.
|
|
97
141
|
#
|
|
98
|
-
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :
|
|
142
|
+
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, :citrus, or :parslet)
|
|
99
143
|
def backend
|
|
100
144
|
if @explicit_backend && @explicit_backend != :auto
|
|
101
145
|
@explicit_backend
|
|
@@ -112,6 +156,8 @@ module TreeHaver
|
|
|
112
156
|
:java
|
|
113
157
|
when /Citrus/
|
|
114
158
|
:citrus
|
|
159
|
+
when /Parslet/
|
|
160
|
+
:parslet
|
|
115
161
|
else
|
|
116
162
|
# Fallback to effective_backend if we can't determine from class name
|
|
117
163
|
TreeHaver.effective_backend
|
|
@@ -121,28 +167,27 @@ module TreeHaver
|
|
|
121
167
|
|
|
122
168
|
# Set the language grammar for this parser
|
|
123
169
|
#
|
|
170
|
+
# The language must be compatible with the parser's backend. If a mismatch
|
|
171
|
+
# is detected (e.g., Citrus language on tree-sitter parser), the parser
|
|
172
|
+
# will automatically switch to the correct backend.
|
|
173
|
+
#
|
|
124
174
|
# @param lang [Language] the language to use for parsing
|
|
125
175
|
# @return [Language] the language that was set
|
|
126
176
|
# @example
|
|
127
177
|
# parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
128
178
|
def language=(lang)
|
|
129
|
-
#
|
|
130
|
-
# This
|
|
131
|
-
#
|
|
132
|
-
|
|
133
|
-
unless @impl.is_a?(Backends::Citrus::Parser)
|
|
134
|
-
# Switch to Citrus parser to match the Citrus language
|
|
135
|
-
@impl = Backends::Citrus::Parser.new
|
|
136
|
-
@explicit_backend = :citrus
|
|
137
|
-
end
|
|
138
|
-
end
|
|
179
|
+
# Auto-switch backend if language type doesn't match current parser
|
|
180
|
+
# This handles the case where Language.toml returns a Citrus/Parslet language
|
|
181
|
+
# but the parser was initialized with a tree-sitter backend
|
|
182
|
+
switch_backend_for_language(lang)
|
|
139
183
|
|
|
140
184
|
# Unwrap the language before passing to backend
|
|
141
185
|
# Backends receive raw language objects, never TreeHaver wrappers
|
|
142
186
|
inner_lang = unwrap_language(lang)
|
|
143
187
|
@impl.language = inner_lang
|
|
144
|
-
|
|
145
|
-
|
|
188
|
+
|
|
189
|
+
# Store on base class for API compatibility
|
|
190
|
+
@language = lang
|
|
146
191
|
end
|
|
147
192
|
|
|
148
193
|
# Parse source code into a syntax tree
|
|
@@ -218,6 +263,51 @@ module TreeHaver
|
|
|
218
263
|
|
|
219
264
|
private
|
|
220
265
|
|
|
266
|
+
# Switch backend if language type doesn't match current parser
|
|
267
|
+
#
|
|
268
|
+
# This is necessary because TreeHaver.parser_for may return a Language
|
|
269
|
+
# from a different backend than the Parser was initialized with.
|
|
270
|
+
# For example, Language.toml might return a Citrus::Language when
|
|
271
|
+
# tree-sitter-toml is not available, but Parser was initialized with :auto.
|
|
272
|
+
#
|
|
273
|
+
# @param lang [Object] The language object
|
|
274
|
+
# @api private
|
|
275
|
+
def switch_backend_for_language(lang)
  # Swap @impl for the pure-Ruby parser that matches the language's backend.
  # Languages that do not report a backend are left alone.
  return unless lang.respond_to?(:backend)

  lang_backend = lang.backend

  # No switch needed if the parser already runs on the language's backend.
  return if lang_backend == backend

  # Only the pure-Ruby backends are switchable. Tree-sitter backends
  # (:mri, :rust, :ffi, :java) are deliberately absent: moving between them
  # would require reloading the language from its shared library.
  # The constants are referenced inside the branches so that only the
  # backend actually needed is resolved.
  parser_class =
    case lang_backend
    when :citrus then Backends::Citrus::Parser
    when :parslet then Backends::Parslet::Parser
    when :prism then Backends::Prism::Parser
    when :psych then Backends::Psych::Parser
    end

  # Unknown/non-switchable backend, or the implementation already matches.
  return if parser_class.nil? || @impl.is_a?(parser_class)

  @impl = parser_class.new
  @explicit_backend = lang_backend
end
|
|
310
|
+
|
|
221
311
|
# Unwrap a language object to extract the raw backend language
|
|
222
312
|
#
|
|
223
313
|
# This method is smart about backend compatibility:
|
|
@@ -274,24 +364,26 @@ module TreeHaver
|
|
|
274
364
|
|
|
275
365
|
case lang.backend
|
|
276
366
|
when :mri
|
|
277
|
-
|
|
278
|
-
|
|
367
|
+
lang.to_language if lang.respond_to?(:to_language)
|
|
368
|
+
lang.inner_language if lang.respond_to?(:inner_language)
|
|
279
369
|
when :rust
|
|
280
|
-
|
|
370
|
+
lang.name if lang.respond_to?(:name)
|
|
281
371
|
when :ffi
|
|
282
|
-
|
|
372
|
+
lang # FFI needs wrapper for to_ptr
|
|
283
373
|
when :java
|
|
284
|
-
|
|
374
|
+
lang.impl if lang.respond_to?(:impl)
|
|
285
375
|
when :citrus
|
|
286
|
-
|
|
376
|
+
lang # Citrus backend accepts Language wrapper (handles both)
|
|
377
|
+
when :parslet
|
|
378
|
+
lang # Parslet backend accepts Language wrapper (handles both)
|
|
287
379
|
when :prism
|
|
288
|
-
|
|
380
|
+
lang # Prism backend expects the Language wrapper
|
|
289
381
|
when :psych
|
|
290
|
-
|
|
382
|
+
lang # Psych backend expects the Language wrapper
|
|
291
383
|
when :commonmarker
|
|
292
|
-
|
|
384
|
+
lang # Commonmarker backend expects the Language wrapper
|
|
293
385
|
when :markly
|
|
294
|
-
|
|
386
|
+
lang # Markly backend expects the Language wrapper
|
|
295
387
|
else
|
|
296
388
|
# Unknown backend (e.g., test backend)
|
|
297
389
|
# Try generic unwrapping methods for flexibility in testing
|
|
@@ -299,15 +391,13 @@ module TreeHaver
|
|
|
299
391
|
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
300
392
|
return lang.impl if lang.respond_to?(:impl)
|
|
301
393
|
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
394
|
+
return lang.grammar_class if lang.respond_to?(:grammar_class)
|
|
302
395
|
return lang.name if lang.respond_to?(:name)
|
|
303
396
|
|
|
304
397
|
# If nothing works, pass through as-is
|
|
305
398
|
# This allows test languages to be passed directly
|
|
306
|
-
|
|
399
|
+
lang
|
|
307
400
|
end
|
|
308
|
-
|
|
309
|
-
# Shouldn't reach here, but just in case
|
|
310
|
-
lang
|
|
311
401
|
end
|
|
312
402
|
|
|
313
403
|
# Try to reload a language for the current backend
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
  # Utility for finding and registering Parslet grammar gems.
  #
  # ParsletGrammarFinder provides language-agnostic discovery of Parslet grammar
  # gems. Given a language name and gem information, it attempts to load the
  # grammar and register it with tree_haver.
  #
  # Unlike tree-sitter grammars (which are .so files), Parslet grammars are
  # Ruby classes that inherit from Parslet::Parser. This class handles the
  # discovery and registration of these grammars.
  #
  # @example Basic usage with toml gem
  #   finder = TreeHaver::ParsletGrammarFinder.new(
  #     language: :toml,
  #     gem_name: "toml",
  #     grammar_const: "TOML::Parslet"
  #   )
  #   finder.register! if finder.available?
  #
  # @example With custom require path
  #   finder = TreeHaver::ParsletGrammarFinder.new(
  #     language: :json,
  #     gem_name: "json-parslet",
  #     grammar_const: "JsonParslet::Grammar",
  #     require_path: "json/parslet"
  #   )
  #
  # @see GrammarFinder For tree-sitter grammar discovery
  # @see CitrusGrammarFinder For Citrus grammar discovery
  class ParsletGrammarFinder
    # @return [Symbol] the language identifier
    attr_reader :language_name

    # @return [String] the gem name to require
    attr_reader :gem_name

    # @return [String] the constant path to the grammar class (e.g., "TOML::Parslet")
    attr_reader :grammar_const

    # @return [String, nil] custom require path (defaults to gem_name)
    attr_reader :require_path

    # Initialize a Parslet grammar finder
    #
    # Nothing is loaded here; the gem is only required on the first call to
    # {#available?} (directly or via {#grammar_class}, {#register!}, {#search_info}).
    #
    # @param language [Symbol, String] the language name (e.g., :toml, :json)
    # @param gem_name [String] the gem name (e.g., "toml")
    # @param grammar_const [String] constant path to grammar class (e.g., "TOML::Parslet")
    # @param require_path [String, nil] custom require path (defaults to gem_name as-is)
    def initialize(language:, gem_name:, grammar_const:, require_path: nil)
      @language_name = language.to_sym
      @gem_name = gem_name
      @grammar_const = grammar_const
      @require_path = require_path || gem_name
      @load_attempted = false
      @available = false
      @grammar_class = nil
    end

    # Check if the Parslet grammar is available
    #
    # Attempts to require the gem and resolve the grammar constant.
    # Result is cached after first call (including negative results).
    # Diagnostic output for expected failures is only emitted when the
    # TREE_HAVER_DEBUG environment variable is set.
    #
    # @return [Boolean] true if grammar is available
    def available?
      return @available if @load_attempted

      @load_attempted = true
      debug = ENV["TREE_HAVER_DEBUG"]

      # Guard against nil require_path (can happen if gem_name was nil)
      if @require_path.nil? || @require_path.empty?
        warn("ParsletGrammarFinder: require_path is nil or empty for #{@language_name}") if debug
        @available = false
        return false
      end

      begin
        # Try to require the gem
        require @require_path

        # Resolve into a local first: @grammar_class must only ever hold a
        # constant that passed validation, otherwise #grammar_class and
        # #search_info would expose a rejected object.
        candidate = resolve_constant(@grammar_const)

        # Verify it can create a parser instance with a parse method
        unless valid_grammar_class?(candidate)
          if debug
            warn("ParsletGrammarFinder: #{@grammar_const} is not a valid Parslet grammar class")
            warn("ParsletGrammarFinder: #{@grammar_const}.class = #{candidate.class}")
          end
          @available = false
          return false
        end

        @grammar_class = candidate
        @available = true
      rescue LoadError => e
        # :nocov: defensive - requires gem to not be installed
        if debug
          warn("ParsletGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
          warn("ParsletGrammarFinder: LoadError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
        end
        @available = false
        # :nocov:
      rescue NameError => e
        # :nocov: defensive - requires gem with missing constant
        if debug
          warn("ParsletGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
          warn("ParsletGrammarFinder: NameError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
        end
        @available = false
        # :nocov:
      rescue TypeError => e
        # :nocov: defensive - TruffleRuby-specific edge case
        warn("ParsletGrammarFinder: TypeError during load of '#{@require_path}': #{e.class}: #{e.message}")
        warn("ParsletGrammarFinder: This may be a TruffleRuby bundled_gems.rb issue")
        if debug
          warn("ParsletGrammarFinder: TypeError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
        end
        @available = false
        # :nocov:
      rescue => e
        # :nocov: defensive - catch-all for unexpected errors
        warn("ParsletGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
        if debug
          warn("ParsletGrammarFinder: backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
        end
        @available = false
        # :nocov:
      end

      @available
    end

    # Get the resolved grammar class
    #
    # @return [Class, nil] the grammar class if available, nil otherwise
    def grammar_class
      available? # Ensure we've tried to load
      @grammar_class
    end

    # Register this Parslet grammar with TreeHaver
    #
    # After registration, the language can be used via:
    #   TreeHaver::Language.{language_name}
    #
    # @param raise_on_missing [Boolean] if true, raises when grammar not available
    # @return [Boolean] true if registration succeeded
    # @raise [NotAvailable] if grammar not available and raise_on_missing is true
    def register!(raise_on_missing: false)
      unless available?
        raise NotAvailable, not_found_message if raise_on_missing

        return false
      end

      TreeHaver.register_language(
        @language_name,
        grammar_class: @grammar_class,
        gem_name: @gem_name,
      )
      true
    end

    # Get debug information about the search
    #
    # Triggers the load attempt if it has not happened yet.
    #
    # @return [Hash] diagnostic information
    def search_info
      {
        language: @language_name,
        gem_name: @gem_name,
        grammar_const: @grammar_const,
        require_path: @require_path,
        available: available?,
        grammar_class: @grammar_class&.name,
      }
    end

    # Get a human-readable error message when grammar is not found
    #
    # @return [String] error message with installation hints
    def not_found_message
      "Parslet grammar for #{@language_name} not found. " \
        "Install #{@gem_name} gem: gem install #{@gem_name}"
    end

    private

    # Resolve a constant path like "TOML::Parslet"
    #
    # A leading "::" is accepted. The path is resolved in one scoped lookup so
    # that nested segments are not satisfied by unrelated top-level constants
    # (e.g. "TOML::String" must not silently resolve to ::String).
    #
    # @param const_path [String] constant path
    # @return [Object] the constant
    # @raise [NameError] if constant not found
    def resolve_constant(const_path)
      Object.const_get(const_path.to_s.sub(/\A::/, ""))
    rescue TypeError => e
      # Raised when an intermediate segment is not a class/module; report it
      # as an unresolved name, matching the documented contract.
      raise NameError, "#{const_path} does not resolve to a constant: #{e.message}"
    end

    # Check if the class is a valid Parslet grammar
    #
    # @param klass [Class] the class to check
    # @return [Boolean] true if valid
    def valid_grammar_class?(klass)
      return false unless klass.respond_to?(:new)

      # Check if it's a Parslet::Parser subclass. Module#< is only meaningful
      # (and only guaranteed to exist) when klass is itself a class/module.
      if defined?(::Parslet::Parser) && klass.is_a?(Module)
        return true if klass < ::Parslet::Parser
      end

      # Fallback: check if it can create an instance that responds to parse
      begin
        instance = klass.new
        instance.respond_to?(:parse)
      rescue StandardError
        false
      end
    end
  end
end
|