tree_haver 4.0.4 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +193 -2
- data/README.md +497 -356
- data/lib/tree_haver/backends/citrus.rb +98 -114
- data/lib/tree_haver/backends/ffi.rb +257 -14
- data/lib/tree_haver/backends/java.rb +99 -14
- data/lib/tree_haver/backends/mri.rb +25 -1
- data/lib/tree_haver/backends/parslet.rb +560 -0
- data/lib/tree_haver/backends/prism.rb +1 -1
- data/lib/tree_haver/backends/psych.rb +1 -1
- data/lib/tree_haver/backends/rust.rb +1 -1
- data/lib/tree_haver/base/node.rb +8 -1
- data/lib/tree_haver/grammar_finder.rb +1 -3
- data/lib/tree_haver/language.rb +46 -21
- data/lib/tree_haver/parser.rb +129 -45
- data/lib/tree_haver/parslet_grammar_finder.rb +224 -0
- data/lib/tree_haver/point.rb +6 -44
- data/lib/tree_haver/rspec/dependency_tags.rb +115 -19
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +100 -13
- data.tar.gz.sig +0 -0
- metadata +15 -14
- metadata.gz.sig +0 -0
data/lib/tree_haver/language.rb
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
#
|
|
4
|
+
# Factory module for loading language grammars
|
|
5
5
|
#
|
|
6
6
|
# Language is the entry point for loading and using grammars. It provides
|
|
7
|
-
# a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus).
|
|
7
|
+
# a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus, Parslet).
|
|
8
|
+
#
|
|
9
|
+
# This is a module with only module methods (factory pattern), not a class.
|
|
10
|
+
# Backend-specific Language classes (e.g., Backends::Citrus::Language,
|
|
11
|
+
# Backends::Parslet::Language) inherit from Base::Language.
|
|
8
12
|
#
|
|
9
13
|
# For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
|
|
10
|
-
# For pure-Ruby backends (Citrus, Prism, Psych), languages are built-in or provided by gems.
|
|
14
|
+
# For pure-Ruby backends (Citrus, Parslet, Prism, Psych), languages are built-in or provided by gems.
|
|
11
15
|
#
|
|
12
16
|
# == Loading Languages
|
|
13
17
|
#
|
|
@@ -30,7 +34,9 @@ module TreeHaver
|
|
|
30
34
|
# @example Register and load a language
|
|
31
35
|
# TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
|
|
32
36
|
# language = TreeHaver::Language.toml
|
|
33
|
-
|
|
37
|
+
#
|
|
38
|
+
# @see Base::Language The base class that backend Language classes inherit from
|
|
39
|
+
module Language
|
|
34
40
|
class << self
|
|
35
41
|
# Load a language grammar from a shared library (ruby_tree_sitter compatibility)
|
|
36
42
|
#
|
|
@@ -157,6 +163,8 @@ module TreeHaver
|
|
|
157
163
|
# Determine which backend type to use
|
|
158
164
|
backend_type = if current_backend == Backends::Citrus
|
|
159
165
|
:citrus
|
|
166
|
+
elsif current_backend == Backends::Parslet
|
|
167
|
+
:parslet
|
|
160
168
|
else
|
|
161
169
|
:tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
|
|
162
170
|
end
|
|
@@ -177,6 +185,19 @@ module TreeHaver
|
|
|
177
185
|
"Registered backends: #{all_backends.keys.inspect}"
|
|
178
186
|
end
|
|
179
187
|
|
|
188
|
+
# If Parslet backend is active
|
|
189
|
+
if backend_type == :parslet
|
|
190
|
+
if reg && reg[:grammar_class]
|
|
191
|
+
return Backends::Parslet::Language.new(reg[:grammar_class])
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Fall back to error if no Parslet grammar registered
|
|
195
|
+
raise NotAvailable,
|
|
196
|
+
"Parslet backend is active but no Parslet grammar registered for :#{method_name}. " \
|
|
197
|
+
"Either register a Parslet grammar or use a tree-sitter backend. " \
|
|
198
|
+
"Registered backends: #{all_backends.keys.inspect}"
|
|
199
|
+
end
|
|
200
|
+
|
|
180
201
|
# For tree-sitter backends, try to load from path
|
|
181
202
|
# If that fails, fall back to Citrus or Parslet if available
|
|
182
203
|
if reg && reg[:path]
|
|
@@ -198,26 +219,25 @@ module TreeHaver
|
|
|
198
219
|
return from_library(path, symbol: symbol, name: name)
|
|
199
220
|
rescue NotAvailable, ArgumentError, LoadError => e
|
|
200
221
|
# Tree-sitter failed to load - check for Citrus/Parslet fallback
|
|
222
|
+
# Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
|
|
201
223
|
handle_tree_sitter_load_failure(e, all_backends)
|
|
202
|
-
rescue => e
|
|
203
|
-
# Also catch FFI::NotFoundError if FFI is loaded (can't reference directly as FFI may not exist)
|
|
204
|
-
if defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)
|
|
205
|
-
handle_tree_sitter_load_failure(e, all_backends)
|
|
206
|
-
else
|
|
207
|
-
raise
|
|
208
|
-
end
|
|
209
224
|
end
|
|
210
225
|
end
|
|
211
226
|
|
|
212
|
-
# No tree-sitter path registered - check for Citrus fallback
|
|
227
|
+
# No tree-sitter path registered - check for Citrus or Parslet fallback
|
|
213
228
|
# This enables auto-fallback when tree-sitter grammar is not installed
|
|
214
|
-
# but a
|
|
229
|
+
# but a pure Ruby grammar (Citrus or Parslet) is available.
|
|
215
230
|
# Only fall back when backend is :auto - explicit native backend requests should fail.
|
|
216
231
|
if TreeHaver.effective_backend == :auto
|
|
217
232
|
citrus_reg = all_backends[:citrus]
|
|
218
233
|
if citrus_reg && citrus_reg[:grammar_module]
|
|
219
234
|
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
220
235
|
end
|
|
236
|
+
|
|
237
|
+
parslet_reg = all_backends[:parslet]
|
|
238
|
+
if parslet_reg && parslet_reg[:grammar_class]
|
|
239
|
+
return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
|
|
240
|
+
end
|
|
221
241
|
end
|
|
222
242
|
|
|
223
243
|
# No appropriate registration found
|
|
@@ -233,27 +253,27 @@ module TreeHaver
|
|
|
233
253
|
|
|
234
254
|
private
|
|
235
255
|
|
|
236
|
-
# Handle tree-sitter load failure with optional Citrus fallback
|
|
256
|
+
# Handle tree-sitter load failure with optional Citrus/Parslet fallback
|
|
237
257
|
#
|
|
238
258
|
# This handles cases where:
|
|
239
259
|
# - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
|
|
240
|
-
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
|
|
260
|
+
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError inherits from LoadError)
|
|
241
261
|
# - Invalid arguments were provided (ArgumentError)
|
|
242
262
|
#
|
|
243
|
-
# Fallback to Citrus ONLY happens when:
|
|
263
|
+
# Fallback to Citrus/Parslet ONLY happens when:
|
|
244
264
|
# - The effective backend is :auto (user didn't explicitly request a native backend)
|
|
245
|
-
# - A Citrus grammar is registered for the language
|
|
265
|
+
# - A Citrus or Parslet grammar is registered for the language
|
|
246
266
|
#
|
|
247
267
|
# If the user explicitly requested a native backend (:mri, :rust, :ffi, :java),
|
|
248
|
-
# we should NOT silently fall back to
|
|
268
|
+
# we should NOT silently fall back to pure Ruby - that would violate the user's intent.
|
|
249
269
|
#
|
|
250
270
|
# @param error [Exception] the original error
|
|
251
271
|
# @param all_backends [Hash] all registered backends for the language
|
|
252
|
-
# @return [Backends::Citrus::Language] if
|
|
272
|
+
# @return [Backends::Citrus::Language, Backends::Parslet::Language] if fallback available and allowed
|
|
253
273
|
# @raise [Exception] re-raises original error if no fallback or fallback not allowed
|
|
254
274
|
# @api private
|
|
255
275
|
def handle_tree_sitter_load_failure(error, all_backends)
|
|
256
|
-
# Only fall back to
|
|
276
|
+
# Only fall back to pure Ruby when backend is :auto
|
|
257
277
|
# If user explicitly requested a native backend, respect that choice
|
|
258
278
|
effective = TreeHaver.effective_backend
|
|
259
279
|
if effective == :auto
|
|
@@ -261,8 +281,13 @@ module TreeHaver
|
|
|
261
281
|
if citrus_reg && citrus_reg[:grammar_module]
|
|
262
282
|
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
263
283
|
end
|
|
284
|
+
|
|
285
|
+
parslet_reg = all_backends[:parslet]
|
|
286
|
+
if parslet_reg && parslet_reg[:grammar_class]
|
|
287
|
+
return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
|
|
288
|
+
end
|
|
264
289
|
end
|
|
265
|
-
# No
|
|
290
|
+
# No pure Ruby fallback allowed or available, re-raise the original error
|
|
266
291
|
raise error
|
|
267
292
|
end
|
|
268
293
|
end
|
data/lib/tree_haver/parser.rb
CHANGED
|
@@ -1,20 +1,45 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
#
|
|
4
|
+
# Unified Parser facade providing a consistent API across all backends
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
6
|
+
# This class acts as a facade/adapter that delegates to backend-specific
|
|
7
|
+
# parser implementations. It automatically selects the appropriate backend
|
|
8
|
+
# and provides a unified interface regardless of which parser is being used.
|
|
9
|
+
#
|
|
10
|
+
# == Backend Selection
|
|
11
|
+
#
|
|
12
|
+
# The parser automatically selects a backend based on:
|
|
13
|
+
# 1. Explicit `backend:` parameter in constructor
|
|
14
|
+
# 2. `TreeHaver.backend` global setting
|
|
15
|
+
# 3. `TREE_HAVER_BACKEND` environment variable
|
|
16
|
+
# 4. Auto-detection (tries available backends in order)
|
|
17
|
+
#
|
|
18
|
+
# == Supported Backends
|
|
19
|
+
#
|
|
20
|
+
# **Tree-sitter backends** (native, high-performance):
|
|
21
|
+
# - `:mri` - ruby_tree_sitter gem (C extension, MRI only)
|
|
22
|
+
# - `:rust` - tree_stump gem (Rust via magnus, MRI only)
|
|
23
|
+
# - `:ffi` - FFI bindings to libtree-sitter (MRI, JRuby)
|
|
24
|
+
# - `:java` - java-tree-sitter (JRuby only)
|
|
25
|
+
#
|
|
26
|
+
# **Pure Ruby backends** (portable, no native dependencies):
|
|
27
|
+
# - `:citrus` - Citrus PEG parser (e.g., toml-rb)
|
|
28
|
+
# - `:parslet` - Parslet PEG parser (e.g., toml gem)
|
|
29
|
+
# - `:prism` - Ruby's official parser (Ruby only)
|
|
30
|
+
# - `:psych` - YAML parser (stdlib)
|
|
8
31
|
#
|
|
9
32
|
# == Wrapping/Unwrapping Responsibility
|
|
10
33
|
#
|
|
11
|
-
# TreeHaver::Parser
|
|
34
|
+
# TreeHaver::Parser handles ALL object wrapping and unwrapping:
|
|
12
35
|
#
|
|
13
36
|
# **Language objects:**
|
|
14
37
|
# - Unwraps Language wrappers before passing to backend.language=
|
|
15
38
|
# - MRI backend receives ::TreeSitter::Language
|
|
16
39
|
# - Rust backend receives String (language name)
|
|
17
40
|
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
41
|
+
# - Citrus backend receives the Language wrapper (which wraps the grammar module)
|
|
42
|
+
# - Parslet backend receives the Language wrapper (which wraps the grammar class)
|
|
18
43
|
#
|
|
19
44
|
# **Tree objects:**
|
|
20
45
|
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
@@ -33,16 +58,32 @@ module TreeHaver
|
|
|
33
58
|
# parser = TreeHaver::Parser.new
|
|
34
59
|
# parser.language = TreeHaver::Language.toml
|
|
35
60
|
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
36
|
-
|
|
61
|
+
#
|
|
62
|
+
# @example Explicit backend selection
|
|
63
|
+
# parser = TreeHaver::Parser.new(backend: :citrus)
|
|
64
|
+
# parser.language = TreeHaver::Language.toml
|
|
65
|
+
# tree = parser.parse(toml_source)
|
|
66
|
+
#
|
|
67
|
+
# @see Base::Parser The base class defining the parser interface
|
|
68
|
+
# @see Backends::Citrus::Parser Citrus backend implementation
|
|
69
|
+
# @see Backends::Parslet::Parser Parslet backend implementation
|
|
70
|
+
# @see Backends::Prism::Parser Prism backend implementation
|
|
71
|
+
class Parser < Base::Parser
|
|
37
72
|
# Create a new parser instance
|
|
38
73
|
#
|
|
74
|
+
# The parser automatically selects the best available backend unless
|
|
75
|
+
# explicitly specified. Use the `backend:` parameter to force a specific backend.
|
|
76
|
+
#
|
|
39
77
|
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
78
|
+
# Valid values: :auto, :mri, :rust, :ffi, :java, :citrus, :parslet, :prism, :psych
|
|
40
79
|
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
|
|
41
|
-
# @example Default (
|
|
80
|
+
# @example Default (auto-selects best available backend)
|
|
42
81
|
# parser = TreeHaver::Parser.new
|
|
43
82
|
# @example Explicit backend
|
|
44
|
-
# parser = TreeHaver::Parser.new(backend: :
|
|
83
|
+
# parser = TreeHaver::Parser.new(backend: :citrus)
|
|
45
84
|
def initialize(backend: nil)
|
|
85
|
+
super() # Initialize @language from Base::Parser
|
|
86
|
+
|
|
46
87
|
# Convert string backend names to symbols for consistency
|
|
47
88
|
backend = backend.to_sym if backend.is_a?(String)
|
|
48
89
|
|
|
@@ -56,24 +97,18 @@ module TreeHaver
|
|
|
56
97
|
end
|
|
57
98
|
end
|
|
58
99
|
|
|
59
|
-
# Try to create the parser, with fallback to
|
|
100
|
+
# Try to create the parser, with fallback to pure Ruby if tree-sitter fails
|
|
60
101
|
# This enables auto-fallback when tree-sitter runtime isn't available
|
|
61
102
|
begin
|
|
62
103
|
@impl = mod::Parser.new
|
|
63
104
|
@explicit_backend = backend # Remember for introspection (always a Symbol or nil)
|
|
64
105
|
rescue NoMethodError, LoadError => e
|
|
106
|
+
# Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
|
|
65
107
|
handle_parser_creation_failure(e, backend)
|
|
66
|
-
rescue => e
|
|
67
|
-
# Also catch FFI::NotFoundError if FFI is loaded (can't reference directly as FFI may not exist)
|
|
68
|
-
if defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)
|
|
69
|
-
handle_parser_creation_failure(e, backend)
|
|
70
|
-
else
|
|
71
|
-
raise
|
|
72
|
-
end
|
|
73
108
|
end
|
|
74
109
|
end
|
|
75
110
|
|
|
76
|
-
# Handle parser creation failure with optional Citrus fallback
|
|
111
|
+
# Handle parser creation failure with optional Citrus/Parslet fallback
|
|
77
112
|
#
|
|
78
113
|
# @param error [Exception] the error that caused parser creation to fail
|
|
79
114
|
# @param backend [Symbol, nil] the requested backend
|
|
@@ -81,15 +116,18 @@ module TreeHaver
|
|
|
81
116
|
# @api private
|
|
82
117
|
def handle_parser_creation_failure(error, backend)
|
|
83
118
|
# Tree-sitter backend failed (likely missing runtime library)
|
|
84
|
-
# Try Citrus as fallback if we weren't explicitly asked for a specific backend
|
|
119
|
+
# Try Citrus or Parslet as fallback if we weren't explicitly asked for a specific backend
|
|
85
120
|
if backend.nil? || backend == :auto
|
|
86
121
|
if Backends::Citrus.available?
|
|
87
122
|
@impl = Backends::Citrus::Parser.new
|
|
88
123
|
@explicit_backend = :citrus
|
|
124
|
+
elsif Backends::Parslet.available?
|
|
125
|
+
@impl = Backends::Parslet::Parser.new
|
|
126
|
+
@explicit_backend = :parslet
|
|
89
127
|
else
|
|
90
128
|
# No fallback available: raise NotAvailable that includes the original error's message
|
|
91
129
|
raise NotAvailable, "Tree-sitter backend failed: #{error.message}. " \
|
|
92
|
-
"Citrus fallback not available. Install tree-sitter runtime or
|
|
130
|
+
"Citrus/Parslet fallback not available. Install tree-sitter runtime, citrus gem, or parslet gem."
|
|
93
131
|
end
|
|
94
132
|
else
|
|
95
133
|
# Explicit backend was requested, don't fallback
|
|
@@ -101,7 +139,7 @@ module TreeHaver
|
|
|
101
139
|
#
|
|
102
140
|
# Returns the actual backend in use, resolving :auto to the concrete backend.
|
|
103
141
|
#
|
|
104
|
-
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :
|
|
142
|
+
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, :citrus, :parslet, :prism, or :psych)
|
|
105
143
|
def backend
|
|
106
144
|
if @explicit_backend && @explicit_backend != :auto
|
|
107
145
|
@explicit_backend
|
|
@@ -118,6 +156,8 @@ module TreeHaver
|
|
|
118
156
|
:java
|
|
119
157
|
when /Citrus/
|
|
120
158
|
:citrus
|
|
159
|
+
when /Parslet/
|
|
160
|
+
:parslet
|
|
121
161
|
else
|
|
122
162
|
# Fallback to effective_backend if we can't determine from class name
|
|
123
163
|
TreeHaver.effective_backend
|
|
@@ -127,28 +167,27 @@ module TreeHaver
|
|
|
127
167
|
|
|
128
168
|
# Set the language grammar for this parser
|
|
129
169
|
#
|
|
170
|
+
# The language must be compatible with the parser's backend. If a mismatch
|
|
171
|
+
# is detected (e.g., Citrus language on tree-sitter parser), the parser
|
|
172
|
+
# will automatically switch to the correct backend.
|
|
173
|
+
#
|
|
130
174
|
# @param lang [Language] the language to use for parsing
|
|
131
175
|
# @return [Language] the language that was set
|
|
132
176
|
# @example
|
|
133
177
|
# parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
134
178
|
def language=(lang)
|
|
135
|
-
#
|
|
136
|
-
# This
|
|
137
|
-
#
|
|
138
|
-
|
|
139
|
-
unless @impl.is_a?(Backends::Citrus::Parser)
|
|
140
|
-
# Switch to Citrus parser to match the Citrus language
|
|
141
|
-
@impl = Backends::Citrus::Parser.new
|
|
142
|
-
@explicit_backend = :citrus
|
|
143
|
-
end
|
|
144
|
-
end
|
|
179
|
+
# Auto-switch backend if language type doesn't match current parser
|
|
180
|
+
# This handles the case where Language.toml returns a Citrus/Parslet language
|
|
181
|
+
# but the parser was initialized with a tree-sitter backend
|
|
182
|
+
switch_backend_for_language(lang)
|
|
145
183
|
|
|
146
184
|
# Unwrap the language before passing to backend
|
|
147
185
|
# Backends receive raw language objects, never TreeHaver wrappers
|
|
148
186
|
inner_lang = unwrap_language(lang)
|
|
149
187
|
@impl.language = inner_lang
|
|
150
|
-
|
|
151
|
-
|
|
188
|
+
|
|
189
|
+
# Store on base class for API compatibility
|
|
190
|
+
@language = lang
|
|
152
191
|
end
|
|
153
192
|
|
|
154
193
|
# Parse source code into a syntax tree
|
|
@@ -224,6 +263,51 @@ module TreeHaver
|
|
|
224
263
|
|
|
225
264
|
private
|
|
226
265
|
|
|
266
|
+
# Switch backend if language type doesn't match current parser
|
|
267
|
+
#
|
|
268
|
+
# This is necessary because TreeHaver.parser_for may return a Language
|
|
269
|
+
# from a different backend than the Parser was initialized with.
|
|
270
|
+
# For example, Language.toml might return a Citrus::Language when
|
|
271
|
+
# tree-sitter-toml is not available, but Parser was initialized with :auto.
|
|
272
|
+
#
|
|
273
|
+
# @param lang [Object] The language object
|
|
274
|
+
# @api private
|
|
275
|
+
def switch_backend_for_language(lang)
|
|
276
|
+
return unless lang.respond_to?(:backend)
|
|
277
|
+
|
|
278
|
+
lang_backend = lang.backend
|
|
279
|
+
parser_backend = backend
|
|
280
|
+
|
|
281
|
+
# No switch needed if backends match
|
|
282
|
+
return if lang_backend == parser_backend
|
|
283
|
+
|
|
284
|
+
# Switch to matching backend parser
|
|
285
|
+
case lang_backend
|
|
286
|
+
when :citrus
|
|
287
|
+
unless @impl.is_a?(Backends::Citrus::Parser)
|
|
288
|
+
@impl = Backends::Citrus::Parser.new
|
|
289
|
+
@explicit_backend = :citrus
|
|
290
|
+
end
|
|
291
|
+
when :parslet
|
|
292
|
+
unless @impl.is_a?(Backends::Parslet::Parser)
|
|
293
|
+
@impl = Backends::Parslet::Parser.new
|
|
294
|
+
@explicit_backend = :parslet
|
|
295
|
+
end
|
|
296
|
+
when :prism
|
|
297
|
+
unless @impl.is_a?(Backends::Prism::Parser)
|
|
298
|
+
@impl = Backends::Prism::Parser.new
|
|
299
|
+
@explicit_backend = :prism
|
|
300
|
+
end
|
|
301
|
+
when :psych
|
|
302
|
+
unless @impl.is_a?(Backends::Psych::Parser)
|
|
303
|
+
@impl = Backends::Psych::Parser.new
|
|
304
|
+
@explicit_backend = :psych
|
|
305
|
+
end
|
|
306
|
+
# Tree-sitter backends (:mri, :rust, :ffi, :java) - don't auto-switch between them
|
|
307
|
+
# as that would require reloading the language from the .so file
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
|
|
227
311
|
# Unwrap a language object to extract the raw backend language
|
|
228
312
|
#
|
|
229
313
|
# This method is smart about backend compatibility:
|
|
@@ -280,24 +364,26 @@ module TreeHaver
|
|
|
280
364
|
|
|
281
365
|
case lang.backend
|
|
282
366
|
when :mri
|
|
283
|
-
|
|
284
|
-
|
|
367
|
+
lang.to_language if lang.respond_to?(:to_language)
|
|
368
|
+
lang.inner_language if lang.respond_to?(:inner_language)
|
|
285
369
|
when :rust
|
|
286
|
-
|
|
370
|
+
lang.name if lang.respond_to?(:name)
|
|
287
371
|
when :ffi
|
|
288
|
-
|
|
372
|
+
lang # FFI needs wrapper for to_ptr
|
|
289
373
|
when :java
|
|
290
|
-
|
|
374
|
+
lang.impl if lang.respond_to?(:impl)
|
|
291
375
|
when :citrus
|
|
292
|
-
|
|
376
|
+
lang # Citrus backend accepts Language wrapper (handles both)
|
|
377
|
+
when :parslet
|
|
378
|
+
lang # Parslet backend accepts Language wrapper (handles both)
|
|
293
379
|
when :prism
|
|
294
|
-
|
|
380
|
+
lang # Prism backend expects the Language wrapper
|
|
295
381
|
when :psych
|
|
296
|
-
|
|
382
|
+
lang # Psych backend expects the Language wrapper
|
|
297
383
|
when :commonmarker
|
|
298
|
-
|
|
384
|
+
lang # Commonmarker backend expects the Language wrapper
|
|
299
385
|
when :markly
|
|
300
|
-
|
|
386
|
+
lang # Markly backend expects the Language wrapper
|
|
301
387
|
else
|
|
302
388
|
# Unknown backend (e.g., test backend)
|
|
303
389
|
# Try generic unwrapping methods for flexibility in testing
|
|
@@ -305,15 +391,13 @@ module TreeHaver
|
|
|
305
391
|
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
306
392
|
return lang.impl if lang.respond_to?(:impl)
|
|
307
393
|
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
394
|
+
return lang.grammar_class if lang.respond_to?(:grammar_class)
|
|
308
395
|
return lang.name if lang.respond_to?(:name)
|
|
309
396
|
|
|
310
397
|
# If nothing works, pass through as-is
|
|
311
398
|
# This allows test languages to be passed directly
|
|
312
|
-
|
|
399
|
+
lang
|
|
313
400
|
end
|
|
314
|
-
|
|
315
|
-
# Shouldn't reach here, but just in case
|
|
316
|
-
lang
|
|
317
401
|
end
|
|
318
402
|
|
|
319
403
|
# Try to reload a language for the current backend
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Utility for finding and registering Parslet grammar gems.
|
|
5
|
+
#
|
|
6
|
+
# ParsletGrammarFinder provides language-agnostic discovery of Parslet grammar
|
|
7
|
+
# gems. Given a language name and gem information, it attempts to load the
|
|
8
|
+
# grammar and register it with tree_haver.
|
|
9
|
+
#
|
|
10
|
+
# Unlike tree-sitter grammars (which are .so files), Parslet grammars are
|
|
11
|
+
# Ruby classes that inherit from Parslet::Parser. This class handles the
|
|
12
|
+
# discovery and registration of these grammars.
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage with toml gem
|
|
15
|
+
# finder = TreeHaver::ParsletGrammarFinder.new(
|
|
16
|
+
# language: :toml,
|
|
17
|
+
# gem_name: "toml",
|
|
18
|
+
# grammar_const: "TOML::Parslet"
|
|
19
|
+
# )
|
|
20
|
+
# finder.register! if finder.available?
|
|
21
|
+
#
|
|
22
|
+
# @example With custom require path
|
|
23
|
+
# finder = TreeHaver::ParsletGrammarFinder.new(
|
|
24
|
+
# language: :json,
|
|
25
|
+
# gem_name: "json-parslet",
|
|
26
|
+
# grammar_const: "JsonParslet::Grammar",
|
|
27
|
+
# require_path: "json/parslet"
|
|
28
|
+
# )
|
|
29
|
+
#
|
|
30
|
+
# @see GrammarFinder For tree-sitter grammar discovery
|
|
31
|
+
# @see CitrusGrammarFinder For Citrus grammar discovery
|
|
32
|
+
class ParsletGrammarFinder
|
|
33
|
+
# @return [Symbol] the language identifier
|
|
34
|
+
attr_reader :language_name
|
|
35
|
+
|
|
36
|
+
# @return [String] the gem name to require
|
|
37
|
+
attr_reader :gem_name
|
|
38
|
+
|
|
39
|
+
# @return [String] the constant path to the grammar class (e.g., "TOML::Parslet")
|
|
40
|
+
attr_reader :grammar_const
|
|
41
|
+
|
|
42
|
+
# @return [String, nil] custom require path (defaults to gem_name)
|
|
43
|
+
attr_reader :require_path
|
|
44
|
+
|
|
45
|
+
# Initialize a Parslet grammar finder
|
|
46
|
+
#
|
|
47
|
+
# @param language [Symbol, String] the language name (e.g., :toml, :json)
|
|
48
|
+
# @param gem_name [String] the gem name (e.g., "toml")
|
|
49
|
+
# @param grammar_const [String] constant path to grammar class (e.g., "TOML::Parslet")
|
|
50
|
+
# @param require_path [String, nil] custom require path (defaults to gem_name as-is)
|
|
51
|
+
def initialize(language:, gem_name:, grammar_const:, require_path: nil)
|
|
52
|
+
@language_name = language.to_sym
|
|
53
|
+
@gem_name = gem_name
|
|
54
|
+
@grammar_const = grammar_const
|
|
55
|
+
@require_path = require_path || gem_name
|
|
56
|
+
@load_attempted = false
|
|
57
|
+
@available = false
|
|
58
|
+
@grammar_class = nil
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Check if the Parslet grammar is available
|
|
62
|
+
#
|
|
63
|
+
# Attempts to require the gem and resolve the grammar constant.
|
|
64
|
+
# Result is cached after first call.
|
|
65
|
+
#
|
|
66
|
+
# @return [Boolean] true if grammar is available
|
|
67
|
+
def available?
|
|
68
|
+
return @available if @load_attempted
|
|
69
|
+
|
|
70
|
+
@load_attempted = true
|
|
71
|
+
debug = ENV["TREE_HAVER_DEBUG"]
|
|
72
|
+
|
|
73
|
+
# Guard against nil require_path (can happen if gem_name was nil)
|
|
74
|
+
if @require_path.nil? || @require_path.empty?
|
|
75
|
+
warn("ParsletGrammarFinder: require_path is nil or empty for #{@language_name}") if debug
|
|
76
|
+
@available = false
|
|
77
|
+
return false
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
begin
|
|
81
|
+
# Try to require the gem
|
|
82
|
+
require @require_path
|
|
83
|
+
|
|
84
|
+
# Try to resolve the constant
|
|
85
|
+
@grammar_class = resolve_constant(@grammar_const)
|
|
86
|
+
|
|
87
|
+
# Verify it can create a parser instance with a parse method
|
|
88
|
+
unless valid_grammar_class?(@grammar_class)
|
|
89
|
+
if debug
|
|
90
|
+
warn("ParsletGrammarFinder: #{@grammar_const} is not a valid Parslet grammar class")
|
|
91
|
+
warn("ParsletGrammarFinder: #{@grammar_const}.class = #{@grammar_class.class}")
|
|
92
|
+
end
|
|
93
|
+
@available = false
|
|
94
|
+
return false
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
@available = true
|
|
98
|
+
rescue LoadError => e
|
|
99
|
+
# :nocov: defensive - requires gem to not be installed
|
|
100
|
+
if debug
|
|
101
|
+
warn("ParsletGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
|
|
102
|
+
warn("ParsletGrammarFinder: LoadError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
|
|
103
|
+
end
|
|
104
|
+
@available = false
|
|
105
|
+
# :nocov:
|
|
106
|
+
rescue NameError => e
|
|
107
|
+
# :nocov: defensive - requires gem with missing constant
|
|
108
|
+
if debug
|
|
109
|
+
warn("ParsletGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
|
|
110
|
+
warn("ParsletGrammarFinder: NameError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
|
|
111
|
+
end
|
|
112
|
+
@available = false
|
|
113
|
+
# :nocov:
|
|
114
|
+
rescue TypeError => e
|
|
115
|
+
# :nocov: defensive - TruffleRuby-specific edge case
|
|
116
|
+
warn("ParsletGrammarFinder: TypeError during load of '#{@require_path}': #{e.class}: #{e.message}")
|
|
117
|
+
warn("ParsletGrammarFinder: This may be a TruffleRuby bundled_gems.rb issue")
|
|
118
|
+
if debug
|
|
119
|
+
warn("ParsletGrammarFinder: TypeError backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
|
|
120
|
+
end
|
|
121
|
+
@available = false
|
|
122
|
+
# :nocov:
|
|
123
|
+
rescue => e
|
|
124
|
+
# :nocov: defensive - catch-all for unexpected errors
|
|
125
|
+
warn("ParsletGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
|
|
126
|
+
if debug
|
|
127
|
+
warn("ParsletGrammarFinder: backtrace:\n #{e.backtrace&.first(10)&.join("\n ")}")
|
|
128
|
+
end
|
|
129
|
+
@available = false
|
|
130
|
+
# :nocov:
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
@available
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Get the resolved grammar class
|
|
137
|
+
#
|
|
138
|
+
# @return [Class, nil] the grammar class if available
|
|
139
|
+
def grammar_class
|
|
140
|
+
available? # Ensure we've tried to load
|
|
141
|
+
@grammar_class
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Register this Parslet grammar with TreeHaver
|
|
145
|
+
#
|
|
146
|
+
# After registration, the language can be used via:
|
|
147
|
+
# TreeHaver::Language.{language_name}
|
|
148
|
+
#
|
|
149
|
+
# @param raise_on_missing [Boolean] if true, raises when grammar not available
|
|
150
|
+
# @return [Boolean] true if registration succeeded
|
|
151
|
+
# @raise [NotAvailable] if grammar not available and raise_on_missing is true
|
|
152
|
+
def register!(raise_on_missing: false)
|
|
153
|
+
unless available?
|
|
154
|
+
if raise_on_missing
|
|
155
|
+
raise NotAvailable, not_found_message
|
|
156
|
+
end
|
|
157
|
+
return false
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
TreeHaver.register_language(
|
|
161
|
+
@language_name,
|
|
162
|
+
grammar_class: @grammar_class,
|
|
163
|
+
gem_name: @gem_name,
|
|
164
|
+
)
|
|
165
|
+
true
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Get debug information about the search
|
|
169
|
+
#
|
|
170
|
+
# @return [Hash] diagnostic information
|
|
171
|
+
def search_info
|
|
172
|
+
{
|
|
173
|
+
language: @language_name,
|
|
174
|
+
gem_name: @gem_name,
|
|
175
|
+
grammar_const: @grammar_const,
|
|
176
|
+
require_path: @require_path,
|
|
177
|
+
available: available?,
|
|
178
|
+
grammar_class: @grammar_class&.name,
|
|
179
|
+
}
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Get a human-readable error message when grammar is not found
|
|
183
|
+
#
|
|
184
|
+
# @return [String] error message with installation hints
|
|
185
|
+
def not_found_message
|
|
186
|
+
"Parslet grammar for #{@language_name} not found. " \
|
|
187
|
+
"Install #{@gem_name} gem: gem install #{@gem_name}"
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
private
|
|
191
|
+
|
|
192
|
+
# Resolve a constant path like "TOML::Parslet"
|
|
193
|
+
#
|
|
194
|
+
# @param const_path [String] constant path
|
|
195
|
+
# @return [Object] the constant
|
|
196
|
+
# @raise [NameError] if constant not found
|
|
197
|
+
def resolve_constant(const_path)
|
|
198
|
+
const_path.split("::").reduce(Object) do |mod, const_name|
|
|
199
|
+
mod.const_get(const_name)
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Check if the class is a valid Parslet grammar
|
|
204
|
+
#
|
|
205
|
+
# @param klass [Class] the class to check
|
|
206
|
+
# @return [Boolean] true if valid
|
|
207
|
+
def valid_grammar_class?(klass)
|
|
208
|
+
return false unless klass.respond_to?(:new)
|
|
209
|
+
|
|
210
|
+
# Check if it's a Parslet::Parser subclass
|
|
211
|
+
if defined?(::Parslet::Parser)
|
|
212
|
+
return true if klass < ::Parslet::Parser
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
# Fallback: check if it can create an instance that responds to parse
|
|
216
|
+
begin
|
|
217
|
+
instance = klass.new
|
|
218
|
+
instance.respond_to?(:parse)
|
|
219
|
+
rescue StandardError
|
|
220
|
+
false
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
end
|