tree_haver 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +82 -1
- data/README.md +1 -1
- data/lib/tree_haver/backends/ffi.rb +8 -6
- data/lib/tree_haver/backends/java.rb +5 -3
- data/lib/tree_haver/backends/mri.rb +23 -20
- data/lib/tree_haver/backends/rust.rb +3 -4
- data/lib/tree_haver/grammar_finder.rb +4 -1
- data/lib/tree_haver/language.rb +255 -0
- data/lib/tree_haver/library_path_utils.rb +80 -0
- data/lib/tree_haver/parser.rb +352 -0
- data/lib/tree_haver/rspec/dependency_tags.rb +264 -47
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +14 -553
- data.tar.gz.sig +0 -0
- metadata +7 -4
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Represents a tree-sitter parser instance
|
|
5
|
+
#
|
|
6
|
+
# A Parser is used to parse source code into a syntax tree. You must
|
|
7
|
+
# set a language before parsing.
|
|
8
|
+
#
|
|
9
|
+
# == Wrapping/Unwrapping Responsibility
|
|
10
|
+
#
|
|
11
|
+
# TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
|
|
12
|
+
#
|
|
13
|
+
# **Language objects:**
|
|
14
|
+
# - Unwraps Language wrappers before passing to backend.language=
|
|
15
|
+
# - MRI backend receives ::TreeSitter::Language
|
|
16
|
+
# - Rust backend receives String (language name)
|
|
17
|
+
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
18
|
+
#
|
|
19
|
+
# **Tree objects:**
|
|
20
|
+
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
21
|
+
# - parse_string() unwraps old_tree before passing to backend, wraps returned tree
|
|
22
|
+
# - Backends always work with raw backend trees, never TreeHaver::Tree
|
|
23
|
+
#
|
|
24
|
+
# **Node objects:**
|
|
25
|
+
# - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
|
|
26
|
+
#
|
|
27
|
+
# This design ensures:
|
|
28
|
+
# - Principle of Least Surprise: wrapping happens at boundaries, consistently
|
|
29
|
+
# - Backends are simple: they don't need to know about TreeHaver wrappers
|
|
30
|
+
# - Single Responsibility: wrapping logic is only in TreeHaver::Parser
|
|
31
|
+
#
|
|
32
|
+
# @example Basic parsing
|
|
33
|
+
# parser = TreeHaver::Parser.new
|
|
34
|
+
# parser.language = TreeHaver::Language.toml
|
|
35
|
+
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
36
|
+
class Parser
|
|
37
|
+
# Create a new parser instance
#
# Resolves the backend module through TreeHaver.resolve_backend_module and
# instantiates that module's Parser. When instantiation raises NoMethodError,
# LoadError or FFI::NotFoundError the error is handed to
# #handle_parser_creation_failure, which decides between falling back and
# raising.
#
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
# @example Default (uses context/global)
#   parser = TreeHaver::Parser.new
# @example Explicit backend
#   parser = TreeHaver::Parser.new(backend: :ffi)
def initialize(backend: nil)
  # Normalise String backend names so everything below only sees Symbols (or nil)
  backend = backend.to_sym if backend.is_a?(String)

  mod = TreeHaver.resolve_backend_module(backend)

  if mod.nil?
    raise NotAvailable, "Requested backend #{backend.inspect} is not available" if backend

    raise NotAvailable, "No TreeHaver backend is available"
  end

  # Only parser construction is guarded; resolution errors above propagate untouched.
  begin
    @impl = mod::Parser.new
    @explicit_backend = backend # Remembered for introspection (always a Symbol or nil)
  rescue NoMethodError, LoadError => e
    handle_parser_creation_failure(e, backend)
  rescue => e
    # FFI may not be loaded, so FFI::NotFoundError cannot be named in a rescue
    # clause; test for it dynamically and let every other error propagate.
    # NOTE(review): if FFI::NotFoundError descends from LoadError it is already
    # handled by the clause above - confirm against the ffi gem before removing.
    raise unless defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)

    handle_parser_creation_failure(e, backend)
  end
end
|
|
75
|
+
|
|
76
|
+
# Handle parser creation failure with optional Citrus fallback
#
# A fallback is only attempted when no specific backend was requested
# (+backend+ is nil or :auto). In that case a Citrus parser replaces the
# failed implementation and :citrus is recorded as the backend in use.
#
# @param error [Exception] the error that caused parser creation to fail
# @param backend [Symbol, nil] the requested backend
# @return [Symbol] :citrus when the fallback parser was installed
# @raise [NotAvailable] if no backend was requested and Citrus is not available
# @raise [Exception] the original +error+ when a specific backend was requested
# @api private
def handle_parser_creation_failure(error, backend)
  # An explicitly requested backend is never swapped out silently.
  raise error unless backend.nil? || backend == :auto

  # No fallback either: report both problems in a single NotAvailable error.
  unless Backends::Citrus.available?
    raise NotAvailable, "Tree-sitter backend failed: #{error.message}. " \
      "Citrus fallback not available. Install tree-sitter runtime or citrus gem."
  end

  @impl = Backends::Citrus::Parser.new
  @explicit_backend = :citrus
end
|
|
99
|
+
|
|
100
|
+
# Get the backend this parser is using (for introspection)
#
# A recorded backend other than :auto is returned as-is. Otherwise the
# backend is derived from the implementation's class name, and as a last
# resort from TreeHaver.effective_backend.
#
# @return [Symbol] the recorded backend, or the one detected from the
#   implementation class (:mri, :rust, :ffi, :java, :citrus, :prism, :psych,
#   :commonmarker or :markly), or TreeHaver.effective_backend
def backend
  return @explicit_backend if @explicit_backend && @explicit_backend != :auto

  # Patterns are matched against the full class name; the first five keep
  # their original precedence. The remaining four mirror the backends that
  # unwrap_language dispatches on.
  # NOTE(review): assumes those parser classes carry the backend name in
  # their class name - confirm; unmatched names use the fallback below.
  case @impl.class.name
  when /MRI/ then :mri
  when /Rust/ then :rust
  when /FFI/ then :ffi
  when /Java/ then :java
  when /Citrus/ then :citrus
  when /Prism/ then :prism
  when /Psych/ then :psych
  when /Commonmarker/ then :commonmarker
  when /Markly/ then :markly
  else
    # Class name gave no hint; defer to the context/global selection.
    TreeHaver.effective_backend
  end
end
|
|
127
|
+
|
|
128
|
+
# Set the language grammar for this parser
#
# A Citrus language forces the parser onto the Citrus backend: if the
# current implementation is not a Citrus parser it is replaced by one and
# :citrus is recorded as the backend. The language is then unwrapped with
# unwrap_language and handed to the implementation.
#
# @param lang [Language] the language to use for parsing
# @return [Language] the language that was set
# @example
#   parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
def language=(lang)
  # Keep language and parser on the same backend when a Citrus language arrives.
  if lang.is_a?(Backends::Citrus::Language) && !@impl.is_a?(Backends::Citrus::Parser)
    @impl = Backends::Citrus::Parser.new
    @explicit_backend = :citrus
  end

  # The implementation receives whatever unwrap_language produces for it.
  @impl.language = unwrap_language(lang)

  # Return the original (possibly wrapped) language for consistency
  lang # rubocop:disable Lint/Void (intentional return value)
end
|
|
153
|
+
|
|
154
|
+
# Parse source code into a syntax tree
#
# Delegates to the backend implementation and wraps the tree it returns.
#
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the parsed syntax tree
# @example
#   tree = parser.parse("x = 1")
#   puts tree.root_node.type
def parse(source)
  # The wrapper keeps the source alongside the backend tree so Node#text works
  Tree.new(@impl.parse(source), source: source)
end
|
|
166
|
+
|
|
167
|
+
# Parse source code into a syntax tree (with optional incremental parsing)
#
# This method provides API compatibility with ruby_tree_sitter which uses
# `parse_string(old_tree, source)`.
#
# == Incremental Parsing
#
# tree-sitter supports **incremental parsing** where you can pass a previously
# parsed tree along with edit information to efficiently re-parse only the
# changed portions of source code. This is a major performance optimization
# for editors and IDEs that need to re-parse on every keystroke.
#
# The workflow for incremental parsing is:
# 1. Parse the initial source: `tree = parser.parse_string(nil, source)`
# 2. User edits the source (e.g., inserts a character)
# 3. Call `tree.edit(...)` to update the tree's position data
# 4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
# 5. tree-sitter reuses unchanged nodes, only re-parsing affected regions
#
# TreeHaver passes through to the underlying backend if it supports incremental
# parsing (MRI and Rust backends do). Check `TreeHaver.capabilities[:incremental]`
# to see if the current backend supports it. Backends whose parser does not
# respond to `parse_string` get a plain #parse and +old_tree+ is ignored.
#
# @param old_tree [Tree, nil] previously parsed tree for incremental parsing, or nil for fresh parse
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the parsed syntax tree
# @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
# @see Tree#edit For marking edits before incremental re-parsing
# @example First parse (no old tree)
#   tree = parser.parse_string(nil, "x = 1")
# @example Incremental parse
#   tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
#   new_tree = parser.parse_string(tree, "x = 42")
def parse_string(old_tree, source)
  # Fallback for backends that don't support parse_string
  return parse(source) unless @impl.respond_to?(:parse_string)

  # The backend works on its own tree type, so peel off our wrapper first.
  old_impl =
    if !old_tree
      nil
    elsif old_tree.respond_to?(:inner_tree)
      old_tree.inner_tree
    elsif old_tree.respond_to?(:instance_variable_get)
      # Compatibility path for wrappers without an inner_tree reader
      old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
    else
      old_tree
    end

  # Wrap backend tree with source so Node#text works
  Tree.new(@impl.parse_string(old_impl, source), source: source)
end
|
|
224
|
+
|
|
225
|
+
private
|
|
226
|
+
|
|
227
|
+
# Unwrap a language object to extract the raw backend language
#
# Steps, in order:
# 1. If the language reports a backend that differs from this parser's
#    backend (and the parser backend is not :auto), try to reload it for
#    the parser's backend.
# 2. If that reload is not possible, raise; otherwise continue with the
#    reloaded language.
# 3. Reject objects that do not expose a +backend+ attribute.
# 4. Return the backend-specific payload of the wrapper, or the wrapper
#    itself for backends that consume it directly.
#
# @param lang [Object] language wrapper exposing a +backend+ attribute
# @return [Object] the object to hand to the backend parser
# @raise [TreeHaver::Error] if the language belongs to another backend and
#   cannot be reloaded, or if +lang+ has no +backend+ attribute
# @api private
def unwrap_language(lang)
  if lang.respond_to?(:backend)
    current_backend = backend

    # Verify backend compatibility before anything reaches native code.
    # Exception: an :auto parser backend accepts any language.
    if lang.backend != current_backend && current_backend != :auto
      reloaded = try_reload_language_for_backend(lang, current_backend)
      unless reloaded
        raise TreeHaver::Error,
          "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
            "Cannot reload language for correct backend. " \
            "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
      end
      lang = reloaded
    end
  end

  # Checked after the reload on purpose: it also covers a reloaded object
  # that lacks the attribute, not just a raw backend object passed directly.
  unless lang.respond_to?(:backend)
    raise TreeHaver::Error,
      "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
        "Use TreeHaver::Language.from_library to create language objects."
  end

  # Readers to try, in priority order, for the language's backend.
  accessors =
    case lang.backend
    when :mri then %i[to_language inner_language]
    when :rust then %i[name]
    when :java then %i[impl]
    when :citrus then %i[grammar_module]
    when :ffi, :prism, :psych, :commonmarker, :markly
      # These backends expect the Language wrapper itself (FFI needs to_ptr)
      []
    else
      # Unknown backend (e.g., test backend): try every generic reader
      %i[to_language inner_language impl grammar_module name]
    end

  # When no reader is available the language is passed through unchanged.
  accessor = accessors.find { |reader| lang.respond_to?(reader) }
  accessor ? lang.public_send(accessor) : lang
end
|
|
318
|
+
|
|
319
|
+
# Try to reload a language for the current backend
#
# This handles the case where a language was loaded for one backend,
# but is now being used with a different backend (e.g., after backend switch).
# Only languages that expose a non-nil +path+ can be reloaded; anything
# else (including Citrus languages, which only reference a grammar module)
# yields nil.
#
# @param lang [Object] language object with metadata
# @param target_backend [Symbol] backend to reload for (only used in the warning text)
# @return [Object, nil] reloaded language or nil if reload not possible
# @api private
def try_reload_language_for_backend(lang, target_backend)
  # Can't reload without path information
  library_path = lang.path if lang.respond_to?(:path)
  return unless library_path

  begin
    # NOTE(review): from_library is not told about target_backend and the
    # result's backend is never compared with it - confirm that it always
    # loads for the backend this parser is using.
    Language.from_library(
      library_path,
      symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
      name: lang.respond_to?(:name) ? lang.name : nil,
    )
  rescue => e
    # Reload failed: report it in verbose mode and signal "not possible"
    warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
    nil
  end
end
|
|
351
|
+
end
|
|
352
|
+
end
|