tree_haver 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,352 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Represents a tree-sitter parser instance
5
+ #
6
+ # A Parser is used to parse source code into a syntax tree. You must
7
+ # set a language before parsing.
8
+ #
9
+ # == Wrapping/Unwrapping Responsibility
10
+ #
11
+ # TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
12
+ #
13
+ # **Language objects:**
14
+ # - Unwraps Language wrappers before passing to backend.language=
15
+ # - MRI backend receives ::TreeSitter::Language
16
+ # - Rust backend receives String (language name)
17
+ # - FFI backend receives wrapped Language (needs to_ptr)
18
+ #
19
+ # **Tree objects:**
20
+ # - parse() receives raw source, backend returns raw tree, Parser wraps it
21
+ # - parse_string() unwraps old_tree before passing to backend, wraps returned tree
22
+ # - Backends always work with raw backend trees, never TreeHaver::Tree
23
+ #
24
+ # **Node objects:**
25
+ # - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
26
+ #
27
+ # This design ensures:
28
+ # - Principle of Least Surprise: wrapping happens at boundaries, consistently
29
+ # - Backends are simple: they don't need to know about TreeHaver wrappers
30
+ # - Single Responsibility: wrapping logic is only in TreeHaver::Parser
31
+ #
32
+ # @example Basic parsing
33
+ # parser = TreeHaver::Parser.new
34
+ # parser.language = TreeHaver::Language.toml
35
+ # tree = parser.parse("[package]\nname = \"foo\"")
36
+ class Parser
37
# Build a parser on top of the resolved backend implementation.
#
# A String backend name is normalized to a Symbol, the backend module is
# looked up through TreeHaver.resolve_backend_module, and that module's
# Parser class is instantiated. When instantiation raises NoMethodError,
# LoadError, or (only if FFI is loaded) FFI::NotFoundError, the error is
# handed to #handle_parser_creation_failure; every other error propagates.
#
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
# @example Default (uses context/global)
#   parser = TreeHaver::Parser.new
# @example Explicit backend
#   parser = TreeHaver::Parser.new(backend: :ffi)
def initialize(backend: nil)
  # Normalize early so the remembered backend is always a Symbol or nil
  backend = backend.to_sym if backend.is_a?(String)

  mod = TreeHaver.resolve_backend_module(backend)
  if mod.nil?
    message =
      if backend
        "Requested backend #{backend.inspect} is not available"
      else
        "No TreeHaver backend is available"
      end
    raise NotAvailable, message
  end

  begin
    @impl = mod::Parser.new
    @explicit_backend = backend # Kept for introspection via #backend
  rescue LoadError, StandardError => e
    # FFI may not be loaded at all, so its error class is only consulted when defined
    ffi_lookup_failure = defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)
    raise unless e.is_a?(NoMethodError) || e.is_a?(LoadError) || ffi_lookup_failure

    handle_parser_creation_failure(e, backend)
  end
end
75
+
76
# Decide what to do after the backend parser could not be constructed.
#
# When no specific backend was requested (nil or :auto) and the Citrus backend
# reports itself available, the parser switches to a Citrus implementation and
# records :citrus as its backend. A specifically requested backend never falls
# back: the original error object is raised again.
#
# @param error [Exception] the error that caused parser creation to fail
# @param backend [Symbol, nil] the requested backend
# @raise [NotAvailable] if no fallback is available
# @raise [Exception] the original error when a specific backend was requested
# @api private
def handle_parser_creation_failure(error, backend)
  fallback_allowed = backend.nil? || backend == :auto
  # An explicit backend choice is honoured: surface the failure untouched
  raise error unless fallback_allowed

  unless Backends::Citrus.available?
    raise NotAvailable, "Tree-sitter backend failed: #{error.message}. " \
      "Citrus fallback not available. Install tree-sitter runtime or citrus gem."
  end

  @impl = Backends::Citrus::Parser.new
  @explicit_backend = :citrus
end
99
+
100
# Report which backend this parser is using (for introspection).
#
# A remembered backend other than :auto is returned as-is. Otherwise the
# backend is inferred from the implementation's class name; when the name
# matches none of the known patterns, TreeHaver.effective_backend is returned.
#
# @return [Symbol] the remembered backend, or :mri, :rust, :ffi, :java or
#   :citrus when inferred from the class name, or TreeHaver.effective_backend
def backend
  return @explicit_backend if @explicit_backend && @explicit_backend != :auto

  impl_name = @impl.class.name
  # Insertion order is significant: the first matching pattern wins
  known = {
    /MRI/ => :mri,
    /Rust/ => :rust,
    /FFI/ => :ffi,
    /Java/ => :java,
    /Citrus/ => :citrus,
  }
  # `===` is the same test case/when applies; it is simply false for a nil name
  match = known.find { |pattern, _| pattern === impl_name }
  match ? match.last : TreeHaver.effective_backend
end
127
+
128
# Assign the grammar this parser should use.
#
# A Citrus language is only handed to a Citrus parser: if the current
# implementation is anything else it is replaced by a fresh
# Backends::Citrus::Parser and the remembered backend becomes :citrus.
# The language is unwrapped via #unwrap_language before the implementation
# receives it.
#
# @param lang [Language] the language to use for parsing
# @return [Language] the language that was set
# @example
#   parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
def language=(lang)
  citrus_language = lang.is_a?(Backends::Citrus::Language)
  if citrus_language && !@impl.is_a?(Backends::Citrus::Parser)
    # Switch implementations so parser and language belong to the same backend
    @impl = Backends::Citrus::Parser.new
    @explicit_backend = :citrus
  end

  # The implementation is given the unwrapped language, not the object passed in
  raw_language = unwrap_language(lang)
  @impl.language = raw_language

  # Hand back what the caller passed in, not the unwrapped object
  lang # rubocop:disable Lint/Void (intentional return value)
end
153
+
154
# Parse source code into a syntax tree.
#
# The backend implementation produces its own tree object; it is wrapped in a
# Tree together with the source text (the wrapper keeps the source so that
# Node#text works).
#
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the parsed syntax tree
# @example
#   tree = parser.parse("x = 1")
#   puts tree.root_node.type
def parse(source)
  Tree.new(@impl.parse(source), source: source)
end
166
+
167
# Parse source code into a syntax tree (with optional incremental parsing)
#
# This method provides API compatibility with ruby_tree_sitter which uses
# `parse_string(old_tree, source)`.
#
# == Incremental Parsing
#
# tree-sitter supports **incremental parsing**: a previously parsed tree plus
# edit information lets the parser re-parse only the changed portions of the
# source. The workflow is:
# 1. Parse the initial source: `tree = parser.parse_string(nil, source)`
# 2. User edits the source (e.g., inserts a character)
# 3. Call `tree.edit(...)` to update the tree's position data
# 4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
# 5. tree-sitter reuses unchanged nodes, only re-parsing affected regions
#
# TreeHaver passes through to the underlying backend if it responds to
# `parse_string`; otherwise it falls back to a fresh #parse and the old tree is
# ignored. Check `TreeHaver.capabilities[:incremental]` to see if the current
# backend supports incremental parsing.
#
# @param old_tree [Tree, nil] previously parsed tree for incremental parsing, or nil for fresh parse
# @param source [String] the source code to parse (should be UTF-8)
# @return [Tree] the parsed syntax tree
# @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
# @see Tree#edit For marking edits before incremental re-parsing
# @example First parse (no old tree)
#   tree = parser.parse_string(nil, "x = 1")
# @example Incremental parse
#   tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
#   new_tree = parser.parse_string(tree, "x = 42")
def parse_string(old_tree, source)
  # Backends without parse_string can only do a fresh parse
  return parse(source) unless @impl.respond_to?(:parse_string)

  # The backend works with its own tree objects, so peel off any wrapper first
  previous =
    if !old_tree
      nil
    elsif old_tree.respond_to?(:inner_tree)
      old_tree.inner_tree
    elsif old_tree.respond_to?(:instance_variable_get)
      # Compatibility path for wrappers that expose no inner_tree reader
      old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
    else
      old_tree
    end

  # Keep the source alongside the backend tree so Node#text works
  Tree.new(@impl.parse_string(previous, source), source: source)
end
224
+
225
+ private
226
+
227
# Unwrap a language object to the form the backend implementation expects.
#
# Steps:
# 1. If the language exposes a backend and it differs from this parser's
#    backend (and the parser's backend is not :auto), try to reload the
#    language for the parser's backend.
# 2. If the reload yields nothing, raise; otherwise continue with the
#    reloaded language.
# 3. Reject anything that does not expose a backend attribute.
# 4. Pick the unwrapped object according to the language's backend. Backends
#    listed as "wrapper" below receive the language object itself, and so does
#    any language that offers none of the expected accessors.
#
# The language currently set on the implementation is deliberately not
# consulted: replacing one language with another is always allowed.
#
# @param lang [Object] wrapped language object
# @return [Object] backend language object appropriate for the language's backend
# @raise [TreeHaver::Error] if the language has no backend attribute, or its
#   backend mismatches the parser and it cannot be reloaded
# @api private
def unwrap_language(lang)
  if lang.respond_to?(:backend)
    # Verify backend compatibility first so a language built for one backend
    # is not handed to another backend's native code.
    # Exception: an :auto parser backend is permissive and accepts any language.
    current_backend = backend

    if lang.backend != current_backend && current_backend != :auto
      reloaded = try_reload_language_for_backend(lang, current_backend)
      unless reloaded
        raise TreeHaver::Error,
          "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
            "Cannot reload language for correct backend. " \
            "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
      end
      lang = reloaded
    end
  end

  # Checked after the reload on purpose: the reloaded object must also expose a backend
  unless lang.respond_to?(:backend)
    raise TreeHaver::Error,
      "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
        "Use TreeHaver::Language.from_library to create language objects."
  end

  # Accessors to try, in priority order, for each backend. An empty list means
  # the backend receives the language object itself.
  accessors =
    case lang.backend
    when :mri then %i[to_language inner_language]
    when :rust then %i[name]
    when :java then %i[impl]
    when :citrus then %i[grammar_module]
    when :ffi, :prism, :psych, :commonmarker, :markly then []
    else
      # Unknown backend (e.g., test backend): try every generic accessor
      %i[to_language inner_language impl grammar_module name]
    end

  accessor = accessors.find { |candidate| lang.respond_to?(candidate) }
  # No usable accessor: pass the language through as-is
  accessor ? lang.public_send(accessor) : lang
end
318
+
319
# Try to reload a language so it can be used with a different backend.
#
# Only languages that expose a truthy `path` can be reloaded; they are loaded
# again through Language.from_library, passing along `symbol` and `name` when
# the language offers them. Anything else (for example a language that only
# has a grammar module) yields nil. Errors raised while reloading are
# swallowed and reported via `warn` only when $VERBOSE is set.
#
# NOTE(review): target_backend is used only in the warning text; the reload
# presumably relies on Language.from_library choosing the backend itself -
# confirm the reloaded language really matches target_backend.
#
# @param lang [Object] language object with metadata
# @param target_backend [Symbol] backend to reload for
# @return [Object, nil] reloaded language or nil if reload not possible
# @api private
def try_reload_language_for_backend(lang, target_backend)
  # Without a library path there is nothing to load again
  return unless lang.respond_to?(:path) && lang.path

  begin
    library_path = lang.path
    symbol = lang.respond_to?(:symbol) ? lang.symbol : nil
    name = lang.respond_to?(:name) ? lang.name : nil
    Language.from_library(library_path, symbol: symbol, name: name)
  rescue => e
    # Best effort: a failed reload is reported to the caller as nil
    warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
    nil
  end
end
351
+ end
352
+ end