tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -3,16 +3,24 @@
3
3
  # External gems
4
4
  require "version_gem"
5
5
 
6
+ # Standard library
7
+ require "set"
8
+
6
9
  # This gem
7
10
  require_relative "tree_haver/version"
8
11
  require_relative "tree_haver/language_registry"
9
12
 
10
- # TreeHaver is a cross-Ruby adapter for the tree-sitter parsing library.
13
+ # TreeHaver is a cross-Ruby adapter for code parsing with 10 backends.
14
+ #
15
+ # Provides a unified API for parsing source code across MRI Ruby, JRuby, and TruffleRuby
16
+ # using tree-sitter grammars or language-specific native parsers.
11
17
  #
12
- # It provides a unified API for parsing source code using tree-sitter grammars,
13
- # working seamlessly across MRI Ruby, JRuby, and TruffleRuby.
18
+ # Supports 10 backends:
19
+ # - Tree-sitter: MRI (C), Rust, FFI, Java
20
+ # - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
21
+ # - Pure Ruby: Citrus (portable fallback)
14
22
  #
15
- # @example Basic usage with TOML
23
+ # @example Basic usage with tree-sitter
16
24
  # # Load a language grammar
17
25
  # language = TreeHaver::Language.from_library(
18
26
  # "/usr/local/lib/libtree-sitter-toml.so",
@@ -27,8 +35,28 @@ require_relative "tree_haver/language_registry"
27
35
  # tree = parser.parse("[package]\nname = \"my-app\"")
28
36
  # root = tree.root_node
29
37
  #
30
- # # Traverse the AST
31
- # root.each { |child| puts child.type }
38
+ # # Use unified Position API (works across all backends)
39
+ # puts root.start_line # => 1 (1-based)
40
+ # puts root.source_position # => {start_line:, end_line:, start_column:, end_column:}
41
+ #
42
+ # @example Using language-specific backends
43
+ # # Parse Ruby with Prism
44
+ # TreeHaver.backend = :prism
45
+ # parser = TreeHaver::Parser.new
46
+ # parser.language = TreeHaver::Backends::Prism::Language.ruby
47
+ # tree = parser.parse("class Example; end")
48
+ #
49
+ # # Parse YAML with Psych
50
+ # TreeHaver.backend = :psych
51
+ # parser = TreeHaver::Parser.new
52
+ # parser.language = TreeHaver::Backends::Psych::Language.yaml
53
+ # tree = parser.parse("key: value")
54
+ #
55
+ # # Parse Markdown with Commonmarker
56
+ # TreeHaver.backend = :commonmarker
57
+ # parser = TreeHaver::Parser.new
58
+ # parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
59
+ # tree = parser.parse("# Heading\nParagraph")
32
60
  #
33
61
  # @example Using language registration
34
62
  # TreeHaver.register_language(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
@@ -40,29 +68,31 @@ require_relative "tree_haver/language_registry"
40
68
  # finder.register! if finder.available?
41
69
  # language = TreeHaver::Language.toml
42
70
  #
43
- # @example Using GrammarFinder in a *-merge gem
44
- # # Each merge gem (toml-merge, json-merge, bash-merge) uses the same pattern
45
- # finder = TreeHaver::GrammarFinder.new(:toml) # or :json, :bash, etc.
46
- # if finder.available?
47
- # finder.register!
48
- # else
49
- # warn finder.not_found_message
50
- # end
51
- #
52
71
  # @example Selecting a backend
53
- # TreeHaver.backend = :ffi # Force FFI backend
54
- # TreeHaver.backend = :mri # Force MRI backend
55
- # TreeHaver.backend = :auto # Auto-select (default)
72
+ # TreeHaver.backend = :mri # Force MRI (ruby_tree_sitter)
73
+ # TreeHaver.backend = :rust # Force Rust (tree_stump)
74
+ # TreeHaver.backend = :ffi # Force FFI
75
+ # TreeHaver.backend = :java # Force Java (JRuby)
76
+ # TreeHaver.backend = :prism # Force Prism (Ruby)
77
+ # TreeHaver.backend = :psych # Force Psych (YAML)
78
+ # TreeHaver.backend = :commonmarker # Force Commonmarker (Markdown)
79
+ # TreeHaver.backend = :markly # Force Markly (GFM)
80
+ # TreeHaver.backend = :citrus # Force Citrus (pure Ruby)
81
+ # TreeHaver.backend = :auto # Auto-select (default)
56
82
  #
57
83
  # @see https://tree-sitter.github.io/tree-sitter/ tree-sitter documentation
58
84
  # @see GrammarFinder For automatic grammar library discovery
85
+ # @see Backends For available parsing backends
59
86
  module TreeHaver
60
87
  # Base error class for TreeHaver exceptions
88
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
61
89
  #
62
90
  # @abstract Subclass to create specific error types
63
- class Error < StandardError; end
91
+ class Error < Exception; end # rubocop:disable Lint/InheritException
64
92
 
65
93
  # Raised when a requested backend or feature is not available
94
+ # These are serious errors that extend Exception (not StandardError).
95
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
66
96
  #
67
97
  # This can occur when:
68
98
  # - Required native libraries are not installed
@@ -77,6 +107,30 @@ module TreeHaver
77
107
  # end
78
108
  class NotAvailable < Error; end
79
109
 
110
+ # Raised when attempting to use backends that are known to conflict
111
+ #
112
+ # This is a serious error that extends Exception (not StandardError) because
113
+ # it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
114
+ # FFI backend cannot coexist in the same process - once MRI loads, FFI will
115
+ # segfault when trying to set a language on a parser.
116
+ #
117
+ # This protection can be disabled with `TreeHaver.backend_protect = false`
118
+ # but doing so risks segfaults.
119
+ #
120
+ # @example Handling backend conflicts
121
+ # begin
122
+ # # This will raise if MRI was already used
123
+ # TreeHaver.with_backend(:ffi) { parser.language = lang }
124
+ # rescue TreeHaver::BackendConflict => e
125
+ # puts "Backend conflict: #{e.message}"
126
+ # # Fall back to a compatible backend
127
+ # end
128
+ #
129
+ # @example Disabling protection (not recommended)
130
+ # TreeHaver.backend_protect = false
131
+ # # Now you can test backend conflicts (at risk of segfaults)
132
+ class BackendConflict < Error; end
133
+
80
134
  # Namespace for backend implementations
81
135
  #
82
136
  # TreeHaver provides multiple backends to support different Ruby implementations:
@@ -85,12 +139,36 @@ module TreeHaver
85
139
  # - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
86
140
  # - {Backends::Java} - Uses JRuby's Java integration
87
141
  # - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
142
+ # - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
88
143
  module Backends
89
144
  autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
90
145
  autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
91
146
  autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
92
147
  autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
93
148
  autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
149
+ autoload :Prism, File.join(__dir__, "tree_haver", "backends", "prism")
150
+ autoload :Psych, File.join(__dir__, "tree_haver", "backends", "psych")
151
+ autoload :Commonmarker, File.join(__dir__, "tree_haver", "backends", "commonmarker")
152
+ autoload :Markly, File.join(__dir__, "tree_haver", "backends", "markly")
153
+
154
+ # Known backend conflicts
155
+ #
156
+ # Maps each backend to an array of backends that block it from working.
157
+ # For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
158
+ # FFI calls to ts_parser_set_language will segfault.
159
+ #
160
+ # @return [Hash{Symbol => Array<Symbol>}]
161
+ BLOCKED_BY = {
162
+ mri: [],
163
+ rust: [],
164
+ ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
165
+ java: [],
166
+ citrus: [],
167
+ prism: [], # Prism has no conflicts with other backends
168
+ psych: [], # Psych has no conflicts with other backends
169
+ commonmarker: [], # Commonmarker has no conflicts with other backends
170
+ markly: [], # Markly has no conflicts with other backends
171
+ }.freeze
94
172
  end
95
173
 
96
174
  # Security utilities for validating paths before loading shared libraries
@@ -121,6 +199,19 @@ module TreeHaver
121
199
  # @see PathValidator
122
200
  autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
123
201
 
202
+ # Citrus grammar finder for discovering and registering Citrus-based parsers
203
+ #
204
+ # @example Register toml-rb
205
+ # finder = TreeHaver::CitrusGrammarFinder.new(
206
+ # language: :toml,
207
+ # gem_name: "toml-rb",
208
+ # grammar_const: "TomlRB::Document"
209
+ # )
210
+ # finder.register! if finder.available?
211
+ #
212
+ # @see CitrusGrammarFinder
213
+ autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
214
+
124
215
  # Unified Node wrapper providing consistent API across backends
125
216
  autoload :Node, File.join(__dir__, "tree_haver", "node")
126
217
 
@@ -132,6 +223,77 @@ module TreeHaver
132
223
  # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, :citrus, :prism, :psych, :commonmarker, or :markly
133
224
  # @note Can be set via ENV["TREE_HAVER_BACKEND"]
134
225
  class << self
226
+ # Whether backend conflict protection is enabled
227
+ #
228
+ # When true (default), TreeHaver will raise BackendConflict if you try to
229
+ # use a backend that is known to conflict with a previously used backend.
230
+ # For example, FFI will not work after MRI has been used.
231
+ #
232
+ # Set to false to disable protection (useful for testing compatibility).
233
+ #
234
+ # @return [Boolean]
235
+ # @example Disable protection for testing
236
+ # TreeHaver.backend_protect = false
237
+ def backend_protect=(value)
238
+ @backend_protect_mutex ||= Mutex.new
239
+ @backend_protect_mutex.synchronize { @backend_protect = value }
240
+ end
241
+
242
+ # Check if backend conflict protection is enabled
243
+ #
244
+ # @return [Boolean] true if protection is enabled (default)
245
+ def backend_protect?
246
+ return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
247
+ true # Default is protected
248
+ end
249
+
250
+ # Alias for backend_protect?
251
+ def backend_protect
252
+ backend_protect?
253
+ end
254
+
255
+ # Track which backends have been used in this process
256
+ #
257
+ # @return [Set<Symbol>] set of backend symbols that have been used
258
+ def backends_used
259
+ @backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
260
+ end
261
+
262
+ # Record that a backend has been used
263
+ #
264
+ # @param backend [Symbol] the backend that was used
265
+ # @return [void]
266
+ # @api private
267
+ def record_backend_usage(backend)
268
+ backends_used << backend
269
+ end
270
+
271
+ # Check if a backend would conflict with previously used backends
272
+ #
273
+ # @param backend [Symbol] the backend to check
274
+ # @return [Array<Symbol>] list of previously used backends that block this one
275
+ def conflicting_backends_for(backend)
276
+ blockers = Backends::BLOCKED_BY[backend] || []
277
+ blockers & backends_used.to_a
278
+ end
279
+
280
+ # Check if using a backend would cause a conflict
281
+ #
282
+ # @param backend [Symbol] the backend to check
283
+ # @raise [BackendConflict] if protection is enabled and there's a conflict
284
+ # @return [void]
285
+ def check_backend_conflict!(backend)
286
+ return unless backend_protect?
287
+
288
+ conflicts = conflicting_backends_for(backend)
289
+ return if conflicts.empty?
290
+
291
+ raise BackendConflict,
292
+ "Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
293
+ "The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
294
+ "To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
295
+ end
296
+
135
297
  # @example
136
298
  # TreeHaver.backend # => :auto
137
299
  def backend
@@ -141,6 +303,10 @@ module TreeHaver
141
303
  when "ffi" then :ffi
142
304
  when "java" then :java
143
305
  when "citrus" then :citrus
306
+ when "prism" then :prism
307
+ when "psych" then :psych
308
+ when "commonmarker" then :commonmarker
309
+ when "markly" then :markly
144
310
  else :auto
145
311
  end
146
312
  end
@@ -171,6 +337,211 @@ module TreeHaver
171
337
  @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
172
338
  end
173
339
 
340
+ # Thread-local backend context storage
341
+ #
342
+ # Returns a hash containing the thread-local backend context with keys:
343
+ # - :backend - The backend name (Symbol) or nil if using global default
344
+ # - :depth - The nesting depth (Integer) for proper cleanup
345
+ #
346
+ # @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
347
+ # @example
348
+ # ctx = TreeHaver.current_backend_context
349
+ # ctx[:backend] # => nil or :ffi, :mri, etc.
350
+ # ctx[:depth] # => 0, 1, 2, etc.
351
+ def current_backend_context
352
+ Thread.current[:tree_haver_backend_context] ||= {
353
+ backend: nil, # nil means "use global default"
354
+ depth: 0, # Track nesting depth for proper cleanup
355
+ }
356
+ end
357
+
358
+ # Get the effective backend for current context
359
+ #
360
+ # Priority: thread-local context → global @backend → :auto
361
+ #
362
+ # @return [Symbol] the backend to use
363
+ # @example
364
+ # TreeHaver.effective_backend # => :auto (default)
365
+ # @example With thread-local context
366
+ # TreeHaver.with_backend(:ffi) do
367
+ # TreeHaver.effective_backend # => :ffi
368
+ # end
369
+ def effective_backend
370
+ ctx = current_backend_context
371
+ ctx[:backend] || backend || :auto
372
+ end
373
+
374
+ # Execute a block with a specific backend in thread-local context
375
+ #
376
+ # This method provides temporary, thread-safe backend switching for a block of code.
377
+ # The backend setting is automatically restored when the block exits, even if
378
+ # an exception is raised. Supports nesting—inner blocks override outer blocks,
379
+ # and each level is properly unwound.
380
+ #
381
+ # Thread Safety: Each thread maintains its own backend context, so concurrent
382
+ # threads can safely use different backends without interfering with each other.
383
+ #
384
+ # Use Cases:
385
+ # - Testing: Test the same code path with different backends
386
+ # - Performance comparison: Benchmark parsing with different backends
387
+ # - Fallback scenarios: Try one backend, fall back to another on failure
388
+ # - Thread isolation: Different threads can use different backends safely
389
+ #
390
+ # @param name [Symbol, String] backend name (:mri, :rust, :ffi, :java, :citrus, :prism, :psych, :commonmarker, :markly, :auto)
391
+ # @yield block to execute with the specified backend
392
+ # @return [Object] the return value of the block
393
+ # @raise [ArgumentError] if backend name is nil
394
+ # @raise [BackendConflict] if the requested backend conflicts with a previously used backend
395
+ #
396
+ # @example Basic usage
397
+ # TreeHaver.with_backend(:mri) do
398
+ # parser = TreeHaver::Parser.new
399
+ # tree = parser.parse(source)
400
+ # end
401
+ # # Backend is automatically restored here
402
+ #
403
+ # @example Nested blocks (inner overrides outer)
404
+ # TreeHaver.with_backend(:rust) do
405
+ # parser1 = TreeHaver::Parser.new # Uses :rust
406
+ # TreeHaver.with_backend(:citrus) do
407
+ # parser2 = TreeHaver::Parser.new # Uses :citrus
408
+ # end
409
+ # parser3 = TreeHaver::Parser.new # Back to :rust
410
+ # end
411
+ #
412
+ # @example Testing multiple backends
413
+ # [:mri, :rust, :citrus].each do |backend_name|
414
+ # TreeHaver.with_backend(backend_name) do
415
+ # parser = TreeHaver::Parser.new
416
+ # result = parser.parse(source)
417
+ # puts "#{backend_name}: #{result.root_node.type}"
418
+ # end
419
+ # end
420
+ #
421
+ # @example Exception safety (backend restored even on error)
422
+ # TreeHaver.with_backend(:mri) do
423
+ # raise "Something went wrong"
424
+ # rescue
425
+ # # Handle error
426
+ # end
427
+ # # Backend is still restored to its previous value
428
+ #
429
+ # @example Thread isolation
430
+ # threads = [:mri, :rust].map do |backend_name|
431
+ # Thread.new do
432
+ # TreeHaver.with_backend(backend_name) do
433
+ # # Each thread uses its own backend independently
434
+ # TreeHaver::Parser.new
435
+ # end
436
+ # end
437
+ # end
438
+ # threads.each(&:join)
439
+ #
440
+ # @see #effective_backend
441
+ # @see #current_backend_context
442
+ def with_backend(name)
443
+ raise ArgumentError, "Backend name required" if name.nil?
444
+
445
+ # Get context FIRST to ensure it exists
446
+ ctx = current_backend_context
447
+ old_backend = ctx[:backend]
448
+ old_depth = ctx[:depth]
449
+
450
+ begin
451
+ # Set new backend and increment depth
452
+ ctx[:backend] = name.to_sym
453
+ ctx[:depth] += 1
454
+
455
+ # Execute block
456
+ yield
457
+ ensure
458
+ # Restore previous backend and depth
459
+ # This ensures proper unwinding even with exceptions
460
+ ctx[:backend] = old_backend
461
+ ctx[:depth] = old_depth
462
+ end
463
+ end
464
+
465
+ # Resolve the effective backend considering explicit override
466
+ #
467
+ # Priority: explicit > thread context > global > :auto
468
+ #
469
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
470
+ # @return [Symbol] the backend to use
471
+ # @example
472
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi
473
+ # @example With thread-local context
474
+ # TreeHaver.with_backend(:mri) do
475
+ # TreeHaver.resolve_effective_backend(nil) # => :mri
476
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
477
+ # end
478
+ def resolve_effective_backend(explicit_backend = nil)
479
+ return explicit_backend.to_sym if explicit_backend
480
+ effective_backend
481
+ end
482
+
483
+ # Get backend module for a specific backend (with explicit override)
484
+ #
485
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
486
+ # @return [Module, nil] the backend module or nil if not available
487
+ # @raise [BackendConflict] if the backend conflicts with previously used backends
488
+ # @example
489
+ # mod = TreeHaver.resolve_backend_module(:ffi)
490
+ # mod.capabilities[:backend] # => :ffi
491
+ def resolve_backend_module(explicit_backend = nil)
492
+ # Determine the requested backend (an explicit argument wins over the thread-local/global setting)
493
+ requested = resolve_effective_backend(explicit_backend)
494
+
495
+ mod = case requested
496
+ when :mri
497
+ Backends::MRI
498
+ when :rust
499
+ Backends::Rust
500
+ when :ffi
501
+ Backends::FFI
502
+ when :java
503
+ Backends::Java
504
+ when :citrus
505
+ Backends::Citrus
506
+ when :prism
507
+ Backends::Prism
508
+ when :psych
509
+ Backends::Psych
510
+ when :commonmarker
511
+ Backends::Commonmarker
512
+ when :markly
513
+ Backends::Markly
514
+ when :auto
515
+ backend_module # Fall back to normal resolution for :auto
516
+ else
517
+ # Unknown backend name - return nil to trigger error in caller
518
+ nil
519
+ end
520
+
521
+ # Return nil if the module doesn't exist
522
+ return unless mod
523
+
524
+ # Check for backend conflicts FIRST, before checking availability
525
+ # This is critical because the conflict causes the backend to report unavailable
526
+ # We want to raise a clear error explaining WHY it's unavailable
527
+ # Use the requested backend name directly (not capabilities) because
528
+ # capabilities may be empty when the backend is blocked/unavailable
529
+ check_backend_conflict!(requested) if requested && requested != :auto
530
+
531
+ # Now check if the backend is available
532
+ # Why assume modules without available? are available?
533
+ # - Some backends might be mocked in tests without an available? method
534
+ # - This makes the code more defensive and test-friendly
535
+ # - It allows graceful degradation if a backend module is incomplete
536
+ # - Backward compatibility: if a module doesn't declare availability, assume it works
537
+ return if mod.respond_to?(:available?) && !mod.available?
538
+
539
+ # Record that this backend is being used
540
+ record_backend_usage(requested) if requested && requested != :auto
541
+
542
+ mod
543
+ end
544
+
174
545
  # Determine the concrete backend module to use
175
546
  #
176
547
  # This method performs backend auto-selection when backend is :auto.
@@ -185,7 +556,7 @@ module TreeHaver
185
556
  # puts "Using #{mod.capabilities[:backend]} backend"
186
557
  # end
187
558
  def backend_module
188
- case backend
559
+ case effective_backend # Changed from: backend
189
560
  when :mri
190
561
  Backends::MRI
191
562
  when :rust
@@ -196,6 +567,14 @@ module TreeHaver
196
567
  Backends::Java
197
568
  when :citrus
198
569
  Backends::Citrus
570
+ when :prism
571
+ Backends::Prism
572
+ when :psych
573
+ Backends::Psych
574
+ when :commonmarker
575
+ Backends::Commonmarker
576
+ when :markly
577
+ Backends::Markly
199
578
  else
200
579
  # auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
201
580
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && Backends::Java.available?
@@ -240,44 +619,102 @@ module TreeHaver
240
619
  # Allows opting-in dynamic helpers like TreeHaver::Language.toml without
241
620
  # advertising all names by default.
242
621
 
243
- # Register a language helper by name
622
+ # Register a language helper by name (backend-agnostic)
244
623
  #
245
624
  # After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
246
- # to load the registered language.
625
+ # to load the registered language. TreeHaver will automatically use the appropriate
626
+ # grammar based on the active backend.
627
+ #
628
+ # The `name` parameter is an arbitrary identifier you choose - it doesn't need to
629
+ # match the actual language name. This is useful for:
630
+ # - Testing: Use unique names like `:toml_test` to avoid collisions
631
+ # - Aliasing: Register the same grammar under multiple names
632
+ # - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
633
+ #
634
+ # The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
635
+ # `grammar_module` (Citrus), not from the name.
247
636
  #
248
- # @param name [Symbol, String] language identifier (e.g., :toml, :json)
249
- # @param path [String] absolute path to the language shared library
637
+ # IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
638
+ # library AND a Citrus grammar for the same language IN A SINGLE CALL.
639
+ # This is achieved by using separate `if` statements (not `elsif`) and no early
640
+ # returns. This design is deliberate and provides significant benefits:
641
+ #
642
+ # Why register both backends for one language?
643
+ # - Backend flexibility: Code works regardless of which backend is active
644
+ # - Performance testing: Compare tree-sitter vs Citrus performance
645
+ # - Gradual migration: Transition between backends without breaking code
646
+ # - Fallback scenarios: Use Citrus when tree-sitter library unavailable
647
+ # - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
648
+ #
649
+ # The active backend determines which registration is used automatically.
650
+ # No code changes needed to switch backends - just change TreeHaver.backend.
651
+ #
652
+ # @param name [Symbol, String] identifier for this registration (can be any name you choose)
653
+ # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
250
654
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
655
+ # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
656
+ # @param gem_name [String, nil] optional gem name for error messages
251
657
  # @return [void]
252
- # @example Register TOML grammar
658
+ # @example Register tree-sitter grammar only
253
659
  # TreeHaver.register_language(
254
660
  # :toml,
255
661
  # path: "/usr/local/lib/libtree-sitter-toml.so",
256
662
  # symbol: "tree_sitter_toml"
257
663
  # )
258
- def register_language(name, path:, symbol: nil)
259
- LanguageRegistry.register(name, path: path, symbol: symbol)
260
- end
664
+ # @example Register Citrus grammar only
665
+ # TreeHaver.register_language(
666
+ # :toml,
667
+ # grammar_module: TomlRB::Document,
668
+ # gem_name: "toml-rb"
669
+ # )
670
+ # @example Register BOTH backends in separate calls
671
+ # TreeHaver.register_language(
672
+ # :toml,
673
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
674
+ # symbol: "tree_sitter_toml"
675
+ # )
676
+ # TreeHaver.register_language(
677
+ # :toml,
678
+ # grammar_module: TomlRB::Document,
679
+ # gem_name: "toml-rb"
680
+ # )
681
+ # @example Register BOTH backends in ONE call (recommended for maximum flexibility)
682
+ # TreeHaver.register_language(
683
+ # :toml,
684
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
685
+ # symbol: "tree_sitter_toml",
686
+ # grammar_module: TomlRB::Document,
687
+ # gem_name: "toml-rb"
688
+ # )
689
+ # # Now TreeHaver::Language.toml works with ANY backend!
690
+ def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
691
+ # Register tree-sitter backend if path provided
692
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
693
+ if path
694
+ LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol)
695
+ end
261
696
 
262
- # Unregister a previously registered language helper
263
- #
264
- # @param name [Symbol, String] language identifier to unregister
265
- # @return [void]
266
- # @example
267
- # TreeHaver.unregister_language(:toml)
268
- def unregister_language(name)
269
- LanguageRegistry.unregister(name)
270
- end
697
+ # Register Citrus backend if grammar_module provided
698
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
699
+ # This allows maximum flexibility - register once, use with any backend
700
+ if grammar_module
701
+ unless grammar_module.respond_to?(:parse)
702
+ raise ArgumentError, "Grammar module must respond to :parse"
703
+ end
271
704
 
272
- # Clear all registered languages
273
- #
274
- # Primarily intended for test cleanup and resetting state.
275
- #
276
- # @return [void]
277
- # @example
278
- # TreeHaver.clear_languages!
279
- def clear_languages!
280
- LanguageRegistry.clear_registrations!
705
+ LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
706
+ end
707
+
708
+ # Require at least one backend to be registered
709
+ if path.nil? && grammar_module.nil?
710
+ raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
711
+ end
712
+
713
+ # Note: No early return! This method intentionally processes both `if` blocks
714
+ # above to allow registering multiple backends for the same language.
715
+ # Both tree-sitter and Citrus can be registered simultaneously for maximum
716
+ # flexibility. See method documentation for rationale.
717
+ nil
281
718
  end
282
719
 
283
720
  # Fetch a registered language entry
@@ -338,6 +775,7 @@ module TreeHaver
338
775
  # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
339
776
  # @param name [String, nil] logical name for the language (used in caching)
340
777
  # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
778
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
341
779
  # @return [Language] loaded language handle
342
780
  # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
343
781
  # @raise [ArgumentError] if path or symbol fails security validation
@@ -347,7 +785,13 @@ module TreeHaver
347
785
  # symbol: "tree_sitter_toml",
348
786
  # name: "toml"
349
787
  # )
350
- def from_library(path, symbol: nil, name: nil, validate: true)
788
+ # @example With explicit backend
789
+ # language = TreeHaver::Language.from_library(
790
+ # "/usr/local/lib/libtree-sitter-toml.so",
791
+ # symbol: "tree_sitter_toml",
792
+ # backend: :ffi
793
+ # )
794
+ def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
351
795
  if validate
352
796
  unless PathValidator.safe_library_path?(path)
353
797
  errors = PathValidator.validation_errors(path)
@@ -360,11 +804,20 @@ module TreeHaver
360
804
  end
361
805
  end
362
806
 
363
- mod = TreeHaver.backend_module
364
- raise NotAvailable, "No TreeHaver backend is available" unless mod
807
+ mod = TreeHaver.resolve_backend_module(backend)
808
+
809
+ if mod.nil?
810
+ if backend
811
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
812
+ else
813
+ raise NotAvailable, "No TreeHaver backend is available"
814
+ end
815
+ end
816
+
365
817
  # Backend must implement .from_library; fallback to .from_path for older impls
366
- # Include ENV vars in cache key since they affect symbol resolution
367
- key = [path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
818
+ # Include effective backend AND ENV vars in cache key since they affect loading
819
+ effective_b = TreeHaver.resolve_effective_backend(backend)
820
+ key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
368
821
  LanguageRegistry.fetch(key) do
369
822
  if mod::Language.respond_to?(:from_library)
370
823
  mod::Language.from_library(path, symbol: symbol, name: name)
@@ -380,31 +833,102 @@ module TreeHaver
380
833
  # Dynamic helper to load a registered language by name
381
834
  #
382
835
  # After registering a language with {TreeHaver.register_language},
383
- # you can load it using a method call:
836
+ # you can load it using a method call. The appropriate backend will be
837
+ # used based on registration and current backend.
384
838
  #
385
- # @example
839
+ # @example With tree-sitter
386
840
  # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
387
841
  # language = TreeHaver::Language.toml
388
842
  #
389
- # @example With overrides
390
- # language = TreeHaver::Language.toml(path: "/custom/path.so")
843
+ # @example With both backends
844
+ # TreeHaver.register_language(:toml,
845
+ # path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
846
+ # TreeHaver.register_language(:toml,
847
+ # grammar_module: TomlRB::Document)
848
+ # language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
391
849
  #
392
850
  # @param method_name [Symbol] the registered language name
393
- # @param args [Array] positional arguments (first is used as path if provided)
394
- # @param kwargs [Hash] keyword arguments (:path, :symbol, :name)
851
+ # @param args [Array] positional arguments
852
+ # @param kwargs [Hash] keyword arguments
395
853
  # @return [Language] loaded language handle
396
854
  # @raise [NoMethodError] if the language name is not registered
397
855
  def method_missing(method_name, *args, **kwargs, &block)
398
856
  # Resolve only if the language name was registered
399
- reg = TreeHaver.registered_language(method_name)
400
- return super unless reg
401
-
402
- # Allow per-call overrides; otherwise use registered defaults
403
- path = kwargs[:path] || args.first || reg[:path]
404
- raise ArgumentError, "path is required" unless path
405
- symbol = kwargs.key?(:symbol) ? kwargs[:symbol] : (reg[:symbol] || "tree_sitter_#{method_name}")
406
- name = kwargs[:name] || method_name.to_s
407
- from_library(path, symbol: symbol, name: name)
857
+ all_backends = TreeHaver.registered_language(method_name)
858
+ return super unless all_backends
859
+
860
+ # Check current backend
861
+ current_backend = TreeHaver.backend_module
862
+
863
+ # Determine which backend type to use
864
+ backend_type = if current_backend == Backends::Citrus
865
+ :citrus
866
+ else
867
+ :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
868
+ end
869
+
870
+ # Get backend-specific registration
871
+ reg = all_backends[backend_type]
872
+
873
+ # If Citrus backend is active
874
+ if backend_type == :citrus
875
+ if reg && reg[:grammar_module]
876
+ return Backends::Citrus::Language.new(reg[:grammar_module])
877
+ end
878
+
879
+ # Fall back to error if no Citrus grammar registered
880
+ raise NotAvailable,
881
+ "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
882
+ "Either register a Citrus grammar or use a tree-sitter backend. " \
883
+ "Registered backends: #{all_backends.keys.inspect}"
884
+ end
885
+
886
+ # For tree-sitter backends, try to load from path
887
+ # If that fails, fall back to Citrus if available
888
+ if reg && reg[:path]
889
+ path = kwargs[:path] || args.first || reg[:path]
890
+ # Symbol priority: kwargs override > registration > derive from method_name
891
+ symbol = if kwargs.key?(:symbol)
892
+ kwargs[:symbol]
893
+ elsif reg[:symbol]
894
+ reg[:symbol]
895
+ else
896
+ "tree_sitter_#{method_name}"
897
+ end
898
+ # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
899
+ # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
900
+ # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
901
+ name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
902
+
903
+ begin
904
+ return from_library(path, symbol: symbol, name: name)
905
+ rescue NotAvailable, ArgumentError, LoadError, FFI::NotFoundError => _e
906
+ # Tree-sitter failed to load - check for Citrus fallback
907
+ # This handles cases where:
908
+ # - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
909
+ # - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
910
+ # - Invalid arguments were provided (ArgumentError)
911
+ citrus_reg = all_backends[:citrus]
912
+ if citrus_reg && citrus_reg[:grammar_module]
913
+ return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
914
+ end
915
+ # No Citrus fallback available, re-raise the original error
916
+ raise
917
+ end
918
+ end
919
+
920
+ # No tree-sitter path registered - check for Citrus fallback
921
+ # This enables auto-fallback when tree-sitter grammar is not installed
922
+ # but a Citrus grammar (pure Ruby) is available
923
+ citrus_reg = all_backends[:citrus]
924
+ if citrus_reg && citrus_reg[:grammar_module]
925
+ return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
926
+ end
927
+
928
+ # No appropriate registration found
929
+ raise ArgumentError,
930
+ "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
931
+ "Registered backends: #{all_backends.keys.inspect}"
408
932
  end
409
933
 
410
934
  # @api private
@@ -419,6 +943,29 @@ module TreeHaver
419
943
  # A Parser is used to parse source code into a syntax tree. You must
420
944
  # set a language before parsing.
421
945
  #
946
+ # == Wrapping/Unwrapping Responsibility
947
+ #
948
+ # TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
949
+ #
950
+ # **Language objects:**
951
+ # - Unwraps Language wrappers before passing to backend.language=
952
+ # - MRI backend receives ::TreeSitter::Language
953
+ # - Rust backend receives String (language name)
954
+ # - FFI backend receives wrapped Language (needs to_ptr)
955
+ #
956
+ # **Tree objects:**
957
+ # - parse() receives raw source, backend returns raw tree, Parser wraps it
958
+ # - parse_string() unwraps old_tree before passing to backend, wraps returned tree
959
+ # - Backends always work with raw backend trees, never TreeHaver::Tree
960
+ #
961
+ # **Node objects:**
962
+ # - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
963
+ #
964
+ # This design ensures:
965
+ # - Principle of Least Surprise: wrapping happens at boundaries, consistently
966
+ # - Backends are simple: they don't need to know about TreeHaver wrappers
967
+ # - Single Responsibility: wrapping logic is only in TreeHaver::Parser
968
+ #
422
969
  # @example Basic parsing
423
970
  # parser = TreeHaver::Parser.new
424
971
  # parser.language = TreeHaver::Language.toml
@@ -426,11 +973,76 @@ module TreeHaver
426
973
  class Parser
427
974
  # Create a new parser instance
428
975
  #
429
- # @raise [NotAvailable] if no backend is available
430
- def initialize
431
- mod = TreeHaver.backend_module
432
- raise NotAvailable, "No TreeHaver backend is available" unless mod
433
- @impl = mod::Parser.new
976
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
977
+ # @raise [NotAvailable] if no backend is available or requested backend is unavailable
978
+ # @example Default (uses context/global)
979
+ # parser = TreeHaver::Parser.new
980
+ # @example Explicit backend
981
+ # parser = TreeHaver::Parser.new(backend: :ffi)
982
+ def initialize(backend: nil)
983
+ # Convert string backend names to symbols for consistency
984
+ backend = backend.to_sym if backend.is_a?(String)
985
+
986
+ mod = TreeHaver.resolve_backend_module(backend)
987
+
988
+ if mod.nil?
989
+ if backend
990
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
991
+ else
992
+ raise NotAvailable, "No TreeHaver backend is available"
993
+ end
994
+ end
995
+
996
+ # Try to create the parser, with fallback to Citrus if tree-sitter fails
997
+ # This enables auto-fallback when tree-sitter runtime isn't available
998
+ begin
999
+ @impl = mod::Parser.new
1000
+ @explicit_backend = backend # Remember for introspection (always a Symbol or nil)
1001
+ rescue NoMethodError, FFI::NotFoundError, LoadError => e
1002
+ # Tree-sitter backend failed (likely missing runtime library)
1003
+ # Try Citrus as fallback if we weren't explicitly asked for a specific backend
1004
+ if backend.nil? || backend == :auto
1005
+ if Backends::Citrus.available?
1006
+ @impl = Backends::Citrus::Parser.new
1007
+ @explicit_backend = :citrus
1008
+ else
1009
+ # No fallback available, re-raise original error
1010
+ raise NotAvailable, "Tree-sitter backend failed: #{e.message}. " \
1011
+ "Citrus fallback not available. Install tree-sitter runtime or citrus gem."
1012
+ end
1013
+ else
1014
+ # Explicit backend was requested, don't fallback
1015
+ raise
1016
+ end
1017
+ end
1018
+ end
1019
+
1020
+ # Get the backend this parser is using (for introspection)
1021
+ #
1022
+ # Returns the actual backend in use, resolving :auto to the concrete backend.
1023
+ #
1024
+ # @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
1025
+ def backend
1026
+ if @explicit_backend && @explicit_backend != :auto
1027
+ @explicit_backend
1028
+ else
1029
+ # Determine actual backend from the implementation class
1030
+ case @impl.class.name
1031
+ when /MRI/
1032
+ :mri
1033
+ when /Rust/
1034
+ :rust
1035
+ when /FFI/
1036
+ :ffi
1037
+ when /Java/
1038
+ :java
1039
+ when /Citrus/
1040
+ :citrus
1041
+ else
1042
+ # Fallback to effective_backend if we can't determine from class name
1043
+ TreeHaver.effective_backend
1044
+ end
1045
+ end
434
1046
  end
435
1047
 
436
1048
  # Set the language grammar for this parser
@@ -440,9 +1052,154 @@ module TreeHaver
440
1052
  # @example
441
1053
  # parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
442
1054
  def language=(lang)
443
- @impl.language = lang
1055
+ # Check if this is a Citrus language - if so, we need a Citrus parser
1056
+ # This enables automatic backend switching when tree-sitter fails and
1057
+ # falls back to Citrus
1058
+ if lang.is_a?(Backends::Citrus::Language)
1059
+ unless @impl.is_a?(Backends::Citrus::Parser)
1060
+ # Switch to Citrus parser to match the Citrus language
1061
+ @impl = Backends::Citrus::Parser.new
1062
+ @explicit_backend = :citrus
1063
+ end
1064
+ end
1065
+
1066
+ # Unwrap the language before passing to backend
1067
+ # Backends receive raw language objects, never TreeHaver wrappers
1068
+ inner_lang = unwrap_language(lang)
1069
+ @impl.language = inner_lang
1070
+ # Return the original (possibly wrapped) language for consistency
1071
+ lang # rubocop:disable Lint/Void (intentional return value)
444
1072
  end
445
1073
 
1074
+ private
1075
+
1076
+ # Unwrap a language object to extract the raw backend language
1077
+ #
1078
+ # This method is smart about backend compatibility:
1079
+ # 1. If language has a backend attribute, checks if it matches current backend
1080
+ # 2. If mismatch detected, attempts to reload language for correct backend
1081
+ # 3. If reload successful, uses new language; otherwise continues with original
1082
+ # 4. Unwraps the language wrapper to get raw backend object
1083
+ #
1084
+ # @param lang [Object] wrapped or raw language object
1085
+ # @return [Object] raw backend language object appropriate for current backend
1086
+ # @api private
1087
+ def unwrap_language(lang)
1088
+ # Check if this is a TreeHaver language wrapper with backend info
1089
+ if lang.respond_to?(:backend)
1090
+ # Verify backend compatibility FIRST
1091
+ # This prevents passing languages from wrong backends to native code
1092
+ # Exception: :auto backend is permissive - accepts any language
1093
+ current_backend = backend
1094
+
1095
+ if lang.backend != current_backend && current_backend != :auto
1096
+ # Backend mismatch! Try to reload for correct backend
1097
+ reloaded = try_reload_language_for_backend(lang, current_backend)
1098
+ if reloaded
1099
+ lang = reloaded
1100
+ else
1101
+ # Couldn't reload - this is an error
1102
+ raise TreeHaver::Error,
1103
+ "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
1104
+ "Cannot reload language for correct backend. " \
1105
+ "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
1106
+ end
1107
+ end
1108
+
1109
+ # Get the current parser's language (if set)
1110
+ current_lang = @impl.respond_to?(:language) ? @impl.language : nil
1111
+
1112
+ # Language mismatch detected! The parser might have a different language set
1113
+ # Compare the actual language objects using Comparable
1114
+ if current_lang && lang != current_lang
1115
+ # Different language being set (e.g., switching from TOML to JSON)
1116
+ # This is fine, just informational
1117
+ end
1118
+ end
1119
+
1120
+ # Unwrap based on backend type
1121
+ # All TreeHaver Language wrappers have the backend attribute
1122
+ unless lang.respond_to?(:backend)
1123
+ # This shouldn't happen - all our wrappers have backend attribute
1124
+ # If we get here, it's likely a raw backend object that was passed directly
1125
+ raise TreeHaver::Error,
1126
+ "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
1127
+ "Use TreeHaver::Language.from_library to create language objects."
1128
+ end
1129
+
1130
+ case lang.backend
1131
+ when :mri
1132
+ return lang.to_language if lang.respond_to?(:to_language)
1133
+ return lang.inner_language if lang.respond_to?(:inner_language)
1134
+ when :rust
1135
+ return lang.name if lang.respond_to?(:name)
1136
+ when :ffi
1137
+ return lang # FFI needs wrapper for to_ptr
1138
+ when :java
1139
+ return lang.impl if lang.respond_to?(:impl)
1140
+ when :citrus
1141
+ return lang.grammar_module if lang.respond_to?(:grammar_module)
1142
+ when :prism
1143
+ return lang # Prism backend expects the Language wrapper
1144
+ when :psych
1145
+ return lang # Psych backend expects the Language wrapper
1146
+ when :commonmarker
1147
+ return lang # Commonmarker backend expects the Language wrapper
1148
+ when :markly
1149
+ return lang # Markly backend expects the Language wrapper
1150
+ else
1151
+ # Unknown backend (e.g., test backend)
1152
+ # Try generic unwrapping methods for flexibility in testing
1153
+ return lang.to_language if lang.respond_to?(:to_language)
1154
+ return lang.inner_language if lang.respond_to?(:inner_language)
1155
+ return lang.impl if lang.respond_to?(:impl)
1156
+ return lang.grammar_module if lang.respond_to?(:grammar_module)
1157
+ return lang.name if lang.respond_to?(:name)
1158
+
1159
+ # If nothing works, pass through as-is
1160
+ # This allows test languages to be passed directly
1161
+ return lang
1162
+ end
1163
+
1164
+ # Shouldn't reach here, but just in case
1165
+ lang
1166
+ end
1167
+
1168
+ # Try to reload a language for the current backend
1169
+ #
1170
+ # This handles the case where a language was loaded for one backend,
1171
+ # but is now being used with a different backend (e.g., after backend switch).
1172
+ #
1173
+ # @param lang [Object] language object with metadata
1174
+ # @param target_backend [Symbol] backend to reload for
1175
+ # @return [Object, nil] reloaded language or nil if reload not possible
1176
+ # @api private
1177
+ def try_reload_language_for_backend(lang, target_backend)
1178
+ # Can't reload without path information
1179
+ return unless lang.respond_to?(:path) || lang.respond_to?(:grammar_module)
1180
+
1181
+ # For tree-sitter backends, reload from path
1182
+ if lang.respond_to?(:path) && lang.path
1183
+ begin
1184
+ # Use Language.from_library which respects current backend
1185
+ return Language.from_library(
1186
+ lang.path,
1187
+ symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
1188
+ name: lang.respond_to?(:name) ? lang.name : nil,
1189
+ )
1190
+ rescue => e
1191
+ # Reload failed, continue with original
1192
+ warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
1193
+ return
1194
+ end
1195
+ end
1196
+
1197
+ # For Citrus, can't really reload as it's just a module reference
1198
+ nil
1199
+ end
1200
+
1201
+ public
1202
+
446
1203
  # Parse source code into a syntax tree
447
1204
  #
448
1205
  # @param source [String] the source code to parse (should be UTF-8)
@@ -452,7 +1209,8 @@ module TreeHaver
452
1209
  # puts tree.root_node.type
453
1210
  def parse(source)
454
1211
  tree_impl = @impl.parse(source)
455
- Tree.new(tree_impl)
1212
+ # Wrap backend tree with source so Node#text works
1213
+ Tree.new(tree_impl, source: source)
456
1214
  end
457
1215
 
458
1216
  # Parse source code into a syntax tree (with optional incremental parsing)
@@ -501,10 +1259,12 @@ module TreeHaver
501
1259
  old_tree
502
1260
  end
503
1261
  tree_impl = @impl.parse_string(old_impl, source)
504
- Tree.new(tree_impl)
1262
+ # Wrap backend tree with source so Node#text works
1263
+ Tree.new(tree_impl, source: source)
505
1264
  elsif @impl.respond_to?(:parse_string)
506
1265
  tree_impl = @impl.parse_string(nil, source)
507
- Tree.new(tree_impl)
1266
+ # Wrap backend tree with source so Node#text works
1267
+ Tree.new(tree_impl, source: source)
508
1268
  else
509
1269
  # Fallback for backends that don't support parse_string
510
1270
  parse(source)