tree_haver 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -3,6 +3,9 @@
3
3
  # External gems
4
4
  require "version_gem"
5
5
 
6
+ # Standard library
7
+ require "set"
8
+
6
9
  # This gem
7
10
  require_relative "tree_haver/version"
8
11
  require_relative "tree_haver/language_registry"
@@ -58,11 +61,14 @@ require_relative "tree_haver/language_registry"
58
61
  # @see GrammarFinder For automatic grammar library discovery
59
62
  module TreeHaver
60
63
  # Base error class for TreeHaver exceptions
64
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
61
65
  #
62
66
  # @abstract Subclass to create specific error types
63
- class Error < StandardError; end
67
+ class Error < Exception; end # rubocop:disable Lint/InheritException
64
68
 
65
69
  # Raised when a requested backend or feature is not available
70
+ # These are serious errors that extend Exception (not StandardError).
71
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
66
72
  #
67
73
  # This can occur when:
68
74
  # - Required native libraries are not installed
@@ -77,6 +83,30 @@ module TreeHaver
77
83
  # end
78
84
  class NotAvailable < Error; end
79
85
 
86
+ # Raised when attempting to use backends that are known to conflict
87
+ #
88
+ # This is a serious error that extends Exception (not StandardError) because
89
+ # it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
90
+ # FFI backend cannot coexist in the same process - once MRI loads, FFI will
91
+ # segfault when trying to set a language on a parser.
92
+ #
93
+ # This protection can be disabled with `TreeHaver.backend_protect = false`
94
+ # but doing so risks segfaults.
95
+ #
96
+ # @example Handling backend conflicts
97
+ # begin
98
+ # # This will raise if MRI was already used
99
+ # TreeHaver.with_backend(:ffi) { parser.language = lang }
100
+ # rescue TreeHaver::BackendConflict => e
101
+ # puts "Backend conflict: #{e.message}"
102
+ # # Fall back to a compatible backend
103
+ # end
104
+ #
105
+ # @example Disabling protection (not recommended)
106
+ # TreeHaver.backend_protect = false
107
+ # # Now you can test backend conflicts (at risk of segfaults)
108
+ class BackendConflict < Error; end
109
+
80
110
  # Namespace for backend implementations
81
111
  #
82
112
  # TreeHaver provides multiple backends to support different Ruby implementations:
@@ -91,6 +121,21 @@ module TreeHaver
91
121
  autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
92
122
  autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
93
123
  autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
124
+
125
+ # Known backend conflicts
126
+ #
127
+ # Maps each backend to an array of backends that block it from working.
128
+ # For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
129
+ # FFI calls to ts_parser_set_language will segfault.
130
+ #
131
+ # @return [Hash{Symbol => Array<Symbol>}]
132
+ BLOCKED_BY = {
133
+ mri: [],
134
+ rust: [],
135
+ ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
136
+ java: [],
137
+ citrus: [],
138
+ }.freeze
94
139
  end
95
140
 
96
141
  # Security utilities for validating paths before loading shared libraries
@@ -121,6 +166,19 @@ module TreeHaver
121
166
  # @see PathValidator
122
167
  autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
123
168
 
169
+ # Citrus grammar finder for discovering and registering Citrus-based parsers
170
+ #
171
+ # @example Register toml-rb
172
+ # finder = TreeHaver::CitrusGrammarFinder.new(
173
+ # language: :toml,
174
+ # gem_name: "toml-rb",
175
+ # grammar_const: "TomlRB::Document"
176
+ # )
177
+ # finder.register! if finder.available?
178
+ #
179
+ # @see CitrusGrammarFinder
180
+ autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
181
+
124
182
  # Unified Node wrapper providing consistent API across backends
125
183
  autoload :Node, File.join(__dir__, "tree_haver", "node")
126
184
 
@@ -132,6 +190,74 @@ module TreeHaver
132
190
  # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
133
191
  # @note Can be set via ENV["TREE_HAVER_BACKEND"]
134
192
  class << self
193
+ # Whether backend conflict protection is enabled
194
+ #
195
+ # When true (default), TreeHaver will raise BackendConflict if you try to
196
+ # use a backend that is known to conflict with a previously used backend.
197
+ # For example, FFI will not work after MRI has been used.
198
+ #
199
+ # Set to false to disable protection (useful for testing compatibility).
200
+ #
201
+ # @return [Boolean]
202
+ # @example Disable protection for testing
203
+ # TreeHaver.backend_protect = false
204
+ attr_writer :backend_protect
205
+
206
+ # Check if backend conflict protection is enabled
207
+ #
208
+ # @return [Boolean] true if protection is enabled (default)
209
+ def backend_protect?
210
+ return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
211
+ true # Default is protected
212
+ end
213
+
214
+ # Alias for backend_protect?
215
+ def backend_protect
216
+ backend_protect?
217
+ end
218
+
219
+ # Track which backends have been used in this process
220
+ #
221
+ # @return [Set<Symbol>] set of backend symbols that have been used
222
+ def backends_used
223
+ @backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
224
+ end
225
+
226
+ # Record that a backend has been used
227
+ #
228
+ # @param backend [Symbol] the backend that was used
229
+ # @return [void]
230
+ # @api private
231
+ def record_backend_usage(backend)
232
+ backends_used << backend
233
+ end
234
+
235
+ # Check if a backend would conflict with previously used backends
236
+ #
237
+ # @param backend [Symbol] the backend to check
238
+ # @return [Array<Symbol>] list of previously used backends that block this one
239
+ def conflicting_backends_for(backend)
240
+ blockers = Backends::BLOCKED_BY[backend] || []
241
+ blockers & backends_used.to_a
242
+ end
243
+
244
+ # Check if using a backend would cause a conflict
245
+ #
246
+ # @param backend [Symbol] the backend to check
247
+ # @raise [BackendConflict] if protection is enabled and there's a conflict
248
+ # @return [void]
249
+ def check_backend_conflict!(backend)
250
+ return unless backend_protect?
251
+
252
+ conflicts = conflicting_backends_for(backend)
253
+ return if conflicts.empty?
254
+
255
+ raise BackendConflict,
256
+ "Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
257
+ "The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
258
+ "To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
259
+ end
260
+
135
261
  # @example
136
262
  # TreeHaver.backend # => :auto
137
263
  def backend
@@ -171,6 +297,203 @@ module TreeHaver
171
297
  @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
172
298
  end
173
299
 
300
+ # Thread-local backend context storage
301
+ #
302
+ # Returns a hash containing the thread-local backend context with keys:
303
+ # - :backend - The backend name (Symbol) or nil if using global default
304
+ # - :depth - The nesting depth (Integer) for proper cleanup
305
+ #
306
+ # @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
307
+ # @example
308
+ # ctx = TreeHaver.current_backend_context
309
+ # ctx[:backend] # => nil or :ffi, :mri, etc.
310
+ # ctx[:depth] # => 0, 1, 2, etc.
311
+ def current_backend_context
312
+ Thread.current[:tree_haver_backend_context] ||= {
313
+ backend: nil, # nil means "use global default"
314
+ depth: 0, # Track nesting depth for proper cleanup
315
+ }
316
+ end
317
+
318
+ # Get the effective backend for current context
319
+ #
320
+ # Priority: thread-local context → global @backend → :auto
321
+ #
322
+ # @return [Symbol] the backend to use
323
+ # @example
324
+ # TreeHaver.effective_backend # => :auto (default)
325
+ # @example With thread-local context
326
+ # TreeHaver.with_backend(:ffi) do
327
+ # TreeHaver.effective_backend # => :ffi
328
+ # end
329
+ def effective_backend
330
+ ctx = current_backend_context
331
+ ctx[:backend] || backend || :auto
332
+ end
333
+
334
+ # Execute a block with a specific backend in thread-local context
335
+ #
336
+ # This method provides temporary, thread-safe backend switching for a block of code.
337
+ # The backend setting is automatically restored when the block exits, even if
338
+ # an exception is raised. Supports nesting—inner blocks override outer blocks,
339
+ # and each level is properly unwound.
340
+ #
341
+ # Thread Safety: Each thread maintains its own backend context, so concurrent
342
+ # threads can safely use different backends without interfering with each other.
343
+ #
344
+ # Use Cases:
345
+ # - Testing: Test the same code path with different backends
346
+ # - Performance comparison: Benchmark parsing with different backends
347
+ # - Fallback scenarios: Try one backend, fall back to another on failure
348
+ # - Thread isolation: Different threads can use different backends safely
349
+ #
350
+ # @param name [Symbol, String] backend name (:mri, :rust, :ffi, :java, :citrus, :auto)
351
+ # @yield block to execute with the specified backend
352
+ # @return [Object] the return value of the block
353
+ # @raise [ArgumentError] if backend name is nil
354
+ # @raise [BackendConflict] if code inside the block resolves a backend that conflicts with a previously used backend
355
+ #
356
+ # @example Basic usage
357
+ # TreeHaver.with_backend(:mri) do
358
+ # parser = TreeHaver::Parser.new
359
+ # tree = parser.parse(source)
360
+ # end
361
+ # # Backend is automatically restored here
362
+ #
363
+ # @example Nested blocks (inner overrides outer)
364
+ # TreeHaver.with_backend(:rust) do
365
+ # parser1 = TreeHaver::Parser.new # Uses :rust
366
+ # TreeHaver.with_backend(:citrus) do
367
+ # parser2 = TreeHaver::Parser.new # Uses :citrus
368
+ # end
369
+ # parser3 = TreeHaver::Parser.new # Back to :rust
370
+ # end
371
+ #
372
+ # @example Testing multiple backends
373
+ # [:mri, :rust, :citrus].each do |backend_name|
374
+ # TreeHaver.with_backend(backend_name) do
375
+ # parser = TreeHaver::Parser.new
376
+ # result = parser.parse(source)
377
+ # puts "#{backend_name}: #{result.root_node.type}"
378
+ # end
379
+ # end
380
+ #
381
+ # @example Exception safety (backend restored even on error)
382
+ # TreeHaver.with_backend(:mri) do
383
+ # raise "Something went wrong"
384
+ # rescue
385
+ # # Handle error
386
+ # end
387
+ # # Backend is still restored to its previous value
388
+ #
389
+ # @example Thread isolation
390
+ # threads = [:mri, :rust].map do |backend_name|
391
+ # Thread.new do
392
+ # TreeHaver.with_backend(backend_name) do
393
+ # # Each thread uses its own backend independently
394
+ # TreeHaver::Parser.new
395
+ # end
396
+ # end
397
+ # end
398
+ # threads.each(&:join)
399
+ #
400
+ # @see #effective_backend
401
+ # @see #current_backend_context
402
+ def with_backend(name)
403
+ raise ArgumentError, "Backend name required" if name.nil?
404
+
405
+ # Get context FIRST to ensure it exists
406
+ ctx = current_backend_context
407
+ old_backend = ctx[:backend]
408
+ old_depth = ctx[:depth]
409
+
410
+ begin
411
+ # Set new backend and increment depth
412
+ ctx[:backend] = name.to_sym
413
+ ctx[:depth] += 1
414
+
415
+ # Execute block
416
+ yield
417
+ ensure
418
+ # Restore previous backend and depth
419
+ # This ensures proper unwinding even with exceptions
420
+ ctx[:backend] = old_backend
421
+ ctx[:depth] = old_depth
422
+ end
423
+ end
424
+
425
+ # Resolve the effective backend considering explicit override
426
+ #
427
+ # Priority: explicit > thread context > global > :auto
428
+ #
429
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
430
+ # @return [Symbol] the backend to use
431
+ # @example
432
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi
433
+ # @example With thread-local context
434
+ # TreeHaver.with_backend(:mri) do
435
+ # TreeHaver.resolve_effective_backend(nil) # => :mri
436
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
437
+ # end
438
+ def resolve_effective_backend(explicit_backend = nil)
439
+ return explicit_backend.to_sym if explicit_backend
440
+ effective_backend
441
+ end
442
+
443
+ # Get backend module for a specific backend (with explicit override)
444
+ #
445
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
446
+ # @return [Module, nil] the backend module or nil if not available
447
+ # @raise [BackendConflict] if the backend conflicts with previously used backends
448
+ # @example
449
+ # mod = TreeHaver.resolve_backend_module(:ffi)
450
+ # mod.capabilities[:backend] # => :ffi
451
+ def resolve_backend_module(explicit_backend = nil)
452
+ # Resolve the requested backend (explicit argument > thread context > global > :auto)
453
+ requested = resolve_effective_backend(explicit_backend)
454
+
455
+ mod = case requested
456
+ when :mri
457
+ Backends::MRI
458
+ when :rust
459
+ Backends::Rust
460
+ when :ffi
461
+ Backends::FFI
462
+ when :java
463
+ Backends::Java
464
+ when :citrus
465
+ Backends::Citrus
466
+ when :auto
467
+ backend_module # Fall back to normal resolution for :auto
468
+ else
469
+ # Unknown backend name - return nil to trigger error in caller
470
+ nil
471
+ end
472
+
473
+ # Return nil if the module doesn't exist
474
+ return unless mod
475
+
476
+ # Check for backend conflicts FIRST, before checking availability
477
+ # This is critical because the conflict causes the backend to report unavailable
478
+ # We want to raise a clear error explaining WHY it's unavailable
479
+ # Use the requested backend name directly (not capabilities) because
480
+ # capabilities may be empty when the backend is blocked/unavailable
481
+ check_backend_conflict!(requested) if requested && requested != :auto
482
+
483
+ # Now check if the backend is available
484
+ # Why assume modules without available? are available?
485
+ # - Some backends might be mocked in tests without an available? method
486
+ # - This makes the code more defensive and test-friendly
487
+ # - It allows graceful degradation if a backend module is incomplete
488
+ # - Backward compatibility: if a module doesn't declare availability, assume it works
489
+ return if mod.respond_to?(:available?) && !mod.available?
490
+
491
+ # Record that this backend is being used
492
+ record_backend_usage(requested) if requested && requested != :auto
493
+
494
+ mod
495
+ end
496
+
174
497
  # Determine the concrete backend module to use
175
498
  #
176
499
  # This method performs backend auto-selection when backend is :auto.
@@ -185,7 +508,7 @@ module TreeHaver
185
508
  # puts "Using #{mod.capabilities[:backend]} backend"
186
509
  # end
187
510
  def backend_module
188
- case backend
511
+ case effective_backend # Changed from: backend
189
512
  when :mri
190
513
  Backends::MRI
191
514
  when :rust
@@ -240,44 +563,102 @@ module TreeHaver
240
563
  # Allows opting-in dynamic helpers like TreeHaver::Language.toml without
241
564
  # advertising all names by default.
242
565
 
243
- # Register a language helper by name
566
+ # Register a language helper by name (backend-agnostic)
244
567
  #
245
568
  # After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
246
- # to load the registered language.
569
+ # to load the registered language. TreeHaver will automatically use the appropriate
570
+ # grammar based on the active backend.
571
+ #
572
+ # The `name` parameter is an arbitrary identifier you choose - it doesn't need to
573
+ # match the actual language name. This is useful for:
574
+ # - Testing: Use unique names like `:toml_test` to avoid collisions
575
+ # - Aliasing: Register the same grammar under multiple names
576
+ # - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
247
577
  #
248
- # @param name [Symbol, String] language identifier (e.g., :toml, :json)
249
- # @param path [String] absolute path to the language shared library
578
+ # The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
579
+ # `grammar_module` (Citrus), not from the name.
580
+ #
581
+ # IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
582
+ # library AND a Citrus grammar for the same language IN A SINGLE CALL.
583
+ # This is achieved by using separate `if` statements (not `elsif`) and no early
584
+ # returns. This design is deliberate and provides significant benefits:
585
+ #
586
+ # Why register both backends for one language?
587
+ # - Backend flexibility: Code works regardless of which backend is active
588
+ # - Performance testing: Compare tree-sitter vs Citrus performance
589
+ # - Gradual migration: Transition between backends without breaking code
590
+ # - Fallback scenarios: Use Citrus when tree-sitter library unavailable
591
+ # - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
592
+ #
593
+ # The active backend determines which registration is used automatically.
594
+ # No code changes needed to switch backends - just change TreeHaver.backend.
595
+ #
596
+ # @param name [Symbol, String] identifier for this registration (can be any name you choose)
597
+ # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
250
598
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
599
+ # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
600
+ # @param gem_name [String, nil] optional gem name for error messages
251
601
  # @return [void]
252
- # @example Register TOML grammar
602
+ # @example Register tree-sitter grammar only
253
603
  # TreeHaver.register_language(
254
604
  # :toml,
255
605
  # path: "/usr/local/lib/libtree-sitter-toml.so",
256
606
  # symbol: "tree_sitter_toml"
257
607
  # )
258
- def register_language(name, path:, symbol: nil)
259
- LanguageRegistry.register(name, path: path, symbol: symbol)
260
- end
608
+ # @example Register Citrus grammar only
609
+ # TreeHaver.register_language(
610
+ # :toml,
611
+ # grammar_module: TomlRB::Document,
612
+ # gem_name: "toml-rb"
613
+ # )
614
+ # @example Register BOTH backends in separate calls
615
+ # TreeHaver.register_language(
616
+ # :toml,
617
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
618
+ # symbol: "tree_sitter_toml"
619
+ # )
620
+ # TreeHaver.register_language(
621
+ # :toml,
622
+ # grammar_module: TomlRB::Document,
623
+ # gem_name: "toml-rb"
624
+ # )
625
+ # @example Register BOTH backends in ONE call (recommended for maximum flexibility)
626
+ # TreeHaver.register_language(
627
+ # :toml,
628
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
629
+ # symbol: "tree_sitter_toml",
630
+ # grammar_module: TomlRB::Document,
631
+ # gem_name: "toml-rb"
632
+ # )
633
+ # # Now TreeHaver::Language.toml works with ANY backend!
634
+ def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
635
+ # Register tree-sitter backend if path provided
636
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
637
+ if path
638
+ LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol)
639
+ end
261
640
 
262
- # Unregister a previously registered language helper
263
- #
264
- # @param name [Symbol, String] language identifier to unregister
265
- # @return [void]
266
- # @example
267
- # TreeHaver.unregister_language(:toml)
268
- def unregister_language(name)
269
- LanguageRegistry.unregister(name)
270
- end
641
+ # Register Citrus backend if grammar_module provided
642
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
643
+ # This allows maximum flexibility - register once, use with any backend
644
+ if grammar_module
645
+ unless grammar_module.respond_to?(:parse)
646
+ raise ArgumentError, "Grammar module must respond to :parse"
647
+ end
271
648
 
272
- # Clear all registered languages
273
- #
274
- # Primarily intended for test cleanup and resetting state.
275
- #
276
- # @return [void]
277
- # @example
278
- # TreeHaver.clear_languages!
279
- def clear_languages!
280
- LanguageRegistry.clear_registrations!
649
+ LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
650
+ end
651
+
652
+ # Require at least one backend to be registered
653
+ if path.nil? && grammar_module.nil?
654
+ raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
655
+ end
656
+
657
+ # Note: No early return! This method intentionally processes both `if` blocks
658
+ # above to allow registering multiple backends for the same language.
659
+ # Both tree-sitter and Citrus can be registered simultaneously for maximum
660
+ # flexibility. See method documentation for rationale.
661
+ nil
281
662
  end
282
663
 
283
664
  # Fetch a registered language entry
@@ -338,6 +719,7 @@ module TreeHaver
338
719
  # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
339
720
  # @param name [String, nil] logical name for the language (used in caching)
340
721
  # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
722
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
341
723
  # @return [Language] loaded language handle
342
724
  # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
343
725
  # @raise [ArgumentError] if path or symbol fails security validation
@@ -347,7 +729,13 @@ module TreeHaver
347
729
  # symbol: "tree_sitter_toml",
348
730
  # name: "toml"
349
731
  # )
350
- def from_library(path, symbol: nil, name: nil, validate: true)
732
+ # @example With explicit backend
733
+ # language = TreeHaver::Language.from_library(
734
+ # "/usr/local/lib/libtree-sitter-toml.so",
735
+ # symbol: "tree_sitter_toml",
736
+ # backend: :ffi
737
+ # )
738
+ def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
351
739
  if validate
352
740
  unless PathValidator.safe_library_path?(path)
353
741
  errors = PathValidator.validation_errors(path)
@@ -360,11 +748,20 @@ module TreeHaver
360
748
  end
361
749
  end
362
750
 
363
- mod = TreeHaver.backend_module
364
- raise NotAvailable, "No TreeHaver backend is available" unless mod
751
+ mod = TreeHaver.resolve_backend_module(backend)
752
+
753
+ if mod.nil?
754
+ if backend
755
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
756
+ else
757
+ raise NotAvailable, "No TreeHaver backend is available"
758
+ end
759
+ end
760
+
365
761
  # Backend must implement .from_library; fallback to .from_path for older impls
366
- # Include ENV vars in cache key since they affect symbol resolution
367
- key = [path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
762
+ # Include effective backend AND ENV vars in cache key since they affect loading
763
+ effective_b = TreeHaver.resolve_effective_backend(backend)
764
+ key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
368
765
  LanguageRegistry.fetch(key) do
369
766
  if mod::Language.respond_to?(:from_library)
370
767
  mod::Language.from_library(path, symbol: symbol, name: name)
@@ -380,31 +777,78 @@ module TreeHaver
380
777
  # Dynamic helper to load a registered language by name
381
778
  #
382
779
  # After registering a language with {TreeHaver.register_language},
383
- # you can load it using a method call:
780
+ # you can load it using a method call. The appropriate backend will be
781
+ # used based on registration and current backend.
384
782
  #
385
- # @example
783
+ # @example With tree-sitter
386
784
  # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
387
785
  # language = TreeHaver::Language.toml
388
786
  #
389
- # @example With overrides
390
- # language = TreeHaver::Language.toml(path: "/custom/path.so")
787
+ # @example With both backends
788
+ # TreeHaver.register_language(:toml,
789
+ # path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
790
+ # TreeHaver.register_language(:toml,
791
+ # grammar_module: TomlRB::Document)
792
+ # language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
391
793
  #
392
794
  # @param method_name [Symbol] the registered language name
393
- # @param args [Array] positional arguments (first is used as path if provided)
394
- # @param kwargs [Hash] keyword arguments (:path, :symbol, :name)
795
+ # @param args [Array] positional arguments
796
+ # @param kwargs [Hash] keyword arguments
395
797
  # @return [Language] loaded language handle
396
798
  # @raise [NoMethodError] if the language name is not registered
397
799
  def method_missing(method_name, *args, **kwargs, &block)
398
800
  # Resolve only if the language name was registered
399
- reg = TreeHaver.registered_language(method_name)
400
- return super unless reg
401
-
402
- # Allow per-call overrides; otherwise use registered defaults
403
- path = kwargs[:path] || args.first || reg[:path]
404
- raise ArgumentError, "path is required" unless path
405
- symbol = kwargs.key?(:symbol) ? kwargs[:symbol] : (reg[:symbol] || "tree_sitter_#{method_name}")
406
- name = kwargs[:name] || method_name.to_s
407
- from_library(path, symbol: symbol, name: name)
801
+ all_backends = TreeHaver.registered_language(method_name)
802
+ return super unless all_backends
803
+
804
+ # Check current backend
805
+ current_backend = TreeHaver.backend_module
806
+
807
+ # Determine which backend type to use
808
+ backend_type = if current_backend == Backends::Citrus
809
+ :citrus
810
+ else
811
+ :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
812
+ end
813
+
814
+ # Get backend-specific registration
815
+ reg = all_backends[backend_type]
816
+
817
+ # If Citrus backend is active
818
+ if backend_type == :citrus
819
+ if reg && reg[:grammar_module]
820
+ return Backends::Citrus::Language.new(reg[:grammar_module])
821
+ end
822
+
823
+ # Fall back to error if no Citrus grammar registered
824
+ raise NotAvailable,
825
+ "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
826
+ "Either register a Citrus grammar or use a tree-sitter backend. " \
827
+ "Registered backends: #{all_backends.keys.inspect}"
828
+ end
829
+
830
+ # For tree-sitter backends, use the path
831
+ if reg && reg[:path]
832
+ path = kwargs[:path] || args.first || reg[:path]
833
+ # Symbol priority: kwargs override > registration > derive from method_name
834
+ symbol = if kwargs.key?(:symbol)
835
+ kwargs[:symbol]
836
+ elsif reg[:symbol]
837
+ reg[:symbol]
838
+ else
839
+ "tree_sitter_#{method_name}"
840
+ end
841
+ # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
842
+ # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
843
+ # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
844
+ name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
845
+ return from_library(path, symbol: symbol, name: name)
846
+ end
847
+
848
+ # No appropriate registration found
849
+ raise ArgumentError,
850
+ "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
851
+ "Registered backends: #{all_backends.keys.inspect}"
408
852
  end
409
853
 
410
854
  # @api private
@@ -419,6 +863,29 @@ module TreeHaver
419
863
  # A Parser is used to parse source code into a syntax tree. You must
420
864
  # set a language before parsing.
421
865
  #
866
+ # == Wrapping/Unwrapping Responsibility
867
+ #
868
+ # TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
869
+ #
870
+ # **Language objects:**
871
+ # - Unwraps Language wrappers before passing to backend.language=
872
+ # - MRI backend receives ::TreeSitter::Language
873
+ # - Rust backend receives String (language name)
874
+ # - FFI backend receives wrapped Language (needs to_ptr)
875
+ #
876
+ # **Tree objects:**
877
+ # - parse() receives raw source, backend returns raw tree, Parser wraps it
878
+ # - parse_string() unwraps old_tree before passing to backend, wraps returned tree
879
+ # - Backends always work with raw backend trees, never TreeHaver::Tree
880
+ #
881
+ # **Node objects:**
882
+ # - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
883
+ #
884
+ # This design ensures:
885
+ # - Principle of Least Surprise: wrapping happens at boundaries, consistently
886
+ # - Backends are simple: they don't need to know about TreeHaver wrappers
887
+ # - Single Responsibility: wrapping logic is only in TreeHaver::Parser
888
+ #
422
889
  # @example Basic parsing
423
890
  # parser = TreeHaver::Parser.new
424
891
  # parser.language = TreeHaver::Language.toml
@@ -426,11 +893,56 @@ module TreeHaver
426
893
  class Parser
427
894
  # Create a new parser instance
428
895
  #
429
- # @raise [NotAvailable] if no backend is available
430
- def initialize
431
- mod = TreeHaver.backend_module
432
- raise NotAvailable, "No TreeHaver backend is available" unless mod
896
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
897
+ # @raise [NotAvailable] if no backend is available or requested backend is unavailable
898
+ # @example Default (uses context/global)
899
+ # parser = TreeHaver::Parser.new
900
+ # @example Explicit backend
901
+ # parser = TreeHaver::Parser.new(backend: :ffi)
902
+ def initialize(backend: nil)
903
+ # Convert string backend names to symbols for consistency
904
+ backend = backend.to_sym if backend.is_a?(String)
905
+
906
+ mod = TreeHaver.resolve_backend_module(backend)
907
+
908
+ if mod.nil?
909
+ if backend
910
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
911
+ else
912
+ raise NotAvailable, "No TreeHaver backend is available"
913
+ end
914
+ end
915
+
433
916
  @impl = mod::Parser.new
917
+ @explicit_backend = backend # Remember for introspection (always a Symbol or nil)
918
+ end
919
+
920
+ # Get the backend this parser is using (for introspection)
921
+ #
922
+ # Returns the actual backend in use, resolving :auto to the concrete backend.
923
+ #
924
+ # @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
925
+ def backend
926
+ if @explicit_backend && @explicit_backend != :auto
927
+ @explicit_backend
928
+ else
929
+ # Determine actual backend from the implementation class
930
+ case @impl.class.name
931
+ when /MRI/
932
+ :mri
933
+ when /Rust/
934
+ :rust
935
+ when /FFI/
936
+ :ffi
937
+ when /Java/
938
+ :java
939
+ when /Citrus/
940
+ :citrus
941
+ else
942
+ # Fallback to effective_backend if we can't determine from class name
943
+ TreeHaver.effective_backend
944
+ end
945
+ end
434
946
  end
435
947
 
436
948
  # Set the language grammar for this parser
@@ -440,9 +952,135 @@ module TreeHaver
440
952
  # @example
441
953
  # parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
442
954
  def language=(lang)
443
- @impl.language = lang
955
+ # Unwrap the language before passing to backend
956
+ # Backends receive raw language objects, never TreeHaver wrappers
957
+ inner_lang = unwrap_language(lang)
958
+ @impl.language = inner_lang
959
+ # Return the original (possibly wrapped) language for consistency
960
+ lang
961
+ end
962
+
963
+ private
964
+
965
+ # Unwrap a language object to extract the raw backend language
966
+ #
967
+ # This method is smart about backend compatibility:
968
+ # 1. If language has a backend attribute, checks if it matches current backend
969
+ # 2. If mismatch detected, attempts to reload language for correct backend
970
+ # 3. If reload successful, uses new language; otherwise raises TreeHaver::Error
971
+ # 4. Unwraps the language wrapper to get raw backend object
972
+ #
973
+ # @param lang [Object] wrapped or raw language object
974
+ # @return [Object] raw backend language object appropriate for current backend
975
+ # @api private
976
def unwrap_language(lang)
  # Purpose: turn a TreeHaver language wrapper into the raw object the
  # backend parser expects, after making sure it belongs to this parser's
  # backend. Raises TreeHaver::Error for raw (unwrapped) objects and for
  # backend mismatches that cannot be fixed by reloading.
  if lang.respond_to?(:backend)
    # Verify backend compatibility FIRST so a language built for another
    # backend is never handed to native code.
    # Exception: an :auto parser backend is permissive and accepts any language.
    current_backend = backend

    if lang.backend != current_backend && current_backend != :auto
      reloaded = try_reload_language_for_backend(lang, current_backend)
      unless reloaded
        raise TreeHaver::Error,
          "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
            "Cannot reload language for correct backend. " \
            "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
      end
      lang = reloaded
    end
  end

  # Checked after the reload on purpose: both the caller's object and a
  # reloaded replacement must expose #backend before we dispatch on it.
  unless lang.respond_to?(:backend)
    raise TreeHaver::Error,
      "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
        "Use TreeHaver::Language.from_library to create language objects."
  end

  # Accessors to try, in priority order, for each backend. An empty list
  # means the wrapper itself is passed through (FFI needs it for to_ptr).
  accessors =
    case lang.backend
    when :mri then %i[to_language inner_language]
    when :rust then %i[name]
    when :ffi then []
    when :java then %i[impl]
    when :citrus then %i[grammar_module]
    else
      # Unknown backend (e.g., a test backend): try every generic accessor.
      %i[to_language inner_language impl grammar_module name]
    end

  accessor = accessors.find { |candidate| lang.respond_to?(candidate) }
  # If no accessor applies, pass the object through unchanged.
  accessor ? lang.public_send(accessor) : lang
end
445
1048
 
1049
+ # Try to reload a language for the current backend
1050
+ #
1051
+ # This handles the case where a language was loaded for one backend,
1052
+ # but is now being used with a different backend (e.g., after backend switch).
1053
+ #
1054
+ # @param lang [Object] language object with metadata
1055
+ # @param target_backend [Symbol] backend to reload for
1056
+ # @return [Object, nil] reloaded language or nil if reload not possible
1057
+ # @api private
1058
def try_reload_language_for_backend(lang, target_backend)
  # Purpose: best-effort reload of a language that was created for a
  # different backend. Returns the reloaded language, or nil when a reload
  # is impossible or fails; the caller decides how to react to nil.

  # Only path-backed (tree-sitter) languages can be reloaded. Citrus
  # languages are plain module references with nothing to reload from.
  return unless lang.respond_to?(:path) && lang.path

  begin
    # NOTE(review): from_library is presumed to pick the backend from the
    # ambient context/global setting; target_backend is only used in the
    # warning below - confirm this matches the parser's own backend.
    Language.from_library(
      lang.path,
      symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
      name: lang.respond_to?(:name) ? lang.name : nil,
    )
  rescue StandardError, TreeHaver::Error => e
    # TreeHaver::Error inherits from Exception, so a bare `rescue` would let
    # NotAvailable and friends escape instead of yielding the documented nil.
    warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
    nil
  end
end
1081
+
1082
+ public
1083
+
446
1084
  # Parse source code into a syntax tree
447
1085
  #
448
1086
  # @param source [String] the source code to parse (should be UTF-8)
@@ -452,7 +1090,8 @@ module TreeHaver
452
1090
  # puts tree.root_node.type
453
1091
  def parse(source)
454
1092
  tree_impl = @impl.parse(source)
455
- Tree.new(tree_impl)
1093
+ # Wrap backend tree with source so Node#text works
1094
+ Tree.new(tree_impl, source: source)
456
1095
  end
457
1096
 
458
1097
  # Parse source code into a syntax tree (with optional incremental parsing)
@@ -501,10 +1140,12 @@ module TreeHaver
501
1140
  old_tree
502
1141
  end
503
1142
  tree_impl = @impl.parse_string(old_impl, source)
504
- Tree.new(tree_impl)
1143
+ # Wrap backend tree with source so Node#text works
1144
+ Tree.new(tree_impl, source: source)
505
1145
  elsif @impl.respond_to?(:parse_string)
506
1146
  tree_impl = @impl.parse_string(nil, source)
507
- Tree.new(tree_impl)
1147
+ # Wrap backend tree with source so Node#text works
1148
+ Tree.new(tree_impl, source: source)
508
1149
  else
509
1150
  # Fallback for backends that don't support parse_string
510
1151
  parse(source)