tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -3,13 +3,16 @@
3
3
  # External gems
4
4
  require "version_gem"
5
5
 
6
+ # Standard library
7
+ require "set"
8
+
6
9
  # This gem
7
10
  require_relative "tree_haver/version"
8
11
  require_relative "tree_haver/language_registry"
9
12
 
10
- # TreeHaver is a cross-Ruby adapter for the Tree-sitter parsing library.
13
+ # TreeHaver is a cross-Ruby adapter for the tree-sitter parsing library.
11
14
  #
12
- # It provides a unified API for parsing source code using Tree-sitter grammars,
15
+ # It provides a unified API for parsing source code using tree-sitter grammars,
13
16
  # working seamlessly across MRI Ruby, JRuby, and TruffleRuby.
14
17
  #
15
18
  # @example Basic usage with TOML
@@ -54,15 +57,18 @@ require_relative "tree_haver/language_registry"
54
57
  # TreeHaver.backend = :mri # Force MRI backend
55
58
  # TreeHaver.backend = :auto # Auto-select (default)
56
59
  #
57
- # @see https://tree-sitter.github.io/tree-sitter/ Tree-sitter documentation
60
+ # @see https://tree-sitter.github.io/tree-sitter/ tree-sitter documentation
58
61
  # @see GrammarFinder For automatic grammar library discovery
59
62
  module TreeHaver
60
63
  # Base error class for TreeHaver exceptions
64
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
61
65
  #
62
66
  # @abstract Subclass to create specific error types
63
- class Error < StandardError; end
67
+ class Error < Exception; end # rubocop:disable Lint/InheritException
64
68
 
65
69
  # Raised when a requested backend or feature is not available
70
+ # This is a serious error that extends Exception (not StandardError).
71
+ # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for the reasoning behind inheriting from Exception
66
72
  #
67
73
  # This can occur when:
68
74
  # - Required native libraries are not installed
@@ -77,18 +83,59 @@ module TreeHaver
77
83
  # end
78
84
  class NotAvailable < Error; end
79
85
 
86
+ # Raised when attempting to use backends that are known to conflict
87
+ #
88
+ # This is a serious error that extends Exception (not StandardError) because
89
+ # it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
90
+ # FFI backend cannot coexist in the same process - once MRI loads, FFI will
91
+ # segfault when trying to set a language on a parser.
92
+ #
93
+ # This protection can be disabled with `TreeHaver.backend_protect = false`
94
+ # but doing so risks segfaults.
95
+ #
96
+ # @example Handling backend conflicts
97
+ # begin
98
+ # # This will raise if MRI was already used
99
+ # TreeHaver.with_backend(:ffi) { parser.language = lang }
100
+ # rescue TreeHaver::BackendConflict => e
101
+ # puts "Backend conflict: #{e.message}"
102
+ # # Fall back to a compatible backend
103
+ # end
104
+ #
105
+ # @example Disabling protection (not recommended)
106
+ # TreeHaver.backend_protect = false
107
+ # # Now you can test backend conflicts (at risk of segfaults)
108
+ class BackendConflict < Error; end
109
+
80
110
  # Namespace for backend implementations
81
111
  #
82
112
  # TreeHaver provides multiple backends to support different Ruby implementations:
83
113
  # - {Backends::MRI} - Uses ruby_tree_sitter (MRI C extension)
84
114
  # - {Backends::Rust} - Uses tree_stump (Rust extension with precompiled binaries)
85
115
  # - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
86
- # - {Backends::Java} - Uses JRuby's Java integration (planned)
116
+ # - {Backends::Java} - Uses JRuby's Java integration
117
+ # - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
87
118
  module Backends
88
119
  autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
89
120
  autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
90
121
  autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
91
122
  autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
123
+ autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
124
+
125
+ # Known backend conflicts
126
+ #
127
+ # Maps each backend to an array of backends that block it from working.
128
+ # For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
129
+ # FFI calls to ts_parser_set_language will segfault.
130
+ #
131
+ # @return [Hash{Symbol => Array<Symbol>}]
132
+ BLOCKED_BY = {
133
+ mri: [],
134
+ rust: [],
135
+ ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
136
+ java: [],
137
+ citrus: [],
138
+ }.freeze
92
139
  end
93
140
 
94
141
  # Security utilities for validating paths before loading shared libraries
@@ -119,11 +166,98 @@ module TreeHaver
119
166
  # @see PathValidator
120
167
  autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
121
168
 
169
+ # Citrus grammar finder for discovering and registering Citrus-based parsers
170
+ #
171
+ # @example Register toml-rb
172
+ # finder = TreeHaver::CitrusGrammarFinder.new(
173
+ # language: :toml,
174
+ # gem_name: "toml-rb",
175
+ # grammar_const: "TomlRB::Document"
176
+ # )
177
+ # finder.register! if finder.available?
178
+ #
179
+ # @see CitrusGrammarFinder
180
+ autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
181
+
182
+ # Unified Node wrapper providing consistent API across backends
183
+ autoload :Node, File.join(__dir__, "tree_haver", "node")
184
+
185
+ # Unified Tree wrapper providing consistent API across backends
186
+ autoload :Tree, File.join(__dir__, "tree_haver", "tree")
187
+
122
188
  # Get the current backend selection
123
189
  #
124
- # @return [Symbol] one of :auto, :mri, :ffi, or :java
190
+ # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
125
191
  # @note Can be set via ENV["TREE_HAVER_BACKEND"]
126
192
  class << self
193
+ # Whether backend conflict protection is enabled
194
+ #
195
+ # When true (default), TreeHaver will raise BackendConflict if you try to
196
+ # use a backend that is known to conflict with a previously used backend.
197
+ # For example, FFI will not work after MRI has been used.
198
+ #
199
+ # Set to false to disable protection (useful for testing compatibility).
200
+ #
201
+ # @return [Boolean]
202
+ # @example Disable protection for testing
203
+ # TreeHaver.backend_protect = false
204
+ attr_writer :backend_protect
205
+
206
+ # Check if backend conflict protection is enabled
207
+ #
208
+ # @return [Boolean] true if protection is enabled (default)
209
+ def backend_protect?
210
+ return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
211
+ true # Default is protected
212
+ end
213
+
214
+ # Alias for backend_protect?
215
+ def backend_protect
216
+ backend_protect?
217
+ end
218
+
219
+ # Track which backends have been used in this process
220
+ #
221
+ # @return [Set<Symbol>] set of backend symbols that have been used
222
+ def backends_used
223
+ @backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
224
+ end
225
+
226
+ # Record that a backend has been used
227
+ #
228
+ # @param backend [Symbol] the backend that was used
229
+ # @return [void]
230
+ # @api private
231
+ def record_backend_usage(backend)
232
+ backends_used << backend
233
+ end
234
+
235
+ # Check if a backend would conflict with previously used backends
236
+ #
237
+ # @param backend [Symbol] the backend to check
238
+ # @return [Array<Symbol>] list of previously used backends that block this one
239
+ def conflicting_backends_for(backend)
240
+ blockers = Backends::BLOCKED_BY[backend] || []
241
+ blockers & backends_used.to_a
242
+ end
243
+
244
+ # Check if using a backend would cause a conflict
245
+ #
246
+ # @param backend [Symbol] the backend to check
247
+ # @raise [BackendConflict] if protection is enabled and there's a conflict
248
+ # @return [void]
249
+ def check_backend_conflict!(backend)
250
+ return unless backend_protect?
251
+
252
+ conflicts = conflicting_backends_for(backend)
253
+ return if conflicts.empty?
254
+
255
+ raise BackendConflict,
256
+ "Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
257
+ "The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
258
+ "To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
259
+ end
260
+
127
261
  # @example
128
262
  # TreeHaver.backend # => :auto
129
263
  def backend
@@ -132,13 +266,14 @@ module TreeHaver
132
266
  when "rust" then :rust
133
267
  when "ffi" then :ffi
134
268
  when "java" then :java
269
+ when "citrus" then :citrus
135
270
  else :auto
136
271
  end
137
272
  end
138
273
 
139
274
  # Set the backend to use
140
275
  #
141
- # @param name [Symbol, String, nil] backend name (:auto, :mri, :rust, :ffi, :java)
276
+ # @param name [Symbol, String, nil] backend name (:auto, :mri, :rust, :ffi, :java, :citrus)
142
277
  # @return [Symbol, nil] the backend that was set
143
278
  # @example Force FFI backend
144
279
  # TreeHaver.backend = :ffi
@@ -162,20 +297,218 @@ module TreeHaver
162
297
  @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
163
298
  end
164
299
 
300
+ # Thread-local backend context storage
301
+ #
302
+ # Returns a hash containing the thread-local backend context with keys:
303
+ # - :backend - The backend name (Symbol) or nil if using global default
304
+ # - :depth - The nesting depth (Integer) for proper cleanup
305
+ #
306
+ # @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
307
+ # @example
308
+ # ctx = TreeHaver.current_backend_context
309
+ # ctx[:backend] # => nil or :ffi, :mri, etc.
310
+ # ctx[:depth] # => 0, 1, 2, etc.
311
+ def current_backend_context
312
+ Thread.current[:tree_haver_backend_context] ||= {
313
+ backend: nil, # nil means "use global default"
314
+ depth: 0, # Track nesting depth for proper cleanup
315
+ }
316
+ end
317
+
318
+ # Get the effective backend for current context
319
+ #
320
+ # Priority: thread-local context → global @backend → :auto
321
+ #
322
+ # @return [Symbol] the backend to use
323
+ # @example
324
+ # TreeHaver.effective_backend # => :auto (default)
325
+ # @example With thread-local context
326
+ # TreeHaver.with_backend(:ffi) do
327
+ # TreeHaver.effective_backend # => :ffi
328
+ # end
329
+ def effective_backend
330
+ ctx = current_backend_context
331
+ ctx[:backend] || backend || :auto
332
+ end
333
+
334
+ # Execute a block with a specific backend in thread-local context
335
+ #
336
+ # This method provides temporary, thread-safe backend switching for a block of code.
337
+ # The backend setting is automatically restored when the block exits, even if
338
+ # an exception is raised. Supports nesting—inner blocks override outer blocks,
339
+ # and each level is properly unwound.
340
+ #
341
+ # Thread Safety: Each thread maintains its own backend context, so concurrent
342
+ # threads can safely use different backends without interfering with each other.
343
+ #
344
+ # Use Cases:
345
+ # - Testing: Test the same code path with different backends
346
+ # - Performance comparison: Benchmark parsing with different backends
347
+ # - Fallback scenarios: Try one backend, fall back to another on failure
348
+ # - Thread isolation: Different threads can use different backends safely
349
+ #
350
+ # @param name [Symbol, String] backend name (:mri, :rust, :ffi, :java, :citrus, :auto)
351
+ # @yield block to execute with the specified backend
352
+ # @return [Object] the return value of the block
353
+ # @raise [ArgumentError] if backend name is nil
354
+ # @raise [BackendConflict] if the requested backend conflicts with a previously used backend
355
+ #
356
+ # @example Basic usage
357
+ # TreeHaver.with_backend(:mri) do
358
+ # parser = TreeHaver::Parser.new
359
+ # tree = parser.parse(source)
360
+ # end
361
+ # # Backend is automatically restored here
362
+ #
363
+ # @example Nested blocks (inner overrides outer)
364
+ # TreeHaver.with_backend(:rust) do
365
+ # parser1 = TreeHaver::Parser.new # Uses :rust
366
+ # TreeHaver.with_backend(:citrus) do
367
+ # parser2 = TreeHaver::Parser.new # Uses :citrus
368
+ # end
369
+ # parser3 = TreeHaver::Parser.new # Back to :rust
370
+ # end
371
+ #
372
+ # @example Testing multiple backends
373
+ # [:mri, :rust, :citrus].each do |backend_name|
374
+ # TreeHaver.with_backend(backend_name) do
375
+ # parser = TreeHaver::Parser.new
376
+ # result = parser.parse(source)
377
+ # puts "#{backend_name}: #{result.root_node.type}"
378
+ # end
379
+ # end
380
+ #
381
+ # @example Exception safety (backend restored even on error)
382
+ # TreeHaver.with_backend(:mri) do
383
+ # raise "Something went wrong"
384
+ # rescue
385
+ # # Handle error
386
+ # end
387
+ # # Backend is still restored to its previous value
388
+ #
389
+ # @example Thread isolation
390
+ # threads = [:mri, :rust].map do |backend_name|
391
+ # Thread.new do
392
+ # TreeHaver.with_backend(backend_name) do
393
+ # # Each thread uses its own backend independently
394
+ # TreeHaver::Parser.new
395
+ # end
396
+ # end
397
+ # end
398
+ # threads.each(&:join)
399
+ #
400
+ # @see #effective_backend
401
+ # @see #current_backend_context
402
+ def with_backend(name)
403
+ raise ArgumentError, "Backend name required" if name.nil?
404
+
405
+ # Get context FIRST to ensure it exists
406
+ ctx = current_backend_context
407
+ old_backend = ctx[:backend]
408
+ old_depth = ctx[:depth]
409
+
410
+ begin
411
+ # Set new backend and increment depth
412
+ ctx[:backend] = name.to_sym
413
+ ctx[:depth] += 1
414
+
415
+ # Execute block
416
+ yield
417
+ ensure
418
+ # Restore previous backend and depth
419
+ # This ensures proper unwinding even with exceptions
420
+ ctx[:backend] = old_backend
421
+ ctx[:depth] = old_depth
422
+ end
423
+ end
424
+
425
+ # Resolve the effective backend considering explicit override
426
+ #
427
+ # Priority: explicit > thread context > global > :auto
428
+ #
429
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
430
+ # @return [Symbol] the backend to use
431
+ # @example
432
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi
433
+ # @example With thread-local context
434
+ # TreeHaver.with_backend(:mri) do
435
+ # TreeHaver.resolve_effective_backend(nil) # => :mri
436
+ # TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
437
+ # end
438
+ def resolve_effective_backend(explicit_backend = nil)
439
+ return explicit_backend.to_sym if explicit_backend
440
+ effective_backend
441
+ end
442
+
443
+ # Get backend module for a specific backend (with explicit override)
444
+ #
445
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
446
+ # @return [Module, nil] the backend module or nil if not available
447
+ # @raise [BackendConflict] if the backend conflicts with previously used backends
448
+ # @example
449
+ # mod = TreeHaver.resolve_backend_module(:ffi)
450
+ # mod.capabilities[:backend] # => :ffi
451
+ def resolve_backend_module(explicit_backend = nil)
452
+ # Temporarily override effective backend
453
+ requested = resolve_effective_backend(explicit_backend)
454
+
455
+ mod = case requested
456
+ when :mri
457
+ Backends::MRI
458
+ when :rust
459
+ Backends::Rust
460
+ when :ffi
461
+ Backends::FFI
462
+ when :java
463
+ Backends::Java
464
+ when :citrus
465
+ Backends::Citrus
466
+ when :auto
467
+ backend_module # Fall back to normal resolution for :auto
468
+ else
469
+ # Unknown backend name - return nil to trigger error in caller
470
+ nil
471
+ end
472
+
473
+ # Return nil if the module doesn't exist
474
+ return unless mod
475
+
476
+ # Check for backend conflicts FIRST, before checking availability
477
+ # This is critical because the conflict causes the backend to report unavailable
478
+ # We want to raise a clear error explaining WHY it's unavailable
479
+ # Use the requested backend name directly (not capabilities) because
480
+ # capabilities may be empty when the backend is blocked/unavailable
481
+ check_backend_conflict!(requested) if requested && requested != :auto
482
+
483
+ # Now check if the backend is available
484
+ # Why assume modules without available? are available?
485
+ # - Some backends might be mocked in tests without an available? method
486
+ # - This makes the code more defensive and test-friendly
487
+ # - It allows graceful degradation if a backend module is incomplete
488
+ # - Backward compatibility: if a module doesn't declare availability, assume it works
489
+ return if mod.respond_to?(:available?) && !mod.available?
490
+
491
+ # Record that this backend is being used
492
+ record_backend_usage(requested) if requested && requested != :auto
493
+
494
+ mod
495
+ end
496
+
165
497
  # Determine the concrete backend module to use
166
498
  #
167
499
  # This method performs backend auto-selection when backend is :auto.
168
- # On JRuby, prefers Java backend if available, then FFI.
169
- # On MRI, prefers MRI backend if available, then Rust, then FFI.
500
+ # On JRuby, prefers Java backend if available, then FFI, then Citrus.
501
+ # On MRI, prefers MRI backend if available, then Rust, then FFI, then Citrus.
502
+ # Citrus is the final fallback as it's pure Ruby and works everywhere.
170
503
  #
171
- # @return [Module, nil] the backend module (Backends::MRI, Backends::Rust, Backends::FFI, or Backends::Java), or nil if none available
504
+ # @return [Module, nil] the backend module (Backends::MRI, Backends::Rust, Backends::FFI, Backends::Java, or Backends::Citrus), or nil if none available
172
505
  # @example
173
506
  # mod = TreeHaver.backend_module
174
507
  # if mod
175
508
  # puts "Using #{mod.capabilities[:backend]} backend"
176
509
  # end
177
510
  def backend_module
178
- case backend
511
+ case effective_backend # Changed from: backend
179
512
  when :mri
180
513
  Backends::MRI
181
514
  when :rust
@@ -184,8 +517,10 @@ module TreeHaver
184
517
  Backends::FFI
185
518
  when :java
186
519
  Backends::Java
520
+ when :citrus
521
+ Backends::Citrus
187
522
  else
188
- # auto-select: on JRuby prefer Java backend if available; on MRI prefer MRI, then Rust; otherwise FFI
523
+ # auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
189
524
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && Backends::Java.available?
190
525
  Backends::Java
191
526
  elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && Backends::MRI.available?
@@ -194,8 +529,10 @@ module TreeHaver
194
529
  Backends::Rust
195
530
  elsif Backends::FFI.available?
196
531
  Backends::FFI
532
+ elsif Backends::Citrus.available?
533
+ Backends::Citrus # Pure Ruby fallback
197
534
  else
198
- # No backend available yet
535
+ # No backend available
199
536
  nil
200
537
  end
201
538
  end
@@ -226,44 +563,102 @@ module TreeHaver
226
563
  # Allows opting-in dynamic helpers like TreeHaver::Language.toml without
227
564
  # advertising all names by default.
228
565
 
229
- # Register a language helper by name
566
+ # Register a language helper by name (backend-agnostic)
230
567
  #
231
568
  # After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
232
- # to load the registered language.
569
+ # to load the registered language. TreeHaver will automatically use the appropriate
570
+ # grammar based on the active backend.
571
+ #
572
+ # The `name` parameter is an arbitrary identifier you choose - it doesn't need to
573
+ # match the actual language name. This is useful for:
574
+ # - Testing: Use unique names like `:toml_test` to avoid collisions
575
+ # - Aliasing: Register the same grammar under multiple names
576
+ # - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
577
+ #
578
+ # The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
579
+ # `grammar_module` (Citrus), not from the name.
580
+ #
581
+ # IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
582
+ # library AND a Citrus grammar for the same language IN A SINGLE CALL.
583
+ # This is achieved by using separate `if` statements (not `elsif`) and no early
584
+ # returns. This design is deliberate and provides significant benefits:
585
+ #
586
+ # Why register both backends for one language?
587
+ # - Backend flexibility: Code works regardless of which backend is active
588
+ # - Performance testing: Compare tree-sitter vs Citrus performance
589
+ # - Gradual migration: Transition between backends without breaking code
590
+ # - Fallback scenarios: Use Citrus when tree-sitter library unavailable
591
+ # - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
233
592
  #
234
- # @param name [Symbol, String] language identifier (e.g., :toml, :json)
235
- # @param path [String] absolute path to the language shared library
593
+ # The active backend determines which registration is used automatically.
594
+ # No code changes needed to switch backends - just change TreeHaver.backend.
595
+ #
596
+ # @param name [Symbol, String] identifier for this registration (can be any name you choose)
597
+ # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
236
598
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
599
+ # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
600
+ # @param gem_name [String, nil] optional gem name for error messages
237
601
  # @return [void]
238
- # @example Register TOML grammar
602
+ # @example Register tree-sitter grammar only
239
603
  # TreeHaver.register_language(
240
604
  # :toml,
241
605
  # path: "/usr/local/lib/libtree-sitter-toml.so",
242
606
  # symbol: "tree_sitter_toml"
243
607
  # )
244
- def register_language(name, path:, symbol: nil)
245
- LanguageRegistry.register(name, path: path, symbol: symbol)
246
- end
608
+ # @example Register Citrus grammar only
609
+ # TreeHaver.register_language(
610
+ # :toml,
611
+ # grammar_module: TomlRB::Document,
612
+ # gem_name: "toml-rb"
613
+ # )
614
+ # @example Register BOTH backends in separate calls
615
+ # TreeHaver.register_language(
616
+ # :toml,
617
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
618
+ # symbol: "tree_sitter_toml"
619
+ # )
620
+ # TreeHaver.register_language(
621
+ # :toml,
622
+ # grammar_module: TomlRB::Document,
623
+ # gem_name: "toml-rb"
624
+ # )
625
+ # @example Register BOTH backends in ONE call (recommended for maximum flexibility)
626
+ # TreeHaver.register_language(
627
+ # :toml,
628
+ # path: "/usr/local/lib/libtree-sitter-toml.so",
629
+ # symbol: "tree_sitter_toml",
630
+ # grammar_module: TomlRB::Document,
631
+ # gem_name: "toml-rb"
632
+ # )
633
+ # # Now TreeHaver::Language.toml works with ANY backend!
634
+ def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
635
+ # Register tree-sitter backend if path provided
636
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
637
+ if path
638
+ LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol)
639
+ end
247
640
 
248
- # Unregister a previously registered language helper
249
- #
250
- # @param name [Symbol, String] language identifier to unregister
251
- # @return [void]
252
- # @example
253
- # TreeHaver.unregister_language(:toml)
254
- def unregister_language(name)
255
- LanguageRegistry.unregister(name)
256
- end
641
+ # Register Citrus backend if grammar_module provided
642
+ # Note: Uses `if` not `elsif` so both backends can be registered in one call
643
+ # This allows maximum flexibility - register once, use with any backend
644
+ if grammar_module
645
+ unless grammar_module.respond_to?(:parse)
646
+ raise ArgumentError, "Grammar module must respond to :parse"
647
+ end
257
648
 
258
- # Clear all registered languages
259
- #
260
- # Primarily intended for test cleanup and resetting state.
261
- #
262
- # @return [void]
263
- # @example
264
- # TreeHaver.clear_languages!
265
- def clear_languages!
266
- LanguageRegistry.clear_registrations!
649
+ LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
650
+ end
651
+
652
+ # Require at least one backend to be registered
653
+ if path.nil? && grammar_module.nil?
654
+ raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
655
+ end
656
+
657
+ # Note: No early return! This method intentionally processes both `if` blocks
658
+ # above to allow registering multiple backends for the same language.
659
+ # Both tree-sitter and Citrus can be registered simultaneously for maximum
660
+ # flexibility. See method documentation for rationale.
661
+ nil
267
662
  end
268
663
 
269
664
  # Fetch a registered language entry
@@ -276,7 +671,7 @@ module TreeHaver
276
671
  end
277
672
  end
278
673
 
279
- # Represents a Tree-sitter language grammar
674
+ # Represents a tree-sitter language grammar
280
675
  #
281
676
  # A Language object is an opaque handle to a TSLanguage* that defines
282
677
  # the grammar rules for parsing a specific programming language.
@@ -324,6 +719,7 @@ module TreeHaver
324
719
  # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
325
720
  # @param name [String, nil] logical name for the language (used in caching)
326
721
  # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
722
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
327
723
  # @return [Language] loaded language handle
328
724
  # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
329
725
  # @raise [ArgumentError] if path or symbol fails security validation
@@ -333,7 +729,13 @@ module TreeHaver
333
729
  # symbol: "tree_sitter_toml",
334
730
  # name: "toml"
335
731
  # )
336
- def from_library(path, symbol: nil, name: nil, validate: true)
732
+ # @example With explicit backend
733
+ # language = TreeHaver::Language.from_library(
734
+ # "/usr/local/lib/libtree-sitter-toml.so",
735
+ # symbol: "tree_sitter_toml",
736
+ # backend: :ffi
737
+ # )
738
+ def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
337
739
  if validate
338
740
  unless PathValidator.safe_library_path?(path)
339
741
  errors = PathValidator.validation_errors(path)
@@ -346,11 +748,20 @@ module TreeHaver
346
748
  end
347
749
  end
348
750
 
349
- mod = TreeHaver.backend_module
350
- raise NotAvailable, "No TreeHaver backend is available" unless mod
751
+ mod = TreeHaver.resolve_backend_module(backend)
752
+
753
+ if mod.nil?
754
+ if backend
755
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
756
+ else
757
+ raise NotAvailable, "No TreeHaver backend is available"
758
+ end
759
+ end
760
+
351
761
  # Backend must implement .from_library; fallback to .from_path for older impls
352
- # Include ENV vars in cache key since they affect symbol resolution
353
- key = [path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
762
+ # Include effective backend AND ENV vars in cache key since they affect loading
763
+ effective_b = TreeHaver.resolve_effective_backend(backend)
764
+ key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
354
765
  LanguageRegistry.fetch(key) do
355
766
  if mod::Language.respond_to?(:from_library)
356
767
  mod::Language.from_library(path, symbol: symbol, name: name)
@@ -366,31 +777,78 @@ module TreeHaver
366
777
  # Dynamic helper to load a registered language by name
367
778
  #
368
779
  # After registering a language with {TreeHaver.register_language},
369
- # you can load it using a method call:
780
+ # you can load it using a method call. The appropriate backend will be
781
+ # used based on registration and current backend.
370
782
  #
371
- # @example
783
+ # @example With tree-sitter
372
784
  # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
373
785
  # language = TreeHaver::Language.toml
374
786
  #
375
- # @example With overrides
376
- # language = TreeHaver::Language.toml(path: "/custom/path.so")
787
+ # @example With both backends
788
+ # TreeHaver.register_language(:toml,
789
+ # path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
790
+ # TreeHaver.register_language(:toml,
791
+ # grammar_module: TomlRB::Document)
792
+ # language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
377
793
  #
378
794
  # @param method_name [Symbol] the registered language name
379
- # @param args [Array] positional arguments (first is used as path if provided)
380
- # @param kwargs [Hash] keyword arguments (:path, :symbol, :name)
795
+ # @param args [Array] positional arguments
796
+ # @param kwargs [Hash] keyword arguments
381
797
  # @return [Language] loaded language handle
382
798
  # @raise [NoMethodError] if the language name is not registered
383
799
  def method_missing(method_name, *args, **kwargs, &block)
384
800
  # Resolve only if the language name was registered
385
- reg = TreeHaver.registered_language(method_name)
386
- return super unless reg
387
-
388
- # Allow per-call overrides; otherwise use registered defaults
389
- path = kwargs[:path] || args.first || reg[:path]
390
- raise ArgumentError, "path is required" unless path
391
- symbol = kwargs.key?(:symbol) ? kwargs[:symbol] : (reg[:symbol] || "tree_sitter_#{method_name}")
392
- name = kwargs[:name] || method_name.to_s
393
- from_library(path, symbol: symbol, name: name)
801
+ all_backends = TreeHaver.registered_language(method_name)
802
+ return super unless all_backends
803
+
804
+ # Check current backend
805
+ current_backend = TreeHaver.backend_module
806
+
807
+ # Determine which backend type to use
808
+ backend_type = if current_backend == Backends::Citrus
809
+ :citrus
810
+ else
811
+ :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
812
+ end
813
+
814
+ # Get backend-specific registration
815
+ reg = all_backends[backend_type]
816
+
817
+ # If Citrus backend is active
818
+ if backend_type == :citrus
819
+ if reg && reg[:grammar_module]
820
+ return Backends::Citrus::Language.new(reg[:grammar_module])
821
+ end
822
+
823
+ # Fall back to error if no Citrus grammar registered
824
+ raise NotAvailable,
825
+ "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
826
+ "Either register a Citrus grammar or use a tree-sitter backend. " \
827
+ "Registered backends: #{all_backends.keys.inspect}"
828
+ end
829
+
830
+ # For tree-sitter backends, use the path
831
+ if reg && reg[:path]
832
+ path = kwargs[:path] || args.first || reg[:path]
833
+ # Symbol priority: kwargs override > registration > derive from method_name
834
+ symbol = if kwargs.key?(:symbol)
835
+ kwargs[:symbol]
836
+ elsif reg[:symbol]
837
+ reg[:symbol]
838
+ else
839
+ "tree_sitter_#{method_name}"
840
+ end
841
+ # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
842
+ # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
843
+ # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
844
+ name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
845
+ return from_library(path, symbol: symbol, name: name)
846
+ end
847
+
848
+ # No appropriate registration found
849
+ raise ArgumentError,
850
+ "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
851
+ "Registered backends: #{all_backends.keys.inspect}"
394
852
  end
395
853
 
396
854
  # @api private
@@ -400,11 +858,34 @@ module TreeHaver
400
858
  end
401
859
  end
402
860
 
403
- # Represents a Tree-sitter parser instance
861
+ # Represents a tree-sitter parser instance
404
862
  #
405
863
  # A Parser is used to parse source code into a syntax tree. You must
406
864
  # set a language before parsing.
407
865
  #
866
+ # == Wrapping/Unwrapping Responsibility
867
+ #
868
+ # TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
869
+ #
870
+ # **Language objects:**
871
+ # - Unwraps Language wrappers before passing to backend.language=
872
+ # - MRI backend receives ::TreeSitter::Language
873
+ # - Rust backend receives String (language name)
874
+ # - FFI backend receives wrapped Language (needs to_ptr)
875
+ #
876
+ # **Tree objects:**
877
+ # - parse() receives raw source, backend returns raw tree, Parser wraps it
878
+ # - parse_string() unwraps old_tree before passing to backend, wraps returned tree
879
+ # - Backends always work with raw backend trees, never TreeHaver::Tree
880
+ #
881
+ # **Node objects:**
882
+ # - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
883
+ #
884
+ # This design ensures:
885
+ # - Principle of Least Surprise: wrapping happens at boundaries, consistently
886
+ # - Backends are simple: they don't need to know about TreeHaver wrappers
887
+ # - Single Responsibility: wrapping logic is only in TreeHaver::Parser
888
+ #
408
889
  # @example Basic parsing
409
890
  # parser = TreeHaver::Parser.new
410
891
  # parser.language = TreeHaver::Language.toml
@@ -412,11 +893,56 @@ module TreeHaver
412
893
  class Parser
413
894
  # Create a new parser instance
414
895
  #
415
- # @raise [NotAvailable] if no backend is available
416
- def initialize
417
- mod = TreeHaver.backend_module
418
- raise NotAvailable, "No TreeHaver backend is available" unless mod
896
+ # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
897
+ # @raise [NotAvailable] if no backend is available or requested backend is unavailable
898
+ # @example Default (uses context/global)
899
+ # parser = TreeHaver::Parser.new
900
+ # @example Explicit backend
901
+ # parser = TreeHaver::Parser.new(backend: :ffi)
902
+ def initialize(backend: nil)
903
+ # Convert string backend names to symbols for consistency
904
+ backend = backend.to_sym if backend.is_a?(String)
905
+
906
+ mod = TreeHaver.resolve_backend_module(backend)
907
+
908
+ if mod.nil?
909
+ if backend
910
+ raise NotAvailable, "Requested backend #{backend.inspect} is not available"
911
+ else
912
+ raise NotAvailable, "No TreeHaver backend is available"
913
+ end
914
+ end
915
+
419
916
  @impl = mod::Parser.new
917
+ @explicit_backend = backend # Remember for introspection (always a Symbol or nil)
918
+ end
919
+
920
+ # Get the backend this parser is using (for introspection)
921
+ #
922
+ # Returns the actual backend in use, resolving :auto to the concrete backend.
923
+ #
924
+ # @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
925
+ def backend
926
+ if @explicit_backend && @explicit_backend != :auto
927
+ @explicit_backend
928
+ else
929
+ # Determine actual backend from the implementation class
930
+ case @impl.class.name
931
+ when /MRI/
932
+ :mri
933
+ when /Rust/
934
+ :rust
935
+ when /FFI/
936
+ :ffi
937
+ when /Java/
938
+ :java
939
+ when /Citrus/
940
+ :citrus
941
+ else
942
+ # Fallback to effective_backend if we can't determine from class name
943
+ TreeHaver.effective_backend
944
+ end
945
+ end
420
946
  end
421
947
 
422
948
  # Set the language grammar for this parser
@@ -426,9 +952,135 @@ module TreeHaver
426
952
  # @example
427
953
  # parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
428
954
  def language=(lang)
429
- @impl.language = lang
955
+ # Unwrap the language before passing to backend
956
+ # Backends receive raw language objects, never TreeHaver wrappers
957
+ inner_lang = unwrap_language(lang)
958
+ @impl.language = inner_lang
959
+ # Return the original (possibly wrapped) language for consistency
960
+ lang
430
961
  end
431
962
 
963
+ private
964
+
965
+ # Unwrap a language object to extract the raw backend language
966
+ #
967
+ # This method is smart about backend compatibility:
968
+ # 1. If language has a backend attribute, checks if it matches current backend
969
+ # 2. If mismatch detected, attempts to reload language for correct backend
970
+ # 3. If reload succeeds, uses the reloaded language; otherwise raises TreeHaver::Error
971
+ # 4. Unwraps the language wrapper to get raw backend object
972
+ #
973
+ # @param lang [Object] wrapped or raw language object
974
+ # @return [Object] raw backend language object appropriate for current backend
975
+ # @api private
976
+ def unwrap_language(lang)
977
+ # Check if this is a TreeHaver language wrapper with backend info
978
+ if lang.respond_to?(:backend)
979
+ # Verify backend compatibility FIRST
980
+ # This prevents passing languages from wrong backends to native code
981
+ # Exception: :auto backend is permissive - accepts any language
982
+ current_backend = backend
983
+
984
+ if lang.backend != current_backend && current_backend != :auto
985
+ # Backend mismatch! Try to reload for correct backend
986
+ reloaded = try_reload_language_for_backend(lang, current_backend)
987
+ if reloaded
988
+ lang = reloaded
989
+ else
990
+ # Couldn't reload - this is an error
991
+ raise TreeHaver::Error,
992
+ "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
993
+ "Cannot reload language for correct backend. " \
994
+ "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
995
+ end
996
+ end
997
+
998
+ # Get the current parser's language (if set)
999
+ current_lang = @impl.respond_to?(:language) ? @impl.language : nil
1000
+
1001
+ # Language mismatch detected! The parser might have a different language set
1002
+ # Compare the actual language objects using Comparable
1003
+ if current_lang && lang != current_lang
1004
+ # Different language being set (e.g., switching from TOML to JSON)
1005
+ # This is fine, just informational
1006
+ end
1007
+ end
1008
+
1009
+ # Unwrap based on backend type
1010
+ # All TreeHaver Language wrappers have the backend attribute
1011
+ unless lang.respond_to?(:backend)
1012
+ # This shouldn't happen - all our wrappers have backend attribute
1013
+ # If we get here, it's likely a raw backend object that was passed directly
1014
+ raise TreeHaver::Error,
1015
+ "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
1016
+ "Use TreeHaver::Language.from_library to create language objects."
1017
+ end
1018
+
1019
+ case lang.backend
1020
+ when :mri
1021
+ return lang.to_language if lang.respond_to?(:to_language)
1022
+ return lang.inner_language if lang.respond_to?(:inner_language)
1023
+ when :rust
1024
+ return lang.name if lang.respond_to?(:name)
1025
+ when :ffi
1026
+ return lang # FFI needs wrapper for to_ptr
1027
+ when :java
1028
+ return lang.impl if lang.respond_to?(:impl)
1029
+ when :citrus
1030
+ return lang.grammar_module if lang.respond_to?(:grammar_module)
1031
+ else
1032
+ # Unknown backend (e.g., test backend)
1033
+ # Try generic unwrapping methods for flexibility in testing
1034
+ return lang.to_language if lang.respond_to?(:to_language)
1035
+ return lang.inner_language if lang.respond_to?(:inner_language)
1036
+ return lang.impl if lang.respond_to?(:impl)
1037
+ return lang.grammar_module if lang.respond_to?(:grammar_module)
1038
+ return lang.name if lang.respond_to?(:name)
1039
+
1040
+ # If nothing works, pass through as-is
1041
+ # This allows test languages to be passed directly
1042
+ return lang
1043
+ end
1044
+
1045
+ # Reached when a known backend's wrapper lacks the expected accessor; pass it through unchanged
1046
+ lang
1047
+ end
1048
+
1049
+ # Try to reload a language for the current backend
1050
+ #
1051
+ # This handles the case where a language was loaded for one backend,
1052
+ # but is now being used with a different backend (e.g., after backend switch).
1053
+ #
1054
+ # @param lang [Object] language object with metadata
1055
+ # @param target_backend [Symbol] backend to reload for
1056
+ # @return [Object, nil] reloaded language or nil if reload not possible
1057
+ # @api private
1058
+ def try_reload_language_for_backend(lang, target_backend)
1059
+ # Can't reload without path information
1060
+ return unless lang.respond_to?(:path) || lang.respond_to?(:grammar_module)
1061
+
1062
+ # For tree-sitter backends, reload from path
1063
+ if lang.respond_to?(:path) && lang.path
1064
+ begin
1065
+ # Use Language.from_library which respects current backend
1066
+ return Language.from_library(
1067
+ lang.path,
1068
+ symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
1069
+ name: lang.respond_to?(:name) ? lang.name : nil,
1070
+ )
1071
+ rescue => e
1072
+ # Reload failed; return nil so the caller can report the backend mismatch
1073
+ warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
1074
+ return
1075
+ end
1076
+ end
1077
+
1078
+ # For Citrus, can't really reload as it's just a module reference
1079
+ nil
1080
+ end
1081
+
1082
+ public
1083
+
432
1084
  # Parse source code into a syntax tree
433
1085
  #
434
1086
  # @param source [String] the source code to parse (should be UTF-8)
@@ -438,7 +1090,8 @@ module TreeHaver
438
1090
  # puts tree.root_node.type
439
1091
  def parse(source)
440
1092
  tree_impl = @impl.parse(source)
441
- Tree.new(tree_impl)
1093
+ # Wrap backend tree with source so Node#text works
1094
+ Tree.new(tree_impl, source: source)
442
1095
  end
443
1096
 
444
1097
  # Parse source code into a syntax tree (with optional incremental parsing)
@@ -448,7 +1101,7 @@ module TreeHaver
448
1101
  #
449
1102
  # == Incremental Parsing
450
1103
  #
451
- # Tree-sitter supports **incremental parsing** where you can pass a previously
1104
+ # tree-sitter supports **incremental parsing** where you can pass a previously
452
1105
  # parsed tree along with edit information to efficiently re-parse only the
453
1106
  # changed portions of source code. This is a major performance optimization
454
1107
  # for editors and IDEs that need to re-parse on every keystroke.
@@ -458,7 +1111,7 @@ module TreeHaver
458
1111
  # 2. User edits the source (e.g., inserts a character)
459
1112
  # 3. Call `tree.edit(...)` to update the tree's position data
460
1113
  # 4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
461
- # 5. Tree-sitter reuses unchanged nodes, only re-parsing affected regions
1114
+ # 5. tree-sitter reuses unchanged nodes, only re-parsing affected regions
462
1115
  #
463
1116
  # TreeHaver passes through to the underlying backend if it supports incremental
464
1117
  # parsing (MRI and Rust backends do). Check `TreeHaver.capabilities[:incremental]`
@@ -467,7 +1120,7 @@ module TreeHaver
467
1120
  # @param old_tree [Tree, nil] previously parsed tree for incremental parsing, or nil for fresh parse
468
1121
  # @param source [String] the source code to parse (should be UTF-8)
469
1122
  # @return [Tree] the parsed syntax tree
470
- # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing Tree-sitter incremental parsing docs
1123
+ # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
471
1124
  # @see Tree#edit For marking edits before incremental re-parsing
472
1125
  # @example First parse (no old tree)
473
1126
  # tree = parser.parse_string(nil, "x = 1")
@@ -478,12 +1131,21 @@ module TreeHaver
478
1131
  # Pass through to backend if it supports incremental parsing
479
1132
  if old_tree && @impl.respond_to?(:parse_string)
480
1133
  # Extract the underlying implementation from our Tree wrapper
481
- old_impl = old_tree.is_a?(Tree) ? old_tree.instance_variable_get(:@impl) : old_tree
1134
+ old_impl = if old_tree.respond_to?(:inner_tree)
1135
+ old_tree.inner_tree
1136
+ elsif old_tree.respond_to?(:instance_variable_get)
1137
+ # Fallback for compatibility
1138
+ old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
1139
+ else
1140
+ old_tree
1141
+ end
482
1142
  tree_impl = @impl.parse_string(old_impl, source)
483
- Tree.new(tree_impl)
1143
+ # Wrap backend tree with source so Node#text works
1144
+ Tree.new(tree_impl, source: source)
484
1145
  elsif @impl.respond_to?(:parse_string)
485
1146
  tree_impl = @impl.parse_string(nil, source)
486
- Tree.new(tree_impl)
1147
+ # Wrap backend tree with source so Node#text works
1148
+ Tree.new(tree_impl, source: source)
487
1149
  else
488
1150
  # Fallback for backends that don't support parse_string
489
1151
  parse(source)
@@ -491,219 +1153,13 @@ module TreeHaver
491
1153
  end
492
1154
  end
493
1155
 
494
- # Represents a parsed syntax tree
1156
+ # Tree and Node classes have been moved to separate files:
1157
+ # - tree_haver/tree.rb: TreeHaver::Tree - unified wrapper providing consistent API
1158
+ # - tree_haver/node.rb: TreeHaver::Node - unified wrapper providing consistent API
495
1159
  #
496
- # A Tree is the result of parsing source code. It provides access to
497
- # the root node of the AST and supports incremental parsing via the
498
- # {#edit} method.
499
- #
500
- # @example Basic usage
501
- # tree = parser.parse(source)
502
- # root = tree.root_node
503
- #
504
- # @example Incremental parsing
505
- # tree = parser.parse_string(nil, original_source)
506
- # tree.edit(
507
- # start_byte: 10,
508
- # old_end_byte: 15,
509
- # new_end_byte: 20,
510
- # start_point: { row: 0, column: 10 },
511
- # old_end_point: { row: 0, column: 15 },
512
- # new_end_point: { row: 0, column: 20 }
513
- # )
514
- # new_tree = parser.parse_string(tree, edited_source)
515
- class Tree
516
- # @api private
517
- def initialize(impl)
518
- @impl = impl
519
- end
520
-
521
- # Get the root node of the syntax tree
522
- #
523
- # @return [Node] the root node
524
- # @example
525
- # root = tree.root_node
526
- # puts root.type # => "document" or similar
527
- def root_node
528
- Node.new(@impl.root_node)
529
- end
530
-
531
- # Mark the tree as edited for incremental re-parsing
532
- #
533
- # Call this method after the source code has been modified but before
534
- # re-parsing. This tells Tree-sitter which parts of the tree are
535
- # invalidated so it can efficiently re-parse only the affected regions.
536
- #
537
- # @param start_byte [Integer] byte offset where the edit starts
538
- # @param old_end_byte [Integer] byte offset where the old text ended
539
- # @param new_end_byte [Integer] byte offset where the new text ends
540
- # @param start_point [Hash] starting position as `{ row:, column: }`
541
- # @param old_end_point [Hash] old ending position as `{ row:, column: }`
542
- # @param new_end_point [Hash] new ending position as `{ row:, column: }`
543
- # @return [void]
544
- # @raise [NotAvailable] if the backend doesn't support incremental parsing
545
- # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing
546
- #
547
- # @example
548
- # # Original: "x = 1"
549
- # # Edited: "x = 42" (replaced "1" with "42" at byte 4)
550
- # tree.edit(
551
- # start_byte: 4,
552
- # old_end_byte: 5,
553
- # new_end_byte: 6,
554
- # start_point: { row: 0, column: 4 },
555
- # old_end_point: { row: 0, column: 5 },
556
- # new_end_point: { row: 0, column: 6 }
557
- # )
558
- def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
559
- unless @impl.respond_to?(:edit)
560
- raise NotAvailable, "Incremental parsing not supported by current backend. " \
561
- "Use MRI (ruby_tree_sitter) or Rust (tree_stump) backend."
562
- end
563
-
564
- @impl.edit(
565
- start_byte: start_byte,
566
- old_end_byte: old_end_byte,
567
- new_end_byte: new_end_byte,
568
- start_point: start_point,
569
- old_end_point: old_end_point,
570
- new_end_point: new_end_point,
571
- )
572
- end
573
-
574
- # Check if the underlying implementation supports incremental parsing
575
- #
576
- # @return [Boolean] true if {#edit} can be called on this tree
577
- def supports_editing?
578
- @impl.respond_to?(:edit)
579
- end
580
- end
581
-
582
- # Represents a node in the syntax tree
583
- #
584
- # A Node represents a single element in the parsed AST. Each node has
585
- # a type (like "string", "number", "table", etc.) and may have child nodes.
586
- #
587
- # @example Traversing nodes
588
- # root = tree.root_node
589
- # root.each do |child|
590
- # puts "Child type: #{child.type}"
591
- # child.each { |grandchild| puts " Grandchild: #{grandchild.type}" }
592
- # end
593
- class Node
594
- # @api private
595
- def initialize(impl)
596
- @impl = impl
597
- end
598
-
599
- # Get the type name of this node
600
- #
601
- # The type corresponds to the grammar rule that produced this node
602
- # (e.g., "document", "table", "string_literal", "pair", etc.).
603
- #
604
- # @return [String] the node type
605
- # @example
606
- # node.type # => "table"
607
- def type
608
- @impl.type
609
- end
610
-
611
- # Iterate over child nodes
612
- #
613
- # @yieldparam child [Node] each child node
614
- # @return [Enumerator, nil] an enumerator if no block given, nil otherwise
615
- # @example With a block
616
- # node.each { |child| puts child.type }
617
- #
618
- # @example Without a block
619
- # children = node.each.to_a
620
- def each(&blk)
621
- return enum_for(:each) unless block_given?
622
- @impl.each { |child_impl| blk.call(Node.new(child_impl)) }
623
- end
624
-
625
- # Get the start position of this node in the source
626
- #
627
- # @return [Object] point object with row and column
628
- # @example
629
- # node.start_point.row # => 0
630
- # node.start_point.column # => 4
631
- def start_point
632
- @impl.start_point
633
- end
634
-
635
- # Get the end position of this node in the source
636
- #
637
- # @return [Object] point object with row and column
638
- # @example
639
- # node.end_point.row # => 0
640
- # node.end_point.column # => 10
641
- def end_point
642
- @impl.end_point
643
- end
644
-
645
- # Get the start byte offset of this node in the source
646
- #
647
- # @return [Integer] byte offset from beginning of source
648
- def start_byte
649
- @impl.start_byte
650
- end
651
-
652
- # Get the end byte offset of this node in the source
653
- #
654
- # @return [Integer] byte offset from beginning of source
655
- def end_byte
656
- @impl.end_byte
657
- end
658
-
659
- # Check if this node or any descendant has a parse error
660
- #
661
- # @return [Boolean] true if there is an error in the subtree
662
- def has_error?
663
- @impl.respond_to?(:has_error?) && @impl.has_error?
664
- end
665
-
666
- # Check if this node is a MISSING node (inserted by error recovery)
667
- #
668
- # @return [Boolean] true if this is a missing node
669
- def missing?
670
- @impl.respond_to?(:missing?) && @impl.missing?
671
- end
672
-
673
- # Get string representation of this node
674
- #
675
- # @return [String] string representation
676
- def to_s
677
- @impl.to_s
678
- end
679
-
680
- # Check if node responds to a method (includes delegation to @impl)
681
- #
682
- # @param method_name [Symbol] method to check
683
- # @param include_private [Boolean] include private methods
684
- # @return [Boolean]
685
- def respond_to_missing?(method_name, include_private = false)
686
- @impl.respond_to?(method_name, include_private) || super
687
- end
688
-
689
- # Delegate unknown methods to the underlying implementation
690
- #
691
- # This provides full compatibility with ruby_tree_sitter nodes
692
- # for methods not explicitly wrapped.
693
- #
694
- # @param method_name [Symbol] method to call
695
- # @param args [Array] arguments to pass
696
- # @param block [Proc] block to pass
697
- # @return [Object] result from the underlying implementation
698
- def method_missing(method_name, *args, **kwargs, &block)
699
- if @impl.respond_to?(method_name)
700
- @impl.public_send(method_name, *args, **kwargs, &block)
701
- else
702
- super
703
- end
704
- end
705
- end
706
- end
1160
+ # These provide a unified interface across all backends (MRI, Rust, FFI, Java, Citrus).
1161
+ # Backends return raw trees; TreeHaver::Parser wraps them in TreeHaver::Tree before returning them to callers.
1162
+ end # end module TreeHaver
707
1163
 
708
1164
  TreeHaver::Version.class_eval do
709
1165
  extend VersionGem::Basic