tree_haver 3.2.2 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -16,7 +16,7 @@ require_relative "tree_haver/version"
16
16
  #
17
17
  # == Backends
18
18
  #
19
- # Supports 10 backends:
19
+ # Supports 9 backends:
20
20
  # - Tree-sitter: MRI (C), Rust, FFI, Java
21
21
  # - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
22
22
  # - Pure Ruby: Citrus (portable fallback)
@@ -108,6 +108,7 @@ module TreeHaver
108
108
  # Autoload internal modules
109
109
  autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
110
110
  autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
111
+ autoload :BackendAPI, File.join(__dir__, "tree_haver", "backend_api")
111
112
 
112
113
  # Base error class for TreeHaver exceptions
113
114
  # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
@@ -179,6 +180,9 @@ module TreeHaver
179
180
  # - {Backends::Java} - Uses JRuby's Java integration
180
181
  # - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
181
182
  # - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
183
+ # - {Backends::Psych} - Uses Ruby's built-in Psych parser (YAML-only, stdlib)
184
+ # - {Backends::Commonmarker} - Uses commonmarker gem (Markdown)
185
+ # - {Backends::Markly} - Uses markly gem (Markdown/GFM)
182
186
  module Backends
183
187
  autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
184
188
  autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
@@ -208,6 +212,17 @@ module TreeHaver
208
212
  commonmarker: [], # Commonmarker has no conflicts with other backends
209
213
  markly: [], # Markly has no conflicts with other backends
210
214
  }.freeze
215
+
216
+ # Pure Ruby backends that parse specific languages
217
+ # These are language-specific and register themselves via LanguageRegistry
218
+ #
219
+ # @return [Hash{Symbol => Hash}] Maps backend name to language and module info
220
+ PURE_RUBY_BACKENDS = {
221
+ prism: {language: :ruby, module_name: "Prism"},
222
+ psych: {language: :yaml, module_name: "Psych"},
223
+ commonmarker: {language: :markdown, module_name: "Commonmarker"},
224
+ markly: {language: :markdown, module_name: "Markly"},
225
+ }.freeze
211
226
  end
212
227
 
213
228
  # Security utilities for validating paths before loading shared libraries
@@ -266,6 +281,11 @@ module TreeHaver
266
281
  # Parser class for parsing source code into syntax trees
267
282
  autoload :Parser, File.join(__dir__, "tree_haver", "parser")
268
283
 
284
+ # Native tree-sitter backends that support loading shared libraries (.so files)
285
+ # These backends wrap the tree-sitter C library via various bindings.
286
+ # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) are excluded.
287
+ NATIVE_BACKENDS = %i[mri rust ffi java].freeze
288
+
269
289
  # Get the current backend selection
270
290
  #
271
291
  # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, :citrus, :prism, :psych, :commonmarker, or :markly
@@ -345,18 +365,92 @@ module TreeHaver
345
365
  # @example
346
366
  # TreeHaver.backend # => :auto
347
367
  def backend
348
- @backend ||= case (ENV["TREE_HAVER_BACKEND"] || :auto).to_s # rubocop:disable ThreadSafety/ClassInstanceVariable
349
- when "mri" then :mri
350
- when "rust" then :rust
351
- when "ffi" then :ffi
352
- when "java" then :java
353
- when "citrus" then :citrus
354
- when "prism" then :prism
355
- when "psych" then :psych
356
- when "commonmarker" then :commonmarker
357
- when "markly" then :markly
358
- else :auto
368
+ return @backend if defined?(@backend) && @backend # rubocop:disable ThreadSafety/ClassInstanceVariable
369
+
370
+ @backend = parse_single_backend_env # rubocop:disable ThreadSafety/ClassInstanceVariable
371
+ end
372
+
373
+ # Valid native backend names (require native extensions)
374
+ VALID_NATIVE_BACKENDS = %w[mri rust ffi java].freeze
375
+
376
+ # Valid pure Ruby backend names (no native extensions)
377
+ VALID_RUBY_BACKENDS = %w[citrus prism psych commonmarker markly].freeze
378
+
379
+ # All valid backend names
380
+ VALID_BACKENDS = (VALID_NATIVE_BACKENDS + VALID_RUBY_BACKENDS + %w[auto none]).freeze
381
+
382
+ # Get allowed native backends from TREE_HAVER_NATIVE_BACKEND environment variable
383
+ #
384
+ # Supports comma-separated values like "mri,ffi".
385
+ # Special values:
386
+ # - "auto" or empty/unset: automatically select from available native backends
387
+ # - "none": no native backends allowed (pure Ruby only)
388
+ #
389
+ # @return [Array<Symbol>] list of allowed native backend symbols, or [:auto] or [:none]
390
+ # @example Allow only MRI and FFI
391
+ # # TREE_HAVER_NATIVE_BACKEND=mri,ffi
392
+ # TreeHaver.allowed_native_backends # => [:mri, :ffi]
393
+ # @example Auto-select native backends (default)
394
+ # # TREE_HAVER_NATIVE_BACKEND not set, empty, or "auto"
395
+ # TreeHaver.allowed_native_backends # => [:auto]
396
+ # @example Disable all native backends
397
+ # # TREE_HAVER_NATIVE_BACKEND=none
398
+ # TreeHaver.allowed_native_backends # => [:none]
399
+ def allowed_native_backends
400
+ @allowed_native_backends ||= parse_backend_list_env("TREE_HAVER_NATIVE_BACKEND", VALID_NATIVE_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
401
+ end
402
+
403
+ # Get allowed Ruby backends from TREE_HAVER_RUBY_BACKEND environment variable
404
+ #
405
+ # Supports comma-separated values like "citrus,prism".
406
+ # Special values:
407
+ # - "auto" or empty/unset: automatically select from available Ruby backends
408
+ # - "none": no Ruby backends allowed (native only)
409
+ #
410
+ # @return [Array<Symbol>] list of allowed Ruby backend symbols, or [:auto] or [:none]
411
+ # @example Allow only Citrus
412
+ # # TREE_HAVER_RUBY_BACKEND=citrus
413
+ # TreeHaver.allowed_ruby_backends # => [:citrus]
414
+ # @example Auto-select Ruby backends (default)
415
+ # # TREE_HAVER_RUBY_BACKEND not set, empty, or "auto"
416
+ # TreeHaver.allowed_ruby_backends # => [:auto]
417
+ def allowed_ruby_backends
418
+ @allowed_ruby_backends ||= parse_backend_list_env("TREE_HAVER_RUBY_BACKEND", VALID_RUBY_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
419
+ end
420
+
421
+ # Check if a specific backend is allowed based on environment variables
422
+ #
423
+ # Checks TREE_HAVER_NATIVE_BACKEND for native backends and
424
+ # TREE_HAVER_RUBY_BACKEND for pure Ruby backends.
425
+ #
426
+ # @param backend_name [Symbol, String] the backend to check
427
+ # @return [Boolean] true if the backend is allowed
428
+ # @example
429
+ # # TREE_HAVER_NATIVE_BACKEND=mri
430
+ # TreeHaver.backend_allowed?(:mri) # => true
431
+ # TreeHaver.backend_allowed?(:ffi) # => false
432
+ # TreeHaver.backend_allowed?(:citrus) # => true (Ruby backends use separate env var)
433
+ def backend_allowed?(backend_name)
434
+ backend_sym = backend_name.to_sym
435
+
436
+ # Check if it's a native backend
437
+ if VALID_NATIVE_BACKENDS.include?(backend_sym.to_s)
438
+ allowed = allowed_native_backends
439
+ return true if allowed == [:auto]
440
+ return false if allowed == [:none]
441
+ return allowed.include?(backend_sym)
442
+ end
443
+
444
+ # Check if it's a Ruby backend
445
+ if VALID_RUBY_BACKENDS.include?(backend_sym.to_s)
446
+ allowed = allowed_ruby_backends
447
+ return true if allowed == [:auto]
448
+ return false if allowed == [:none]
449
+ return allowed.include?(backend_sym)
359
450
  end
451
+
452
+ # Unknown backend or :auto - allow
453
+ true
360
454
  end
361
455
 
362
456
  # Set the backend to use
@@ -383,6 +477,101 @@ module TreeHaver
383
477
  # TreeHaver.reset_backend!(to: :ffi)
384
478
  def reset_backend!(to: :auto)
385
479
  @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
480
+ @allowed_native_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
481
+ @allowed_ruby_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
482
+ end
483
+
484
+ # Register built-in pure Ruby backends in the LanguageRegistry
485
+ #
486
+ # This registers Prism, Psych, Commonmarker, and Markly using the same
487
+ # registration API that external backends use. This ensures consistent
488
+ # behavior whether a backend is built-in or provided by an external gem.
489
+ #
490
+ # Called automatically when TreeHaver is first used, but can be called
491
+ # manually in tests or when reset! has cleared the registry.
492
+ #
493
+ # @return [void]
494
+ # @example Manual registration (usually not needed)
495
+ # TreeHaver.register_builtin_backends!
496
+ def register_builtin_backends!
497
+ Backends::PURE_RUBY_BACKENDS.each do |backend_type, info|
498
+ language = info[:language]
499
+ module_name = info[:module_name]
500
+
501
+ # Get the backend module
502
+ backend_mod = Backends.const_get(module_name)
503
+ next unless backend_mod
504
+
505
+ # Register without an availability check here; parser_for checks backend_mod.available? before using a backend
506
+ LanguageRegistry.register(
507
+ language,
508
+ backend_type,
509
+ backend_module: backend_mod,
510
+ gem_name: module_name.downcase,
511
+ )
512
+ end
513
+ end
514
+
515
+ # Check if built-in backends have been registered
516
+ #
517
+ # @return [Boolean]
518
+ # @api private
519
+ def builtin_backends_registered?
520
+ @builtin_backends_registered ||= false # rubocop:disable ThreadSafety/ClassInstanceVariable
521
+ end
522
+
523
+ # Ensure built-in backends are registered (idempotent)
524
+ #
525
+ # @return [void]
526
+ # @api private
527
+ def ensure_builtin_backends_registered!
528
+ return if builtin_backends_registered?
529
+ register_builtin_backends!
530
+ @builtin_backends_registered = true # rubocop:disable ThreadSafety/ClassInstanceVariable
531
+ end
532
+
533
+ # Parse TREE_HAVER_BACKEND environment variable (single backend)
534
+ #
535
+ # @return [Symbol] the backend symbol (:auto if not set or invalid)
536
+ # @api private
537
+ def parse_single_backend_env
538
+ env_value = ENV["TREE_HAVER_BACKEND"]
539
+ return :auto if env_value.nil? || env_value.strip.empty?
540
+
541
+ name = env_value.strip.downcase
542
+ return :auto unless VALID_BACKENDS.include?(name) && name != "all" && name != "none"
543
+
544
+ name.to_sym
545
+ end
546
+
547
+ # Parse a backend list environment variable
548
+ #
549
+ # @param env_var [String] the environment variable name
550
+ # @param valid_backends [Array<String>] list of valid backend names
551
+ # @return [Array<Symbol>] list of backend symbols, or [:auto] or [:none]
552
+ # @api private
553
+ def parse_backend_list_env(env_var, valid_backends)
554
+ env_value = ENV[env_var]
555
+
556
+ # Empty or unset means "auto"
557
+ return [:auto] if env_value.nil? || env_value.strip.empty?
558
+
559
+ normalized = env_value.strip.downcase
560
+
561
+ # Handle special values
562
+ return [:auto] if normalized == "auto"
563
+ return [:none] if normalized == "none"
564
+
565
+ # Split on comma and parse each backend
566
+ backends = normalized.split(",").map(&:strip).uniq
567
+
568
+ # Convert to symbols, filtering out invalid ones
569
+ parsed = backends.filter_map do |name|
570
+ valid_backends.include?(name) ? name.to_sym : nil
571
+ end
572
+
573
+ # Return [:auto] if no valid backends were found
574
+ parsed.empty? ? [:auto] : parsed
386
575
  end
387
576
 
388
577
  # Thread-local backend context storage
@@ -569,7 +758,11 @@ module TreeHaver
569
758
  # Return nil if the module doesn't exist
570
759
  return unless mod
571
760
 
572
- # Check for backend conflicts FIRST, before checking availability
761
+ # Check if the backend is allowed by environment variables FIRST
762
+ # This enforces TREE_HAVER_NATIVE_BACKEND and TREE_HAVER_RUBY_BACKEND as hard restrictions
763
+ return if requested && requested != :auto && !backend_allowed?(requested)
764
+
765
+ # Check for backend conflicts, before checking availability
573
766
  # This is critical because the conflict causes the backend to report unavailable
574
767
  # We want to raise a clear error explaining WHY it's unavailable
575
768
  # Use the requested backend name directly (not capabilities) because
@@ -590,11 +783,6 @@ module TreeHaver
590
783
  mod
591
784
  end
592
785
 
593
- # Native tree-sitter backends that support loading shared libraries (.so files)
594
- # These backends wrap the tree-sitter C library via various bindings.
595
- # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) are excluded.
596
- NATIVE_BACKENDS = %i[mri rust ffi java].freeze
597
-
598
786
  # Resolve a native tree-sitter backend module (for from_library)
599
787
  #
600
788
  # This method is similar to resolve_backend_module but ONLY considers
@@ -643,8 +831,14 @@ module TreeHaver
643
831
  end
644
832
 
645
833
  native_priority.each do |backend|
834
+ # Rescue BackendConflict to allow iteration to continue
835
+ # This enables graceful fallback when a backend is blocked
836
+
646
837
  mod = resolve_backend_module(backend)
647
838
  return mod if mod
839
+ rescue BackendConflict
840
+ # This backend is blocked by a previously used backend, try the next one
841
+ next
648
842
  end
649
843
 
650
844
  nil # No native backend available
@@ -664,7 +858,19 @@ module TreeHaver
664
858
  # puts "Using #{mod.capabilities[:backend]} backend"
665
859
  # end
666
860
  def backend_module
667
- case effective_backend # Changed from: backend
861
+ requested = effective_backend # Changed from: backend
862
+
863
+ # For explicit backends (not :auto), check for conflicts first
864
+ # If the backend is blocked, fall through to auto-select
865
+ if requested != :auto && backend_protect?
866
+ conflicts = conflicting_backends_for(requested)
867
+ unless conflicts.empty?
868
+ # The explicitly requested backend is blocked - fall through to auto-select
869
+ requested = :auto
870
+ end
871
+ end
872
+
873
+ case requested
668
874
  when :mri
669
875
  Backends::MRI
670
876
  when :rust
@@ -685,15 +891,16 @@ module TreeHaver
685
891
  Backends::Markly
686
892
  else
687
893
  # auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
688
- if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && Backends::Java.available?
894
+ # Each backend must be both allowed (by ENV) and available (gem installed)
895
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && backend_allowed?(:java) && Backends::Java.available?
689
896
  Backends::Java
690
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && Backends::MRI.available?
897
+ elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:mri) && Backends::MRI.available?
691
898
  Backends::MRI
692
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && Backends::Rust.available?
899
+ elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:rust) && Backends::Rust.available?
693
900
  Backends::Rust
694
- elsif Backends::FFI.available?
901
+ elsif backend_allowed?(:ffi) && Backends::FFI.available?
695
902
  Backends::FFI
696
- elsif Backends::Citrus.available?
903
+ elsif backend_allowed?(:citrus) && Backends::Citrus.available?
697
904
  Backends::Citrus # Pure Ruby fallback
698
905
  else
699
906
  # No backend available
@@ -761,6 +968,8 @@ module TreeHaver
761
968
  # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
762
969
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
763
970
  # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
971
+ # @param backend_module [Module, nil] pure Ruby backend module with Language/Parser classes
972
+ # @param backend_type [Symbol, nil] backend type for backend_module (defaults to the downcased last segment of the module's name, as a Symbol)
764
973
  # @param gem_name [String, nil] optional gem name for error messages
765
974
  # @return [void]
766
975
  # @example Register tree-sitter grammar only
@@ -775,6 +984,13 @@ module TreeHaver
775
984
  # grammar_module: TomlRB::Document,
776
985
  # gem_name: "toml-rb"
777
986
  # )
987
+ # @example Register pure Ruby backend (external gem like rbs-merge)
988
+ # TreeHaver.register_language(
989
+ # :rbs,
990
+ # backend_module: Rbs::Merge::Backends::RbsBackend,
991
+ # backend_type: :rbs,
992
+ # gem_name: "rbs"
993
+ # )
778
994
  # @example Register BOTH backends in separate calls
779
995
  # TreeHaver.register_language(
780
996
  # :toml,
@@ -795,7 +1011,7 @@ module TreeHaver
795
1011
  # gem_name: "toml-rb"
796
1012
  # )
797
1013
  # # Now TreeHaver::Language.toml works with ANY backend!
798
- def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
1014
+ def register_language(name, path: nil, symbol: nil, grammar_module: nil, backend_module: nil, backend_type: nil, gem_name: nil)
799
1015
  # Register tree-sitter backend if path provided
800
1016
  # Note: Uses `if` not `elsif` so both backends can be registered in one call
801
1017
  if path
@@ -813,9 +1029,17 @@ module TreeHaver
813
1029
  LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
814
1030
  end
815
1031
 
1032
+ # Register pure Ruby backend if backend_module provided
1033
+ # This is used by external gems (like rbs-merge) to register their own backends
1034
+ if backend_module
1035
+ # Derive backend_type from module name if not provided
1036
+ type = backend_type || backend_module.name.split("::").last.downcase.to_sym
1037
+ LanguageRegistry.register(name, type, backend_module: backend_module, gem_name: gem_name)
1038
+ end
1039
+
816
1040
  # Require at least one backend to be registered
817
- if path.nil? && grammar_module.nil?
818
- raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
1041
+ if path.nil? && grammar_module.nil? && backend_module.nil?
1042
+ raise ArgumentError, "Must provide at least one of: path (tree-sitter), grammar_module (Citrus), or backend_module (pure Ruby)"
819
1043
  end
820
1044
 
821
1045
  # Note: No early return! This method intentionally processes both `if` blocks
@@ -836,114 +1060,152 @@ module TreeHaver
836
1060
 
837
1061
  # Create a parser configured for a specific language
838
1062
  #
839
- # This is the recommended high-level API for creating a parser. It handles:
840
- # 1. Checking if the language is already registered
841
- # 2. Auto-discovering tree-sitter grammar via GrammarFinder
842
- # 3. Falling back to Citrus grammar if tree-sitter is unavailable
843
- # 4. Creating and configuring the parser
1063
+ # Respects the effective backend setting (via TREE_HAVER_BACKEND env var,
1064
+ # TreeHaver.backend=, or with_backend block).
1065
+ #
1066
+ # Supports three types of backends:
1067
+ # 1. Tree-sitter native backends (auto-discovered or explicit path)
1068
+ # 2. Citrus grammars (pure Ruby, via CITRUS_DEFAULTS or explicit config)
1069
+ # 3. Pure Ruby backends (registered via backend_module, e.g., Prism, Psych, RBS)
844
1070
  #
845
- # @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :bash)
1071
+ # @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :ruby, :yaml, :rbs)
846
1072
  # @param library_path [String, nil] optional explicit path to tree-sitter grammar library
847
1073
  # @param symbol [String, nil] optional tree-sitter symbol name (defaults to "tree_sitter_<name>")
848
1074
  # @param citrus_config [Hash, nil] optional Citrus fallback configuration
849
- # @option citrus_config [String] :gem_name gem name for the Citrus grammar
850
- # @option citrus_config [String] :grammar_const fully qualified constant name for grammar module
851
1075
  # @return [TreeHaver::Parser] configured parser with language set
852
1076
  # @raise [TreeHaver::NotAvailable] if no parser backend is available for the language
853
1077
  #
854
1078
  # @example Basic usage (auto-discovers grammar)
855
1079
  # parser = TreeHaver.parser_for(:toml)
856
- # tree = parser.parse("[package]\nname = \"my-app\"")
857
1080
  #
858
- # @example With explicit library path
859
- # parser = TreeHaver.parser_for(:toml, library_path: "/custom/path/libtree-sitter-toml.so")
1081
+ # @example Force Citrus backend
1082
+ # TreeHaver.with_backend(:citrus) { TreeHaver.parser_for(:toml) }
860
1083
  #
861
- # @example With Citrus fallback configuration
862
- # parser = TreeHaver.parser_for(:toml,
863
- # citrus_config: { gem_name: "toml-rb", grammar_const: "TomlRB::Document" }
864
- # )
1084
+ # @example Use registered pure Ruby backend (e.g., RBS)
1085
+ # # First, rbs-merge registers its backend:
1086
+ # # TreeHaver.register_language(:rbs, backend_module: Rbs::Merge::Backends::RbsBackend, backend_type: :rbs)
1087
+ # parser = TreeHaver.parser_for(:rbs)
865
1088
  def parser_for(language_name, library_path: nil, symbol: nil, citrus_config: nil)
1089
+ # Ensure built-in pure Ruby backends are registered
1090
+ ensure_builtin_backends_registered!
1091
+
866
1092
  name = language_name.to_sym
867
1093
  symbol ||= "tree_sitter_#{name}"
868
-
869
- # Step 1: Try to get the language (may already be registered)
870
- language = begin
871
- # Check if already registered and loadable
872
- if registered_language(name)
873
- Language.public_send(name, path: library_path, symbol: symbol)
874
- end
875
- rescue NotAvailable, ArgumentError, LoadError
876
- nil
877
- end
878
-
879
- # Step 2: If not registered, try GrammarFinder for tree-sitter
880
- unless language
881
- # Principle of Least Surprise: If user provides an explicit path,
882
- # it MUST exist. Don't silently fall back to auto-discovery.
883
- if library_path && !library_path.empty?
884
- unless File.exist?(library_path)
885
- raise NotAvailable,
886
- "Specified parser path does not exist: #{library_path}"
887
- end
888
- begin
889
- register_language(name, path: library_path, symbol: symbol)
890
- language = Language.public_send(name)
891
- rescue NotAvailable, ArgumentError, LoadError => e
892
- # Re-raise with more context since user explicitly provided this path
893
- raise NotAvailable,
894
- "Failed to load parser from specified path #{library_path}: #{e.message}"
895
- end
896
- else
897
- # Auto-discover via GrammarFinder (no explicit path provided)
898
- begin
899
- finder = GrammarFinder.new(name)
900
- if finder.available?
901
- finder.register!
902
- language = Language.public_send(name)
1094
+ requested = effective_backend
1095
+
1096
+ # Determine which backends to try based on effective_backend
1097
+ try_tree_sitter = (requested == :auto) || NATIVE_BACKENDS.include?(requested)
1098
+ try_citrus = (requested == :auto) || (requested == :citrus)
1099
+
1100
+ language = nil
1101
+ parser = nil
1102
+
1103
+ # First, check for registered pure Ruby backends
1104
+ # These take precedence over tree-sitter and Citrus when explicitly requested or, in :auto mode, whenever one is registered and available
1105
+ registration = registered_language(name)
1106
+ # Find any registered backend_module (not tree_sitter or citrus)
1107
+ registration&.each do |backend_type, config|
1108
+ next if %i[tree_sitter citrus].include?(backend_type)
1109
+ next unless config[:backend_module]
1110
+
1111
+ backend_mod = config[:backend_module]
1112
+ # Check if this backend is available
1113
+ next unless backend_mod.respond_to?(:available?) && backend_mod.available?
1114
+
1115
+ # If a specific backend was requested, only use if it matches
1116
+ next if requested != :auto && requested != backend_type
1117
+
1118
+ # Create parser from the backend module
1119
+ if backend_mod.const_defined?(:Parser)
1120
+ parser = backend_mod::Parser.new
1121
+ if backend_mod.const_defined?(:Language)
1122
+ lang_class = backend_mod::Language
1123
+ # Try to get language by name (e.g., Language.ruby, Language.yaml, Language.rbs)
1124
+ if lang_class.respond_to?(name)
1125
+ parser.language = lang_class.public_send(name)
1126
+ elsif lang_class.respond_to?(:from_library)
1127
+ parser.language = lang_class.from_library(nil, name: name)
903
1128
  end
904
- rescue NotAvailable, ArgumentError, LoadError
905
- language = nil
906
1129
  end
1130
+ return parser
907
1131
  end
908
1132
  end
909
1133
 
910
- # Step 3: Try Citrus fallback if tree-sitter failed
911
- unless language
912
- # Use explicit config, or fall back to built-in defaults for known languages
913
- citrus_config ||= CITRUS_DEFAULTS[name] || {}
914
-
915
- # Only attempt if we have the required configuration
916
- if citrus_config[:gem_name] && citrus_config[:grammar_const]
917
- begin
918
- citrus_finder = CitrusGrammarFinder.new(
919
- language: name,
920
- gem_name: citrus_config[:gem_name],
921
- grammar_const: citrus_config[:grammar_const],
922
- require_path: citrus_config[:require_path],
923
- )
924
- if citrus_finder.available?
925
- citrus_finder.register!
926
- language = Language.public_send(name)
927
- end
928
- rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
929
- language = nil
930
- end
931
- end
1134
+ # Try tree-sitter if applicable
1135
+ if try_tree_sitter && !language
1136
+ language = load_tree_sitter_language(name, library_path: library_path, symbol: symbol)
932
1137
  end
933
1138
 
934
- # Step 4: Raise if nothing worked
935
- unless language
936
- raise NotAvailable,
937
- "No parser available for #{name}. " \
938
- "Install tree-sitter-#{name} or the appropriate Ruby gem. " \
939
- "Set TREE_SITTER_#{name.to_s.upcase}_PATH for custom grammar location."
1139
+ # Try Citrus if applicable
1140
+ if try_citrus && !language
1141
+ language = load_citrus_language(name, citrus_config: citrus_config)
940
1142
  end
941
1143
 
942
- # Step 5: Create and configure parser
1144
+ # Raise if nothing worked
1145
+ raise NotAvailable, "No parser available for #{name}. " \
1146
+ "Install tree-sitter-#{name} or configure a Citrus grammar." unless language
1147
+
1148
+ # Create and configure parser
943
1149
  parser = Parser.new
944
1150
  parser.language = language
945
1151
  parser
946
1152
  end
1153
+
1154
+ private
1155
+
1156
+ # Load a tree-sitter language, either from registry or via auto-discovery
1157
+ # @return [Language, nil]
1158
+ # @raise [NotAvailable] if explicit library_path is provided but doesn't exist or can't load
1159
+ def load_tree_sitter_language(name, library_path: nil, symbol: nil)
1160
+ # If explicit path provided, it must work - don't swallow errors
1161
+ if library_path && !library_path.empty?
1162
+ raise NotAvailable, "Specified parser path does not exist: #{library_path}" unless File.exist?(library_path)
1163
+ register_language(name, path: library_path, symbol: symbol)
1164
+ return Language.public_send(name)
1165
+ end
1166
+
1167
+ # Auto-discovery: errors are acceptable, just return nil
1168
+ begin
1169
+ # Try already-registered tree-sitter language (not Citrus)
1170
+ # But only if the registered path actually exists - ignore stale/test registrations
1171
+ registration = registered_language(name)
1172
+ ts_reg = registration&.dig(:tree_sitter)
1173
+ if ts_reg && ts_reg[:path] && File.exist?(ts_reg[:path])
1174
+ return Language.public_send(name, symbol: symbol)
1175
+ end
1176
+
1177
+ # Auto-discover via GrammarFinder
1178
+ finder = GrammarFinder.new(name)
1179
+ if finder.available?
1180
+ finder.register!
1181
+ return Language.public_send(name)
1182
+ end
1183
+ rescue NotAvailable, ArgumentError, LoadError
1184
+ # Auto-discovery failed, that's okay
1185
+ end
1186
+
1187
+ nil
1188
+ end
1189
+
1190
+ # Load a Citrus language from configuration or defaults
1191
+ # @return [Language, nil]
1192
+ def load_citrus_language(name, citrus_config: nil)
1193
+ config = citrus_config || CITRUS_DEFAULTS[name] || {}
1194
+ return unless config[:gem_name] && config[:grammar_const]
1195
+
1196
+ finder = CitrusGrammarFinder.new(
1197
+ language: name,
1198
+ gem_name: config[:gem_name],
1199
+ grammar_const: config[:grammar_const],
1200
+ require_path: config[:require_path],
1201
+ )
1202
+ return unless finder.available?
1203
+
1204
+ finder.register!
1205
+ Language.public_send(name)
1206
+ rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
1207
+ nil
1208
+ end
947
1209
  end
948
1210
 
949
1211
  # Language and Parser classes have been moved to separate files:
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_haver
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.2
4
+ version: 3.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter H. Boling
@@ -260,6 +260,7 @@ files:
260
260
  - RUBOCOP.md
261
261
  - SECURITY.md
262
262
  - lib/tree_haver.rb
263
+ - lib/tree_haver/backend_api.rb
263
264
  - lib/tree_haver/backends/citrus.rb
264
265
  - lib/tree_haver/backends/commonmarker.rb
265
266
  - lib/tree_haver/backends/ffi.rb
@@ -292,10 +293,10 @@ licenses:
292
293
  - MIT
293
294
  metadata:
294
295
  homepage_uri: https://tree-haver.galtzo.com/
295
- source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.2
296
- changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.2/CHANGELOG.md
296
+ source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.4
297
+ changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.4/CHANGELOG.md
297
298
  bug_tracker_uri: https://github.com/kettle-rb/tree_haver/issues
298
- documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.2
299
+ documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.4
299
300
  funding_uri: https://github.com/sponsors/pboling
300
301
  wiki_uri: https://github.com/kettle-rb/tree_haver/wiki
301
302
  news_uri: https://www.railsbling.com/tags/tree_haver
metadata.gz.sig CHANGED
Binary file