tree_haver 3.2.3 → 3.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -108,6 +108,7 @@ module TreeHaver
108
108
  # Autoload internal modules
109
109
  autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
110
110
  autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
111
+ autoload :BackendAPI, File.join(__dir__, "tree_haver", "backend_api")
111
112
 
112
113
  # Base error class for TreeHaver exceptions
113
114
  # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
@@ -179,6 +180,9 @@ module TreeHaver
179
180
  # - {Backends::Java} - Uses JRuby's Java integration
180
181
  # - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
181
182
  # - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
183
+ # - {Backends::Psych} - Uses Ruby's built-in Psych parser (YAML-only, stdlib)
184
+ # - {Backends::Commonmarker} - Uses commonmarker gem (Markdown)
185
+ # - {Backends::Markly} - Uses markly gem (Markdown/GFM)
182
186
  module Backends
183
187
  autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
184
188
  autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
@@ -208,6 +212,17 @@ module TreeHaver
208
212
  commonmarker: [], # Commonmarker has no conflicts with other backends
209
213
  markly: [], # Markly has no conflicts with other backends
210
214
  }.freeze
215
+
216
+ # Pure Ruby backends that parse specific languages
217
+ # These are language-specific and register themselves via LanguageRegistry
218
+ #
219
+ # @return [Hash{Symbol => Hash}] Maps backend name to language and module info
220
+ PURE_RUBY_BACKENDS = {
221
+ prism: {language: :ruby, module_name: "Prism"},
222
+ psych: {language: :yaml, module_name: "Psych"},
223
+ commonmarker: {language: :markdown, module_name: "Commonmarker"},
224
+ markly: {language: :markdown, module_name: "Markly"},
225
+ }.freeze
211
226
  end
212
227
 
213
228
  # Security utilities for validating paths before loading shared libraries
@@ -350,18 +365,92 @@ module TreeHaver
350
365
  # @example
351
366
  # TreeHaver.backend # => :auto
352
367
  def backend
353
- @backend ||= case (ENV["TREE_HAVER_BACKEND"] || :auto).to_s # rubocop:disable ThreadSafety/ClassInstanceVariable
354
- when "mri" then :mri
355
- when "rust" then :rust
356
- when "ffi" then :ffi
357
- when "java" then :java
358
- when "citrus" then :citrus
359
- when "prism" then :prism
360
- when "psych" then :psych
361
- when "commonmarker" then :commonmarker
362
- when "markly" then :markly
363
- else :auto
368
+ return @backend if defined?(@backend) && @backend # rubocop:disable ThreadSafety/ClassInstanceVariable
369
+
370
+ @backend = parse_single_backend_env # rubocop:disable ThreadSafety/ClassInstanceVariable
371
+ end
372
+
373
+ # Valid native backend names (require native extensions)
374
+ VALID_NATIVE_BACKENDS = %w[mri rust ffi java].freeze
375
+
376
+ # Valid pure Ruby backend names (no native extensions)
377
+ VALID_RUBY_BACKENDS = %w[citrus prism psych commonmarker markly].freeze
378
+
379
+ # All valid backend names
380
+ VALID_BACKENDS = (VALID_NATIVE_BACKENDS + VALID_RUBY_BACKENDS + %w[auto none]).freeze
381
+
382
+ # Get allowed native backends from TREE_HAVER_NATIVE_BACKEND environment variable
383
+ #
384
+ # Supports comma-separated values like "mri,ffi".
385
+ # Special values:
386
+ # - "auto" or empty/unset: automatically select from available native backends
387
+ # - "none": no native backends allowed (pure Ruby only)
388
+ #
389
+ # @return [Array<Symbol>] list of allowed native backend symbols, or [:auto] or [:none]
390
+ # @example Allow only MRI and FFI
391
+ # # TREE_HAVER_NATIVE_BACKEND=mri,ffi
392
+ # TreeHaver.allowed_native_backends # => [:mri, :ffi]
393
+ # @example Auto-select native backends (default)
394
+ # # TREE_HAVER_NATIVE_BACKEND not set, empty, or "auto"
395
+ # TreeHaver.allowed_native_backends # => [:auto]
396
+ # @example Disable all native backends
397
+ # # TREE_HAVER_NATIVE_BACKEND=none
398
+ # TreeHaver.allowed_native_backends # => [:none]
399
def allowed_native_backends
  # Memoized: TREE_HAVER_NATIVE_BACKEND is parsed on first use only and the
  # parsed list is reused until the instance variable is cleared again.
  cached = @allowed_native_backends # rubocop:disable ThreadSafety/ClassInstanceVariable
  return cached if cached

  @allowed_native_backends = parse_backend_list_env("TREE_HAVER_NATIVE_BACKEND", VALID_NATIVE_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
end
402
+
403
+ # Get allowed Ruby backends from TREE_HAVER_RUBY_BACKEND environment variable
404
+ #
405
+ # Supports comma-separated values like "citrus,prism".
406
+ # Special values:
407
+ # - "auto" or empty/unset: automatically select from available Ruby backends
408
+ # - "none": no Ruby backends allowed (native only)
409
+ #
410
+ # @return [Array<Symbol>] list of allowed Ruby backend symbols, or [:auto] or [:none]
411
+ # @example Allow only Citrus
412
+ # # TREE_HAVER_RUBY_BACKEND=citrus
413
+ # TreeHaver.allowed_ruby_backends # => [:citrus]
414
+ # @example Auto-select Ruby backends (default)
415
+ # # TREE_HAVER_RUBY_BACKEND not set, empty, or "auto"
416
+ # TreeHaver.allowed_ruby_backends # => [:auto]
417
def allowed_ruby_backends
  # Memoized: TREE_HAVER_RUBY_BACKEND is parsed on first use only and the
  # parsed list is reused until the instance variable is cleared again.
  cached = @allowed_ruby_backends # rubocop:disable ThreadSafety/ClassInstanceVariable
  return cached if cached

  @allowed_ruby_backends = parse_backend_list_env("TREE_HAVER_RUBY_BACKEND", VALID_RUBY_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
end
420
+
421
+ # Check if a specific backend is allowed based on environment variables
422
+ #
423
+ # Checks TREE_HAVER_NATIVE_BACKEND for native backends and
424
+ # TREE_HAVER_RUBY_BACKEND for pure Ruby backends.
425
+ #
426
+ # @param backend_name [Symbol, String] the backend to check
427
+ # @return [Boolean] true if the backend is allowed
428
+ # @example
429
+ # # TREE_HAVER_NATIVE_BACKEND=mri
430
+ # TreeHaver.backend_allowed?(:mri) # => true
431
+ # TreeHaver.backend_allowed?(:ffi) # => false
432
+ # TreeHaver.backend_allowed?(:citrus) # => true (Ruby backends use separate env var)
433
def backend_allowed?(backend_name)
  wanted = backend_name.to_sym
  label = wanted.to_s

  # Native and pure Ruby backends are governed by separate allow-lists;
  # a name found in neither table (including :auto) is never restricted.
  permitted =
    if VALID_NATIVE_BACKENDS.include?(label)
      allowed_native_backends
    elsif VALID_RUBY_BACKENDS.include?(label)
      allowed_ruby_backends
    else
      return true
    end

  case permitted
  when [:auto] then true   # no restriction configured
  when [:none] then false  # the whole category is disabled
  else permitted.include?(wanted)
  end
end
366
455
 
367
456
  # Set the backend to use
@@ -388,6 +477,101 @@ module TreeHaver
388
477
  # TreeHaver.reset_backend!(to: :ffi)
389
478
  def reset_backend!(to: :auto)
390
479
  @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
480
+ @allowed_native_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
481
+ @allowed_ruby_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
482
+ end
483
+
484
+ # Register built-in pure Ruby backends in the LanguageRegistry
485
+ #
486
+ # This registers Prism, Psych, Commonmarker, and Markly using the same
487
+ # registration API that external backends use. This ensures consistent
488
+ # behavior whether a backend is built-in or provided by an external gem.
489
+ #
490
+ # Called automatically when TreeHaver is first used, but can be called
491
+ # manually in tests or when reset! has cleared the registry.
492
+ #
493
+ # @return [void]
494
+ # @example Manual registration (usually not needed)
495
+ # TreeHaver.register_builtin_backends!
496
def register_builtin_backends!
  Backends::PURE_RUBY_BACKENDS.each do |backend_type, info|
    module_name = info[:module_name]

    # Look the constant up inside Backends only (inherit = false). With the
    # default lookup, a missing Backends::Psych would silently resolve to a
    # top-level ::Psych, and a name defined nowhere would raise NameError
    # instead of being skipped.
    next unless Backends.const_defined?(module_name, false)

    # Registration is unconditional: no availability check is made here, so
    # consumers of the registry must check the backend module themselves.
    LanguageRegistry.register(
      info[:language],
      backend_type,
      backend_module: Backends.const_get(module_name, false),
      gem_name: module_name.downcase,
    )
  end
end
514
+
515
+ # Check if built-in backends have been registered
516
+ #
517
+ # @return [Boolean]
518
+ # @api private
519
def builtin_backends_registered?
  # Normalise an unset (or falsy) flag to an explicit false before reporting it.
  @builtin_backends_registered = false unless @builtin_backends_registered # rubocop:disable ThreadSafety/ClassInstanceVariable
  @builtin_backends_registered # rubocop:disable ThreadSafety/ClassInstanceVariable
end
522
+
523
+ # Ensure built-in backends are registered (idempotent)
524
+ #
525
+ # @return [void]
526
+ # @api private
527
def ensure_builtin_backends_registered!
  unless builtin_backends_registered?
    register_builtin_backends!
    # Set only after registration returns, so a raised error leaves the flag
    # unset and the next call retries.
    @builtin_backends_registered = true # rubocop:disable ThreadSafety/ClassInstanceVariable
  end
end
532
+
533
+ # Parse TREE_HAVER_BACKEND environment variable (single backend)
534
+ #
535
+ # @return [Symbol] the backend symbol (:auto if not set or invalid)
536
+ # @api private
537
def parse_single_backend_env
  # Unset, blank and padded values all go through one normalisation path;
  # an unset or blank variable becomes "" and is rejected by the check below.
  name = ENV["TREE_HAVER_BACKEND"].to_s.strip.downcase

  # "none" is listed in VALID_BACKENDS for the list-style variables, but it
  # does not name a single backend, so it falls back to :auto here.
  return :auto if name == "none" || !VALID_BACKENDS.include?(name)

  name.to_sym
end
546
+
547
+ # Parse a backend list environment variable
548
+ #
549
+ # @param env_var [String] the environment variable name
550
+ # @param valid_backends [Array<String>] list of valid backend names
551
+ # @return [Array<Symbol>] list of backend symbols, or [:auto] or [:none]
552
+ # @api private
553
def parse_backend_list_env(env_var, valid_backends)
  raw = ENV[env_var]
  return [:auto] if raw.nil?

  cleaned = raw.strip.downcase
  case cleaned
  when "", "auto" then return [:auto] # blank behaves like an explicit "auto"
  when "none" then return [:none]
  end

  # Comma-separated list: trim each entry, drop duplicates (first occurrence
  # wins the position) and silently discard names that are not in valid_backends.
  recognised = cleaned.split(",").map(&:strip).uniq.select { |entry| valid_backends.include?(entry) }

  # A list with no recognised names falls back to automatic selection.
  recognised.empty? ? [:auto] : recognised.map(&:to_sym)
end
392
576
 
393
577
  # Thread-local backend context storage
@@ -574,7 +758,11 @@ module TreeHaver
574
758
  # Return nil if the module doesn't exist
575
759
  return unless mod
576
760
 
577
- # Check for backend conflicts FIRST, before checking availability
761
+ # Check if the backend is allowed by environment variables FIRST
762
+ # This enforces TREE_HAVER_NATIVE_BACKEND and TREE_HAVER_RUBY_BACKEND as hard restrictions
763
+ return if requested && requested != :auto && !backend_allowed?(requested)
764
+
765
+ # Check for backend conflicts, before checking availability
578
766
  # This is critical because the conflict causes the backend to report unavailable
579
767
  # We want to raise a clear error explaining WHY it's unavailable
580
768
  # Use the requested backend name directly (not capabilities) because
@@ -643,8 +831,14 @@ module TreeHaver
643
831
  end
644
832
 
645
833
  native_priority.each do |backend|
834
+ # Rescue BackendConflict to allow iteration to continue
835
+ # This enables graceful fallback when a backend is blocked
836
+
646
837
  mod = resolve_backend_module(backend)
647
838
  return mod if mod
839
+ rescue BackendConflict
840
+ # This backend is blocked by a previously used backend, try the next one
841
+ next
648
842
  end
649
843
 
650
844
  nil # No native backend available
@@ -664,7 +858,19 @@ module TreeHaver
664
858
  # puts "Using #{mod.capabilities[:backend]} backend"
665
859
  # end
666
860
  def backend_module
667
- case effective_backend # Changed from: backend
861
+ requested = effective_backend # Changed from: backend
862
+
863
+ # For explicit backends (not :auto), check for conflicts first
864
+ # If the backend is blocked, fall through to auto-select
865
+ if requested != :auto && backend_protect?
866
+ conflicts = conflicting_backends_for(requested)
867
+ unless conflicts.empty?
868
+ # The explicitly requested backend is blocked - fall through to auto-select
869
+ requested = :auto
870
+ end
871
+ end
872
+
873
+ case requested
668
874
  when :mri
669
875
  Backends::MRI
670
876
  when :rust
@@ -685,15 +891,16 @@ module TreeHaver
685
891
  Backends::Markly
686
892
  else
687
893
  # auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
688
- if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && Backends::Java.available?
894
+ # Each backend must be both allowed (by ENV) and available (gem installed)
895
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && backend_allowed?(:java) && Backends::Java.available?
689
896
  Backends::Java
690
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && Backends::MRI.available?
897
+ elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:mri) && Backends::MRI.available?
691
898
  Backends::MRI
692
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && Backends::Rust.available?
899
+ elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:rust) && Backends::Rust.available?
693
900
  Backends::Rust
694
- elsif Backends::FFI.available?
901
+ elsif backend_allowed?(:ffi) && Backends::FFI.available?
695
902
  Backends::FFI
696
- elsif Backends::Citrus.available?
903
+ elsif backend_allowed?(:citrus) && Backends::Citrus.available?
697
904
  Backends::Citrus # Pure Ruby fallback
698
905
  else
699
906
  # No backend available
@@ -761,6 +968,8 @@ module TreeHaver
761
968
  # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
762
969
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
763
970
  # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
971
+ # @param backend_module [Module, nil] pure Ruby backend module with Language/Parser classes
972
+ # @param backend_type [Symbol, nil] backend type for backend_module (defaults to the downcased last segment of the module's name, e.g. Foo::RbsBackend => :rbsbackend)
764
973
  # @param gem_name [String, nil] optional gem name for error messages
765
974
  # @return [void]
766
975
  # @example Register tree-sitter grammar only
@@ -775,6 +984,13 @@ module TreeHaver
775
984
  # grammar_module: TomlRB::Document,
776
985
  # gem_name: "toml-rb"
777
986
  # )
987
+ # @example Register pure Ruby backend (external gem like rbs-merge)
988
+ # TreeHaver.register_language(
989
+ # :rbs,
990
+ # backend_module: Rbs::Merge::Backends::RbsBackend,
991
+ # backend_type: :rbs,
992
+ # gem_name: "rbs"
993
+ # )
778
994
  # @example Register BOTH backends in separate calls
779
995
  # TreeHaver.register_language(
780
996
  # :toml,
@@ -795,7 +1011,7 @@ module TreeHaver
795
1011
  # gem_name: "toml-rb"
796
1012
  # )
797
1013
  # # Now TreeHaver::Language.toml works with ANY backend!
798
- def register_language(name, path: nil, symbol: nil, grammar_module: nil, gem_name: nil)
1014
+ def register_language(name, path: nil, symbol: nil, grammar_module: nil, backend_module: nil, backend_type: nil, gem_name: nil)
799
1015
  # Register tree-sitter backend if path provided
800
1016
  # Note: Uses `if` not `elsif` so both backends can be registered in one call
801
1017
  if path
@@ -813,9 +1029,17 @@ module TreeHaver
813
1029
  LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
814
1030
  end
815
1031
 
1032
+ # Register pure Ruby backend if backend_module provided
1033
+ # This is used by external gems (like rbs-merge) to register their own backends
1034
+ if backend_module
1035
+ # Derive backend_type from module name if not provided
1036
+ type = backend_type || backend_module.name.split("::").last.downcase.to_sym
1037
+ LanguageRegistry.register(name, type, backend_module: backend_module, gem_name: gem_name)
1038
+ end
1039
+
816
1040
  # Require at least one backend to be registered
817
- if path.nil? && grammar_module.nil?
818
- raise ArgumentError, "Must provide at least one of: path (tree-sitter) or grammar_module (Citrus)"
1041
+ if path.nil? && grammar_module.nil? && backend_module.nil?
1042
+ raise ArgumentError, "Must provide at least one of: path (tree-sitter), grammar_module (Citrus), or backend_module (pure Ruby)"
819
1043
  end
820
1044
 
821
1045
  # Note: No early return! This method intentionally processes both `if` blocks
@@ -839,7 +1063,12 @@ module TreeHaver
839
1063
  # Respects the effective backend setting (via TREE_HAVER_BACKEND env var,
840
1064
  # TreeHaver.backend=, or with_backend block).
841
1065
  #
842
- # @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :bash)
1066
+ # Supports three types of backends:
1067
+ # 1. Tree-sitter native backends (auto-discovered or explicit path)
1068
+ # 2. Citrus grammars (pure Ruby, via CITRUS_DEFAULTS or explicit config)
1069
+ # 3. Pure Ruby backends (registered via backend_module, e.g., Prism, Psych, RBS)
1070
+ #
1071
+ # @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :ruby, :yaml, :rbs)
843
1072
  # @param library_path [String, nil] optional explicit path to tree-sitter grammar library
844
1073
  # @param symbol [String, nil] optional tree-sitter symbol name (defaults to "tree_sitter_<name>")
845
1074
  # @param citrus_config [Hash, nil] optional Citrus fallback configuration
@@ -851,7 +1080,15 @@ module TreeHaver
851
1080
  #
852
1081
  # @example Force Citrus backend
853
1082
  # TreeHaver.with_backend(:citrus) { TreeHaver.parser_for(:toml) }
1083
+ #
1084
+ # @example Use registered pure Ruby backend (e.g., RBS)
1085
+ # # First, rbs-merge registers its backend:
1086
+ # # TreeHaver.register_language(:rbs, backend_module: Rbs::Merge::RbsBackend, backend_type: :rbs)
1087
+ # parser = TreeHaver.parser_for(:rbs)
854
1088
  def parser_for(language_name, library_path: nil, symbol: nil, citrus_config: nil)
1089
+ # Ensure built-in pure Ruby backends are registered
1090
+ ensure_builtin_backends_registered!
1091
+
855
1092
  name = language_name.to_sym
856
1093
  symbol ||= "tree_sitter_#{name}"
857
1094
  requested = effective_backend
@@ -861,6 +1098,38 @@ module TreeHaver
861
1098
  try_citrus = (requested == :auto) || (requested == :citrus)
862
1099
 
863
1100
  language = nil
1101
+ parser = nil
1102
+
1103
+ # First, check for registered pure Ruby backends
1104
+ # These are tried before tree-sitter and Citrus: in :auto mode the first available one is used; with an explicit backend only the matching one is used
1105
+ registration = registered_language(name)
1106
+ # Find any registered backend_module (not tree_sitter or citrus)
1107
+ registration&.each do |backend_type, config|
1108
+ next if %i[tree_sitter citrus].include?(backend_type)
1109
+ next unless config[:backend_module]
1110
+
1111
+ backend_mod = config[:backend_module]
1112
+ # Check if this backend is available
1113
+ next unless backend_mod.respond_to?(:available?) && backend_mod.available?
1114
+
1115
+ # If a specific backend was requested, only use if it matches
1116
+ next if requested != :auto && requested != backend_type
1117
+
1118
+ # Create parser from the backend module
1119
+ if backend_mod.const_defined?(:Parser)
1120
+ parser = backend_mod::Parser.new
1121
+ if backend_mod.const_defined?(:Language)
1122
+ lang_class = backend_mod::Language
1123
+ # Try to get language by name (e.g., Language.ruby, Language.yaml, Language.rbs)
1124
+ if lang_class.respond_to?(name)
1125
+ parser.language = lang_class.public_send(name)
1126
+ elsif lang_class.respond_to?(:from_library)
1127
+ parser.language = lang_class.from_library(nil, name: name)
1128
+ end
1129
+ end
1130
+ return parser
1131
+ end
1132
+ end
864
1133
 
865
1134
  # Try tree-sitter if applicable
866
1135
  if try_tree_sitter && !language
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_haver
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.3
4
+ version: 3.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter H. Boling
@@ -260,6 +260,7 @@ files:
260
260
  - RUBOCOP.md
261
261
  - SECURITY.md
262
262
  - lib/tree_haver.rb
263
+ - lib/tree_haver/backend_api.rb
263
264
  - lib/tree_haver/backends/citrus.rb
264
265
  - lib/tree_haver/backends/commonmarker.rb
265
266
  - lib/tree_haver/backends/ffi.rb
@@ -292,10 +293,10 @@ licenses:
292
293
  - MIT
293
294
  metadata:
294
295
  homepage_uri: https://tree-haver.galtzo.com/
295
- source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.3
296
- changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.3/CHANGELOG.md
296
+ source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.5
297
+ changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.5/CHANGELOG.md
297
298
  bug_tracker_uri: https://github.com/kettle-rb/tree_haver/issues
298
- documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.3
299
+ documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.5
299
300
  funding_uri: https://github.com/sponsors/pboling
300
301
  wiki_uri: https://github.com/kettle-rb/tree_haver/wiki
301
302
  news_uri: https://www.railsbling.com/tags/tree_haver
metadata.gz.sig CHANGED
Binary file