tree_haver 3.1.2 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tree_haver.rb CHANGED
@@ -6,20 +6,41 @@ require "version_gem"
6
6
  # Standard library
7
7
  require "set"
8
8
 
9
- # This gem
9
+ # This gem - only version can be required (never autoloaded)
10
10
  require_relative "tree_haver/version"
11
- require_relative "tree_haver/language_registry"
12
11
 
13
12
  # TreeHaver is a cross-Ruby adapter for code parsing with 10 backends.
14
13
  #
15
14
  # Provides a unified API for parsing source code across MRI Ruby, JRuby, and TruffleRuby
16
15
  # using tree-sitter grammars or language-specific native parsers.
17
16
  #
17
+ # == Backends
18
+ #
18
19
  # Supports 10 backends:
19
20
  # - Tree-sitter: MRI (C), Rust, FFI, Java
20
21
  # - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
21
22
  # - Pure Ruby: Citrus (portable fallback)
22
23
  #
24
+ # == Platform Compatibility
25
+ #
26
+ # Not all backends work on all Ruby platforms:
27
+ #
28
+ # | Backend | MRI | JRuby | TruffleRuby |
29
+ # |--------------|-----|-------|-------------|
30
+ # | MRI (C ext) | ✓ | ✗ | ✗ |
31
+ # | Rust | ✓ | ✗ | ✗ |
32
+ # | FFI | ✓ | ✓ | ✗ |
33
+ # | Java | ✗ | ✓ | ✗ |
34
+ # | Prism | ✓ | ✓ | ✓ |
35
+ # | Psych | ✓ | ✓ | ✓ |
36
+ # | Citrus | ✓ | ✓ | ✓ |
37
+ # | Commonmarker | ✓ | ✗ | ? |
38
+ # | Markly | ✓ | ✗ | ? |
39
+ #
40
+ # - JRuby: Cannot load native C/Rust extensions; use FFI, Java, or pure Ruby backends
41
+ # - TruffleRuby: FFI doesn't support STRUCT_BY_VALUE; magnus/rb-sys incompatible with C API;
42
+ # use Prism, Psych, Citrus, or potentially Commonmarker/Markly
43
+ #
23
44
  # @example Basic usage with tree-sitter
24
45
  # # Load a language grammar
25
46
  # language = TreeHaver::Language.from_library(
@@ -84,6 +105,10 @@ require_relative "tree_haver/language_registry"
84
105
  # @see GrammarFinder For automatic grammar library discovery
85
106
  # @see Backends For available parsing backends
86
107
  module TreeHaver
108
+ # Autoload internal modules
109
+ autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
110
+ autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
111
+
87
112
  # Base error class for TreeHaver exceptions
88
113
  # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
89
114
  #
@@ -131,6 +156,20 @@ module TreeHaver
131
156
  # # Now you can test backend conflicts (at risk of segfaults)
132
157
  class BackendConflict < Error; end
133
158
 
159
+ # Default Citrus configurations for known languages
160
+ #
161
+ # These are used by {TreeHaver.parser_for} when no explicit citrus_config is provided
162
+ # and tree-sitter backends are not available (e.g., on TruffleRuby).
163
+ #
164
+ # @api private
165
+ CITRUS_DEFAULTS = {
166
+ toml: {
167
+ gem_name: "toml-rb",
168
+ grammar_const: "TomlRB::Document",
169
+ require_path: "toml-rb",
170
+ },
171
+ }.freeze
172
+
134
173
  # Namespace for backend implementations
135
174
  #
136
175
  # TreeHaver provides multiple backends to support different Ruby implementations:
@@ -221,6 +260,12 @@ module TreeHaver
221
260
  # Unified Tree wrapper providing consistent API across backends
222
261
  autoload :Tree, File.join(__dir__, "tree_haver", "tree")
223
262
 
263
+ # Language class for loading grammar shared libraries
264
+ autoload :Language, File.join(__dir__, "tree_haver", "language")
265
+
266
+ # Parser class for parsing source code into syntax trees
267
+ autoload :Parser, File.join(__dir__, "tree_haver", "parser")
268
+
224
269
  # Get the current backend selection
225
270
  #
226
271
  # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
@@ -545,6 +590,66 @@ module TreeHaver
545
590
  mod
546
591
  end
547
592
 
593
+ # Native tree-sitter backends that support loading shared libraries (.so files)
594
+ # These backends wrap the tree-sitter C library via various bindings.
595
+ # Non-tree-sitter backends (Citrus, Prism, Psych, Commonmarker, Markly) are excluded.
596
+ NATIVE_BACKENDS = %i[mri rust ffi java].freeze
597
+
598
+ # Resolve a native tree-sitter backend module (for from_library)
599
+ #
600
+ # This method is similar to resolve_backend_module but ONLY considers
601
+ # backends that support loading shared libraries (.so files):
602
+ # - MRI (ruby_tree_sitter C extension)
603
+ # - Rust (tree_stump)
604
+ # - FFI (ffi gem with libtree-sitter)
605
+ # - Java (jtreesitter on JRuby)
606
+ #
607
+ # Non-tree-sitter backends (Citrus, Prism, Psych, Commonmarker, Markly) are NOT
608
+ # considered because they don't support from_library.
609
+ #
610
+ # @param explicit_backend [Symbol, String, nil] explicitly requested backend
611
+ # @return [Module, nil] the backend module or nil if none available
612
+ # @raise [BackendConflict] if the backend conflicts with previously used backends
613
+ def resolve_native_backend_module(explicit_backend = nil)
614
+ # Short-circuit on TruffleRuby: no native backends work
615
+ # - MRI: C extension, MRI only
616
+ # - Rust: magnus requires MRI's C API
617
+ # - FFI: STRUCT_BY_VALUE not supported
618
+ # - Java: requires JRuby's Java interop
619
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "truffleruby"
620
+ return unless explicit_backend # Auto-select: no backends available
621
+ # If explicit backend requested, let it fail with proper error below
622
+ end
623
+
624
+ # Get the effective backend (considers thread-local and global settings)
625
+ requested = resolve_effective_backend(explicit_backend)
626
+
627
+ # If the effective backend is a native backend, use it
628
+ if NATIVE_BACKENDS.include?(requested)
629
+ return resolve_backend_module(requested)
630
+ end
631
+
632
+ # If a specific non-native backend was explicitly requested, return nil
633
+ # (from_library only works with native backends that load .so files)
634
+ return if explicit_backend
635
+
636
+ # If effective backend is :auto, auto-select from native backends in priority order
637
+ # Note: non-native backends set via with_backend are NOT used here because
638
+ # from_library only works with native backends
639
+ native_priority = if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
640
+ %i[java ffi] # JRuby: Java first, then FFI
641
+ else
642
+ %i[mri rust ffi] # MRI: MRI first, then Rust, then FFI
643
+ end
644
+
645
+ native_priority.each do |backend|
646
+ mod = resolve_backend_module(backend)
647
+ return mod if mod
648
+ end
649
+
650
+ nil # No native backend available
651
+ end
652
+
548
653
  # Determine the concrete backend module to use
549
654
  #
550
655
  # This method performs backend auto-selection when backend is :auto.
@@ -804,19 +909,25 @@ module TreeHaver
804
909
 
805
910
  # Step 3: Try Citrus fallback if tree-sitter failed
806
911
  unless language
807
- citrus_config ||= {}
808
- begin
809
- citrus_finder = CitrusGrammarFinder.new(
810
- language: name,
811
- gem_name: citrus_config[:gem_name],
812
- grammar_const: citrus_config[:grammar_const],
813
- )
814
- if citrus_finder.available?
815
- citrus_finder.register!
816
- language = Language.public_send(name)
912
+ # Use explicit config, or fall back to built-in defaults for known languages
913
+ citrus_config ||= CITRUS_DEFAULTS[name] || {}
914
+
915
+ # Only attempt if we have the required configuration
916
+ if citrus_config[:gem_name] && citrus_config[:grammar_const]
917
+ begin
918
+ citrus_finder = CitrusGrammarFinder.new(
919
+ language: name,
920
+ gem_name: citrus_config[:gem_name],
921
+ grammar_const: citrus_config[:grammar_const],
922
+ require_path: citrus_config[:require_path],
923
+ )
924
+ if citrus_finder.available?
925
+ citrus_finder.register!
926
+ language = Language.public_send(name)
927
+ end
928
+ rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
929
+ language = nil
817
930
  end
818
- rescue NotAvailable, ArgumentError, LoadError, NameError
819
- language = nil
820
931
  end
821
932
  end
822
933
 
@@ -835,552 +946,9 @@ module TreeHaver
835
946
  end
836
947
  end
837
948
 
838
- # Represents a tree-sitter language grammar
839
- #
840
- # A Language object is an opaque handle to a TSLanguage* that defines
841
- # the grammar rules for parsing a specific programming language.
842
- #
843
- # @example Load a language from a shared library
844
- # language = TreeHaver::Language.from_library(
845
- # "/usr/local/lib/libtree-sitter-toml.so",
846
- # symbol: "tree_sitter_toml"
847
- # )
848
- #
849
- # @example Use a registered language
850
- # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
851
- # language = TreeHaver::Language.toml
852
- class Language
853
- class << self
854
- # Load a language grammar from a shared library (ruby_tree_sitter compatibility)
855
- #
856
- # This method provides API compatibility with ruby_tree_sitter which uses
857
- # `Language.load(name, path)`.
858
- #
859
- # @param name [String] the language name (e.g., "toml")
860
- # @param path [String] absolute path to the language shared library
861
- # @param validate [Boolean] if true, validates the path for safety (default: true)
862
- # @return [Language] loaded language handle
863
- # @raise [NotAvailable] if the library cannot be loaded
864
- # @raise [ArgumentError] if the path fails security validation
865
- # @example
866
- # language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
867
- def load(name, path, validate: true)
868
- from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
869
- end
870
-
871
- # Load a language grammar from a shared library
872
- #
873
- # The library must export a function that returns a pointer to a TSLanguage struct.
874
- # By default, TreeHaver looks for a symbol named "tree_sitter_<name>".
875
- #
876
- # == Security
877
- #
878
- # By default, paths are validated using {PathValidator} to prevent path traversal
879
- # and other attacks. Set `validate: false` to skip validation (not recommended
880
- # unless you've already validated the path).
881
- #
882
- # @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
883
- # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
884
- # @param name [String, nil] logical name for the language (used in caching)
885
- # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
886
- # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
887
- # @return [Language] loaded language handle
888
- # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
889
- # @raise [ArgumentError] if path or symbol fails security validation
890
- # @example
891
- # language = TreeHaver::Language.from_library(
892
- # "/usr/local/lib/libtree-sitter-toml.so",
893
- # symbol: "tree_sitter_toml",
894
- # name: "toml"
895
- # )
896
- # @example With explicit backend
897
- # language = TreeHaver::Language.from_library(
898
- # "/usr/local/lib/libtree-sitter-toml.so",
899
- # symbol: "tree_sitter_toml",
900
- # backend: :ffi
901
- # )
902
- def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
903
- if validate
904
- unless PathValidator.safe_library_path?(path)
905
- errors = PathValidator.validation_errors(path)
906
- raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
907
- end
908
-
909
- if symbol && !PathValidator.safe_symbol_name?(symbol)
910
- raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
911
- "Symbol names must be valid C identifiers."
912
- end
913
- end
914
-
915
- mod = TreeHaver.resolve_backend_module(backend)
916
-
917
- if mod.nil?
918
- if backend
919
- raise NotAvailable, "Requested backend #{backend.inspect} is not available"
920
- else
921
- raise NotAvailable, "No TreeHaver backend is available"
922
- end
923
- end
924
-
925
- # Backend must implement .from_library; fallback to .from_path for older impls
926
- # Include effective backend AND ENV vars in cache key since they affect loading
927
- effective_b = TreeHaver.resolve_effective_backend(backend)
928
- key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
929
- LanguageRegistry.fetch(key) do
930
- if mod::Language.respond_to?(:from_library)
931
- mod::Language.from_library(path, symbol: symbol, name: name)
932
- else
933
- mod::Language.from_path(path)
934
- end
935
- end
936
- end
937
- # Alias for {from_library}
938
- # @see from_library
939
- alias_method :from_path, :from_library
940
-
941
- # Dynamic helper to load a registered language by name
942
- #
943
- # After registering a language with {TreeHaver.register_language},
944
- # you can load it using a method call. The appropriate backend will be
945
- # used based on registration and current backend.
946
- #
947
- # @example With tree-sitter
948
- # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
949
- # language = TreeHaver::Language.toml
950
- #
951
- # @example With both backends
952
- # TreeHaver.register_language(:toml,
953
- # path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
954
- # TreeHaver.register_language(:toml,
955
- # grammar_module: TomlRB::Document)
956
- # language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
957
- #
958
- # @param method_name [Symbol] the registered language name
959
- # @param args [Array] positional arguments
960
- # @param kwargs [Hash] keyword arguments
961
- # @return [Language] loaded language handle
962
- # @raise [NoMethodError] if the language name is not registered
963
- def method_missing(method_name, *args, **kwargs, &block)
964
- # Resolve only if the language name was registered
965
- all_backends = TreeHaver.registered_language(method_name)
966
- return super unless all_backends
967
-
968
- # Check current backend
969
- current_backend = TreeHaver.backend_module
970
-
971
- # Determine which backend type to use
972
- backend_type = if current_backend == Backends::Citrus
973
- :citrus
974
- else
975
- :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
976
- end
977
-
978
- # Get backend-specific registration
979
- reg = all_backends[backend_type]
980
-
981
- # If Citrus backend is active
982
- if backend_type == :citrus
983
- if reg && reg[:grammar_module]
984
- return Backends::Citrus::Language.new(reg[:grammar_module])
985
- end
986
-
987
- # Fall back to error if no Citrus grammar registered
988
- raise NotAvailable,
989
- "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
990
- "Either register a Citrus grammar or use a tree-sitter backend. " \
991
- "Registered backends: #{all_backends.keys.inspect}"
992
- end
993
-
994
- # For tree-sitter backends, try to load from path
995
- # If that fails, fall back to Citrus if available
996
- if reg && reg[:path]
997
- path = kwargs[:path] || args.first || reg[:path]
998
- # Symbol priority: kwargs override > registration > derive from method_name
999
- symbol = if kwargs.key?(:symbol)
1000
- kwargs[:symbol]
1001
- elsif reg[:symbol]
1002
- reg[:symbol]
1003
- else
1004
- "tree_sitter_#{method_name}"
1005
- end
1006
- # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
1007
- # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
1008
- # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
1009
- name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
1010
-
1011
- begin
1012
- return from_library(path, symbol: symbol, name: name)
1013
- rescue NotAvailable, ArgumentError, LoadError, FFI::NotFoundError => _e
1014
- # Tree-sitter failed to load - check for Citrus fallback
1015
- # This handles cases where:
1016
- # - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
1017
- # - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
1018
- # - Invalid arguments were provided (ArgumentError)
1019
- citrus_reg = all_backends[:citrus]
1020
- if citrus_reg && citrus_reg[:grammar_module]
1021
- return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
1022
- end
1023
- # No Citrus fallback available, re-raise the original error
1024
- raise
1025
- end
1026
- end
1027
-
1028
- # No tree-sitter path registered - check for Citrus fallback
1029
- # This enables auto-fallback when tree-sitter grammar is not installed
1030
- # but a Citrus grammar (pure Ruby) is available
1031
- citrus_reg = all_backends[:citrus]
1032
- if citrus_reg && citrus_reg[:grammar_module]
1033
- return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
1034
- end
1035
-
1036
- # No appropriate registration found
1037
- raise ArgumentError,
1038
- "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
1039
- "Registered backends: #{all_backends.keys.inspect}"
1040
- end
1041
-
1042
- # @api private
1043
- def respond_to_missing?(method_name, include_private = false)
1044
- !!TreeHaver.registered_language(method_name) || super
1045
- end
1046
- end
1047
- end
1048
-
1049
- # Represents a tree-sitter parser instance
1050
- #
1051
- # A Parser is used to parse source code into a syntax tree. You must
1052
- # set a language before parsing.
1053
- #
1054
- # == Wrapping/Unwrapping Responsibility
1055
- #
1056
- # TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
1057
- #
1058
- # **Language objects:**
1059
- # - Unwraps Language wrappers before passing to backend.language=
1060
- # - MRI backend receives ::TreeSitter::Language
1061
- # - Rust backend receives String (language name)
1062
- # - FFI backend receives wrapped Language (needs to_ptr)
1063
- #
1064
- # **Tree objects:**
1065
- # - parse() receives raw source, backend returns raw tree, Parser wraps it
1066
- # - parse_string() unwraps old_tree before passing to backend, wraps returned tree
1067
- # - Backends always work with raw backend trees, never TreeHaver::Tree
1068
- #
1069
- # **Node objects:**
1070
- # - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
1071
- #
1072
- # This design ensures:
1073
- # - Principle of Least Surprise: wrapping happens at boundaries, consistently
1074
- # - Backends are simple: they don't need to know about TreeHaver wrappers
1075
- # - Single Responsibility: wrapping logic is only in TreeHaver::Parser
1076
- #
1077
- # @example Basic parsing
1078
- # parser = TreeHaver::Parser.new
1079
- # parser.language = TreeHaver::Language.toml
1080
- # tree = parser.parse("[package]\nname = \"foo\"")
1081
- class Parser
1082
- # Create a new parser instance
1083
- #
1084
- # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
1085
- # @raise [NotAvailable] if no backend is available or requested backend is unavailable
1086
- # @example Default (uses context/global)
1087
- # parser = TreeHaver::Parser.new
1088
- # @example Explicit backend
1089
- # parser = TreeHaver::Parser.new(backend: :ffi)
1090
- def initialize(backend: nil)
1091
- # Convert string backend names to symbols for consistency
1092
- backend = backend.to_sym if backend.is_a?(String)
1093
-
1094
- mod = TreeHaver.resolve_backend_module(backend)
1095
-
1096
- if mod.nil?
1097
- if backend
1098
- raise NotAvailable, "Requested backend #{backend.inspect} is not available"
1099
- else
1100
- raise NotAvailable, "No TreeHaver backend is available"
1101
- end
1102
- end
1103
-
1104
- # Try to create the parser, with fallback to Citrus if tree-sitter fails
1105
- # This enables auto-fallback when tree-sitter runtime isn't available
1106
- begin
1107
- @impl = mod::Parser.new
1108
- @explicit_backend = backend # Remember for introspection (always a Symbol or nil)
1109
- rescue NoMethodError, FFI::NotFoundError, LoadError => e
1110
- # Tree-sitter backend failed (likely missing runtime library)
1111
- # Try Citrus as fallback if we weren't explicitly asked for a specific backend
1112
- if backend.nil? || backend == :auto
1113
- if Backends::Citrus.available?
1114
- @impl = Backends::Citrus::Parser.new
1115
- @explicit_backend = :citrus
1116
- else
1117
- # No fallback available, re-raise original error
1118
- raise NotAvailable, "Tree-sitter backend failed: #{e.message}. " \
1119
- "Citrus fallback not available. Install tree-sitter runtime or citrus gem."
1120
- end
1121
- else
1122
- # Explicit backend was requested, don't fallback
1123
- raise
1124
- end
1125
- end
1126
- end
1127
-
1128
- # Get the backend this parser is using (for introspection)
1129
- #
1130
- # Returns the actual backend in use, resolving :auto to the concrete backend.
1131
- #
1132
- # @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
1133
- def backend
1134
- if @explicit_backend && @explicit_backend != :auto
1135
- @explicit_backend
1136
- else
1137
- # Determine actual backend from the implementation class
1138
- case @impl.class.name
1139
- when /MRI/
1140
- :mri
1141
- when /Rust/
1142
- :rust
1143
- when /FFI/
1144
- :ffi
1145
- when /Java/
1146
- :java
1147
- when /Citrus/
1148
- :citrus
1149
- else
1150
- # Fallback to effective_backend if we can't determine from class name
1151
- TreeHaver.effective_backend
1152
- end
1153
- end
1154
- end
1155
-
1156
- # Set the language grammar for this parser
1157
- #
1158
- # @param lang [Language] the language to use for parsing
1159
- # @return [Language] the language that was set
1160
- # @example
1161
- # parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
1162
- def language=(lang)
1163
- # Check if this is a Citrus language - if so, we need a Citrus parser
1164
- # This enables automatic backend switching when tree-sitter fails and
1165
- # falls back to Citrus
1166
- if lang.is_a?(Backends::Citrus::Language)
1167
- unless @impl.is_a?(Backends::Citrus::Parser)
1168
- # Switch to Citrus parser to match the Citrus language
1169
- @impl = Backends::Citrus::Parser.new
1170
- @explicit_backend = :citrus
1171
- end
1172
- end
1173
-
1174
- # Unwrap the language before passing to backend
1175
- # Backends receive raw language objects, never TreeHaver wrappers
1176
- inner_lang = unwrap_language(lang)
1177
- @impl.language = inner_lang
1178
- # Return the original (possibly wrapped) language for consistency
1179
- lang # rubocop:disable Lint/Void (intentional return value)
1180
- end
1181
-
1182
- private
1183
-
1184
- # Unwrap a language object to extract the raw backend language
1185
- #
1186
- # This method is smart about backend compatibility:
1187
- # 1. If language has a backend attribute, checks if it matches current backend
1188
- # 2. If mismatch detected, attempts to reload language for correct backend
1189
- # 3. If reload successful, uses new language; otherwise continues with original
1190
- # 4. Unwraps the language wrapper to get raw backend object
1191
- #
1192
- # @param lang [Object] wrapped or raw language object
1193
- # @return [Object] raw backend language object appropriate for current backend
1194
- # @api private
1195
- def unwrap_language(lang)
1196
- # Check if this is a TreeHaver language wrapper with backend info
1197
- if lang.respond_to?(:backend)
1198
- # Verify backend compatibility FIRST
1199
- # This prevents passing languages from wrong backends to native code
1200
- # Exception: :auto backend is permissive - accepts any language
1201
- current_backend = backend
1202
-
1203
- if lang.backend != current_backend && current_backend != :auto
1204
- # Backend mismatch! Try to reload for correct backend
1205
- reloaded = try_reload_language_for_backend(lang, current_backend)
1206
- if reloaded
1207
- lang = reloaded
1208
- else
1209
- # Couldn't reload - this is an error
1210
- raise TreeHaver::Error,
1211
- "Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
1212
- "Cannot reload language for correct backend. " \
1213
- "Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
1214
- end
1215
- end
1216
-
1217
- # Get the current parser's language (if set)
1218
- current_lang = @impl.respond_to?(:language) ? @impl.language : nil
1219
-
1220
- # Language mismatch detected! The parser might have a different language set
1221
- # Compare the actual language objects using Comparable
1222
- if current_lang && lang != current_lang
1223
- # Different language being set (e.g., switching from TOML to JSON)
1224
- # This is fine, just informational
1225
- end
1226
- end
1227
-
1228
- # Unwrap based on backend type
1229
- # All TreeHaver Language wrappers have the backend attribute
1230
- unless lang.respond_to?(:backend)
1231
- # This shouldn't happen - all our wrappers have backend attribute
1232
- # If we get here, it's likely a raw backend object that was passed directly
1233
- raise TreeHaver::Error,
1234
- "Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
1235
- "Use TreeHaver::Language.from_library to create language objects."
1236
- end
1237
-
1238
- case lang.backend
1239
- when :mri
1240
- return lang.to_language if lang.respond_to?(:to_language)
1241
- return lang.inner_language if lang.respond_to?(:inner_language)
1242
- when :rust
1243
- return lang.name if lang.respond_to?(:name)
1244
- when :ffi
1245
- return lang # FFI needs wrapper for to_ptr
1246
- when :java
1247
- return lang.impl if lang.respond_to?(:impl)
1248
- when :citrus
1249
- return lang.grammar_module if lang.respond_to?(:grammar_module)
1250
- when :prism
1251
- return lang # Prism backend expects the Language wrapper
1252
- when :psych
1253
- return lang # Psych backend expects the Language wrapper
1254
- when :commonmarker
1255
- return lang # Commonmarker backend expects the Language wrapper
1256
- when :markly
1257
- return lang # Markly backend expects the Language wrapper
1258
- else
1259
- # Unknown backend (e.g., test backend)
1260
- # Try generic unwrapping methods for flexibility in testing
1261
- return lang.to_language if lang.respond_to?(:to_language)
1262
- return lang.inner_language if lang.respond_to?(:inner_language)
1263
- return lang.impl if lang.respond_to?(:impl)
1264
- return lang.grammar_module if lang.respond_to?(:grammar_module)
1265
- return lang.name if lang.respond_to?(:name)
1266
-
1267
- # If nothing works, pass through as-is
1268
- # This allows test languages to be passed directly
1269
- return lang
1270
- end
1271
-
1272
- # Shouldn't reach here, but just in case
1273
- lang
1274
- end
1275
-
1276
- # Try to reload a language for the current backend
1277
- #
1278
- # This handles the case where a language was loaded for one backend,
1279
- # but is now being used with a different backend (e.g., after backend switch).
1280
- #
1281
- # @param lang [Object] language object with metadata
1282
- # @param target_backend [Symbol] backend to reload for
1283
- # @return [Object, nil] reloaded language or nil if reload not possible
1284
- # @api private
1285
- def try_reload_language_for_backend(lang, target_backend)
1286
- # Can't reload without path information
1287
- return unless lang.respond_to?(:path) || lang.respond_to?(:grammar_module)
1288
-
1289
- # For tree-sitter backends, reload from path
1290
- if lang.respond_to?(:path) && lang.path
1291
- begin
1292
- # Use Language.from_library which respects current backend
1293
- return Language.from_library(
1294
- lang.path,
1295
- symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
1296
- name: lang.respond_to?(:name) ? lang.name : nil,
1297
- )
1298
- rescue => e
1299
- # Reload failed, continue with original
1300
- warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
1301
- return
1302
- end
1303
- end
1304
-
1305
- # For Citrus, can't really reload as it's just a module reference
1306
- nil
1307
- end
1308
-
1309
- public
1310
-
1311
- # Parse source code into a syntax tree
1312
- #
1313
- # @param source [String] the source code to parse (should be UTF-8)
1314
- # @return [Tree] the parsed syntax tree
1315
- # @example
1316
- # tree = parser.parse("x = 1")
1317
- # puts tree.root_node.type
1318
- def parse(source)
1319
- tree_impl = @impl.parse(source)
1320
- # Wrap backend tree with source so Node#text works
1321
- Tree.new(tree_impl, source: source)
1322
- end
1323
-
1324
- # Parse source code into a syntax tree (with optional incremental parsing)
1325
- #
1326
- # This method provides API compatibility with ruby_tree_sitter which uses
1327
- # `parse_string(old_tree, source)`.
1328
- #
1329
- # == Incremental Parsing
1330
- #
1331
- # tree-sitter supports **incremental parsing** where you can pass a previously
1332
- # parsed tree along with edit information to efficiently re-parse only the
1333
- # changed portions of source code. This is a major performance optimization
1334
- # for editors and IDEs that need to re-parse on every keystroke.
1335
- #
1336
- # The workflow for incremental parsing is:
1337
- # 1. Parse the initial source: `tree = parser.parse_string(nil, source)`
1338
- # 2. User edits the source (e.g., inserts a character)
1339
- # 3. Call `tree.edit(...)` to update the tree's position data
1340
- # 4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
1341
- # 5. tree-sitter reuses unchanged nodes, only re-parsing affected regions
1342
- #
1343
- # TreeHaver passes through to the underlying backend if it supports incremental
1344
- # parsing (MRI and Rust backends do). Check `TreeHaver.capabilities[:incremental]`
1345
- # to see if the current backend supports it.
1346
- #
1347
- # @param old_tree [Tree, nil] previously parsed tree for incremental parsing, or nil for fresh parse
1348
- # @param source [String] the source code to parse (should be UTF-8)
1349
- # @return [Tree] the parsed syntax tree
1350
- # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
1351
- # @see Tree#edit For marking edits before incremental re-parsing
1352
- # @example First parse (no old tree)
1353
- # tree = parser.parse_string(nil, "x = 1")
1354
- # @example Incremental parse
1355
- # tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
1356
- # new_tree = parser.parse_string(tree, "x = 42")
1357
- def parse_string(old_tree, source)
1358
- # Pass through to backend if it supports incremental parsing
1359
- if old_tree && @impl.respond_to?(:parse_string)
1360
- # Extract the underlying implementation from our Tree wrapper
1361
- old_impl = if old_tree.respond_to?(:inner_tree)
1362
- old_tree.inner_tree
1363
- elsif old_tree.respond_to?(:instance_variable_get)
1364
- # Fallback for compatibility
1365
- old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
1366
- else
1367
- old_tree
1368
- end
1369
- tree_impl = @impl.parse_string(old_impl, source)
1370
- # Wrap backend tree with source so Node#text works
1371
- Tree.new(tree_impl, source: source)
1372
- elsif @impl.respond_to?(:parse_string)
1373
- tree_impl = @impl.parse_string(nil, source)
1374
- # Wrap backend tree with source so Node#text works
1375
- Tree.new(tree_impl, source: source)
1376
- else
1377
- # Fallback for backends that don't support parse_string
1378
- parse(source)
1379
- end
1380
- end
1381
- end
1382
-
1383
- # Tree and Node classes have been moved to separate files:
949
+ # Language and Parser classes have been moved to separate files:
950
+ # - tree_haver/language.rb: TreeHaver::Language - loads grammar shared libraries
951
+ # - tree_haver/parser.rb: TreeHaver::Parser - parses source code into syntax trees
1384
952
  # - tree_haver/tree.rb: TreeHaver::Tree - unified wrapper providing consistent API
1385
953
  # - tree_haver/node.rb: TreeHaver::Node - unified wrapper providing consistent API
1386
954
  #