tree_haver 3.1.2 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,255 @@
1
+ # frozen_string_literal: true
2
+
3
module TreeHaver
  # Represents a language grammar for parsing source code
  #
  # Language is the entry point for loading and using grammars. It provides
  # a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus).
  #
  # For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
  # For pure-Ruby backends (Citrus, Prism, Psych), languages are built-in or provided by gems.
  #
  # == Loading Languages
  #
  # The primary way to load a language is via registration:
  #
  #   TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
  #   language = TreeHaver::Language.toml
  #
  # For explicit loading without registration:
  #
  #   language = TreeHaver::Language.from_library(
  #     "/path/to/libtree-sitter-toml.so",
  #     symbol: "tree_sitter_toml"
  #   )
  #
  # For ruby_tree_sitter compatibility:
  #
  #   language = TreeHaver::Language.load("toml", "/path/to/libtree-sitter-toml.so")
  #
  # @example Register and load a language
  #   TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
  #   language = TreeHaver::Language.toml
  class Language
    class << self
      # Load a language grammar from a shared library (ruby_tree_sitter compatibility)
      #
      # This method provides API compatibility with ruby_tree_sitter which uses
      # `Language.load(name, path)`. It delegates to {from_library} with the
      # symbol "tree_sitter_<name>".
      #
      # @param name [String] the language name (e.g., "toml")
      # @param path [String] absolute path to the language shared library
      # @param validate [Boolean] if true, validates the path for safety (default: true)
      # @return [Language] loaded language handle
      # @raise [NotAvailable] if the library cannot be loaded
      # @raise [ArgumentError] if the path fails security validation
      # @example
      #   language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
      def load(name, path, validate: true)
        from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
      end

      # Load a language grammar from a shared library
      #
      # The library must export a function that returns a pointer to a TSLanguage struct.
      # When +symbol+ is nil it is passed through as nil and the backend decides
      # which exported symbol to use.
      #
      # == Security
      #
      # By default, paths are validated using {PathValidator} to prevent path traversal
      # and other attacks. Set `validate: false` to skip validation (not recommended
      # unless you've already validated the path).
      #
      # @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
      # @param symbol [String, nil] name of the exported function (nil leaves the choice to the backend)
      # @param name [String, nil] logical name for the language (used in caching)
      # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
      # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
      # @return [Language] loaded language handle
      # @raise [NotAvailable] if no native backend can be resolved or the library cannot be loaded
      # @raise [ArgumentError] if path or symbol fails security validation
      # @example
      #   language = TreeHaver::Language.from_library(
      #     "/usr/local/lib/libtree-sitter-toml.so",
      #     symbol: "tree_sitter_toml",
      #     name: "toml"
      #   )
      # @example With explicit backend
      #   language = TreeHaver::Language.from_library(
      #     "/usr/local/lib/libtree-sitter-toml.so",
      #     symbol: "tree_sitter_toml",
      #     backend: :ffi
      #   )
      def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
        if validate
          unless PathValidator.safe_library_path?(path)
            errors = PathValidator.validation_errors(path)
            raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
          end

          if symbol && !PathValidator.safe_symbol_name?(symbol)
            raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
              "Symbol names must be valid C identifiers."
          end
        end

        # from_library only works with tree-sitter backends that support .so files
        # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) don't support from_library
        mod = TreeHaver.resolve_native_backend_module(backend)

        if mod.nil?
          if backend
            raise NotAvailable, "Requested backend #{backend.inspect} is not available or does not support shared libraries"
          else
            raise NotAvailable,
              "No native tree-sitter backend is available for loading shared libraries. " \
              "Available native backends (MRI, Rust, FFI, Java) require platform-specific setup. " \
              "For pure-Ruby parsing, use backend-specific Language classes directly (e.g., Prism, Psych, Citrus)."
          end
        end

        # Backend must implement .from_library; fallback to .from_path for older impls
        # Include effective backend AND ENV vars in cache key since they affect loading
        effective_b = TreeHaver.resolve_effective_backend(backend)
        key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
        LanguageRegistry.fetch(key) do
          if mod::Language.respond_to?(:from_library)
            mod::Language.from_library(path, symbol: symbol, name: name)
          else
            mod::Language.from_path(path)
          end
        end
      end
      # Alias for {from_library}
      # @see from_library
      alias_method :from_path, :from_library

      # Dynamic helper to load a registered language by name
      #
      # After registering a language with {TreeHaver.register_language},
      # you can load it using a method call. The appropriate backend will be
      # used based on registration and current backend.
      #
      # When a tree-sitter registration exists but loading it fails, a registered
      # Citrus grammar (if any) is returned instead; otherwise the original error
      # is re-raised.
      #
      # @example With tree-sitter
      #   TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
      #   language = TreeHaver::Language.toml
      #
      # @example With both backends
      #   TreeHaver.register_language(:toml,
      #     path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
      #   TreeHaver.register_language(:toml,
      #     grammar_module: TomlRB::Document)
      #   language = TreeHaver::Language.toml  # Uses appropriate grammar for active backend
      #
      # @param method_name [Symbol] the registered language name
      # @param args [Array] positional arguments (first one may override the library path)
      # @param kwargs [Hash] keyword arguments (:path, :symbol, :name overrides)
      # @return [Language] loaded language handle
      # @raise [NoMethodError] if the language name is not registered
      # @raise [NotAvailable] if the Citrus backend is active without a Citrus grammar,
      #   or tree-sitter loading fails and no Citrus fallback is registered
      # @raise [ArgumentError] if no registration is compatible with the active backend
      def method_missing(method_name, *args, **kwargs, &block)
        # Resolve only if the language name was registered
        all_backends = TreeHaver.registered_language(method_name)
        return super unless all_backends

        # Check current backend
        current_backend = TreeHaver.backend_module

        # Determine which backend type to use
        backend_type = if current_backend == Backends::Citrus
          :citrus
        else
          :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
        end

        # Get backend-specific registration
        reg = all_backends[backend_type]

        # If Citrus backend is active
        if backend_type == :citrus
          if reg && reg[:grammar_module]
            return Backends::Citrus::Language.new(reg[:grammar_module])
          end

          # Fall back to error if no Citrus grammar registered
          raise NotAvailable,
            "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
            "Either register a Citrus grammar or use a tree-sitter backend. " \
            "Registered backends: #{all_backends.keys.inspect}"
        end

        # For tree-sitter backends, try to load from path
        # If that fails, fall back to Citrus if available
        if reg && reg[:path]
          path = kwargs[:path] || args.first || reg[:path]
          # Symbol priority: kwargs override > registration > derive from method_name
          symbol = if kwargs.key?(:symbol)
            kwargs[:symbol]
          elsif reg[:symbol]
            reg[:symbol]
          else
            "tree_sitter_#{method_name}"
          end
          # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
          # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
          # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
          name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")

          begin
            return from_library(path, symbol: symbol, name: name)
          rescue NotAvailable, ArgumentError, LoadError => e
            # Tree-sitter failed to load. Return the fallback the handler built
            # instead of discarding it and constructing a second one below.
            return handle_tree_sitter_load_failure(e, all_backends)
          rescue => e
            # Also catch FFI::NotFoundError if FFI is loaded (can't reference directly as FFI may not exist)
            raise unless defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)

            return handle_tree_sitter_load_failure(e, all_backends)
          end
        end

        # No tree-sitter path registered - check for Citrus fallback
        # This enables auto-fallback when tree-sitter grammar is not installed
        # but a Citrus grammar (pure Ruby) is available
        citrus_reg = all_backends[:citrus]
        if citrus_reg && citrus_reg[:grammar_module]
          return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
        end

        # No appropriate registration found
        raise ArgumentError,
          "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
          "Registered backends: #{all_backends.keys.inspect}"
      end

      # Reports registered language names as respondable so that
      # `respond_to?` agrees with {method_missing}.
      #
      # @api private
      def respond_to_missing?(method_name, include_private = false)
        !!TreeHaver.registered_language(method_name) || super
      end

      private

      # Handle tree-sitter load failure with optional Citrus fallback
      #
      # This handles cases where:
      # - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
      # - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
      # - Invalid arguments were provided (ArgumentError)
      #
      # @param error [Exception] the original error
      # @param all_backends [Hash] all registered backends for the language
      # @return [Backends::Citrus::Language] if Citrus fallback available
      # @raise [Exception] re-raises original error if no fallback
      # @api private
      def handle_tree_sitter_load_failure(error, all_backends)
        citrus_reg = all_backends[:citrus]
        if citrus_reg && citrus_reg[:grammar_module]
          return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
        end
        # No Citrus fallback available, re-raise the original error
        raise error
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
module TreeHaver
  # Utility methods for deriving tree-sitter symbol and language names from library paths
  #
  # This module provides consistent path parsing across all backends that load
  # tree-sitter grammar libraries from shared object files (.so/.dylib/.dll).
  #
  # @example
  #   TreeHaver::LibraryPathUtils.derive_symbol_from_path("/usr/lib/libtree-sitter-toml.so")
  #   # => "tree_sitter_toml"
  #
  #   TreeHaver::LibraryPathUtils.derive_language_name_from_path("/usr/lib/libtree-sitter-toml.so")
  #   # => "toml"
  module LibraryPathUtils
    module_function

    # Derive the tree-sitter symbol name from a library path
    #
    # Symbol names are the exported C function names (e.g., "tree_sitter_toml")
    # that return a pointer to the TSLanguage struct.
    #
    # Handles various naming conventions:
    # - libtree-sitter-toml.so → tree_sitter_toml
    # - libtree_sitter_toml.so → tree_sitter_toml
    # - tree-sitter-toml.so → tree_sitter_toml
    # - tree_sitter_toml.so → tree_sitter_toml
    # - toml.so → tree_sitter_toml (assumes simple language name)
    # - libtree-sitter-toml.so.0.24 → tree_sitter_toml (version after the extension)
    # - libtree-sitter-toml.0.24.dylib → tree_sitter_toml (version before the extension)
    #
    # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
    # @return [String, nil] symbol like "tree_sitter_toml", or nil if path is nil
    def derive_symbol_from_path(path)
      return unless path

      # Drop the directory and the final extension: "libtree-sitter-toml" or "toml".
      # For "lib.so.0.24" only ".24" is removed here, leaving "lib.so.0".
      stem = File.basename(path, ".*")

      # Handle multi-part extensions like .so.0.24
      stem = stem.sub(/\.so(\.\d+)*\z/, "")

      # Handle versions placed before the extension (e.g. "libtree-sitter-toml.0.24.dylib"),
      # which would otherwise leak dots and digits into the symbol name
      stem = stem.sub(/(\.\d+)+\z/, "")

      # With or without a "lib" prefix, "tree-sitter-<lang>" / "tree_sitter_<lang>"
      # carries the language in its tail; any other name is taken to be the
      # language itself, minus a leading "lib" (e.g. "libtoml" -> "toml").
      prefixed = stem.match(/\A(?:lib[-_]?)?tree[-_]sitter[-_](.+)\z/)
      lang = prefixed ? prefixed[1] : stem.sub(/\Alib/, "")

      "tree_sitter_#{lang.tr("-", "_")}"
    end

    # Derive the language name from a library path
    #
    # Language names are the short identifiers (e.g., "toml", "json", "ruby")
    # used by some backends (like tree_stump/Rust) to register grammars.
    #
    # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
    # @return [String, nil] language name like "toml", or nil if path is nil
    def derive_language_name_from_path(path)
      derive_language_name_from_symbol(derive_symbol_from_path(path))
    end

    # Derive language name from a symbol
    #
    # @param symbol [String, nil] symbol like "tree_sitter_toml"
    # @return [String, nil] language name like "toml", or nil if symbol is nil
    def derive_language_name_from_symbol(symbol)
      return unless symbol

      # Strip the "tree_sitter_" prefix to get the language name
      symbol.sub(/\Atree_sitter_/, "")
    end
  end
end
@@ -290,11 +290,14 @@ module TreeHaver
290
290
  # Get a child by index
291
291
  #
292
292
  # @param index [Integer] Child index
293
- # @return [Node, nil] Wrapped child node
293
+ # @return [Node, nil] Wrapped child node, or nil if index out of bounds
294
294
  def child(index)
295
295
  child_node = @inner_node.child(index)
296
296
  return if child_node.nil?
297
297
  Node.new(child_node, source: @source)
298
+ rescue IndexError
299
+ # Some backends (e.g., MRI w/ ruby_tree_sitter) raise IndexError for out of bounds
300
+ nil
298
301
  end
299
302
 
300
303
  # Get a named child by index