tree_haver 3.1.2 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +227 -2
- data/README.md +391 -357
- data/lib/tree_haver/backends/citrus.rb +7 -1
- data/lib/tree_haver/backends/ffi.rb +80 -66
- data/lib/tree_haver/backends/java.rb +11 -4
- data/lib/tree_haver/backends/mri.rb +37 -21
- data/lib/tree_haver/backends/rust.rb +17 -5
- data/lib/tree_haver/citrus_grammar_finder.rb +57 -9
- data/lib/tree_haver/grammar_finder.rb +4 -1
- data/lib/tree_haver/language.rb +255 -0
- data/lib/tree_haver/library_path_utils.rb +80 -0
- data/lib/tree_haver/node.rb +4 -1
- data/lib/tree_haver/parser.rb +352 -0
- data/lib/tree_haver/rspec/dependency_tags.rb +406 -226
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +128 -560
- data.tar.gz.sig +0 -0
- metadata +7 -4
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Represents a language grammar for parsing source code
|
|
5
|
+
#
|
|
6
|
+
# Language is the entry point for loading and using grammars. It provides
|
|
7
|
+
# a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus).
|
|
8
|
+
#
|
|
9
|
+
# For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
|
|
10
|
+
# For pure-Ruby backends (Citrus, Prism, Psych), languages are built-in or provided by gems.
|
|
11
|
+
#
|
|
12
|
+
# == Loading Languages
|
|
13
|
+
#
|
|
14
|
+
# The primary way to load a language is via registration:
|
|
15
|
+
#
|
|
16
|
+
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
17
|
+
# language = TreeHaver::Language.toml
|
|
18
|
+
#
|
|
19
|
+
# For explicit loading without registration:
|
|
20
|
+
#
|
|
21
|
+
# language = TreeHaver::Language.from_library(
|
|
22
|
+
# "/path/to/libtree-sitter-toml.so",
|
|
23
|
+
# symbol: "tree_sitter_toml"
|
|
24
|
+
# )
|
|
25
|
+
#
|
|
26
|
+
# For ruby_tree_sitter compatibility:
|
|
27
|
+
#
|
|
28
|
+
# language = TreeHaver::Language.load("toml", "/path/to/libtree-sitter-toml.so")
|
|
29
|
+
#
|
|
30
|
+
# @example Register and load a language
|
|
31
|
+
# TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
|
|
32
|
+
# language = TreeHaver::Language.toml
|
|
33
|
+
class Language
|
|
34
|
+
class << self
|
|
35
|
+
# Load a language grammar from a shared library (ruby_tree_sitter compatibility)
|
|
36
|
+
#
|
|
37
|
+
# This method provides API compatibility with ruby_tree_sitter which uses
|
|
38
|
+
# `Language.load(name, path)`.
|
|
39
|
+
#
|
|
40
|
+
# @param name [String] the language name (e.g., "toml")
|
|
41
|
+
# @param path [String] absolute path to the language shared library
|
|
42
|
+
# @param validate [Boolean] if true, validates the path for safety (default: true)
|
|
43
|
+
# @return [Language] loaded language handle
|
|
44
|
+
# @raise [NotAvailable] if the library cannot be loaded
|
|
45
|
+
# @raise [ArgumentError] if the path fails security validation
|
|
46
|
+
# @example
|
|
47
|
+
# language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
|
|
48
|
+
def load(name, path, validate: true)
  # Build the exported function name from the language name, then hand off
  # to from_library, which performs validation and the actual loading.
  exported_symbol = "tree_sitter_#{name}"
  from_library(path, symbol: exported_symbol, name: name, validate: validate)
end
|
|
51
|
+
|
|
52
|
+
# Load a language grammar from a shared library
|
|
53
|
+
#
|
|
54
|
+
# The library must export a function that returns a pointer to a TSLanguage struct.
|
|
55
|
+
# By default, TreeHaver looks for a symbol named "tree_sitter_<name>".
|
|
56
|
+
#
|
|
57
|
+
# == Security
|
|
58
|
+
#
|
|
59
|
+
# By default, paths are validated using {PathValidator} to prevent path traversal
|
|
60
|
+
# and other attacks. Set `validate: false` to skip validation (not recommended
|
|
61
|
+
# unless you've already validated the path).
|
|
62
|
+
#
|
|
63
|
+
# @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
|
|
64
|
+
# @param symbol [String, nil] name of the exported function (defaults to auto-detection)
|
|
65
|
+
# @param name [String, nil] logical name for the language (used in caching)
|
|
66
|
+
# @param validate [Boolean] if true, validates path and symbol for safety (default: true)
|
|
67
|
+
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
68
|
+
# @return [Language] loaded language handle
|
|
69
|
+
# @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
|
|
70
|
+
# @raise [ArgumentError] if path or symbol fails security validation
|
|
71
|
+
# @example
|
|
72
|
+
# language = TreeHaver::Language.from_library(
|
|
73
|
+
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
74
|
+
# symbol: "tree_sitter_toml",
|
|
75
|
+
# name: "toml"
|
|
76
|
+
# )
|
|
77
|
+
# @example With explicit backend
|
|
78
|
+
# language = TreeHaver::Language.from_library(
|
|
79
|
+
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
80
|
+
# symbol: "tree_sitter_toml",
|
|
81
|
+
# backend: :ffi
|
|
82
|
+
# )
|
|
83
|
+
def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
  # Safety checks (skipped entirely when validate is false): path first, then symbol.
  if validate && !PathValidator.safe_library_path?(path)
    problems = PathValidator.validation_errors(path)
    raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{problems.join("; ")}"
  end

  if validate && symbol && !PathValidator.safe_symbol_name?(symbol)
    raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
      "Symbol names must be valid C identifiers."
  end

  # from_library only works with tree-sitter backends that support .so files.
  # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) don't support it.
  native_mod = TreeHaver.resolve_native_backend_module(backend)

  if native_mod.nil?
    reason =
      if backend
        "Requested backend #{backend.inspect} is not available or does not support shared libraries"
      else
        "No native tree-sitter backend is available for loading shared libraries. " \
          "Available native backends (MRI, Rust, FFI, Java) require platform-specific setup. " \
          "For pure-Ruby parsing, use backend-specific Language classes directly (e.g., Prism, Psych, Citrus)."
      end
    raise NotAvailable, reason
  end

  # The effective backend and the symbol-override ENV var are part of the cache
  # key because both influence what gets loaded.
  cache_key = [
    TreeHaver.resolve_effective_backend(backend),
    path,
    symbol,
    name,
    ENV["TREE_SITTER_LANG_SYMBOL"],
  ]

  LanguageRegistry.fetch(cache_key) do
    language_class = native_mod::Language
    # Backends are expected to implement .from_library; older ones only offer .from_path.
    if language_class.respond_to?(:from_library)
      language_class.from_library(path, symbol: symbol, name: name)
    else
      language_class.from_path(path)
    end
  end
end
|
|
123
|
+
# Alias for {from_library}
|
|
124
|
+
# @see from_library
|
|
125
|
+
alias_method :from_path, :from_library
|
|
126
|
+
|
|
127
|
+
# Dynamic helper to load a registered language by name
|
|
128
|
+
#
|
|
129
|
+
# After registering a language with {TreeHaver.register_language},
|
|
130
|
+
# you can load it using a method call. The appropriate backend will be
|
|
131
|
+
# used based on registration and current backend.
|
|
132
|
+
#
|
|
133
|
+
# @example With tree-sitter
|
|
134
|
+
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
135
|
+
# language = TreeHaver::Language.toml
|
|
136
|
+
#
|
|
137
|
+
# @example With both backends
|
|
138
|
+
# TreeHaver.register_language(:toml,
|
|
139
|
+
# path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
140
|
+
# TreeHaver.register_language(:toml,
|
|
141
|
+
# grammar_module: TomlRB::Document)
|
|
142
|
+
# language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
|
|
143
|
+
#
|
|
144
|
+
# @param method_name [Symbol] the registered language name
|
|
145
|
+
# @param args [Array] positional arguments
|
|
146
|
+
# @param kwargs [Hash] keyword arguments
|
|
147
|
+
# @return [Language] loaded language handle
|
|
148
|
+
# @raise [NoMethodError] if the language name is not registered
|
|
149
|
+
def method_missing(method_name, *args, **kwargs, &block)
  # Only names that were registered are resolved here; everything else is
  # passed to super (which raises the usual NoMethodError).
  all_backends = TreeHaver.registered_language(method_name)
  return super unless all_backends

  # MRI, Rust, FFI and Java all use tree-sitter grammars; only Citrus differs.
  current_backend = TreeHaver.backend_module
  backend_type = (current_backend == Backends::Citrus) ? :citrus : :tree_sitter

  # Backend-specific registration (may be nil)
  reg = all_backends[backend_type]

  if backend_type == :citrus
    return Backends::Citrus::Language.new(reg[:grammar_module]) if reg && reg[:grammar_module]

    # Citrus is active but nothing usable was registered for it
    raise NotAvailable,
      "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
        "Either register a Citrus grammar or use a tree-sitter backend. " \
        "Registered backends: #{all_backends.keys.inspect}"
  end

  # Tree-sitter backend with a registered library path: try to load it and
  # fall back to a registered Citrus grammar if loading fails.
  if reg && reg[:path]
    path = kwargs[:path] || args.first || reg[:path]

    # Symbol priority: kwargs override > registration > derive from method_name
    symbol =
      if kwargs.key?(:symbol)
        kwargs[:symbol]
      elsif reg[:symbol]
        reg[:symbol]
      else
        "tree_sitter_#{method_name}"
      end

    # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix).
    # The symbol-derived name gives e.g. "toml" rather than "toml_both" when
    # the symbol is "tree_sitter_toml".
    name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")

    begin
      return from_library(path, symbol: symbol, name: name)
    rescue NotAvailable, ArgumentError, LoadError => e
      # Return the fallback directly. Previously the handler's result was
      # discarded and the Citrus language was constructed a second time below.
      return handle_tree_sitter_load_failure(e, all_backends)
    rescue => e
      # FFI::NotFoundError can only be referenced when FFI is loaded, so it is
      # matched dynamically; any other error propagates unchanged.
      raise unless defined?(::FFI::NotFoundError) && e.is_a?(::FFI::NotFoundError)

      return handle_tree_sitter_load_failure(e, all_backends)
    end
  end

  # No tree-sitter path registered: use a Citrus grammar (pure Ruby) if one
  # was registered, so parsing still works without an installed grammar library.
  citrus_reg = all_backends[:citrus]
  if citrus_reg && citrus_reg[:grammar_module]
    return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
  end

  # No appropriate registration found
  raise ArgumentError,
    "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
      "Registered backends: #{all_backends.keys.inspect}"
end
|
|
225
|
+
|
|
226
|
+
# @api private
|
|
227
|
+
def respond_to_missing?(method_name, include_private = false)
  # Registered language names are answered by method_missing.
  return true if TreeHaver.registered_language(method_name)

  super
end
|
|
230
|
+
|
|
231
|
+
private
|
|
232
|
+
|
|
233
|
+
# Handle tree-sitter load failure with optional Citrus fallback
|
|
234
|
+
#
|
|
235
|
+
# This handles cases where:
|
|
236
|
+
# - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
|
|
237
|
+
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
|
|
238
|
+
# - Invalid arguments were provided (ArgumentError)
|
|
239
|
+
#
|
|
240
|
+
# @param error [Exception] the original error
|
|
241
|
+
# @param all_backends [Hash] all registered backends for the language
|
|
242
|
+
# @return [Backends::Citrus::Language] if Citrus fallback available
|
|
243
|
+
# @raise [Exception] re-raises original error if no fallback
|
|
244
|
+
# @api private
|
|
245
|
+
def handle_tree_sitter_load_failure(error, all_backends)
  fallback = all_backends[:citrus]

  # Without a registered Citrus grammar there is nothing to fall back to,
  # so the original error is raised again.
  raise error unless fallback && fallback[:grammar_module]

  Backends::Citrus::Language.new(fallback[:grammar_module])
end
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Utility methods for deriving tree-sitter symbol and language names from library paths
|
|
5
|
+
#
|
|
6
|
+
# This module provides consistent path parsing across all backends that load
|
|
7
|
+
# tree-sitter grammar libraries from shared object files (.so/.dylib/.dll).
|
|
8
|
+
#
|
|
9
|
+
# @example
|
|
10
|
+
# TreeHaver::LibraryPathUtils.derive_symbol_from_path("/usr/lib/libtree-sitter-toml.so")
|
|
11
|
+
# # => "tree_sitter_toml"
|
|
12
|
+
#
|
|
13
|
+
# TreeHaver::LibraryPathUtils.derive_language_name_from_path("/usr/lib/libtree-sitter-toml.so")
|
|
14
|
+
# # => "toml"
|
|
15
|
+
module LibraryPathUtils
|
|
16
|
+
module_function
|
|
17
|
+
|
|
18
|
+
# Derive the tree-sitter symbol name from a library path
|
|
19
|
+
#
|
|
20
|
+
# Symbol names are the exported C function names (e.g., "tree_sitter_toml")
|
|
21
|
+
# that return a pointer to the TSLanguage struct.
|
|
22
|
+
#
|
|
23
|
+
# Handles various naming conventions:
|
|
24
|
+
# - libtree-sitter-toml.so → tree_sitter_toml
|
|
25
|
+
# - libtree_sitter_toml.so → tree_sitter_toml
|
|
26
|
+
# - tree-sitter-toml.so → tree_sitter_toml
|
|
27
|
+
# - tree_sitter_toml.so → tree_sitter_toml
|
|
28
|
+
# - toml.so → tree_sitter_toml (assumes simple language name)
|
|
29
|
+
#
|
|
30
|
+
# @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
|
|
31
|
+
# @return [String, nil] symbol like "tree_sitter_toml", or nil if path is nil
|
|
32
|
+
def derive_symbol_from_path(path)
  return unless path

  # File.basename(path, ".*") removes only the final extension, e.g.
  #   "libtree-sitter-toml.so"        -> "libtree-sitter-toml"
  #   "libtree-sitter-toml.so.0.24"   -> "libtree-sitter-toml.so.0"
  #   "libtree-sitter-toml.0.0.dylib" -> "libtree-sitter-toml.0.0"
  stem = File.basename(path, ".*")

  # Drop what is left of a versioned ELF name (".so", ".so.0", ...)
  stem = stem.sub(/\.so(\.\d+)*\z/, "")

  # Drop version components that precede the extension (e.g. "lib<name>.0.0.dylib").
  # A dot can never be part of a C identifier, so these digits are never
  # part of the symbol.
  stem = stem.sub(/(\.\d+)+\z/, "")

  # "[lib]tree-sitter-<lang>" / "[lib]tree_sitter_<lang>" in any dash/underscore mix;
  # otherwise the stem is taken to be the language name with an optional "lib" prefix
  # (e.g. "toml.so" or "libtoml.so").
  prefixed = stem.match(/\A(?:lib[-_]?)?tree[-_]sitter[-_](.+)\z/)
  lang = prefixed ? prefixed[1] : stem.sub(/\Alib/, "")

  "tree_sitter_#{lang.tr("-", "_")}"
end
|
|
54
|
+
|
|
55
|
+
# Derive the language name from a library path
|
|
56
|
+
#
|
|
57
|
+
# Language names are the short identifiers (e.g., "toml", "json", "ruby")
|
|
58
|
+
# used by some backends (like tree_stump/Rust) to register grammars.
|
|
59
|
+
#
|
|
60
|
+
# @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
|
|
61
|
+
# @return [String, nil] language name like "toml", or nil if path is nil
|
|
62
|
+
def derive_language_name_from_path(path)
  # Path -> symbol -> language name. The prefix stripping lives in
  # derive_language_name_from_symbol (which also maps nil to nil), so the
  # "tree_sitter_" convention is defined in one place only.
  derive_language_name_from_symbol(derive_symbol_from_path(path))
end
|
|
69
|
+
|
|
70
|
+
# Derive language name from a symbol
|
|
71
|
+
#
|
|
72
|
+
# @param symbol [String, nil] symbol like "tree_sitter_toml"
|
|
73
|
+
# @return [String, nil] language name like "toml", or nil if symbol is nil
|
|
74
|
+
def derive_language_name_from_symbol(symbol)
  # Only a leading "tree_sitter_" is removed; a missing symbol yields nil.
  symbol.sub(/\Atree_sitter_/, "") if symbol
end
|
|
79
|
+
end
|
|
80
|
+
end
|
data/lib/tree_haver/node.rb
CHANGED
|
@@ -290,11 +290,14 @@ module TreeHaver
|
|
|
290
290
|
# Get a child by index
|
|
291
291
|
#
|
|
292
292
|
# @param index [Integer] Child index
|
|
293
|
-
# @return [Node, nil] Wrapped child node
|
|
293
|
+
# @return [Node, nil] Wrapped child node, or nil if index out of bounds
|
|
294
294
|
def child(index)
  raw_child = @inner_node.child(index)
  # A nil from the backend means there is no child at this index.
  raw_child.nil? ? nil : Node.new(raw_child, source: @source)
rescue IndexError
  # Some backends (e.g., MRI w/ ruby_tree_sitter) raise IndexError for out of bounds
  nil
end
|
|
299
302
|
|
|
300
303
|
# Get a named child by index
|