tree_haver 5.0.4 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/tree_haver/backend_context.rb +28 -0
- data/lib/tree_haver/backend_registry.rb +19 -432
- data/lib/tree_haver/contracts.rb +460 -0
- data/lib/tree_haver/kaitai_backend.rb +30 -0
- data/lib/tree_haver/language_pack.rb +190 -0
- data/lib/tree_haver/peg_backends.rb +76 -0
- data/lib/tree_haver/version.rb +1 -12
- data/lib/tree_haver.rb +7 -1316
- data.tar.gz.sig +0 -0
- metadata +34 -245
- metadata.gz.sig +0 -0
- data/CHANGELOG.md +0 -1366
- data/CITATION.cff +0 -20
- data/CODE_OF_CONDUCT.md +0 -134
- data/CONTRIBUTING.md +0 -359
- data/FUNDING.md +0 -74
- data/LICENSE.txt +0 -21
- data/README.md +0 -2347
- data/REEK +0 -0
- data/RUBOCOP.md +0 -71
- data/SECURITY.md +0 -21
- data/lib/tree_haver/backend_api.rb +0 -349
- data/lib/tree_haver/backends/citrus.rb +0 -487
- data/lib/tree_haver/backends/ffi.rb +0 -1009
- data/lib/tree_haver/backends/java.rb +0 -893
- data/lib/tree_haver/backends/mri.rb +0 -362
- data/lib/tree_haver/backends/parslet.rb +0 -560
- data/lib/tree_haver/backends/prism.rb +0 -471
- data/lib/tree_haver/backends/psych.rb +0 -375
- data/lib/tree_haver/backends/rust.rb +0 -239
- data/lib/tree_haver/base/language.rb +0 -98
- data/lib/tree_haver/base/node.rb +0 -322
- data/lib/tree_haver/base/parser.rb +0 -24
- data/lib/tree_haver/base/point.rb +0 -48
- data/lib/tree_haver/base/tree.rb +0 -128
- data/lib/tree_haver/base.rb +0 -12
- data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
- data/lib/tree_haver/compat.rb +0 -43
- data/lib/tree_haver/grammar_finder.rb +0 -374
- data/lib/tree_haver/language.rb +0 -295
- data/lib/tree_haver/language_registry.rb +0 -190
- data/lib/tree_haver/library_path_utils.rb +0 -80
- data/lib/tree_haver/node.rb +0 -579
- data/lib/tree_haver/parser.rb +0 -438
- data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
- data/lib/tree_haver/path_validator.rb +0 -353
- data/lib/tree_haver/point.rb +0 -27
- data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
- data/lib/tree_haver/rspec/testable_node.rb +0 -217
- data/lib/tree_haver/rspec.rb +0 -33
- data/lib/tree_haver/tree.rb +0 -258
- data/sig/tree_haver/backends.rbs +0 -352
- data/sig/tree_haver/grammar_finder.rbs +0 -29
- data/sig/tree_haver/path_validator.rbs +0 -32
- data/sig/tree_haver.rbs +0 -234
data/lib/tree_haver/language.rb
DELETED
|
@@ -1,295 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
  # Factory module for loading language grammars
  #
  # Language is the entry point for loading and using grammars. It provides
  # a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus, Parslet).
  #
  # This is a module with only module methods (factory pattern), not a class.
  # Backend-specific Language classes (e.g., Backends::Citrus::Language,
  # Backends::Parslet::Language) inherit from Base::Language.
  #
  # For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
  # For pure-Ruby backends (Citrus, Parslet, Prism, Psych), languages are built-in or provided by gems.
  #
  # == Loading Languages
  #
  # The primary way to load a language is via registration:
  #
  #   TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
  #   language = TreeHaver::Language.toml
  #
  # For explicit loading without registration:
  #
  #   language = TreeHaver::Language.from_library(
  #     "/path/to/libtree-sitter-toml.so",
  #     symbol: "tree_sitter_toml"
  #   )
  #
  # For ruby_tree_sitter compatibility:
  #
  #   language = TreeHaver::Language.load("toml", "/path/to/libtree-sitter-toml.so")
  #
  # @example Register and load a language
  #   TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
  #   language = TreeHaver::Language.toml
  #
  # @see Base::Language The base class that backend Language classes inherit from
  module Language
    class << self
      # Load a language grammar from a shared library (ruby_tree_sitter compatibility)
      #
      # This method provides API compatibility with ruby_tree_sitter which uses
      # `Language.load(name, path)`. It delegates to {from_library} with the
      # symbol derived as "tree_sitter_<name>".
      #
      # @param name [String] the language name (e.g., "toml")
      # @param path [String] absolute path to the language shared library
      # @param validate [Boolean] if true, validates the path for safety (default: true)
      # @return [Language] loaded language handle
      # @raise [NotAvailable] if the library cannot be loaded
      # @raise [ArgumentError] if the path fails security validation
      # @example
      #   language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
      def load(name, path, validate: true)
        from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
      end

      # Load a language grammar from a shared library
      #
      # The library must export a function that returns a pointer to a TSLanguage struct.
      # By default, TreeHaver looks for a symbol named "tree_sitter_<name>".
      #
      # == Security
      #
      # By default, paths are validated using {PathValidator} to prevent path traversal
      # and other attacks. Set `validate: false` to skip validation (not recommended
      # unless you've already validated the path).
      #
      # @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
      # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
      # @param name [String, nil] logical name for the language (used in caching)
      # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
      # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
      # @return [Language] loaded language handle
      # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
      # @raise [ArgumentError] if path or symbol fails security validation
      # @example
      #   language = TreeHaver::Language.from_library(
      #     "/usr/local/lib/libtree-sitter-toml.so",
      #     symbol: "tree_sitter_toml",
      #     name: "toml"
      #   )
      # @example With explicit backend
      #   language = TreeHaver::Language.from_library(
      #     "/usr/local/lib/libtree-sitter-toml.so",
      #     symbol: "tree_sitter_toml",
      #     backend: :ffi
      #   )
      def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
        if validate
          unless PathValidator.safe_library_path?(path)
            errors = PathValidator.validation_errors(path)
            raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
          end

          if symbol && !PathValidator.safe_symbol_name?(symbol)
            raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
              "Symbol names must be valid C identifiers."
          end
        end

        # from_library only works with tree-sitter backends that support .so files
        # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) don't support from_library
        mod = TreeHaver.resolve_native_backend_module(backend)

        if mod.nil?
          if backend
            raise NotAvailable, "Requested backend #{backend.inspect} is not available or does not support shared libraries"
          else
            raise NotAvailable,
              "No native tree-sitter backend is available for loading shared libraries. " \
                "Available native backends (MRI, Rust, FFI, Java) require platform-specific setup. " \
                "For pure-Ruby parsing, use backend-specific Language classes directly (e.g., Prism, Psych, Citrus)."
          end
        end

        # Backend must implement .from_library; fallback to .from_path for older impls
        # Include effective backend AND ENV vars in cache key since they affect loading
        effective_b = TreeHaver.resolve_effective_backend(backend)
        key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
        LanguageRegistry.fetch(key) do
          if mod::Language.respond_to?(:from_library)
            mod::Language.from_library(path, symbol: symbol, name: name)
          else
            mod::Language.from_path(path)
          end
        end
      end
      # Alias for {from_library}
      # @see from_library
      alias_method :from_path, :from_library

      # Dynamic helper to load a registered language by name
      #
      # After registering a language with {TreeHaver.register_language},
      # you can load it using a method call. The appropriate backend will be
      # used based on registration and current backend.
      #
      # @example With tree-sitter
      #   TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
      #   language = TreeHaver::Language.toml
      #
      # @example With both backends
      #   TreeHaver.register_language(:toml,
      #     path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
      #   TreeHaver.register_language(:toml,
      #     grammar_module: TomlRB::Document)
      #   language = TreeHaver::Language.toml  # Uses appropriate grammar for active backend
      #
      # @param method_name [Symbol] the registered language name
      # @param args [Array] positional arguments; the first one, if given, overrides
      #   the registered library path (tree-sitter registrations only)
      # @param kwargs [Hash] keyword arguments; :path, :symbol and :name override the
      #   registered/derived values (tree-sitter registrations only)
      # @return [Language] loaded language handle
      # @raise [NoMethodError] if the language name is not registered
      # @raise [NotAvailable] if Citrus/Parslet is active without a matching grammar, or
      #   the native load fails and no fallback is allowed
      # @raise [ArgumentError] if no registration is compatible with the active backend
      def method_missing(method_name, *args, **kwargs, &block)
        # Resolve only if the language name was registered
        all_backends = TreeHaver.registered_language(method_name)
        return super unless all_backends

        # Check current backend
        current_backend = TreeHaver.backend_module

        # Determine which backend type to use
        backend_type = if current_backend == Backends::Citrus
          :citrus
        elsif current_backend == Backends::Parslet
          :parslet
        else
          :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
        end

        # Get backend-specific registration
        reg = all_backends[backend_type]

        # If Citrus backend is active
        if backend_type == :citrus
          if reg && reg[:grammar_module]
            return Backends::Citrus::Language.new(reg[:grammar_module])
          end

          # Fall back to error if no Citrus grammar registered
          raise NotAvailable,
            "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
              "Either register a Citrus grammar or use a tree-sitter backend. " \
              "Registered backends: #{all_backends.keys.inspect}"
        end

        # If Parslet backend is active
        if backend_type == :parslet
          if reg && reg[:grammar_class]
            return Backends::Parslet::Language.new(reg[:grammar_class])
          end

          # Fall back to error if no Parslet grammar registered
          raise NotAvailable,
            "Parslet backend is active but no Parslet grammar registered for :#{method_name}. " \
              "Either register a Parslet grammar or use a tree-sitter backend. " \
              "Registered backends: #{all_backends.keys.inspect}"
        end

        # For tree-sitter backends, try to load from path
        # If that fails, fall back to Citrus/Parslet if available
        if reg && reg[:path]
          path = kwargs[:path] || args.first || reg[:path]
          # Symbol priority: kwargs override > registration > derive from method_name
          symbol = if kwargs.key?(:symbol)
            kwargs[:symbol]
          elsif reg[:symbol]
            reg[:symbol]
          else
            "tree_sitter_#{method_name}"
          end
          # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
          # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
          # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
          name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")

          begin
            return from_library(path, symbol: symbol, name: name)
          rescue NotAvailable, ArgumentError, LoadError => e
            # Tree-sitter failed to load - check for Citrus/Parslet fallback
            # Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
            # The handler either returns the fallback language or re-raises, so its
            # result is returned directly instead of building the fallback a second
            # time in the :auto branch below.
            return handle_tree_sitter_load_failure(e, all_backends)
          end
        end

        # No tree-sitter path registered - check for Citrus or Parslet fallback
        # This enables auto-fallback when tree-sitter grammar is not installed
        # but a pure Ruby grammar (Citrus or Parslet) is available.
        # Only fall back when backend is :auto - explicit native backend requests should fail.
        if TreeHaver.effective_backend == :auto
          fallback = pure_ruby_fallback(all_backends)
          return fallback if fallback
        end

        # No appropriate registration found
        raise ArgumentError,
          "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
            "Registered backends: #{all_backends.keys.inspect}"
      end

      # Reports registered language names as supported dynamic methods.
      #
      # @api private
      def respond_to_missing?(method_name, include_private = false)
        !!TreeHaver.registered_language(method_name) || super
      end

      private

      # Handle tree-sitter load failure with optional Citrus/Parslet fallback
      #
      # This handles cases where:
      # - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
      # - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError inherits from LoadError)
      # - Invalid arguments were provided (ArgumentError)
      #
      # Fallback to Citrus/Parslet ONLY happens when:
      # - The effective backend is :auto (user didn't explicitly request a native backend)
      # - A Citrus or Parslet grammar is registered for the language
      #
      # If the user explicitly requested a native backend (:mri, :rust, :ffi, :java),
      # we should NOT silently fall back to pure Ruby - that would violate the user's intent.
      #
      # @param error [Exception] the original error
      # @param all_backends [Hash] all registered backends for the language
      # @return [Backends::Citrus::Language, Backends::Parslet::Language] if fallback available and allowed
      # @raise [Exception] re-raises original error if no fallback or fallback not allowed
      # @api private
      def handle_tree_sitter_load_failure(error, all_backends)
        # Only fall back to pure Ruby when backend is :auto
        # If user explicitly requested a native backend, respect that choice
        fallback = pure_ruby_fallback(all_backends) if TreeHaver.effective_backend == :auto
        return fallback if fallback

        # No pure Ruby fallback allowed or available, re-raise the original error
        raise error
      end

      # Build a pure-Ruby Language from the registrations, preferring Citrus over Parslet.
      #
      # @param all_backends [Hash] all registered backends for the language
      # @return [Backends::Citrus::Language, Backends::Parslet::Language, nil]
      #   nil when neither a Citrus :grammar_module nor a Parslet :grammar_class is registered
      # @api private
      def pure_ruby_fallback(all_backends)
        citrus_reg = all_backends[:citrus]
        if citrus_reg && citrus_reg[:grammar_module]
          return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
        end

        parslet_reg = all_backends[:parslet]
        if parslet_reg && parslet_reg[:grammar_class]
          return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
        end

        nil
      end
    end
  end
end
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
  # Thread-safe language registrations and cache for loaded Language handles
  #
  # The LanguageRegistry provides two main functions:
  # 1. **Registrations**: Store mappings from language names to backend-specific configurations
  # 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
  #
  # The registry supports multiple backends for the same language, allowing runtime
  # switching, benchmarking, and fallback scenarios.
  #
  # All access goes through a single mutex. The lock is re-entrant for the owning
  # thread, so a {fetch} block may itself call back into the registry.
  #
  # == Supported Backend Types
  #
  # The registry is extensible and supports any backend type. Common types include:
  #
  # - `:tree_sitter` - Native tree-sitter grammars (.so files)
  # - `:citrus` - Citrus PEG parser grammars (pure Ruby)
  # - `:prism` - Ruby's Prism parser (Ruby source only)
  # - `:psych` - Ruby's Psych parser (YAML only)
  # - `:commonmarker` - Commonmarker gem (Markdown)
  # - `:markly` - Markly gem (Markdown/GFM)
  # - `:rbs` - RBS gem (RBS type signatures) - registered externally by rbs-merge
  #
  # External gems can register their own backend types using the same API.
  #
  # Registration structure:
  # ```ruby
  # @registrations = {
  #   toml: {
  #     tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
  #     citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
  #   },
  #   ruby: {
  #     prism: { backend_module: TreeHaver::Backends::Prism }
  #   },
  #   yaml: {
  #     psych: { backend_module: TreeHaver::Backends::Psych }
  #   },
  #   markdown: {
  #     commonmarker: { backend_module: TreeHaver::Backends::Commonmarker },
  #     markly: { backend_module: TreeHaver::Backends::Markly }
  #   },
  #   rbs: {
  #     rbs: { backend_module: Rbs::Merge::Backends::RbsBackend }  # External
  #   }
  # }
  # ```
  #
  # @example Register tree-sitter grammar
  #   ```ruby
  #   TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
  #     path: "/path/to/lib.so", symbol: "tree_sitter_toml")
  #   ```
  #
  # @example Register Citrus grammar
  #   ```ruby
  #   TreeHaver::LanguageRegistry.register(:toml, :citrus,
  #     grammar_module: TomlRB::Document, gem_name: "toml-rb")
  #   ```
  #
  # @example Register a pure Ruby backend (internal or external)
  #   ```ruby
  #   TreeHaver::LanguageRegistry.register(:rbs, :rbs,
  #     backend_module: Rbs::Merge::Backends::RbsBackend,
  #     gem_name: "rbs")
  #   ```
  #
  # @api private
  module LanguageRegistry
    @mutex = Mutex.new
    @cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
    @registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable

    module_function

    # Register a language for a specific backend
    #
    # Stores backend-specific configuration for a language. Multiple backends
    # can be registered for the same language without conflict. Registering the
    # same language/backend pair again replaces the previous configuration.
    # Config entries whose value is nil are dropped before storing.
    #
    # @param name [Symbol, String] language identifier (e.g., :toml, :json, :ruby, :yaml, :rbs)
    # @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :prism, :psych, :commonmarker, :markly, or custom)
    # @param config [Hash] backend-specific configuration
    # @option config [String] :path tree-sitter library path (for tree-sitter backends)
    # @option config [String] :symbol exported symbol name (for tree-sitter backends)
    # @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
    # @option config [Module] :backend_module backend module with Language/Parser classes (for pure Ruby backends)
    # @option config [String] :gem_name gem name for error messages and availability checks
    # @return [void]
    # @example Register tree-sitter grammar
    #   LanguageRegistry.register(:toml, :tree_sitter,
    #     path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
    # @example Register Citrus grammar
    #   LanguageRegistry.register(:toml, :citrus,
    #     grammar_module: TomlRB::Document, gem_name: "toml-rb")
    # @example Register pure Ruby backend (external gem)
    #   LanguageRegistry.register(:rbs, :rbs,
    #     backend_module: Rbs::Merge::Backends::RbsBackend, gem_name: "rbs")
    def register(name, backend_type, **config)
      key = name.to_sym
      backend_key = backend_type.to_sym

      with_registry_lock do
        @registrations[key] ||= {}
        @registrations[key][backend_key] = config.compact
      end
      nil
    end

    # Fetch registration entries for a language
    #
    # Returns all backend-specific configurations for a language, or only the
    # configuration for +backend_type+ when one is given.
    #
    # @param name [Symbol, String] language identifier
    # @param backend_type [Symbol, nil] optional backend type to filter by
    # @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config;
    #   nil when the language (or the requested backend) is not registered
    # @example Get all backends
    #   entries = LanguageRegistry.registered(:toml)
    #   # => {
    #   #   tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
    #   #   citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
    #   # }
    # @example Get specific backend
    #   entry = LanguageRegistry.registered(:toml, :citrus)
    #   # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
    def registered(name, backend_type = nil)
      with_registry_lock do
        lang_config = @registrations[name.to_sym]
        next unless lang_config

        backend_type ? lang_config[backend_type.to_sym] : lang_config
      end
    end

    # Fetch a cached language by key or compute and store it
    #
    # This method provides thread-safe memoization for loaded Language objects.
    # If the key exists in the cache, the cached value is returned immediately.
    # Otherwise, the block is called to compute the value, which is then cached.
    # If the block raises, nothing is cached and the error propagates.
    #
    # The block runs while the registry lock is held, so concurrent callers wait
    # instead of computing the same value twice. The block may call back into
    # the registry (nested {fetch}, {register}, {registered}) from the same thread.
    #
    # @param key [Array] cache key; any value usable as a Hash key, e.g.
    #   [backend, path, symbol, name]
    # @yieldreturn [Object] the computed language handle (called only on cache miss)
    # @return [Object] the cached or computed language handle
    # @example
    #   language = LanguageRegistry.fetch(["/path/lib.so", "symbol", "toml"]) do
    #     expensive_language_load_operation
    #   end
    def fetch(key)
      with_registry_lock do
        # key? (not truthiness) so that cached nil/false values are honoured
        next @cache[key] if @cache.key?(key)

        @cache[key] = yield
      end
    end

    # Clear the language cache
    #
    # Removes all cached Language objects. The next call to {fetch} for any key
    # will recompute the value. Does not clear registrations.
    #
    # @return [void]
    # @example
    #   LanguageRegistry.clear_cache!
    def clear_cache!
      with_registry_lock { @cache.clear }
      nil
    end

    # Clear all registrations and cache
    #
    # Removes all language registrations and cached Language objects.
    # Primarily used in tests to reset state between test cases.
    #
    # @return [void]
    # @example
    #   LanguageRegistry.clear
    def clear
      with_registry_lock do
        @registrations.clear
        @cache.clear
      end
      nil
    end

    # Run the block while holding the registry mutex.
    #
    # Ruby's Mutex is not re-entrant: locking it again from the owning thread
    # raises ThreadError. When the current thread already holds the lock (we are
    # inside a {fetch} block), the block is simply run under the existing lock.
    #
    # @yieldreturn [Object] value to return
    # @return [Object] the block's value
    # @api private
    def with_registry_lock(&block)
      @mutex.owned? ? yield : @mutex.synchronize(&block)
    end
    private_class_method :with_registry_lock
  end
end
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
  # Utility methods for deriving tree-sitter symbol and language names from library paths
  #
  # This module provides consistent path parsing across all backends that load
  # tree-sitter grammar libraries from shared object files (.so/.dylib/.dll).
  #
  # @example
  #   TreeHaver::LibraryPathUtils.derive_symbol_from_path("/usr/lib/libtree-sitter-toml.so")
  #   # => "tree_sitter_toml"
  #
  #   TreeHaver::LibraryPathUtils.derive_language_name_from_path("/usr/lib/libtree-sitter-toml.so")
  #   # => "toml"
  module LibraryPathUtils
    module_function

    # Derive the tree-sitter symbol name from a library path
    #
    # Symbol names are the exported C function names (e.g., "tree_sitter_toml")
    # that return a pointer to the TSLanguage struct.
    #
    # Handles various naming conventions:
    # - libtree-sitter-toml.so → tree_sitter_toml
    # - libtree_sitter_toml.so → tree_sitter_toml
    # - tree-sitter-toml.so → tree_sitter_toml
    # - tree_sitter_toml.so → tree_sitter_toml
    # - toml.so → tree_sitter_toml (assumes simple language name)
    # - libtree-sitter-toml.so.0.24 → tree_sitter_toml (version after the extension)
    # - libtree-sitter-toml.0.24.dylib → tree_sitter_toml (version before the extension)
    #
    # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
    # @return [String, nil] symbol like "tree_sitter_toml", or nil if path is nil
    def derive_symbol_from_path(path)
      return unless path

      # Extract filename without (the last) extension: "libtree-sitter-toml" or "toml"
      filename = File.basename(path, ".*")

      # Handle multi-part extensions like .so.0.24
      filename = filename.sub(/\.so(\.\d+)*\z/, "")

      # Handle versions placed before the extension, e.g. libtree-sitter-toml.0.24.dylib;
      # otherwise the dots would leak into the symbol and make it an invalid C identifier
      filename = filename.sub(/(\.\d+)+\z/, "")

      # Normalize to tree_sitter_<lang>. With a (lib)tree-sitter- prefix the language is
      # whatever follows it; otherwise assume the filename is just the language name
      # (e.g., "toml.so"), stripping a "lib" prefix if present (e.g., "libtoml.so").
      prefixed = filename.match(/\A(?:lib[-_]?)?tree[-_]sitter[-_](.+)\z/)
      lang = prefixed ? prefixed[1] : filename.sub(/\Alib/, "")

      "tree_sitter_#{lang.tr("-", "_")}"
    end

    # Derive the language name from a library path
    #
    # Language names are the short identifiers (e.g., "toml", "json", "ruby")
    # used by some backends (like tree_stump/Rust) to register grammars.
    #
    # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
    # @return [String, nil] language name like "toml", or nil if path is nil
    def derive_language_name_from_path(path)
      derive_language_name_from_symbol(derive_symbol_from_path(path))
    end

    # Derive language name from a symbol
    #
    # Strips a leading "tree_sitter_" prefix; symbols without the prefix are
    # returned unchanged.
    #
    # @param symbol [String, nil] symbol like "tree_sitter_toml"
    # @return [String, nil] language name like "toml", or nil if symbol is nil
    def derive_language_name_from_symbol(symbol)
      return unless symbol

      symbol.sub(/\Atree_sitter_/, "")
    end
  end
end
|