tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
|
@@ -54,14 +54,14 @@ module TreeHaver
|
|
|
54
54
|
# @return [Hash{Symbol => Object}] capability map
|
|
55
55
|
# @example
|
|
56
56
|
# TreeHaver::Backends::Rust.capabilities
|
|
57
|
-
# # => { backend: :rust, query: true, bytes_field: true, incremental:
|
|
57
|
+
# # => { backend: :rust, query: true, bytes_field: true, incremental: false }
|
|
58
58
|
def capabilities
  # An unavailable backend advertises nothing at all.
  if available?
    {
      backend: :rust,
      query: true,
      bytes_field: true,
      # TreeStump doesn't currently expose incremental parsing to Ruby
      incremental: false,
    }
  else
    {}
  end
end
|
|
67
67
|
end
|
|
@@ -72,16 +72,52 @@ module TreeHaver
|
|
|
72
72
|
# tree_stump uses a registration-based API where languages are registered
|
|
73
73
|
# by name, then referenced by that name when setting parser language.
|
|
74
74
|
class Language
|
|
75
|
+
include Comparable
|
|
76
|
+
|
|
75
77
|
# The registered language name
|
|
76
78
|
# @return [String]
|
|
77
79
|
attr_reader :name
|
|
78
80
|
|
|
81
|
+
# The backend this language is for
|
|
82
|
+
# @return [Symbol]
|
|
83
|
+
attr_reader :backend
|
|
84
|
+
|
|
85
|
+
# The path this language was loaded from (if known)
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
attr_reader :path
|
|
88
|
+
|
|
79
89
|
# @api private
|
|
80
90
|
# @param name [String] the registered language name
|
|
81
|
-
|
|
91
|
+
# @param path [String, nil] path language was loaded from
|
|
92
|
+
def initialize(name, path: nil)
  # Every Language built by this backend is tagged :rust so comparisons
  # can reject languages that carry a different backend tag.
  @backend = :rust
  @path = path
  @name = name
end
|
|
97
|
+
|
|
98
|
+
# Compare languages for equality
|
|
99
|
+
#
|
|
100
|
+
# Rust languages are equal if they have the same backend and name.
|
|
101
|
+
# Name uniquely identifies a registered language in TreeStump.
|
|
102
|
+
#
|
|
103
|
+
# @param other [Object] object to compare with
|
|
104
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
105
|
+
def <=>(other)
  # Only languages carrying the same backend tag are ordered against each
  # other; anything else is "not comparable" (nil), per the Comparable contract.
  same_kind = other.is_a?(Language) && other.backend == @backend
  same_kind ? (@name <=> other.name) : nil
end
|
|
84
111
|
|
|
112
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
113
|
+
# @return [Integer]
|
|
114
|
+
def hash
  # Built from the same fields that <=> compares, so equal languages
  # land in the same Hash/Set bucket.
  identity = [@backend, @name]
  identity.hash
end
|
|
117
|
+
|
|
118
|
+
# Alias eql? to ==
|
|
119
|
+
alias_method :eql?, :==
|
|
120
|
+
|
|
85
121
|
# Load a language from a shared library path
|
|
86
122
|
#
|
|
87
123
|
# @param path [String] absolute path to the language shared library
|
|
@@ -102,7 +138,7 @@ module TreeHaver
|
|
|
102
138
|
# The name is used to derive the symbol automatically (tree_sitter_<name>)
|
|
103
139
|
lang_name = name || File.basename(path, ".*").sub(/^libtree-sitter-/, "")
|
|
104
140
|
::TreeStump.register_lang(lang_name, path)
|
|
105
|
-
new(lang_name)
|
|
141
|
+
new(lang_name, path: path)
|
|
106
142
|
rescue RuntimeError => e
|
|
107
143
|
raise TreeHaver::NotAvailable, "Failed to load language from #{path}: #{e.message}"
|
|
108
144
|
end
|
|
@@ -128,34 +164,41 @@ module TreeHaver
|
|
|
128
164
|
|
|
129
165
|
# Set the language for this parser
|
|
130
166
|
#
|
|
131
|
-
#
|
|
167
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
168
|
+
# When called from TreeHaver::Parser, receives String (language name).
|
|
169
|
+
# For backward compatibility and backend tests, also handles Language wrapper.
|
|
170
|
+
#
|
|
171
|
+
# @param lang [Language, String] the language wrapper or name string
|
|
132
172
|
# @return [Language, String] the language that was set
|
|
133
173
|
def language=(lang)
  # Accept either a wrapper exposing #name or anything that can be
  # stringified into a registered language name.
  registered_name =
    if lang.respond_to?(:name)
      lang.name
    else
      lang.to_s
    end

  # tree_stump selects the grammar by its registered string name
  @parser.set_language(registered_name)
  lang # rubocop:disable Lint/Void (intentional return value)
end
|
|
139
180
|
|
|
140
181
|
# Parse source code
|
|
141
182
|
#
|
|
142
183
|
# @param source [String] the source code to parse
|
|
143
|
-
# @return [
|
|
184
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
144
185
|
def parse(source)
  # Hand back the backend's own tree object untouched; the caller
  # (TreeHaver::Parser) is responsible for wrapping it.
  raw_tree = @parser.parse(source)
  raw_tree
end
|
|
148
189
|
|
|
149
190
|
# Parse source code with optional incremental parsing
|
|
150
191
|
#
|
|
151
|
-
#
|
|
192
|
+
# Note: TreeStump does not currently expose incremental parsing to Ruby.
|
|
193
|
+
# The parse method always does a full parse, ignoring old_tree.
|
|
194
|
+
#
|
|
195
|
+
# @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing (ignored)
|
|
152
196
|
# @param source [String] the source code to parse
|
|
153
|
-
# @return [
|
|
154
|
-
def parse_string(old_tree, source)
|
|
155
|
-
#
|
|
156
|
-
#
|
|
157
|
-
|
|
158
|
-
TreeHaver::Tree.new(tree, source: source)
|
|
197
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
198
|
+
def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
  # old_tree is accepted for interface parity with the other backends but is
  # never forwarded: only the source is handed to the wrapped parser, so this
  # is always a full parse.
  raw_tree = @parser.parse(source)
  raw_tree
end
|
|
160
203
|
end
|
|
161
204
|
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
  # Locates a Citrus grammar shipped inside a gem and registers it with
  # tree_haver.
  #
  # A tree-sitter grammar is a shared library on disk; a Citrus grammar is a
  # plain Ruby object that responds to +parse(source)+. Discovery therefore
  # means: require the gem, resolve the grammar constant, and check that it
  # responds to +parse+. The outcome of that attempt is cached per finder.
  #
  # @example Basic usage with toml-rb
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :toml,
  #     gem_name: "toml-rb",
  #     grammar_const: "TomlRB::Document"
  #   )
  #   finder.register! if finder.available?
  #
  # @example With custom require path
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :json,
  #     gem_name: "json-rb",
  #     grammar_const: "JsonRB::Grammar",
  #     require_path: "json/rb"
  #   )
  #
  # @see GrammarFinder For tree-sitter grammar discovery
  class CitrusGrammarFinder
    # @return [Symbol] the language identifier
    attr_reader :language_name

    # @return [String] the gem name (used in installation hints)
    attr_reader :gem_name

    # @return [String] the constant path to the grammar (e.g., "TomlRB::Document")
    attr_reader :grammar_const

    # @return [String] the path passed to +require+ (gem_name with dashes
    #   turned into slashes unless overridden)
    attr_reader :require_path

    # Build a finder. Nothing is loaded until {#available?} is first called.
    #
    # @param language [Symbol, String] the language name (e.g., :toml, :json)
    # @param gem_name [String] the gem name (e.g., "toml-rb")
    # @param grammar_const [String] constant path to grammar (e.g., "TomlRB::Document")
    # @param require_path [String, nil] custom require path (defaults to gem_name with dashes→slashes)
    def initialize(language:, gem_name:, grammar_const:, require_path: nil)
      @language_name = language.to_sym
      @gem_name = gem_name
      @grammar_const = grammar_const
      @require_path = require_path || gem_name.tr("-", "/")
      @load_attempted = false
      @available = false
      @grammar_module = nil
    end

    # Whether the grammar could be loaded and responds to +parse+.
    #
    # The first call performs the require/resolve attempt; later calls return
    # the cached verdict without retrying.
    #
    # @return [Boolean] true if grammar is available
    def available?
      return @available if @load_attempted

      @load_attempted = true
      @available = load_grammar
    end

    # The resolved grammar object.
    #
    # @return [Module, nil] the grammar module, or nil when it is not available
    def grammar_module
      available? # Ensure we've tried to load
      @grammar_module
    end

    # Register this Citrus grammar with TreeHaver.
    #
    # After registration, the language can be used via:
    #   TreeHaver::Language.{language_name}
    #
    # @param raise_on_missing [Boolean] if true, raises when grammar not available
    # @return [Boolean] true if registration succeeded, false if the grammar is unavailable
    # @raise [NotAvailable] if grammar not available and raise_on_missing is true
    def register!(raise_on_missing: false)
      unless available?
        raise NotAvailable, not_found_message if raise_on_missing

        return false
      end

      TreeHaver.register_language(
        @language_name,
        grammar_module: @grammar_module,
        gem_name: @gem_name,
      )
      true
    end

    # Diagnostic snapshot of what was searched for and what was found.
    # Triggers the load attempt if it has not happened yet.
    #
    # @return [Hash] diagnostic information
    def search_info
      {
        language: @language_name,
        gem_name: @gem_name,
        grammar_const: @grammar_const,
        require_path: @require_path,
        available: available?,
        grammar_module: @grammar_module&.name,
      }
    end

    # Human-readable message for the "grammar not found" case.
    #
    # @return [String] error message with installation hints
    def not_found_message
      "Citrus grammar for #{@language_name} not found. " \
        "Install #{@gem_name} gem: gem install #{@gem_name}"
    end

    private

    # Perform the one-time load attempt behind {#available?}.
    #
    # @grammar_module is assigned only once the candidate has passed the
    # +parse+ check, so an unusable constant is never exposed through
    # {#grammar_module} or {#search_info}.
    #
    # @return [Boolean] true when the grammar was loaded and is usable
    def load_grammar
      require @require_path

      candidate = resolve_constant(@grammar_const)
      unless candidate.respond_to?(:parse)
        warn("#{@grammar_const} doesn't respond to :parse")
        return false
      end

      @grammar_module = candidate
      true
    rescue LoadError => e
      # Always show LoadError for debugging
      warn("CitrusGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
      false
    rescue NameError => e
      # Always show NameError for debugging
      warn("CitrusGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
      false
    rescue => e
      # Anything else raised while the gem loads is treated as "not available"
      warn("CitrusGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
      warn(e.backtrace.first(3).join("\n")) if ENV["TREE_HAVER_DEBUG"]
      false
    end

    # Resolve a constant path like "TomlRB::Document", one segment at a
    # time, starting from Object.
    #
    # @param const_path [String] constant path
    # @return [Object] the constant
    # @raise [NameError] if constant not found
    def resolve_constant(const_path)
      const_path.split("::").reduce(Object) do |scope, segment|
        scope.const_get(segment)
      end
    end
  end
end
|
|
@@ -137,19 +137,55 @@ module TreeHaver
|
|
|
137
137
|
# @note Paths from ENV are validated using {PathValidator.safe_library_path?}
|
|
138
138
|
# to prevent path traversal and other attacks. Invalid ENV paths are ignored.
|
|
139
139
|
#
|
|
140
|
+
# @note Setting the ENV variable to an empty string explicitly disables
|
|
141
|
+
# this grammar. This allows fallback to alternative backends (e.g., Citrus).
|
|
142
|
+
#
|
|
140
143
|
# @return [String, nil] the path to the library, or nil if not found
|
|
141
144
|
# @see #find_library_path_safe For stricter validation (trusted directories only)
|
|
142
145
|
def find_library_path
  # Start every lookup with a clean slate so diagnostics never report a
  # rejection left over from an earlier call made under a different ENV.
  @env_rejection_reason = nil

  # Check environment variable first (highest priority).
  # Use key? to distinguish between "not set" and "set to empty".
  if ENV.key?(env_var_name)
    env_path = ENV[env_var_name]

    # Empty string means "explicitly skip this grammar": no ENV path and no
    # search-path fallback, so callers can fall back to another backend.
    if env_path.empty?
      @env_rejection_reason = "explicitly disabled (set to empty string)"
      return
    end

    # Remember why the ENV path was rejected (nil when it is acceptable)
    # for better error messages.
    @env_rejection_reason = validate_env_path(env_path)
    return env_path if @env_rejection_reason.nil?
  end

  # ENV override absent or rejected: first existing candidate wins
  # (these are constructed from trusted base dirs).
  search_paths.find { |path| File.exist?(path) }
end
|
|
152
167
|
|
|
168
|
+
# Validate an environment variable path and return reason if invalid
|
|
169
|
+
# @return [String, nil] rejection reason or nil if valid
|
|
170
|
+
def validate_env_path(path)
  # Checks run in a fixed order and the first failure wins:
  # stray whitespace, then the security screen, then existence.
  trimmed = path.strip
  return "contains leading or trailing whitespace (use #{trimmed.inspect})" unless path == trimmed

  safe = PathValidator.safe_library_path?(path)
  return "failed security validation (may contain path traversal or suspicious characters)" unless safe

  return "file does not exist" unless File.exist?(path)

  nil # no objection: the path is usable
end
|
|
188
|
+
|
|
153
189
|
# Find the grammar library path with strict security validation
|
|
154
190
|
#
|
|
155
191
|
# This method only returns paths that are in trusted system directories.
|
|
@@ -165,11 +201,67 @@ module TreeHaver
|
|
|
165
201
|
end
|
|
166
202
|
end
|
|
167
203
|
|
|
168
|
-
# Check if the grammar library is available
|
|
204
|
+
# Check if the grammar library is available AND usable
|
|
169
205
|
#
|
|
170
|
-
#
|
|
206
|
+
# This checks:
|
|
207
|
+
# 1. The grammar library file exists
|
|
208
|
+
# 2. The tree-sitter runtime is functional (can create a parser)
|
|
209
|
+
#
|
|
210
|
+
# This prevents registering grammars when tree-sitter isn't actually usable,
|
|
211
|
+
# allowing clean fallback to alternative backends like Citrus.
|
|
212
|
+
#
|
|
213
|
+
# @return [Boolean] true if the library can be found AND tree-sitter runtime works
|
|
171
214
|
def available?
  # No grammar library on disk means nothing to load.
  return false if find_library_path.nil?

  # A grammar file is only useful when the tree-sitter runtime can build a
  # parser; that check is delegated to the class-level helper.
  self.class.tree_sitter_runtime_usable?
end
|
|
222
|
+
|
|
223
|
+
# Backends that use tree-sitter (require native runtime libraries)
|
|
224
|
+
# Other backends (Citrus, Prism, Psych, etc.) don't use tree-sitter
|
|
225
|
+
# Backend modules that the runtime-usability check treats as tree-sitter
# backends; any other backend module is reported as not usable for grammars.
TREE_SITTER_BACKENDS = [
  TreeHaver::Backends::MRI, TreeHaver::Backends::FFI,
  TreeHaver::Backends::Rust, TreeHaver::Backends::Java,
].freeze
|
|
231
|
+
|
|
232
|
+
class << self
  # Check if the tree-sitter runtime is usable.
  #
  # Resolves the current backend module and tries to instantiate its Parser.
  # The outcome of that instantiation attempt (true, or false when one of
  # the rescued errors is raised) is memoized. The two early +return false+
  # exits (no backend, or a backend not listed in TREE_SITTER_BACKENDS)
  # leave the memo unset, so a later call re-evaluates the backend.
  #
  # @return [Boolean] true if tree-sitter runtime is functional
  def tree_sitter_runtime_usable?
    return @tree_sitter_runtime_usable if defined?(@tree_sitter_runtime_usable)

    @tree_sitter_runtime_usable = begin
      # Try to create a parser using the current backend
      mod = TreeHaver.resolve_backend_module(nil)

      # Only tree-sitter backends are relevant here; these returns exit the
      # method before the assignment above takes place.
      return false if mod.nil?
      return false unless TREE_SITTER_BACKENDS.include?(mod)

      # Try to instantiate a parser - this will fail if runtime isn't available
      mod::Parser.new
      true
    rescue NoMethodError, LoadError, NotAvailable, *optional_ffi_errors
      false
    end
  end

  # Reset the cached tree-sitter runtime check (for testing)
  #
  # @api private
  def reset_runtime_check!
    remove_instance_variable(:@tree_sitter_runtime_usable) if defined?(@tree_sitter_runtime_usable)
  end

  private

  # FFI::NotFoundError can only be named in a rescue list when the ffi
  # library has been loaded; referencing it otherwise raises NameError while
  # an unrelated exception is being matched.
  #
  # @return [Array<Class>] the FFI error class when defined, else empty
  def optional_ffi_errors
    defined?(FFI::NotFoundError) ? [FFI::NotFoundError] : []
  end
end
|
|
174
266
|
|
|
175
267
|
# Check if the grammar library is available in a trusted directory
|
|
@@ -205,15 +297,17 @@ module TreeHaver
|
|
|
205
297
|
#
|
|
206
298
|
# @return [Hash] diagnostic information
|
|
207
299
|
def search_info
  # Run the lookup first: besides returning the path it refreshes
  # @env_rejection_reason, which is reported below.
  located = find_library_path

  info = {
    language: @language_name,
    env_var: env_var_name,
    env_value: ENV[env_var_name],
    env_rejection_reason: @env_rejection_reason,
    symbol: symbol_name,
    library_filename: library_filename,
    search_paths: search_paths,
  }
  info[:found_path] = located
  # Reflects only whether a library file was located.
  info[:available] = !located.nil?
  info
end
|
|
219
313
|
|
|
@@ -221,9 +315,19 @@ module TreeHaver
|
|
|
221
315
|
#
|
|
222
316
|
# @return [String] error message with installation hints
|
|
223
317
|
def not_found_message
  env_value = ENV[env_var_name]

  # Middle sentence: explain the ENV override when one is set, otherwise
  # list the locations that were searched.
  detail =
    if env_value && @env_rejection_reason
      " #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
    elsif env_value
      " #{env_var_name} is set but was not used (file may have been removed)."
    else
      " Searched: #{search_paths.join(", ")}."
    end

  "tree-sitter #{@language_name} grammar not found.#{detail} " \
    "Install tree-sitter-#{@language_name} or set #{env_var_name} to a valid path."
end
|
|
228
332
|
|
|
229
333
|
private
|
|
@@ -4,86 +4,93 @@ module TreeHaver
|
|
|
4
4
|
# Thread-safe language registrations and cache for loaded Language handles
|
|
5
5
|
#
|
|
6
6
|
# The LanguageRegistry provides two main functions:
|
|
7
|
-
# 1. **Registrations**: Store mappings from language names to
|
|
7
|
+
# 1. **Registrations**: Store mappings from language names to backend-specific configurations
|
|
8
8
|
# 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
|
|
9
9
|
#
|
|
10
|
-
#
|
|
10
|
+
# The registry supports multiple backends for the same language, allowing runtime
|
|
11
|
+
# switching, benchmarking, and fallback scenarios.
|
|
11
12
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
13
|
+
# Registration structure:
|
|
14
|
+
# @registrations = {
|
|
15
|
+
# toml: {
|
|
16
|
+
# tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
|
|
17
|
+
# citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
18
|
+
# }
|
|
19
|
+
# }
|
|
20
|
+
#
|
|
21
|
+
# @example Register tree-sitter grammar
|
|
22
|
+
# TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
|
|
23
|
+
# path: "/path/to/lib.so", symbol: "tree_sitter_toml")
|
|
24
|
+
#
|
|
25
|
+
# @example Register Citrus grammar
|
|
26
|
+
# TreeHaver::LanguageRegistry.register(:toml, :citrus,
|
|
27
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
18
28
|
#
|
|
19
29
|
# @api private
|
|
20
30
|
module LanguageRegistry
|
|
21
31
|
@mutex = Mutex.new
|
|
22
|
-
@cache = {}
|
|
23
|
-
@registrations = {}
|
|
32
|
+
@cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
33
|
+
@registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
24
34
|
|
|
25
35
|
module_function
|
|
26
36
|
|
|
27
|
-
# Register a language
|
|
37
|
+
# Register a language for a specific backend
|
|
28
38
|
#
|
|
29
|
-
# Stores
|
|
30
|
-
#
|
|
31
|
-
# accessed via dynamic helpers on {TreeHaver::Language}.
|
|
39
|
+
# Stores backend-specific configuration for a language. Multiple backends
|
|
40
|
+
# can be registered for the same language without conflict.
|
|
32
41
|
#
|
|
33
42
|
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
|
|
34
|
-
# @param
|
|
35
|
-
# @param
|
|
43
|
+
# @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :mri, :rust, :ffi, :java)
|
|
44
|
+
# @param config [Hash] backend-specific configuration
|
|
45
|
+
# @option config [String] :path tree-sitter library path (for tree-sitter backends)
|
|
46
|
+
# @option config [String] :symbol exported symbol name (for tree-sitter backends)
|
|
47
|
+
# @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
|
|
48
|
+
# @option config [String] :gem_name gem name for error messages (for Citrus backend)
|
|
36
49
|
# @return [void]
|
|
37
|
-
# @example
|
|
38
|
-
# LanguageRegistry.register(:toml,
|
|
39
|
-
|
|
50
|
+
# @example Register tree-sitter grammar
|
|
51
|
+
# LanguageRegistry.register(:toml, :tree_sitter,
|
|
52
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
53
|
+
# @example Register Citrus grammar
|
|
54
|
+
# LanguageRegistry.register(:toml, :citrus,
|
|
55
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
56
|
+
def register(name, backend_type, **config)
  language_key = name.to_sym
  backend_key = backend_type.to_sym
  # nil-valued options are dropped so they never appear in the stored entry
  settings = config.compact

  @mutex.synchronize do
    # One hash per language, keyed by backend; re-registering the same
    # backend replaces its previous entry and leaves other backends intact.
    (@registrations[language_key] ||= {})[backend_key] = settings
  end
  nil
end
|
|
62
66
|
|
|
63
|
-
# Fetch a
|
|
67
|
+
# Fetch registration entries for a language
|
|
64
68
|
#
|
|
65
|
-
# Returns
|
|
69
|
+
# Returns all backend-specific configurations for a language.
|
|
66
70
|
#
|
|
67
71
|
# @param name [Symbol, String] language identifier
|
|
68
|
-
# @
|
|
69
|
-
# @
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
# @param backend_type [Symbol, nil] optional backend type to filter by
|
|
73
|
+
# @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config
|
|
74
|
+
# @example Get all backends
|
|
75
|
+
# entries = LanguageRegistry.registered(:toml)
|
|
76
|
+
# # => {
|
|
77
|
+
# # tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
|
|
78
|
+
# # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
79
|
+
# # }
|
|
80
|
+
# @example Get specific backend
|
|
81
|
+
# entry = LanguageRegistry.registered(:toml, :citrus)
|
|
82
|
+
# # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
83
|
+
def registered(name, backend_type = nil)
  @mutex.synchronize do
    backends = @registrations[name.to_sym]
    # Unknown language: nothing to report, whichever backend was asked for.
    next unless backends

    # With a backend given, narrow to that backend's config (nil when that
    # backend was never registered); otherwise return the per-backend map.
    backend_type ? backends[backend_type.to_sym] : backends
  end
end
|
|
88
95
|
|
|
89
96
|
# Fetch a cached language by key or compute and store it
|
|
@@ -119,21 +126,5 @@ module TreeHaver
|
|
|
119
126
|
@mutex.synchronize { @cache.clear }
|
|
120
127
|
nil
|
|
121
128
|
end
|
|
122
|
-
|
|
123
|
-
# Clear everything (registrations and cache)
|
|
124
|
-
#
|
|
125
|
-
# Removes all registered languages and all cached Language objects.
|
|
126
|
-
# Useful for complete teardown in tests.
|
|
127
|
-
#
|
|
128
|
-
# @return [void]
|
|
129
|
-
# @example
|
|
130
|
-
# LanguageRegistry.clear_all!
|
|
131
|
-
def clear_all!
|
|
132
|
-
@mutex.synchronize do
|
|
133
|
-
@registrations.clear
|
|
134
|
-
@cache.clear
|
|
135
|
-
end
|
|
136
|
-
nil
|
|
137
|
-
end
|
|
138
129
|
end
|
|
139
130
|
end
|