tree_haver 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +236 -3
- data/CONTRIBUTING.md +100 -0
- data/README.md +470 -85
- data/lib/tree_haver/backends/citrus.rb +423 -0
- data/lib/tree_haver/backends/ffi.rb +405 -150
- data/lib/tree_haver/backends/java.rb +63 -10
- data/lib/tree_haver/backends/mri.rb +154 -27
- data/lib/tree_haver/backends/rust.rb +58 -27
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +526 -0
- data/lib/tree_haver/path_validator.rb +47 -27
- data/lib/tree_haver/tree.rb +259 -0
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +741 -285
- data/sig/tree_haver/backends.rbs +68 -1
- data/sig/tree_haver/path_validator.rbs +1 -0
- data/sig/tree_haver.rbs +95 -9
- data.tar.gz.sig +0 -0
- metadata +12 -8
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
  # Utility for finding and registering Citrus grammar gems.
  #
  # CitrusGrammarFinder provides language-agnostic discovery of Citrus grammar
  # gems. Given a language name and gem information, it attempts to load the
  # grammar and register it with tree_haver.
  #
  # Unlike tree-sitter grammars (which are .so files), Citrus grammars are
  # Ruby modules that respond to .parse(source). This class handles the
  # discovery and registration of these grammars.
  #
  # @example Basic usage with toml-rb
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :toml,
  #     gem_name: "toml-rb",
  #     grammar_const: "TomlRB::Document"
  #   )
  #   finder.register! if finder.available?
  #
  # @example With custom require path
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :json,
  #     gem_name: "json-rb",
  #     grammar_const: "JsonRB::Grammar",
  #     require_path: "json/rb"
  #   )
  #
  # @see GrammarFinder For tree-sitter grammar discovery
  class CitrusGrammarFinder
    # @return [Symbol] the language identifier
    attr_reader :language_name

    # @return [String] the gem name to require
    attr_reader :gem_name

    # @return [String] the constant path to the grammar (e.g., "TomlRB::Document")
    attr_reader :grammar_const

    # @return [String] the require path; an inferred path is replaced by the
    #   gem name when only the gem name could be required
    attr_reader :require_path

    # Initialize a Citrus grammar finder
    #
    # @param language [Symbol, String] the language name (e.g., :toml, :json)
    # @param gem_name [String] the gem name (e.g., "toml-rb")
    # @param grammar_const [String] constant path to grammar (e.g., "TomlRB::Document");
    #   a leading "::" is accepted
    # @param require_path [String, nil] custom require path (defaults to gem_name with dashes→slashes,
    #   falling back to gem_name itself when that file cannot be found)
    def initialize(language:, gem_name:, grammar_const:, require_path: nil)
      @language_name = language.to_sym
      @gem_name = gem_name
      @grammar_const = grammar_const
      # Only a path we guessed may be second-guessed later; an explicit
      # require_path is always used exactly as given.
      @require_path_inferred = require_path.nil?
      @require_path = require_path || gem_name.tr("-", "/")
      @load_attempted = false
      @available = false
      @grammar_module = nil
    end

    # Check if the Citrus grammar is available
    #
    # Attempts to require the gem and resolve the grammar constant.
    # Result is cached after first call. Failures are reported with +warn+
    # and never raised.
    #
    # @return [Boolean] true if grammar is available
    def available?
      return @available if @load_attempted

      @load_attempted = true
      @available = false
      begin
        require_grammar_gem
        candidate = resolve_constant(@grammar_const)

        if candidate.respond_to?(:parse)
          # Publish the module only once it is known to be usable, so
          # grammar_module and search_info never expose a rejected constant.
          @grammar_module = candidate
          @available = true
        else
          warn("#{@grammar_const} doesn't respond to :parse")
        end
      rescue LoadError => e
        # Always show LoadError for debugging
        warn("CitrusGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
      rescue NameError => e
        # Always show NameError for debugging
        warn("CitrusGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
      rescue => e
        # Catch any other errors
        warn("CitrusGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
        warn(Array(e.backtrace).first(3).join("\n")) if ENV["TREE_HAVER_DEBUG"]
      end

      @available
    end

    # Get the resolved grammar module
    #
    # @return [Module, nil] the grammar module if available, otherwise nil
    def grammar_module
      available? # Ensure we've tried to load
      @grammar_module
    end

    # Register this Citrus grammar with TreeHaver
    #
    # After registration, the language can be used via:
    #   TreeHaver::Language.{language_name}
    #
    # @param raise_on_missing [Boolean] if true, raises when grammar not available
    # @return [Boolean] true if registration succeeded
    # @raise [NotAvailable] if grammar not available and raise_on_missing is true
    def register!(raise_on_missing: false)
      unless available?
        raise NotAvailable, not_found_message if raise_on_missing

        return false
      end

      TreeHaver.register_language(
        @language_name,
        grammar_module: @grammar_module,
        gem_name: @gem_name,
      )
      true
    end

    # Get debug information about the search
    #
    # @return [Hash] diagnostic information
    def search_info
      {
        language: @language_name,
        gem_name: @gem_name,
        grammar_const: @grammar_const,
        require_path: @require_path,
        available: available?,
        grammar_module: @grammar_module&.name,
      }
    end

    # Get a human-readable error message when grammar is not found
    #
    # @return [String] error message with installation hints
    def not_found_message
      "Citrus grammar for #{@language_name} not found. " \
        "Install #{@gem_name} gem: gem install #{@gem_name}"
    end

    private

    # Require the grammar gem.
    #
    # Some gems are required by their literal gem name rather than the
    # dash-to-slash form, so an inferred path falls back to the gem name.
    # The fallback is skipped when the caller supplied require_path, or when
    # the LoadError came from a nested require inside the gem (the missing
    # file is then not the entry point we guessed).
    #
    # @return [void]
    # @raise [LoadError] the original error when nothing could be required
    def require_grammar_gem
      require @require_path
    rescue LoadError => e
      guessable = @require_path_inferred && @gem_name != @require_path
      raise unless guessable && e.path == @require_path

      begin
        require @gem_name
      rescue LoadError
        # Report the failure for the path we advertise, not the fallback.
        raise e
      end
      @require_path = @gem_name
    end

    # Resolve a constant path like "TomlRB::Document"
    #
    # A leading "::" (fully qualified form) is tolerated.
    #
    # @param const_path [String] constant path
    # @return [Object] the constant
    # @raise [NameError] if constant not found
    def resolve_constant(const_path)
      const_path.sub(/\A::/, "").split("::").reduce(Object) do |scope, segment|
        scope.const_get(segment)
      end
    end
  end
end
|
|
@@ -142,14 +142,37 @@ module TreeHaver
|
|
|
142
142
|
# Locate the grammar library, preferring the environment variable override.
#
# Side effect: records in @env_rejection_reason why the environment variable
# value was rejected (nil when it was accepted or the variable is unset).
#
# @return [String, nil] the first usable library path, or nil if none exists
def find_library_path
  # Forget the verdict of any previous call so a reason recorded for an old
  # value of the environment variable is not reported after it is unset.
  @env_rejection_reason = nil

  # Check environment variable first (highest priority)
  env_path = ENV[env_var_name]
  if env_path
    # Store why env path was rejected for better error messages
    @env_rejection_reason = validate_env_path(env_path)
    return env_path if @env_rejection_reason.nil?
  end

  # Search all paths (these are constructed from trusted base dirs)
  search_paths.find { |path| File.exist?(path) }
end
|
|
152
154
|
|
|
155
|
+
# Validate an environment variable path and return reason if invalid
#
# Checks run in order: surrounding whitespace, PathValidator's safety check,
# existence, and finally that the path is a regular file (a directory exists
# but can never be loaded as a library).
#
# @param path [String] value read from the environment variable
# @return [String, nil] rejection reason or nil if valid
def validate_env_path(path)
  # Check for leading/trailing whitespace
  trimmed = path.strip
  if path != trimmed
    return "contains leading or trailing whitespace (use #{trimmed.inspect})"
  end

  # Check if path is safe
  unless PathValidator.safe_library_path?(path)
    return "failed security validation (may contain path traversal or suspicious characters)"
  end

  # Check if file exists
  return "file does not exist" unless File.exist?(path)

  # Something exists there, but only a regular file can be a grammar library
  return "is not a regular file" unless File.file?(path)

  nil # Valid!
end
|
|
175
|
+
|
|
153
176
|
# Find the grammar library path with strict security validation
|
|
154
177
|
#
|
|
155
178
|
# This method only returns paths that are in trusted system directories.
|
|
@@ -205,15 +228,17 @@ module TreeHaver
|
|
|
205
228
|
#
|
|
206
229
|
# @return [Hash] diagnostic information
|
|
207
230
|
# Collect diagnostic details about the grammar lookup.
#
# The lookup runs first because find_library_path is what fills in
# @env_rejection_reason, which is reported below.
#
# @return [Hash] diagnostic information
def search_info
  located = find_library_path

  details = {}
  details[:language] = @language_name
  details[:env_var] = env_var_name
  details[:env_value] = ENV[env_var_name]
  details[:env_rejection_reason] = @env_rejection_reason
  details[:symbol] = symbol_name
  details[:library_filename] = library_filename
  details[:search_paths] = search_paths
  details[:found_path] = located
  details[:available] = !located.nil?
  details
end
|
|
219
244
|
|
|
@@ -221,9 +246,19 @@ module TreeHaver
|
|
|
221
246
|
#
|
|
222
247
|
# @return [String] error message with installation hints
|
|
223
248
|
# Build a human-readable explanation of why the grammar was not found.
#
# The environment variable value is validated here directly instead of
# reading @env_rejection_reason, which is only set as a side effect of
# find_library_path; the message is therefore correct regardless of which
# methods were called beforehand.
#
# @return [String] error message with installation hints
def not_found_message
  env_value = ENV[env_var_name]
  rejection = env_value && validate_env_path(env_value)

  detail =
    if rejection
      # Env var is set but rejected
      " #{env_var_name} is set to #{env_value.inspect} but #{rejection}."
    elsif env_value
      " #{env_var_name} is set but was not used (file may have been removed)."
    else
      " Searched: #{search_paths.join(", ")}."
    end

  "tree-sitter #{@language_name} grammar not found." \
    "#{detail}" \
    " Install tree-sitter-#{@language_name} or set #{env_var_name} to a valid path."
end
|
|
228
263
|
|
|
229
264
|
private
|
|
@@ -4,86 +4,93 @@ module TreeHaver
|
|
|
4
4
|
# Thread-safe language registrations and cache for loaded Language handles
|
|
5
5
|
#
|
|
6
6
|
# The LanguageRegistry provides two main functions:
|
|
7
|
-
# 1. **Registrations**: Store mappings from language names to
|
|
7
|
+
# 1. **Registrations**: Store mappings from language names to backend-specific configurations
|
|
8
8
|
# 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
|
|
9
9
|
#
|
|
10
|
-
#
|
|
10
|
+
# The registry supports multiple backends for the same language, allowing runtime
|
|
11
|
+
# switching, benchmarking, and fallback scenarios.
|
|
11
12
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
13
|
+
# Registration structure:
|
|
14
|
+
# @registrations = {
|
|
15
|
+
# toml: {
|
|
16
|
+
# tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
|
|
17
|
+
# citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
18
|
+
# }
|
|
19
|
+
# }
|
|
20
|
+
#
|
|
21
|
+
# @example Register tree-sitter grammar
|
|
22
|
+
# TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
|
|
23
|
+
# path: "/path/to/lib.so", symbol: "tree_sitter_toml")
|
|
24
|
+
#
|
|
25
|
+
# @example Register Citrus grammar
|
|
26
|
+
# TreeHaver::LanguageRegistry.register(:toml, :citrus,
|
|
27
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
18
28
|
#
|
|
19
29
|
# @api private
|
|
20
30
|
module LanguageRegistry
|
|
21
31
|
@mutex = Mutex.new
|
|
22
|
-
@cache = {}
|
|
23
|
-
@registrations = {}
|
|
32
|
+
@cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
33
|
+
@registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
24
34
|
|
|
25
35
|
module_function
|
|
26
36
|
|
|
27
|
-
# Register a language
|
|
37
|
+
# Register a language for a specific backend
|
|
28
38
|
#
|
|
29
|
-
# Stores
|
|
30
|
-
#
|
|
31
|
-
# accessed via dynamic helpers on {TreeHaver::Language}.
|
|
39
|
+
# Stores backend-specific configuration for a language. Multiple backends
|
|
40
|
+
# can be registered for the same language without conflict.
|
|
32
41
|
#
|
|
33
42
|
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
|
|
34
|
-
# @param
|
|
35
|
-
# @param
|
|
43
|
+
# @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :mri, :rust, :ffi, :java)
|
|
44
|
+
# @param config [Hash] backend-specific configuration
|
|
45
|
+
# @option config [String] :path tree-sitter library path (for tree-sitter backends)
|
|
46
|
+
# @option config [String] :symbol exported symbol name (for tree-sitter backends)
|
|
47
|
+
# @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
|
|
48
|
+
# @option config [String] :gem_name gem name for error messages (for Citrus backend)
|
|
36
49
|
# @return [void]
|
|
37
|
-
# @example
|
|
38
|
-
# LanguageRegistry.register(:toml,
|
|
39
|
-
|
|
50
|
+
# @example Register tree-sitter grammar
|
|
51
|
+
# LanguageRegistry.register(:toml, :tree_sitter,
|
|
52
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
53
|
+
# @example Register Citrus grammar
|
|
54
|
+
# LanguageRegistry.register(:toml, :citrus,
|
|
55
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
56
|
+
# Record the configuration of one backend for one language.
#
# Both identifiers are converted to symbols. Under the registry mutex the
# language's backend table is created on first use and the given options,
# minus any nil values, are stored under the backend key, replacing whatever
# was stored there before.
#
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
# @param backend_type [Symbol, String] backend identifier (e.g., :tree_sitter, :citrus)
# @param config [Hash] backend-specific configuration; nil values are dropped
# @return [void]
def register(name, backend_type, **config)
  language = name.to_sym
  backend = backend_type.to_sym

  @mutex.synchronize do
    backends = (@registrations[language] ||= {})
    backends[backend] = config.compact
  end

  nil
end
|
|
62
66
|
|
|
63
|
-
# Fetch a
|
|
67
|
+
# Fetch registration entries for a language
|
|
64
68
|
#
|
|
65
|
-
# Returns
|
|
69
|
+
# Returns all backend-specific configurations for a language.
|
|
66
70
|
#
|
|
67
71
|
# @param name [Symbol, String] language identifier
|
|
68
|
-
# @
|
|
69
|
-
# @
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
# @param backend_type [Symbol, nil] optional backend type to filter by
|
|
73
|
+
# @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config
|
|
74
|
+
# @example Get all backends
|
|
75
|
+
# entries = LanguageRegistry.registered(:toml)
|
|
76
|
+
# # => {
|
|
77
|
+
# # tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
|
|
78
|
+
# # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
79
|
+
# # }
|
|
80
|
+
# @example Get specific backend
|
|
81
|
+
# entry = LanguageRegistry.registered(:toml, :citrus)
|
|
82
|
+
# # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
83
|
+
# Look up what has been registered for a language.
#
# The result is a shallow copy taken while holding the registry mutex. The
# per-language table is mutated by register under that mutex, so handing out
# the live hash would let callers iterate or modify guarded state without
# the lock.
#
# @param name [Symbol, String] language identifier
# @param backend_type [Symbol, String, nil] optional backend type to filter by
# @return [Hash{Symbol => Hash}, Hash, nil] all backends, the requested
#   backend's config, or nil when nothing matching is registered
def registered(name, backend_type = nil)
  @mutex.synchronize do
    backends = @registrations[name.to_sym]

    if backends.nil?
      nil
    elsif backend_type
      backends[backend_type.to_sym]&.dup
    else
      backends.dup
    end
  end
end
|
|
88
95
|
|
|
89
96
|
# Fetch a cached language by key or compute and store it
|
|
@@ -119,21 +126,5 @@ module TreeHaver
|
|
|
119
126
|
@mutex.synchronize { @cache.clear }
|
|
120
127
|
nil
|
|
121
128
|
end
|
|
122
|
-
|
|
123
|
-
# Clear everything (registrations and cache)
|
|
124
|
-
#
|
|
125
|
-
# Removes all registered languages and all cached Language objects.
|
|
126
|
-
# Useful for complete teardown in tests.
|
|
127
|
-
#
|
|
128
|
-
# @return [void]
|
|
129
|
-
# @example
|
|
130
|
-
# LanguageRegistry.clear_all!
|
|
131
|
-
def clear_all!
|
|
132
|
-
@mutex.synchronize do
|
|
133
|
-
@registrations.clear
|
|
134
|
-
@cache.clear
|
|
135
|
-
end
|
|
136
|
-
nil
|
|
137
|
-
end
|
|
138
129
|
end
|
|
139
130
|
end
|