tree_haver 5.0.4 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/tree_haver/backend_context.rb +28 -0
- data/lib/tree_haver/backend_registry.rb +19 -432
- data/lib/tree_haver/contracts.rb +460 -0
- data/lib/tree_haver/kaitai_backend.rb +30 -0
- data/lib/tree_haver/language_pack.rb +190 -0
- data/lib/tree_haver/peg_backends.rb +76 -0
- data/lib/tree_haver/version.rb +1 -12
- data/lib/tree_haver.rb +7 -1316
- data.tar.gz.sig +0 -0
- metadata +34 -245
- metadata.gz.sig +0 -0
- data/CHANGELOG.md +0 -1366
- data/CITATION.cff +0 -20
- data/CODE_OF_CONDUCT.md +0 -134
- data/CONTRIBUTING.md +0 -359
- data/FUNDING.md +0 -74
- data/LICENSE.txt +0 -21
- data/README.md +0 -2347
- data/REEK +0 -0
- data/RUBOCOP.md +0 -71
- data/SECURITY.md +0 -21
- data/lib/tree_haver/backend_api.rb +0 -349
- data/lib/tree_haver/backends/citrus.rb +0 -487
- data/lib/tree_haver/backends/ffi.rb +0 -1009
- data/lib/tree_haver/backends/java.rb +0 -893
- data/lib/tree_haver/backends/mri.rb +0 -362
- data/lib/tree_haver/backends/parslet.rb +0 -560
- data/lib/tree_haver/backends/prism.rb +0 -471
- data/lib/tree_haver/backends/psych.rb +0 -375
- data/lib/tree_haver/backends/rust.rb +0 -239
- data/lib/tree_haver/base/language.rb +0 -98
- data/lib/tree_haver/base/node.rb +0 -322
- data/lib/tree_haver/base/parser.rb +0 -24
- data/lib/tree_haver/base/point.rb +0 -48
- data/lib/tree_haver/base/tree.rb +0 -128
- data/lib/tree_haver/base.rb +0 -12
- data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
- data/lib/tree_haver/compat.rb +0 -43
- data/lib/tree_haver/grammar_finder.rb +0 -374
- data/lib/tree_haver/language.rb +0 -295
- data/lib/tree_haver/language_registry.rb +0 -190
- data/lib/tree_haver/library_path_utils.rb +0 -80
- data/lib/tree_haver/node.rb +0 -579
- data/lib/tree_haver/parser.rb +0 -438
- data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
- data/lib/tree_haver/path_validator.rb +0 -353
- data/lib/tree_haver/point.rb +0 -27
- data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
- data/lib/tree_haver/rspec/testable_node.rb +0 -217
- data/lib/tree_haver/rspec.rb +0 -33
- data/lib/tree_haver/tree.rb +0 -258
- data/sig/tree_haver/backends.rbs +0 -352
- data/sig/tree_haver/grammar_finder.rbs +0 -29
- data/sig/tree_haver/path_validator.rbs +0 -32
- data/sig/tree_haver.rbs +0 -234
|
@@ -1,218 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module TreeHaver
|
|
4
|
-
# Utility for finding and registering Citrus grammar gems.
|
|
5
|
-
#
|
|
6
|
-
# CitrusGrammarFinder provides language-agnostic discovery of Citrus grammar
|
|
7
|
-
# gems. Given a language name and gem information, it attempts to load the
|
|
8
|
-
# grammar and register it with tree_haver.
|
|
9
|
-
#
|
|
10
|
-
# Unlike tree-sitter grammars (which are .so files), Citrus grammars are
|
|
11
|
-
# Ruby modules that respond to .parse(source). This class handles the
|
|
12
|
-
# discovery and registration of these grammars.
|
|
13
|
-
#
|
|
14
|
-
# @example Basic usage with toml-rb
|
|
15
|
-
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
16
|
-
# language: :toml,
|
|
17
|
-
# gem_name: "toml-rb",
|
|
18
|
-
# grammar_const: "TomlRB::Document"
|
|
19
|
-
# )
|
|
20
|
-
# finder.register! if finder.available?
|
|
21
|
-
#
|
|
22
|
-
# @example With custom require path
|
|
23
|
-
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
24
|
-
# language: :json,
|
|
25
|
-
# gem_name: "json-rb",
|
|
26
|
-
# grammar_const: "JsonRB::Grammar",
|
|
27
|
-
# require_path: "json/rb"
|
|
28
|
-
# )
|
|
29
|
-
#
|
|
30
|
-
# @see GrammarFinder For tree-sitter grammar discovery
|
|
31
|
-
class CitrusGrammarFinder
  # @return [Symbol] the language identifier
  attr_reader :language_name

  # @return [String] the gem name (used for registration and install hints)
  attr_reader :gem_name

  # @return [String] the constant path to the grammar (e.g., "TomlRB::Document")
  attr_reader :grammar_const

  # @return [String, nil] the path handed to +require+; this is the custom
  #   require path when one was given, otherwise +gem_name+ unmodified
  attr_reader :require_path

  # Initialize a Citrus grammar finder
  #
  # Nothing is loaded here; the gem is only required on the first call to
  # {#available?} (directly or via {#grammar_module}, {#register!} or
  # {#search_info}).
  #
  # @param language [Symbol, String] the language name (e.g., :toml, :json)
  # @param gem_name [String] the gem name (e.g., "toml-rb")
  # @param grammar_const [String] constant path to grammar (e.g., "TomlRB::Document");
  #   a leading "::" is accepted
  # @param require_path [String, nil] custom require path (defaults to gem_name as-is)
  def initialize(language:, gem_name:, grammar_const:, require_path: nil)
    @language_name = language.to_sym
    @gem_name = gem_name
    @grammar_const = grammar_const
    @require_path = require_path || gem_name
    @load_attempted = false
    @available = false
    @grammar_module = nil
  end

  # Check if the Citrus grammar is available
  #
  # Attempts to require the gem and resolve the grammar constant, then
  # verifies that the constant responds to +parse+. The result is cached
  # after the first call, so the require is attempted at most once.
  #
  # Set the TREE_HAVER_DEBUG environment variable to get diagnostics on
  # stderr explaining why a grammar was rejected.
  #
  # @return [Boolean] true if grammar is available
  def available?
    return @available if @load_attempted

    @load_attempted = true
    debug = ENV["TREE_HAVER_DEBUG"]

    # Guard against nil require_path (can happen if gem_name was nil)
    if @require_path.nil? || @require_path.empty?
      warn("CitrusGrammarFinder: require_path is nil or empty for #{@language_name}") if debug
      return @available = false
    end

    @available = load_grammar(debug)
  end

  # Get the resolved grammar module
  #
  # @return [Module, nil] the grammar module if available, nil otherwise
  #   (including when the constant resolved but does not respond to +parse+)
  def grammar_module
    available? # Ensure we've tried to load
    @grammar_module
  end

  # Register this Citrus grammar with TreeHaver
  #
  # After registration, the language can be used via:
  # TreeHaver::Language.{language_name}
  #
  # @param raise_on_missing [Boolean] if true, raises when grammar not available
  # @return [Boolean] true if registration succeeded
  # @raise [NotAvailable] if grammar not available and raise_on_missing is true
  def register!(raise_on_missing: false)
    unless available?
      raise NotAvailable, not_found_message if raise_on_missing

      return false
    end

    TreeHaver.register_language(
      @language_name,
      grammar_module: @grammar_module,
      gem_name: @gem_name,
    )
    true
  end

  # Get debug information about the search
  #
  # Triggers the (cached) availability check as a side effect.
  #
  # @return [Hash] diagnostic information
  def search_info
    {
      language: @language_name,
      gem_name: @gem_name,
      grammar_const: @grammar_const,
      require_path: @require_path,
      available: available?,
      grammar_module: @grammar_module&.name,
    }
  end

  # Get a human-readable error message when grammar is not found
  #
  # @return [String] error message with installation hints
  def not_found_message
    "Citrus grammar for #{@language_name} not found. " \
      "Install #{@gem_name} gem: gem install #{@gem_name}"
  end

  private

  # Require the gem, resolve the grammar constant and verify it can parse.
  #
  # @grammar_module is assigned only once the candidate has passed the
  # +parse+ check, so a rejected constant is never exposed to callers.
  #
  # @param debug [String, nil] truthy when verbose diagnostics are wanted
  # @return [Boolean] true when a usable grammar module was loaded
  def load_grammar(debug)
    require @require_path

    candidate = resolve_constant(@grammar_const)

    unless candidate.respond_to?(:parse)
      # :nocov: defensive - requires a gem with malformed grammar module
      warn_missing_parse(candidate) if debug
      return false
      # :nocov:
    end

    @grammar_module = candidate
    true
  rescue LoadError => e
    # :nocov: defensive - requires gem to not be installed
    # Only show LoadError details when debugging
    if debug
      warn("CitrusGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
      warn_backtrace("LoadError backtrace", e)
    end
    false
    # :nocov:
  rescue NameError => e
    # :nocov: defensive - requires gem with missing constant
    # Only show NameError details when debugging
    if debug
      warn("CitrusGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
      warn_backtrace("NameError backtrace", e)
    end
    false
    # :nocov:
  rescue TypeError => e
    # :nocov: defensive - TruffleRuby-specific edge case
    # TruffleRuby's bundled_gems.rb can raise TypeError when File.path is called on nil
    # This happens in bundled_gems.rb:124 warning? method when caller locations return nil
    # Always warn about TypeError as it indicates a platform-specific issue
    warn("CitrusGrammarFinder: TypeError during load of '#{@require_path}': #{e.class}: #{e.message}")
    warn("CitrusGrammarFinder: This may be a TruffleRuby bundled_gems.rb issue")
    warn_backtrace("TypeError backtrace", e) if debug
    false
    # :nocov:
  rescue => e
    # :nocov: defensive - catch-all for unexpected errors
    # Always warn about unexpected errors
    warn("CitrusGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
    warn_backtrace("backtrace", e) if debug
    false
    # :nocov:
  end

  # :nocov: defensive - only reached with a malformed grammar module
  # Describe a resolved constant that does not respond to +parse+, listing
  # what methods ARE available to help diagnose the issue.
  #
  # @param candidate [Object] the resolved constant that was rejected
  # @return [void]
  def warn_missing_parse(candidate)
    available_methods = candidate.methods(false).sort.first(20)
    warn("CitrusGrammarFinder: #{@grammar_const} doesn't respond to :parse")
    warn("CitrusGrammarFinder: #{@grammar_const}.class = #{candidate.class}")
    warn("CitrusGrammarFinder: #{@grammar_const} is a #{candidate.is_a?(Module) ? "Module" : "non-Module"}")
    warn("CitrusGrammarFinder: Available singleton methods (first 20): #{available_methods.inspect}")
    return unless candidate.respond_to?(:instance_methods)

    instance_methods = candidate.instance_methods(false).sort.first(20)
    warn("CitrusGrammarFinder: Available instance methods (first 20): #{instance_methods.inspect}")
  end

  # Emit the first ten backtrace frames of an error to stderr.
  #
  # @param label [String] heading for the backtrace (e.g., "LoadError backtrace")
  # @param error [Exception] the rescued error
  # @return [void]
  def warn_backtrace(label, error)
    warn("CitrusGrammarFinder: #{label}:\n #{error.backtrace&.first(10)&.join("\n ")}")
  end
  # :nocov:

  # Resolve a constant path like "TomlRB::Document"
  #
  # A leading "::" (fully-qualified form) is tolerated; without stripping it
  # the split would yield an empty first segment and lookup would fail.
  #
  # @param const_path [String] constant path
  # @return [Object] the constant
  # @raise [NameError] if constant not found
  def resolve_constant(const_path)
    const_path.to_s.sub(/\A::/, "").split("::").reduce(Object) do |mod, const_name|
      mod.const_get(const_name)
    end
  end
end
|
|
218
|
-
end
|
data/lib/tree_haver/compat.rb
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# Compatibility shim for code that expects TreeSitter constants
|
|
4
|
-
#
|
|
5
|
-
# When required, this file creates a TreeSitter module that maps to TreeHaver
|
|
6
|
-
# equivalents, allowing code written for ruby_tree_sitter to work with TreeHaver
|
|
7
|
-
# without modification.
|
|
8
|
-
#
|
|
9
|
-
# This shim is safe and idempotent:
|
|
10
|
-
# - If TreeSitter is already defined (real ruby_tree_sitter is loaded), this does nothing
|
|
11
|
-
# - If TreeSitter is not defined, it creates aliases to TreeHaver
|
|
12
|
-
#
|
|
13
|
-
# @example Using the compatibility shim
|
|
14
|
-
# require "tree_haver/compat"
|
|
15
|
-
#
|
|
16
|
-
# # Now code expecting TreeSitter will work
|
|
17
|
-
# parser = TreeSitter::Parser.new # Actually creates TreeHaver::Parser
|
|
18
|
-
# tree = parser.parse(source)
|
|
19
|
-
#
|
|
20
|
-
# @note This is an opt-in feature. Only require this file if you need compatibility
|
|
21
|
-
# @see TreeHaver The main module this aliases to
|
|
22
|
-
|
|
23
|
-
unless defined?(TreeSitter)
  # Compatibility module aliasing TreeHaver classes to TreeSitter.
  #
  # Each constant below is the very same object as its TreeHaver
  # counterpart, so code written against ruby_tree_sitter's constant names
  # ends up using TreeHaver's implementations.
  #
  # @note Only defined if TreeSitter doesn't already exist; when the real
  #   ruby_tree_sitter is loaded first, this whole block is skipped.
  # @note TreeHaver must already be loaded when this file is required,
  #   because the aliases are resolved immediately.
  module TreeSitter
    Error = TreeHaver::Error
    Parser = TreeHaver::Parser
    Tree = TreeHaver::Tree
    Node = TreeHaver::Node
    Language = TreeHaver::Language
  end
end
|
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "rbconfig"
|
|
4
|
-
|
|
5
|
-
module TreeHaver
|
|
6
|
-
# Generic utility for finding tree-sitter grammar shared libraries.
|
|
7
|
-
#
|
|
8
|
-
# GrammarFinder provides platform-aware discovery of tree-sitter grammar
|
|
9
|
-
# libraries. Given a language name, it searches common installation paths
|
|
10
|
-
# and supports environment variable overrides.
|
|
11
|
-
#
|
|
12
|
-
# This class is designed to be used by language-specific merge gems
|
|
13
|
-
# (toml-merge, json-merge, bash-merge, etc.) without requiring TreeHaver
|
|
14
|
-
# to have knowledge of each specific language.
|
|
15
|
-
#
|
|
16
|
-
# == Security Considerations
|
|
17
|
-
#
|
|
18
|
-
# Loading shared libraries is inherently dangerous as it executes arbitrary
|
|
19
|
-
# native code. GrammarFinder performs the following security validations:
|
|
20
|
-
#
|
|
21
|
-
# - Language names are validated to contain only safe characters
|
|
22
|
-
# - Paths from environment variables are validated before use
|
|
23
|
-
# - Path traversal attempts (../) are rejected
|
|
24
|
-
# - Only files with expected extensions (.so, .dylib, .dll) are accepted
|
|
25
|
-
#
|
|
26
|
-
# For additional security, use {#find_library_path_safe} which only returns
|
|
27
|
-
# paths from trusted system directories.
|
|
28
|
-
#
|
|
29
|
-
# @example Basic usage
|
|
30
|
-
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
31
|
-
# path = finder.find_library_path
|
|
32
|
-
# # => "/usr/lib/libtree-sitter-toml.so"
|
|
33
|
-
#
|
|
34
|
-
# @example Check availability
|
|
35
|
-
# finder = TreeHaver::GrammarFinder.new(:json)
|
|
36
|
-
# if finder.available?
|
|
37
|
-
# language = TreeHaver::Language.load(finder.language_name, finder.find_library_path)
|
|
38
|
-
# end
|
|
39
|
-
#
|
|
40
|
-
# @example Register with TreeHaver
|
|
41
|
-
# finder = TreeHaver::GrammarFinder.new(:bash)
|
|
42
|
-
# finder.register! if finder.available?
|
|
43
|
-
# # Now you can use: TreeHaver::Language.bash
|
|
44
|
-
#
|
|
45
|
-
# @example With custom search paths
|
|
46
|
-
# finder = TreeHaver::GrammarFinder.new(:toml, extra_paths: ["/opt/custom/lib"])
|
|
47
|
-
#
|
|
48
|
-
# @example Secure mode (trusted directories only)
|
|
49
|
-
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
50
|
-
# path = finder.find_library_path_safe # Only returns paths in trusted dirs
|
|
51
|
-
#
|
|
52
|
-
# @see PathValidator For details on security validations
|
|
53
|
-
class GrammarFinder
  # Common base directories where tree-sitter libraries are installed.
  # The platform-specific library filename is appended to each of them.
  BASE_SEARCH_DIRS = [
    "/usr/lib",
    "/usr/lib64",
    "/usr/local/lib",
    "/opt/homebrew/lib",
  ].freeze

  # @return [Symbol] the language identifier (always lowercased)
  attr_reader :language_name

  # @return [Array<String>] additional search directories provided at initialization
  attr_reader :extra_paths

  # Initialize a grammar finder for a specific language
  #
  # @param language_name [Symbol, String] the tree-sitter language name (e.g., :toml, :json, :bash)
  # @param extra_paths [Array<String>] additional directories to search (searched first after ENV)
  # @param validate [Boolean] if true, validates the language name (default: true)
  # @raise [ArgumentError] if language_name is invalid and validate is true
  def initialize(language_name, extra_paths: [], validate: true)
    name_str = language_name.to_s.downcase

    if validate && !PathValidator.safe_language_name?(name_str)
      raise ArgumentError, "Invalid language name: #{language_name.inspect}. " \
        "Language names must start with a letter and contain only lowercase letters, numbers, and underscores."
    end

    @language_name = name_str.to_sym
    @extra_paths = Array(extra_paths)
    # Populated by #find_library_path; nil until an ENV override is rejected.
    @env_rejection_reason = nil
  end

  # Get the environment variable name for this language
  #
  # @return [String] the ENV var name (e.g., "TREE_SITTER_TOML_PATH")
  def env_var_name
    "TREE_SITTER_#{@language_name.to_s.upcase}_PATH"
  end

  # Get the expected symbol name exported by the grammar library
  #
  # @return [String] the symbol name (e.g., "tree_sitter_toml")
  def symbol_name
    "tree_sitter_#{@language_name}"
  end

  # Get the library filename for the current platform
  #
  # @return [String] the library filename (e.g., "libtree-sitter-toml.so")
  def library_filename
    "libtree-sitter-#{@language_name}#{platform_extension}"
  end

  # Generate the list of candidate library paths for this language
  #
  # Order: extra_paths first, then the common system directories. The ENV
  # override is not part of this list; it is handled by {#find_library_path}.
  #
  # @return [Array<String>] all paths to search
  def search_paths
    # Compute the filename once instead of once per directory.
    filename = library_filename
    (@extra_paths + BASE_SEARCH_DIRS).map { |dir| File.join(dir, filename) }
  end

  # Find the grammar library path
  #
  # Searches in order:
  # 1. Environment variable override (validated for safety)
  # 2. Extra paths provided at initialization
  # 3. Common system installation paths
  #
  # @note Paths from ENV are validated using {PathValidator.safe_library_path?}
  #   to prevent path traversal and other attacks. Invalid ENV paths cause
  #   an error to be raised (Principle of Least Surprise - explicit paths must work).
  #
  # @note Setting the ENV variable to an empty string explicitly disables
  #   this grammar. This allows fallback to alternative backends (e.g., Citrus).
  #
  # @return [String, nil] the path to the library, or nil if not found
  # @raise [TreeHaver::NotAvailable] if ENV variable is set to an invalid path
  # @see #find_library_path_safe For stricter validation (trusted directories only)
  def find_library_path
    # Start from a clean slate so a verdict from an earlier call cannot leak
    # into diagnostics after the environment has changed.
    @env_rejection_reason = nil

    # Check environment variable first (highest priority)
    # Use key? to distinguish between "not set" and "set to empty"
    env_var = env_var_name
    if ENV[env_var] || ENV.key?(env_var)
      env_path = ENV[env_var]

      # :nocov: defensive - ENV.key? true with nil value is rare edge case
      if env_path.nil?
        @env_rejection_reason = "explicitly disabled (set to nil)"
        return
      end
      # :nocov:

      # Empty string means "explicitly skip this grammar"
      # This allows users to disable tree-sitter for specific languages
      # and fall back to alternative backends like Citrus
      if env_path.empty?
        @env_rejection_reason = "explicitly disabled (set to empty string)"
        return
      end

      # Store why env path was rejected for better error messages
      @env_rejection_reason = validate_env_path(env_path)

      # Principle of Least Surprise: If user explicitly sets an ENV variable
      # to a path, that path MUST work. Don't silently fall back to auto-discovery.
      if @env_rejection_reason
        raise TreeHaver::NotAvailable,
          "#{env_var} is set to #{env_path.inspect} but #{@env_rejection_reason}. " \
            "Either fix the path, unset the variable to use auto-discovery, " \
            "or set it to empty string to explicitly disable this grammar."
      end

      return env_path
    end

    # Search all paths (these are constructed from trusted base dirs)
    search_paths.find { |path| File.exist?(path) }
  end

  # Validate an environment variable path and return reason if invalid
  #
  # Checks, in order: surrounding whitespace, {PathValidator.safe_library_path?},
  # and that the file exists.
  #
  # @param path [String] the path taken from the environment
  # @return [String, nil] rejection reason or nil if valid
  def validate_env_path(path)
    # Check for leading/trailing whitespace
    stripped = path.strip
    if path != stripped
      return "contains leading or trailing whitespace (use #{stripped.inspect})"
    end

    # Check if path is safe
    unless PathValidator.safe_library_path?(path)
      return "failed security validation (may contain path traversal or suspicious characters)"
    end

    # Check if file exists
    return "file does not exist" unless File.exist?(path)

    nil # Valid!
  end

  # Find the grammar library path with strict security validation
  #
  # This method only returns paths that are in trusted system directories.
  # Use this when you want maximum security and don't need to support
  # custom installation locations.
  #
  # @return [String, nil] the path to the library, or nil if not found
  # @see PathValidator::TRUSTED_DIRECTORIES For the list of trusted directories
  def find_library_path_safe
    # Environment variable is NOT checked in safe mode - only trusted system paths
    search_paths.find do |path|
      File.exist?(path) && PathValidator.in_trusted_directory?(path)
    end
  end

  # Check if the grammar library is available AND usable
  #
  # This checks:
  # 1. The grammar library file exists
  # 2. The tree-sitter runtime is functional (can create a parser)
  #
  # This prevents registering grammars when tree-sitter isn't actually usable,
  # allowing clean fallback to alternative backends like Citrus.
  #
  # @return [Boolean] true if the library can be found AND tree-sitter runtime works
  # @raise [TreeHaver::NotAvailable] propagated from {#find_library_path} when
  #   the ENV override points at an invalid path
  def available?
    return false if find_library_path.nil?

    # Check if tree-sitter runtime is actually functional
    # This is cached at the class level since it's the same for all grammars
    self.class.tree_sitter_runtime_usable?
  end

  # Backends that use tree-sitter (require native runtime libraries)
  # Other backends (Citrus, Prism, Psych, etc.) don't use tree-sitter
  TREE_SITTER_BACKENDS = [
    TreeHaver::Backends::MRI,
    TreeHaver::Backends::FFI,
    TreeHaver::Backends::Rust,
    TreeHaver::Backends::Java,
  ].freeze

  class << self
    # Check if the tree-sitter runtime is usable
    #
    # Tests whether we can actually create a tree-sitter parser.
    # Result is cached since this is expensive and won't change during runtime.
    #
    # @return [Boolean] true if tree-sitter runtime is functional
    def tree_sitter_runtime_usable?
      return @tree_sitter_runtime_usable if defined?(@tree_sitter_runtime_usable)

      @tree_sitter_runtime_usable = begin
        # Try to create a parser using the current backend
        mod = TreeHaver.resolve_backend_module(nil)

        # Only tree-sitter backends are relevant here
        # Non-tree-sitter backends (Citrus, Prism, Psych, etc.) don't use grammar files
        if mod.nil? || !TREE_SITTER_BACKENDS.include?(mod)
          false
        else
          # Try to instantiate a parser - this will fail if runtime isn't available
          mod::Parser.new
          true
        end
      rescue NoMethodError, LoadError, NotAvailable => _e
        # Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
        false
      end
    end

    # Reset the cached tree-sitter runtime check (for testing)
    #
    # @api private
    def reset_runtime_check!
      remove_instance_variable(:@tree_sitter_runtime_usable) if defined?(@tree_sitter_runtime_usable)
    end
  end

  # Check if the grammar library is available in a trusted directory
  #
  # @return [Boolean] true if the library can be found in a trusted directory
  # @see #find_library_path_safe
  def available_safe?
    !find_library_path_safe.nil?
  end

  # Register this language with TreeHaver
  #
  # After registration, the language can be loaded via dynamic method
  # (e.g., `TreeHaver::Language.toml`).
  #
  # @param raise_on_missing [Boolean] if true, raises when library not found
  # @return [Boolean] true if registration succeeded
  # @raise [NotAvailable] if library not found and raise_on_missing is true
  def register!(raise_on_missing: false)
    path = find_library_path
    unless path
      raise NotAvailable, not_found_message if raise_on_missing

      return false
    end

    TreeHaver.register_language(@language_name, path: path, symbol: symbol_name)
    true
  end

  # Get debug information about the search
  #
  # Note that +:available+ only reports whether a library path was found; it
  # does not include the runtime check performed by {#available?}.
  #
  # @return [Hash] diagnostic information
  # @raise [TreeHaver::NotAvailable] propagated from {#find_library_path} when
  #   the ENV override points at an invalid path
  def search_info
    found = find_library_path # This populates @env_rejection_reason
    {
      language: @language_name,
      env_var: env_var_name,
      env_value: ENV[env_var_name],
      env_rejection_reason: @env_rejection_reason,
      symbol: symbol_name,
      library_filename: library_filename,
      search_paths: search_paths,
      found_path: found,
      available: !found.nil?,
    }
  end

  # Get a human-readable error message when library is not found
  #
  # The middle sentence depends on the ENV override: why it was rejected,
  # that a runtime is missing, that it was ignored, or (when it is unset)
  # which paths were searched.
  #
  # @return [String] error message with installation hints
  def not_found_message
    msg = "tree-sitter #{@language_name} grammar not found."

    # Check if env var is set but rejected
    env_value = ENV[env_var_name]
    msg += if env_value && @env_rejection_reason
      " #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
    elsif env_value && File.exist?(env_value) && !self.class.tree_sitter_runtime_usable?
      " #{env_var_name} is set and file exists, but no tree-sitter runtime is available. " \
        "Add ruby_tree_sitter, ffi, or tree_stump gem to your Gemfile."
    elsif env_value
      " #{env_var_name} is set but was not used (file may have been removed)."
    else
      " Searched: #{search_paths.join(", ")}."
    end

    msg + " Install tree-sitter-#{@language_name} or set #{env_var_name} to a valid path."
  end

  private

  # Get the platform-appropriate shared library extension
  #
  # @return [String] ".dylib" on macOS, ".dll" on Windows (mswin/mingw/cygwin),
  #   ".so" everywhere else
  def platform_extension
    case RbConfig::CONFIG["host_os"]
    when /darwin/i
      ".dylib"
    when /mswin|mingw|cygwin/i
      ".dll"
    else
      ".so"
    end
  end
end
|
|
374
|
-
end
|