tree_haver 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +48 -0
- data/CITATION.cff +20 -0
- data/CODE_OF_CONDUCT.md +134 -0
- data/CONTRIBUTING.md +227 -0
- data/FUNDING.md +74 -0
- data/LICENSE.txt +21 -0
- data/README.md +1260 -0
- data/REEK +0 -0
- data/RUBOCOP.md +71 -0
- data/SECURITY.md +21 -0
- data/lib/tree_haver/backends/ffi.rb +410 -0
- data/lib/tree_haver/backends/java.rb +568 -0
- data/lib/tree_haver/backends/mri.rb +129 -0
- data/lib/tree_haver/backends/rust.rb +175 -0
- data/lib/tree_haver/compat.rb +43 -0
- data/lib/tree_haver/grammar_finder.rb +245 -0
- data/lib/tree_haver/language_registry.rb +139 -0
- data/lib/tree_haver/path_validator.rb +333 -0
- data/lib/tree_haver/version.rb +20 -0
- data/lib/tree_haver.rb +710 -0
- data/sig/tree_haver/backends.rbs +285 -0
- data/sig/tree_haver/grammar_finder.rbs +29 -0
- data/sig/tree_haver/path_validator.rbs +31 -0
- data/sig/tree_haver.rbs +131 -0
- data.tar.gz.sig +0 -0
- metadata +298 -0
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
  # Security utilities for validating paths and names before loading shared libraries.
  #
  # Loading a shared library (.so/.dylib/.dll) executes arbitrary native code.
  # This module provides defense-in-depth validations to reduce the attack
  # surface when paths come from potentially untrusted sources such as
  # environment variables or user input.
  #
  # @example Validate a path before loading
  #   path = ENV["TREE_SITTER_TOML_PATH"]
  #   if TreeHaver::PathValidator.safe_library_path?(path)
  #     language = TreeHaver::Language.from_library(path)
  #   else
  #     raise "Unsafe path: #{path}"
  #   end
  #
  # @example Register custom trusted directories
  #   # For Homebrew on Linux (linuxbrew)
  #   TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
  #
  #   # For luarocks-installed grammars
  #   TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
  #
  #   # Or via environment variable (comma-separated)
  #   # export TREE_HAVER_TRUSTED_DIRS="/home/linuxbrew/.linuxbrew/Cellar,~/.local/share/mise"
  #
  # @note These validations provide defense-in-depth but cannot guarantee safety.
  #   Loading shared libraries from untrusted sources is always risky.
  module PathValidator
    # Allowed shared library extensions (Linux, macOS, Windows)
    ALLOWED_EXTENSIONS = %w[.so .dylib .dll].freeze

    # Default directories that are generally trusted for system libraries.
    # These are searched by the dynamic linker anyway.
    DEFAULT_TRUSTED_DIRECTORIES = [
      "/usr/lib",
      "/usr/lib64",
      "/usr/lib/x86_64-linux-gnu",
      "/usr/lib/aarch64-linux-gnu",
      "/usr/local/lib",
      "/opt/homebrew/lib",
      "/opt/local/lib",
    ].freeze

    # Environment variable for adding trusted directories (comma-separated)
    TRUSTED_DIRS_ENV_VAR = "TREE_HAVER_TRUSTED_DIRS"

    # Maximum reasonable path length (prevents DoS via extremely long paths)
    MAX_PATH_LENGTH = 4096

    # Maximum accepted length of a language name
    MAX_LANGUAGE_NAME_LENGTH = 64

    # Maximum accepted length of a symbol name
    MAX_SYMBOL_NAME_LENGTH = 256

    # Pattern for valid library filenames (alphanumeric, hyphens, underscores, dots).
    # This prevents shell metacharacters and other injection attempts.
    VALID_FILENAME_PATTERN = /\A[a-zA-Z0-9][a-zA-Z0-9._-]*\z/

    # Pattern for valid language names (lowercase alphanumeric and underscores)
    VALID_LANGUAGE_PATTERN = /\A[a-z][a-z0-9_]*\z/

    # Pattern for valid symbol names (C identifier format)
    VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/

    # Backend names accepted by {safe_backend_name?}
    VALID_BACKEND_NAMES = %w[auto mri rust ffi java].freeze

    # Pattern for Windows drive-letter absolute paths such as C:\path or D:/path
    WINDOWS_ABSOLUTE_PATTERN = %r{\A[A-Za-z]:[\\/]}

    @custom_trusted_directories = []
    @mutex = Mutex.new

    module_function

    # Get all trusted directories (default + custom + from ENV)
    #
    # Blank entries in the environment variable are ignored: expanding an
    # empty string yields the current working directory, which must never
    # become trusted by accident (e.g. from "/a, ,/b").
    #
    # @return [Array<String>] list of all trusted directory prefixes
    def trusted_directories
      dirs = DEFAULT_TRUSTED_DIRECTORIES.dup

      # Add custom registered directories
      @mutex.synchronize { dirs.concat(@custom_trusted_directories) }

      # Add directories from environment variable
      ENV[TRUSTED_DIRS_ENV_VAR].to_s.split(",").each do |entry|
        entry = entry.strip
        next if entry.empty?

        expanded = File.expand_path(entry)
        # :nocov:
        # File.expand_path returns an absolute path on supported platforms;
        # this guard is purely defensive.
        dirs << expanded if absolute_path?(expanded)
        # :nocov:
      end

      dirs.uniq
    end

    # Register a custom trusted directory
    #
    # Use this to add directories where you install tree-sitter grammars,
    # such as Homebrew locations, luarocks paths, or other package managers.
    # The directory is stored after File.expand_path, so "~" is expanded and
    # a relative path is resolved against the current working directory.
    #
    # @param directory [String] path to trust (~ is expanded)
    # @return [void]
    # @raise [ArgumentError] if directory is nil/blank or does not expand to
    #   an absolute path
    #
    # @example Register linuxbrew directory
    #   TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
    #
    # @example Register user's luarocks directory
    #   TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
    def add_trusted_directory(directory)
      # A blank value would expand to the current working directory and
      # silently trust it, so reject it up front.
      if directory.to_s.strip.empty?
        raise ArgumentError, "Trusted directory must be an absolute path: #{directory.inspect}"
      end

      expanded = File.expand_path(directory)

      # :nocov:
      # File.expand_path returns an absolute path on supported platforms
      # (drive-letter form on Windows); this guard is purely defensive.
      unless absolute_path?(expanded)
        raise ArgumentError, "Trusted directory must be an absolute path: #{directory.inspect}"
      end
      # :nocov:

      @mutex.synchronize do
        @custom_trusted_directories << expanded unless @custom_trusted_directories.include?(expanded)
      end
      nil
    end

    # Remove a custom trusted directory
    #
    # @param directory [String] the directory to remove (expanded the same
    #   way as in {add_trusted_directory})
    # @return [void]
    def remove_trusted_directory(directory)
      expanded = File.expand_path(directory)
      @mutex.synchronize { @custom_trusted_directories.delete(expanded) }
      nil
    end

    # Clear all custom trusted directories
    #
    # Does not affect DEFAULT_TRUSTED_DIRECTORIES or ENV-based directories.
    # Primarily useful for testing.
    #
    # @return [void]
    def clear_custom_trusted_directories!
      @mutex.synchronize { @custom_trusted_directories.clear }
      nil
    end

    # Get the list of custom trusted directories (for debugging)
    #
    # @return [Array<String>] copy of the custom registered directories
    def custom_trusted_directories
      @mutex.synchronize { @custom_trusted_directories.dup }
    end

    # Validate a path is safe for loading as a shared library
    #
    # Checks performed (see {validation_errors} for the individual rules):
    # - Path is not nil or empty
    # - Path length is reasonable
    # - Path does not contain null bytes
    # - Path is absolute (no relative path traversal)
    # - Path contains no "." / ".." components
    # - Path has an allowed extension
    # - Filename portion matches safe pattern
    #
    # @param path [String, nil] the path to validate
    # @param require_trusted_dir [Boolean] if true, path must be in a trusted directory
    # @return [Boolean] true if the path passes all safety checks
    #
    # @example
    #   PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
    #   # => true
    #
    #   PathValidator.safe_library_path?("../../../tmp/evil.so")
    #   # => false
    def safe_library_path?(path, require_trusted_dir: false)
      return false unless validation_errors(path).empty?

      !require_trusted_dir || in_trusted_directory?(path)
    end

    # Check if a path is within a trusted directory
    #
    # Checks against DEFAULT_TRUSTED_DIRECTORIES, custom registered directories,
    # and directories from TREE_HAVER_TRUSTED_DIRS environment variable.
    #
    # The path is resolved with File.realpath (falling back to its parent
    # directory when the file does not exist yet) and must equal a trusted
    # directory or lie below it on a path-component boundary, so
    # "/usr/libevil/x.so" is NOT considered inside "/usr/lib". Each trusted
    # directory is compared both literally and in resolved form, so a trusted
    # directory that is itself a symlink still matches.
    #
    # Any failure to resolve the path (missing parent, not a directory,
    # permission denied, symlink loop, embedded null byte) yields false.
    #
    # @param path [String, nil] the path to check
    # @return [Boolean] true if the path is in a trusted directory
    def in_trusted_directory?(path)
      return false if path.nil?

      resolved = resolve_for_trust_check(path)
      return false if resolved.nil?

      trusted_directories.any? do |trusted|
        next true if path_within?(resolved, trusted)

        canonical = canonical_path(trusted)
        !canonical.nil? && path_within?(resolved, canonical)
      end
    end

    # Validate a language name is safe
    #
    # Language names are used to construct:
    # - Environment variable names (TREE_SITTER_<LANG>_PATH)
    # - Library filenames (libtree-sitter-<lang>.so)
    # - Symbol names (tree_sitter_<lang>)
    #
    # @param name [String, Symbol, nil] the language name to validate
    # @return [Boolean] true if the name is safe
    #
    # @example
    #   PathValidator.safe_language_name?(:toml)       # => true
    #   PathValidator.safe_language_name?("json")      # => true
    #   PathValidator.safe_language_name?("../../etc") # => false
    def safe_language_name?(name)
      return false if name.nil?

      name_str = name.to_s
      return false if name_str.empty? || name_str.length > MAX_LANGUAGE_NAME_LENGTH

      name_str.match?(VALID_LANGUAGE_PATTERN)
    end

    # Validate a symbol name is safe for dlsym lookup
    #
    # @param symbol [String, nil] the symbol name to validate
    # @return [Boolean] true if the symbol name is safe
    #
    # @example
    #   PathValidator.safe_symbol_name?("tree_sitter_toml") # => true
    #   PathValidator.safe_symbol_name?("evil; rm -rf /")   # => false
    def safe_symbol_name?(symbol)
      return false if symbol.nil? || symbol.empty?
      return false if symbol.length > MAX_SYMBOL_NAME_LENGTH

      symbol.match?(VALID_SYMBOL_PATTERN)
    end

    # Validate a backend name
    #
    # @param backend [String, Symbol, nil] the backend name
    # @return [Boolean] true if it's a valid backend name (nil means :auto)
    def safe_backend_name?(backend)
      # Compare as strings so caller-supplied input is never interned.
      backend.nil? || VALID_BACKEND_NAMES.include?(backend.to_s)
    end

    # Sanitize a language name for safe use
    #
    # Downcases the name and strips every character outside [a-z0-9_]. The
    # result is returned only if it satisfies {safe_language_name?}, i.e. it
    # is non-empty, starts with a letter and is at most
    # MAX_LANGUAGE_NAME_LENGTH characters long.
    #
    # @param name [String, Symbol, nil] the language name
    # @return [Symbol, nil] sanitized name or nil if nothing valid remains
    #
    # @example
    #   PathValidator.sanitize_language_name("TOML") # => :toml
    #   PathValidator.sanitize_language_name("c++")  # => :c
    #   PathValidator.sanitize_language_name("123")  # => nil (must start with a letter)
    def sanitize_language_name(name)
      return if name.nil?

      sanitized = name.to_s.downcase.gsub(/[^a-z0-9_]/, "")
      sanitized.to_sym if safe_language_name?(sanitized)
    end

    # Get validation errors for a path (for debugging/error messages)
    #
    # Applies every rule used by {safe_library_path?} (except the optional
    # trusted-directory requirement) and reports all that fail.
    #
    # @param path [String, nil] the path to validate
    # @return [Array<String>] list of validation errors (empty if valid)
    def validation_errors(path)
      return ["Path is nil or empty"] if path.nil? || path.empty?

      errors = []
      has_null_byte = path.include?("\0")
      # For drive-letter paths "\" is a separator, so traversal must be
      # detected through it as well.
      comparable = windows_absolute_path?(path) ? path.tr("\\", "/") : path

      errors << "Path exceeds maximum length (#{MAX_PATH_LENGTH})" if path.length > MAX_PATH_LENGTH
      errors << "Path contains null byte" if has_null_byte
      errors << "Path is not absolute" unless absolute_path?(path)
      if comparable.include?("/../") || comparable.end_with?("/..")
        errors << "Path contains traversal sequence (/../)"
      end
      if comparable.include?("/./") || comparable.end_with?("/.")
        errors << "Path contains traversal sequence (/./)"
      end

      unless path.end_with?(*ALLOWED_EXTENSIONS)
        errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
      end

      # File.basename rejects strings with embedded null bytes by raising
      # ArgumentError; that problem is already reported above, so skip the
      # filename check instead of crashing.
      unless has_null_byte || File.basename(path).match?(VALID_FILENAME_PATTERN)
        errors << "Filename contains invalid characters"
      end

      errors
    end

    # Match Windows absolute paths like C:\path or D:/path
    #
    # @api private
    # @param path [String] the path to test
    # @return [Boolean] true if the path starts with a drive letter and separator
    def windows_absolute_path?(path)
      path.match?(WINDOWS_ABSOLUTE_PATTERN)
    end

    # Whether a path counts as absolute for validation purposes: either a
    # POSIX absolute path or a Windows drive-letter path.
    #
    # NOTE(review): drive-letter paths are accepted on every platform, as
    # before; on a POSIX host "C:/x" is actually relative - consider gating
    # this on the host platform.
    #
    # @api private
    # @param path [String] the path to test
    # @return [Boolean]
    def absolute_path?(path)
      path.start_with?("/") || windows_absolute_path?(path)
    end

    # Whether +candidate+ equals +directory+ or lies below it, compared on a
    # path-component boundary (trailing slashes on +directory+ are ignored).
    #
    # @api private
    # @param candidate [String] resolved path being checked
    # @param directory [String] directory that may contain it
    # @return [Boolean]
    def path_within?(candidate, directory)
      return false if directory.empty?

      base = directory.sub(%r{/+\z}, "")
      candidate == base || candidate.start_with?("#{base}/")
    end

    # Resolve +path+ for the trusted-directory check.
    #
    # @api private
    # @param path [String] the path to resolve
    # @return [String, nil] real path of the file, or of its parent directory
    #   when the file does not exist; nil when neither can be resolved
    def resolve_for_trust_check(path)
      File.realpath(path)
    rescue Errno::ENOENT
      # File doesn't exist yet, check the directory
      canonical_path(File.dirname(path))
    rescue SystemCallError, ArgumentError
      nil
    end

    # File.realpath that returns nil instead of raising.
    #
    # @api private
    # @param path [String] the path to resolve
    # @return [String, nil] the real path, or nil if it cannot be resolved
    def canonical_path(path)
      File.realpath(path)
    rescue SystemCallError, ArgumentError
      nil
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
  # Namespace that holds the gem's version constant.
  #
  # The version string follows Semantic Versioning 2.0.0.
  #
  # @see https://semver.org/ Semantic Versioning
  module Version
    # The released version of the tree_haver gem.
    #
    # @return [String] the version string (e.g., "1.0.0")
    VERSION = "1.0.0"
  end

  # Alias of {Version::VERSION} at the conventional TreeHaver::VERSION location.
  #
  # @return [String] the version string
  VERSION = Version::VERSION
end
|