tree_haver 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,333 @@
1
+ # frozen_string_literal: true
2
+
3
module TreeHaver
  # Security utilities for validating paths and inputs before loading shared libraries.
  #
  # Loading shared libraries (.so/.dylib/.dll) is inherently dangerous as it executes
  # arbitrary native code. This module provides defense-in-depth validations to reduce
  # the attack surface when paths come from potentially untrusted sources like
  # environment variables or user input.
  #
  # @example Validate a path before loading
  #   path = ENV["TREE_SITTER_TOML_PATH"]
  #   if TreeHaver::PathValidator.safe_library_path?(path)
  #     language = TreeHaver::Language.from_library(path)
  #   else
  #     raise "Unsafe path: #{path}"
  #   end
  #
  # @example Register custom trusted directories
  #   # For Homebrew on Linux (linuxbrew)
  #   TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
  #
  #   # For luarocks-installed grammars
  #   TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
  #
  #   # Or via environment variable (comma-separated)
  #   # export TREE_HAVER_TRUSTED_DIRS="/home/linuxbrew/.linuxbrew/Cellar,~/.local/share/mise"
  #
  # @note These validations provide defense-in-depth but cannot guarantee safety.
  #   Loading shared libraries from untrusted sources is always risky.
  module PathValidator
    # Allowed shared library extensions (Linux, macOS, Windows)
    ALLOWED_EXTENSIONS = %w[.so .dylib .dll].freeze

    # Default directories that are generally trusted for system libraries.
    # These are searched by the dynamic linker anyway.
    DEFAULT_TRUSTED_DIRECTORIES = [
      "/usr/lib",
      "/usr/lib64",
      "/usr/lib/x86_64-linux-gnu",
      "/usr/lib/aarch64-linux-gnu",
      "/usr/local/lib",
      "/opt/homebrew/lib",
      "/opt/local/lib",
    ].freeze

    # Environment variable for adding trusted directories (comma-separated)
    TRUSTED_DIRS_ENV_VAR = "TREE_HAVER_TRUSTED_DIRS"

    # Maximum reasonable path length (prevents DoS via extremely long paths)
    MAX_PATH_LENGTH = 4096

    # Maximum accepted length of a language name
    MAX_LANGUAGE_NAME_LENGTH = 64

    # Maximum accepted length of a symbol name
    MAX_SYMBOL_NAME_LENGTH = 256

    # Pattern for valid library filenames (alphanumeric, hyphens, underscores, dots)
    # This prevents shell metacharacters and other injection attempts
    VALID_FILENAME_PATTERN = /\A[a-zA-Z0-9][a-zA-Z0-9._-]*\z/

    # Pattern for valid language names (lowercase alphanumeric and underscores)
    VALID_LANGUAGE_PATTERN = /\A[a-z][a-z0-9_]*\z/

    # Pattern for valid symbol names (C identifier format)
    VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/

    # Backend names accepted by {safe_backend_name?}. Kept as strings so that
    # caller-supplied input is compared without being interned as a Symbol.
    VALID_BACKEND_NAMES = %w[auto mri rust ffi java].freeze

    @custom_trusted_directories = []
    @mutex = Mutex.new

    module_function

    # Get all trusted directories (default + custom + from ENV)
    #
    # Entries of the TREE_HAVER_TRUSTED_DIRS variable are stripped and expanded
    # with File.expand_path. Blank entries and entries that cannot be expanded
    # are ignored.
    #
    # @return [Array<String>] list of all trusted directories, without duplicates
    def trusted_directories
      dirs = DEFAULT_TRUSTED_DIRECTORIES.dup

      # Add custom registered directories
      @mutex.synchronize { dirs.concat(@custom_trusted_directories) }

      # Add directories from environment variable
      ENV.fetch(TRUSTED_DIRS_ENV_VAR, "").split(",").each do |entry|
        expanded = expand_env_directory(entry)
        dirs << expanded if expanded
      end

      dirs.uniq
    end

    # Register a custom trusted directory
    #
    # Use this to add directories where you install tree-sitter grammars,
    # such as Homebrew locations, luarocks paths, or other package managers.
    #
    # @param directory [String] absolute path to trust (~ is expanded)
    # @return [void]
    # @raise [ArgumentError] if directory is blank or does not expand to an absolute path
    #
    # @example Register linuxbrew directory
    #   TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
    #
    # @example Register user's luarocks directory
    #   TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
    def add_trusted_directory(directory)
      expanded = File.expand_path(directory)

      # File.expand_path("") returns the current working directory, so a blank
      # argument must be rejected explicitly or it would silently trust the cwd.
      # The absolute check also accepts drive-letter paths, matching the rule
      # applied by safe_library_path?.
      if directory.to_s.strip.empty? || !absolute_path?(expanded)
        raise ArgumentError, "Trusted directory must be an absolute path: #{directory.inspect}"
      end

      @mutex.synchronize do
        @custom_trusted_directories << expanded unless @custom_trusted_directories.include?(expanded)
      end
      nil
    end

    # Remove a custom trusted directory
    #
    # @param directory [String] the directory to remove (expanded the same way as on registration)
    # @return [void]
    def remove_trusted_directory(directory)
      expanded = File.expand_path(directory)
      @mutex.synchronize { @custom_trusted_directories.delete(expanded) }
      nil
    end

    # Clear all custom trusted directories
    #
    # Does not affect DEFAULT_TRUSTED_DIRECTORIES or ENV-based directories.
    # Primarily useful for testing.
    #
    # @return [void]
    def clear_custom_trusted_directories!
      @mutex.synchronize { @custom_trusted_directories.clear }
      nil
    end

    # Get the list of custom trusted directories (for debugging)
    #
    # @return [Array<String>] a copy of the custom registered directories
    def custom_trusted_directories
      @mutex.synchronize { @custom_trusted_directories.dup }
    end

    # Validate a path is safe for loading as a shared library
    #
    # Checks performed (the same rules reported by {validation_errors}):
    # - Path is not nil or empty
    # - Path length is reasonable
    # - Path is absolute (no relative path traversal)
    # - Path has an allowed extension
    # - Path does not contain null bytes
    # - Path does not contain "." or ".." traversal components
    # - Filename portion matches safe pattern
    #
    # @param path [String, nil] the path to validate
    # @param require_trusted_dir [Boolean] if true, path must be in a trusted directory
    # @return [Boolean] true if the path passes all safety checks
    #
    # @example
    #   PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
    #   # => true
    #
    #   PathValidator.safe_library_path?("../../../tmp/evil.so")
    #   # => false
    def safe_library_path?(path, require_trusted_dir: false)
      # Delegating keeps the predicate and the error report on one rule set.
      return false unless validation_errors(path).empty?
      return false if require_trusted_dir && !in_trusted_directory?(path)

      true
    end

    # Check if a path is within a trusted directory
    #
    # Checks against DEFAULT_TRUSTED_DIRECTORIES, custom registered directories,
    # and directories from TREE_HAVER_TRUSTED_DIRS environment variable.
    #
    # The path is resolved with File.realpath first; when the file does not
    # exist, its parent directory is resolved instead. Matching is done on
    # whole path components, so "/usr/lib-evil/x.so" is not considered to be
    # inside "/usr/lib". A trusted directory matches both as registered and
    # through its own real path, so trusting a symlinked directory works.
    #
    # @param path [String, nil] the path to check
    # @return [Boolean] true if the path is in a trusted directory; false when
    #   the path is nil or cannot be resolved
    def in_trusted_directory?(path)
      return false if path.nil?

      resolved = resolve_existing_path(path)
      return false if resolved.nil?

      trusted_directories.any? do |trusted|
        trusted_directory_candidates(trusted).any? { |dir| path_within?(resolved, dir) }
      end
    end

    # Validate a language name is safe
    #
    # Language names are used to construct:
    # - Environment variable names (TREE_SITTER_<LANG>_PATH)
    # - Library filenames (libtree-sitter-<lang>.so)
    # - Symbol names (tree_sitter_<lang>)
    #
    # @param name [String, Symbol, nil] the language name to validate
    # @return [Boolean] true if the name is safe
    #
    # @example
    #   PathValidator.safe_language_name?(:toml)       # => true
    #   PathValidator.safe_language_name?("json")      # => true
    #   PathValidator.safe_language_name?("../../etc") # => false
    def safe_language_name?(name)
      return false if name.nil?

      name_str = name.to_s
      return false if name_str.empty?
      return false if name_str.length > MAX_LANGUAGE_NAME_LENGTH

      name_str.match?(VALID_LANGUAGE_PATTERN)
    end

    # Validate a symbol name is safe for dlsym lookup
    #
    # @param symbol [String, Symbol, nil] the symbol name to validate
    # @return [Boolean] true if the symbol name is safe
    #
    # @example
    #   PathValidator.safe_symbol_name?("tree_sitter_toml") # => true
    #   PathValidator.safe_symbol_name?("evil; rm -rf /")   # => false
    def safe_symbol_name?(symbol)
      return false if symbol.nil?

      # Coerce like safe_language_name? does, so unexpected input types yield
      # false instead of raising NoMethodError.
      symbol_str = symbol.to_s
      return false if symbol_str.empty?
      return false if symbol_str.length > MAX_SYMBOL_NAME_LENGTH

      symbol_str.match?(VALID_SYMBOL_PATTERN)
    end

    # Validate a backend name
    #
    # @param backend [String, Symbol, nil] the backend name
    # @return [Boolean] true if it's a valid backend name
    def safe_backend_name?(backend)
      return true if backend.nil? # nil means :auto

      VALID_BACKEND_NAMES.include?(backend.to_s)
    end

    # Sanitize a language name for safe use
    #
    # The name is lowercased and every character outside a-z, 0-9 and "_" is
    # removed. The result is returned only if it then satisfies
    # {safe_language_name?}.
    #
    # @param name [String, Symbol, nil] the language name
    # @return [Symbol, nil] sanitized name, or nil if nothing valid remains
    #
    # @example
    #   PathValidator.sanitize_language_name("TOML") # => :toml
    #   PathValidator.sanitize_language_name("c++")  # => :c (invalid characters are stripped)
    #   PathValidator.sanitize_language_name("123")  # => nil (must start with a letter)
    def sanitize_language_name(name)
      return if name.nil?

      sanitized = name.to_s.downcase.gsub(/[^a-z0-9_]/, "")
      return unless safe_language_name?(sanitized)

      sanitized.to_sym
    end

    # Get validation errors for a path (for debugging/error messages)
    #
    # @param path [String, nil] the path to validate
    # @return [Array<String>] list of validation errors (empty if valid)
    def validation_errors(path)
      return ["Path is nil or empty"] if path.nil? || path.empty?

      errors = []
      has_null_byte = path.include?("\0")
      # Backslash is only a separator for drive-letter paths; on other paths it
      # is an ordinary filename character and must not be rewritten.
      normalized = windows_absolute_path?(path) ? path.tr("\\", "/") : path

      errors << "Path exceeds maximum length (#{MAX_PATH_LENGTH})" if path.length > MAX_PATH_LENGTH
      errors << "Path contains null byte" if has_null_byte
      errors << "Path is not absolute" unless absolute_path?(path)
      errors << "Path contains traversal sequence (/../)" if normalized.include?("/../") || normalized.end_with?("/..")
      errors << "Path contains traversal sequence (/./)" if normalized.include?("/./") || normalized.end_with?("/.")

      unless path.end_with?(*ALLOWED_EXTENSIONS)
        errors << "Path does not have allowed extension (#{ALLOWED_EXTENSIONS.join(", ")})"
      end

      # File.basename can raise ArgumentError for strings containing a null
      # byte; that problem is already reported above, so skip the filename check.
      unless has_null_byte || File.basename(path).match?(VALID_FILENAME_PATTERN)
        errors << "Filename contains invalid characters"
      end

      errors
    end

    # @api private
    def windows_absolute_path?(path)
      # Match Windows absolute paths like C:\path or D:/path
      path.match?(/\A[A-Za-z]:[\\\/]/)
    end

    # Whether +path+ is absolute in either Unix or Windows drive-letter form.
    #
    # @api private
    def absolute_path?(path)
      path.start_with?("/") || windows_absolute_path?(path)
    end

    # Expand one entry of the trusted-directories environment variable.
    #
    # @api private
    # @return [String, nil] expanded absolute directory, or nil if the entry is
    #   blank, cannot be expanded, or does not expand to an absolute path
    def expand_env_directory(entry)
      candidate = entry.strip
      # A blank entry would expand to the current working directory.
      return if candidate.empty?

      expanded = File.expand_path(candidate)
      expanded if absolute_path?(expanded)
    rescue ArgumentError
      # File.expand_path raises ArgumentError when "~" or "~user" has no home
      # directory to expand to; such an entry is ignored.
      nil
    end

    # Resolve +path+ through symlinks, falling back to its parent directory
    # when the file itself does not exist yet.
    #
    # @api private
    # @return [String, nil] resolved path, or nil if it cannot be resolved
    def resolve_existing_path(path)
      File.realpath(path)
    rescue Errno::ENOENT
      begin
        File.realpath(File.dirname(path))
      rescue SystemCallError
        nil
      end
    rescue SystemCallError
      # Permission errors, symlink loops and similar: fail closed.
      nil
    end

    # Forms of a trusted directory to compare against: the directory as
    # registered and, when it can be resolved, its real path.
    #
    # @api private
    def trusted_directory_candidates(trusted)
      [trusted, File.realpath(trusted)].uniq
    rescue SystemCallError
      [trusted]
    end

    # Whether +path+ equals +directory+ or lies beneath it, compared on a
    # path-component boundary.
    #
    # @api private
    def path_within?(path, directory)
      return true if path == directory

      prefix = directory.end_with?("/") ? directory : "#{directory}/"
      path.start_with?(prefix)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module TreeHaver
  # Holds the release number of the tree_haver gem.
  #
  # The number is formatted according to Semantic Versioning 2.0.0.
  #
  # @see https://semver.org/ Semantic Versioning
  module Version
    # Release number of the tree_haver gem, as a String (e.g., "1.0.0")
    VERSION = "1.0.0"
  end

  # The same String exposed at the conventional TreeHaver::VERSION location
  VERSION = Version::VERSION
end