tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,353 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Security utilities for validating paths and inputs before loading shared libraries.
5
- #
6
- # Loading shared libraries (.so/.dylib/.dll) is inherently dangerous as it executes
7
- # arbitrary native code. This module provides defense-in-depth validations to reduce
8
- # the attack surface when paths come from potentially untrusted sources like
9
- # environment variables or user input.
10
- #
11
- # @example Validate a path before loading
12
- # path = ENV["TREE_SITTER_TOML_PATH"]
13
- # if TreeHaver::PathValidator.safe_library_path?(path)
14
- # language = TreeHaver::Language.from_library(path)
15
- # else
16
- # raise "Unsafe path: #{path}"
17
- # end
18
- #
19
- # @example Register custom trusted directories
20
- # # For Homebrew on Linux (linuxbrew)
21
- # TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
22
- #
23
- # # For luarocks-installed grammars
24
- # TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
25
- #
26
- # # Or via environment variable (comma-separated)
27
- # # export TREE_HAVER_TRUSTED_DIRS="/home/linuxbrew/.linuxbrew/Cellar,~/.local/share/mise"
28
- #
29
- # @note These validations provide defense-in-depth but cannot guarantee safety.
30
- # Loading shared libraries from untrusted sources is always risky.
31
- module PathValidator
32
- # Allowed shared library extensions by platform
33
- ALLOWED_EXTENSIONS = %w[.so .dylib .dll].freeze
34
-
35
- # Default directories that are generally trusted for system libraries
36
- # These are searched by the dynamic linker anyway
37
- DEFAULT_TRUSTED_DIRECTORIES = [
38
- "/usr/lib",
39
- "/usr/lib64",
40
- "/usr/lib/x86_64-linux-gnu",
41
- "/usr/lib/aarch64-linux-gnu",
42
- "/usr/local/lib",
43
- "/opt/homebrew/lib",
44
- "/opt/local/lib",
45
- ].freeze
46
-
47
- # Environment variable for adding trusted directories (comma-separated)
48
- TRUSTED_DIRS_ENV_VAR = "TREE_HAVER_TRUSTED_DIRS"
49
-
50
- # Maximum reasonable path length (prevents DoS via extremely long paths)
51
- MAX_PATH_LENGTH = 4096
52
-
53
- # Pattern for valid library filenames (alphanumeric, hyphens, underscores, dots)
54
- # This prevents shell metacharacters and other injection attempts
55
- VALID_FILENAME_PATTERN = /\A[a-zA-Z0-9][a-zA-Z0-9._-]*\z/
56
-
57
- # Pattern for valid language names (lowercase alphanumeric and underscores)
58
- VALID_LANGUAGE_PATTERN = /\A[a-z][a-z0-9_]*\z/
59
-
60
- # Pattern for valid symbol names (C identifier format)
61
- VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
62
-
63
- @custom_trusted_directories = [] # rubocop:disable ThreadSafety/MutableClassInstanceVariable
64
- @mutex = Mutex.new
65
-
66
- module_function
67
-
68
- # Get all trusted directories (default + custom + from ENV)
69
- #
70
- # @return [Array<String>] list of all trusted directory prefixes
71
- def trusted_directories
72
- dirs = DEFAULT_TRUSTED_DIRECTORIES.dup
73
-
74
- # Add custom registered directories
75
- @mutex.synchronize { dirs.concat(@custom_trusted_directories) }
76
-
77
- # Add directories from environment variable
78
- ENV[TRUSTED_DIRS_ENV_VAR]&.split(",")&.each do |dir|
79
- expanded = File.expand_path(dir.strip)
80
- # :nocov:
81
- # File.expand_path always returns absolute paths on Unix/macOS.
82
- # This guard exists for defensive programming on exotic platforms
83
- # where expand_path might behave differently, but cannot be tested
84
- # in standard CI environments.
85
- dirs << expanded if expanded.start_with?("/")
86
- # :nocov:
87
- end
88
-
89
- dirs.uniq
90
- end
91
-
92
- # Register a custom trusted directory
93
- #
94
- # Use this to add directories where you install tree-sitter grammars,
95
- # such as Homebrew locations, luarocks paths, or other package managers.
96
- #
97
- # @param directory [String] absolute path to trust (~ is expanded)
98
- # @return [void]
99
- # @raise [ArgumentError] if directory is not an absolute path
100
- #
101
- # @example Register linuxbrew directory
102
- # TreeHaver::PathValidator.add_trusted_directory("/home/linuxbrew/.linuxbrew/Cellar")
103
- #
104
- # @example Register user's luarocks directory
105
- # TreeHaver::PathValidator.add_trusted_directory("~/.local/share/mise/installs/lua")
106
- def add_trusted_directory(directory)
107
- expanded = File.expand_path(directory)
108
-
109
- # :nocov:
110
- # File.expand_path always returns absolute paths on Unix/macOS.
111
- # This guard exists for defensive programming on exotic platforms
112
- # where expand_path might behave differently, but cannot be tested
113
- # in standard CI environments.
114
- unless expanded.start_with?("/")
115
- raise ArgumentError, "Trusted directory must be an absolute path: #{directory.inspect}"
116
- end
117
- # :nocov:
118
-
119
- @mutex.synchronize do
120
- @custom_trusted_directories << expanded unless @custom_trusted_directories.include?(expanded)
121
- end
122
- nil
123
- end
124
-
125
- # Remove a custom trusted directory
126
- #
127
- # @param directory [String] the directory to remove
128
- # @return [void]
129
- def remove_trusted_directory(directory)
130
- expanded = File.expand_path(directory)
131
- @mutex.synchronize { @custom_trusted_directories.delete(expanded) }
132
- nil
133
- end
134
-
135
- # Clear all custom trusted directories
136
- #
137
- # Does not affect DEFAULT_TRUSTED_DIRECTORIES or ENV-based directories.
138
- # Primarily useful for testing.
139
- #
140
- # @return [void]
141
- def clear_custom_trusted_directories!
142
- @mutex.synchronize { @custom_trusted_directories.clear }
143
- nil
144
- end
145
-
146
- # Get the list of custom trusted directories (for debugging)
147
- #
148
- # @return [Array<String>] list of custom registered directories
149
- def custom_trusted_directories
150
- @mutex.synchronize { @custom_trusted_directories.dup }
151
- end
152
-
153
- # Validate a path is safe for loading as a shared library
154
- #
155
- # Checks performed:
156
- # - Path is not nil or empty
157
- # - Path length is reasonable
158
- # - Path is absolute (no relative path traversal)
159
- # - Path has an allowed extension
160
- # - Path does not contain null bytes
161
- # - Filename portion matches safe pattern
162
- #
163
- # @param path [String, nil] the path to validate
164
- # @param require_trusted_dir [Boolean] if true, path must be in a trusted directory
165
- # @return [Boolean] true if the path passes all safety checks
166
- #
167
- # @example
168
- # PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
169
- # # => true
170
- #
171
- # PathValidator.safe_library_path?("../../../tmp/evil.so")
172
- # # => false
173
- def safe_library_path?(path, require_trusted_dir: false)
174
- return false if path.nil? || path.empty?
175
- return false if path.length > MAX_PATH_LENGTH
176
- return false if path.include?("\0") # Null byte injection
177
-
178
- # Must be absolute path (prevents relative path traversal)
179
- return false unless path.start_with?("/") || windows_absolute_path?(path)
180
-
181
- # Check for path traversal attempts
182
- return false if path.include?("/../") || path.end_with?("/..")
183
- return false if path.include?("/./") || path.end_with?("/.")
184
-
185
- # Validate extension
186
- # Allow versioned .so files like .so.0, .so.14, etc. (common on Linux)
187
- return false unless has_valid_extension?(path)
188
-
189
- # Validate filename portion
190
- filename = File.basename(path)
191
- return false unless filename.match?(VALID_FILENAME_PATTERN)
192
-
193
- # Optionally require the path to be in a trusted directory
194
- if require_trusted_dir
195
- return false unless in_trusted_directory?(path)
196
- end
197
-
198
- true
199
- end
200
-
201
- # Check if a path is within a trusted directory
202
- #
203
- # Checks against DEFAULT_TRUSTED_DIRECTORIES, custom registered directories,
204
- # and directories from TREE_HAVER_TRUSTED_DIRS environment variable.
205
- #
206
- # @param path [String] the path to check
207
- # @return [Boolean] true if the path is in a trusted directory
208
- def in_trusted_directory?(path)
209
- return false if path.nil?
210
-
211
- # Resolve the real path to handle symlinks
212
- check_path = resolve_check_path(path)
213
- return false if check_path.nil?
214
-
215
- trusted_directories.any? { |trusted| check_path.start_with?(trusted) }
216
- end
217
-
218
- # Resolve a path to its real path for trust checking
219
- #
220
- # @param path [String] the path to resolve
221
- # @return [String, nil] the resolved path or nil if unresolvable
222
- # @api private
223
- def resolve_check_path(path)
224
- File.realpath(path)
225
- rescue Errno::ENOENT
226
- # File doesn't exist yet, check the directory
227
- dir = File.dirname(path)
228
- begin
229
- File.realpath(dir)
230
- rescue Errno::ENOENT
231
- nil
232
- end
233
- end
234
-
235
- # Validate a language name is safe
236
- #
237
- # Language names are used to construct:
238
- # - Environment variable names (TREE_SITTER_<LANG>_PATH)
239
- # - Library filenames (libtree-sitter-<lang>.so)
240
- # - Symbol names (tree_sitter_<lang>)
241
- #
242
- # @param name [String, Symbol, nil] the language name to validate
243
- # @return [Boolean] true if the name is safe
244
- #
245
- # @example
246
- # PathValidator.safe_language_name?(:toml) # => true
247
- # PathValidator.safe_language_name?("json") # => true
248
- # PathValidator.safe_language_name?("../../etc") # => false
249
- def safe_language_name?(name)
250
- return false if name.nil?
251
-
252
- name_str = name.to_s
253
- return false if name_str.empty?
254
- return false if name_str.length > 64 # Reasonable limit
255
-
256
- name_str.match?(VALID_LANGUAGE_PATTERN)
257
- end
258
-
259
- # Validate a symbol name is safe for dlsym lookup
260
- #
261
- # @param symbol [String, nil] the symbol name to validate
262
- # @return [Boolean] true if the symbol name is safe
263
- #
264
- # @example
265
- # PathValidator.safe_symbol_name?("tree_sitter_toml") # => true
266
- # PathValidator.safe_symbol_name?("evil; rm -rf /") # => false
267
- def safe_symbol_name?(symbol)
268
- return false if symbol.nil?
269
- return false if symbol.empty?
270
- return false if symbol.length > 256 # Reasonable limit
271
-
272
- symbol.match?(VALID_SYMBOL_PATTERN)
273
- end
274
-
275
- # Validate a backend name
276
- #
277
- # @param backend [String, Symbol, nil] the backend name
278
- # @return [Boolean] true if it's a valid backend name
279
- def safe_backend_name?(backend)
280
- return true if backend.nil? # nil means :auto
281
-
282
- %i[auto mri rust ffi java].include?(backend.to_s.to_sym)
283
- end
284
-
285
- # Sanitize a language name for safe use
286
- #
287
- # @param name [String, Symbol] the language name
288
- # @return [Symbol, nil] sanitized name or nil if invalid
289
- #
290
- # @example
291
- # PathValidator.sanitize_language_name("TOML") # => :toml
292
- # PathValidator.sanitize_language_name("c++") # => nil (invalid)
293
- def sanitize_language_name(name)
294
- return if name.nil?
295
-
296
- sanitized = name.to_s.downcase.gsub(/[^a-z0-9_]/, "")
297
- return if sanitized.empty?
298
- return unless sanitized.match?(/\A[a-z]/)
299
-
300
- sanitized.to_sym
301
- end
302
-
303
- # Get validation errors for a path (for debugging/error messages)
304
- #
305
- # @param path [String, nil] the path to validate
306
- # @return [Array<String>] list of validation errors (empty if valid)
307
- def validation_errors(path)
308
- errors = []
309
-
310
- if path.nil? || path.empty?
311
- errors << "Path is nil or empty"
312
- return errors
313
- end
314
-
315
- errors << "Path exceeds maximum length (#{MAX_PATH_LENGTH})" if path.length > MAX_PATH_LENGTH
316
- errors << "Path contains null byte" if path.include?("\0")
317
- errors << "Path is not absolute" unless path.start_with?("/") || windows_absolute_path?(path)
318
- errors << "Path contains traversal sequence (/../)" if path.include?("/../") || path.end_with?("/..")
319
- errors << "Path contains traversal sequence (/./)" if path.include?("/./") || path.end_with?("/.")
320
-
321
- unless has_valid_extension?(path)
322
- errors << "Path does not have allowed extension (.so, .so.X, .dylib, .dll)"
323
- end
324
-
325
- filename = File.basename(path)
326
- unless filename.match?(VALID_FILENAME_PATTERN)
327
- errors << "Filename contains invalid characters"
328
- end
329
-
330
- errors
331
- end
332
-
333
- # @api private
334
- def windows_absolute_path?(path)
335
- # Match Windows absolute paths like C:\path or D:/path
336
- path.match?(/\A[A-Za-z]:[\\\/]/)
337
- end
338
-
339
- # @api private
340
- # Check if path has a valid library extension
341
- # Allows: .so, .dylib, .dll, and versioned .so files like .so.0, .so.14
342
- def has_valid_extension?(path)
343
- # Check for exact matches first (.so, .dylib, .dll)
344
- return true if ALLOWED_EXTENSIONS.any? { |ext| path.end_with?(ext) }
345
-
346
- # Check for versioned .so files (Linux convention)
347
- # e.g., libtree-sitter.so.0, libtree-sitter.so.14
348
- return true if path.match?(/\.so\.\d+\z/)
349
-
350
- false
351
- end
352
- end
353
- end
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Point class that works as both a Hash and an object with row/column accessors
5
- #
6
- # This provides compatibility with code expecting either:
7
- # - Hash access: point[:row], point[:column]
8
- # - Method access: point.row, point.column
9
- #
10
- # TreeHaver::Point is an alias for TreeHaver::Base::Point, which is a Struct
11
- # providing all the necessary functionality.
12
- #
13
- # @example Method access
14
- # point = TreeHaver::Point.new(5, 10)
15
- # point.row # => 5
16
- # point.column # => 10
17
- #
18
- # @example Hash-like access
19
- # point[:row] # => 5
20
- # point[:column] # => 10
21
- #
22
- # @example Converting to hash
23
- # point.to_h # => {row: 5, column: 10}
24
- #
25
- # @see Base::Point The underlying Struct implementation
26
- Point = Base::Point
27
- end