tree_haver 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +190 -1
- data/CONTRIBUTING.md +100 -0
- data/README.md +342 -11
- data/lib/tree_haver/backends/citrus.rb +141 -20
- data/lib/tree_haver/backends/ffi.rb +338 -141
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/mri.rb +154 -17
- data/lib/tree_haver/backends/rust.rb +59 -16
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +150 -0
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +697 -56
- data.tar.gz.sig +0 -0
- metadata +5 -4
- metadata.gz.sig +0 -0
|
@@ -142,14 +142,37 @@ module TreeHaver
|
|
|
142
142
|
def find_library_path
|
|
143
143
|
# Check environment variable first (highest priority)
|
|
144
144
|
env_path = ENV[env_var_name]
|
|
145
|
-
if env_path
|
|
146
|
-
|
|
145
|
+
if env_path
|
|
146
|
+
# Store why env path was rejected for better error messages
|
|
147
|
+
@env_rejection_reason = validate_env_path(env_path)
|
|
148
|
+
return env_path if @env_rejection_reason.nil?
|
|
147
149
|
end
|
|
148
150
|
|
|
149
151
|
# Search all paths (these are constructed from trusted base dirs)
|
|
150
152
|
search_paths.find { |path| File.exist?(path) }
|
|
151
153
|
end
|
|
152
154
|
|
|
155
|
+
# Validate an environment variable path and return reason if invalid
|
|
156
|
+
# @return [String, nil] rejection reason or nil if valid
|
|
157
|
+
def validate_env_path(path)
|
|
158
|
+
# Check for leading/trailing whitespace
|
|
159
|
+
if path != path.strip
|
|
160
|
+
return "contains leading or trailing whitespace (use #{path.strip.inspect})"
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Check if path is safe
|
|
164
|
+
unless PathValidator.safe_library_path?(path)
|
|
165
|
+
return "failed security validation (may contain path traversal or suspicious characters)"
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Check if file exists
|
|
169
|
+
unless File.exist?(path)
|
|
170
|
+
return "file does not exist"
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
nil # Valid!
|
|
174
|
+
end
|
|
175
|
+
|
|
153
176
|
# Find the grammar library path with strict security validation
|
|
154
177
|
#
|
|
155
178
|
# This method only returns paths that are in trusted system directories.
|
|
@@ -205,15 +228,17 @@ module TreeHaver
|
|
|
205
228
|
#
|
|
206
229
|
# @return [Hash] diagnostic information
|
|
207
230
|
def search_info
|
|
231
|
+
found = find_library_path # This populates @env_rejection_reason
|
|
208
232
|
{
|
|
209
233
|
language: @language_name,
|
|
210
234
|
env_var: env_var_name,
|
|
211
235
|
env_value: ENV[env_var_name],
|
|
236
|
+
env_rejection_reason: @env_rejection_reason,
|
|
212
237
|
symbol: symbol_name,
|
|
213
238
|
library_filename: library_filename,
|
|
214
239
|
search_paths: search_paths,
|
|
215
|
-
found_path:
|
|
216
|
-
available:
|
|
240
|
+
found_path: found,
|
|
241
|
+
available: !found.nil?,
|
|
217
242
|
}
|
|
218
243
|
end
|
|
219
244
|
|
|
@@ -221,9 +246,19 @@ module TreeHaver
|
|
|
221
246
|
#
|
|
222
247
|
# @return [String] error message with installation hints
|
|
223
248
|
def not_found_message
|
|
224
|
-
"tree-sitter #{@language_name} grammar not found.
|
|
225
|
-
|
|
226
|
-
|
|
249
|
+
msg = "tree-sitter #{@language_name} grammar not found."
|
|
250
|
+
|
|
251
|
+
# Check if env var is set but rejected
|
|
252
|
+
env_value = ENV[env_var_name]
|
|
253
|
+
msg += if env_value && @env_rejection_reason
|
|
254
|
+
" #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
|
|
255
|
+
elsif env_value
|
|
256
|
+
" #{env_var_name} is set but was not used (file may have been removed)."
|
|
257
|
+
else
|
|
258
|
+
" Searched: #{search_paths.join(", ")}."
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
msg + " Install tree-sitter-#{@language_name} or set #{env_var_name} to a valid path."
|
|
227
262
|
end
|
|
228
263
|
|
|
229
264
|
private
|
|
@@ -4,86 +4,93 @@ module TreeHaver
|
|
|
4
4
|
# Thread-safe language registrations and cache for loaded Language handles
|
|
5
5
|
#
|
|
6
6
|
# The LanguageRegistry provides two main functions:
|
|
7
|
-
# 1. **Registrations**: Store mappings from language names to
|
|
7
|
+
# 1. **Registrations**: Store mappings from language names to backend-specific configurations
|
|
8
8
|
# 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
|
|
9
9
|
#
|
|
10
|
-
#
|
|
10
|
+
# The registry supports multiple backends for the same language, allowing runtime
|
|
11
|
+
# switching, benchmarking, and fallback scenarios.
|
|
11
12
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
13
|
+
# Registration structure:
|
|
14
|
+
# @registrations = {
|
|
15
|
+
# toml: {
|
|
16
|
+
# tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
|
|
17
|
+
# citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
18
|
+
# }
|
|
19
|
+
# }
|
|
20
|
+
#
|
|
21
|
+
# @example Register tree-sitter grammar
|
|
22
|
+
# TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
|
|
23
|
+
# path: "/path/to/lib.so", symbol: "tree_sitter_toml")
|
|
24
|
+
#
|
|
25
|
+
# @example Register Citrus grammar
|
|
26
|
+
# TreeHaver::LanguageRegistry.register(:toml, :citrus,
|
|
27
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
18
28
|
#
|
|
19
29
|
# @api private
|
|
20
30
|
module LanguageRegistry
|
|
21
31
|
@mutex = Mutex.new
|
|
22
|
-
@cache = {}
|
|
23
|
-
@registrations = {}
|
|
32
|
+
@cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
33
|
+
@registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
24
34
|
|
|
25
35
|
module_function
|
|
26
36
|
|
|
27
|
-
# Register a language
|
|
37
|
+
# Register a language for a specific backend
|
|
28
38
|
#
|
|
29
|
-
# Stores
|
|
30
|
-
#
|
|
31
|
-
# accessed via dynamic helpers on {TreeHaver::Language}.
|
|
39
|
+
# Stores backend-specific configuration for a language. Multiple backends
|
|
40
|
+
# can be registered for the same language without conflict.
|
|
32
41
|
#
|
|
33
42
|
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
|
|
34
|
-
# @param
|
|
35
|
-
# @param
|
|
43
|
+
# @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :mri, :rust, :ffi, :java)
|
|
44
|
+
# @param config [Hash] backend-specific configuration
|
|
45
|
+
# @option config [String] :path tree-sitter library path (for tree-sitter backends)
|
|
46
|
+
# @option config [String] :symbol exported symbol name (for tree-sitter backends)
|
|
47
|
+
# @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
|
|
48
|
+
# @option config [String] :gem_name gem name for error messages (for Citrus backend)
|
|
36
49
|
# @return [void]
|
|
37
|
-
# @example
|
|
38
|
-
# LanguageRegistry.register(:toml,
|
|
39
|
-
|
|
50
|
+
# @example Register tree-sitter grammar
|
|
51
|
+
# LanguageRegistry.register(:toml, :tree_sitter,
|
|
52
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
53
|
+
# @example Register Citrus grammar
|
|
54
|
+
# LanguageRegistry.register(:toml, :citrus,
|
|
55
|
+
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
56
|
+
def register(name, backend_type, **config)
|
|
40
57
|
key = name.to_sym
|
|
41
|
-
|
|
42
|
-
@registrations[key] = {path: path, symbol: symbol}
|
|
43
|
-
end
|
|
44
|
-
nil
|
|
45
|
-
end
|
|
58
|
+
backend_key = backend_type.to_sym
|
|
46
59
|
|
|
47
|
-
# Unregister a previously registered language helper
|
|
48
|
-
#
|
|
49
|
-
# Removes the registration entry but does not affect cached Language objects.
|
|
50
|
-
#
|
|
51
|
-
# @param name [Symbol, String] language identifier to unregister
|
|
52
|
-
# @return [void]
|
|
53
|
-
# @example
|
|
54
|
-
# LanguageRegistry.unregister(:toml)
|
|
55
|
-
def unregister(name)
|
|
56
|
-
key = name.to_sym
|
|
57
60
|
@mutex.synchronize do
|
|
58
|
-
@registrations
|
|
61
|
+
@registrations[key] ||= {}
|
|
62
|
+
@registrations[key][backend_key] = config.compact
|
|
59
63
|
end
|
|
60
64
|
nil
|
|
61
65
|
end
|
|
62
66
|
|
|
63
|
-
# Fetch a
|
|
67
|
+
# Fetch registration entries for a language
|
|
64
68
|
#
|
|
65
|
-
# Returns
|
|
69
|
+
# Returns all backend-specific configurations for a language.
|
|
66
70
|
#
|
|
67
71
|
# @param name [Symbol, String] language identifier
|
|
68
|
-
# @
|
|
69
|
-
# @
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
# @param backend_type [Symbol, nil] optional backend type to filter by
|
|
73
|
+
# @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config
|
|
74
|
+
# @example Get all backends
|
|
75
|
+
# entries = LanguageRegistry.registered(:toml)
|
|
76
|
+
# # => {
|
|
77
|
+
# # tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
|
|
78
|
+
# # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
79
|
+
# # }
|
|
80
|
+
# @example Get specific backend
|
|
81
|
+
# entry = LanguageRegistry.registered(:toml, :citrus)
|
|
82
|
+
# # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
83
|
+
def registered(name, backend_type = nil)
|
|
84
|
+
@mutex.synchronize do
|
|
85
|
+
lang_config = @registrations[name.to_sym]
|
|
86
|
+
return unless lang_config
|
|
75
87
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
# @example
|
|
83
|
-
# LanguageRegistry.clear_registrations!
|
|
84
|
-
def clear_registrations!
|
|
85
|
-
@mutex.synchronize { @registrations.clear }
|
|
86
|
-
nil
|
|
88
|
+
if backend_type
|
|
89
|
+
lang_config[backend_type.to_sym]
|
|
90
|
+
else
|
|
91
|
+
lang_config
|
|
92
|
+
end
|
|
93
|
+
end
|
|
87
94
|
end
|
|
88
95
|
|
|
89
96
|
# Fetch a cached language by key or compute and store it
|
|
@@ -119,21 +126,5 @@ module TreeHaver
|
|
|
119
126
|
@mutex.synchronize { @cache.clear }
|
|
120
127
|
nil
|
|
121
128
|
end
|
|
122
|
-
|
|
123
|
-
# Clear everything (registrations and cache)
|
|
124
|
-
#
|
|
125
|
-
# Removes all registered languages and all cached Language objects.
|
|
126
|
-
# Useful for complete teardown in tests.
|
|
127
|
-
#
|
|
128
|
-
# @return [void]
|
|
129
|
-
# @example
|
|
130
|
-
# LanguageRegistry.clear_all!
|
|
131
|
-
def clear_all!
|
|
132
|
-
@mutex.synchronize do
|
|
133
|
-
@registrations.clear
|
|
134
|
-
@cache.clear
|
|
135
|
-
end
|
|
136
|
-
nil
|
|
137
|
-
end
|
|
138
129
|
end
|
|
139
130
|
end
|
data/lib/tree_haver/node.rb
CHANGED
|
@@ -94,6 +94,8 @@ module TreeHaver
|
|
|
94
94
|
#
|
|
95
95
|
# @note This is the key to tree_haver's "write once, run anywhere" promise
|
|
96
96
|
class Node
|
|
97
|
+
include Comparable
|
|
98
|
+
|
|
97
99
|
# The wrapped backend-specific node object
|
|
98
100
|
#
|
|
99
101
|
# This provides direct access to the underlying backend node for advanced usage
|
|
@@ -226,6 +228,26 @@ module TreeHaver
|
|
|
226
228
|
end
|
|
227
229
|
end
|
|
228
230
|
|
|
231
|
+
# Check if the node is structural (non-terminal)
|
|
232
|
+
#
|
|
233
|
+
# In tree-sitter, this is equivalent to being a "named" node.
|
|
234
|
+
# Named nodes represent actual syntactic constructs (e.g., table, keyvalue, string)
|
|
235
|
+
# while anonymous nodes are syntax/punctuation (e.g., [, =, whitespace).
|
|
236
|
+
#
|
|
237
|
+
# For Citrus backends, this checks if the node is a non-terminal rule.
|
|
238
|
+
#
|
|
239
|
+
# @return [Boolean] true if this is a structural (non-terminal) node
|
|
240
|
+
def structural?
|
|
241
|
+
# Delegate to inner_node if it has its own structural? method (e.g., Citrus)
|
|
242
|
+
if @inner_node.respond_to?(:structural?)
|
|
243
|
+
@inner_node.structural?
|
|
244
|
+
else
|
|
245
|
+
# For tree-sitter backends, named? is equivalent to structural?
|
|
246
|
+
# Named nodes are syntactic constructs; anonymous nodes are punctuation
|
|
247
|
+
named?
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
229
251
|
# Get the number of children
|
|
230
252
|
# @return [Integer]
|
|
231
253
|
def child_count
|
|
@@ -242,6 +264,77 @@ module TreeHaver
|
|
|
242
264
|
Node.new(child_node, source: @source)
|
|
243
265
|
end
|
|
244
266
|
|
|
267
|
+
# Get a named child by index
|
|
268
|
+
#
|
|
269
|
+
# Returns the nth named child (skipping unnamed children).
|
|
270
|
+
# Uses backend's native named_child if available, otherwise provides fallback.
|
|
271
|
+
#
|
|
272
|
+
# @param index [Integer] Named child index (0-based)
|
|
273
|
+
# @return [Node, nil] Wrapped named child node, or nil if index out of bounds
|
|
274
|
+
def named_child(index)
|
|
275
|
+
# Try native implementation first
|
|
276
|
+
if @inner_node.respond_to?(:named_child)
|
|
277
|
+
child_node = @inner_node.named_child(index)
|
|
278
|
+
return if child_node.nil?
|
|
279
|
+
return Node.new(child_node, source: @source)
|
|
280
|
+
end
|
|
281
|
+
|
|
282
|
+
# Fallback: manually iterate through children and count named ones
|
|
283
|
+
named_count = 0
|
|
284
|
+
(0...child_count).each do |i|
|
|
285
|
+
child_node = @inner_node.child(i)
|
|
286
|
+
next if child_node.nil?
|
|
287
|
+
|
|
288
|
+
# Check if this child is named
|
|
289
|
+
is_named = if child_node.respond_to?(:named?)
|
|
290
|
+
child_node.named?
|
|
291
|
+
elsif child_node.respond_to?(:is_named?)
|
|
292
|
+
child_node.is_named?
|
|
293
|
+
else
|
|
294
|
+
true # Assume named if we can't determine
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
if is_named
|
|
298
|
+
return Node.new(child_node, source: @source) if named_count == index
|
|
299
|
+
named_count += 1
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
nil # Index out of bounds
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
# Get the count of named children
|
|
307
|
+
#
|
|
308
|
+
# Uses backend's native named_child_count if available, otherwise provides fallback.
|
|
309
|
+
#
|
|
310
|
+
# @return [Integer] Number of named children
|
|
311
|
+
def named_child_count
|
|
312
|
+
# Try native implementation first
|
|
313
|
+
if @inner_node.respond_to?(:named_child_count)
|
|
314
|
+
return @inner_node.named_child_count
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
# Fallback: count named children manually
|
|
318
|
+
count = 0
|
|
319
|
+
(0...child_count).each do |i|
|
|
320
|
+
child_node = @inner_node.child(i)
|
|
321
|
+
next if child_node.nil?
|
|
322
|
+
|
|
323
|
+
# Check if this child is named
|
|
324
|
+
is_named = if child_node.respond_to?(:named?)
|
|
325
|
+
child_node.named?
|
|
326
|
+
elsif child_node.respond_to?(:is_named?)
|
|
327
|
+
child_node.is_named?
|
|
328
|
+
else
|
|
329
|
+
true # Assume named if we can't determine
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
count += 1 if is_named
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
count
|
|
336
|
+
end
|
|
337
|
+
|
|
245
338
|
# Get all children as wrapped nodes
|
|
246
339
|
#
|
|
247
340
|
# @return [Array<Node>] Array of wrapped child nodes
|
|
@@ -325,6 +418,63 @@ module TreeHaver
|
|
|
325
418
|
text
|
|
326
419
|
end
|
|
327
420
|
|
|
421
|
+
# Compare nodes for ordering (used by Comparable module)
|
|
422
|
+
#
|
|
423
|
+
# Nodes are ordered by their position in the source:
|
|
424
|
+
# 1. First by start_byte (earlier nodes come first)
|
|
425
|
+
# 2. Then by end_byte for tie-breaking (shorter spans come first)
|
|
426
|
+
# 3. Then by type for deterministic ordering
|
|
427
|
+
#
|
|
428
|
+
# This allows nodes to be sorted by position and used in sorted collections.
|
|
429
|
+
# The Comparable module provides <, <=, ==, >=, >, and between? based on this.
|
|
430
|
+
#
|
|
431
|
+
# @param other [Node] node to compare with
|
|
432
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
433
|
+
def <=>(other)
|
|
434
|
+
return unless other.is_a?(Node)
|
|
435
|
+
|
|
436
|
+
# Compare by position first (start_byte, then end_byte)
|
|
437
|
+
cmp = start_byte <=> other.start_byte
|
|
438
|
+
return cmp unless cmp.zero?
|
|
439
|
+
|
|
440
|
+
cmp = end_byte <=> other.end_byte
|
|
441
|
+
return cmp unless cmp.zero?
|
|
442
|
+
|
|
443
|
+
# For nodes at the same position with same span, compare by type
|
|
444
|
+
type <=> other.type
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
# Check equality based on inner_node identity
|
|
448
|
+
#
|
|
449
|
+
# Two nodes are equal if they wrap the same backend node object.
|
|
450
|
+
# This is separate from the <=> comparison which orders by position.
|
|
451
|
+
# Nodes at the same position but wrapping different backend nodes are
|
|
452
|
+
# equal according to <=> (positional equality) but not equal according to == (identity equality).
|
|
453
|
+
#
|
|
454
|
+
# Note: We override Comparable's default == behavior to check inner_node identity
|
|
455
|
+
# rather than just relying on <=> returning 0, because we want identity-based
|
|
456
|
+
# equality for testing and collection membership, not position-based equality.
|
|
457
|
+
#
|
|
458
|
+
# @param other [Object] object to compare with
|
|
459
|
+
# @return [Boolean] true if both nodes wrap the same inner_node
|
|
460
|
+
def ==(other)
|
|
461
|
+
return false unless other.is_a?(Node)
|
|
462
|
+
@inner_node == other.inner_node
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
# Alias for == to support both styles
|
|
466
|
+
alias_method :eql?, :==
|
|
467
|
+
|
|
468
|
+
# Generate hash value for this node
|
|
469
|
+
#
|
|
470
|
+
# Uses the hash of the inner_node to ensure nodes wrapping the same
|
|
471
|
+
# backend node have the same hash value.
|
|
472
|
+
#
|
|
473
|
+
# @return [Integer] hash value
|
|
474
|
+
def hash
|
|
475
|
+
@inner_node.hash
|
|
476
|
+
end
|
|
477
|
+
|
|
328
478
|
# Check if node responds to a method (includes delegation to inner_node)
|
|
329
479
|
#
|
|
330
480
|
# @param method_name [Symbol] method to check
|
|
@@ -60,7 +60,7 @@ module TreeHaver
|
|
|
60
60
|
# Pattern for valid symbol names (C identifier format)
|
|
61
61
|
VALID_SYMBOL_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
|
|
62
62
|
|
|
63
|
-
@custom_trusted_directories = []
|
|
63
|
+
@custom_trusted_directories = [] # rubocop:disable ThreadSafety/MutableClassInstanceVariable
|
|
64
64
|
@mutex = Mutex.new
|
|
65
65
|
|
|
66
66
|
module_function
|
|
@@ -75,18 +75,15 @@ module TreeHaver
|
|
|
75
75
|
@mutex.synchronize { dirs.concat(@custom_trusted_directories) }
|
|
76
76
|
|
|
77
77
|
# Add directories from environment variable
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
dirs << expanded if expanded.start_with?("/")
|
|
88
|
-
# :nocov:
|
|
89
|
-
end
|
|
78
|
+
ENV[TRUSTED_DIRS_ENV_VAR]&.split(",")&.each do |dir|
|
|
79
|
+
expanded = File.expand_path(dir.strip)
|
|
80
|
+
# :nocov:
|
|
81
|
+
# File.expand_path always returns absolute paths on Unix/macOS.
|
|
82
|
+
# This guard exists for defensive programming on exotic platforms
|
|
83
|
+
# where expand_path might behave differently, but cannot be tested
|
|
84
|
+
# in standard CI environments.
|
|
85
|
+
dirs << expanded if expanded.start_with?("/")
|
|
86
|
+
# :nocov:
|
|
90
87
|
end
|
|
91
88
|
|
|
92
89
|
dirs.uniq
|
|
@@ -212,21 +209,29 @@ module TreeHaver
|
|
|
212
209
|
return false if path.nil?
|
|
213
210
|
|
|
214
211
|
# Resolve the real path to handle symlinks
|
|
215
|
-
check_path =
|
|
216
|
-
|
|
217
|
-
rescue Errno::ENOENT
|
|
218
|
-
# File doesn't exist yet, check the directory
|
|
219
|
-
dir = File.dirname(path)
|
|
220
|
-
begin
|
|
221
|
-
File.realpath(dir)
|
|
222
|
-
rescue Errno::ENOENT
|
|
223
|
-
return false
|
|
224
|
-
end
|
|
225
|
-
end
|
|
212
|
+
check_path = resolve_check_path(path)
|
|
213
|
+
return false if check_path.nil?
|
|
226
214
|
|
|
227
215
|
trusted_directories.any? { |trusted| check_path.start_with?(trusted) }
|
|
228
216
|
end
|
|
229
217
|
|
|
218
|
+
# Resolve a path to its real path for trust checking
|
|
219
|
+
#
|
|
220
|
+
# @param path [String] the path to resolve
|
|
221
|
+
# @return [String, nil] the resolved path or nil if unresolvable
|
|
222
|
+
# @api private
|
|
223
|
+
def resolve_check_path(path)
|
|
224
|
+
File.realpath(path)
|
|
225
|
+
rescue Errno::ENOENT
|
|
226
|
+
# File doesn't exist yet, check the directory
|
|
227
|
+
dir = File.dirname(path)
|
|
228
|
+
begin
|
|
229
|
+
File.realpath(dir)
|
|
230
|
+
rescue Errno::ENOENT
|
|
231
|
+
nil
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
|
|
230
235
|
# Validate a language name is safe
|
|
231
236
|
#
|
|
232
237
|
# Language names are used to construct:
|
data/lib/tree_haver/tree.rb
CHANGED
|
@@ -6,6 +6,26 @@ module TreeHaver
|
|
|
6
6
|
# This class wraps backend-specific tree objects and provides a unified interface.
|
|
7
7
|
# It stores the source text to enable text extraction from nodes.
|
|
8
8
|
#
|
|
9
|
+
# == Wrapping/Unwrapping Contract
|
|
10
|
+
#
|
|
11
|
+
# TreeHaver follows a consistent pattern for object wrapping:
|
|
12
|
+
#
|
|
13
|
+
# 1. **TreeHaver::Parser** (top level) handles ALL wrapping/unwrapping
|
|
14
|
+
# 2. **Backends** work exclusively with raw backend objects
|
|
15
|
+
# 3. **User-facing API** uses only TreeHaver wrapper classes
|
|
16
|
+
#
|
|
17
|
+
# Specifically for trees:
|
|
18
|
+
# - Backend Parser#parse returns raw backend tree (TreeSitter::Tree, TreeStump::Tree, etc.)
|
|
19
|
+
# - TreeHaver::Parser#parse wraps it in TreeHaver::Tree
|
|
20
|
+
# - TreeHaver::Parser#parse_string unwraps old_tree before passing to backend
|
|
21
|
+
# - Backend Parser#parse_string receives raw backend tree, returns raw backend tree
|
|
22
|
+
# - TreeHaver::Parser#parse_string wraps the returned tree
|
|
23
|
+
#
|
|
24
|
+
# This ensures:
|
|
25
|
+
# - Backends are simple and consistent
|
|
26
|
+
# - All complexity is in one place (TreeHaver top level)
|
|
27
|
+
# - Users always work with TreeHaver wrapper classes
|
|
28
|
+
#
|
|
9
29
|
# @example Basic usage
|
|
10
30
|
# parser = TreeHaver::Parser.new
|
|
11
31
|
# parser.language = TreeHaver::Language.toml
|
|
@@ -107,14 +127,30 @@ module TreeHaver
|
|
|
107
127
|
# # Re-parse with the edited tree for incremental parsing
|
|
108
128
|
# new_tree = parser.parse_string(tree, "x = 42")
|
|
109
129
|
def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
130
|
+
# MRI backend (ruby_tree_sitter) requires an InputEdit object
|
|
131
|
+
if defined?(::TreeSitter::InputEdit) && @inner_tree.is_a?(::TreeSitter::Tree)
|
|
132
|
+
input_edit = ::TreeSitter::InputEdit.new
|
|
133
|
+
input_edit.start_byte = start_byte
|
|
134
|
+
input_edit.old_end_byte = old_end_byte
|
|
135
|
+
input_edit.new_end_byte = new_end_byte
|
|
136
|
+
|
|
137
|
+
# Convert hash points to Point objects if needed
|
|
138
|
+
input_edit.start_point = make_point(start_point)
|
|
139
|
+
input_edit.old_end_point = make_point(old_end_point)
|
|
140
|
+
input_edit.new_end_point = make_point(new_end_point)
|
|
141
|
+
|
|
142
|
+
@inner_tree.edit(input_edit)
|
|
143
|
+
else
|
|
144
|
+
# Other backends may accept keyword arguments directly
|
|
145
|
+
@inner_tree.edit(
|
|
146
|
+
start_byte: start_byte,
|
|
147
|
+
old_end_byte: old_end_byte,
|
|
148
|
+
new_end_byte: new_end_byte,
|
|
149
|
+
start_point: start_point,
|
|
150
|
+
old_end_point: old_end_point,
|
|
151
|
+
new_end_point: new_end_point,
|
|
152
|
+
)
|
|
153
|
+
end
|
|
118
154
|
rescue NoMethodError => e
|
|
119
155
|
# Re-raise as NotAvailable if it's about the edit method
|
|
120
156
|
raise unless e.name == :edit || e.message.include?("edit")
|
|
@@ -123,6 +159,23 @@ module TreeHaver
|
|
|
123
159
|
"Use MRI (ruby_tree_sitter), Rust (tree_stump), or Java (java-tree-sitter) backend."
|
|
124
160
|
end
|
|
125
161
|
|
|
162
|
+
private
|
|
163
|
+
|
|
164
|
+
# Convert a point hash to a TreeSitter::Point if available
|
|
165
|
+
# @api private
|
|
166
|
+
def make_point(point_hash)
|
|
167
|
+
if defined?(::TreeSitter::Point)
|
|
168
|
+
pt = ::TreeSitter::Point.new
|
|
169
|
+
pt.row = point_hash[:row]
|
|
170
|
+
pt.column = point_hash[:column]
|
|
171
|
+
pt
|
|
172
|
+
else
|
|
173
|
+
point_hash
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
public
|
|
178
|
+
|
|
126
179
|
# Check if the current backend supports incremental parsing
|
|
127
180
|
#
|
|
128
181
|
# Incremental parsing allows tree-sitter to reuse unchanged nodes when
|
|
@@ -151,7 +204,8 @@ module TreeHaver
|
|
|
151
204
|
# String representation
|
|
152
205
|
# @return [String]
|
|
153
206
|
def inspect
|
|
154
|
-
|
|
207
|
+
inner_class = @inner_tree ? @inner_tree.class.name : "nil"
|
|
208
|
+
"#<#{self.class} source_length=#{@source&.bytesize || "unknown"} inner_tree=#{inner_class}>"
|
|
155
209
|
end
|
|
156
210
|
|
|
157
211
|
# Check if tree responds to a method (includes delegation to inner_tree)
|
data/lib/tree_haver/version.rb
CHANGED
|
@@ -9,8 +9,8 @@ module TreeHaver
|
|
|
9
9
|
module Version
|
|
10
10
|
# Current version of the tree_haver gem
|
|
11
11
|
#
|
|
12
|
-
# @return [String] the version string (e.g., "
|
|
13
|
-
VERSION = "
|
|
12
|
+
# @return [String] the version string (e.g., "3.0.0")
|
|
13
|
+
VERSION = "3.0.0"
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
# Traditional location for VERSION constant
|