tree_haver 3.0.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +152 -1
- data/CONTRIBUTING.md +46 -14
- data/README.md +425 -102
- data/lib/tree_haver/backends/citrus.rb +39 -0
- data/lib/tree_haver/backends/commonmarker.rb +491 -0
- data/lib/tree_haver/backends/ffi.rb +66 -23
- data/lib/tree_haver/backends/java.rb +3 -2
- data/lib/tree_haver/backends/markly.rb +560 -0
- data/lib/tree_haver/backends/mri.rb +41 -12
- data/lib/tree_haver/backends/prism.rb +625 -0
- data/lib/tree_haver/backends/psych.rb +622 -0
- data/lib/tree_haver/backends/rust.rb +2 -2
- data/lib/tree_haver/grammar_finder.rb +98 -7
- data/lib/tree_haver/language_registry.rb +6 -0
- data/lib/tree_haver/node.rb +73 -40
- data/lib/tree_haver/point.rb +65 -0
- data/lib/tree_haver/rspec/dependency_tags.rb +697 -0
- data/lib/tree_haver/rspec.rb +23 -0
- data/lib/tree_haver/tree.rb +1 -1
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +251 -24
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +32 -5
- metadata.gz.sig +0 -0
|
@@ -135,17 +135,49 @@ module TreeHaver
|
|
|
135
135
|
# 3. Common system installation paths
|
|
136
136
|
#
|
|
137
137
|
# @note Paths from ENV are validated using {PathValidator.safe_library_path?}
|
|
138
|
-
# to prevent path traversal and other attacks. Invalid ENV paths
|
|
138
|
+
# to prevent path traversal and other attacks. Invalid ENV paths cause
|
|
139
|
+
# an error to be raised (Principle of Least Surprise - explicit paths must work).
|
|
140
|
+
#
|
|
141
|
+
# @note Setting the ENV variable to an empty string explicitly disables
|
|
142
|
+
# this grammar. This allows fallback to alternative backends (e.g., Citrus).
|
|
139
143
|
#
|
|
140
144
|
# @return [String, nil] the path to the library, or nil if not found
|
|
145
|
+
# @raise [TreeHaver::NotAvailable] if ENV variable is set to an invalid path
|
|
141
146
|
# @see #find_library_path_safe For stricter validation (trusted directories only)
|
|
142
147
|
def find_library_path
|
|
143
148
|
# Check environment variable first (highest priority)
|
|
144
|
-
|
|
145
|
-
if
|
|
149
|
+
# Use key? to distinguish between "not set" and "set to empty"
|
|
150
|
+
if ENV.key?(env_var_name)
|
|
151
|
+
env_path = ENV[env_var_name]
|
|
152
|
+
|
|
153
|
+
# :nocov: defensive - ENV.key? being true with a nil value is a rare edge case
|
|
154
|
+
if env_path.nil?
|
|
155
|
+
@env_rejection_reason = "explicitly disabled (set to nil)"
|
|
156
|
+
return
|
|
157
|
+
end
|
|
158
|
+
# :nocov:
|
|
159
|
+
|
|
160
|
+
# Empty string means "explicitly skip this grammar"
|
|
161
|
+
# This allows users to disable tree-sitter for specific languages
|
|
162
|
+
# and fall back to alternative backends like Citrus
|
|
163
|
+
if env_path.empty?
|
|
164
|
+
@env_rejection_reason = "explicitly disabled (set to empty string)"
|
|
165
|
+
return
|
|
166
|
+
end
|
|
167
|
+
|
|
146
168
|
# Store why env path was rejected for better error messages
|
|
147
169
|
@env_rejection_reason = validate_env_path(env_path)
|
|
148
|
-
|
|
170
|
+
|
|
171
|
+
# Principle of Least Surprise: If user explicitly sets an ENV variable
|
|
172
|
+
# to a path, that path MUST work. Don't silently fall back to auto-discovery.
|
|
173
|
+
if @env_rejection_reason
|
|
174
|
+
raise TreeHaver::NotAvailable,
|
|
175
|
+
"#{env_var_name} is set to #{env_path.inspect} but #{@env_rejection_reason}. " \
|
|
176
|
+
"Either fix the path, unset the variable to use auto-discovery, " \
|
|
177
|
+
"or set it to empty string to explicitly disable this grammar."
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
return env_path
|
|
149
181
|
end
|
|
150
182
|
|
|
151
183
|
# Search all paths (these are constructed from trusted base dirs)
|
|
@@ -188,11 +220,67 @@ module TreeHaver
|
|
|
188
220
|
end
|
|
189
221
|
end
|
|
190
222
|
|
|
191
|
-
# Check if the grammar library is available
|
|
223
|
+
# Check if the grammar library is available AND usable
|
|
192
224
|
#
|
|
193
|
-
#
|
|
225
|
+
# This checks:
|
|
226
|
+
# 1. The grammar library file exists
|
|
227
|
+
# 2. The tree-sitter runtime is functional (can create a parser)
|
|
228
|
+
#
|
|
229
|
+
# This prevents registering grammars when tree-sitter isn't actually usable,
|
|
230
|
+
# allowing clean fallback to alternative backends like Citrus.
|
|
231
|
+
#
|
|
232
|
+
# @return [Boolean] true if the library can be found AND tree-sitter runtime works
|
|
194
233
|
def available?
|
|
195
|
-
|
|
234
|
+
path = find_library_path
|
|
235
|
+
return false if path.nil?
|
|
236
|
+
|
|
237
|
+
# Check if tree-sitter runtime is actually functional
|
|
238
|
+
# This is cached at the class level since it's the same for all grammars
|
|
239
|
+
self.class.tree_sitter_runtime_usable?
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Backends that use tree-sitter (require native runtime libraries)
|
|
243
|
+
# Other backends (Citrus, Prism, Psych, etc.) don't use tree-sitter
|
|
244
|
+
TREE_SITTER_BACKENDS = [
|
|
245
|
+
TreeHaver::Backends::MRI,
|
|
246
|
+
TreeHaver::Backends::FFI,
|
|
247
|
+
TreeHaver::Backends::Rust,
|
|
248
|
+
TreeHaver::Backends::Java,
|
|
249
|
+
].freeze
|
|
250
|
+
|
|
251
|
+
class << self
|
|
252
|
+
# Check if the tree-sitter runtime is usable
|
|
253
|
+
#
|
|
254
|
+
# Tests whether we can actually create a tree-sitter parser.
|
|
255
|
+
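# Result is cached since this is expensive and won't change during runtime (note: the early `return false` exits below leave the method before the assignment, so those results are not cached).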
|
|
256
|
+
#
|
|
257
|
+
# @return [Boolean] true if tree-sitter runtime is functional
|
|
258
|
+
def tree_sitter_runtime_usable?
|
|
259
|
+
return @tree_sitter_runtime_usable if defined?(@tree_sitter_runtime_usable)
|
|
260
|
+
|
|
261
|
+
@tree_sitter_runtime_usable = begin
|
|
262
|
+
# Try to create a parser using the current backend
|
|
263
|
+
mod = TreeHaver.resolve_backend_module(nil)
|
|
264
|
+
|
|
265
|
+
# Only tree-sitter backends are relevant here
|
|
266
|
+
# Non-tree-sitter backends (Citrus, Prism, Psych, etc.) don't use grammar files
|
|
267
|
+
return false if mod.nil?
|
|
268
|
+
return false unless TREE_SITTER_BACKENDS.include?(mod)
|
|
269
|
+
|
|
270
|
+
# Try to instantiate a parser - this will fail if runtime isn't available
|
|
271
|
+
mod::Parser.new
|
|
272
|
+
true
|
|
273
|
+
rescue NoMethodError, FFI::NotFoundError, LoadError, NotAvailable => _e
|
|
274
|
+
false
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# Reset the cached tree-sitter runtime check (for testing)
|
|
279
|
+
#
|
|
280
|
+
# @api private
|
|
281
|
+
def reset_runtime_check!
|
|
282
|
+
remove_instance_variable(:@tree_sitter_runtime_usable) if defined?(@tree_sitter_runtime_usable)
|
|
283
|
+
end
|
|
196
284
|
end
|
|
197
285
|
|
|
198
286
|
# Check if the grammar library is available in a trusted directory
|
|
@@ -252,6 +340,9 @@ module TreeHaver
|
|
|
252
340
|
env_value = ENV[env_var_name]
|
|
253
341
|
msg += if env_value && @env_rejection_reason
|
|
254
342
|
" #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
|
|
343
|
+
elsif env_value && File.exist?(env_value) && !self.class.tree_sitter_runtime_usable?
|
|
344
|
+
" #{env_var_name} is set and file exists, but no tree-sitter runtime is available. " \
|
|
345
|
+
"Add ruby_tree_sitter, ffi, or tree_stump gem to your Gemfile."
|
|
255
346
|
elsif env_value
|
|
256
347
|
" #{env_var_name} is set but was not used (file may have been removed)."
|
|
257
348
|
else
|
|
@@ -11,20 +11,26 @@ module TreeHaver
|
|
|
11
11
|
# switching, benchmarking, and fallback scenarios.
|
|
12
12
|
#
|
|
13
13
|
# Registration structure:
|
|
14
|
+
# ```ruby
|
|
14
15
|
# @registrations = {
|
|
15
16
|
# toml: {
|
|
16
17
|
# tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
|
|
17
18
|
# citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
|
|
18
19
|
# }
|
|
19
20
|
# }
|
|
21
|
+
# ```
|
|
20
22
|
#
|
|
21
23
|
# @example Register tree-sitter grammar
|
|
24
|
+
# ```ruby
|
|
22
25
|
# TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
|
|
23
26
|
# path: "/path/to/lib.so", symbol: "tree_sitter_toml")
|
|
27
|
+
# ```
|
|
24
28
|
#
|
|
25
29
|
# @example Register Citrus grammar
|
|
30
|
+
# ```ruby
|
|
26
31
|
# TreeHaver::LanguageRegistry.register(:toml, :citrus,
|
|
27
32
|
# grammar_module: TomlRB::Document, gem_name: "toml-rb")
|
|
33
|
+
# ```
|
|
28
34
|
#
|
|
29
35
|
# @api private
|
|
30
36
|
module LanguageRegistry
|
data/lib/tree_haver/node.rb
CHANGED
|
@@ -1,40 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module TreeHaver
|
|
4
|
-
# Point class that works as both a Hash and an object with row/column accessors
|
|
5
|
-
#
|
|
6
|
-
# This provides compatibility with code expecting either:
|
|
7
|
-
# - Hash access: point[:row], point[:column]
|
|
8
|
-
# - Method access: point.row, point.column
|
|
9
|
-
class Point
|
|
10
|
-
attr_reader :row, :column
|
|
11
|
-
|
|
12
|
-
def initialize(row, column)
|
|
13
|
-
@row = row
|
|
14
|
-
@column = column
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
# Hash-like access for compatibility
|
|
18
|
-
def [](key)
|
|
19
|
-
case key
|
|
20
|
-
when :row, "row" then @row
|
|
21
|
-
when :column, "column" then @column
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def to_h
|
|
26
|
-
{row: @row, column: @column}
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def to_s
|
|
30
|
-
"(#{@row}, #{@column})"
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def inspect
|
|
34
|
-
"#<TreeHaver::Point row=#{@row} column=#{@column}>"
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
|
|
38
4
|
# Unified Node wrapper providing a consistent API across all backends
|
|
39
5
|
#
|
|
40
6
|
# This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
|
|
@@ -95,6 +61,7 @@ module TreeHaver
|
|
|
95
61
|
# @note This is the key to tree_haver's "write once, run anywhere" promise
|
|
96
62
|
class Node
|
|
97
63
|
include Comparable
|
|
64
|
+
include Enumerable
|
|
98
65
|
|
|
99
66
|
# The wrapped backend-specific node object
|
|
100
67
|
#
|
|
@@ -165,10 +132,20 @@ module TreeHaver
|
|
|
165
132
|
def start_point
|
|
166
133
|
if @inner_node.respond_to?(:start_point)
|
|
167
134
|
point = @inner_node.start_point
|
|
168
|
-
Point
|
|
135
|
+
# Handle both Point objects and hashes
|
|
136
|
+
if point.is_a?(Hash)
|
|
137
|
+
Point.new(point[:row], point[:column])
|
|
138
|
+
else
|
|
139
|
+
Point.new(point.row, point.column)
|
|
140
|
+
end
|
|
169
141
|
elsif @inner_node.respond_to?(:start_position)
|
|
170
142
|
point = @inner_node.start_position
|
|
171
|
-
Point
|
|
143
|
+
# Handle both Point objects and hashes
|
|
144
|
+
if point.is_a?(Hash)
|
|
145
|
+
Point.new(point[:row], point[:column])
|
|
146
|
+
else
|
|
147
|
+
Point.new(point.row, point.column)
|
|
148
|
+
end
|
|
172
149
|
else
|
|
173
150
|
raise TreeHaver::Error, "Backend node does not support start_point/start_position"
|
|
174
151
|
end
|
|
@@ -180,15 +157,71 @@ module TreeHaver
|
|
|
180
157
|
def end_point
|
|
181
158
|
if @inner_node.respond_to?(:end_point)
|
|
182
159
|
point = @inner_node.end_point
|
|
183
|
-
Point
|
|
160
|
+
# Handle both Point objects and hashes
|
|
161
|
+
if point.is_a?(Hash)
|
|
162
|
+
Point.new(point[:row], point[:column])
|
|
163
|
+
else
|
|
164
|
+
Point.new(point.row, point.column)
|
|
165
|
+
end
|
|
184
166
|
elsif @inner_node.respond_to?(:end_position)
|
|
185
167
|
point = @inner_node.end_position
|
|
186
|
-
Point
|
|
168
|
+
# Handle both Point objects and hashes
|
|
169
|
+
if point.is_a?(Hash)
|
|
170
|
+
Point.new(point[:row], point[:column])
|
|
171
|
+
else
|
|
172
|
+
Point.new(point.row, point.column)
|
|
173
|
+
end
|
|
187
174
|
else
|
|
188
175
|
raise TreeHaver::Error, "Backend node does not support end_point/end_position"
|
|
189
176
|
end
|
|
190
177
|
end
|
|
191
178
|
|
|
179
|
+
# Get the 1-based line number where this node starts
|
|
180
|
+
#
|
|
181
|
+
# Convenience method that converts 0-based row to 1-based line number.
|
|
182
|
+
# This is useful for error messages and matching with editor line numbers.
|
|
183
|
+
#
|
|
184
|
+
# @return [Integer] 1-based line number
|
|
185
|
+
def start_line
|
|
186
|
+
start_point.row + 1
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Get the 1-based line number where this node ends
|
|
190
|
+
#
|
|
191
|
+
# Convenience method that converts 0-based row to 1-based line number.
|
|
192
|
+
#
|
|
193
|
+
# @return [Integer] 1-based line number
|
|
194
|
+
def end_line
|
|
195
|
+
end_point.row + 1
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
# Get position information as a hash
|
|
199
|
+
#
|
|
200
|
+
# Returns a hash with 1-based line numbers and 0-based columns.
|
|
201
|
+
# This format is compatible with *-merge gems' FileAnalysisBase.
|
|
202
|
+
#
|
|
203
|
+
# @return [Hash{Symbol => Integer}] Position hash
|
|
204
|
+
# @example
|
|
205
|
+
# node.source_position
|
|
206
|
+
# # => { start_line: 1, end_line: 3, start_column: 0, end_column: 10 }
|
|
207
|
+
def source_position
|
|
208
|
+
{
|
|
209
|
+
start_line: start_line,
|
|
210
|
+
end_line: end_line,
|
|
211
|
+
start_column: start_point.column,
|
|
212
|
+
end_column: end_point.column,
|
|
213
|
+
}
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# Get the first child node
|
|
217
|
+
#
|
|
218
|
+
# Convenience method for iteration patterns that expect first_child.
|
|
219
|
+
#
|
|
220
|
+
# @return [Node, nil] First child node or nil if no children
|
|
221
|
+
def first_child
|
|
222
|
+
child(0)
|
|
223
|
+
end
|
|
224
|
+
|
|
192
225
|
# Get the node's text content
|
|
193
226
|
#
|
|
194
227
|
# @return [String]
|
|
@@ -435,10 +468,10 @@ module TreeHaver
|
|
|
435
468
|
|
|
436
469
|
# Compare by position first (start_byte, then end_byte)
|
|
437
470
|
cmp = start_byte <=> other.start_byte
|
|
438
|
-
return cmp
|
|
471
|
+
return cmp if cmp.nonzero?
|
|
439
472
|
|
|
440
473
|
cmp = end_byte <=> other.end_byte
|
|
441
|
-
return cmp
|
|
474
|
+
return cmp if cmp.nonzero?
|
|
442
475
|
|
|
443
476
|
# For nodes at the same position with same span, compare by type
|
|
444
477
|
type <=> other.type
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Point class that works as both a Hash and an object with row/column accessors
|
|
5
|
+
#
|
|
6
|
+
# This provides compatibility with code expecting either:
|
|
7
|
+
# - Hash access: point[:row], point[:column]
|
|
8
|
+
# - Method access: point.row, point.column
|
|
9
|
+
#
|
|
10
|
+
# @example Method access
|
|
11
|
+
# point = TreeHaver::Point.new(5, 10)
|
|
12
|
+
# point.row # => 5
|
|
13
|
+
# point.column # => 10
|
|
14
|
+
#
|
|
15
|
+
# @example Hash-like access
|
|
16
|
+
# point[:row] # => 5
|
|
17
|
+
# point[:column] # => 10
|
|
18
|
+
#
|
|
19
|
+
# @example Converting to hash
|
|
20
|
+
# point.to_h # => {row: 5, column: 10}
|
|
21
|
+
class Point
|
|
22
|
+
attr_reader :row, :column
|
|
23
|
+
|
|
24
|
+
# Create a new Point
|
|
25
|
+
#
|
|
26
|
+
# @param row [Integer] the row (line) number, 0-indexed
|
|
27
|
+
# @param column [Integer] the column number, 0-indexed
|
|
28
|
+
def initialize(row, column)
|
|
29
|
+
@row = row
|
|
30
|
+
@column = column
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Hash-like access for compatibility
|
|
34
|
+
#
|
|
35
|
+
# @param key [Symbol, String] :row or :column
|
|
36
|
+
# @return [Integer, nil] the value or nil if key not recognized
|
|
37
|
+
def [](key)
|
|
38
|
+
case key
|
|
39
|
+
when :row, "row" then @row
|
|
40
|
+
when :column, "column" then @column
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Convert to a hash
|
|
45
|
+
#
|
|
46
|
+
# @return [Hash{Symbol => Integer}]
|
|
47
|
+
def to_h
|
|
48
|
+
{row: @row, column: @column}
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# String representation
|
|
52
|
+
#
|
|
53
|
+
# @return [String]
|
|
54
|
+
def to_s
|
|
55
|
+
"(#{@row}, #{@column})"
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Inspect representation
|
|
59
|
+
#
|
|
60
|
+
# @return [String]
|
|
61
|
+
def inspect
|
|
62
|
+
"#<TreeHaver::Point row=#{@row} column=#{@column}>"
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|