tree_haver 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/CHANGELOG.md +96 -1
- data/CONTRIBUTING.md +46 -14
- data/README.md +248 -86
- data/lib/tree_haver/backends/citrus.rb +36 -0
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +15 -13
- data/lib/tree_haver/backends/java.rb +1 -1
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +41 -12
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +1 -1
- data/lib/tree_haver/grammar_finder.rb +74 -5
- data/lib/tree_haver/node.rb +72 -6
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +143 -24
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +8 -4
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
module Backends
|
|
5
|
+
# Psych backend using Ruby's built-in YAML parser
|
|
6
|
+
#
|
|
7
|
+
# This backend wraps Psych, Ruby's standard library YAML parser.
|
|
8
|
+
# Psych provides AST access via Psych.parse_stream which returns
|
|
9
|
+
# Psych::Nodes::* objects (Stream, Document, Mapping, Sequence, Scalar, Alias).
|
|
10
|
+
#
|
|
11
|
+
# @note This backend only parses YAML source code
|
|
12
|
+
# @see https://ruby-doc.org/stdlib/libdoc/psych/rdoc/Psych.html Psych documentation
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage
|
|
15
|
+
# parser = TreeHaver::Parser.new
|
|
16
|
+
# parser.language = TreeHaver::Backends::Psych::Language.yaml
|
|
17
|
+
# tree = parser.parse(yaml_source)
|
|
18
|
+
# root = tree.root_node
|
|
19
|
+
# puts root.type # => "stream"
|
|
20
|
+
module Psych
|
|
21
|
+
# Load-state flags for the lazy `require "psych"` done by .available?.
# They are instance variables of the module object itself.
@load_attempted = false
@loaded = false

class << self
  # Check if the Psych backend is available
  #
  # The first call tries to `require "psych"` and remembers the outcome;
  # subsequent calls return the remembered value until {reset!} is called.
  #
  # @return [Boolean] true if psych could be required
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "psych"
          true
        rescue LoadError
          false
        end
    end

    @loaded
  end

  # Forget the remembered load result so the next {available?} call
  # performs the `require` attempt again (primarily for testing)
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = false
    @loaded = false
  end

  # Describe what this backend can and cannot do
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   backend is not available
  def capabilities
    if available?
      {
        backend: :psych,
        query: false, # Psych doesn't have tree-sitter-style queries
        bytes_field: false, # Psych uses line/column, not byte offsets
        incremental: false, # Psych doesn't support incremental parsing
        pure_ruby: false, # Psych has native libyaml C extension
        yaml_only: true, # Psych only parses YAML
        error_tolerant: false, # Psych raises on syntax errors
      }
    else
      {}
    end
  end
end
|
|
67
|
+
|
|
68
|
+
# Psych language wrapper
|
|
69
|
+
#
|
|
70
|
+
# Unlike tree-sitter which supports many languages via grammar files,
|
|
71
|
+
# Psych only parses YAML. This class exists for API compatibility with
|
|
72
|
+
# other tree_haver backends.
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# language = TreeHaver::Backends::Psych::Language.yaml
|
|
76
|
+
# parser.language = language
|
|
77
|
+
class Language
  include Comparable

  # The language name (always :yaml for Psych)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Create a new Psych language instance
  #
  # @param name [Symbol, String] Language name (should be :yaml); converted
  #   with #to_sym
  def initialize(name = :yaml)
    @name = name.to_sym
    @backend = :psych
  end

  class << self
    # Create a YAML language instance
    #
    # @return [Language] YAML language
    def yaml
      new(:yaml)
    end
  end

  # Comparison for sorting/equality
  #
  # Languages are ordered by name. Comparable derives ==, <, > etc. from
  # this method.
  #
  # @param other [Language] other language
  # @return [Integer, nil] comparison result, nil when other is not a Language
  def <=>(other)
    return unless other.is_a?(Language)

    name <=> other.name
  end

  # Hash-key equality, kept consistent with the == that Comparable derives
  # from {#<=>}. Without this, two equal languages would be treated as
  # different keys by Hash, Set and Array#uniq.
  #
  # @param other [Object] object to compare with
  # @return [Boolean] true when other is a Language with the same name
  def eql?(other)
    other.is_a?(Language) && name == other.name
  end

  # Hash code matching {#eql?}: languages with the same name hash alike.
  #
  # @return [Integer]
  def hash
    [backend, name].hash
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Language name=#{name}>"
  end
end
|
|
119
|
+
|
|
120
|
+
# Psych parser wrapper
|
|
121
|
+
#
|
|
122
|
+
# Wraps Psych.parse_stream to provide TreeHaver-compatible parsing.
|
|
123
|
+
#
|
|
124
|
+
# @example
|
|
125
|
+
# parser = TreeHaver::Backends::Psych::Parser.new
|
|
126
|
+
# parser.language = Language.yaml
|
|
127
|
+
# tree = parser.parse(yaml_source)
|
|
128
|
+
class Parser
  # @return [Language, nil] The language to parse
  attr_accessor :language

  # Create a new Psych parser with no language assigned yet
  def initialize
    @language = nil
  end

  # Parse YAML source code
  #
  # A language must have been assigned and the Psych backend must be
  # loadable; otherwise a RuntimeError is raised before any parsing happens.
  #
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree wrapping the Psych stream and the source
  # @raise [RuntimeError] when no language is set or Psych is unavailable
  # @raise [::Psych::SyntaxError] on syntax errors
  def parse(source)
    raise "Language not set" unless @language
    # Bare `Psych` is resolved lexically at the call site (this backend
    # module when the class is nested in it), unlike `::Psych` below.
    raise "Psych not available" unless Psych.available?

    Tree.new(::Psych.parse_stream(source), source)
  end

  # Alias for compatibility with tree-sitter API
  #
  # @param _old_tree [nil] Ignored (Psych doesn't support incremental parsing)
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
|
|
159
|
+
|
|
160
|
+
# Psych tree wrapper
|
|
161
|
+
#
|
|
162
|
+
# Wraps a Psych::Nodes::Stream to provide TreeHaver-compatible tree interface.
|
|
163
|
+
class Tree
  # @return [::Psych::Nodes::Stream] The underlying Psych stream
  attr_reader :inner_tree

  # @return [String] The original source
  attr_reader :source

  # Create a new tree wrapper
  #
  # The source is split into lines once here; the line array is handed to
  # the root node so it does not have to be recomputed.
  #
  # @param stream [::Psych::Nodes::Stream] Psych stream node
  # @param source [String] Original source
  def initialize(stream, source)
    @inner_tree = stream
    @source = source
    @lines = source.lines
  end

  # Get the root node
  #
  # The Psych stream itself is the root; it is wrapped in a Node on every call.
  #
  # @return [Node] Root node
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # Get parse errors
  #
  # This backend never records errors on the tree, so the list is always empty.
  #
  # @return [Array] Empty array
  def errors
    []
  end

  # Get parse warnings
  #
  # @return [Array] Empty array (this backend records no warnings)
  def warnings
    []
  end

  # Get comments from the document
  #
  # @return [Array] Empty array (this backend does not collect comments)
  def comments
    []
  end

  # @return [String] human-readable representation including the number of
  #   children of the stream (0 when the stream has no children list)
  def inspect
    document_count = @inner_tree.children&.size || 0
    "#<TreeHaver::Backends::Psych::Tree documents=#{document_count}>"
  end
end
|
|
220
|
+
|
|
221
|
+
# Psych node wrapper
|
|
222
|
+
#
|
|
223
|
+
# Wraps Psych::Nodes::* classes to provide TreeHaver::Node-compatible interface.
|
|
224
|
+
#
|
|
225
|
+
# Psych node types:
|
|
226
|
+
# - Stream: Root container
|
|
227
|
+
# - Document: YAML document (multiple per stream possible)
|
|
228
|
+
# - Mapping: Hash/object
|
|
229
|
+
# - Sequence: Array/list
|
|
230
|
+
# - Scalar: Primitive value (string, number, boolean, null)
|
|
231
|
+
# - Alias: YAML anchor reference
|
|
232
|
+
class Node
  include Comparable

  # @return [::Psych::Nodes::Node] The underlying Psych node
  attr_reader :inner_node

  # @return [String] The original source
  attr_reader :source

  # Create a new node wrapper
  #
  # @param node [::Psych::Nodes::Node] Psych node
  # @param source [String] Original source
  # @param lines [Array<String>, nil] Source lines for text extraction;
  #   computed from +source+ when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get the node type as a string
  #
  # The type is the last segment of the wrapped object's class name,
  # lowercased:
  # - Psych::Nodes::Stream → "stream"
  # - Psych::Nodes::Document → "document"
  # - Psych::Nodes::Mapping → "mapping"
  # - Psych::Nodes::Sequence → "sequence"
  # - Psych::Nodes::Scalar → "scalar"
  # - Psych::Nodes::Alias → "alias"
  #
  # @return [String] Node type
  def type
    @inner_node.class.name.split("::").last.downcase
  end

  # Alias for tree-sitter compatibility
  alias_method :kind, :type

  # Get the text content of this node
  #
  # For Scalar nodes, returns the value. For Alias nodes, returns the
  # anchor name prefixed with "*". For everything else, returns the source
  # text spanning the node's location.
  #
  # @return [String] Node text
  def text
    case @inner_node
    when ::Psych::Nodes::Scalar
      @inner_node.value.to_s
    when ::Psych::Nodes::Alias
      "*#{@inner_node.anchor}"
    else
      # For container nodes, extract from source using location
      extract_text_from_location
    end
  end

  # Get child nodes
  #
  # Each call wraps the underlying children in fresh Node objects.
  #
  # @return [Array<Node>] Child nodes (empty when the wrapped node has none)
  def children
    return [] unless @inner_node.respond_to?(:children) && @inner_node.children

    @inner_node.children.map { |child| Node.new(child, @source, @lines) }
  end

  # Iterate over child nodes
  #
  # @yield [Node] Each child node
  # @return [Enumerator, Array<Node>] an Enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block

    children.each(&block)
  end

  # Get the number of children
  #
  # @return [Integer] Child count
  def child_count
    children.size
  end

  # Get child by index
  #
  # @param index [Integer] Child index
  # @return [Node, nil] Child node
  def child(index)
    children[index]
  end

  # Get start byte offset
  #
  # Derived from the wrapped node's 0-based start line/column. Returns 0
  # when the wrapped node exposes no location.
  #
  # @return [Integer] Start byte offset
  def start_byte
    return 0 unless @inner_node.respond_to?(:start_line)

    line = @inner_node.start_line || 0
    col = @inner_node.start_column || 0
    calculate_byte_offset(line, col)
  end

  # Get end byte offset
  #
  # Derived from the wrapped node's end line/column; when the node exposes
  # no end location, falls back to start_byte plus the byte size of #text.
  #
  # @return [Integer] End byte offset
  def end_byte
    return start_byte + text.bytesize unless @inner_node.respond_to?(:end_line)

    line = @inner_node.end_line || 0
    col = @inner_node.end_column || 0
    calculate_byte_offset(line, col)
  end

  # Get start point (row, column)
  #
  # @return [Point] Start position (0-based)
  def start_point
    row = (@inner_node.respond_to?(:start_line) ? @inner_node.start_line : 0) || 0
    col = (@inner_node.respond_to?(:start_column) ? @inner_node.start_column : 0) || 0
    Point.new(row, col)
  end

  # Get end point (row, column)
  #
  # @return [Point] End position (0-based)
  def end_point
    row = (@inner_node.respond_to?(:end_line) ? @inner_node.end_line : 0) || 0
    col = (@inner_node.respond_to?(:end_column) ? @inner_node.end_column : 0) || 0
    Point.new(row, col)
  end

  # Get the 1-based line number where this node starts
  #
  # The wrapped node's rows are treated as 0-based, so 1 is added.
  #
  # @return [Integer] 1-based line number
  def start_line
    start_point.row + 1
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    end_point.row + 1
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # Check if this is a named (structural) node
  #
  # @return [Boolean] always true for this backend
  def named?
    true
  end

  # Alias for tree-sitter compatibility
  alias_method :structural?, :named?

  # Check if the node or any descendant has an error
  #
  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # Check if this is a missing node
  #
  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Comparison for sorting
  #
  # Orders by start byte, then by end byte.
  #
  # @param other [Node] other node
  # @return [Integer, nil] comparison result, nil when other has no start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)

    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?

    end_byte <=> other.end_byte
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Node type=#{type} children=#{child_count}>"
  end

  # Psych-specific: Get the anchor name for Alias/anchored nodes
  #
  # @return [String, nil] Anchor name, nil when the wrapped node has no anchor accessor
  def anchor
    @inner_node.anchor if @inner_node.respond_to?(:anchor)
  end

  # Psych-specific: Get the tag for tagged nodes
  #
  # @return [String, nil] Tag, nil when the wrapped node has no tag accessor
  def tag
    @inner_node.tag if @inner_node.respond_to?(:tag)
  end

  # Psych-specific: Get the scalar value
  #
  # @return [String, nil] Value for scalar nodes
  def value
    @inner_node.value if @inner_node.respond_to?(:value)
  end

  # Psych-specific: Check if this is a mapping (hash)
  #
  # @return [Boolean]
  def mapping?
    @inner_node.is_a?(::Psych::Nodes::Mapping)
  end

  # Psych-specific: Check if this is a sequence (array)
  #
  # @return [Boolean]
  def sequence?
    @inner_node.is_a?(::Psych::Nodes::Sequence)
  end

  # Psych-specific: Check if this is a scalar (primitive)
  #
  # @return [Boolean]
  def scalar?
    @inner_node.is_a?(::Psych::Nodes::Scalar)
  end

  # Psych-specific: Check if this is an alias
  #
  # @return [Boolean]
  def alias?
    @inner_node.is_a?(::Psych::Nodes::Alias)
  end

  # Psych-specific: Get mapping entries as key-value pairs
  #
  # For Mapping nodes, children alternate key, value, key, value...
  # A trailing key without a value is dropped.
  #
  # @return [Array<Array(Node, Node)>] Key-value pairs (empty for non-mappings)
  def mapping_entries
    return [] unless mapping?

    children.each_slice(2).select { |key, val| key && val }
  end

  private

  # Calculate byte offset from line and column
  #
  # The column is a character index into the line (the same unit
  # #extract_text_from_location uses when slicing), so the prefix of the
  # target line is converted to its byte length rather than added as-is.
  # Adding the raw column would under-count on lines that contain
  # multibyte characters before the position.
  #
  # @param line [Integer] 0-based line number
  # @param column [Integer] 0-based column, in characters
  # @return [Integer] Byte offset
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
        # Account for the terminator when the stored line lacks one
        offset += 1 unless line_content.end_with?("\n")
      else
        # String#[] clamps to the line length; nil only for a negative column
        offset += (line_content[0, column] || "").bytesize
        break
      end
    end
    offset
  end

  # Extract text from source using location
  #
  # Slices the stored lines by the wrapped node's 0-based line/column
  # range (end column exclusive). Missing lines are treated as empty.
  #
  # @return [String] Extracted text ("" when the node exposes no location)
  def extract_text_from_location
    return "" unless @inner_node.respond_to?(:start_line) && @inner_node.respond_to?(:end_line)

    first_row = @inner_node.start_line || 0
    last_row = @inner_node.end_line || first_row
    first_col = @inner_node.start_column || 0
    last_col = @inner_node.end_column || 0

    if first_row == last_row
      line = @lines[first_row] || ""
      return line[first_col...last_col] || ""
    end

    pieces = (first_row..last_row).map do |row|
      line = @lines[row] || ""
      if row == first_row
        line[first_col..]
      elsif row == last_row
        line[0...last_col]
      else
        line
      end
    end
    pieces.compact.join
  end
end
|
|
564
|
+
|
|
565
|
+
# Point struct for position information
|
|
566
|
+
#
|
|
567
|
+
# Provides both method and hash-style access for compatibility.
|
|
568
|
+
Point = Struct.new(:row, :column) do
  # Hash-like access
  #
  # Only the keys :row / "row" and :column / "column" are recognised;
  # anything else yields nil.
  #
  # @param key [Symbol, String] :row or :column
  # @return [Integer, nil]
  def [](key)
    if [:row, "row"].include?(key)
      row
    elsif [:column, "column"].include?(key)
      column
    end
  end

  # @return [Hash] the position as {row:, column:}
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] the position formatted as "(row, column)"
  def to_s
    format("(%s, %s)", row, column)
  end

  # @return [String] human-readable representation
  def inspect
    format("#<TreeHaver::Backends::Psych::Point row=%s column=%s>", row, column)
  end
end
|
|
595
|
+
end
|
|
596
|
+
end
|
|
597
|
+
end
|
|
@@ -175,7 +175,7 @@ module TreeHaver
|
|
|
175
175
|
lang_name = lang.respond_to?(:name) ? lang.name : lang.to_s
|
|
176
176
|
# tree_stump uses set_language with a string name
|
|
177
177
|
@parser.set_language(lang_name)
|
|
178
|
-
lang
|
|
178
|
+
lang # rubocop:disable Lint/Void (intentional return value)
|
|
179
179
|
end
|
|
180
180
|
|
|
181
181
|
# Parse source code
|
|
@@ -137,12 +137,25 @@ module TreeHaver
|
|
|
137
137
|
# @note Paths from ENV are validated using {PathValidator.safe_library_path?}
|
|
138
138
|
# to prevent path traversal and other attacks. Invalid ENV paths are ignored.
|
|
139
139
|
#
|
|
140
|
+
# @note Setting the ENV variable to an empty string explicitly disables
|
|
141
|
+
# this grammar. This allows fallback to alternative backends (e.g., Citrus).
|
|
142
|
+
#
|
|
140
143
|
# @return [String, nil] the path to the library, or nil if not found
|
|
141
144
|
# @see #find_library_path_safe For stricter validation (trusted directories only)
|
|
142
145
|
def find_library_path
|
|
143
146
|
# Check environment variable first (highest priority)
|
|
144
|
-
|
|
145
|
-
if
|
|
147
|
+
# Use key? to distinguish between "not set" and "set to empty"
|
|
148
|
+
if ENV.key?(env_var_name)
|
|
149
|
+
env_path = ENV[env_var_name]
|
|
150
|
+
|
|
151
|
+
# Empty string means "explicitly skip this grammar"
|
|
152
|
+
# This allows users to disable tree-sitter for specific languages
|
|
153
|
+
# and fall back to alternative backends like Citrus
|
|
154
|
+
if env_path.empty?
|
|
155
|
+
@env_rejection_reason = "explicitly disabled (set to empty string)"
|
|
156
|
+
return
|
|
157
|
+
end
|
|
158
|
+
|
|
146
159
|
# Store why env path was rejected for better error messages
|
|
147
160
|
@env_rejection_reason = validate_env_path(env_path)
|
|
148
161
|
return env_path if @env_rejection_reason.nil?
|
|
@@ -188,11 +201,67 @@ module TreeHaver
|
|
|
188
201
|
end
|
|
189
202
|
end
|
|
190
203
|
|
|
191
|
-
# Check if the grammar library is available
|
|
204
|
+
# Check if the grammar library is available AND usable
|
|
205
|
+
#
|
|
206
|
+
# This checks:
|
|
207
|
+
# 1. The grammar library file exists
|
|
208
|
+
# 2. The tree-sitter runtime is functional (can create a parser)
|
|
192
209
|
#
|
|
193
|
-
#
|
|
210
|
+
# This prevents registering grammars when tree-sitter isn't actually usable,
|
|
211
|
+
# allowing clean fallback to alternative backends like Citrus.
|
|
212
|
+
#
|
|
213
|
+
# @return [Boolean] true if the library can be found AND tree-sitter runtime works
|
|
194
214
|
def available?
  # Without a resolvable library path there is nothing to load.
  library_found = !find_library_path.nil?

  # The runtime probe lives on the class, so it is shared by every
  # finder instance; it is only consulted once a library path exists.
  library_found && self.class.tree_sitter_runtime_usable?
end
|
|
222
|
+
|
|
223
|
+
# Backends that use tree-sitter (require native runtime libraries)
|
|
224
|
+
# Other backends (Citrus, Prism, Psych, etc.) don't use tree-sitter
|
|
225
|
+
TREE_SITTER_BACKENDS = [
  TreeHaver::Backends::MRI,
  TreeHaver::Backends::FFI,
  TreeHaver::Backends::Rust,
  TreeHaver::Backends::Java,
].freeze

class << self
  # Check if the tree-sitter runtime is usable
  #
  # Resolves the current backend module and, when it is one of
  # TREE_SITTER_BACKENDS, tries to instantiate its Parser. The outcome of
  # that instantiation attempt (true, or false when one of the rescued
  # errors is raised) is memoized on the class.
  #
  # NOTE(review): when no backend resolves, or the backend is not a
  # tree-sitter one, the method returns false WITHOUT memoizing, so the
  # check runs again on the next call. Confirm whether that is intended.
  #
  # @return [Boolean] true if tree-sitter runtime is functional
  def tree_sitter_runtime_usable?
    return @tree_sitter_runtime_usable if defined?(@tree_sitter_runtime_usable)

    begin
      # Try to create a parser using the current backend
      backend = TreeHaver.resolve_backend_module(nil)

      # Only tree-sitter backends are relevant here; leave without caching
      return false unless backend && TREE_SITTER_BACKENDS.include?(backend)

      # Instantiating a parser raises if the runtime isn't available
      backend::Parser.new
      usable = true
    rescue NoMethodError, FFI::NotFoundError, LoadError, NotAvailable
      usable = false
    end

    @tree_sitter_runtime_usable = usable
  end

  # Reset the cached tree-sitter runtime check (for testing)
  #
  # Removes the memoized value, if any, so the next
  # {tree_sitter_runtime_usable?} call probes the runtime again.
  #
  # @api private
  def reset_runtime_check!
    return unless defined?(@tree_sitter_runtime_usable)

    remove_instance_variable(:@tree_sitter_runtime_usable)
  end
end
|
|
197
266
|
|
|
198
267
|
# Check if the grammar library is available in a trusted directory
|