tree_haver 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,622 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Psych backend using Ruby's built-in YAML parser
6
+ #
7
+ # This backend wraps Psych, Ruby's standard library YAML parser.
8
+ # Psych provides AST access via Psych.parse_stream which returns
9
+ # Psych::Nodes::* objects (Stream, Document, Mapping, Sequence, Scalar, Alias).
10
+ #
11
+ # @note This backend only parses YAML source code
12
+ # @see https://ruby-doc.org/stdlib/libdoc/psych/rdoc/Psych.html Psych documentation
13
+ #
14
+ # @example Basic usage
15
+ # parser = TreeHaver::Parser.new
16
+ # parser.language = TreeHaver::Backends::Psych::Language.yaml
17
+ # tree = parser.parse(yaml_source)
18
+ # root = tree.root_node
19
+ # puts root.type # => "stream"
20
+ module Psych
21
# Load-state flags for the lazy `require "psych"` done by `available?`.
# `@load_attempted` records that a require was tried; `@loaded` records
# whether it succeeded.
@load_attempted = false
@loaded = false

class << self
  # Check if the Psych backend is available
  #
  # Psych is part of Ruby stdlib, so it should always be available.
  # The first call performs the require; later calls return the
  # remembered result until `reset!` clears it.
  #
  # @return [Boolean] true if psych is available
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "psych"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Reset the load state (primarily for testing)
  #
  # After this call the next `available?` retries the require.
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = false
    @loaded = false
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   backend is not available
  def capabilities
    if available?
      {
        backend: :psych,
        query: false, # Psych doesn't have tree-sitter-style queries
        bytes_field: false, # Psych uses line/column, not byte offsets
        incremental: false, # Psych doesn't support incremental parsing
        pure_ruby: false, # Psych has native libyaml C extension
        yaml_only: true, # Psych only parses YAML
        error_tolerant: false, # Psych raises on syntax errors
      }
    else
      {}
    end
  end
end
67
+
68
# Psych language wrapper
#
# Unlike tree-sitter, which supports many languages via grammar files,
# Psych only parses YAML. This class exists for API compatibility with
# other tree_haver backends.
#
# @example
#   language = TreeHaver::Backends::Psych::Language.yaml
#   parser.language = language
class Language
  include Comparable

  # The language name (always :yaml for Psych)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Create a YAML language instance
  #
  # @return [Language] YAML language
  def self.yaml
    new(:yaml)
  end

  # Create a new Psych language instance
  #
  # @param name [Symbol, #to_sym] Language name (should be :yaml)
  def initialize(name = :yaml)
    @name = name.to_sym
    @backend = :psych
  end

  # Comparison for sorting/equality
  #
  # Languages are ordered by name; anything that is not a Language is
  # not comparable.
  #
  # @param other [Language] other language
  # @return [Integer, nil] comparison result, nil when not comparable
  def <=>(other)
    if other.is_a?(Language)
      name <=> other.name
    end
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Language name=#{name}>"
  end
end
119
+
120
# Psych parser wrapper
#
# Wraps Psych.parse_stream to provide TreeHaver-compatible parsing.
#
# @example
#   parser = TreeHaver::Backends::Psych::Parser.new
#   parser.language = Language.yaml
#   tree = parser.parse(yaml_source)
class Parser
  # @return [Language, nil] The language to parse
  attr_accessor :language

  # Create a new Psych parser with no language set
  def initialize
    @language = nil
  end

  # Parse YAML source code
  #
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or the backend is unavailable
  # @raise [::Psych::SyntaxError] on syntax errors
  def parse(source)
    raise "Language not set" unless @language
    # Unqualified `Psych` is the enclosing backend module (which defines
    # `available?`); the stdlib parser is reached via `::Psych` below.
    raise "Psych not available" unless Psych.available?

    stream = ::Psych.parse_stream(source)
    Tree.new(stream, source)
  end

  # Alias for compatibility with tree-sitter API
  #
  # @param _old_tree [nil] Ignored (Psych doesn't support incremental parsing)
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
159
+
160
# Psych tree wrapper
#
# Wraps a Psych::Nodes::Stream to provide a TreeHaver-compatible tree interface.
class Tree
  # @return [::Psych::Nodes::Stream] The underlying Psych stream
  attr_reader :inner_tree

  # @return [String] The original source
  attr_reader :source

  # Create a new tree wrapper
  #
  # The source is split into lines once here so every Node built from
  # this tree can share the same array.
  #
  # @param stream [::Psych::Nodes::Stream] Psych stream node
  # @param source [String] Original source
  def initialize(stream, source)
    @source = source
    @lines = source.lines
    @inner_tree = stream
  end

  # Get the root node
  #
  # For YAML, the stream is the root. A fresh Node wrapper is built on
  # every call.
  #
  # @return [Node] Root node
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # Get parse errors
  #
  # Psych raises exceptions on parse errors rather than recording them,
  # so this is always empty if we got a tree.
  #
  # @return [Array] Empty array (no errors if parsing succeeded)
  def errors
    []
  end

  # Get parse warnings
  #
  # @return [Array] Empty array (Psych doesn't produce warnings)
  def warnings
    []
  end

  # Get comments from the document
  #
  # Psych doesn't preserve comments in the AST by default.
  #
  # @return [Array] Empty array
  def comments
    []
  end

  # @return [String] human-readable representation including the
  #   number of documents in the stream (0 when it has no children)
  def inspect
    document_count = @inner_tree.children&.size || 0
    "#<TreeHaver::Backends::Psych::Tree documents=#{document_count}>"
  end
end
220
+
221
# Psych node wrapper
#
# Wraps Psych::Nodes::* classes to provide TreeHaver::Node-compatible interface.
#
# Psych node types:
# - Stream: Root container
# - Document: YAML document (multiple per stream possible)
# - Mapping: Hash/object
# - Sequence: Array/list
# - Scalar: Primitive value (string, number, boolean, null)
# - Alias: YAML anchor reference
class Node
  include Comparable
  include Enumerable

  # @return [::Psych::Nodes::Node] The underlying Psych node
  attr_reader :inner_node

  # @return [String] The original source
  attr_reader :source

  # Create a new node wrapper
  #
  # @param node [::Psych::Nodes::Node] Psych node
  # @param source [String] Original source
  # @param lines [Array<String>, nil] Source lines for text extraction;
  #   computed from +source+ when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get the node type as a string
  #
  # Maps Psych class names to lowercase type strings:
  # - Psych::Nodes::Stream → "stream"
  # - Psych::Nodes::Document → "document"
  # - Psych::Nodes::Mapping → "mapping"
  # - Psych::Nodes::Sequence → "sequence"
  # - Psych::Nodes::Scalar → "scalar"
  # - Psych::Nodes::Alias → "alias"
  #
  # @return [String] Node type
  def type
    @inner_node.class.name.split("::").last.downcase
  end

  # Alias for tree-sitter compatibility
  alias_method :kind, :type

  # Get the text content of this node
  #
  # For Scalar nodes, returns the parsed value (not the raw source
  # slice). For Alias nodes, returns "*" followed by the anchor name.
  # For containers, returns the source text spanning the node's location.
  #
  # @return [String] Node text
  def text
    case @inner_node
    when ::Psych::Nodes::Scalar
      @inner_node.value.to_s
    when ::Psych::Nodes::Alias
      "*#{@inner_node.anchor}"
    else
      # For container nodes, extract from source using location
      extract_text_from_location
    end
  end

  # Get child nodes
  #
  # New wrapper objects are created on every call.
  #
  # @return [Array<Node>] Child nodes (empty for leaf nodes)
  def children
    return [] unless @inner_node.respond_to?(:children) && @inner_node.children

    @inner_node.children.map { |child| Node.new(child, @source, @lines) }
  end

  # Iterate over child nodes
  #
  # @yield [Node] Each child node
  # @return [Enumerator, Array<Node>] an enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # Get the number of children
  #
  # @return [Integer] Child count
  def child_count
    children.size
  end

  # Get child by index
  #
  # @param index [Integer] Child index
  # @return [Node, nil] Child node
  def child(index)
    children[index]
  end

  # Get start byte offset
  #
  # Psych doesn't provide byte offsets directly, so we calculate from line/column.
  #
  # @return [Integer] Start byte offset
  def start_byte
    return 0 unless @inner_node.respond_to?(:start_line)

    line = @inner_node.start_line || 0
    col = @inner_node.start_column || 0
    calculate_byte_offset(line, col)
  end

  # Get end byte offset
  #
  # @return [Integer] End byte offset
  def end_byte
    return start_byte + text.bytesize unless @inner_node.respond_to?(:end_line)

    line = @inner_node.end_line || 0
    col = @inner_node.end_column || 0
    calculate_byte_offset(line, col)
  end

  # Get start point (row, column)
  #
  # @return [Point] Start position (0-based)
  def start_point
    row = (@inner_node.respond_to?(:start_line) ? @inner_node.start_line : 0) || 0
    col = (@inner_node.respond_to?(:start_column) ? @inner_node.start_column : 0) || 0
    Point.new(row, col)
  end

  # Get end point (row, column)
  #
  # @return [Point] End position (0-based)
  def end_point
    row = (@inner_node.respond_to?(:end_line) ? @inner_node.end_line : 0) || 0
    col = (@inner_node.respond_to?(:end_column) ? @inner_node.end_column : 0) || 0
    Point.new(row, col)
  end

  # Get the 1-based line number where this node starts
  #
  # Psych provides 0-based line numbers, so we add 1.
  #
  # @return [Integer] 1-based line number
  def start_line
    start_point.row + 1
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    end_point.row + 1
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # Check if this is a named (structural) node
  #
  # All Psych nodes are structural.
  #
  # @return [Boolean] true
  def named?
    true
  end

  # Alias for tree-sitter compatibility
  alias_method :structural?, :named?

  # Check if the node or any descendant has an error
  #
  # Psych raises on errors rather than embedding them.
  #
  # @return [Boolean] false
  def has_error?
    false
  end

  # Check if this is a missing node
  #
  # Psych doesn't have missing nodes.
  #
  # @return [Boolean] false
  def missing?
    false
  end

  # Comparison for sorting
  #
  # Orders by start byte, then by end byte when the starts are equal.
  #
  # @param other [Node] other node
  # @return [Integer, nil] comparison result; nil when +other+ has no
  #   +start_byte+
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Node type=#{type} children=#{child_count}>"
  end

  # Get the next sibling
  #
  # @raise [NotImplementedError] Psych nodes don't have sibling references
  # @return [void]
  def next_sibling
    raise NotImplementedError, "Psych backend does not support sibling navigation"
  end

  # Get the previous sibling
  #
  # @raise [NotImplementedError] Psych nodes don't have sibling references
  # @return [void]
  def prev_sibling
    raise NotImplementedError, "Psych backend does not support sibling navigation"
  end

  # Get the parent node
  #
  # @raise [NotImplementedError] Psych nodes don't have parent references
  # @return [void]
  def parent
    raise NotImplementedError, "Psych backend does not support parent navigation"
  end

  # Psych-specific: Get the anchor name for Alias/anchored nodes
  #
  # @return [String, nil] Anchor name
  def anchor
    @inner_node.anchor if @inner_node.respond_to?(:anchor)
  end

  # Psych-specific: Get the tag for tagged nodes
  #
  # @return [String, nil] Tag
  def tag
    @inner_node.tag if @inner_node.respond_to?(:tag)
  end

  # Psych-specific: Get the scalar value
  #
  # @return [String, nil] Value for scalar nodes
  def value
    @inner_node.value if @inner_node.respond_to?(:value)
  end

  # Psych-specific: Check if this is a mapping (hash)
  #
  # @return [Boolean]
  def mapping?
    @inner_node.is_a?(::Psych::Nodes::Mapping)
  end

  # Psych-specific: Check if this is a sequence (array)
  #
  # @return [Boolean]
  def sequence?
    @inner_node.is_a?(::Psych::Nodes::Sequence)
  end

  # Psych-specific: Check if this is a scalar (primitive)
  #
  # @return [Boolean]
  def scalar?
    @inner_node.is_a?(::Psych::Nodes::Scalar)
  end

  # Psych-specific: Check if this is an alias
  #
  # @return [Boolean]
  def alias?
    @inner_node.is_a?(::Psych::Nodes::Alias)
  end

  # Psych-specific: Get mapping entries as key-value pairs
  #
  # For Mapping nodes, children alternate key, value, key, value...
  # A trailing key without a value is dropped.
  #
  # @return [Array<Array(Node, Node)>] Key-value pairs (empty for non-mappings)
  def mapping_entries
    return [] unless mapping?

    children.each_slice(2).select { |key, val| key && val }
  end

  private

  # Calculate byte offset from line and column
  #
  # Psych (libyaml) reports columns in characters, so the column is
  # converted to bytes by measuring the character prefix of the target
  # line. Adding the column directly to a byte total would be wrong on
  # any line containing multibyte UTF-8 before the node.
  #
  # @param line [Integer] 0-based line number
  # @param column [Integer] 0-based column, in characters
  # @return [Integer] Byte offset
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
        offset += 1 unless line_content.end_with?("\n") # Add newline
      else
        # String#[] indexes by character and stops at the end of the
        # line, so this also caps an over-long column.
        offset += line_content[0, column].to_s.bytesize
        break
      end
    end
    offset
  end

  # Extract text from source using location
  #
  # Columns are applied as character indexes into the source lines.
  #
  # @return [String] Extracted text ("" when the node has no location)
  def extract_text_from_location
    return "" unless @inner_node.respond_to?(:start_line) && @inner_node.respond_to?(:end_line)

    start_line = @inner_node.start_line || 0
    end_line = @inner_node.end_line || start_line
    start_col = @inner_node.start_column || 0
    end_col = @inner_node.end_column || 0

    if start_line == end_line
      line = @lines[start_line] || ""
      line[start_col...end_col] || ""
    else
      result = []
      (start_line..end_line).each do |ln|
        line = @lines[ln] || ""
        result << if ln == start_line
          line[start_col..]
        elsif ln == end_line
          line[0...end_col]
        else
          line
        end
      end
      # Slices past the end of a line come back nil; drop them before joining.
      result.compact.join
    end
  end
end
589
+
590
# Point struct for position information
#
# Provides both method and hash-style access for compatibility.
Point = Struct.new(:row, :column) do
  # Hash-like access
  #
  # Only the keys :row / "row" and :column / "column" are recognised;
  # any other key (including integer indexes) yields nil.
  #
  # @param key [Symbol, String] :row or :column
  # @return [Integer, nil]
  def [](key)
    if [:row, "row"].include?(key)
      row
    elsif [:column, "column"].include?(key)
      column
    end
  end

  # @return [Hash] the point as {row:, column:}
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Point row=#{row} column=#{column}>"
  end
end
620
+ end
621
+ end
622
+ end
@@ -12,7 +12,7 @@ module TreeHaver
12
12
  # suitable for editor/IDE use cases where performance is critical.
13
13
  #
14
14
  # @note This backend works on MRI Ruby. JRuby/TruffleRuby support is unknown.
15
- # @see https://github.com/anthropics/tree_stump tree_stump
15
+ # @see https://github.com/joker1007/tree_stump tree_stump
16
16
  module Rust
17
17
  @load_attempted = false
18
18
  @loaded = false
@@ -175,7 +175,7 @@ module TreeHaver
175
175
  lang_name = lang.respond_to?(:name) ? lang.name : lang.to_s
176
176
  # tree_stump uses set_language with a string name
177
177
  @parser.set_language(lang_name)
178
- lang
178
+ lang # rubocop:disable Lint/Void (intentional return value)
179
179
  end
180
180
 
181
181
  # Parse source code