tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,597 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Psych backend using Ruby's built-in YAML parser
6
+ #
7
+ # This backend wraps Psych, Ruby's standard library YAML parser.
8
+ # Psych provides AST access via Psych.parse_stream which returns
9
+ # Psych::Nodes::* objects (Stream, Document, Mapping, Sequence, Scalar, Alias).
10
+ #
11
+ # @note This backend only parses YAML source code
12
+ # @see https://ruby-doc.org/stdlib/libdoc/psych/rdoc/Psych.html Psych documentation
13
+ #
14
+ # @example Basic usage
15
+ # parser = TreeHaver::Parser.new
16
+ # parser.language = TreeHaver::Backends::Psych::Language.yaml
17
+ # tree = parser.parse(yaml_source)
18
+ # root = tree.root_node
19
+ # puts root.type # => "stream"
20
+ module Psych
21
# Load-state flags for the lazy `require "psych"` performed by .available?.
# They are class-level instance variables on this backend module.
@load_attempted = false
@loaded = false

class << self
  # Check if the Psych backend is available
  #
  # The first call tries to `require "psych"` and remembers the outcome;
  # later calls return the remembered result without requiring again.
  #
  # @return [Boolean] true if psych could be required
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "psych"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Forget the cached load result so the next .available? call probes again
  # (primarily for testing)
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = @loaded = false
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the backend
  #   is not available
  def capabilities
    if available?
      {
        backend: :psych,
        query: false, # Psych doesn't have tree-sitter-style queries
        bytes_field: false, # Psych uses line/column, not byte offsets
        incremental: false, # Psych doesn't support incremental parsing
        pure_ruby: false, # Psych has native libyaml C extension
        yaml_only: true, # Psych only parses YAML
        error_tolerant: false, # Psych raises on syntax errors
      }
    else
      {}
    end
  end
end
67
+
68
# Psych language wrapper
#
# Unlike tree-sitter which supports many languages via grammar files,
# Psych only parses YAML. This class exists for API compatibility with
# other tree_haver backends.
#
# Instances are value objects: ordering and `==` come from Comparable via
# #<=>, and #eql?/#hash are defined to match so that equal languages also
# behave as the same Hash key and are de-duplicated by Array#uniq.
#
# @example
#   language = TreeHaver::Backends::Psych::Language.yaml
#   parser.language = language
class Language
  include Comparable

  # The language name (always :yaml for Psych)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Create a new Psych language instance
  #
  # @param name [Symbol, #to_sym] Language name (should be :yaml)
  def initialize(name = :yaml)
    @name = name.to_sym
    @backend = :psych
  end

  class << self
    # Create a YAML language instance
    #
    # @return [Language] YAML language
    def yaml
      new(:yaml)
    end
  end

  # Comparison for sorting/equality
  #
  # @param other [Language] other language
  # @return [Integer, nil] comparison of the names, or nil when +other+ is
  #   not a Language
  def <=>(other)
    return unless other.is_a?(Language)
    name <=> other.name
  end

  # Hash-key equality: same backend and same name.
  #
  # Without this, two languages that are `==` would still be distinct Hash
  # keys because Object#eql? compares identity.
  #
  # @param other [Object] object to compare with
  # @return [Boolean]
  def eql?(other)
    other.is_a?(Language) && backend == other.backend && name == other.name
  end

  # Hash code consistent with #eql?
  #
  # @return [Integer]
  def hash
    [backend, name].hash
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Language name=#{name}>"
  end
end
119
+
120
# Psych parser wrapper
#
# Delegates to Psych.parse_stream and wraps the resulting stream in a Tree
# so callers can use the TreeHaver parsing interface.
#
# @example
#   parser = TreeHaver::Backends::Psych::Parser.new
#   parser.language = Language.yaml
#   tree = parser.parse(yaml_source)
class Parser
  # @return [Language, nil] The language to parse
  attr_accessor :language

  # Create a new Psych parser with no language assigned yet
  def initialize
    @language = nil
  end

  # Parse YAML source code
  #
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or Psych is unavailable
  # @raise [::Psych::SyntaxError] on syntax errors
  def parse(source)
    ensure_ready!

    Tree.new(::Psych.parse_stream(source), source)
  end

  # Alias for compatibility with tree-sitter API
  #
  # @param _old_tree [nil] Ignored (Psych doesn't support incremental parsing)
  # @param source [String] YAML source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end

  private

  # Preconditions for parsing: a language must be assigned and the backend
  # must report itself available.
  #
  # @return [void]
  # @raise [RuntimeError] when either precondition fails
  def ensure_ready!
    raise "Language not set" unless @language
    raise "Psych not available" unless Psych.available?
  end
end
159
+
160
# Psych tree wrapper
#
# Holds a parsed Psych::Nodes::Stream together with the source it came from
# and exposes the TreeHaver tree interface on top of it.
class Tree
  # @return [::Psych::Nodes::Stream] The underlying Psych stream
  attr_reader :inner_tree

  # @return [String] The original source
  attr_reader :source

  # Create a new tree wrapper
  #
  # The source is split into lines once here; the lines are handed to the
  # root node when it is built.
  #
  # @param stream [::Psych::Nodes::Stream] Psych stream node
  # @param source [String] Original source
  def initialize(stream, source)
    @inner_tree = stream
    @source = source
    @lines = source.lines
  end

  # Get the root node
  #
  # For YAML the stream itself is the root; each call wraps it in a new Node.
  #
  # @return [Node] Root node
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # Get parse errors
  #
  # Psych raises on syntax errors instead of recording them, so a tree that
  # exists has none.
  #
  # @return [Array] Empty array
  def errors
    []
  end

  # Get parse warnings
  #
  # @return [Array] Empty array (Psych doesn't produce warnings)
  def warnings
    []
  end

  # Get comments from the document
  #
  # Psych doesn't preserve comments in the AST by default.
  #
  # @return [Array] Empty array
  def comments
    []
  end

  # @return [String] human-readable representation including the number of
  #   documents in the stream (0 when the stream has no children list)
  def inspect
    document_count = @inner_tree.children&.size
    "#<TreeHaver::Backends::Psych::Tree documents=#{document_count || 0}>"
  end
end
220
+
221
# Psych node wrapper
#
# Wraps Psych::Nodes::* classes to provide TreeHaver::Node-compatible interface.
#
# Psych node types:
# - Stream: Root container
# - Document: YAML document (multiple per stream possible)
# - Mapping: Hash/object
# - Sequence: Array/list
# - Scalar: Primitive value (string, number, boolean, null)
# - Alias: YAML anchor reference
#
# Nodes are ordered (via Comparable) by start byte, then end byte.
class Node
  include Comparable

  # @return [::Psych::Nodes::Node] The underlying Psych node
  attr_reader :inner_node

  # @return [String] The original source
  attr_reader :source

  # Create a new node wrapper
  #
  # @param node [::Psych::Nodes::Node] Psych node
  # @param source [String] Original source
  # @param lines [Array<String>, nil] Source lines for text extraction;
  #   computed from +source+ when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get the node type as a string
  #
  # Uses the last segment of the wrapped object's class name, lowercased:
  # - Psych::Nodes::Stream   → "stream"
  # - Psych::Nodes::Document → "document"
  # - Psych::Nodes::Mapping  → "mapping"
  # - Psych::Nodes::Sequence → "sequence"
  # - Psych::Nodes::Scalar   → "scalar"
  # - Psych::Nodes::Alias    → "alias"
  #
  # @return [String] Node type
  def type
    @inner_node.class.name.split("::").last.downcase
  end

  # Alias for tree-sitter compatibility
  alias_method :kind, :type

  # Get the text content of this node
  #
  # For Scalar nodes, returns the scalar's value. For Alias nodes, returns
  # "*" followed by the anchor name. For everything else, returns the source
  # text spanning the node's location.
  #
  # @return [String] Node text
  def text
    case @inner_node
    when ::Psych::Nodes::Scalar
      @inner_node.value.to_s
    when ::Psych::Nodes::Alias
      "*#{@inner_node.anchor}"
    else
      # For container nodes, extract from source using location
      extract_text_from_location
    end
  end

  # Get child nodes
  #
  # Each call builds fresh wrappers that share this node's source and lines.
  #
  # @return [Array<Node>] Child nodes (empty when the wrapped node has none)
  def children
    return [] unless @inner_node.respond_to?(:children) && @inner_node.children

    @inner_node.children.map { |child| Node.new(child, @source, @lines) }
  end

  # Iterate over child nodes
  #
  # @yield [Node] Each child node
  # @return [Enumerator, Array<Node>] an Enumerator when no block is given,
  #   otherwise the array of children that was iterated
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # Get the number of children
  #
  # @return [Integer] Child count
  def child_count
    children.size
  end

  # Get child by index
  #
  # @param index [Integer] Child index
  # @return [Node, nil] Child node
  def child(index)
    children[index]
  end

  # Get start byte offset
  #
  # Psych doesn't provide byte offsets directly, so we calculate from line/column.
  #
  # @return [Integer] Start byte offset (0 when the wrapped node has no
  #   location information)
  def start_byte
    return 0 unless @inner_node.respond_to?(:start_line)

    line = @inner_node.start_line || 0
    col = @inner_node.start_column || 0
    calculate_byte_offset(line, col)
  end

  # Get end byte offset
  #
  # Falls back to start byte plus the byte size of #text when the wrapped
  # node does not expose an end location.
  #
  # @return [Integer] End byte offset
  def end_byte
    return start_byte + text.bytesize unless @inner_node.respond_to?(:end_line)

    line = @inner_node.end_line || 0
    col = @inner_node.end_column || 0
    calculate_byte_offset(line, col)
  end

  # Get start point (row, column)
  #
  # @return [Point] Start position (0-based)
  def start_point
    row = (@inner_node.respond_to?(:start_line) ? @inner_node.start_line : 0) || 0
    col = (@inner_node.respond_to?(:start_column) ? @inner_node.start_column : 0) || 0
    Point.new(row, col)
  end

  # Get end point (row, column)
  #
  # @return [Point] End position (0-based)
  def end_point
    row = (@inner_node.respond_to?(:end_line) ? @inner_node.end_line : 0) || 0
    col = (@inner_node.respond_to?(:end_column) ? @inner_node.end_column : 0) || 0
    Point.new(row, col)
  end

  # Get the 1-based line number where this node starts
  #
  # Psych provides 0-based line numbers, so we add 1.
  #
  # @return [Integer] 1-based line number
  def start_line
    start_point.row + 1
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    end_point.row + 1
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # Check if this is a named (structural) node
  #
  # All Psych nodes are structural.
  #
  # @return [Boolean] true
  def named?
    true
  end

  # Alias for tree-sitter compatibility
  alias_method :structural?, :named?

  # Check if the node or any descendant has an error
  #
  # Psych raises on errors rather than embedding them.
  #
  # @return [Boolean] false
  def has_error?
    false
  end

  # Check if this is a missing node
  #
  # Psych doesn't have missing nodes.
  #
  # @return [Boolean] false
  def missing?
    false
  end

  # Comparison for sorting
  #
  # Orders by start byte and breaks ties with end byte.
  #
  # @param other [Node] other node
  # @return [Integer, nil] comparison result; nil when +other+ does not
  #   respond to +start_byte+
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] human-readable representation
  def inspect
    "#<TreeHaver::Backends::Psych::Node type=#{type} children=#{child_count}>"
  end

  # Psych-specific: Get the anchor name for Alias/anchored nodes
  #
  # @return [String, nil] Anchor name
  def anchor
    @inner_node.anchor if @inner_node.respond_to?(:anchor)
  end

  # Psych-specific: Get the tag for tagged nodes
  #
  # @return [String, nil] Tag
  def tag
    @inner_node.tag if @inner_node.respond_to?(:tag)
  end

  # Psych-specific: Get the scalar value
  #
  # @return [String, nil] Value for scalar nodes
  def value
    @inner_node.value if @inner_node.respond_to?(:value)
  end

  # Psych-specific: Check if this is a mapping (hash)
  #
  # @return [Boolean]
  def mapping?
    @inner_node.is_a?(::Psych::Nodes::Mapping)
  end

  # Psych-specific: Check if this is a sequence (array)
  #
  # @return [Boolean]
  def sequence?
    @inner_node.is_a?(::Psych::Nodes::Sequence)
  end

  # Psych-specific: Check if this is a scalar (primitive)
  #
  # @return [Boolean]
  def scalar?
    @inner_node.is_a?(::Psych::Nodes::Scalar)
  end

  # Psych-specific: Check if this is an alias
  #
  # @return [Boolean]
  def alias?
    @inner_node.is_a?(::Psych::Nodes::Alias)
  end

  # Psych-specific: Get mapping entries as key-value pairs
  #
  # For Mapping nodes, children alternate key, value, key, value...
  # A trailing key without a value is dropped.
  #
  # @return [Array<Array(Node, Node)>] Key-value pairs (empty for
  #   non-mapping nodes)
  def mapping_entries
    return [] unless mapping?

    pairs = []
    children.each_slice(2) do |key, val|
      pairs << [key, val] if key && val
    end
    pairs
  end

  private

  # Calculate byte offset from line and column
  #
  # The column is treated as a character count (libyaml advances its column
  # once per character, not per byte), so the prefix of the target line is
  # sliced by characters and measured in bytes. This keeps byte offsets
  # consistent with #extract_text_from_location, which indexes lines by
  # character as well.
  #
  # @param line [Integer] 0-based line number
  # @param column [Integer] 0-based column, in characters
  # @return [Integer] Byte offset, never larger than the source's byte size
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
        offset += 1 unless line_content.end_with?("\n") # Add newline
      else
        # Characters before the column, converted to bytes; a column past
        # the end of the line yields the whole line.
        offset += (line_content[0, column] || "").bytesize
        break
      end
    end
    # The newline compensation above can count a terminator that does not
    # exist after an unterminated final line; never point past the source.
    @source ? [offset, @source.bytesize].min : offset
  end

  # Extract text from source using location
  #
  # Columns index characters within a line. On a single line the text
  # between start and end column is returned; across lines the tail of the
  # first line, all middle lines, and the head of the last line are joined.
  #
  # @return [String] Extracted text ("" when the wrapped node has no
  #   location information)
  def extract_text_from_location
    return "" unless @inner_node.respond_to?(:start_line) && @inner_node.respond_to?(:end_line)

    first_row = @inner_node.start_line || 0
    last_row = @inner_node.end_line || first_row
    start_col = @inner_node.start_column || 0
    end_col = @inner_node.end_column || 0

    if first_row == last_row
      line = @lines[first_row] || ""
      line[start_col...end_col] || ""
    else
      pieces = (first_row..last_row).map do |ln|
        line = @lines[ln] || ""
        if ln == first_row
          line[start_col..]
        elsif ln == last_row
          line[0...end_col]
        else
          line
        end
      end
      # Slices that fall outside a line come back as nil; skip them.
      pieces.compact.join
    end
  end
end
564
+
565
# Point struct for position information
#
# A row/column pair that can be read through accessor methods or with
# hash-style keys.
Point = Struct.new(:row, :column) do
  # Hash-like access
  #
  # Only the symbol or string forms of the two member names are recognised;
  # any other key yields nil.
  #
  # @param key [Symbol, String] :row or :column
  # @return [Integer, nil]
  def [](key)
    if [:row, "row"].include?(key)
      row
    elsif [:column, "column"].include?(key)
      column
    end
  end

  # @return [Hash] the members keyed by :row and :column
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] "(row, column)"
  def to_s
    r, c = to_a
    "(#{r}, #{c})"
  end

  # @return [String]
  def inspect
    r, c = to_a
    "#<TreeHaver::Backends::Psych::Point row=#{r} column=#{c}>"
  end
end
595
+ end
596
+ end
597
+ end