tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,624 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Prism backend using Ruby's built-in Prism parser
6
+ #
7
+ # This backend wraps Prism, Ruby's official parser (stdlib in Ruby 3.4+,
8
+ # available as a gem for 3.2+). Unlike tree-sitter backends which are
9
+ # language-agnostic runtime parsers, Prism is specifically designed for
10
+ # parsing Ruby source code.
11
+ #
12
+ # Prism provides excellent error recovery, detailed location information,
13
+ # and is the future of Ruby parsing (used by CRuby, JRuby, TruffleRuby).
14
+ #
15
+ # @note This backend only parses Ruby source code
16
+ # @see https://github.com/ruby/prism Prism parser
17
+ #
18
+ # @example Basic usage
19
+ # parser = TreeHaver::Parser.new
20
+ # parser.language = TreeHaver::Backends::Prism::Language.ruby
21
+ # tree = parser.parse(ruby_source)
22
+ # root = tree.root_node
23
+ # puts root.type # => "program_node"
24
+ module Prism
25
# Module-level load state: whether a `require "prism"` has been attempted,
# and whether that attempt succeeded.
@load_attempted = false
@loaded = false

class << self
  # Check if the Prism backend is available
  #
  # The first call tries to `require "prism"` and records the outcome;
  # later calls return the recorded outcome without requiring again.
  # A LoadError is treated as "not available" rather than propagated.
  #
  # @return [Boolean] true if prism could be required
  # @example
  #   if TreeHaver::Backends::Prism.available?
  #     puts "Prism backend is ready"
  #   end
  def available?
    unless @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
      # Mark the attempt first so a failed require is never retried.
      @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
      begin
        require "prism"
      rescue LoadError
        @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
      else
        @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
      end
    end

    @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Forget the cached load outcome so the next {available?} call
  # attempts the require again (primarily for testing)
  #
  # @return [void]
  # @api private
  def reset!
    @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
    @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map, or an empty hash
  #   when prism is not available
  # @example
  #   TreeHaver::Backends::Prism.capabilities
  #   # => { backend: :prism, query: false, bytes_field: true, incremental: false,
  #   #      pure_ruby: false, ruby_only: true, error_tolerant: true }
  def capabilities
    if available?
      {
        backend: :prism,
        query: false, # Prism doesn't have tree-sitter-style queries (has pattern matching)
        bytes_field: true, # Prism provides byte offsets via Location
        incremental: false, # Prism doesn't support incremental parsing (yet)
        pure_ruby: false, # Prism has native C extension (but also pure Ruby mode)
        ruby_only: true, # Prism only parses Ruby source code
        error_tolerant: true, # Prism has excellent error recovery
      }
    else
      {}
    end
  end
end
80
+
81
# Prism language wrapper
#
# Unlike tree-sitter which supports many languages via grammar files,
# Prism only parses Ruby. This class exists for API compatibility with
# other tree_haver backends.
#
# Instances define `<=>`, `hash` and `eql?`, so they can be compared and
# used as Hash keys / Set members.
#
# @example
#   language = TreeHaver::Backends::Prism::Language.ruby
#   parser.language = language
class Language
  include Comparable

  # The language name (always :ruby for Prism)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Prism parsing options
  # @return [Hash]
  attr_reader :options

  # @param name [Symbol] language name (must convert to :ruby)
  # @param options [Hash] Prism parsing options (e.g., frozen_string_literal, version)
  # @raise [TreeHaver::NotAvailable] if name is anything other than :ruby
  def initialize(name = :ruby, options: {})
    @name = name.to_sym
    @backend = :prism
    @options = options

    return if @name == :ruby

    raise TreeHaver::NotAvailable,
      "Prism only supports Ruby parsing. " \
        "Got language: #{name.inspect}"
  end

  # Compare languages
  #
  # Two Prism languages compare by their options once the backend matches.
  # Options are put in a canonical key order first, so the insertion order
  # of the options hash does not matter.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] -1, 0, 1, or nil if not comparable
  def <=>(other)
    return unless other.is_a?(Language)
    return unless other.backend == @backend

    canonical_options(@options) <=> canonical_options(other.options)
  end

  # Hash value for this language (for use in Sets/Hashes)
  # @return [Integer]
  def hash
    [@backend, @name, canonical_options(@options)].hash
  end

  # Alias eql? to == (provided by Comparable via <=>)
  alias_method :eql?, :==

  class << self
    # Create a Ruby language instance (convenience method)
    #
    # @param options [Hash] Prism parsing options
    # @option options [Boolean] :frozen_string_literal frozen string literal pragma
    # @option options [String] :version Ruby version to parse as (e.g., "3.3.0")
    # @option options [Symbol] :command_line command line option (-e, -n, etc.)
    # @return [Language]
    # @example
    #   lang = TreeHaver::Backends::Prism::Language.ruby
    #   lang = TreeHaver::Backends::Prism::Language.ruby(frozen_string_literal: true)
    def ruby(options = {})
      new(:ruby, options: options)
    end

    # Not applicable for Prism (tree-sitter-specific)
    #
    # Prism is Ruby-only and doesn't load external grammar libraries.
    # This method exists for API compatibility; all arguments are ignored.
    #
    # @param path [String] ignored
    # @param symbol [String, nil] ignored
    # @param name [String, Symbol, nil] ignored
    # @raise [TreeHaver::NotAvailable] always raises
    def from_library(path, symbol: nil, name: nil)
      raise TreeHaver::NotAvailable,
        "Prism backend doesn't use shared libraries. " \
          "Use Prism::Language.ruby instead."
    end

    alias_method :from_path, :from_library
  end

  private

  # Options as an array of [key, value] pairs in a canonical order.
  #
  # Sorting the pairs directly raises ArgumentError when keys of different
  # types are mixed (e.g. a String key and a Symbol key cannot be compared).
  # Ordering by the key's class name and string form is always defined, and
  # yields the same order as a plain sort when all keys share one type.
  #
  # @param options [Hash, nil] options to normalize
  # @return [Array<Array>] sorted key/value pairs
  def canonical_options(options)
    options.to_a.sort_by { |key, _value| [key.class.name.to_s, key.to_s] }
  end
end
171
+
172
# Prism parser wrapper
#
# Wraps Prism to provide a tree-sitter-like API for parsing Ruby code.
class Parser
  # Create a new Prism parser instance
  #
  # Starts with no language selected and an empty options hash.
  #
  # @raise [TreeHaver::NotAvailable] if prism is not available
  def initialize
    unless Prism.available?
      raise TreeHaver::NotAvailable, "prism not available"
    end

    @language = nil
    @options = {}
  end

  # Set the language for this parser
  #
  # A Language instance is stored as-is and its options are used for
  # subsequent parses. A Symbol or String must name ruby; it selects the
  # default Ruby language with no options.
  #
  # NOTE(review): the original comment states that TreeHaver::Parser hands
  # this backend the Language wrapper itself (because the wrapper carries
  # the options); that caller is not visible here - confirm.
  #
  # @param lang [Language, Symbol, String] Prism language (:ruby or a Language instance)
  # @return [void]
  # @raise [ArgumentError] if lang names another language or has an unsupported type
  def language=(lang)
    if lang.is_a?(Language)
      @language = lang
      @options = lang.options
    elsif lang.is_a?(Symbol) || lang.is_a?(String)
      unless lang.to_sym == :ruby
        raise ArgumentError,
          "Prism only supports Ruby parsing. Got: #{lang.inspect}"
      end

      @language = Language.ruby
      @options = {}
    else
      raise ArgumentError,
        "Expected Prism::Language or :ruby, got #{lang.class}"
    end
  end

  # Parse source code
  #
  # Passes the source and the current language options to `::Prism.parse`
  # and wraps the result together with the source.
  #
  # @param source [String] the Ruby source code to parse
  # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
  # @raise [TreeHaver::NotAvailable] if no language is set
  def parse(source)
    unless @language
      raise TreeHaver::NotAvailable, "No language loaded (use parser.language = :ruby)"
    end

    result = ::Prism.parse(source, **@options)
    Tree.new(result, source)
  end

  # Parse source code (compatibility with tree-sitter API)
  #
  # Prism doesn't support incremental parsing, so old_tree is ignored
  # and this is equivalent to {#parse}.
  #
  # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
  # @param source [String] the Ruby source code to parse
  # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
  def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
    parse(source)
  end
end
235
+
236
# Prism tree wrapper
#
# Pairs a Prism parse result with the source it was parsed from and
# exposes a tree-sitter-compatible API on top of it.
#
# @api private
class Tree
  # @return [::Prism::ParseResult] the underlying Prism parse result
  attr_reader :parse_result

  # @return [String] the source code
  attr_reader :source

  # @param parse_result [::Prism::ParseResult] result to wrap
  # @param source [String] the source code that was parsed
  def initialize(parse_result, source)
    @source = source
    @parse_result = parse_result
  end

  # Get the root node of the parse tree
  #
  # A new wrapper is built on every call around the parse result's value.
  #
  # @return [Node] wrapped root node
  def root_node
    Node.new(parse_result.value, source)
  end

  # Check if the parse had errors
  #
  # @return [Boolean] whatever the parse result reports from `failure?`
  def has_errors?
    parse_result.failure?
  end

  # Get parse errors
  #
  # @return [Array<::Prism::ParseError>]
  def errors
    parse_result.errors
  end

  # Get parse warnings
  #
  # @return [Array<::Prism::ParseWarning>]
  def warnings
    parse_result.warnings
  end

  # Get comments from the parse
  #
  # @return [Array<::Prism::Comment>]
  def comments
    parse_result.comments
  end

  # Get magic comments (e.g., frozen_string_literal)
  #
  # @return [Array<::Prism::MagicComment>]
  def magic_comments
    parse_result.magic_comments
  end

  # Get data locations (__END__ section)
  #
  # @return [::Prism::Location, nil]
  def data_loc
    parse_result.data_loc
  end

  # Access the underlying Prism result (passthrough)
  #
  # Same object as {#parse_result}.
  #
  # @return [::Prism::ParseResult]
  alias_method :inner_tree, :parse_result
end
309
+
310
# Prism node wrapper
#
# Wraps Prism::Node objects to provide tree-sitter-compatible node API.
#
# The wrapper exposes:
# - type: snake_case form of the wrapped class name (e.g., ProgramNode → "program_node")
# - byte offsets and row/column positions read from the wrapped node's location
# - children built from the wrapped node's child_nodes (nil entries dropped)
# - passthrough of any other method to the wrapped node
#
# @api private
class Node
  # @return [::Prism::Node] the underlying Prism node
  attr_reader :inner_node

  # @return [String] the source code
  attr_reader :source

  # @param node [::Prism::Node, nil] node to wrap
  # @param source [String] full source the node was parsed from
  def initialize(node, source)
    @inner_node = node
    @source = source
  end

  # Get node type from Prism class name
  #
  # Converts the wrapped node's class name to a tree-sitter-style type string.
  # Example: CallNode → "call_node", ProgramNode → "program_node"
  #
  # @return [String] node type in snake_case ("nil" when no node is wrapped)
  def type
    return "nil" if @inner_node.nil?

    snake_case_type(@inner_node)
  end

  # Alias for tree-sitter compatibility
  alias_method :kind, :type

  # Get byte offset where the node starts
  #
  # @return [Integer] start offset, or 0 when no location is available
  def start_byte
    wrapped_location&.start_offset || 0
  end

  # Get byte offset where the node ends
  #
  # @return [Integer] end offset, or 0 when no location is available
  def end_byte
    wrapped_location&.end_offset || 0
  end

  # Get the start position as row/column
  #
  # @return [Hash{Symbol => Integer}] with :row and :column keys
  def start_point
    loc = wrapped_location
    return {row: 0, column: 0} unless loc

    # Prism uses 1-based lines internally but we need 0-based for tree-sitter compat
    {row: (loc.start_line - 1), column: loc.start_column}
  end

  # Get the end position as row/column
  #
  # @return [Hash{Symbol => Integer}] with :row and :column keys
  def end_point
    loc = wrapped_location
    return {row: 0, column: 0} unless loc

    # Prism uses 1-based lines internally but we need 0-based for tree-sitter compat
    {row: (loc.end_line - 1), column: loc.end_column}
  end

  # Get the 1-based line number where this node starts
  #
  # @return [Integer] 1-based line number (1 when no location is available)
  def start_line
    wrapped_location&.start_line || 1
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number (1 when no location is available)
  def end_line
    wrapped_location&.end_line || 1
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point[:column],
      end_column: end_point[:column],
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    child(0)
  end

  # Get the text content of this node
  #
  # Uses the wrapped node's own `slice` when it has one; otherwise cuts the
  # node's byte range out of the source.
  #
  # @return [String]
  def text
    return "" if @inner_node.nil?
    return @inner_node.slice if @inner_node.respond_to?(:slice)
    return "" unless @source

    # start_byte/end_byte are byte offsets, so the source must be sliced by
    # bytes; String#[] counts characters and misplaces the range as soon as
    # the source contains a multibyte character.
    @source.byteslice(start_byte...end_byte) || ""
  end

  # Alias for Prism compatibility
  alias_method :slice, :text

  # Get the number of child nodes
  #
  # @return [Integer]
  def child_count
    wrapped_children.size
  end

  # Get a child node by index
  #
  # Negative indexes count from the end, as with Array#[]. Any index that
  # does not resolve to a child returns nil rather than an empty wrapper.
  #
  # @param index [Integer] child index
  # @return [Node, nil] wrapped child node
  def child(index)
    found = wrapped_children[index]
    return if found.nil?

    Node.new(found, @source)
  end

  # Get all child nodes
  #
  # @return [Array<Node>] array of wrapped child nodes
  def children
    wrapped_children.map { |n| Node.new(n, @source) }
  end

  # Iterate over child nodes
  #
  # @yield [Node] each child node
  # @return [Enumerator, Array<Node>] enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block_given?

    children.each(&block)
  end

  # Check if this node or any of its descendants is an error node
  #
  # A node counts as an error node when its type contains "missing" or
  # "error". The whole subtree is searched, since error-recovery nodes can
  # sit arbitrarily deep below the node being asked.
  #
  # @return [Boolean]
  def has_error?
    return false if @inner_node.nil?

    # Explicit stack instead of recursion, so deep trees cannot exhaust the call stack.
    pending = [@inner_node]
    until pending.empty?
      current = pending.pop
      return true if error_type?(snake_case_type(current))

      pending.concat(current.child_nodes.compact) if current.respond_to?(:child_nodes)
    end

    false
  end

  # Check if this node is a "missing" node (error recovery)
  #
  # @return [Boolean]
  def missing?
    return false if @inner_node.nil?

    type.include?("missing")
  end

  # Check if this is a "named" node (structural vs punctuation)
  #
  # In Prism, all nodes are "named" in tree-sitter terminology
  # (there's no distinction between named and anonymous nodes).
  #
  # @return [Boolean] always true
  def named?
    true
  end

  # Check if this is a structural node
  #
  # @return [Boolean] always true
  def structural?
    true
  end

  # Get a child by field name (Prism node accessor)
  #
  # Prism nodes have specific accessors for their children.
  # This method calls the accessor of that name, if the wrapped node has
  # one, and wraps the result when it is itself a Prism node.
  #
  # @param name [String, Symbol] field/accessor name
  # @return [Node, nil] wrapped child node; nil when there is no such
  #   accessor or it returns something other than a Prism node
  def child_by_field_name(name)
    return if @inner_node.nil?
    return unless @inner_node.respond_to?(name)

    result = @inner_node.public_send(name)
    Node.new(result, @source) if result.is_a?(::Prism::Node)
  end

  alias_method :field, :child_by_field_name

  # Get the parent node
  #
  # @note Prism nodes don't have built-in parent references.
  #   This always returns nil. Use tree traversal instead.
  # @return [nil]
  def parent
    nil # Prism doesn't track parent references
  end

  # Get next sibling
  #
  # @note Prism nodes don't have sibling references.
  # @return [nil]
  def next_sibling
    nil
  end

  # Get previous sibling
  #
  # @note Prism nodes don't have sibling references.
  # @return [nil]
  def prev_sibling
    nil
  end

  # String representation for debugging
  #
  # @return [String]
  def inspect
    "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
  end

  # String representation
  #
  # @return [String] same as {#text}
  def to_s
    text
  end

  # Check if node responds to a method (includes delegation to inner_node)
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    return false if @inner_node.nil?

    @inner_node.respond_to?(method_name, include_private) || super
  end

  # Delegate unknown methods to the underlying Prism node
  #
  # This provides passthrough access for Prism-specific node methods
  # like `receiver`, `message`, `arguments`, etc.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] arguments to pass
  # @param kwargs [Hash] keyword arguments
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying node
  def method_missing(method_name, *args, **kwargs, &block)
    if @inner_node&.respond_to?(method_name)
      @inner_node.public_send(method_name, *args, **kwargs, &block)
    else
      super
    end
  end

  private

  # Location of the wrapped node, or nil when there is no wrapped node or
  # it has no `location` method.
  def wrapped_location
    return if @inner_node.nil? || !@inner_node.respond_to?(:location)

    @inner_node.location
  end

  # Raw (unwrapped) children with nil entries removed; empty when there is
  # no wrapped node or it has no `child_nodes` method.
  def wrapped_children
    return [] if @inner_node.nil?
    return [] unless @inner_node.respond_to?(:child_nodes)

    @inner_node.child_nodes.compact
  end

  # snake_case form of the last segment of a raw node's class name
  # (anonymous classes yield an empty string).
  def snake_case_type(raw_node)
    base_name = raw_node.class.name.to_s.split("::").last.to_s
    base_name.gsub(/([A-Z])/, '_\1').downcase.sub(/\A_/, "")
  end

  # Whether a snake_case type string denotes an error-recovery node.
  def error_type?(type_name)
    type_name.include?("missing") || type_name.include?("error")
  end
end
621
+ end
622
+ end
623
+ end
624
+