tree_haver 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Prism backend using Ruby's built-in Prism parser
6
+ #
7
+ # This backend wraps Prism, Ruby's official parser (stdlib in Ruby 3.4+,
8
+ # available as a gem for 3.2+). Unlike tree-sitter backends which are
9
+ # language-agnostic runtime parsers, Prism is specifically designed for
10
+ # parsing Ruby source code.
11
+ #
12
+ # Prism provides excellent error recovery, detailed location information,
13
+ # and is the future of Ruby parsing (used by CRuby, JRuby, TruffleRuby).
14
+ #
15
+ # @note This backend only parses Ruby source code
16
+ # @see https://github.com/ruby/prism Prism parser
17
+ #
18
+ # @example Basic usage
19
+ # parser = TreeHaver::Parser.new
20
+ # parser.language = TreeHaver::Backends::Prism::Language.ruby
21
+ # tree = parser.parse(ruby_source)
22
+ # root = tree.root_node
23
+ # puts root.type # => "program_node"
24
+ module Prism
25
+ @load_attempted = false
26
+ @loaded = false
27
+
28
+ # Check if the Prism backend is available
29
+ #
30
+ # Attempts to require prism on first call and caches the result.
31
+ # On Ruby 3.4+, Prism is in stdlib. On 3.2-3.3, it's a gem.
32
+ #
33
+ # @return [Boolean] true if prism is available
34
+ # @example
35
+ # if TreeHaver::Backends::Prism.available?
36
+ # puts "Prism backend is ready"
37
+ # end
38
+ class << self
39
+ def available?
40
+ return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
41
+ @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
42
+ begin
43
+ require "prism"
44
+
45
+ @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
46
+ rescue LoadError
47
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
48
+ end
49
+ @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
50
+ end
51
+
52
+ # Reset the load state (primarily for testing)
53
+ #
54
+ # @return [void]
55
+ # @api private
56
+ def reset!
57
+ @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
58
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
59
+ end
60
+
61
+ # Get capabilities supported by this backend
62
+ #
63
+ # @return [Hash{Symbol => Object}] capability map
64
+ # @example
65
+ # TreeHaver::Backends::Prism.capabilities
66
+ # # => { backend: :prism, query: false, bytes_field: true, incremental: false, ruby_only: true }
67
+ def capabilities
68
+ return {} unless available?
69
+ {
70
+ backend: :prism,
71
+ query: false, # Prism doesn't have tree-sitter-style queries (has pattern matching)
72
+ bytes_field: true, # Prism provides byte offsets via Location
73
+ incremental: false, # Prism doesn't support incremental parsing (yet)
74
+ pure_ruby: false, # Prism has native C extension (but also pure Ruby mode)
75
+ ruby_only: true, # Prism only parses Ruby source code
76
+ error_tolerant: true, # Prism has excellent error recovery
77
+ }
78
+ end
79
+ end
80
+
81
+ # Prism language wrapper
82
+ #
83
+ # Unlike tree-sitter which supports many languages via grammar files,
84
+ # Prism only parses Ruby. This class exists for API compatibility with
85
+ # other tree_haver backends.
86
+ #
87
+ # @example
88
+ # language = TreeHaver::Backends::Prism::Language.ruby
89
+ # parser.language = language
90
+ class Language
91
+ include Comparable
92
+
93
+ # The language name (always :ruby for Prism)
94
+ # @return [Symbol]
95
+ attr_reader :name
96
+
97
+ # The backend this language is for
98
+ # @return [Symbol]
99
+ attr_reader :backend
100
+
101
+ # Prism parsing options
102
+ # @return [Hash]
103
+ attr_reader :options
104
+
105
+ # @param name [Symbol] language name (should be :ruby)
106
+ # @param options [Hash] Prism parsing options (e.g., frozen_string_literal, version)
107
+ def initialize(name = :ruby, options: {})
108
+ @name = name.to_sym
109
+ @backend = :prism
110
+ @options = options
111
+
112
+ unless @name == :ruby
113
+ raise TreeHaver::NotAvailable,
114
+ "Prism only supports Ruby parsing. " \
115
+ "Got language: #{name.inspect}"
116
+ end
117
+ end
118
+
119
+ # Compare languages for equality
120
+ #
121
+ # Prism languages are equal if they have the same backend and options.
122
+ #
123
+ # @param other [Object] object to compare with
124
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
125
+ def <=>(other)
126
+ return unless other.is_a?(Language)
127
+ return unless other.backend == @backend
128
+
129
+ @options.to_a.sort <=> other.options.to_a.sort
130
+ end
131
+
132
+ # Hash value for this language (for use in Sets/Hashes)
133
+ # @return [Integer]
134
+ def hash
135
+ [@backend, @name, @options.to_a.sort].hash
136
+ end
137
+
138
+ # Alias eql? to ==
139
+ alias_method :eql?, :==
140
+
141
+ class << self
142
+ # Create a Ruby language instance (convenience method)
143
+ #
144
+ # @param options [Hash] Prism parsing options
145
+ # @option options [Boolean] :frozen_string_literal frozen string literal pragma
146
+ # @option options [String] :version Ruby version to parse as (e.g., "3.3.0")
147
+ # @option options [Symbol] :command_line command line option (-e, -n, etc.)
148
+ # @return [Language]
149
+ # @example
150
+ # lang = TreeHaver::Backends::Prism::Language.ruby
151
+ # lang = TreeHaver::Backends::Prism::Language.ruby(frozen_string_literal: true)
152
+ def ruby(options = {})
153
+ new(:ruby, options: options)
154
+ end
155
+
156
+ # Not applicable for Prism (tree-sitter-specific)
157
+ #
158
+ # Prism is Ruby-only and doesn't load external grammar libraries.
159
+ # This method exists for API compatibility but will raise an error.
160
+ #
161
+ # @raise [TreeHaver::NotAvailable] always raises
162
+ def from_library(path, symbol: nil, name: nil)
163
+ raise TreeHaver::NotAvailable,
164
+ "Prism backend doesn't use shared libraries. " \
165
+ "Use Prism::Language.ruby instead."
166
+ end
167
+
168
+ alias_method :from_path, :from_library
169
+ end
170
+ end
171
+
172
+ # Prism parser wrapper
173
+ #
174
+ # Wraps Prism to provide a tree-sitter-like API for parsing Ruby code.
175
+ class Parser
176
+ # Create a new Prism parser instance
177
+ #
178
+ # @raise [TreeHaver::NotAvailable] if prism is not available
179
+ def initialize
180
+ raise TreeHaver::NotAvailable, "prism not available" unless Prism.available?
181
+ @language = nil
182
+ @options = {}
183
+ end
184
+
185
+ # Set the language for this parser
186
+ #
187
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
188
+ # This backend receives the Language wrapper (since Prism::Language stores options).
189
+ #
190
+ # @param lang [Language, Symbol] Prism language (should be :ruby or Language instance)
191
+ # @return [void]
192
+ def language=(lang)
193
+ case lang
194
+ when Language
195
+ @language = lang
196
+ @options = lang.options
197
+ when Symbol, String
198
+ if lang.to_sym == :ruby
199
+ @language = Language.ruby
200
+ @options = {}
201
+ else
202
+ raise ArgumentError,
203
+ "Prism only supports Ruby parsing. Got: #{lang.inspect}"
204
+ end
205
+ else
206
+ raise ArgumentError,
207
+ "Expected Prism::Language or :ruby, got #{lang.class}"
208
+ end
209
+ end
210
+
211
+ # Parse source code
212
+ #
213
+ # @param source [String] the Ruby source code to parse
214
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
215
+ # @raise [TreeHaver::NotAvailable] if no language is set
216
+ def parse(source)
217
+ raise TreeHaver::NotAvailable, "No language loaded (use parser.language = :ruby)" unless @language
218
+
219
+ # Use Prism.parse with options
220
+ prism_result = ::Prism.parse(source, **@options)
221
+ Tree.new(prism_result, source)
222
+ end
223
+
224
+ # Parse source code (compatibility with tree-sitter API)
225
+ #
226
+ # Prism doesn't support incremental parsing, so old_tree is ignored.
227
+ #
228
+ # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
229
+ # @param source [String] the Ruby source code to parse
230
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
231
+ def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
232
+ parse(source) # Prism doesn't support incremental parsing
233
+ end
234
+ end
235
+
236
+ # Prism tree wrapper
237
+ #
238
+ # Wraps a Prism::ParseResult to provide tree-sitter-compatible API.
239
+ #
240
+ # @api private
241
+ class Tree
242
+ # @return [::Prism::ParseResult] the underlying Prism parse result
243
+ attr_reader :parse_result
244
+
245
+ # @return [String] the source code
246
+ attr_reader :source
247
+
248
+ def initialize(parse_result, source)
249
+ @parse_result = parse_result
250
+ @source = source
251
+ end
252
+
253
+ # Get the root node of the parse tree
254
+ #
255
+ # @return [Node] wrapped root node
256
+ def root_node
257
+ Node.new(@parse_result.value, @source)
258
+ end
259
+
260
+ # Check if the parse had errors
261
+ #
262
+ # @return [Boolean]
263
+ def has_errors?
264
+ @parse_result.failure?
265
+ end
266
+
267
+ # Get parse errors
268
+ #
269
+ # @return [Array<::Prism::ParseError>]
270
+ def errors
271
+ @parse_result.errors
272
+ end
273
+
274
+ # Get parse warnings
275
+ #
276
+ # @return [Array<::Prism::ParseWarning>]
277
+ def warnings
278
+ @parse_result.warnings
279
+ end
280
+
281
+ # Get comments from the parse
282
+ #
283
+ # @return [Array<::Prism::Comment>]
284
+ def comments
285
+ @parse_result.comments
286
+ end
287
+
288
+ # Get magic comments (e.g., frozen_string_literal)
289
+ #
290
+ # @return [Array<::Prism::MagicComment>]
291
+ def magic_comments
292
+ @parse_result.magic_comments
293
+ end
294
+
295
+ # Get data locations (__END__ section)
296
+ #
297
+ # @return [::Prism::Location, nil]
298
+ def data_loc
299
+ @parse_result.data_loc
300
+ end
301
+
302
+ # Access the underlying Prism result (passthrough)
303
+ #
304
+ # @return [::Prism::ParseResult]
305
+ def inner_tree
306
+ @parse_result
307
+ end
308
+ end
309
+
310
+ # Prism node wrapper
311
+ #
312
+ # Wraps Prism::Node objects to provide tree-sitter-compatible node API.
313
+ #
314
+ # Prism nodes provide:
315
+ # - type: class name without "Node" suffix (e.g., ProgramNode → "program")
316
+ # - location: ::Prism::Location with start/end offsets and line/column
317
+ # - child_nodes: array of child nodes
318
+ # - Various node-specific accessors
319
+ #
320
+ # @api private
321
+ class Node
322
+ include Enumerable
323
+
324
+ # @return [::Prism::Node] the underlying Prism node
325
+ attr_reader :inner_node
326
+
327
+ # @return [String] the source code
328
+ attr_reader :source
329
+
330
+ def initialize(node, source)
331
+ @inner_node = node
332
+ @source = source
333
+ end
334
+
335
+ # Get node type from Prism class name
336
+ #
337
+ # Converts PrismClassName to tree-sitter-style type string.
338
+ # Example: CallNode → "call_node", ProgramNode → "program_node"
339
+ #
340
+ # @return [String] node type in snake_case
341
+ def type
342
+ return "nil" if @inner_node.nil?
343
+
344
+ # Convert class name to snake_case type
345
+ # ProgramNode → program_node, CallNode → call_node
346
+ class_name = @inner_node.class.name.split("::").last
347
+ class_name.gsub(/([A-Z])/, '_\1').downcase.sub(/^_/, "")
348
+ end
349
+
350
+ # Alias for tree-sitter compatibility
351
+ alias_method :kind, :type
352
+
353
+ # Get byte offset where the node starts
354
+ #
355
+ # @return [Integer]
356
+ def start_byte
357
+ return 0 if @inner_node.nil? || !@inner_node.respond_to?(:location)
358
+ loc = @inner_node.location
359
+ loc&.start_offset || 0
360
+ end
361
+
362
+ # Get byte offset where the node ends
363
+ #
364
+ # @return [Integer]
365
+ def end_byte
366
+ return 0 if @inner_node.nil? || !@inner_node.respond_to?(:location)
367
+ loc = @inner_node.location
368
+ loc&.end_offset || 0
369
+ end
370
+
371
+ # Get the start position as row/column
372
+ #
373
+ # @return [Hash{Symbol => Integer}] with :row and :column keys
374
+ def start_point
375
+ return {row: 0, column: 0} if @inner_node.nil? || !@inner_node.respond_to?(:location)
376
+ loc = @inner_node.location
377
+ return {row: 0, column: 0} unless loc
378
+
379
+ # Prism uses 1-based lines internally but we need 0-based for tree-sitter compat
380
+ {row: (loc.start_line - 1), column: loc.start_column}
381
+ end
382
+
383
+ # Get the end position as row/column
384
+ #
385
+ # @return [Hash{Symbol => Integer}] with :row and :column keys
386
+ def end_point
387
+ return {row: 0, column: 0} if @inner_node.nil? || !@inner_node.respond_to?(:location)
388
+ loc = @inner_node.location
389
+ return {row: 0, column: 0} unless loc
390
+
391
+ # Prism uses 1-based lines internally but we need 0-based for tree-sitter compat
392
+ {row: (loc.end_line - 1), column: loc.end_column}
393
+ end
394
+
395
+ # Get the 1-based line number where this node starts
396
+ #
397
+ # @return [Integer] 1-based line number
398
+ def start_line
399
+ return 1 if @inner_node.nil? || !@inner_node.respond_to?(:location)
400
+ loc = @inner_node.location
401
+ loc&.start_line || 1
402
+ end
403
+
404
+ # Get the 1-based line number where this node ends
405
+ #
406
+ # @return [Integer] 1-based line number
407
+ def end_line
408
+ return 1 if @inner_node.nil? || !@inner_node.respond_to?(:location)
409
+ loc = @inner_node.location
410
+ loc&.end_line || 1
411
+ end
412
+
413
+ # Get position information as a hash
414
+ #
415
+ # Returns a hash with 1-based line numbers and 0-based columns.
416
+ # Compatible with *-merge gems' FileAnalysisBase.
417
+ #
418
+ # @return [Hash{Symbol => Integer}] Position hash
419
+ def source_position
420
+ {
421
+ start_line: start_line,
422
+ end_line: end_line,
423
+ start_column: start_point[:column],
424
+ end_column: end_point[:column],
425
+ }
426
+ end
427
+
428
+ # Get the first child node
429
+ #
430
+ # @return [Node, nil] First child or nil
431
+ def first_child
432
+ child(0)
433
+ end
434
+
435
+ # Get the text content of this node
436
+ #
437
+ # @return [String]
438
+ def text
439
+ return "" if @inner_node.nil?
440
+
441
+ if @inner_node.respond_to?(:slice)
442
+ @inner_node.slice
443
+ elsif @source
444
+ @source[start_byte...end_byte] || ""
445
+ else
446
+ ""
447
+ end
448
+ end
449
+
450
+ # Alias for Prism compatibility
451
+ alias_method :slice, :text
452
+
453
+ # Get the number of child nodes
454
+ #
455
+ # @return [Integer]
456
+ def child_count
457
+ return 0 if @inner_node.nil?
458
+ return 0 unless @inner_node.respond_to?(:child_nodes)
459
+ @inner_node.child_nodes.compact.size
460
+ end
461
+
462
+ # Get a child node by index
463
+ #
464
+ # @param index [Integer] child index
465
+ # @return [Node, nil] wrapped child node
466
+ def child(index)
467
+ return if @inner_node.nil?
468
+ return unless @inner_node.respond_to?(:child_nodes)
469
+
470
+ children_array = @inner_node.child_nodes.compact
471
+ return if index >= children_array.size
472
+
473
+ Node.new(children_array[index], @source)
474
+ end
475
+
476
+ # Get all child nodes
477
+ #
478
+ # @return [Array<Node>] array of wrapped child nodes
479
+ def children
480
+ return [] if @inner_node.nil?
481
+ return [] unless @inner_node.respond_to?(:child_nodes)
482
+
483
+ @inner_node.child_nodes.compact.map { |n| Node.new(n, @source) }
484
+ end
485
+
486
+ # Iterate over child nodes
487
+ #
488
+ # @yield [Node] each child node
489
+ # @return [Enumerator, nil]
490
+ def each(&block)
491
+ return to_enum(__method__) unless block_given?
492
+ children.each(&block)
493
+ end
494
+
495
+ # Check if this node has errors
496
+ #
497
+ # @return [Boolean]
498
+ def has_error?
499
+ return false if @inner_node.nil?
500
+
501
+ # Check if this is an error node type
502
+ return true if type.include?("missing") || type.include?("error")
503
+
504
+ # Check children recursively (Prism error nodes are usually children)
505
+ return false unless @inner_node.respond_to?(:child_nodes)
506
+ @inner_node.child_nodes.compact.any? { |n| n.class.name.to_s.include?("Missing") }
507
+ end
508
+
509
+ # Check if this node is a "missing" node (error recovery)
510
+ #
511
+ # @return [Boolean]
512
+ def missing?
513
+ return false if @inner_node.nil?
514
+ type.include?("missing")
515
+ end
516
+
517
+ # Check if this is a "named" node (structural vs punctuation)
518
+ #
519
+ # In Prism, all nodes are "named" in tree-sitter terminology
520
+ # (there's no distinction between named and anonymous nodes).
521
+ #
522
+ # @return [Boolean]
523
+ def named?
524
+ true
525
+ end
526
+
527
+ # Check if this is a structural node
528
+ #
529
+ # @return [Boolean]
530
+ def structural?
531
+ true
532
+ end
533
+
534
+ # Get a child by field name (Prism node accessor)
535
+ #
536
+ # Prism nodes have specific accessors for their children.
537
+ # This method tries to call that accessor.
538
+ #
539
+ # @param name [String, Symbol] field/accessor name
540
+ # @return [Node, nil] wrapped child node
541
+ def child_by_field_name(name)
542
+ return if @inner_node.nil?
543
+ return unless @inner_node.respond_to?(name)
544
+
545
+ result = @inner_node.public_send(name)
546
+ return if result.nil?
547
+
548
+ # Wrap if it's a node, otherwise return nil
549
+ if result.is_a?(::Prism::Node)
550
+ Node.new(result, @source)
551
+ end
552
+ end
553
+
554
+ alias_method :field, :child_by_field_name
555
+
556
+ # Get the parent node
557
+ #
558
+ # @raise [NotImplementedError] Prism nodes don't have parent references
559
+ # @return [void]
560
+ def parent
561
+ raise NotImplementedError, "Prism backend does not support parent navigation"
562
+ end
563
+
564
+ # Get next sibling
565
+ #
566
+ # @raise [NotImplementedError] Prism nodes don't have sibling references
567
+ # @return [void]
568
+ def next_sibling
569
+ raise NotImplementedError, "Prism backend does not support sibling navigation"
570
+ end
571
+
572
+ # Get previous sibling
573
+ #
574
+ # @raise [NotImplementedError] Prism nodes don't have sibling references
575
+ # @return [void]
576
+ def prev_sibling
577
+ raise NotImplementedError, "Prism backend does not support sibling navigation"
578
+ end
579
+
580
+ # String representation for debugging
581
+ #
582
+ # @return [String]
583
+ def inspect
584
+ "#<#{self.class} type=#{type} bytes=#{start_byte}..#{end_byte}>"
585
+ end
586
+
587
+ # String representation
588
+ #
589
+ # @return [String]
590
+ def to_s
591
+ text
592
+ end
593
+
594
+ # Check if node responds to a method (includes delegation to inner_node)
595
+ #
596
+ # @param method_name [Symbol] method to check
597
+ # @param include_private [Boolean] include private methods
598
+ # @return [Boolean]
599
+ def respond_to_missing?(method_name, include_private = false)
600
+ return false if @inner_node.nil?
601
+ @inner_node.respond_to?(method_name, include_private) || super
602
+ end
603
+
604
+ # Delegate unknown methods to the underlying Prism node
605
+ #
606
+ # This provides passthrough access for Prism-specific node methods
607
+ # like `receiver`, `message`, `arguments`, etc.
608
+ #
609
+ # @param method_name [Symbol] method to call
610
+ # @param args [Array] arguments to pass
611
+ # @param kwargs [Hash] keyword arguments
612
+ # @param block [Proc] block to pass
613
+ # @return [Object] result from the underlying node
614
+ def method_missing(method_name, *args, **kwargs, &block)
615
+ if @inner_node&.respond_to?(method_name)
616
+ @inner_node.public_send(method_name, *args, **kwargs, &block)
617
+ else
618
+ super
619
+ end
620
+ end
621
+ end
622
+ end
623
+ end
624
+ end
625
+