tree_haver 4.0.5 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,560 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Parslet backend using pure Ruby PEG parser
6
+ #
7
+ # This backend wraps Parslet-based parsers (like the toml gem) to provide a
8
+ # pure Ruby alternative to tree-sitter. Parslet is a PEG (Parsing Expression
9
+ # Grammar) parser generator written in Ruby that produces Hash/Array/Slice
10
+ # results rather than a traditional AST.
11
+ #
12
+ # Unlike tree-sitter backends which are language-agnostic runtime parsers,
13
+ # Parslet parsers are grammar-specific and defined as Ruby classes. Each
14
+ # language needs its own Parslet grammar (e.g., TOML::Parslet for TOML).
15
+ #
16
+ # @note This backend requires a Parslet grammar class for the specific language
17
+ # @see https://github.com/kschiess/parslet Parslet parser generator
18
+ # @see https://github.com/jm/toml toml gem (TOML Parslet grammar)
19
+ #
20
+ # @example Using with toml gem
21
+ # require "toml"
22
+ #
23
+ # parser = TreeHaver::Parser.new
24
+ # # For Parslet, "language" is actually a grammar class
25
+ # parser.language = TOML::Parslet
26
+ # tree = parser.parse(toml_source)
27
+ module Parslet
28
+ @load_attempted = false
29
+ @loaded = false
30
+
31
# Module-level helpers for probing and describing the Parslet backend.
class << self
  # rubocop:disable ThreadSafety/ClassInstanceVariable

  # Check if the Parslet backend is available
  #
  # The first call tries to require the parslet gem; the outcome is cached
  # and returned by every later call until {reset!} is invoked.
  #
  # @return [Boolean] true if the parslet gem could be required
  # @example
  #   if TreeHaver::Backends::Parslet.available?
  #     puts "Parslet backend is ready"
  #   end
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "parslet"
          true
        rescue LoadError, StandardError
          # Any failure while loading the gem means the backend is unusable.
          false
        end
    end
    @loaded
  end

  # Reset the load state (primarily for testing)
  #
  # After this call the next {available?} attempts the require again.
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = @loaded = false
  end

  # rubocop:enable ThreadSafety/ClassInstanceVariable

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the backend
  #   is not available
  # @example
  #   TreeHaver::Backends::Parslet.capabilities
  #   # => { backend: :parslet, query: false, bytes_field: true,
  #   #      incremental: false, pure_ruby: true }
  def capabilities
    if available?
      {
        backend: :parslet,
        query: false, # Parslet doesn't have a query API like tree-sitter
        bytes_field: true, # Parslet::Slice provides offset and length
        incremental: false, # Parslet doesn't support incremental parsing
        pure_ruby: true, # Parslet is pure Ruby (portable)
      }
    else
      {}
    end
  end
end
81
+
82
# Parslet grammar wrapper
#
# Unlike tree-sitter which loads compiled .so files, Parslet uses Ruby classes
# that define grammars. This class wraps a Parslet grammar class (or any
# object that responds to +new+ and builds something responding to +parse+).
#
# @example
#   # For TOML, use toml gem's grammar
#   language = TreeHaver::Backends::Parslet::Language.new(TOML::Parslet)
class Language
  include Comparable

  # The Parslet grammar class
  # @return [Class] Parslet grammar class (e.g., TOML::Parslet)
  attr_reader :grammar_class

  # The backend this language is for
  # @return [Symbol] always :parslet
  attr_reader :backend

  class << self
    # Load language from library path (API compatibility)
    #
    # Parslet grammars are Ruby classes, not shared libraries. This method
    # provides API compatibility with tree-sitter backends by looking up
    # registered Parslet grammars by name.
    #
    # For full API consistency, register a Parslet grammar with:
    #   TreeHaver.register_language(:toml, grammar_class: TOML::Parslet)
    #
    # The language name is taken from +name+, else from +symbol+ (with a
    # leading "tree_sitter_" removed), else derived from +path+.
    #
    # @param path [String, nil] only used to derive a language name
    # @param symbol [String, nil] used to derive the language name if no name is given
    # @param name [String, Symbol, nil] language name to look up
    # @return [Language] Parslet language wrapper
    # @raise [TreeHaver::NotAvailable] if no name can be determined or no
    #   Parslet grammar is registered for it
    def from_library(path = nil, symbol: nil, name: nil)
      lang_name = name&.to_sym ||
        symbol&.to_s&.sub(/\Atree_sitter_/, "")&.to_sym ||
        (path && TreeHaver::LibraryPathUtils.derive_language_name_from_path(path)&.to_sym)

      unless lang_name
        raise TreeHaver::NotAvailable,
          "Parslet backend requires a language name. " \
            "Provide name: parameter or register a grammar with TreeHaver.register_language."
      end

      registration = TreeHaver::LanguageRegistry.registered(lang_name, :parslet)

      unless registration
        raise TreeHaver::NotAvailable,
          "No Parslet grammar registered for #{lang_name.inspect}. " \
            "Register one with: TreeHaver.register_language(:#{lang_name}, grammar_class: YourGrammar)"
      end

      new(registration[:grammar_class])
    end

    alias_method :from_path, :from_library
  end

  # @param grammar_class [Class] A Parslet grammar class (inherits from ::Parslet::Parser)
  # @raise [TreeHaver::NotAvailable] if the argument is not a usable grammar
  def initialize(grammar_class)
    unless valid_grammar_class?(grammar_class)
      raise TreeHaver::NotAvailable,
        "Grammar class must be a Parslet::Parser subclass or respond to :new and return a parser with :parse. " \
          "Expected a Parslet grammar class (e.g., TOML::Parslet)."
    end
    @grammar_class = grammar_class
    @backend = :parslet
  end

  # Get the language name
  #
  # Derived from the outermost namespace of the grammar class name
  # (e.g., TOML::Parslet -> :toml).
  #
  # @return [Symbol] language name, or :unknown for anonymous/unnamed grammars
  def language_name
    full_name = grammar_class_name.to_s
    return :unknown if full_name.empty?

    full_name.split("::").first.downcase.to_sym
  end

  # Alias for language_name (API compatibility)
  alias_method :name, :language_name

  # Compare languages
  #
  # Two wrappers around the very same grammar object are always equal.
  # Otherwise named grammars are ordered by class name. Grammars without a
  # name (anonymous classes) cannot be ordered and are only equal to
  # themselves, so distinct anonymous grammars never compare equal.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] -1, 0, 1, or nil if not comparable
  def <=>(other)
    return unless other.is_a?(Language)
    return unless other.backend == @backend
    return 0 if @grammar_class.equal?(other.grammar_class)

    own_name = grammar_class_name
    other_name = other.grammar_class_name
    # nil <=> nil is 0, which would wrongly equate two different anonymous grammars.
    return if own_name.nil? || other_name.nil?

    own_name <=> other_name
  end

  # Hash value for this language (for use in Sets/Hashes)
  #
  # Uses the grammar name when there is one and the grammar object itself
  # otherwise, so it stays consistent with {#<=>}.
  #
  # @return [Integer]
  def hash
    [@backend, grammar_class_name || @grammar_class].hash
  end

  # Alias eql? to ==
  alias_method :eql?, :==

  protected

  # Name of the wrapped grammar, or nil when it has none.
  def grammar_class_name
    @grammar_class.name if @grammar_class.respond_to?(:name)
  end

  private

  # A grammar is valid when it is a Parslet::Parser subclass (if Parslet is
  # loaded) or when +new+ yields an object responding to +parse+.
  def valid_grammar_class?(klass)
    return false unless klass.respond_to?(:new)

    # Module#< only exists on classes/modules; guard so duck-typed factories
    # fall through to the instance check instead of raising NoMethodError.
    if defined?(::Parslet::Parser) && klass.is_a?(Module)
      return true if klass < ::Parslet::Parser
    end

    klass.new.respond_to?(:parse)
  rescue StandardError
    false
  end
end
216
+
217
# Parslet parser wrapper
#
# Wraps Parslet grammar classes to provide a tree-sitter-like API.
class Parser
  # Create a new Parslet parser instance
  #
  # @raise [TreeHaver::NotAvailable] if parslet gem is not available
  def initialize
    raise TreeHaver::NotAvailable, "parslet gem not available" unless Parslet.available?
    @grammar = nil
  end

  # Set the grammar for this parser
  #
  # Both forms are accepted:
  #   parser.language = TreeHaver::Backends::Parslet::Language.new(TOML::Parslet)
  #   parser.language = TOML::Parslet
  #
  # A Language wrapper is unwrapped to its grammar_class; anything else is
  # used as given, provided it responds to +new+.
  #
  # @param grammar [Language, Class] Parslet Language wrapper or grammar class
  # @return [void]
  # @raise [ArgumentError] if the resolved grammar does not respond to +new+
  def language=(grammar)
    candidate = grammar.is_a?(Language) ? grammar.grammar_class : grammar

    if candidate.respond_to?(:new)
      @grammar = candidate
    else
      raise ArgumentError,
        "Expected Parslet grammar class with new method or Language wrapper, " \
          "got #{grammar.class}"
    end
  end

  # Parse source code
  #
  # @param source [String] the source code to parse
  # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
  # @raise [TreeHaver::NotAvailable] if no grammar is set
  # @raise [TreeHaver::Error] if the grammar raises ::Parslet::ParseFailed
  def parse(source)
    raise TreeHaver::NotAvailable, "No grammar loaded" unless @grammar

    build_tree(source)
  end

  # Parse source code (compatibility with tree-sitter API)
  #
  # Parslet doesn't support incremental parsing, so old_tree is ignored.
  #
  # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
  # @param source [String] the source code to parse
  # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
  def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
    parse(source)
  end

  private

  # Instantiate the grammar, run it, and wrap the raw result in a Tree.
  # Parslet's parse failure is translated into TreeHaver::Error.
  def build_tree(source)
    raw_result = @grammar.new.parse(source)
    Tree.new(raw_result, source)
  rescue ::Parslet::ParseFailed => failure
    raise TreeHaver::Error, "Parse error: #{failure.message}"
  end
end
289
+
290
# Parslet tree wrapper
#
# Holds the raw result of a Parslet parse (Hash/Array/Slice) together with
# the source text, exposing it through a tree-sitter-compatible API.
#
# Subclasses Base::Tree; per the original notes this supplies shared methods
# such as #errors, #warnings, #comments, #has_error?, and #inspect.
#
# @api private
class Tree < TreeHaver::Base::Tree
  # The raw Parslet parse result
  # @return [Hash, Array, Parslet::Slice] The parse result
  attr_reader :parslet_result

  # @param parslet_result [Hash, Array, Parslet::Slice] raw parse result
  # @param source [String] the text that was parsed
  def initialize(parslet_result, source)
    @parslet_result = parslet_result
    super(parslet_result, source: source)
  end

  # Build the root node, typed "document", around the whole parse result.
  #
  # A new Node is created on every call.
  # NOTE(review): relies on @source, presumably assigned by Base::Tree#initialize - confirm.
  #
  # @return [Node] root node
  def root_node
    Node.new(parslet_result, @source, type: "document")
  end
end
313
+
314
# Parslet node wrapper
#
# Wraps Parslet parse results (Hash/Array/Slice) to provide tree-sitter-compatible node API.
#
# Parslet produces different result types:
# - Hash: Named captures like {:key => value, :value => ...}
# - Array: Repeated captures like [{...}, {...}]
# - Parslet::Slice: Terminal string values with position info
# - String: Plain strings (less common)
#
# Node types are assigned as follows:
# - an explicit +type:+ wins (the tree root uses "document")
# - a value reached through a Hash key uses that key's name
# - the N-th element of an Array is typed "element_N"
# - anything else is typed by its class: "slice", "hash", "array",
#   "string", or "unknown"
#
# Subclasses Base::Node; per the original notes this supplies shared methods
# such as #first_child, #last_child, #to_s, #inspect, #==, #<=>,
# #source_position, #start_line, #end_line.
#
# @api private
class Node < TreeHaver::Base::Node
  attr_reader :value, :node_type

  # @param value [Hash, Array, Parslet::Slice, String, Object] raw Parslet value
  # @param source [String] full source text the value was parsed from
  # @param type [String, nil] explicit node type
  # @param key [Symbol, String, nil] Hash key this value was stored under
  def initialize(value, source, type: nil, key: nil)
    @value = value # must be set first: infer_type inspects it
    @node_type = type || infer_type(key)
    @key = key
    super(value, source: source)
  end

  # -- Required API Methods (from Base::Node) ----------------------------

  # Get node type
  #
  # @return [String] the node type
  def type
    @node_type
  end

  # Offset where this node starts
  #
  # Taken from the first Parslet::Slice found in the value (the value itself
  # when it is a Slice); 0 when the value contains no Slice.
  #
  # NOTE(review): Parslet::Slice#offset appears to be a character position
  # rather than a byte position - confirm for multibyte sources.
  #
  # @return [Integer] offset where this node starts
  def start_byte
    find_first_slice(@value)&.offset || 0
  end

  # Offset where this node ends
  #
  # Computed from the last Parslet::Slice found in the value; falls back to
  # the source length when the value contains no Slice.
  #
  # @return [Integer] offset just past the end of this node
  def end_byte
    closing_slice = find_last_slice(@value)
    closing_slice ? closing_slice.offset + closing_slice.size : @source.length
  end

  # Get all children
  #
  # Hash entries become nodes keyed (and typed) by their key; Array entries
  # become nodes typed "element_N". Other values have no children.
  #
  # @return [Array<Node>] child nodes
  def children
    case @value
    when Hash
      @value.map { |key, entry| Node.new(entry, @source, key: key) }
    when Array
      @value.each_with_index.map { |entry, position| element_node(entry, position) }
    else
      []
    end
  end

  # -- Overridden Methods ------------------------------------------------

  # Override start_point to calculate from source
  # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
  def start_point
    calculate_point(start_byte)
  end

  # Override end_point to calculate from source
  # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
  def end_point
    calculate_point(end_byte)
  end

  # Override text to handle Parslet-specific value types
  #
  # Strings are returned as-is, Hash/Array values are cut out of the source
  # by their start/end offsets, and everything else (including Slices) is
  # converted with #to_s.
  #
  # @return [String] matched text
  def text
    case @value
    when String
      @value
    when Hash, Array
      @source[start_byte...end_byte] || ""
    else
      @value.to_s
    end
  end

  # Fetch a single child without building the whole children array
  #
  # Returns nil for negative or out-of-range indices and for values that
  # have no children. The node returned is built exactly like the matching
  # entry of {#children}, so child(i).type == children[i].type.
  #
  # @param index [Integer] child index
  # @return [Node, nil] child node or nil
  def child(index)
    return if index.negative?

    case @value
    when Hash
      return if index >= @value.size

      key = @value.keys[index]
      Node.new(@value[key], @source, key: key)
    when Array
      return if index >= @value.size

      element_node(@value[index], index)
    end
  end

  # Override child_count for efficiency (avoid building full children array)
  # @return [Integer] child count
  def child_count
    case @value
    when Hash, Array
      @value.size
    else
      0
    end
  end

  # Check if node is named
  #
  # Hash keys in Parslet results are "named" in tree-sitter terminology.
  #
  # @return [Boolean] true if this node has a key or wraps a Hash
  def named?
    !@key.nil? || @value.is_a?(Hash)
  end

  # Check if this node represents a structural element vs a terminal/token
  #
  # @return [Boolean] true if this is a structural (non-terminal) node
  def structural?
    @value.is_a?(Hash) || @value.is_a?(Array)
  end

  private

  # Build the node for the Array element at +position+.
  def element_node(entry, position)
    Node.new(entry, @source, type: "element_#{position}")
  end

  # Translate an offset into the source to a zero-based row/column pair.
  def calculate_point(offset)
    return {row: 0, column: 0} if offset <= 0

    newlines_before = @source[0...offset].count("\n")
    # Index of the newline that opens the current line; -1 on the first line.
    last_newline = @source.rindex("\n", offset - 1) || -1
    {row: newlines_before, column: offset - last_newline - 1}
  end

  # Pick a type name from the Hash key if present, else from the value's class.
  def infer_type(key)
    return key.to_s if key

    case @value
    when ::Parslet::Slice then "slice"
    when Hash then "hash"
    when Array then "array"
    when String then "string"
    else "unknown"
    end
  end

  # Find the first Parslet::Slice in a nested structure
  # (depth-first, Hash values in insertion order); nil if there is none.
  def find_first_slice(obj)
    case obj
    when ::Parslet::Slice
      obj
    when Hash
      first_slice_among(obj.values)
    when Array
      first_slice_among(obj)
    end
  end

  # Find the last Parslet::Slice in a nested structure
  # (depth-first from the end); nil if there is none.
  def find_last_slice(obj)
    case obj
    when ::Parslet::Slice
      obj
    when Hash
      last_slice_among(obj.values)
    when Array
      last_slice_among(obj)
    end
  end

  # First slice found while scanning +items+ front to back.
  def first_slice_among(items)
    items.each do |item|
      found = find_first_slice(item)
      return found if found
    end
    nil
  end

  # First slice found while scanning +items+ back to front.
  def last_slice_among(items)
    items.reverse_each do |item|
      found = find_last_slice(item)
      return found if found
    end
    nil
  end
end
553
+
554
# Register this backend's availability check under :parslet so that the
# registry can ask whether the backend is usable (used by RSpec dependency tags).
TreeHaver::BackendRegistry.register_availability_checker(:parslet) { available? }
558
+ end
559
+ end
560
+ end
@@ -461,7 +461,7 @@ module TreeHaver
461
461
  end
462
462
  end
463
463
 
464
- # Register availability checker for RSpec dependency tags
464
+ # Register the availability checker for RSpec dependency tags
465
465
  TreeHaver::BackendRegistry.register_availability_checker(:prism) do
466
466
  available?
467
467
  end
@@ -366,7 +366,7 @@ module TreeHaver
366
366
  # Alias Point to the base class for compatibility
367
367
  Point = TreeHaver::Base::Point
368
368
 
369
- # Register availability checker for RSpec dependency tags
369
+ # Register the availability checker for RSpec dependency tags
370
370
  TreeHaver::BackendRegistry.register_availability_checker(:psych) do
371
371
  available?
372
372
  end
@@ -230,7 +230,7 @@ module TreeHaver
230
230
  end
231
231
  end
232
232
 
233
- # Register availability checker for RSpec dependency tags
233
+ # Register the availability checker for RSpec dependency tags
234
234
  TreeHaver::BackendRegistry.register_availability_checker(:rust) do
235
235
  available?
236
236
  end
@@ -95,9 +95,16 @@ module TreeHaver
95
95
  end
96
96
 
97
97
  # Get a child node by index
98
- # @param index [Integer] Child index
98
+ #
99
+ # Returns nil for negative indices or indices out of bounds.
100
+ # This matches tree-sitter behavior where negative indices are invalid.
101
+ #
102
+ # @param index [Integer] Child index (0-based, non-negative)
99
103
  # @return [Node, nil] The child node or nil
100
104
def child(index)
  # Negative and past-the-end indices have no child; never wrap around like Array#[].
  out_of_range = index.negative? || index >= child_count
  out_of_range ? nil : children[index]
end
103
110