tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,487 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Backends
5
- # Citrus backend using pure Ruby PEG parser
6
- #
7
- # This backend wraps Citrus-based parsers (like toml-rb) to provide a
8
- # pure Ruby alternative to tree-sitter. Citrus is a PEG (Parsing Expression
9
- # Grammar) parser generator written in Ruby.
10
- #
11
- # Unlike tree-sitter backends which are language-agnostic runtime parsers,
12
- # Citrus parsers are grammar-specific and compiled into Ruby code. Each
13
- # language needs its own Citrus grammar (e.g., toml-rb for TOML).
14
- #
15
- # @note This backend requires a Citrus grammar for the specific language
16
- # @see https://github.com/mjackson/citrus Citrus parser generator
17
- # @see https://github.com/emancu/toml-rb toml-rb (TOML Citrus grammar)
18
- #
19
- # @example Using with toml-rb
20
- # require "toml-rb"
21
- #
22
- # parser = TreeHaver::Parser.new
23
- # # For Citrus, "language" is actually a grammar module
24
- # parser.language = TomlRB::Document
25
- # tree = parser.parse(toml_source)
26
- module Citrus
27
- @load_attempted = false
28
- @loaded = false
29
-
30
- # Check if the Citrus backend is available
31
- #
32
- # Attempts to require citrus on first call and caches the result.
33
- #
34
- # @return [Boolean] true if citrus gem is available
35
- # @example
36
- # if TreeHaver::Backends::Citrus.available?
37
- # puts "Citrus backend is ready"
38
- # end
39
- class << self
40
- def available?
41
- return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
42
- @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
43
- begin
44
- require "citrus"
45
- @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
46
- rescue LoadError
47
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
48
- # :nocov: defensive code - StandardError during require is extremely rare
49
- rescue StandardError
50
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
51
- # :nocov:
52
- end
53
- @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
54
- end
55
-
56
- # Reset the load state (primarily for testing)
57
- #
58
- # @return [void]
59
- # @api private
60
- def reset!
61
- @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
62
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
63
- end
64
-
65
- # Get capabilities supported by this backend
66
- #
67
- # @return [Hash{Symbol => Object}] capability map
68
- # @example
69
- # TreeHaver::Backends::Citrus.capabilities
70
- # # => { backend: :citrus, query: false, bytes_field: true, incremental: false }
71
- def capabilities
72
- return {} unless available?
73
- {
74
- backend: :citrus,
75
- query: false, # Citrus doesn't have a query API like tree-sitter
76
- bytes_field: true, # Citrus::Match provides offset and length
77
- incremental: false, # Citrus doesn't support incremental parsing
78
- pure_ruby: true, # Citrus is pure Ruby (portable)
79
- }
80
- end
81
- end
82
-
83
- # Citrus grammar wrapper
84
- #
85
- # Unlike tree-sitter which loads compiled .so files, Citrus uses Ruby modules
86
- # that define grammars. This class wraps a Citrus grammar module.
87
- #
88
- # @example
89
- # # For TOML, use toml-rb's grammar
90
- # language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
91
- class Language
92
- include Comparable
93
-
94
- # The Citrus grammar module
95
- # @return [Module] Citrus grammar module (e.g., TomlRB::Document)
96
- attr_reader :grammar_module
97
-
98
- # The backend this language is for
99
- # @return [Symbol]
100
- attr_reader :backend
101
-
102
- # @param grammar_module [Module] A Citrus grammar module with a parse method
103
- def initialize(grammar_module)
104
- unless grammar_module.respond_to?(:parse)
105
- raise TreeHaver::NotAvailable,
106
- "Grammar module must respond to :parse. " \
107
- "Expected a Citrus grammar module (e.g., TomlRB::Document)."
108
- end
109
- @grammar_module = grammar_module
110
- @backend = :citrus
111
- end
112
-
113
- # Get the language name
114
- #
115
- # Derives a name from the grammar module name.
116
- #
117
- # @return [Symbol] language name
118
- def language_name
119
- # Derive name from grammar module (e.g., TomlRB::Document -> :toml)
120
- return :unknown unless @grammar_module.respond_to?(:name) && @grammar_module.name
121
-
122
- name = @grammar_module.name.to_s.split("::").first.downcase
123
- name.sub(/rb$/, "").to_sym
124
- end
125
-
126
- # Alias for language_name (API compatibility)
127
- alias_method :name, :language_name
128
-
129
- # Compare languages for equality
130
- #
131
- # Citrus languages are equal if they have the same backend and grammar_module.
132
- # Grammar module uniquely identifies a Citrus language.
133
- #
134
- # @param other [Object] object to compare with
135
- # @return [Integer, nil] -1, 0, 1, or nil if not comparable
136
- def <=>(other)
137
- return unless other.is_a?(Language)
138
- return unless other.backend == @backend
139
-
140
- # Compare by grammar_module name (modules are compared by object_id by default)
141
- @grammar_module.name <=> other.grammar_module.name
142
- end
143
-
144
- # Hash value for this language (for use in Sets/Hashes)
145
- # @return [Integer]
146
- def hash
147
- [@backend, @grammar_module.name].hash
148
- end
149
-
150
- # Alias eql? to ==
151
- alias_method :eql?, :==
152
-
153
- # Load language from library path (API compatibility)
154
- #
155
- # Citrus grammars are Ruby modules, not shared libraries. This method
156
- # provides API compatibility with tree-sitter backends by looking up
157
- # registered Citrus grammars by name.
158
- #
159
- # For full API consistency, register a Citrus grammar with:
160
- # TreeHaver.register_language(:toml, grammar_module: TomlRB::Document)
161
- #
162
- # Then this method will find it when called via `TreeHaver.parser_for(:toml)`.
163
- #
164
- # @param path [String, nil] Ignored for Citrus (used to derive language name)
165
- # @param symbol [String, nil] Used to derive language name if path not provided
166
- # @param name [String, Symbol, nil] Language name to look up
167
- # @return [Language] Citrus language wrapper
168
- # @raise [TreeHaver::NotAvailable] if no Citrus grammar is registered for the language
169
- class << self
170
- def from_library(path = nil, symbol: nil, name: nil)
171
- # Derive language name from path, symbol, or explicit name
172
- lang_name = name&.to_sym ||
173
- symbol&.to_s&.sub(/^tree_sitter_/, "")&.to_sym ||
174
- path && TreeHaver::LibraryPathUtils.derive_language_name_from_path(path)&.to_sym
175
-
176
- unless lang_name
177
- raise TreeHaver::NotAvailable,
178
- "Citrus backend requires a language name. " \
179
- "Provide name: parameter or register a grammar with TreeHaver.register_language."
180
- end
181
-
182
- # Look up registered Citrus grammar
183
- registration = TreeHaver::LanguageRegistry.registered(lang_name, :citrus)
184
-
185
- unless registration
186
- raise TreeHaver::NotAvailable,
187
- "No Citrus grammar registered for #{lang_name.inspect}. " \
188
- "Register one with: TreeHaver.register_language(:#{lang_name}, grammar_module: YourGrammar)"
189
- end
190
-
191
- grammar_module = registration[:grammar_module]
192
- new(grammar_module)
193
- end
194
-
195
- alias_method :from_path, :from_library
196
- end
197
- end
198
-
199
- # Citrus parser wrapper
200
- #
201
- # Wraps Citrus grammar modules to provide a tree-sitter-like API.
202
- class Parser
203
- # Create a new Citrus parser instance
204
- #
205
- # @raise [TreeHaver::NotAvailable] if citrus gem is not available
206
- def initialize
207
- raise TreeHaver::NotAvailable, "citrus gem not available" unless Citrus.available?
208
- @grammar = nil
209
- end
210
-
211
- # Set the grammar for this parser
212
- #
213
- # Accepts either a Citrus::Language wrapper or a raw Citrus grammar module.
214
- # When passed a Language wrapper, extracts the grammar_module from it.
215
- # When passed a raw grammar module, uses it directly.
216
- #
217
- # This flexibility allows both patterns:
218
- # parser.language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
219
- # parser.language = TomlRB::Document # Also works
220
- #
221
- # @param grammar [Language, Module] Citrus Language wrapper or grammar module
222
- # @return [void]
223
- def language=(grammar)
224
- # Accept Language wrapper or raw grammar module
225
- actual_grammar = case grammar
226
- when Language
227
- grammar.grammar_module
228
- else
229
- grammar
230
- end
231
-
232
- unless actual_grammar.respond_to?(:parse)
233
- raise ArgumentError,
234
- "Expected Citrus grammar module with parse method or Language wrapper, " \
235
- "got #{grammar.class}"
236
- end
237
- @grammar = actual_grammar
238
- end
239
-
240
- # Parse source code
241
- #
242
- # @param source [String] the source code to parse
243
- # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
244
- # @raise [TreeHaver::NotAvailable] if no grammar is set
245
- # @raise [::Citrus::ParseError] if parsing fails
246
- def parse(source)
247
- raise TreeHaver::NotAvailable, "No grammar loaded" unless @grammar
248
-
249
- begin
250
- citrus_match = @grammar.parse(source)
251
- # Return raw Citrus::Tree - TreeHaver::Parser will wrap it
252
- Tree.new(citrus_match, source)
253
- rescue ::Citrus::ParseError => e
254
- # Re-raise with more context
255
- raise TreeHaver::Error, "Parse error: #{e.message}"
256
- end
257
- end
258
-
259
- # Parse source code (compatibility with tree-sitter API)
260
- #
261
- # Citrus doesn't support incremental parsing, so old_tree is ignored.
262
- #
263
- # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
264
- # @param source [String] the source code to parse
265
- # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
266
- def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
267
- parse(source) # Citrus doesn't support incremental parsing
268
- end
269
- end
270
-
271
- # Citrus tree wrapper
272
- #
273
- # Wraps a Citrus::Match (which represents the parse tree) to provide
274
- # tree-sitter-compatible API.
275
- #
276
- # Inherits from Base::Tree to get shared methods like #errors, #warnings,
277
- # #comments, #has_error?, and #inspect.
278
- #
279
- # @api private
280
- class Tree < TreeHaver::Base::Tree
281
- # The raw Citrus::Match root
282
- # @return [Citrus::Match] The root match
283
- attr_reader :root_match
284
-
285
- def initialize(root_match, source)
286
- @root_match = root_match
287
- super(root_match, source: source)
288
- end
289
-
290
- def root_node
291
- Node.new(@root_match, @source)
292
- end
293
- end
294
-
295
- # Citrus node wrapper
296
- #
297
- # Wraps Citrus::Match objects to provide tree-sitter-compatible node API.
298
- #
299
- # Citrus::Match provides:
300
- # - events[0]: rule name (Symbol) - used as type
301
- # - offset: byte position
302
- # - length: byte length
303
- # - string: matched text
304
- # - matches: child matches
305
- # - captures: named groups
306
- #
307
- # Inherits from Base::Node to get shared methods like #first_child, #last_child,
308
- # #to_s, #inspect, #==, #<=>, #source_position, #start_line, #end_line, etc.
309
- #
310
- # Language-specific helpers can be mixed in for convenience:
311
- # require "tree_haver/backends/citrus/toml_helpers"
312
- # TreeHaver::Backends::Citrus::Node.include(TreeHaver::Backends::Citrus::TomlHelpers)
313
- #
314
- # @api private
315
- class Node < TreeHaver::Base::Node
316
- attr_reader :match
317
-
318
- def initialize(match, source)
319
- @match = match
320
- super(match, source: source)
321
- end
322
-
323
- # -- Required API Methods (from Base::Node) ----------------------------
324
-
325
- # Get node type from Citrus rule name
326
- #
327
- # Uses Citrus grammar introspection to dynamically determine node types.
328
- # Works with any Citrus grammar without language-specific knowledge.
329
- #
330
- # Strategy:
331
- # 1. Check if first event has a .name method (returns Symbol) - use that
332
- # 2. If first event is a Symbol directly - use that
333
- # 3. For compound rules (Repeat, Choice), recurse into first match
334
- #
335
- # @return [String] rule name from grammar
336
- def type
337
- return "unknown" unless @match.respond_to?(:events)
338
- return "unknown" unless @match.events.is_a?(Array)
339
- return "unknown" if @match.events.empty?
340
-
341
- extract_type_from_event(@match.events.first)
342
- end
343
-
344
- def start_byte
345
- @match.offset
346
- end
347
-
348
- def end_byte
349
- @match.offset + @match.length
350
- end
351
-
352
- def children
353
- return [] unless @match.respond_to?(:matches)
354
- @match.matches.map { |m| Node.new(m, @source) }
355
- end
356
-
357
- # -- Overridden Methods ------------------------------------------------
358
-
359
- # Override start_point to calculate from source
360
- def start_point
361
- calculate_point(@match.offset)
362
- end
363
-
364
- # Override end_point to calculate from source
365
- def end_point
366
- calculate_point(@match.offset + @match.length)
367
- end
368
-
369
- # Override text to use Citrus match string
370
- def text
371
- @match.string
372
- end
373
-
374
- # Override child_count for efficiency (avoid building full children array)
375
- def child_count
376
- @match.respond_to?(:matches) ? @match.matches.size : 0
377
- end
378
-
379
- # Override child to handle negative indices properly
380
- def child(index)
381
- return if index.negative?
382
- return unless @match.respond_to?(:matches)
383
- return if index >= @match.matches.size
384
-
385
- Node.new(@match.matches[index], @source)
386
- end
387
-
388
- # Check if this node represents a structural element vs a terminal/token
389
- #
390
- # Uses Citrus grammar's terminal? method to determine if this is
391
- # a structural rule (like "table", "keyvalue") vs a terminal token
392
- # (like "[", "=", whitespace).
393
- #
394
- # @return [Boolean] true if this is a structural (non-terminal) node
395
- def structural?
396
- return false unless @match.respond_to?(:events)
397
- return false if @match.events.empty?
398
-
399
- first_event = @match.events.first
400
-
401
- # Check if event has terminal? method (Citrus rule object)
402
- if first_event.respond_to?(:terminal?)
403
- return !first_event.terminal?
404
- end
405
-
406
- # For Symbol events, try to look up in grammar
407
- if first_event.is_a?(Symbol) && @match.respond_to?(:grammar)
408
- grammar = @match.grammar
409
- if grammar.respond_to?(:rules) && grammar.rules.key?(first_event)
410
- rule = grammar.rules[first_event]
411
- return !rule.terminal? if rule.respond_to?(:terminal?)
412
- end
413
- end
414
-
415
- # Default: assume structural if not a simple string/regex terminal
416
- true
417
- end
418
-
419
- private
420
-
421
- # Extract type name from a Citrus event object
422
- #
423
- # Handles different event types:
424
- # - Objects with .name method (Citrus rule objects) -> use .name
425
- # - Symbol -> use directly
426
- # - Compound rules (Repeat, Choice) -> check string representation
427
- #
428
- # @param event [Object] Citrus event object
429
- # @return [String] type name
430
- def extract_type_from_event(event)
431
- # Case 1: Event has .name method (returns Symbol)
432
- if event.respond_to?(:name)
433
- name = event.name
434
- return name.to_s if name.is_a?(Symbol)
435
- end
436
-
437
- # Case 2: Event is a Symbol directly (most common for child nodes)
438
- return event.to_s if event.is_a?(Symbol)
439
-
440
- # Case 3: Event is a String
441
- return event if event.is_a?(String)
442
-
443
- # Case 4: For compound rules (Repeat, Choice), try string parsing first
444
- # This avoids recursion issues
445
- str = event.to_s
446
-
447
- # Try to extract rule name from string representation
448
- # Examples: "table", "(comment | table)*", "space?", etc.
449
- if str =~ /^([a-z_][a-z0-9_]*)/i
450
- return $1
451
- end
452
-
453
- # If we have a pattern like "(rule1 | rule2)*", we can't determine
454
- # the type without looking at actual matches, but that causes recursion
455
- # So just return a generic type based on the pattern
456
- if /^\(.*\)\*$/.match?(str)
457
- return "repeat"
458
- elsif /^\(.*\)\?$/.match?(str)
459
- return "optional"
460
- elsif /^.*\|.*$/.match?(str)
461
- return "choice"
462
- end
463
-
464
- "unknown"
465
- end
466
-
467
- def calculate_point(offset)
468
- return {row: 0, column: 0} if offset <= 0
469
-
470
- lines_before = @source[0...offset].count("\n")
471
- # Find the newline before this offset (or -1 if we're on line 0)
472
- line_start = if offset > 0
473
- @source.rindex("\n", offset - 1)
474
- end
475
- line_start ||= -1
476
- column = offset - line_start - 1
477
- {row: lines_before, column: column}
478
- end
479
- end
480
-
481
- # Register the availability checker for RSpec dependency tags
482
- TreeHaver::BackendRegistry.register_availability_checker(:citrus) do
483
- available?
484
- end
485
- end
486
- end
487
- end