tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Citrus backend using pure Ruby PEG parser
6
+ #
7
+ # This backend wraps Citrus-based parsers (like toml-rb) to provide a
8
+ # pure Ruby alternative to tree-sitter. Citrus is a PEG (Parsing Expression
9
+ # Grammar) parser generator written in Ruby.
10
+ #
11
+ # Unlike tree-sitter backends which are language-agnostic runtime parsers,
12
+ # Citrus parsers are grammar-specific and compiled into Ruby code. Each
13
+ # language needs its own Citrus grammar (e.g., toml-rb for TOML).
14
+ #
15
+ # @note This backend requires a Citrus grammar for the specific language
16
+ # @see https://github.com/mjackson/citrus Citrus parser generator
17
+ # @see https://github.com/emancu/toml-rb toml-rb (TOML Citrus grammar)
18
+ #
19
+ # @example Using with toml-rb
20
+ # require "toml-rb"
21
+ #
22
+ # parser = TreeHaver::Parser.new
23
+ # # For Citrus, "language" is actually a grammar module
24
+ # parser.language = TomlRB::Document
25
+ # tree = parser.parse(toml_source)
26
+ module Citrus
27
+ @load_attempted = false
28
+ @loaded = false
29
+
30
+ # Check if the Citrus backend is available
31
+ #
32
+ # Attempts to require citrus on first call and caches the result.
33
+ #
34
+ # @return [Boolean] true if citrus gem is available
35
+ # @example
36
+ # if TreeHaver::Backends::Citrus.available?
37
+ # puts "Citrus backend is ready"
38
+ # end
39
+ class << self
40
+ def available?
41
+ return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
42
+ @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
43
+ begin
44
+ require "citrus"
45
+
46
+ @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
47
+ rescue LoadError
48
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
49
+ end
50
+ @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
51
+ end
52
+
53
+ # Reset the load state (primarily for testing)
54
+ #
55
+ # @return [void]
56
+ # @api private
57
+ def reset!
58
+ @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
59
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
60
+ end
61
+
62
+ # Get capabilities supported by this backend
63
+ #
64
+ # @return [Hash{Symbol => Object}] capability map
65
+ # @example
66
+ # TreeHaver::Backends::Citrus.capabilities
67
+ # # => { backend: :citrus, query: false, bytes_field: true, incremental: false }
68
+ def capabilities
69
+ return {} unless available?
70
+ {
71
+ backend: :citrus,
72
+ query: false, # Citrus doesn't have a query API like tree-sitter
73
+ bytes_field: true, # Citrus::Match provides offset and length
74
+ incremental: false, # Citrus doesn't support incremental parsing
75
+ pure_ruby: true, # Citrus is pure Ruby (portable)
76
+ }
77
+ end
78
+ end
79
+
80
+ # Citrus grammar wrapper
81
+ #
82
+ # Unlike tree-sitter which loads compiled .so files, Citrus uses Ruby modules
83
+ # that define grammars. This class wraps a Citrus grammar module.
84
+ #
85
+ # @example
86
+ # # For TOML, use toml-rb's grammar
87
+ # language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
88
+ class Language
89
+ include Comparable
90
+
91
+ # The Citrus grammar module
92
+ # @return [Module] Citrus grammar module (e.g., TomlRB::Document)
93
+ attr_reader :grammar_module
94
+
95
+ # The backend this language is for
96
+ # @return [Symbol]
97
+ attr_reader :backend
98
+
99
+ # @param grammar_module [Module] A Citrus grammar module with a parse method
100
+ def initialize(grammar_module)
101
+ unless grammar_module.respond_to?(:parse)
102
+ raise TreeHaver::NotAvailable,
103
+ "Grammar module must respond to :parse. " \
104
+ "Expected a Citrus grammar module (e.g., TomlRB::Document)."
105
+ end
106
+ @grammar_module = grammar_module
107
+ @backend = :citrus
108
+ end
109
+
110
+ # Compare languages for equality
111
+ #
112
+ # Citrus languages are equal if they have the same backend and grammar_module.
113
+ # Grammar module uniquely identifies a Citrus language.
114
+ #
115
+ # @param other [Object] object to compare with
116
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
117
+ def <=>(other)
118
+ return unless other.is_a?(Language)
119
+ return unless other.backend == @backend
120
+
121
+ # Compare by grammar_module name (modules are compared by object_id by default)
122
+ @grammar_module.name <=> other.grammar_module.name
123
+ end
124
+
125
+ # Hash value for this language (for use in Sets/Hashes)
126
+ # @return [Integer]
127
+ def hash
128
+ [@backend, @grammar_module.name].hash
129
+ end
130
+
131
+ # Alias eql? to ==
132
+ alias_method :eql?, :==
133
+
134
+ # Not applicable for Citrus (tree-sitter-specific)
135
+ #
136
+ # Citrus grammars are Ruby modules, not shared libraries.
137
+ # This method exists for API compatibility but will raise an error.
138
+ #
139
+ # @raise [TreeHaver::NotAvailable] always raises
140
+ class << self
141
+ def from_library(path, symbol: nil, name: nil)
142
+ raise TreeHaver::NotAvailable,
143
+ "Citrus backend doesn't use shared libraries. " \
144
+ "Use Citrus::Language.new(GrammarModule) instead."
145
+ end
146
+
147
+ alias_method :from_path, :from_library
148
+ end
149
+ end
150
+
151
+ # Citrus parser wrapper
152
+ #
153
+ # Wraps Citrus grammar modules to provide a tree-sitter-like API.
154
+ class Parser
155
+ # Create a new Citrus parser instance
156
+ #
157
+ # @raise [TreeHaver::NotAvailable] if citrus gem is not available
158
+ def initialize
159
+ raise TreeHaver::NotAvailable, "citrus gem not available" unless Citrus.available?
160
+ @grammar = nil
161
+ end
162
+
163
+ # Set the grammar for this parser
164
+ #
165
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
166
+ # This backend receives the raw Citrus grammar module (unwrapped), not the Language wrapper.
167
+ #
168
+ # @param grammar [Module] Citrus grammar module with a parse method
169
+ # @return [void]
170
+ # @example
171
+ # require "toml-rb"
172
+ # # TreeHaver::Parser unwraps Language.new(TomlRB::Document) to just TomlRB::Document
173
+ # parser.language = TomlRB::Document # Backend receives unwrapped module
174
+ def language=(grammar)
175
+ # grammar is already unwrapped by TreeHaver::Parser
176
+ unless grammar.respond_to?(:parse)
177
+ raise ArgumentError,
178
+ "Expected Citrus grammar module with parse method, " \
179
+ "got #{grammar.class}"
180
+ end
181
+ @grammar = grammar
182
+ end
183
+
184
+ # Parse source code
185
+ #
186
+ # @param source [String] the source code to parse
187
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
188
+ # @raise [TreeHaver::NotAvailable] if no grammar is set
189
+ # @raise [::Citrus::ParseError] if parsing fails
190
+ def parse(source)
191
+ raise TreeHaver::NotAvailable, "No grammar loaded" unless @grammar
192
+
193
+ begin
194
+ citrus_match = @grammar.parse(source)
195
+ # Return raw Citrus::Tree - TreeHaver::Parser will wrap it
196
+ Tree.new(citrus_match, source)
197
+ rescue ::Citrus::ParseError => e
198
+ # Re-raise with more context
199
+ raise TreeHaver::Error, "Parse error: #{e.message}"
200
+ end
201
+ end
202
+
203
+ # Parse source code (compatibility with tree-sitter API)
204
+ #
205
+ # Citrus doesn't support incremental parsing, so old_tree is ignored.
206
+ #
207
+ # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
208
+ # @param source [String] the source code to parse
209
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
210
+ def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
211
+ parse(source) # Citrus doesn't support incremental parsing
212
+ end
213
+ end
214
+
215
+ # Citrus tree wrapper
216
+ #
217
+ # Wraps a Citrus::Match (which represents the parse tree) to provide
218
+ # tree-sitter-compatible API.
219
+ #
220
+ # @api private
221
+ class Tree
222
+ attr_reader :root_match, :source
223
+
224
+ def initialize(root_match, source)
225
+ @root_match = root_match
226
+ @source = source
227
+ end
228
+
229
+ def root_node
230
+ Node.new(@root_match, @source)
231
+ end
232
+ end
233
+
234
+ # Citrus node wrapper
235
+ #
236
+ # Wraps Citrus::Match objects to provide tree-sitter-compatible node API.
237
+ #
238
+ # Citrus::Match provides:
239
+ # - events[0]: rule name (Symbol) - used as type
240
+ # - offset: byte position
241
+ # - length: byte length
242
+ # - string: matched text
243
+ # - matches: child matches
244
+ # - captures: named groups
245
+ #
246
+ # Language-specific helpers can be mixed in for convenience:
247
+ # require "tree_haver/backends/citrus/toml_helpers"
248
+ # TreeHaver::Backends::Citrus::Node.include(TreeHaver::Backends::Citrus::TomlHelpers)
249
+ #
250
+ # @api private
251
+ class Node
252
+ attr_reader :match, :source
253
+
254
+ def initialize(match, source)
255
+ @match = match
256
+ @source = source
257
+ end
258
+
259
+ # Get node type from Citrus rule name
260
+ #
261
+ # Uses Citrus grammar introspection to dynamically determine node types.
262
+ # Works with any Citrus grammar without language-specific knowledge.
263
+ #
264
+ # Strategy:
265
+ # 1. Check if first event has a .name method (returns Symbol) - use that
266
+ # 2. If first event is a Symbol directly - use that
267
+ # 3. For compound rules (Repeat, Choice), recurse into first match
268
+ #
269
+ # @return [String] rule name from grammar
270
+ def type
271
+ return "unknown" unless @match.respond_to?(:events)
272
+ return "unknown" unless @match.events.is_a?(Array)
273
+ return "unknown" if @match.events.empty?
274
+
275
+ extract_type_from_event(@match.events.first)
276
+ end
277
+
278
+ # Check if this node represents a structural element vs a terminal/token
279
+ #
280
+ # Uses Citrus grammar's terminal? method to determine if this is
281
+ # a structural rule (like "table", "keyvalue") vs a terminal token
282
+ # (like "[", "=", whitespace).
283
+ #
284
+ # @return [Boolean] true if this is a structural (non-terminal) node
285
+ def structural?
286
+ return false unless @match.respond_to?(:events)
287
+ return false if @match.events.empty?
288
+
289
+ first_event = @match.events.first
290
+
291
+ # Check if event has terminal? method (Citrus rule object)
292
+ if first_event.respond_to?(:terminal?)
293
+ return !first_event.terminal?
294
+ end
295
+
296
+ # For Symbol events, try to look up in grammar
297
+ if first_event.is_a?(Symbol) && @match.respond_to?(:grammar)
298
+ grammar = @match.grammar
299
+ if grammar.respond_to?(:rules) && grammar.rules.key?(first_event)
300
+ rule = grammar.rules[first_event]
301
+ return !rule.terminal? if rule.respond_to?(:terminal?)
302
+ end
303
+ end
304
+
305
+ # Default: assume structural if not a simple string/regex terminal
306
+ true
307
+ end
308
+
309
+ private
310
+
311
+ # Extract type name from a Citrus event object
312
+ #
313
+ # Handles different event types:
314
+ # - Objects with .name method (Citrus rule objects) -> use .name
315
+ # - Symbol -> use directly
316
+ # - Compound rules (Repeat, Choice) -> check string representation
317
+ #
318
+ # @param event [Object] Citrus event object
319
+ # @return [String] type name
320
+ def extract_type_from_event(event)
321
+ # Case 1: Event has .name method (returns Symbol)
322
+ if event.respond_to?(:name)
323
+ name = event.name
324
+ return name.to_s if name.is_a?(Symbol)
325
+ end
326
+
327
+ # Case 2: Event is a Symbol directly (most common for child nodes)
328
+ return event.to_s if event.is_a?(Symbol)
329
+
330
+ # Case 3: Event is a String
331
+ return event if event.is_a?(String)
332
+
333
+ # Case 4: For compound rules (Repeat, Choice), try string parsing first
334
+ # This avoids recursion issues
335
+ str = event.to_s
336
+
337
+ # Try to extract rule name from string representation
338
+ # Examples: "table", "(comment | table)*", "space?", etc.
339
+ if str =~ /^([a-z_][a-z0-9_]*)/i
340
+ return $1
341
+ end
342
+
343
+ # If we have a pattern like "(rule1 | rule2)*", we can't determine
344
+ # the type without looking at actual matches, but that causes recursion
345
+ # So just return a generic type based on the pattern
346
+ if /^\(.*\)\*$/.match?(str)
347
+ return "repeat"
348
+ elsif /^\(.*\)\?$/.match?(str)
349
+ return "optional"
350
+ elsif /^.*\|.*$/.match?(str)
351
+ return "choice"
352
+ end
353
+
354
+ "unknown"
355
+ end
356
+
357
+ public
358
+
359
+ def start_byte
360
+ @match.offset
361
+ end
362
+
363
+ def end_byte
364
+ @match.offset + @match.length
365
+ end
366
+
367
+ def start_point
368
+ calculate_point(@match.offset)
369
+ end
370
+
371
+ def end_point
372
+ calculate_point(@match.offset + @match.length)
373
+ end
374
+
375
+ def text
376
+ @match.string
377
+ end
378
+
379
+ def child_count
380
+ @match.respond_to?(:matches) ? @match.matches.size : 0
381
+ end
382
+
383
+ def child(index)
384
+ return unless @match.respond_to?(:matches)
385
+ return if index >= @match.matches.size
386
+
387
+ Node.new(@match.matches[index], @source)
388
+ end
389
+
390
+ def children
391
+ return [] unless @match.respond_to?(:matches)
392
+ @match.matches.map { |m| Node.new(m, @source) }
393
+ end
394
+
395
+ def each(&block)
396
+ return to_enum(__method__) unless block_given?
397
+ children.each(&block)
398
+ end
399
+
400
+ def has_error?
401
+ false # Citrus raises on parse error, so successful parse has no errors
402
+ end
403
+
404
+ def missing?
405
+ false # Citrus doesn't have the concept of missing nodes
406
+ end
407
+
408
+ def named?
409
+ true # Citrus matches are typically "named" in tree-sitter terminology
410
+ end
411
+
412
+ private
413
+
414
+ def calculate_point(offset)
415
+ lines_before = @source[0...offset].count("\n")
416
+ line_start = @source.rindex("\n", offset - 1) || -1
417
+ column = offset - line_start - 1
418
+ {row: lines_before, column: column}
419
+ end
420
+ end
421
+ end
422
+ end
423
+ end