tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,10 +86,16 @@ module TreeHaver
86
86
  # # For TOML, use toml-rb's grammar
87
87
  # language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
88
88
  class Language
89
+ include Comparable
90
+
89
91
  # The Citrus grammar module
90
92
  # @return [Module] Citrus grammar module (e.g., TomlRB::Document)
91
93
  attr_reader :grammar_module
92
94
 
95
+ # The backend this language is for
96
+ # @return [Symbol]
97
+ attr_reader :backend
98
+
93
99
  # @param grammar_module [Module] A Citrus grammar module with a parse method
94
100
  def initialize(grammar_module)
95
101
  unless grammar_module.respond_to?(:parse)
@@ -98,8 +104,33 @@ module TreeHaver
98
104
  "Expected a Citrus grammar module (e.g., TomlRB::Document)."
99
105
  end
100
106
  @grammar_module = grammar_module
107
+ @backend = :citrus
108
+ end
109
+
110
+ # Compare languages for ordering and equality (Comparable derives == from <=>)
111
+ #
112
+ # Citrus languages are equal if they have the same backend and grammar_module.
113
+ # Grammar module uniquely identifies a Citrus language.
114
+ #
115
+ # @param other [Object] object to compare with
116
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
117
+ def <=>(other)
118
+ return unless other.is_a?(Language)
119
+ return unless other.backend == @backend
120
+
121
+ # Compare by grammar_module name (modules are compared by object_id by default)
122
+ @grammar_module.name <=> other.grammar_module.name
123
+ end
124
+
125
+ # Hash value for this language (for use in Sets/Hashes)
126
+ # @return [Integer]
127
+ def hash
128
+ [@backend, @grammar_module.name].hash
101
129
  end
102
130
 
131
+ # Alias eql? to ==
132
+ alias_method :eql?, :==
133
+
103
134
  # Not applicable for Citrus (tree-sitter-specific)
104
135
  #
105
136
  # Citrus grammars are Ruby modules, not shared libraries.
@@ -131,30 +162,29 @@ module TreeHaver
131
162
 
132
163
  # Set the grammar for this parser
133
164
  #
134
- # @param grammar [Language, Module] Citrus grammar module or Language wrapper
135
- # @return [Language, Module] the grammar that was set
165
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
166
+ # This backend receives the raw Citrus grammar module (unwrapped), not the Language wrapper.
167
+ #
168
+ # @param grammar [Module] Citrus grammar module with a parse method
169
+ # @return [void]
136
170
  # @example
137
171
  # require "toml-rb"
138
- # parser.language = TomlRB::Document # Pass module directly
139
- # # or
140
- # parser.language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
172
+ # # TreeHaver::Parser unwraps Language.new(TomlRB::Document) to just TomlRB::Document
173
+ # parser.language = TomlRB::Document # Backend receives unwrapped module
141
174
  def language=(grammar)
142
- @grammar = if grammar.respond_to?(:grammar_module)
143
- grammar.grammar_module
144
- elsif grammar.respond_to?(:parse)
145
- grammar
146
- else
175
+ # grammar is already unwrapped by TreeHaver::Parser
176
+ unless grammar.respond_to?(:parse)
147
177
  raise ArgumentError,
148
- "Expected Citrus grammar module or Language wrapper, " \
178
+ "Expected Citrus grammar module with parse method, " \
149
179
  "got #{grammar.class}"
150
180
  end
151
- grammar
181
+ @grammar = grammar
152
182
  end
153
183
 
154
184
  # Parse source code
155
185
  #
156
186
  # @param source [String] the source code to parse
157
- # @return [TreeHaver::Tree] wrapped tree
187
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
158
188
  # @raise [TreeHaver::NotAvailable] if no grammar is set
159
189
  # @raise [::Citrus::ParseError] if parsing fails
160
190
  def parse(source)
@@ -162,8 +192,8 @@ module TreeHaver
162
192
 
163
193
  begin
164
194
  citrus_match = @grammar.parse(source)
165
- inner_tree = Tree.new(citrus_match, source)
166
- TreeHaver::Tree.new(inner_tree, source: source)
195
+ # Return the raw backend Tree (this backend's own Tree class) - TreeHaver::Parser will wrap it
196
+ Tree.new(citrus_match, source)
167
197
  rescue ::Citrus::ParseError => e
168
198
  # Re-raise with more context
169
199
  raise TreeHaver::Error, "Parse error: #{e.message}"
@@ -176,8 +206,8 @@ module TreeHaver
176
206
  #
177
207
  # @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
178
208
  # @param source [String] the source code to parse
179
- # @return [TreeHaver::Tree] wrapped tree
180
- def parse_string(old_tree, source)
209
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
210
+ def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
181
211
  parse(source) # Citrus doesn't support incremental parsing
182
212
  end
183
213
  end
@@ -213,6 +243,10 @@ module TreeHaver
213
243
  # - matches: child matches
214
244
  # - captures: named groups
215
245
  #
246
+ # Language-specific helpers can be mixed in for convenience:
247
+ # require "tree_haver/backends/citrus/toml_helpers"
248
+ # TreeHaver::Backends::Citrus::Node.include(TreeHaver::Backends::Citrus::TomlHelpers)
249
+ #
216
250
  # @api private
217
251
  class Node
218
252
  attr_reader :match, :source
@@ -224,17 +258,104 @@ module TreeHaver
224
258
 
225
259
  # Get node type from Citrus rule name
226
260
  #
261
+ # Uses Citrus grammar introspection to dynamically determine node types.
262
+ # Works with any Citrus grammar without language-specific knowledge.
263
+ #
264
+ # Strategy:
265
+ # 1. Check if first event has a .name method (returns Symbol) - use that
266
+ # 2. If first event is a Symbol directly - use that
267
+ # 3. For compound rules (Repeat, Choice), fall back to the event's string representation (no recursion)
268
+ #
227
269
  # @return [String] rule name from grammar
228
270
  def type
229
- # Citrus stores the rule name in events[0]
230
271
  return "unknown" unless @match.respond_to?(:events)
231
272
  return "unknown" unless @match.events.is_a?(Array)
232
273
  return "unknown" if @match.events.empty?
233
274
 
234
- first = @match.events.first
235
- first.is_a?(Symbol) ? first.to_s : "unknown"
275
+ extract_type_from_event(@match.events.first)
236
276
  end
237
277
 
278
+ # Check if this node represents a structural element vs a terminal/token
279
+ #
280
+ # Uses Citrus grammar's terminal? method to determine if this is
281
+ # a structural rule (like "table", "keyvalue") vs a terminal token
282
+ # (like "[", "=", whitespace).
283
+ #
284
+ # @return [Boolean] true if this is a structural (non-terminal) node
285
+ def structural?
286
+ return false unless @match.respond_to?(:events)
287
+ return false if @match.events.empty?
288
+
289
+ first_event = @match.events.first
290
+
291
+ # Check if event has terminal? method (Citrus rule object)
292
+ if first_event.respond_to?(:terminal?)
293
+ return !first_event.terminal?
294
+ end
295
+
296
+ # For Symbol events, try to look up in grammar
297
+ if first_event.is_a?(Symbol) && @match.respond_to?(:grammar)
298
+ grammar = @match.grammar
299
+ if grammar.respond_to?(:rules) && grammar.rules.key?(first_event)
300
+ rule = grammar.rules[first_event]
301
+ return !rule.terminal? if rule.respond_to?(:terminal?)
302
+ end
303
+ end
304
+
305
+ # Default: assume structural when terminal status could not be determined above
306
+ true
307
+ end
308
+
309
+ private
310
+
311
+ # Extract type name from a Citrus event object
312
+ #
313
+ # Handles different event types:
314
+ # - Objects with .name method (Citrus rule objects) -> use .name
315
+ # - Symbol -> use directly
316
+ # - Compound rules (Repeat, Choice) -> check string representation
317
+ #
318
+ # @param event [Object] Citrus event object
319
+ # @return [String] type name
320
+ def extract_type_from_event(event)
321
+ # Case 1: Event has .name method (returns Symbol)
322
+ if event.respond_to?(:name)
323
+ name = event.name
324
+ return name.to_s if name.is_a?(Symbol)
325
+ end
326
+
327
+ # Case 2: Event is a Symbol directly (most common for child nodes)
328
+ return event.to_s if event.is_a?(Symbol)
329
+
330
+ # Case 3: Event is a String
331
+ return event if event.is_a?(String)
332
+
333
+ # Case 4: For compound rules (Repeat, Choice), try string parsing first
334
+ # This avoids recursion issues
335
+ str = event.to_s
336
+
337
+ # Try to extract rule name from string representation
338
+ # Examples: "table", "(comment | table)*", "space?", etc.
339
+ if str =~ /^([a-z_][a-z0-9_]*)/i
340
+ return $1
341
+ end
342
+
343
+ # If we have a pattern like "(rule1 | rule2)*", we can't determine
344
+ # the type without looking at actual matches, but that causes recursion.
345
+ # So just return a generic type based on the pattern
346
+ if /^\(.*\)\*$/.match?(str)
347
+ return "repeat"
348
+ elsif /^\(.*\)\?$/.match?(str)
349
+ return "optional"
350
+ elsif /^.*\|.*$/.match?(str)
351
+ return "choice"
352
+ end
353
+
354
+ "unknown"
355
+ end
356
+
357
+ public
358
+
238
359
  def start_byte
239
360
  @match.offset
240
361
  end
@@ -251,6 +372,42 @@ module TreeHaver
251
372
  calculate_point(@match.offset + @match.length)
252
373
  end
253
374
 
375
+ # Get the 1-based line number where this node starts
376
+ #
377
+ # @return [Integer] 1-based line number
378
+ def start_line
379
+ start_point[:row] + 1
380
+ end
381
+
382
+ # Get the 1-based line number where this node ends
383
+ #
384
+ # @return [Integer] 1-based line number
385
+ def end_line
386
+ end_point[:row] + 1
387
+ end
388
+
389
+ # Get position information as a hash
390
+ #
391
+ # Returns a hash with 1-based line numbers and 0-based columns.
392
+ # Compatible with *-merge gems' FileAnalysisBase.
393
+ #
394
+ # @return [Hash{Symbol => Integer}] Position hash
395
+ def source_position
396
+ {
397
+ start_line: start_line,
398
+ end_line: end_line,
399
+ start_column: start_point[:column],
400
+ end_column: end_point[:column],
401
+ }
402
+ end
403
+
404
+ # Get the first child node
405
+ #
406
+ # @return [Node, nil] First child or nil
407
+ def first_child
408
+ child(0)
409
+ end
410
+
254
411
  def text
255
412
  @match.string
256
413
  end