tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
|
@@ -86,10 +86,16 @@ module TreeHaver
|
|
|
86
86
|
# # For TOML, use toml-rb's grammar
|
|
87
87
|
# language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
|
|
88
88
|
class Language
|
|
89
|
+
include Comparable
|
|
90
|
+
|
|
89
91
|
# The Citrus grammar module
|
|
90
92
|
# @return [Module] Citrus grammar module (e.g., TomlRB::Document)
|
|
91
93
|
attr_reader :grammar_module
|
|
92
94
|
|
|
95
|
+
# The backend this language is for
|
|
96
|
+
# @return [Symbol]
|
|
97
|
+
attr_reader :backend
|
|
98
|
+
|
|
93
99
|
# @param grammar_module [Module] A Citrus grammar module with a parse method
|
|
94
100
|
def initialize(grammar_module)
|
|
95
101
|
unless grammar_module.respond_to?(:parse)
|
|
@@ -98,8 +104,33 @@ module TreeHaver
|
|
|
98
104
|
"Expected a Citrus grammar module (e.g., TomlRB::Document)."
|
|
99
105
|
end
|
|
100
106
|
@grammar_module = grammar_module
|
|
107
|
+
@backend = :citrus
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Compare languages for equality
|
|
111
|
+
#
|
|
112
|
+
# Citrus languages are equal if they have the same backend and grammar_module.
|
|
113
|
+
# Grammar module uniquely identifies a Citrus language.
|
|
114
|
+
#
|
|
115
|
+
# @param other [Object] object to compare with
|
|
116
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
117
|
+
def <=>(other)
|
|
118
|
+
return unless other.is_a?(Language)
|
|
119
|
+
return unless other.backend == @backend
|
|
120
|
+
|
|
121
|
+
# Compare by grammar_module name (Module#<=> only reflects ancestry and returns nil for unrelated modules)
|
|
122
|
+
@grammar_module.name <=> other.grammar_module.name
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
126
|
+
# @return [Integer]
|
|
127
|
+
def hash
|
|
128
|
+
[@backend, @grammar_module.name].hash
|
|
101
129
|
end
|
|
102
130
|
|
|
131
|
+
# Alias eql? to == (Comparable#==, driven by <=>) so Hash/Set lookups agree with #hash
|
|
132
|
+
alias_method :eql?, :==
|
|
133
|
+
|
|
103
134
|
# Not applicable for Citrus (tree-sitter-specific)
|
|
104
135
|
#
|
|
105
136
|
# Citrus grammars are Ruby modules, not shared libraries.
|
|
@@ -131,30 +162,29 @@ module TreeHaver
|
|
|
131
162
|
|
|
132
163
|
# Set the grammar for this parser
|
|
133
164
|
#
|
|
134
|
-
#
|
|
135
|
-
#
|
|
165
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
166
|
+
# This backend receives the raw Citrus grammar module (unwrapped), not the Language wrapper.
|
|
167
|
+
#
|
|
168
|
+
# @param grammar [Module] Citrus grammar module with a parse method
|
|
169
|
+
# @return [void]
|
|
136
170
|
# @example
|
|
137
171
|
# require "toml-rb"
|
|
138
|
-
#
|
|
139
|
-
# #
|
|
140
|
-
# parser.language = TreeHaver::Backends::Citrus::Language.new(TomlRB::Document)
|
|
172
|
+
# # TreeHaver::Parser unwraps Language.new(TomlRB::Document) to just TomlRB::Document
|
|
173
|
+
# parser.language = TomlRB::Document # Backend receives unwrapped module
|
|
141
174
|
def language=(grammar)
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
elsif grammar.respond_to?(:parse)
|
|
145
|
-
grammar
|
|
146
|
-
else
|
|
175
|
+
# grammar is already unwrapped by TreeHaver::Parser
|
|
176
|
+
unless grammar.respond_to?(:parse)
|
|
147
177
|
raise ArgumentError,
|
|
148
|
-
"Expected Citrus grammar module
|
|
178
|
+
"Expected Citrus grammar module with parse method, " \
|
|
149
179
|
"got #{grammar.class}"
|
|
150
180
|
end
|
|
151
|
-
grammar
|
|
181
|
+
@grammar = grammar
|
|
152
182
|
end
|
|
153
183
|
|
|
154
184
|
# Parse source code
|
|
155
185
|
#
|
|
156
186
|
# @param source [String] the source code to parse
|
|
157
|
-
# @return [
|
|
187
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
158
188
|
# @raise [TreeHaver::NotAvailable] if no grammar is set
|
|
159
189
|
# @raise [::Citrus::ParseError] if parsing fails
|
|
160
190
|
def parse(source)
|
|
@@ -162,8 +192,8 @@ module TreeHaver
|
|
|
162
192
|
|
|
163
193
|
begin
|
|
164
194
|
citrus_match = @grammar.parse(source)
|
|
165
|
-
|
|
166
|
-
|
|
195
|
+
# Return the raw backend Tree - TreeHaver::Parser will wrap it
|
|
196
|
+
Tree.new(citrus_match, source)
|
|
167
197
|
rescue ::Citrus::ParseError => e
|
|
168
198
|
# Re-raise with more context
|
|
169
199
|
raise TreeHaver::Error, "Parse error: #{e.message}"
|
|
@@ -176,8 +206,8 @@ module TreeHaver
|
|
|
176
206
|
#
|
|
177
207
|
# @param old_tree [TreeHaver::Tree, nil] ignored (no incremental parsing support)
|
|
178
208
|
# @param source [String] the source code to parse
|
|
179
|
-
# @return [
|
|
180
|
-
def parse_string(old_tree, source)
|
|
209
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
210
|
+
def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
|
|
181
211
|
parse(source) # Citrus doesn't support incremental parsing
|
|
182
212
|
end
|
|
183
213
|
end
|
|
@@ -213,6 +243,10 @@ module TreeHaver
|
|
|
213
243
|
# - matches: child matches
|
|
214
244
|
# - captures: named groups
|
|
215
245
|
#
|
|
246
|
+
# Language-specific helpers can be mixed in for convenience:
|
|
247
|
+
# require "tree_haver/backends/citrus/toml_helpers"
|
|
248
|
+
# TreeHaver::Backends::Citrus::Node.include(TreeHaver::Backends::Citrus::TomlHelpers)
|
|
249
|
+
#
|
|
216
250
|
# @api private
|
|
217
251
|
class Node
|
|
218
252
|
attr_reader :match, :source
|
|
@@ -224,17 +258,104 @@ module TreeHaver
|
|
|
224
258
|
|
|
225
259
|
# Get node type from Citrus rule name
|
|
226
260
|
#
|
|
261
|
+
# Uses Citrus grammar introspection to dynamically determine node types.
|
|
262
|
+
# Works with any Citrus grammar without language-specific knowledge.
|
|
263
|
+
#
|
|
264
|
+
# Strategy:
|
|
265
|
+
# 1. Check if first event has a .name method (returns Symbol) - use that
|
|
266
|
+
# 2. If first event is a Symbol directly - use that
|
|
267
|
+
# 3. For compound rules (Repeat, Choice), derive a name from the rule's string representation (no recursion)
|
|
268
|
+
#
|
|
227
269
|
# @return [String] rule name from grammar
|
|
228
270
|
def type
|
|
229
|
-
# Citrus stores the rule name in events[0]
|
|
230
271
|
return "unknown" unless @match.respond_to?(:events)
|
|
231
272
|
return "unknown" unless @match.events.is_a?(Array)
|
|
232
273
|
return "unknown" if @match.events.empty?
|
|
233
274
|
|
|
234
|
-
|
|
235
|
-
first.is_a?(Symbol) ? first.to_s : "unknown"
|
|
275
|
+
extract_type_from_event(@match.events.first)
|
|
236
276
|
end
|
|
237
277
|
|
|
278
|
+
# Check if this node represents a structural element vs a terminal/token
|
|
279
|
+
#
|
|
280
|
+
# Uses Citrus grammar's terminal? method to determine if this is
|
|
281
|
+
# a structural rule (like "table", "keyvalue") vs a terminal token
|
|
282
|
+
# (like "[", "=", whitespace).
|
|
283
|
+
#
|
|
284
|
+
# @return [Boolean] true if this is a structural (non-terminal) node
|
|
285
|
+
def structural?
|
|
286
|
+
return false unless @match.respond_to?(:events)
|
|
287
|
+
return false if @match.events.empty?
|
|
288
|
+
|
|
289
|
+
first_event = @match.events.first
|
|
290
|
+
|
|
291
|
+
# Check if event has terminal? method (Citrus rule object)
|
|
292
|
+
if first_event.respond_to?(:terminal?)
|
|
293
|
+
return !first_event.terminal?
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
# For Symbol events, try to look up in grammar
|
|
297
|
+
if first_event.is_a?(Symbol) && @match.respond_to?(:grammar)
|
|
298
|
+
grammar = @match.grammar
|
|
299
|
+
if grammar.respond_to?(:rules) && grammar.rules.key?(first_event)
|
|
300
|
+
rule = grammar.rules[first_event]
|
|
301
|
+
return !rule.terminal? if rule.respond_to?(:terminal?)
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
# Default: assume structural if not a simple string/regex terminal
|
|
306
|
+
true
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
private
|
|
310
|
+
|
|
311
|
+
# Extract type name from a Citrus event object
|
|
312
|
+
#
|
|
313
|
+
# Handles different event types:
|
|
314
|
+
# - Objects with .name method (Citrus rule objects) -> use .name
|
|
315
|
+
# - Symbol -> use directly
|
|
316
|
+
# - Compound rules (Repeat, Choice) -> check string representation
|
|
317
|
+
#
|
|
318
|
+
# @param event [Object] Citrus event object
|
|
319
|
+
# @return [String] type name
|
|
320
|
+
def extract_type_from_event(event)
|
|
321
|
+
# Case 1: Event has .name method (returns Symbol)
|
|
322
|
+
if event.respond_to?(:name)
|
|
323
|
+
name = event.name
|
|
324
|
+
return name.to_s if name.is_a?(Symbol)
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
# Case 2: Event is a Symbol directly (most common for child nodes)
|
|
328
|
+
return event.to_s if event.is_a?(Symbol)
|
|
329
|
+
|
|
330
|
+
# Case 3: Event is a String
|
|
331
|
+
return event if event.is_a?(String)
|
|
332
|
+
|
|
333
|
+
# Case 4: For compound rules (Repeat, Choice), parse the rule's string representation
|
|
334
|
+
# This avoids recursion issues
|
|
335
|
+
str = event.to_s
|
|
336
|
+
|
|
337
|
+
# Try to extract rule name from string representation
|
|
338
|
+
# Examples: "table", "(comment | table)*", "space?", etc.
|
|
339
|
+
if str =~ /^([a-z_][a-z0-9_]*)/i
|
|
340
|
+
return $1
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
# If we have a pattern like "(rule1 | rule2)*", we can't determine
|
|
344
|
+
# the type without looking at actual matches, but that causes recursion
|
|
345
|
+
# So just return a generic type based on the pattern
|
|
346
|
+
if /^\(.*\)\*$/.match?(str)
|
|
347
|
+
return "repeat"
|
|
348
|
+
elsif /^\(.*\)\?$/.match?(str)
|
|
349
|
+
return "optional"
|
|
350
|
+
elsif /^.*\|.*$/.match?(str)
|
|
351
|
+
return "choice"
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
"unknown"
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
public
|
|
358
|
+
|
|
238
359
|
def start_byte
|
|
239
360
|
@match.offset
|
|
240
361
|
end
|
|
@@ -251,6 +372,42 @@ module TreeHaver
|
|
|
251
372
|
calculate_point(@match.offset + @match.length)
|
|
252
373
|
end
|
|
253
374
|
|
|
375
|
+
# Get the 1-based line number where this node starts
|
|
376
|
+
#
|
|
377
|
+
# @return [Integer] 1-based line number
|
|
378
|
+
def start_line
|
|
379
|
+
start_point[:row] + 1
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
# Get the 1-based line number where this node ends
|
|
383
|
+
#
|
|
384
|
+
# @return [Integer] 1-based line number
|
|
385
|
+
def end_line
|
|
386
|
+
end_point[:row] + 1
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
# Get position information as a hash
|
|
390
|
+
#
|
|
391
|
+
# Returns a hash with 1-based line numbers and 0-based columns.
|
|
392
|
+
# Compatible with *-merge gems' FileAnalysisBase.
|
|
393
|
+
#
|
|
394
|
+
# @return [Hash{Symbol => Integer}] Position hash
|
|
395
|
+
def source_position
|
|
396
|
+
{
|
|
397
|
+
start_line: start_line,
|
|
398
|
+
end_line: end_line,
|
|
399
|
+
start_column: start_point[:column],
|
|
400
|
+
end_column: end_point[:column],
|
|
401
|
+
}
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
# Get the first child node
|
|
405
|
+
#
|
|
406
|
+
# @return [Node, nil] First child or nil
|
|
407
|
+
def first_child
|
|
408
|
+
child(0)
|
|
409
|
+
end
|
|
410
|
+
|
|
254
411
|
def text
|
|
255
412
|
@match.string
|
|
256
413
|
end
|