tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
module Backends
|
|
5
|
+
# Commonmarker backend using the Commonmarker gem (comrak Rust parser)
|
|
6
|
+
#
|
|
7
|
+
# This backend wraps Commonmarker, a Ruby gem that provides bindings to
|
|
8
|
+
# comrak, a fast CommonMark-compliant Markdown parser written in Rust.
|
|
9
|
+
#
|
|
10
|
+
# @note This backend only parses Markdown source code
|
|
11
|
+
# @see https://github.com/gjtorikian/commonmarker Commonmarker gem
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# parser = TreeHaver::Parser.new
|
|
15
|
+
# parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
|
|
16
|
+
# tree = parser.parse(markdown_source)
|
|
17
|
+
# root = tree.root_node
|
|
18
|
+
# puts root.type # => "document"
|
|
19
|
+
module Commonmarker
|
|
20
|
+
# Load state for the optional commonmarker gem. These are instance variables
# of the module object itself and are managed by the singleton methods below.
@load_attempted = false
@loaded = false

class << self
  # Check if the Commonmarker backend is available
  #
  # The first call tries to require the commonmarker gem and remembers the
  # outcome; subsequent calls return the remembered value without requiring
  # again (until {reset!} is called).
  #
  # @return [Boolean] true if commonmarker gem is available
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "commonmarker"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Reset the load state (primarily for testing)
  #
  # After this call the next {available?} attempts the require again.
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = @loaded = false
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   commonmarker gem cannot be loaded
  def capabilities
    if available?
      {
        backend: :commonmarker,
        query: false,
        bytes_field: false, # Commonmarker uses line/column
        incremental: false,
        pure_ruby: false, # Uses Rust via FFI
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
      }
    else
      {}
    end
  end
end
|
|
64
|
+
|
|
65
|
+
# Commonmarker language wrapper
#
# Commonmarker only parses Markdown. This class exists for API compatibility
# and to carry the parse options handed to Commonmarker.
#
# @example
#   language = TreeHaver::Backends::Commonmarker::Language.markdown
#   parser.language = language
class Language
  include Comparable

  # @return [Symbol] the language name (always :markdown for Commonmarker)
  attr_reader :name

  # @return [Symbol] the backend this language is for (:commonmarker)
  attr_reader :backend

  # @return [Hash] Commonmarker parse options
  attr_reader :options

  # Create a Markdown language instance
  #
  # @param options [Hash] Commonmarker parse options
  # @return [Language] Markdown language
  def self.markdown(options: {})
    new(:markdown, options: options)
  end

  # Create a new Commonmarker language instance
  #
  # @param name [Symbol, #to_sym] Language name (should be :markdown)
  # @param options [Hash] Commonmarker parse options
  def initialize(name = :markdown, options: {})
    @name, @backend, @options = name.to_sym, :commonmarker, options
  end

  # Comparison for sorting/equality
  #
  # Only the name takes part in the comparison; options are ignored.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] nil when other is not a Language
  def <=>(other)
    other.is_a?(Language) ? name <=> other.name : nil
  end

  # @return [String] debugging representation including name and options
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Language name=#{name} options=#{options}>"
  end
end
|
|
117
|
+
|
|
118
|
+
# Commonmarker parser wrapper
#
# Holds the language to parse with and turns Markdown source into a Tree.
class Parser
  # @return [Language, nil] language used for parsing; must be set before {#parse}
  attr_accessor :language

  def initialize
    @language = nil
  end

  # Parse Markdown source code
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or the backend is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Commonmarker not available" unless Commonmarker.available?

    # `::Commonmarker` is the gem's top-level module; the unqualified
    # `Commonmarker` above is resolved lexically from this class.
    document = ::Commonmarker.parse(source, options: @language.options || {})
    Tree.new(document, source)
  end

  # Alias for compatibility
  #
  # @param _old_tree [Object] ignored
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
|
|
144
|
+
|
|
145
|
+
# Commonmarker tree wrapper
#
# Pairs the parsed document with the source text it came from.
class Tree
  # @return [Object] the document object produced by the parser
  attr_reader :inner_tree

  # @return [String] the source text that was parsed
  attr_reader :source

  # @param document [Object] parsed document
  # @param source [String] source text the document was parsed from
  def initialize(document, source)
    @inner_tree, @source = document, source
    # Split once so every Node created from this tree shares the same lines.
    @lines = source.lines
  end

  # @return [Node] wrapper around the document root
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # @return [Array] always empty for this backend
  def errors
    []
  end

  # @return [Array] always empty for this backend
  def warnings
    []
  end

  # @return [Array] always empty for this backend
  def comments
    []
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Tree>"
  end
end
|
|
175
|
+
|
|
176
|
+
# Commonmarker node wrapper
#
# Wraps Commonmarker::Node to provide TreeHaver::Node-compatible interface.
# Each wrapper carries the full source text and its pre-split lines so that
# line/column positions can be converted into byte offsets.
class Node
  include Comparable

  # Per node edge: the two hash keys used by hash-shaped positions, followed
  # by the two slots used by array-shaped positions.
  EDGE_LOOKUP = {
    start: [:start_line, :start_column, 0, 1],
    end: [:end_line, :end_column, 2, 3],
  }.freeze
  private_constant :EDGE_LOOKUP

  attr_reader :inner_node, :source

  # @param node [Object] the wrapped Commonmarker node
  # @param source [String] full source text the node was parsed from
  # @param lines [Array<String>, nil] source split into lines; computed from
  #   source when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get the node type as a string
  #
  # Commonmarker uses symbols like :document, :heading, :paragraph, etc.
  #
  # @return [String] Node type
  def type
    @inner_node.type.to_s
  end

  alias_method :kind, :type

  # Get the text content of this node
  #
  # Uses the node's own string_content when available; otherwise (or when
  # string_content raises TypeError, as container nodes do) the text of all
  # children is concatenated.
  #
  # @return [String] Node text
  def text
    return children.map(&:text).join unless @inner_node.respond_to?(:string_content)

    begin
      @inner_node.string_content.to_s
    rescue TypeError
      children.map(&:text).join
    end
  end

  # Get child nodes
  #
  # @return [Array<Node>] Child nodes (empty when the inner node cannot be iterated)
  def children
    return [] unless @inner_node.respond_to?(:each)

    wrapped = []
    @inner_node.each { |child| wrapped << wrap(child) }
    wrapped
  end

  # Iterate over child nodes
  #
  # @yieldparam child [Node]
  # @return [Enumerator, Array<Node>] an enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of children
  def child_count
    children.size
  end

  # @param index [Integer] position in {#children}
  # @return [Node, nil] child at index, or nil when out of range
  def child(index)
    children[index]
  end

  # Position information
  # Commonmarker 2.x provides source_position as a hash with start_line, start_column, end_line, end_column

  # @return [Integer] byte offset of {#start_point} within the source
  def start_byte
    point = start_point
    calculate_byte_offset(point.row, point.column)
  end

  # @return [Integer] byte offset of {#end_point} within the source
  def end_byte
    point = end_point
    calculate_byte_offset(point.row, point.column)
  end

  # @return [Point] 0-based row/column of the node start; (0, 0) when the
  #   inner node reports no position
  def start_point
    point_for(:start)
  end

  # @return [Point] 0-based row/column of the node end; (0, 0) when the
  #   inner node reports no position
  def end_point
    point_for(:end)
  end

  # @return [Integer] 1-based start line; 1 when the inner node reports no position
  def start_line
    line_for(:start)
  end

  # @return [Integer] 1-based end line; 1 when the inner node reports no position
  def end_line
    line_for(:end)
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # @return [Boolean] always true for this backend
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] nil when other does not respond to start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)

    by_start = start_byte <=> other.start_byte
    return by_start unless by_start&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] debugging representation including the node type
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Node type=#{type}>"
  end

  # Commonmarker-specific methods

  # Get heading level (1-6)
  # @return [Integer, nil] nil for non-heading nodes or when the lookup fails
  def header_level
    return unless type == "heading"
    attempt(:header_level)
  end

  # Get fence info for code blocks
  # @return [String, nil] nil for non-code-block nodes or when the lookup fails
  def fence_info
    return unless type == "code_block"
    attempt(:fence_info)
  end

  # Get URL for links/images
  # @return [String, nil] nil when the inner node has no URL
  def url
    attempt(:url)
  end

  # Get title for links/images
  # @return [String, nil] nil when the inner node has no title
  def title
    attempt(:title)
  end

  # Get the next sibling
  # @return [Node, nil]
  def next_sibling
    wrap_optional(attempt(:next_sibling))
  end

  # Get the previous sibling
  # @return [Node, nil]
  def previous_sibling
    wrap_optional(attempt(:previous_sibling))
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    wrap_optional(attempt(:parent))
  end

  private

  # Wrap an inner node, sharing this node's source and line cache.
  def wrap(inner)
    Node.new(inner, @source, @lines)
  end

  # Wrap an inner node unless it is nil/false.
  def wrap_optional(inner)
    inner ? wrap(inner) : nil
  end

  # Call a method on the inner node, returning nil when it raises any
  # StandardError (including NoMethodError for unsupported node kinds).
  def attempt(method_name)
    @inner_node.public_send(method_name)
  rescue
    nil
  end

  # Resolve the 1-based [line, column] pair for one edge of the node.
  #
  # Prefers source_position (hash-shaped); falls back to sourcepos.
  # NOTE(review): the fallback originally assumed an Array of
  # [start_line, start_column, end_line, end_column]; a Hash with the same
  # keys as source_position is accepted too, since indexing a Hash by slot
  # number yields nil. Confirm which shape the supported gem versions return.
  #
  # @param edge [Symbol] :start or :end
  # @return [Array(Integer, Integer), nil] nil when no position is available
  def line_and_column(edge)
    line_key, column_key, line_slot, column_slot = EDGE_LOOKUP.fetch(edge)

    modern = attempt(:source_position) if @inner_node.respond_to?(:source_position)
    return [modern[line_key], modern[column_key] || 1] if modern && modern[line_key]

    legacy = attempt(:sourcepos)
    return unless legacy

    if legacy.respond_to?(:key?)
      [legacy[line_key], legacy[column_key]]
    else
      [legacy[line_slot], legacy[column_slot]]
    end
  end

  # Build the 0-based Point for one edge; a missing column counts as column 1.
  def point_for(edge)
    line, column = line_and_column(edge)
    return Point.new(0, 0) unless line
    Point.new(line - 1, (column || 1) - 1)
  end

  # 1-based line for one edge, defaulting to 1.
  def line_for(edge)
    line, _column = line_and_column(edge)
    line || 1
  end

  # Convert a 0-based line/column pair into a byte offset within the source.
  #
  # Sums the byte sizes of all lines before `line`, then adds `column`
  # capped at the byte size of the target line. When `line` is past the last
  # line the result is the total byte size of all lines.
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |content, index|
      if index < line
        offset += content.bytesize
      else
        offset += [column, content.bytesize].min
        break
      end
    end
    offset
  end
end
|
|
466
|
+
|
|
467
|
+
# Point struct for position information
#
# Holds a 0-based row and column. Besides the usual Struct accessors it
# supports hash-style lookup by :row/:column (Symbol or String).
Point = Struct.new(:row, :column) do
  # Look up a member by name or position.
  #
  # Unlike Struct#[], unknown keys and out-of-range indexes return nil
  # instead of raising.
  #
  # @param key [Symbol, String, Integer] member name or index
  # @return [Integer, nil]
  def [](key)
    case key
    when :row, "row" then row
    when :column, "column" then column
    # Keep Struct's positional access (point[0], point[-1]) working; this
    # override previously returned nil for every integer index.
    when Integer then to_a[key]
    end
  end

  # @return [Hash{Symbol => Integer}] row and column as a hash
  def to_h
    {row: row, column: column}
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Point row=#{row} column=#{column}>"
  end
end
|
|
488
|
+
end
|
|
489
|
+
end
|
|
490
|
+
end
|