tree_haver 3.2.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,516 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Backends
5
- # Commonmarker backend using the Commonmarker gem (comrak Rust parser)
6
- #
7
- # This backend wraps Commonmarker, a Ruby gem that provides bindings to
8
- # comrak, a fast CommonMark-compliant Markdown parser written in Rust.
9
- #
10
- # @note This backend only parses Markdown source code
11
- # @see https://github.com/gjtorikian/commonmarker Commonmarker gem
12
- #
13
- # @example Basic usage
14
- # parser = TreeHaver::Parser.new
15
- # parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
16
- # tree = parser.parse(markdown_source)
17
- # root = tree.root_node
18
- # puts root.type # => "document"
19
- module Commonmarker
20
- @load_attempted = false
21
- @loaded = false
22
-
23
- # Check if the Commonmarker backend is available
24
- #
25
- # @return [Boolean] true if commonmarker gem is available
26
- class << self
27
- def available?
28
- return @loaded if @load_attempted
29
- @load_attempted = true
30
- begin
31
- require "commonmarker"
32
- @loaded = true
33
- rescue LoadError
34
- @loaded = false
35
- end
36
- @loaded
37
- end
38
-
39
- # Reset the load state (primarily for testing)
40
- #
41
- # @return [void]
42
- # @api private
43
- def reset!
44
- @load_attempted = false
45
- @loaded = false
46
- end
47
-
48
- # Get capabilities supported by this backend
49
- #
50
- # @return [Hash{Symbol => Object}] capability map
51
- def capabilities
52
- return {} unless available?
53
- {
54
- backend: :commonmarker,
55
- query: false,
56
- bytes_field: false, # Commonmarker uses line/column
57
- incremental: false,
58
- pure_ruby: false, # Uses Rust via FFI
59
- markdown_only: true,
60
- error_tolerant: true, # Markdown is forgiving
61
- }
62
- end
63
- end
64
-
65
- # Commonmarker language wrapper
66
- #
67
- # Commonmarker only parses Markdown. This class exists for API compatibility.
68
- #
69
- # @example
70
- # language = TreeHaver::Backends::Commonmarker::Language.markdown
71
- # parser.language = language
72
- class Language
73
- include Comparable
74
-
75
- # The language name (always :markdown for Commonmarker)
76
- # @return [Symbol]
77
- attr_reader :name
78
-
79
- # The backend this language is for
80
- # @return [Symbol]
81
- attr_reader :backend
82
-
83
- # Commonmarker parse options
84
- # @return [Hash]
85
- attr_reader :options
86
-
87
- # Create a new Commonmarker language instance
88
- #
89
- # @param name [Symbol] Language name (should be :markdown)
90
- # @param options [Hash] Commonmarker parse options
91
- def initialize(name = :markdown, options: {})
92
- @name = name.to_sym
93
- @backend = :commonmarker
94
- @options = options
95
- end
96
-
97
- class << self
98
- # Create a Markdown language instance
99
- #
100
- # @param options [Hash] Commonmarker parse options
101
- # @return [Language] Markdown language
102
- def markdown(options: {})
103
- new(:markdown, options: options)
104
- end
105
-
106
- # Load language from library path (API compatibility)
107
- #
108
- # Commonmarker only supports Markdown, so path and symbol parameters are ignored.
109
- # This method exists for API consistency with tree-sitter backends,
110
- # allowing `TreeHaver.parser_for(:markdown)` to work regardless of backend.
111
- #
112
- # @param _path [String] Ignored - Commonmarker doesn't load external grammars
113
- # @param symbol [String, nil] Ignored
114
- # @param name [String, nil] Language name hint (defaults to :markdown)
115
- # @return [Language] Markdown language
116
- # @raise [TreeHaver::NotAvailable] if requested language is not Markdown
117
- def from_library(_path = nil, symbol: nil, name: nil)
118
- # Derive language name from symbol if provided
119
- lang_name = name || symbol&.to_s&.sub(/^tree_sitter_/, "")&.to_sym || :markdown
120
-
121
- unless lang_name == :markdown
122
- raise TreeHaver::NotAvailable,
123
- "Commonmarker backend only supports Markdown, not #{lang_name}. " \
124
- "Use a tree-sitter backend for #{lang_name} support."
125
- end
126
-
127
- markdown
128
- end
129
- end
130
-
131
- # Comparison for sorting/equality
132
- def <=>(other)
133
- return unless other.is_a?(Language)
134
- name <=> other.name
135
- end
136
-
137
- def inspect
138
- "#<TreeHaver::Backends::Commonmarker::Language name=#{name} options=#{options}>"
139
- end
140
- end
141
-
142
- # Commonmarker parser wrapper
143
- class Parser
144
- attr_accessor :language
145
-
146
- def initialize
147
- @language = nil
148
- end
149
-
150
- # Parse Markdown source code
151
- #
152
- # @param source [String] Markdown source to parse
153
- # @return [Tree] Parsed tree
154
- def parse(source)
155
- raise "Language not set" unless @language
156
- Commonmarker.available? or raise "Commonmarker not available"
157
-
158
- options = @language.options || {}
159
- doc = ::Commonmarker.parse(source, options: options)
160
- Tree.new(doc, source)
161
- end
162
-
163
- # Alias for compatibility
164
- def parse_string(_old_tree, source)
165
- parse(source)
166
- end
167
- end
168
-
169
- # Commonmarker tree wrapper
170
- class Tree
171
- attr_reader :inner_tree, :source
172
-
173
- def initialize(document, source)
174
- @inner_tree = document
175
- @source = source
176
- @lines = source.lines
177
- end
178
-
179
- def root_node
180
- Node.new(@inner_tree, @source, @lines)
181
- end
182
-
183
- def errors
184
- []
185
- end
186
-
187
- def warnings
188
- []
189
- end
190
-
191
- def comments
192
- []
193
- end
194
-
195
- def inspect
196
- "#<TreeHaver::Backends::Commonmarker::Tree>"
197
- end
198
- end
199
-
200
- # Commonmarker node wrapper
201
- #
202
- # Wraps Commonmarker::Node to provide TreeHaver::Node-compatible interface.
203
- class Node
204
- include Comparable
205
- include Enumerable
206
-
207
- attr_reader :inner_node, :source
208
-
209
- def initialize(node, source, lines = nil)
210
- @inner_node = node
211
- @source = source
212
- @lines = lines || source.lines
213
- end
214
-
215
- # Get the node type as a string
216
- #
217
- # Commonmarker uses symbols like :document, :heading, :paragraph, etc.
218
- #
219
- # @return [String] Node type
220
- def type
221
- @inner_node.type.to_s
222
- end
223
-
224
- alias_method :kind, :type
225
-
226
- # Get the text content of this node
227
- #
228
- # @return [String] Node text
229
- def text
230
- # Commonmarker nodes have string_content for text nodes
231
- # Container nodes don't have string_content and will raise TypeError
232
- if @inner_node.respond_to?(:string_content)
233
- begin
234
- content = @inner_node.string_content.to_s
235
- # If string_content is non-empty, use it (leaf node)
236
- return content unless content.empty?
237
- rescue TypeError
238
- # Container node - fall through to concatenate children
239
- end
240
- end
241
-
242
- # For container nodes, concatenate children's text
243
- children.map(&:text).join
244
- end
245
-
246
- # Get child nodes
247
- #
248
- # @return [Array<Node>] Child nodes
249
- def children
250
- return [] unless @inner_node.respond_to?(:each)
251
-
252
- result = []
253
- @inner_node.each { |child| result << Node.new(child, @source, @lines) }
254
- result
255
- end
256
-
257
- def each(&block)
258
- return to_enum(__method__) unless block
259
- children.each(&block)
260
- end
261
-
262
- def child_count
263
- children.size
264
- end
265
-
266
- def child(index)
267
- children[index]
268
- end
269
-
270
- # Position information
271
- # Commonmarker 2.x provides source_position as a hash with start_line, start_column, end_line, end_column
272
-
273
- def start_byte
274
- sp = start_point
275
- calculate_byte_offset(sp.row, sp.column)
276
- end
277
-
278
- def end_byte
279
- ep = end_point
280
- calculate_byte_offset(ep.row, ep.column)
281
- end
282
-
283
- def start_point
284
- if @inner_node.respond_to?(:source_position)
285
- pos = begin
286
- @inner_node.source_position
287
- rescue
288
- nil
289
- end
290
- if pos && pos[:start_line]
291
- return Point.new(pos[:start_line] - 1, (pos[:start_column] || 1) - 1)
292
- end
293
- end
294
- pos = begin
295
- @inner_node.sourcepos
296
- rescue
297
- nil
298
- end
299
- return Point.new(0, 0) unless pos
300
- Point.new(pos[0] - 1, pos[1] - 1)
301
- end
302
-
303
- def end_point
304
- if @inner_node.respond_to?(:source_position)
305
- pos = begin
306
- @inner_node.source_position
307
- rescue
308
- nil
309
- end
310
- if pos && pos[:end_line]
311
- return Point.new(pos[:end_line] - 1, (pos[:end_column] || 1) - 1)
312
- end
313
- end
314
- pos = begin
315
- @inner_node.sourcepos
316
- rescue
317
- nil
318
- end
319
- return Point.new(0, 0) unless pos
320
- Point.new(pos[2] - 1, pos[3] - 1)
321
- end
322
-
323
- def start_line
324
- if @inner_node.respond_to?(:source_position)
325
- pos = begin
326
- @inner_node.source_position
327
- rescue
328
- nil
329
- end
330
- return pos[:start_line] if pos && pos[:start_line]
331
- end
332
- pos = begin
333
- @inner_node.sourcepos
334
- rescue
335
- nil
336
- end
337
- pos ? pos[0] : 1
338
- end
339
-
340
- def end_line
341
- if @inner_node.respond_to?(:source_position)
342
- pos = begin
343
- @inner_node.source_position
344
- rescue
345
- nil
346
- end
347
- return pos[:end_line] if pos && pos[:end_line]
348
- end
349
- pos = begin
350
- @inner_node.sourcepos
351
- rescue
352
- nil
353
- end
354
- pos ? pos[2] : 1
355
- end
356
-
357
- # Get position information as a hash
358
- #
359
- # Returns a hash with 1-based line numbers and 0-based columns.
360
- # Compatible with *-merge gems' FileAnalysisBase.
361
- #
362
- # @return [Hash{Symbol => Integer}] Position hash
363
- def source_position
364
- {
365
- start_line: start_line,
366
- end_line: end_line,
367
- start_column: start_point.column,
368
- end_column: end_point.column,
369
- }
370
- end
371
-
372
- # Get the first child node
373
- #
374
- # @return [Node, nil] First child or nil
375
- def first_child
376
- children.first
377
- end
378
-
379
- def named?
380
- true
381
- end
382
-
383
- alias_method :structural?, :named?
384
-
385
- def has_error?
386
- false
387
- end
388
-
389
- def missing?
390
- false
391
- end
392
-
393
- def <=>(other)
394
- return unless other.respond_to?(:start_byte)
395
- cmp = start_byte <=> other.start_byte
396
- return cmp unless cmp&.zero?
397
- end_byte <=> other.end_byte
398
- end
399
-
400
- def inspect
401
- "#<TreeHaver::Backends::Commonmarker::Node type=#{type}>"
402
- end
403
-
404
- # Commonmarker-specific methods
405
-
406
- # Get heading level (1-6)
407
- # @return [Integer, nil]
408
- def header_level
409
- return unless type == "heading"
410
- begin
411
- @inner_node.header_level
412
- rescue
413
- nil
414
- end
415
- end
416
-
417
- # Get fence info for code blocks
418
- # @return [String, nil]
419
- def fence_info
420
- return unless type == "code_block"
421
- begin
422
- @inner_node.fence_info
423
- rescue
424
- nil
425
- end
426
- end
427
-
428
- # Get URL for links/images
429
- # @return [String, nil]
430
- def url
431
- @inner_node.url
432
- rescue
433
- nil
434
- end
435
-
436
- # Get title for links/images
437
- # @return [String, nil]
438
- def title
439
- @inner_node.title
440
- rescue
441
- nil
442
- end
443
-
444
- # Get the next sibling
445
- # @return [Node, nil]
446
- def next_sibling
447
- sibling = begin
448
- @inner_node.next_sibling
449
- rescue
450
- nil
451
- end
452
- sibling ? Node.new(sibling, @source, @lines) : nil
453
- end
454
-
455
- # Get the previous sibling
456
- # @return [Node, nil]
457
- def prev_sibling
458
- sibling = begin
459
- @inner_node.previous_sibling
460
- rescue
461
- nil
462
- end
463
- sibling ? Node.new(sibling, @source, @lines) : nil
464
- end
465
-
466
- # Get the parent node
467
- # @return [Node, nil]
468
- def parent
469
- p = begin
470
- @inner_node.parent
471
- rescue
472
- nil
473
- end
474
- p ? Node.new(p, @source, @lines) : nil
475
- end
476
-
477
- private
478
-
479
- def calculate_byte_offset(line, column)
480
- offset = 0
481
- @lines.each_with_index do |line_content, idx|
482
- if idx < line
483
- offset += line_content.bytesize
484
- else
485
- offset += [column, line_content.bytesize].min
486
- break
487
- end
488
- end
489
- offset
490
- end
491
- end
492
-
493
- # Point struct for position information
494
- Point = Struct.new(:row, :column) do
495
- def [](key)
496
- case key
497
- when :row, "row" then row
498
- when :column, "column" then column
499
- end
500
- end
501
-
502
- def to_h
503
- {row: row, column: column}
504
- end
505
-
506
- def to_s
507
- "(#{row}, #{column})"
508
- end
509
-
510
- def inspect
511
- "#<TreeHaver::Backends::Commonmarker::Point row=#{row} column=#{column}>"
512
- end
513
- end
514
- end
515
- end
516
- end