tree_haver 3.2.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,590 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Backends
5
- # Markly backend using the Markly gem (cmark-gfm C library)
6
- #
7
- # This backend wraps Markly, a Ruby gem that provides bindings to
8
- # cmark-gfm, GitHub's fork of the CommonMark C library with extensions.
9
- #
10
- # @note This backend only parses Markdown source code
11
- # @see https://github.com/ioquatix/markly Markly gem
12
- #
13
- # @example Basic usage
14
- # parser = TreeHaver::Parser.new
15
- # parser.language = TreeHaver::Backends::Markly::Language.markdown(
16
- # flags: Markly::DEFAULT,
17
- # extensions: [:table, :strikethrough]
18
- # )
19
- # tree = parser.parse(markdown_source)
20
- # root = tree.root_node
21
- # puts root.type # => "document"
22
- module Markly
23
# Load-state bookkeeping for the lazily required "markly" gem.
# @load_attempted records that a require was tried; @loaded records its outcome.
@load_attempted = false
@loaded = false

class << self
  # Check whether the Markly backend can be used.
  #
  # The first call tries to require the "markly" gem; the outcome is cached
  # and returned by later calls until {reset!} is invoked.
  #
  # @return [Boolean] true if the markly gem could be required
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "markly"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Forget the cached load outcome so the next {available?} call retries
  # the require (primarily for testing).
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = false
    @loaded = false
  end

  # Describe what this backend supports.
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the backend
  #   is not available
  def capabilities
    if available?
      {
        backend: :markly,
        query: false,
        bytes_field: false, # positions come from Markly as line/column
        incremental: false,
        pure_ruby: false, # backed by the cmark-gfm C library
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
        gfm_extensions: true, # GitHub Flavored Markdown extensions
      }
    else
      {}
    end
  end
end
68
-
69
- # Markly language wrapper
70
- #
71
- # Markly only parses Markdown. This class exists for API compatibility
72
- # and to pass through Markly-specific options (flags, extensions).
73
- #
74
- # @example
75
- # language = TreeHaver::Backends::Markly::Language.markdown(
76
- # flags: Markly::DEFAULT | Markly::FOOTNOTES,
77
- # extensions: [:table, :strikethrough]
78
- # )
79
- # parser.language = language
80
class Language
  include Comparable

  # The language name (always :markdown for Markly)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Markly parse flags (nil means "use Markly::DEFAULT at parse time")
  # @return [Integer, nil]
  attr_reader :flags

  # Markly extensions to enable
  # @return [Array<Symbol>]
  attr_reader :extensions

  # Create a new Markly language instance
  #
  # @param name [Symbol, String] Language name (should be :markdown)
  # @param flags [Integer, nil] Markly parse flags; nil defers the default
  #   to parse time
  # @param extensions [Array<Symbol>] Extensions to enable (default: [:table])
  def initialize(name = :markdown, flags: nil, extensions: [:table])
    @name = name.to_sym
    @backend = :markly
    @flags = flags
    @extensions = extensions
  end

  class << self
    # Create a Markdown language instance
    #
    # @param flags [Integer, nil] Markly parse flags
    # @param extensions [Array<Symbol>] Extensions to enable
    # @return [Language] Markdown language
    def markdown(flags: nil, extensions: [:table])
      new(:markdown, flags: flags, extensions: extensions)
    end

    # Load language from library path (API compatibility)
    #
    # Markly only supports Markdown, so the path is ignored; `name` and
    # `symbol` are only used to verify that Markdown is what was requested.
    #
    # @param _path [String, nil] Ignored - Markly doesn't load external grammars
    # @param symbol [String, Symbol, nil] Grammar symbol; a leading
    #   "tree_sitter_" prefix is stripped to derive the language name
    # @param name [String, Symbol, nil] Language name hint; takes precedence
    #   over `symbol` (defaults to :markdown when both are nil)
    # @return [Language] Markdown language
    # @raise [TreeHaver::NotAvailable] if requested language is not Markdown
    def from_library(_path = nil, symbol: nil, name: nil)
      # \A anchors to the start of the string; ^ would also match after a newline.
      requested = name || symbol&.to_s&.sub(/\Atree_sitter_/, "")
      # Normalize to a Symbol so a String hint such as "markdown" is
      # accepted just like :markdown.
      lang_name = requested ? requested.to_s.to_sym : :markdown

      unless lang_name == :markdown
        raise TreeHaver::NotAvailable,
          "Markly backend only supports Markdown, not #{lang_name}. " \
            "Use a tree-sitter backend for #{lang_name} support."
      end

      markdown
    end
  end

  # Comparison for sorting/equality. Only the name takes part; flags and
  # extensions are not compared.
  #
  # @param other [Object]
  # @return [Integer, nil] nil when other is not a Language
  def <=>(other)
    return unless other.is_a?(Language)
    name <=> other.name
  end

  # @return [String] debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Language name=#{name} flags=#{flags} extensions=#{extensions}>"
  end
end
156
-
157
- # Markly parser wrapper
158
class Parser
  # The language object supplying parse flags and extensions.
  # Must be assigned before {#parse} is called.
  attr_accessor :language

  def initialize
    @language = nil
  end

  # Parse Markdown source code
  #
  # Falls back to ::Markly::DEFAULT flags and the [:table] extension when
  # the language does not specify them.
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] if no language is set or Markly is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Markly not available" unless Markly.available?

    parse_flags = @language.flags || ::Markly::DEFAULT
    enabled_extensions = @language.extensions || [:table]
    document = ::Markly.parse(source, flags: parse_flags, extensions: enabled_extensions)
    Tree.new(document, source)
  end

  # Compatibility entry point: the previous tree is ignored and the source
  # is parsed from scratch.
  #
  # @param _old_tree [Object, nil] Ignored
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
184
-
185
- # Markly tree wrapper
186
class Tree
  # @return [Object] the parsed document handed to the constructor
  # @return [String] the source text the document was parsed from
  attr_reader :inner_tree, :source

  # @param document [Object] parsed document (root of the inner tree)
  # @param source [String] source text; split into lines once here so
  #   every node created from this tree can share the same array
  def initialize(document, source)
    @source = source
    @inner_tree = document
    @lines = source.lines
  end

  # Wrap the document in a Node. A new wrapper is built on every call.
  #
  # @return [Node]
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # This backend never reports diagnostics or comments, so each of these
  # readers returns a fresh empty array.
  %i[errors warnings comments].each do |reader|
    define_method(reader) { [] }
  end

  # @return [String] debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Tree>"
  end
end
215
-
216
- # Markly node wrapper
217
- #
218
- # Wraps Markly::Node to provide TreeHaver::Node-compatible interface.
219
- #
220
- # Note: Markly uses different type names than Commonmarker:
221
- # - :header instead of :heading
222
- # - :hrule instead of :thematic_break
223
- # - :blockquote instead of :block_quote
224
- # - :html instead of :html_block
225
class Node
  include Comparable
  include Enumerable

  # Type normalization map (Markly raw type => canonical name).
  # Types not listed here are passed through unchanged.
  TYPE_MAP = {
    header: "heading",
    hrule: "thematic_break",
    html: "html_block",
  }.freeze

  # Source position used for nodes that don't have position info
  DEFAULT_SOURCE_POSITION = {
    start_line: 1,
    start_column: 1,
    end_line: 1,
    end_column: 1,
  }.freeze

  attr_reader :inner_node, :source

  # @param node [Object] the inner Markly node being wrapped
  # @param source [String] full source text
  # @param lines [Array<String>, nil] source split into lines; computed
  #   from source when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get source position from the inner Markly node
  #
  # Expected to be a hash with :start_line, :start_column, :end_line and
  # :end_column (1-based). Falls back to DEFAULT_SOURCE_POSITION when the
  # inner node has no source_position method or it returns nil.
  #
  # @return [Hash{Symbol => Integer}] Source position from Markly
  # @api private
  def inner_source_position
    @inner_source_position ||= if @inner_node.respond_to?(:source_position)
      @inner_node.source_position || DEFAULT_SOURCE_POSITION
    else
      DEFAULT_SOURCE_POSITION
    end
  end

  # Get the node type as a string, normalized through TYPE_MAP.
  #
  # @return [String] Node type
  def type
    raw = @inner_node.type.to_s
    TYPE_MAP.fetch(raw.to_sym, raw)
  end

  alias_method :kind, :type

  # Get the raw (non-normalized) type
  # @return [String]
  def raw_type
    @inner_node.type.to_s
  end

  # Get the text content of this node
  #
  # Uses the inner node's string_content when it is non-empty; otherwise
  # uses to_plaintext when available, and falls back to concatenating the
  # children's text.
  #
  # @return [String] Node text
  def text
    if @inner_node.respond_to?(:string_content)
      content = @inner_node.string_content.to_s
      return content unless content.empty?
    end

    return children.map(&:text).join unless @inner_node.respond_to?(:to_plaintext)

    begin
      @inner_node.to_plaintext
    rescue StandardError
      # Best effort: rendering failed, so rebuild the text from children.
      children.map(&:text).join
    end
  end

  # Get child nodes by walking the inner node's first_child/next chain.
  # A new array of wrappers is built on every call.
  #
  # @return [Array<Node>] Child nodes
  def children
    wrapped = []
    current = safely { @inner_node.first_child }
    while current
      wrapped << Node.new(current, @source, @lines)
      following = current
      current = safely { following.next }
    end
    wrapped
  end

  # Iterate over child nodes.
  #
  # @yieldparam child [Node]
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of children
  def child_count
    children.size
  end

  # @param index [Integer]
  # @return [Node, nil] child at index
  def child(index)
    children[index]
  end

  # Byte offset of the node's start position within the source.
  #
  # @return [Integer] never negative
  def start_byte
    calculate_byte_offset(*zero_based(:start_line, :start_column))
  end

  # Byte offset computed from the node's end line/column.
  #
  # NOTE(review): end_column is converted with a plain "- 1", so if Markly's
  # end_column is inclusive this is the offset of the last byte rather than
  # one past it - confirm against callers before changing.
  #
  # @return [Integer] never negative
  def end_byte
    calculate_byte_offset(*zero_based(:end_line, :end_column))
  end

  # @return [Point] 0-based row/column of the start position
  def start_point
    Point.new(*zero_based(:start_line, :start_column))
  end

  # @return [Point] 0-based row/column of the end position
  def end_point
    Point.new(*zero_based(:end_line, :end_column))
  end

  # Get the line number where this node starts, exactly as reported by
  # the inner node (1-based).
  #
  # @return [Integer] 1-based line number
  def start_line
    inner_source_position[:start_line]
  end

  # Get the line number where this node ends, exactly as reported by
  # the inner node (1-based).
  #
  # @return [Integer] 1-based line number
  def end_line
    inner_source_position[:end_line]
  end

  # Get position information as a hash with 1-based line numbers and
  # 0-based columns.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # Every node of this backend is reported as named.
  # @return [Boolean] always true
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false
  def has_error?
    false
  end

  # @return [Boolean] always false
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte.
  #
  # @param other [Object]
  # @return [Integer, nil] nil when other has no start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    by_start = start_byte <=> other.start_byte
    return by_start unless by_start&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Node type=#{type} raw_type=#{raw_type}>"
  end

  # Convert node to CommonMark format (delegates to the inner node).
  #
  # @return [String] CommonMark representation
  def to_commonmark
    @inner_node.to_commonmark
  end

  # Convert node to Markdown format (delegates to the inner node).
  #
  # @return [String] Markdown representation
  def to_markdown
    @inner_node.to_markdown
  end

  # Convert node to plain text (delegates to the inner node).
  #
  # @return [String] Plain text representation
  def to_plaintext
    @inner_node.to_plaintext
  end

  # Convert node to HTML (delegates to the inner node).
  #
  # @return [String] HTML representation
  def to_html
    @inner_node.to_html
  end

  # Markly-specific methods

  # Get heading level; only consulted for raw "header" nodes.
  # @return [Integer, nil] nil for other node types or on failure
  def header_level
    return unless raw_type == "header"
    safely { @inner_node.header_level }
  end

  # Get fence info for code blocks
  # @return [String, nil] nil for other node types or on failure
  def fence_info
    return unless type == "code_block"
    safely { @inner_node.fence_info }
  end

  # Get URL for links/images
  # @return [String, nil] nil when the inner node cannot provide one
  def url
    safely { @inner_node.url }
  end

  # Get title for links/images
  # @return [String, nil] nil when the inner node cannot provide one
  def title
    safely { @inner_node.title }
  end

  # Get the next sibling (Markly uses .next)
  # @return [Node, nil]
  def next_sibling
    wrap(safely { @inner_node.next })
  end

  # Get the previous sibling
  # @return [Node, nil]
  def prev_sibling
    wrap(safely { @inner_node.previous })
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    wrap(safely { @inner_node.parent })
  end

  private

  # Run the block, turning any StandardError into nil (best-effort access
  # to optional inner-node attributes).
  def safely
    yield
  rescue StandardError
    nil
  end

  # Wrap an inner node, sharing this node's source and line cache.
  def wrap(inner)
    inner ? Node.new(inner, @source, @lines) : nil
  end

  # Convert a 1-based line/column pair from the inner position to 0-based.
  # A reported value of 0 would otherwise become -1 and leak out as a
  # negative row, column or byte offset, so both results are clamped at 0.
  def zero_based(line_key, column_key)
    position = inner_source_position
    [[position[line_key] - 1, 0].max, [position[column_key] - 1, 0].max]
  end

  # Sum the byte sizes of all lines before `line`, then add `column`
  # limited to the byte size of the target line. When `line` is past the
  # last line only the preceding total is returned.
  def calculate_byte_offset(line, column)
    line = [line, 0].max
    preceding = @lines.first(line).inject(0) { |total, content| total + content.bytesize }
    target = @lines[line]
    return preceding unless target
    preceding + [[column, 0].max, target.bytesize].min
  end
end
566
-
567
- # Point struct for position information
568
# Row/column pair used for position information.
Point = Struct.new(:row, :column) do
  # Hash-style access by :row / "row" / :column / "column".
  #
  # @param key [Symbol, String]
  # @return [Integer, nil] nil for any other key
  def [](key)
    if [:row, "row"].include?(key)
      row
    elsif [:column, "column"].include?(key)
      column
    end
  end

  # @return [Hash{Symbol => Integer}] the point as a row/column hash
  def to_h
    {row: row, column: column}
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Point row=#{row} column=#{column}>"
  end
end
588
- end
589
- end
590
- end