tree_haver 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,560 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Markly backend using the Markly gem (cmark-gfm C library)
6
+ #
7
+ # This backend wraps Markly, a Ruby gem that provides bindings to
8
+ # cmark-gfm, GitHub's fork of the CommonMark C library with extensions.
9
+ #
10
+ # @note This backend only parses Markdown source code
11
+ # @see https://github.com/ioquatix/markly Markly gem
12
+ #
13
+ # @example Basic usage
14
+ # parser = TreeHaver::Parser.new
15
+ # parser.language = TreeHaver::Backends::Markly::Language.markdown(
16
+ # flags: Markly::DEFAULT,
17
+ # extensions: [:table, :strikethrough]
18
+ # )
19
+ # tree = parser.parse(markdown_source)
20
+ # root = tree.root_node
21
+ # puts root.type # => "document"
22
+ module Markly
23
# Load-state flags for the lazy `require "markly"` probe.
# @load_attempted records that the probe ran; @loaded caches its outcome.
@load_attempted = false
@loaded = false

class << self
  # Check if the Markly backend is available
  #
  # The gem is required on the first call only; the result is cached in
  # module-level instance variables until {reset!} clears it.
  #
  # @return [Boolean] true if markly gem is available
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "markly"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Reset the load state (primarily for testing)
  #
  # After this call the next {available?} re-attempts the require.
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = false
    @loaded = false
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   backend is not available
  def capabilities
    if available?
      {
        backend: :markly,
        query: false,
        bytes_field: false, # positions come from Markly as line/column
        incremental: false,
        pure_ruby: false, # backed by the cmark-gfm C library
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
        gfm_extensions: true, # Supports GitHub Flavored Markdown
      }
    else
      {}
    end
  end
end
68
+
69
+ # Markly language wrapper
70
+ #
71
+ # Markly only parses Markdown. This class exists for API compatibility
72
+ # and to pass through Markly-specific options (flags, extensions).
73
+ #
74
+ # @example
75
+ # language = TreeHaver::Backends::Markly::Language.markdown(
76
+ # flags: Markly::DEFAULT | Markly::FOOTNOTES,
77
+ # extensions: [:table, :strikethrough]
78
+ # )
79
+ # parser.language = language
80
class Language
  include Comparable

  # @return [Symbol] the language name (always :markdown for Markly)
  attr_reader :name

  # @return [Symbol] the backend this language belongs to (:markly)
  attr_reader :backend

  # @return [Integer, nil] Markly parse flags; nil means the parser
  #   substitutes Markly::DEFAULT at parse time
  attr_reader :flags

  # @return [Array<Symbol>] Markly extensions to enable
  attr_reader :extensions

  # Create a Markdown language instance
  #
  # @param flags [Integer] Markly parse flags
  # @param extensions [Array<Symbol>] Extensions to enable
  # @return [Language] Markdown language
  def self.markdown(flags: nil, extensions: [:table])
    new(:markdown, flags: flags, extensions: extensions)
  end

  # Create a new Markly language instance
  #
  # @param name [Symbol] Language name (should be :markdown)
  # @param flags [Integer] Markly parse flags (default: Markly::DEFAULT)
  # @param extensions [Array<Symbol>] Extensions to enable (default: [:table])
  def initialize(name = :markdown, flags: nil, extensions: [:table])
    @name = name.to_sym
    @backend = :markly
    # nil is kept as-is; the default is resolved when parsing
    @flags = flags
    @extensions = extensions
  end

  # Comparison for sorting/equality
  #
  # Only the language name takes part in the comparison.
  # NOTE(review): flags and extensions are ignored, so two languages with
  # different options compare equal - confirm this is intended.
  #
  # @param other [Object] object to compare against
  # @return [Integer, nil] nil when other is not a Language
  def <=>(other)
    other.is_a?(Language) ? name <=> other.name : nil
  end

  # @return [String] debug representation including name, flags and extensions
  def inspect
    "#<TreeHaver::Backends::Markly::Language name=#{name} flags=#{flags} extensions=#{extensions}>"
  end
end
132
+
133
+ # Markly parser wrapper
134
class Parser
  # The language object supplying flags and extensions for parsing.
  # Must be assigned before calling {#parse}.
  attr_accessor :language

  def initialize
    @language = nil
  end

  # Parse Markdown source code
  #
  # Missing flags fall back to ::Markly::DEFAULT and missing extensions
  # fall back to [:table].
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or Markly is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Markly not available" unless Markly.available?

    document = ::Markly.parse(
      source,
      flags: @language.flags || ::Markly::DEFAULT,
      extensions: @language.extensions || [:table],
    )
    Tree.new(document, source)
  end

  # Alias for compatibility
  #
  # The previous tree is ignored; the source is always parsed from scratch.
  #
  # @param _old_tree [Object] unused
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
160
+
161
+ # Markly tree wrapper
162
class Tree
  # inner_tree is the document object handed to the constructor;
  # source is the original text it was parsed from.
  attr_reader :inner_tree, :source

  # @param document [Object] parsed document to wrap
  # @param source [String] source text; split into lines once and shared
  #   with every node created from this tree
  def initialize(document, source)
    @inner_tree, @source = document, source
    @lines = source.lines
  end

  # @return [Node] a new wrapper around the document root
  def root_node
    Node.new(inner_tree, source, @lines)
  end

  # @return [Array] always empty for this backend
  def errors
    Array.new
  end

  # @return [Array] always empty for this backend
  def warnings
    Array.new
  end

  # @return [Array] always empty for this backend
  def comments
    Array.new
  end

  # @return [String] fixed debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Tree>"
  end
end
191
+
192
+ # Markly node wrapper
193
+ #
194
+ # Wraps Markly::Node to provide TreeHaver::Node-compatible interface.
195
+ #
196
+ # Note: Markly uses different type names than Commonmarker:
197
+ # - :header instead of :heading
198
+ # - :hrule instead of :thematic_break
199
+ # - :blockquote instead of :block_quote
200
+ # - :html instead of :html_block
201
class Node
  include Comparable
  include Enumerable

  # Type normalization map (Markly → canonical)
  # Types that are absent from the map are passed through unchanged.
  TYPE_MAP = {
    header: "heading",
    hrule: "thematic_break",
    html: "html_block",
  }.freeze

  # Default source position for nodes that don't have position info
  DEFAULT_SOURCE_POSITION = {
    start_line: 1,
    start_column: 1,
    end_line: 1,
    end_column: 1,
  }.freeze

  attr_reader :inner_node, :source

  # @param node [Object] the wrapped Markly node
  # @param source [String] full source text the node was parsed from
  # @param lines [Array<String>, nil] pre-split source lines; computed from
  #   source when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get source position from the inner Markly node
  #
  # Markly provides source_position as a hash with :start_line, :start_column,
  # :end_line, :end_column (all 1-based). Falls back to
  # DEFAULT_SOURCE_POSITION when the inner node has no position.
  #
  # @return [Hash{Symbol => Integer}] Source position from Markly
  # @api private
  def inner_source_position
    @inner_source_position ||= if @inner_node.respond_to?(:source_position)
      @inner_node.source_position || DEFAULT_SOURCE_POSITION
    else
      DEFAULT_SOURCE_POSITION
    end
  end

  # Get the node type as a string
  #
  # Normalizes Markly types to canonical names for consistency.
  #
  # @return [String] Node type
  def type
    raw = @inner_node.type.to_s
    TYPE_MAP[raw.to_sym]&.to_s || raw
  end

  alias_method :kind, :type

  # Get the raw (non-normalized) type
  # @return [String]
  def raw_type
    @inner_node.type.to_s
  end

  # Get the text content of this node
  #
  # Tries, in order: the node's own string_content, the node's
  # to_plaintext rendering, and finally the concatenated text of its
  # children. A source that raises is skipped rather than aborting the
  # call: a node can respond to string_content and still raise because it
  # carries no literal (presumably the case for Markly container nodes -
  # confirm against the gem), so respond_to? alone is not a sufficient guard.
  #
  # @return [String] Node text
  def text
    if @inner_node.respond_to?(:string_content)
      begin
        return @inner_node.string_content.to_s
      rescue StandardError
        # No literal content on this node; try the container strategies.
      end
    end

    if @inner_node.respond_to?(:to_plaintext)
      begin
        return @inner_node.to_plaintext
      rescue StandardError
        # Rendering failed; fall back to joining the children.
      end
    end

    children.map(&:text).join
  end

  # Get child nodes
  #
  # Markly uses first_child/next pattern. Errors raised while walking the
  # sibling chain end the walk instead of propagating.
  #
  # @return [Array<Node>] Child nodes
  def children
    result = []
    child = begin
      @inner_node.first_child
    rescue
      nil
    end
    while child
      result << Node.new(child, @source, @lines)
      child = begin
        child.next
      rescue
        nil
      end
    end
    result
  end

  # Iterate over the child nodes
  #
  # @yieldparam child [Node]
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of child nodes
  def child_count
    children.size
  end

  # @param index [Integer] position in the children array
  # @return [Node, nil] child at index, or nil when out of range
  def child(index)
    children[index]
  end

  # Position information
  # Markly provides source_position as a hash with :start_line, :start_column, :end_line, :end_column (1-based)

  # @return [Integer] byte offset computed from the start line/column
  def start_byte
    pos = inner_source_position
    line = pos[:start_line] - 1
    col = pos[:start_column] - 1
    calculate_byte_offset(line, col)
  end

  # NOTE(review): this is the offset of end_column - 1; if Markly's end
  # column is inclusive, the result points at the last byte rather than
  # one past it - confirm what callers expect.
  #
  # @return [Integer] byte offset computed from the end line/column
  def end_byte
    pos = inner_source_position
    line = pos[:end_line] - 1
    col = pos[:end_column] - 1
    calculate_byte_offset(line, col)
  end

  # @return [Point] 0-based row/column of the node start
  def start_point
    pos = inner_source_position
    line = pos[:start_line] - 1
    col = pos[:start_column] - 1
    Point.new(line, col)
  end

  # @return [Point] 0-based row/column derived from the end position
  def end_point
    pos = inner_source_position
    line = pos[:end_line] - 1
    col = pos[:end_column] - 1
    Point.new(line, col)
  end

  # Get the 1-based line number where this node starts
  #
  # Markly provides 1-based line numbers via source_position hash.
  #
  # @return [Integer] 1-based line number
  def start_line
    inner_source_position[:start_line]
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    inner_source_position[:end_line]
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # @return [Boolean] always true for this backend
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte
  #
  # @param other [Object] object to compare against
  # @return [Integer, nil] nil when other does not respond to start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] debug representation with normalized and raw type
  def inspect
    "#<TreeHaver::Backends::Markly::Node type=#{type} raw_type=#{raw_type}>"
  end

  # Convert node to CommonMark format
  #
  # Delegates to the inner Markly node's to_commonmark method.
  #
  # @return [String] CommonMark representation
  def to_commonmark
    @inner_node.to_commonmark
  end

  # Convert node to Markdown format
  #
  # Delegates to the inner Markly node's to_markdown method.
  #
  # @return [String] Markdown representation
  def to_markdown
    @inner_node.to_markdown
  end

  # Convert node to plain text
  #
  # Delegates to the inner Markly node's to_plaintext method.
  #
  # @return [String] Plain text representation
  def to_plaintext
    @inner_node.to_plaintext
  end

  # Convert node to HTML
  #
  # Delegates to the inner Markly node's to_html method.
  #
  # @return [String] HTML representation
  def to_html
    @inner_node.to_html
  end

  # Markly-specific methods

  # Get heading level (1-6)
  #
  # Only consulted for nodes whose raw type is "header"; errors from the
  # inner node yield nil.
  #
  # @return [Integer, nil]
  def header_level
    return unless raw_type == "header"
    begin
      @inner_node.header_level
    rescue
      nil
    end
  end

  # Get fence info for code blocks
  #
  # Only consulted for "code_block" nodes; errors from the inner node
  # yield nil.
  #
  # @return [String, nil]
  def fence_info
    return unless type == "code_block"
    begin
      @inner_node.fence_info
    rescue
      nil
    end
  end

  # Get URL for links/images
  # @return [String, nil] nil when the inner node raises
  def url
    @inner_node.url
  rescue
    nil
  end

  # Get title for links/images
  # @return [String, nil] nil when the inner node raises
  def title
    @inner_node.title
  rescue
    nil
  end

  # Get the next sibling (Markly uses .next)
  # @return [Node, nil]
  def next_sibling
    sibling = begin
      @inner_node.next
    rescue
      nil
    end
    sibling ? Node.new(sibling, @source, @lines) : nil
  end

  # Get the previous sibling
  # @return [Node, nil]
  def prev_sibling
    sibling = begin
      @inner_node.previous
    rescue
      nil
    end
    sibling ? Node.new(sibling, @source, @lines) : nil
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    p = begin
      @inner_node.parent
    rescue
      nil
    end
    p ? Node.new(p, @source, @lines) : nil
  end

  private

  # Translate a 0-based line/column pair into a byte offset into the source
  #
  # Sums the byte sizes of all lines before the target line, then adds the
  # column, capped at the target line's byte size. When the line index is
  # past the last line, the result is the total byte size of all lines.
  #
  # @param line [Integer] 0-based line index
  # @param column [Integer] 0-based column, treated as a byte count
  # @return [Integer] byte offset
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
      else
        offset += [column, line_content.bytesize].min
        break
      end
    end
    offset
  end
end
536
+
537
+ # Point struct for position information
538
Point = Struct.new(:row, :column) do
  # Look up a coordinate by name
  #
  # Accepts :row / "row" and :column / "column"; any other key
  # (including integer indexes) yields nil.
  #
  # @param key [Symbol, String] coordinate name
  # @return [Integer, nil]
  def [](key)
    return row if [:row, "row"].include?(key)
    return column if [:column, "column"].include?(key)

    nil
  end

  # @return [Hash{Symbol => Integer}] hash with :row and :column keys
  def to_h
    Hash[members.zip(to_a)]
  end

  # @return [String] "(row, column)" form
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debug representation
  def inspect
    "#<TreeHaver::Backends::Markly::Point row=#{row} column=#{column}>"
  end
end
558
+ end
559
+ end
560
+ end
@@ -110,7 +110,7 @@ module TreeHaver
110
110
 
111
111
  # Compare by path first, then symbol
112
112
  cmp = (@path || "") <=> (other.path || "")
113
- return cmp unless cmp.zero?
113
+ return cmp if cmp.nonzero?
114
114
 
115
115
  (@symbol || "") <=> (other.symbol || "")
116
116
  end
@@ -157,10 +157,15 @@ module TreeHaver
157
157
  rescue NameError => e
158
158
  # TreeSitter constant doesn't exist - backend not loaded
159
159
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
160
- rescue TreeSitter::TreeSitterError => e
160
+ rescue Exception => e # rubocop:disable Lint/RescueException
161
161
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
162
- # This includes: ParserNotFoundError, LanguageLoadError, SymbolNotFoundError, etc.
163
- raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
162
+ # We rescue Exception and check the class name dynamically to avoid NameError
163
+ # at parse time when TreeSitter constant isn't loaded yet
164
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
165
+ raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
166
+ else
167
+ raise # Re-raise if it's not a TreeSitter error
168
+ end
164
169
  end
165
170
 
166
171
  # Load a language from a shared library path (legacy method)
@@ -188,9 +193,15 @@ module TreeHaver
188
193
  rescue NameError => e
189
194
  # TreeSitter constant doesn't exist - backend not loaded
190
195
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
191
- rescue TreeSitter::TreeSitterError => e
196
+ rescue Exception => e # rubocop:disable Lint/RescueException
192
197
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
193
- raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
198
+ # We rescue Exception and check the class name dynamically to avoid NameError
199
+ # at parse time when TreeSitter constant isn't loaded yet
200
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
201
+ raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
202
+ else
203
+ raise # Re-raise if it's not a TreeSitter error
204
+ end
194
205
  end
195
206
 
196
207
  # Set the language for this parser
@@ -209,9 +220,15 @@ module TreeHaver
209
220
 
210
221
  # Return the language object
211
222
  lang
212
- rescue TreeSitter::TreeSitterError => e
223
+ rescue Exception => e # rubocop:disable Lint/RescueException
213
224
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
214
- raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
225
+ # We rescue Exception and check the class name dynamically to avoid NameError
226
+ # at parse time when TreeSitter constant isn't loaded yet
227
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
228
+ raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
229
+ else
230
+ raise # Re-raise if it's not a TreeSitter error
231
+ end
215
232
  end
216
233
 
217
234
  # Parse source code
@@ -228,9 +245,15 @@ module TreeHaver
228
245
  tree = @parser.parse_string(nil, source)
229
246
  raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
230
247
  tree
231
- rescue TreeSitter::TreeSitterError => e
248
+ rescue Exception => e # rubocop:disable Lint/RescueException
232
249
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
233
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
250
+ # We rescue Exception and check the class name dynamically to avoid NameError
251
+ # at parse time when TreeSitter constant isn't loaded yet
252
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
253
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
254
+ else
255
+ raise # Re-raise if it's not a TreeSitter error
256
+ end
234
257
  end
235
258
 
236
259
  # Parse source code with optional incremental parsing
@@ -246,9 +269,15 @@ module TreeHaver
246
269
  # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
247
270
  # Return raw tree - TreeHaver::Parser will wrap it
248
271
  @parser.parse_string(old_tree, source)
249
- rescue TreeSitter::TreeSitterError => e
272
+ rescue Exception => e # rubocop:disable Lint/RescueException
250
273
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
251
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
274
+ # We rescue Exception and check the class name dynamically to avoid NameError
275
+ # at parse time when TreeSitter constant isn't loaded yet
276
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
277
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
278
+ else
279
+ raise # Re-raise if it's not a TreeSitter error
280
+ end
252
281
  end
253
282
  end
254
283
  end