tree_haver 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,559 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Markly backend using the Markly gem (cmark-gfm C library)
6
+ #
7
+ # This backend wraps Markly, a Ruby gem that provides bindings to
8
+ # cmark-gfm, GitHub's fork of the CommonMark C library with extensions.
9
+ #
10
+ # @note This backend only parses Markdown source code
11
+ # @see https://github.com/ioquatix/markly Markly gem
12
+ #
13
+ # @example Basic usage
14
+ # parser = TreeHaver::Parser.new
15
+ # parser.language = TreeHaver::Backends::Markly::Language.markdown(
16
+ # flags: Markly::DEFAULT,
17
+ # extensions: [:table, :strikethrough]
18
+ # )
19
+ # tree = parser.parse(markdown_source)
20
+ # root = tree.root_node
21
+ # puts root.type # => "document"
22
+ module Markly
23
# Load state for the markly gem, kept as module-level instance variables.
@load_attempted = false
@loaded = false

class << self
  # Check if the Markly backend is available.
  #
  # The first call tries to require the markly gem; the outcome is cached
  # and returned by later calls until {reset!} clears it.
  #
  # @return [Boolean] true if markly gem is available
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "markly"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Reset the load state (primarily for testing) so that the next call to
  # {available?} attempts the require again.
  #
  # @return [void]
  # @api private
  def reset!
    @loaded = false
    @load_attempted = false
  end

  # Get capabilities supported by this backend.
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   backend is not available
  def capabilities
    if available?
      {
        backend: :markly,
        query: false,
        bytes_field: false, # Markly uses line/column
        incremental: false,
        pure_ruby: false, # Uses C via FFI
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
        gfm_extensions: true, # Supports GitHub Flavored Markdown
      }
    else
      {}
    end
  end
end
68
+
69
+ # Markly language wrapper
70
+ #
71
+ # Markly only parses Markdown. This class exists for API compatibility
72
+ # and to pass through Markly-specific options (flags, extensions).
73
+ #
74
+ # @example
75
+ # language = TreeHaver::Backends::Markly::Language.markdown(
76
+ # flags: Markly::DEFAULT | Markly::FOOTNOTES,
77
+ # extensions: [:table, :strikethrough]
78
+ # )
79
+ # parser.language = language
80
class Language
  include Comparable

  # @return [Symbol] the language name (always :markdown for Markly)
  attr_reader :name

  # @return [Symbol] the backend this language is for (always :markly)
  attr_reader :backend

  # @return [Integer, nil] Markly parse flags; nil when none were given
  attr_reader :flags

  # @return [Array<Symbol>] Markly extensions to enable
  attr_reader :extensions

  # Build a Markdown language instance.
  #
  # @param flags [Integer, nil] Markly parse flags
  # @param extensions [Array<Symbol>] Extensions to enable
  # @return [Language] Markdown language
  def self.markdown(flags: nil, extensions: [:table])
    new(:markdown, flags: flags, extensions: extensions)
  end

  # Create a new Markly language instance.
  #
  # @param name [Symbol, #to_sym] Language name (should be :markdown)
  # @param flags [Integer, nil] Markly parse flags, stored as given
  # @param extensions [Array<Symbol>] Extensions to enable (default: [:table])
  def initialize(name = :markdown, flags: nil, extensions: [:table])
    @backend = :markly
    @name = name.to_sym
    @flags = flags
    @extensions = extensions
  end

  # Ordering used by Comparable (and therefore by ==).
  #
  # Only the name takes part in the comparison; flags and extensions are
  # ignored.
  #
  # @param other [Object]
  # @return [Integer, nil] nil when other is not a Language
  def <=>(other)
    other.is_a?(Language) ? name <=> other.name : nil
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Markly::Language name=#{name} flags=#{flags} extensions=#{extensions}>"
  end
end
132
+
133
+ # Markly parser wrapper
134
class Parser
  # The Language whose flags and extensions drive parsing.
  attr_accessor :language

  # A new parser starts with no language assigned.
  def initialize
    @language = nil
  end

  # Parse Markdown source code.
  #
  # When the language carries no flags, ::Markly::DEFAULT is used; when it
  # carries no extensions, [:table] is used.
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] if no language is set or Markly is unavailable
  def parse(source)
    lang = @language
    raise "Language not set" unless lang
    raise "Markly not available" unless Markly.available?

    document = ::Markly.parse(
      source,
      flags: lang.flags || ::Markly::DEFAULT,
      extensions: lang.extensions || [:table],
    )
    Tree.new(document, source)
  end

  # Compatibility entry point with a two-argument signature.
  #
  # The first argument is ignored; the source is always parsed from scratch.
  #
  # @param _old_tree [Object] ignored
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
160
+
161
+ # Markly tree wrapper
162
class Tree
  # @return [Object] the wrapped document handed to the constructor
  # @return [String] the source text the document was parsed from
  attr_reader :inner_tree, :source

  # @param document [Object] parsed document to wrap
  # @param source [String] source text; split into lines once so nodes can share it
  def initialize(document, source)
    @source = source
    @lines = source.lines
    @inner_tree = document
  end

  # Wrap the document as the root Node of the tree.
  #
  # @return [Node]
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # This backend reports no parse errors.
  #
  # @return [Array] always empty
  def errors
    []
  end

  # This backend reports no warnings.
  #
  # @return [Array] always empty
  def warnings
    []
  end

  # This backend reports no comments.
  #
  # @return [Array] always empty
  def comments
    []
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Markly::Tree>"
  end
end
191
+
192
+ # Markly node wrapper
193
+ #
194
+ # Wraps Markly::Node to provide TreeHaver::Node-compatible interface.
195
+ #
196
+ # Note: Markly uses different type names than Commonmarker:
197
+ # - :header instead of :heading
198
+ # - :hrule instead of :thematic_break
199
+ # - :blockquote instead of :block_quote
200
+ # - :html instead of :html_block
201
class Node
  include Comparable

  # Type normalization map (Markly raw type → canonical name).
  # Types not listed here (including blockquote) are passed through unchanged.
  TYPE_MAP = {
    header: "heading",
    hrule: "thematic_break",
    html: "html_block",
  }.freeze

  # Default source position for nodes that don't have position info
  DEFAULT_SOURCE_POSITION = {
    start_line: 1,
    start_column: 1,
    end_line: 1,
    end_column: 1,
  }.freeze

  attr_reader :inner_node, :source

  # @param node [Object] the inner (Markly) node to wrap
  # @param source [String] full source text
  # @param lines [Array<String>, nil] source split into lines; computed
  #   from source when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get source position from the inner Markly node
  #
  # Markly provides source_position as a hash with :start_line, :start_column,
  # :end_line, :end_column (all 1-based). Falls back to
  # DEFAULT_SOURCE_POSITION when the inner node offers none.
  #
  # @return [Hash{Symbol => Integer}] Source position from Markly
  # @api private
  def inner_source_position
    @inner_source_position ||= if @inner_node.respond_to?(:source_position)
      @inner_node.source_position || DEFAULT_SOURCE_POSITION
    else
      DEFAULT_SOURCE_POSITION
    end
  end

  # Get the node type as a string
  #
  # Normalizes Markly types to canonical names for consistency.
  #
  # @return [String] Node type
  def type
    raw = raw_type
    TYPE_MAP.fetch(raw.to_sym, raw)
  end

  alias_method :kind, :type

  # Get the raw (non-normalized) type
  # @return [String]
  def raw_type
    @inner_node.type.to_s
  end

  # Get the text content of this node
  #
  # Tries, in order: the inner node's string_content, its to_plaintext,
  # and finally the concatenated text of the children. A string_content
  # call that raises is treated as "no literal here" so that the later
  # fallbacks are actually reached (Markly appears to raise for nodes
  # that carry no literal, such as containers).
  #
  # @return [String] Node text
  def text
    literal = literal_content
    return literal unless literal.nil?

    if @inner_node.respond_to?(:to_plaintext)
      begin
        @inner_node.to_plaintext
      rescue
        children.map(&:text).join
      end
    else
      children.map(&:text).join
    end
  end

  # Get child nodes
  #
  # Markly uses first_child/next pattern. Errors raised while walking are
  # treated as the end of the child list.
  #
  # @return [Array<Node>] Child nodes
  def children
    result = []
    child = begin
      @inner_node.first_child
    rescue
      nil
    end
    while child
      result << Node.new(child, @source, @lines)
      child = begin
        child.next
      rescue
        nil
      end
    end
    result
  end

  # Iterate over the child nodes.
  #
  # @yieldparam child [Node]
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of child nodes
  def child_count
    children.size
  end

  # @param index [Integer] position within the children array
  # @return [Node, nil] child at index, or nil
  def child(index)
    children[index]
  end

  # Position information
  # Markly provides source_position as a hash with :start_line, :start_column, :end_line, :end_column (1-based)

  # @return [Integer] byte offset derived from the start line/column
  def start_byte
    pos = inner_source_position
    calculate_byte_offset(pos[:start_line] - 1, pos[:start_column] - 1)
  end

  # @return [Integer] byte offset derived from the end line/column
  def end_byte
    pos = inner_source_position
    calculate_byte_offset(pos[:end_line] - 1, pos[:end_column] - 1)
  end

  # @return [Point] start position with row and column shifted to 0-based
  def start_point
    pos = inner_source_position
    Point.new(pos[:start_line] - 1, pos[:start_column] - 1)
  end

  # @return [Point] end position with row and column shifted to 0-based
  def end_point
    pos = inner_source_position
    Point.new(pos[:end_line] - 1, pos[:end_column] - 1)
  end

  # Get the 1-based line number where this node starts
  #
  # @return [Integer] 1-based line number
  def start_line
    inner_source_position[:start_line]
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    inner_source_position[:end_line]
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # @return [Boolean] always true for this backend
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte.
  #
  # @param other [Object]
  # @return [Integer, nil] nil when other does not respond to start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Markly::Node type=#{type} raw_type=#{raw_type}>"
  end

  # Convert node to CommonMark format
  #
  # Delegates to the inner Markly node's to_commonmark method.
  #
  # @return [String] CommonMark representation
  def to_commonmark
    @inner_node.to_commonmark
  end

  # Convert node to Markdown format
  #
  # Delegates to the inner Markly node's to_markdown method.
  #
  # @return [String] Markdown representation
  def to_markdown
    @inner_node.to_markdown
  end

  # Convert node to plain text
  #
  # Delegates to the inner Markly node's to_plaintext method.
  #
  # @return [String] Plain text representation
  def to_plaintext
    @inner_node.to_plaintext
  end

  # Convert node to HTML
  #
  # Delegates to the inner Markly node's to_html method.
  #
  # @return [String] HTML representation
  def to_html
    @inner_node.to_html
  end

  # Markly-specific methods

  # Get heading level (1-6)
  # @return [Integer, nil] nil for non-header nodes or when the lookup fails
  def header_level
    return unless raw_type == "header"
    begin
      @inner_node.header_level
    rescue
      nil
    end
  end

  # Get fence info for code blocks
  # @return [String, nil] nil for non-code-block nodes or when the lookup fails
  def fence_info
    return unless type == "code_block"
    begin
      @inner_node.fence_info
    rescue
      nil
    end
  end

  # Get URL for links/images
  # @return [String, nil] nil when the inner node raises
  def url
    @inner_node.url
  rescue
    nil
  end

  # Get title for links/images
  # @return [String, nil] nil when the inner node raises
  def title
    @inner_node.title
  rescue
    nil
  end

  # Get the next sibling (Markly uses .next)
  # @return [Node, nil]
  def next_sibling
    sibling = begin
      @inner_node.next
    rescue
      nil
    end
    sibling ? Node.new(sibling, @source, @lines) : nil
  end

  # Get the previous sibling
  # @return [Node, nil]
  def previous_sibling
    sibling = begin
      @inner_node.previous
    rescue
      nil
    end
    sibling ? Node.new(sibling, @source, @lines) : nil
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    p = begin
      @inner_node.parent
    rescue
      nil
    end
    p ? Node.new(p, @source, @lines) : nil
  end

  private

  # Literal content of the inner node, or nil when it has none.
  #
  # Returns nil both when the inner node lacks string_content and when
  # calling it raises, so callers can fall back to other strategies.
  #
  # @return [String, nil]
  def literal_content
    return unless @inner_node.respond_to?(:string_content)
    @inner_node.string_content.to_s
  rescue StandardError
    nil
  end

  # Translate a 0-based line/column pair into an offset into the source.
  #
  # Sums the byte sizes of all lines before +line+, then adds +column+,
  # capped at the byte size of the target line. When +line+ is past the
  # last line, the result is the total byte size of all lines.
  #
  # @param line [Integer] 0-based line index
  # @param column [Integer] 0-based column within that line
  # @return [Integer]
  def calculate_byte_offset(line, column)
    offset = 0
    @lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
      else
        offset += [column, line_content.bytesize].min
        break
      end
    end
    offset
  end
end
535
+
536
+ # Point struct for position information
537
# Point struct for position information.
#
# Besides the generated accessors, lookups by :row / "row" and
# :column / "column" are supported; any other key yields nil.
Point = Struct.new(:row, :column) do
  # @param key [Symbol, String] :row, "row", :column or "column"
  # @return [Object, nil] the matching member, or nil for any other key
  def [](key)
    if :row == key || "row" == key
      row
    elsif :column == key || "column" == key
      column
    end
  end

  # @return [Hash{Symbol => Object}] hash with :row and :column keys
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Markly::Point row=#{row} column=#{column}>"
  end
end
557
+ end
558
+ end
559
+ end
@@ -110,7 +110,7 @@ module TreeHaver
110
110
 
111
111
  # Compare by path first, then symbol
112
112
  cmp = (@path || "") <=> (other.path || "")
113
- return cmp unless cmp.zero?
113
+ return cmp if cmp.nonzero?
114
114
 
115
115
  (@symbol || "") <=> (other.symbol || "")
116
116
  end
@@ -157,10 +157,15 @@ module TreeHaver
157
157
  rescue NameError => e
158
158
  # TreeSitter constant doesn't exist - backend not loaded
159
159
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
160
- rescue TreeSitter::TreeSitterError => e
160
+ rescue Exception => e # rubocop:disable Lint/RescueException
161
161
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
162
- # This includes: ParserNotFoundError, LanguageLoadError, SymbolNotFoundError, etc.
163
- raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
162
+ # We rescue Exception and check the class name dynamically to avoid NameError
163
+ # at parse time when TreeSitter constant isn't loaded yet
164
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
165
+ raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
166
+ else
167
+ raise # Re-raise if it's not a TreeSitter error
168
+ end
164
169
  end
165
170
 
166
171
  # Load a language from a shared library path (legacy method)
@@ -188,9 +193,15 @@ module TreeHaver
188
193
  rescue NameError => e
189
194
  # TreeSitter constant doesn't exist - backend not loaded
190
195
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
191
- rescue TreeSitter::TreeSitterError => e
196
+ rescue Exception => e # rubocop:disable Lint/RescueException
192
197
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
193
- raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
198
+ # We rescue Exception and check the class name dynamically to avoid NameError
199
+ # at parse time when TreeSitter constant isn't loaded yet
200
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
201
+ raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
202
+ else
203
+ raise # Re-raise if it's not a TreeSitter error
204
+ end
194
205
  end
195
206
 
196
207
  # Set the language for this parser
@@ -209,9 +220,15 @@ module TreeHaver
209
220
 
210
221
  # Return the language object
211
222
  lang
212
- rescue TreeSitter::TreeSitterError => e
223
+ rescue Exception => e # rubocop:disable Lint/RescueException
213
224
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
214
- raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
225
+ # We rescue Exception and check the class name dynamically to avoid NameError
226
+ # at parse time when TreeSitter constant isn't loaded yet
227
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
228
+ raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
229
+ else
230
+ raise # Re-raise if it's not a TreeSitter error
231
+ end
215
232
  end
216
233
 
217
234
  # Parse source code
@@ -228,9 +245,15 @@ module TreeHaver
228
245
  tree = @parser.parse_string(nil, source)
229
246
  raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
230
247
  tree
231
- rescue TreeSitter::TreeSitterError => e
248
+ rescue Exception => e # rubocop:disable Lint/RescueException
232
249
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
233
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
250
+ # We rescue Exception and check the class name dynamically to avoid NameError
251
+ # at parse time when TreeSitter constant isn't loaded yet
252
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
253
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
254
+ else
255
+ raise # Re-raise if it's not a TreeSitter error
256
+ end
234
257
  end
235
258
 
236
259
  # Parse source code with optional incremental parsing
@@ -246,9 +269,15 @@ module TreeHaver
246
269
  # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
247
270
  # Return raw tree - TreeHaver::Parser will wrap it
248
271
  @parser.parse_string(old_tree, source)
249
- rescue TreeSitter::TreeSitterError => e
272
+ rescue Exception => e # rubocop:disable Lint/RescueException
250
273
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
251
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
274
+ # We rescue Exception and check the class name dynamically to avoid NameError
275
+ # at parse time when TreeSitter constant isn't loaded yet
276
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
277
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
278
+ else
279
+ raise # Re-raise if it's not a TreeSitter error
280
+ end
252
281
  end
253
282
  end
254
283
  end