tree_haver 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,490 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Commonmarker backend using the Commonmarker gem (comrak Rust parser)
6
+ #
7
+ # This backend wraps Commonmarker, a Ruby gem that provides bindings to
8
+ # comrak, a fast CommonMark-compliant Markdown parser written in Rust.
9
+ #
10
+ # @note This backend only parses Markdown source code
11
+ # @see https://github.com/gjtorikian/commonmarker Commonmarker gem
12
+ #
13
+ # @example Basic usage
14
+ # parser = TreeHaver::Parser.new
15
+ # parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
16
+ # tree = parser.parse(markdown_source)
17
+ # root = tree.root_node
18
+ # puts root.type # => "document"
19
+ module Commonmarker
20
@load_attempted = false
@loaded = false

class << self
  # Report whether the Commonmarker backend can be used.
  #
  # The first call tries to require the commonmarker gem and remembers the
  # outcome; later calls return the remembered value without requiring again.
  #
  # @return [Boolean] true if the commonmarker gem could be required
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded =
        begin
          require "commonmarker"
          true
        rescue LoadError
          false
        end
    end
    @loaded
  end

  # Forget the remembered load result so the next {available?} call tries
  # the require again (primarily for testing).
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = @loaded = false
  end

  # Describe what this backend supports.
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the backend
  #   is not available
  def capabilities
    if available?
      {
        backend: :commonmarker,
        query: false,
        bytes_field: false, # Commonmarker uses line/column
        incremental: false,
        pure_ruby: false, # Uses Rust via FFI
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
      }
    else
      {}
    end
  end
end
64
+
65
+ # Commonmarker language wrapper
66
+ #
67
+ # Commonmarker only parses Markdown. This class exists for API compatibility.
68
+ #
69
+ # @example
70
+ # language = TreeHaver::Backends::Commonmarker::Language.markdown
71
+ # parser.language = language
72
# Language handle for the Commonmarker backend.
#
# Commonmarker only parses Markdown, so this class exists for API
# compatibility: it carries a name, the backend identifier and the parse
# options that the parser later reads.
#
# @example
#   language = TreeHaver::Backends::Commonmarker::Language.markdown
#   parser.language = language
class Language
  include Comparable

  # @!attribute [r] name
  #   @return [Symbol] the language name (always :markdown for Commonmarker)
  # @!attribute [r] backend
  #   @return [Symbol] the backend this language is for
  # @!attribute [r] options
  #   @return [Hash] Commonmarker parse options
  attr_reader :name, :backend, :options

  # Build a Markdown language instance.
  #
  # @param options [Hash] Commonmarker parse options
  # @return [Language] Markdown language
  def self.markdown(options: {})
    new(:markdown, options: options)
  end

  # Create a new Commonmarker language instance.
  #
  # @param name [Symbol] Language name (should be :markdown); converted with to_sym
  # @param options [Hash] Commonmarker parse options, stored as given
  def initialize(name = :markdown, options: {})
    @name = name.to_sym
    @backend = :commonmarker
    @options = options
  end

  # Order languages by name; options do not take part in the comparison.
  #
  # @param other [Object] object to compare against
  # @return [Integer, nil] nil when other is not a Language
  def <=>(other)
    other.is_a?(Language) ? (name <=> other.name) : nil
  end

  # @return [String] debugging representation with name and options
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Language name=#{name} options=#{options}>"
  end
end
117
+
118
+ # Commonmarker parser wrapper
119
# Commonmarker parser wrapper.
#
# Holds the language object assigned by the caller and turns Markdown source
# into a Tree by delegating to ::Commonmarker.parse.
class Parser
  # @return [Language, nil] language whose options are passed to the parser
  attr_accessor :language

  # Start without a language; one must be assigned before parsing.
  def initialize
    @language = nil
  end

  # Parse Markdown source code.
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or the backend is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Commonmarker not available" unless Commonmarker.available?

    # A language without options falls back to an empty options hash.
    parse_options = @language.options || {}
    Tree.new(::Commonmarker.parse(source, options: parse_options), source)
  end

  # Alias for compatibility: the previous tree is accepted and ignored.
  #
  # @param _old_tree [Object] unused
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
144
+
145
+ # Commonmarker tree wrapper
146
# Commonmarker tree wrapper.
#
# Pairs the parsed document with the source text it came from and splits the
# source into lines once so nodes created from this tree can share them.
class Tree
  # @return [Object] the wrapped document, as given to the constructor
  attr_reader :inner_tree
  # @return [String] the source text, as given to the constructor
  attr_reader :source

  # @param document [Object] parsed document to wrap
  # @param source [String] source text the document was parsed from
  def initialize(document, source)
    @inner_tree = document
    @source = source
    @lines = source.lines
  end

  # @return [Node] a new wrapper around the document root
  def root_node
    Node.new(inner_tree, source, @lines)
  end

  # @return [Array] always empty
  def errors
    []
  end

  # @return [Array] always empty
  def warnings
    []
  end

  # @return [Array] always empty
  def comments
    []
  end

  # @return [String] fixed debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Tree>"
  end
end
175
+
176
+ # Commonmarker node wrapper
177
+ #
178
+ # Wraps Commonmarker::Node to provide TreeHaver::Node-compatible interface.
179
+ class Node
180
+ include Comparable
181
+
182
+ attr_reader :inner_node, :source
183
+
184
+ def initialize(node, source, lines = nil)
185
+ @inner_node = node
186
+ @source = source
187
+ @lines = lines || source.lines
188
+ end
189
+
190
+ # Get the node type as a string
191
+ #
192
+ # Commonmarker uses symbols like :document, :heading, :paragraph, etc.
193
+ #
194
+ # @return [String] Node type
195
+ def type
196
+ @inner_node.type.to_s
197
+ end
198
+
199
+ alias_method :kind, :type
200
+
201
+ # Get the text content of this node
202
+ #
203
+ # @return [String] Node text
204
+ def text
205
+ # Commonmarker nodes have string_content for text nodes
206
+ # Container nodes don't have string_content and will raise TypeError
207
+ if @inner_node.respond_to?(:string_content)
208
+ begin
209
+ @inner_node.string_content.to_s
210
+ rescue TypeError
211
+ # Container node - concatenate children's text
212
+ children.map(&:text).join
213
+ end
214
+ else
215
+ # For container nodes, concatenate children's text
216
+ children.map(&:text).join
217
+ end
218
+ end
219
+
220
+ # Get child nodes
221
+ #
222
+ # @return [Array<Node>] Child nodes
223
+ def children
224
+ return [] unless @inner_node.respond_to?(:each)
225
+
226
+ result = []
227
+ @inner_node.each { |child| result << Node.new(child, @source, @lines) }
228
+ result
229
+ end
230
+
231
+ def each(&block)
232
+ return to_enum(__method__) unless block
233
+ children.each(&block)
234
+ end
235
+
236
+ def child_count
237
+ children.size
238
+ end
239
+
240
+ def child(index)
241
+ children[index]
242
+ end
243
+
244
+ # Position information
245
+ # Commonmarker 2.x provides source_position as a hash with start_line, start_column, end_line, end_column
246
+
247
+ def start_byte
248
+ sp = start_point
249
+ calculate_byte_offset(sp.row, sp.column)
250
+ end
251
+
252
+ def end_byte
253
+ ep = end_point
254
+ calculate_byte_offset(ep.row, ep.column)
255
+ end
256
+
257
+ def start_point
258
+ if @inner_node.respond_to?(:source_position)
259
+ pos = begin
260
+ @inner_node.source_position
261
+ rescue
262
+ nil
263
+ end
264
+ if pos && pos[:start_line]
265
+ return Point.new(pos[:start_line] - 1, (pos[:start_column] || 1) - 1)
266
+ end
267
+ end
268
+ pos = begin
269
+ @inner_node.sourcepos
270
+ rescue
271
+ nil
272
+ end
273
+ return Point.new(0, 0) unless pos
274
+ Point.new(pos[0] - 1, pos[1] - 1)
275
+ end
276
+
277
+ def end_point
278
+ if @inner_node.respond_to?(:source_position)
279
+ pos = begin
280
+ @inner_node.source_position
281
+ rescue
282
+ nil
283
+ end
284
+ if pos && pos[:end_line]
285
+ return Point.new(pos[:end_line] - 1, (pos[:end_column] || 1) - 1)
286
+ end
287
+ end
288
+ pos = begin
289
+ @inner_node.sourcepos
290
+ rescue
291
+ nil
292
+ end
293
+ return Point.new(0, 0) unless pos
294
+ Point.new(pos[2] - 1, pos[3] - 1)
295
+ end
296
+
297
+ def start_line
298
+ if @inner_node.respond_to?(:source_position)
299
+ pos = begin
300
+ @inner_node.source_position
301
+ rescue
302
+ nil
303
+ end
304
+ return pos[:start_line] if pos && pos[:start_line]
305
+ end
306
+ pos = begin
307
+ @inner_node.sourcepos
308
+ rescue
309
+ nil
310
+ end
311
+ pos ? pos[0] : 1
312
+ end
313
+
314
+ def end_line
315
+ if @inner_node.respond_to?(:source_position)
316
+ pos = begin
317
+ @inner_node.source_position
318
+ rescue
319
+ nil
320
+ end
321
+ return pos[:end_line] if pos && pos[:end_line]
322
+ end
323
+ pos = begin
324
+ @inner_node.sourcepos
325
+ rescue
326
+ nil
327
+ end
328
+ pos ? pos[2] : 1
329
+ end
330
+
331
+ # Get position information as a hash
332
+ #
333
+ # Returns a hash with 1-based line numbers and 0-based columns.
334
+ # Compatible with *-merge gems' FileAnalysisBase.
335
+ #
336
+ # @return [Hash{Symbol => Integer}] Position hash
337
+ def source_position
338
+ {
339
+ start_line: start_line,
340
+ end_line: end_line,
341
+ start_column: start_point.column,
342
+ end_column: end_point.column,
343
+ }
344
+ end
345
+
346
+ # Get the first child node
347
+ #
348
+ # @return [Node, nil] First child or nil
349
+ def first_child
350
+ children.first
351
+ end
352
+
353
+ def named?
354
+ true
355
+ end
356
+
357
+ alias_method :structural?, :named?
358
+
359
+ def has_error?
360
+ false
361
+ end
362
+
363
+ def missing?
364
+ false
365
+ end
366
+
367
+ def <=>(other)
368
+ return unless other.respond_to?(:start_byte)
369
+ cmp = start_byte <=> other.start_byte
370
+ return cmp unless cmp&.zero?
371
+ end_byte <=> other.end_byte
372
+ end
373
+
374
+ def inspect
375
+ "#<TreeHaver::Backends::Commonmarker::Node type=#{type}>"
376
+ end
377
+
378
+ # Commonmarker-specific methods
379
+
380
+ # Get heading level (1-6)
381
+ # @return [Integer, nil]
382
+ def header_level
383
+ return unless type == "heading"
384
+ begin
385
+ @inner_node.header_level
386
+ rescue
387
+ nil
388
+ end
389
+ end
390
+
391
+ # Get fence info for code blocks
392
+ # @return [String, nil]
393
+ def fence_info
394
+ return unless type == "code_block"
395
+ begin
396
+ @inner_node.fence_info
397
+ rescue
398
+ nil
399
+ end
400
+ end
401
+
402
+ # Get URL for links/images
403
+ # @return [String, nil]
404
+ def url
405
+ @inner_node.url
406
+ rescue
407
+ nil
408
+ end
409
+
410
+ # Get title for links/images
411
+ # @return [String, nil]
412
+ def title
413
+ @inner_node.title
414
+ rescue
415
+ nil
416
+ end
417
+
418
+ # Get the next sibling
419
+ # @return [Node, nil]
420
+ def next_sibling
421
+ sibling = begin
422
+ @inner_node.next_sibling
423
+ rescue
424
+ nil
425
+ end
426
+ sibling ? Node.new(sibling, @source, @lines) : nil
427
+ end
428
+
429
+ # Get the previous sibling
430
+ # @return [Node, nil]
431
+ def previous_sibling
432
+ sibling = begin
433
+ @inner_node.previous_sibling
434
+ rescue
435
+ nil
436
+ end
437
+ sibling ? Node.new(sibling, @source, @lines) : nil
438
+ end
439
+
440
+ # Get the parent node
441
+ # @return [Node, nil]
442
+ def parent
443
+ p = begin
444
+ @inner_node.parent
445
+ rescue
446
+ nil
447
+ end
448
+ p ? Node.new(p, @source, @lines) : nil
449
+ end
450
+
451
+ private
452
+
453
+ def calculate_byte_offset(line, column)
454
+ offset = 0
455
+ @lines.each_with_index do |line_content, idx|
456
+ if idx < line
457
+ offset += line_content.bytesize
458
+ else
459
+ offset += [column, line_content.bytesize].min
460
+ break
461
+ end
462
+ end
463
+ offset
464
+ end
465
+ end
466
+
467
+ # Point struct for position information
468
# Row/column pair used for position information.
#
# Besides the struct accessors it offers hash-style reads restricted to the
# :row / "row" and :column / "column" keys.
Point = Struct.new(:row, :column) do
  # Hash-style read access.
  #
  # @param key [Symbol, String] :row, "row", :column or "column"
  # @return [Object, nil] the matching member, or nil for any other key
  def [](key)
    if [:row, "row"].include?(key)
      row
    elsif [:column, "column"].include?(key)
      column
    end
  end

  # @return [Hash{Symbol => Object}] row and column keyed by member name
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Point row=#{row} column=#{column}>"
  end
end
488
+ end
489
+ end
490
+ end
@@ -24,17 +24,19 @@ module TreeHaver
24
24
  #
25
25
  # This method lazily checks for FFI gem availability to avoid
26
26
  # polluting the environment at load time.
27
- #
28
- # @return [Boolean] true if FFI gem can be loaded
29
- # @api private
30
- def self.ffi_gem_available?
31
- return @ffi_gem_available if defined?(@ffi_gem_available)
32
-
33
- @ffi_gem_available = begin
34
- require "ffi"
35
- true
36
- rescue LoadError
37
- false
27
+ class << self
28
+ # Check if the FFI gem can be loaded
29
+ # @return [Boolean] true if FFI gem can be loaded
30
+ # @api private
31
+ def ffi_gem_available?
32
+ return @ffi_gem_available if defined?(@ffi_gem_available)
33
+
34
+ @ffi_gem_available = begin
35
+ require "ffi"
36
+ true
37
+ rescue LoadError
38
+ false
39
+ end
38
40
  end
39
41
  end
40
42
 
@@ -283,7 +285,7 @@ module TreeHaver
283
285
 
284
286
  # Compare by path first, then symbol
285
287
  cmp = (@path || "") <=> (other.path || "")
286
- return cmp unless cmp.zero?
288
+ return cmp if cmp.nonzero?
287
289
 
288
290
  (@symbol || "") <=> (other.symbol || "")
289
291
  end
@@ -499,7 +501,7 @@ module TreeHaver
499
501
  ok = Native.ts_parser_set_language(@parser, ptr)
500
502
  raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok
501
503
 
502
- lang
504
+ lang # rubocop:disable Lint/Void (intentional return value)
503
505
  end
504
506
 
505
507
  # Parse source code into a syntax tree
@@ -265,7 +265,7 @@ module TreeHaver
265
265
 
266
266
  # Compare by path first, then symbol
267
267
  cmp = (@path || "") <=> (other.path || "")
268
- return cmp unless cmp.zero?
268
+ return cmp if cmp.nonzero?
269
269
 
270
270
  (@symbol || "") <=> (other.symbol || "")
271
271
  end