tree_haver 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -249,6 +249,9 @@ module TreeHaver
249
249
  #
250
250
  # @api private
251
251
  class Node
252
+ include Comparable
253
+ include Enumerable
254
+
252
255
  attr_reader :match, :source
253
256
 
254
257
  def initialize(match, source)
@@ -372,6 +375,42 @@ module TreeHaver
372
375
  calculate_point(@match.offset + @match.length)
373
376
  end
374
377
 
378
# Line on which this node begins, counted from 1
#
# Derived from the zero-based row reported by {#start_point}.
#
# @return [Integer] 1-based line number
def start_line
  zero_based_row = start_point[:row]
  zero_based_row + 1
end
384
+
385
# Line on which this node ends, counted from 1
#
# Derived from the zero-based row reported by {#end_point}.
#
# @return [Integer] 1-based line number
def end_line
  zero_based_row = end_point[:row]
  zero_based_row + 1
end
391
+
392
# Summarise this node's location as a hash
#
# Line numbers are 1-based (see {#start_line} / {#end_line}); columns are
# taken unchanged from {#start_point} / {#end_point}.
# Compatible with *-merge gems' FileAnalysisBase.
#
# @return [Hash{Symbol => Integer}] Position hash with :start_line,
#   :end_line, :start_column and :end_column keys
def source_position
  position = {}
  position[:start_line] = start_line
  position[:end_line] = end_line
  position[:start_column] = start_point[:column]
  position[:end_column] = end_point[:column]
  position
end
406
+
407
# Fetch the child at index 0
#
# Delegates to {#child}, so the result for a childless node is whatever
# {#child} returns for an out-of-range index.
#
# @return [Node, nil] First child or nil
def first_child
  child(0)
end
413
+
375
414
  def text
376
415
  @match.string
377
416
  end
@@ -0,0 +1,491 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Commonmarker backend using the Commonmarker gem (comrak Rust parser)
6
+ #
7
+ # This backend wraps Commonmarker, a Ruby gem that provides bindings to
8
+ # comrak, a fast CommonMark-compliant Markdown parser written in Rust.
9
+ #
10
+ # @note This backend only parses Markdown source code
11
+ # @see https://github.com/gjtorikian/commonmarker Commonmarker gem
12
+ #
13
+ # @example Basic usage
14
+ # parser = TreeHaver::Parser.new
15
+ # parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
16
+ # tree = parser.parse(markdown_source)
17
+ # root = tree.root_node
18
+ # puts root.type # => "document"
19
+ module Commonmarker
20
# Load-state flags: whether a require has been tried, and whether it worked.
@load_attempted = false
@loaded = false

class << self
  # Check if the Commonmarker backend is available
  #
  # The gem is required at most once; later calls return the remembered
  # outcome until {reset!} is called.
  #
  # @return [Boolean] true if commonmarker gem is available
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded = begin
        require "commonmarker"
        true
      rescue LoadError
        false
      end
    end
    @loaded
  end

  # Forget the remembered load outcome (primarily for testing)
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = false
    @loaded = false
  end

  # Describe what this backend supports
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the
  #   backend is not available
  def capabilities
    if available?
      {
        backend: :commonmarker,
        query: false,
        bytes_field: false, # Commonmarker uses line/column
        incremental: false,
        pure_ruby: false, # Uses Rust via FFI
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
      }
    else
      {}
    end
  end
end
64
+
65
# Commonmarker language wrapper
#
# Commonmarker only parses Markdown. This class exists for API compatibility.
# Ordering and equality (via Comparable) look at {#name} only; options are
# not part of the comparison.
#
# @example
#   language = TreeHaver::Backends::Commonmarker::Language.markdown
#   parser.language = language
class Language
  include Comparable

  # @return [Symbol] the language name (always :markdown for Commonmarker)
  attr_reader :name

  # @return [Symbol] the backend this language is for
  attr_reader :backend

  # @return [Hash] Commonmarker parse options
  attr_reader :options

  # Build the Markdown language instance
  #
  # @param options [Hash] Commonmarker parse options
  # @return [Language] Markdown language
  def self.markdown(options: {})
    new(:markdown, options: options)
  end

  # Create a new Commonmarker language instance
  #
  # @param name [Symbol] Language name (should be :markdown); converted with #to_sym
  # @param options [Hash] Commonmarker parse options
  def initialize(name = :markdown, options: {})
    @options = options
    @backend = :commonmarker
    @name = name.to_sym
  end

  # Comparison for sorting/equality
  #
  # @param other [Object] object to compare against
  # @return [Integer, nil] result of comparing names, or nil when other is
  #   not a Language
  def <=>(other)
    other.is_a?(Language) ? name <=> other.name : nil
  end

  # @return [String] debugging representation showing name and options
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Language name=#{name} options=#{options}>"
  end
end
117
+
118
# Commonmarker parser wrapper
#
# Holds the language to parse with and turns Markdown source into a Tree.
class Parser
  # @return [Language, nil] language used by {#parse}; nil until assigned
  attr_accessor :language

  def initialize
    @language = nil
  end

  # Parse Markdown source code
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] if no language is set or the backend is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Commonmarker not available" unless Commonmarker.available?

    parse_options = @language.options || {}
    document = ::Commonmarker.parse(source, options: parse_options)
    Tree.new(document, source)
  end

  # Alias for compatibility
  #
  # @param _old_tree [Object] ignored; every call is a full parse
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
144
+
145
# Commonmarker tree wrapper
#
# Pairs the parsed document with the source it came from. The source is
# split into lines once here and handed to the nodes created from this tree.
class Tree
  # @return [Object] the wrapped document
  attr_reader :inner_tree

  # @return [String] the source text that was parsed
  attr_reader :source

  # @param document [Object] parsed document to wrap
  # @param source [String] source text the document was parsed from
  def initialize(document, source)
    @source = source
    @lines = source.lines
    @inner_tree = document
  end

  # @return [Node] wrapper around the document itself
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # @return [Array] always empty for this backend
  def errors
    []
  end

  # @return [Array] always empty for this backend
  def warnings
    []
  end

  # @return [Array] always empty for this backend
  def comments
    []
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Tree>"
  end
end
175
+
176
# Commonmarker node wrapper
#
# Wraps Commonmarker::Node to provide TreeHaver::Node-compatible interface.
# Points use 0-based rows and columns (the 1-based values reported by the
# wrapped node minus one); line numbers are reported 1-based.
class Node
  include Comparable
  include Enumerable

  attr_reader :inner_node, :source

  # @param node [Object] the wrapped Commonmarker node
  # @param source [String] full source text the node belongs to
  # @param lines [Array<String>, nil] source split into lines; computed from
  #   source when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get the node type as a string
  #
  # Commonmarker uses symbols like :document, :heading, :paragraph, etc.
  #
  # @return [String] Node type
  def type
    @inner_node.type.to_s
  end

  alias_method :kind, :type

  # Get the text content of this node
  #
  # Uses the wrapped node's string_content when it has one; otherwise
  # (including when string_content raises TypeError, as container nodes do)
  # the text of all children is concatenated.
  #
  # @return [String] Node text
  def text
    if @inner_node.respond_to?(:string_content)
      begin
        return @inner_node.string_content.to_s
      rescue TypeError
        # Container node: fall through to the children's text below.
      end
    end
    children.map(&:text).join
  end

  # Get child nodes
  #
  # A fresh array of wrappers is built on every call.
  #
  # @return [Array<Node>] Child nodes (empty when the wrapped node is not iterable)
  def children
    return [] unless @inner_node.respond_to?(:each)

    wrapped = []
    @inner_node.each { |child| wrapped << Node.new(child, @source, @lines) }
    wrapped
  end

  # Iterate over child nodes
  #
  # @yieldparam child [Node]
  # @return [Enumerator, Array<Node>] enumerator when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of children
  def child_count
    children.size
  end

  # @param index [Integer] position in {#children}
  # @return [Node, nil] child at index, or nil when out of range
  def child(index)
    children[index]
  end

  # Position information
  # Commonmarker 2.x provides source_position as a hash with start_line, start_column, end_line, end_column

  # @return [Integer] byte offset of {#start_point} within the source lines
  def start_byte
    sp = start_point
    calculate_byte_offset(sp.row, sp.column)
  end

  # @return [Integer] byte offset of {#end_point} within the source lines
  def end_byte
    ep = end_point
    calculate_byte_offset(ep.row, ep.column)
  end

  # @return [Point] 0-based start position; (0, 0) when no position is available
  def start_point
    point_from(:start_line, :start_column, 0, 1)
  end

  # @return [Point] 0-based end position; (0, 0) when no position is available
  def end_point
    point_from(:end_line, :end_column, 2, 3)
  end

  # @return [Integer] 1-based start line; 1 when no position is available
  def start_line
    line_from(:start_line, 0)
  end

  # @return [Integer] 1-based end line; 1 when no position is available
  def end_line
    line_from(:end_line, 2)
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  # Compatible with *-merge gems' FileAnalysisBase.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    children.first
  end

  # @return [Boolean] always true for this backend
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte
  #
  # @param other [Object] anything responding to start_byte
  # @return [Integer, nil] nil when other does not respond to start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  # @return [String] debugging representation showing the node type
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Node type=#{type}>"
  end

  # Commonmarker-specific methods

  # Get heading level (1-6)
  # @return [Integer, nil] nil for non-heading nodes or when the lookup fails
  def header_level
    return unless type == "heading"
    begin
      @inner_node.header_level
    rescue
      nil
    end
  end

  # Get fence info for code blocks
  # @return [String, nil] nil for non-code-block nodes or when the lookup fails
  def fence_info
    return unless type == "code_block"
    begin
      @inner_node.fence_info
    rescue
      nil
    end
  end

  # Get URL for links/images
  # @return [String, nil] nil when the wrapped node cannot provide one
  def url
    @inner_node.url
  rescue
    nil
  end

  # Get title for links/images
  # @return [String, nil] nil when the wrapped node cannot provide one
  def title
    @inner_node.title
  rescue
    nil
  end

  # Get the next sibling
  # @return [Node, nil]
  def next_sibling
    wrap_relative(:next_sibling)
  end

  # Get the previous sibling
  # @return [Node, nil]
  def prev_sibling
    wrap_relative(:previous_sibling)
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    wrap_relative(:parent)
  end

  private

  # Position hash from the wrapped node's source_position, or nil when the
  # node does not offer it or the call raises.
  def position_hash
    return unless @inner_node.respond_to?(:source_position)

    begin
      @inner_node.source_position
    rescue
      nil
    end
  end

  # Position array from the wrapped node's sourcepos, or nil when the call raises.
  def legacy_sourcepos
    @inner_node.sourcepos
  rescue
    nil
  end

  # Build a 0-based Point, preferring the position hash and falling back to
  # the sourcepos array; (0, 0) when neither is available.
  def point_from(line_key, column_key, line_index, column_index)
    hash = position_hash
    if hash && hash[line_key]
      # A missing column is treated as column 1 (0 after conversion).
      return Point.new(hash[line_key] - 1, (hash[column_key] || 1) - 1)
    end

    legacy = legacy_sourcepos
    return Point.new(0, 0) unless legacy
    Point.new(legacy[line_index] - 1, legacy[column_index] - 1)
  end

  # Read a 1-based line number, preferring the position hash and falling
  # back to the sourcepos array; 1 when neither is available.
  def line_from(line_key, line_index)
    hash = position_hash
    return hash[line_key] if hash && hash[line_key]

    legacy = legacy_sourcepos
    legacy ? legacy[line_index] : 1
  end

  # Call a navigation accessor on the wrapped node and wrap the result;
  # nil when the accessor raises or returns nothing.
  def wrap_relative(accessor)
    relative = begin
      @inner_node.public_send(accessor)
    rescue
      nil
    end
    relative ? Node.new(relative, @source, @lines) : nil
  end

  # Convert a 0-based (line, column) pair into an offset: the byte sizes of
  # all preceding lines plus the column, capped at the target line's byte size.
  # A line past the end yields the total size of all lines.
  def calculate_byte_offset(line, column)
    row = [line, 0].max # a negative line is measured against the first line
    preceding = @lines.first(row).sum(&:bytesize)
    target = @lines[row]
    target ? preceding + [column, target.bytesize].min : preceding
  end
end
467
+
468
# Point struct for position information
#
# Besides the usual accessors, a point can be read hash-style with
# :row / "row" / :column / "column"; any other key yields nil.
Point = Struct.new(:row, :column) do
  # @param key [Symbol, String] :row, "row", :column or "column"
  # @return [Integer, nil] the matching coordinate, or nil for any other key
  def [](key)
    # Literal on the left keeps the comparison driven by the literal,
    # exactly as a `when` clause would.
    if :row == key || "row" == key
      row
    elsif :column == key || "column" == key
      column
    end
  end

  # @return [Hash{Symbol => Integer}] row and column keyed by symbol
  def to_h
    coordinates = {}
    coordinates[:row] = row
    coordinates[:column] = column
    coordinates
  end

  # @return [String] "(row, column)"
  def to_s
    "(#{row}, #{column})"
  end

  # @return [String] debugging representation
  def inspect
    "#<TreeHaver::Backends::Commonmarker::Point row=#{row} column=#{column}>"
  end
end
489
+ end
490
+ end
491
+ end
@@ -24,17 +24,19 @@ module TreeHaver
24
24
  #
25
25
  # This method lazily checks for FFI gem availability to avoid
26
26
  # polluting the environment at load time.
27
- #
28
- # @return [Boolean] true if FFI gem can be loaded
29
- # @api private
30
- def self.ffi_gem_available?
31
- return @ffi_gem_available if defined?(@ffi_gem_available)
32
-
33
- @ffi_gem_available = begin
34
- require "ffi"
35
- true
36
- rescue LoadError
37
- false
27
+ class << self
28
+ # Check if the FFI gem can be loaded
29
+ # @return [Boolean] true if FFI gem can be loaded
30
+ # @api private
31
+ def ffi_gem_available?
32
+ return @ffi_gem_available if defined?(@ffi_gem_available)
33
+
34
+ @ffi_gem_available = begin
35
+ require "ffi"
36
+ true
37
+ rescue LoadError
38
+ false
39
+ end
38
40
  end
39
41
  end
40
42
 
@@ -61,10 +63,28 @@ module TreeHaver
61
63
 
62
64
  extend(::FFI::Library)
63
65
 
66
+ define_ts_point_struct!
64
67
  define_ts_node_struct!
65
68
  @ffi_extended = true
66
69
  end
67
70
 
71
# Define the TSPoint struct lazily
#
# Does nothing when a TSPoint constant already exists directly on this
# module. Otherwise creates the struct class, registers it as TSPoint and
# exposes its by-value form under the :ts_point type name.
#
# @api private
def define_ts_point_struct!
  unless const_defined?(:TSPoint, false)
    # FFI struct representation of TSPoint
    # Mirrors the C struct layout: struct { uint32_t row; uint32_t column; }
    point_struct = Class.new(::FFI::Struct) do
      layout :row, :uint32, :column, :uint32
    end

    const_set(:TSPoint, point_struct)
    typedef(point_struct.by_value, :ts_point)
  end
end
87
+
68
88
  # Define the TSNode struct lazily
69
89
  # @api private
70
90
  def define_ts_node_struct!
@@ -166,8 +186,8 @@ module TreeHaver
166
186
  attach_function(:ts_node_child, [:ts_node, :uint32], :ts_node)
167
187
  attach_function(:ts_node_start_byte, [:ts_node], :uint32)
168
188
  attach_function(:ts_node_end_byte, [:ts_node], :uint32)
169
- attach_function(:ts_node_start_point, [:ts_node], :pointer)
170
- attach_function(:ts_node_end_point, [:ts_node], :pointer)
189
+ attach_function(:ts_node_start_point, [:ts_node], :ts_point)
190
+ attach_function(:ts_node_end_point, [:ts_node], :ts_point)
171
191
  attach_function(:ts_node_is_null, [:ts_node], :bool)
172
192
  attach_function(:ts_node_is_named, [:ts_node], :bool)
173
193
  end
@@ -283,7 +303,7 @@ module TreeHaver
283
303
 
284
304
  # Compare by path first, then symbol
285
305
  cmp = (@path || "") <=> (other.path || "")
286
- return cmp unless cmp.zero?
306
+ return cmp if cmp.nonzero?
287
307
 
288
308
  (@symbol || "") <=> (other.symbol || "")
289
309
  end
@@ -499,7 +519,7 @@ module TreeHaver
499
519
  ok = Native.ts_parser_set_language(@parser, ptr)
500
520
  raise TreeHaver::NotAvailable, "Failed to set language on parser" unless ok
501
521
 
502
- lang
522
+ lang # rubocop:disable Lint/Void (intentional return value)
503
523
  end
504
524
 
505
525
  # Parse source code into a syntax tree
@@ -572,6 +592,8 @@ module TreeHaver
572
592
  # Wraps a TSNode by-value struct. TSNode is passed by value in the
573
593
  # tree-sitter C API, so we store the struct value directly.
574
594
  class Node
595
+ include Enumerable
596
+
575
597
  # @api private
576
598
  # @param ts_node_value [Native::TSNode] the TSNode struct (by value)
577
599
  def initialize(ts_node_value)
@@ -619,20 +641,20 @@ module TreeHaver
619
641
 
620
642
  # Get start point
621
643
  #
622
- # @return [Object] with row and column
644
+ # @return [TreeHaver::Point] with row and column
623
645
  def start_point
624
- # FFI backend would need to implement ts_node_start_point
625
- # For now, return a simple struct
626
- Struct.new(:row, :column).new(0, Native.ts_node_start_byte(@val))
646
+ point = Native.ts_node_start_point(@val)
647
+ # TSPoint is returned by value as an FFI::Struct with :row and :column fields
648
+ TreeHaver::Point.new(point[:row], point[:column])
627
649
  end
628
650
 
629
651
  # Get end point
630
652
  #
631
- # @return [Object] with row and column
653
+ # @return [TreeHaver::Point] with row and column
632
654
  def end_point
633
- # FFI backend would need to implement ts_node_end_point
634
- # For now, return a simple struct
635
- Struct.new(:row, :column).new(0, Native.ts_node_end_byte(@val))
655
+ point = Native.ts_node_end_point(@val)
656
+ # TSPoint is returned by value as an FFI::Struct with :row and :column fields
657
+ TreeHaver::Point.new(point[:row], point[:column])
636
658
  end
637
659
 
638
660
  # Check if node has error
@@ -659,6 +681,27 @@ module TreeHaver
659
681
  end
660
682
  nil
661
683
  end
684
+
685
# Compare nodes for ordering
#
# Nodes are ordered by their position in the source:
# 1. start_byte (earlier nodes come first)
# 2. end_byte as the first tie-breaker (shorter spans come first)
# 3. type as the final tie-breaker, for deterministic ordering
#
# @param other [Node] node to compare with
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
def <=>(other)
  return nil unless other.is_a?(Node)

  # Integer#nonzero? yields nil for 0, so each tie falls through to the
  # next criterion.
  (start_byte <=> other.start_byte).nonzero? ||
    (end_byte <=> other.end_byte).nonzero? ||
    (type <=> other.type)
end
662
705
  end
663
706
  end
664
707
  end
@@ -25,7 +25,8 @@ module TreeHaver
25
25
  # jruby -e "require 'tree_haver'; puts TreeHaver::Backends::Java.available?"
26
26
  #
27
27
  # @note Only available on JRuby
28
- # @see https://tree-sitter.github.io/java-tree-sitter/ java-tree-sitter documentation
28
+ # @see https://github.com/tree-sitter/java-tree-sitter source
29
+ # @see https://tree-sitter.github.io/java-tree-sitter java-tree-sitter documentation
29
30
  # @see https://central.sonatype.com/artifact/io.github.tree-sitter/jtreesitter Maven Central
30
31
  module Java
31
32
  # The Java package for java-tree-sitter
@@ -265,7 +266,7 @@ module TreeHaver
265
266
 
266
267
  # Compare by path first, then symbol
267
268
  cmp = (@path || "") <=> (other.path || "")
268
- return cmp unless cmp.zero?
269
+ return cmp if cmp.nonzero?
269
270
 
270
271
  (@symbol || "") <=> (other.symbol || "")
271
272
  end