tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -228,13 +228,57 @@ module TreeHaver
228
228
  # All Java backend implementation classes require JRuby and cannot be tested on MRI/CRuby.
229
229
  # JRuby-specific CI jobs would test this code.
230
230
  class Language
231
+ include Comparable
232
+
231
233
  attr_reader :impl
232
234
 
235
+ # The backend this language is for
236
+ # @return [Symbol]
237
+ attr_reader :backend
238
+
239
+ # The path this language was loaded from (if known)
240
+ # @return [String, nil]
241
+ attr_reader :path
242
+
243
+ # The symbol name (if known)
244
+ # @return [String, nil]
245
+ attr_reader :symbol
246
+
233
247
  # @api private
234
- def initialize(impl)
248
# Build a language wrapper around an already-loaded backend language object.
#
# @param impl [Object] the underlying language object, exposed via #impl
# @param path [String, nil] where the language was loaded from, if known
# @param symbol [String, nil] the exported symbol name, if known
def initialize(impl, path: nil, symbol: nil)
  # The backend tag is fixed for this wrapper; only the origin details vary.
  @backend = :java
  @impl = impl
  @path, @symbol = path, symbol
end
254
+
255
+ # Compare languages for equality
256
+ #
257
+ # Java languages are equal if they have the same backend, path, and symbol.
258
+ # Path and symbol uniquely identify a loaded language.
259
+ #
260
+ # @param other [Object] object to compare with
261
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
262
# Order two languages of the same backend by path, then by symbol.
#
# A missing path or symbol is compared as the empty string.
#
# @param other [Object] object to compare with
# @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a Language
#   or belongs to a different backend
def <=>(other)
  return nil unless other.is_a?(Language) && other.backend == @backend

  by_path = (@path || "") <=> (other.path || "")
  # Only fall through to the symbol when the paths tie.
  by_path.zero? ? (@symbol || "") <=> (other.symbol || "") : by_path
end
272
+
273
+ # Hash value for this language (for use in Sets/Hashes)
274
+ # @return [Integer]
275
# Hash value for this language (for use in Sets/Hashes).
#
# Built from the backend, path and symbol instance variables.
#
# @return [Integer]
def hash
  identity = [@backend, @path, @symbol]
  identity.hash
end

# eql? is the same method as ==.
alias eql? ==
281
+
238
282
  # Load a language from a shared library
239
283
  #
240
284
  # There are three ways java-tree-sitter can load shared libraries:
@@ -298,7 +342,7 @@ module TreeHaver
298
342
  combined_lookup = grammar_lookup.or(Java.runtime_lookup)
299
343
 
300
344
  java_lang = Java.java_classes[:Language].load(combined_lookup, sym)
301
- new(java_lang)
345
+ new(java_lang, path: path, symbol: symbol)
302
346
  rescue ::Java::JavaLang::RuntimeException => e
303
347
  cause = e.cause
304
348
  root_cause = cause&.cause || cause
@@ -354,7 +398,7 @@ module TreeHaver
354
398
  # java-tree-sitter's Language.load(String) searches for the language
355
399
  # in the classpath using standard naming conventions
356
400
  java_lang = Java.java_classes[:Language].load(name)
357
- new(java_lang)
401
+ new(java_lang, symbol: "tree_sitter_#{name}")
358
402
  rescue ::Java::JavaLang::RuntimeException => e
359
403
  raise TreeHaver::NotAvailable,
360
404
  "Failed to load language '#{name}': #{e.message}. " \
@@ -383,43 +427,48 @@ module TreeHaver
383
427
 
384
428
  # Set the language for this parser
385
429
  #
386
- # @param lang [Language] the language to use
430
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
431
+ # This backend receives the Language wrapper's inner impl (java Language object).
432
+ #
433
+ # @param lang [Object] the Java language object (already unwrapped)
387
434
  # @return [void]
388
435
# Set the language used by the wrapped parser.
#
# TreeHaver::Parser normally unwraps the language before calling this, so
# +lang+ is usually the raw backend language object. A Language wrapper
# passed directly is still unwrapped here, matching the defensive unwrap
# that #parse_string applies to trees.
#
# @param lang [Language, Object] a Language wrapper or the raw language object
# @return [void]
def language=(lang)
  @parser.language = lang.is_a?(Language) ? lang.impl : lang
end
392
439
 
393
440
  # Parse source code
394
441
  #
395
442
  # @param source [String] the source code to parse
396
- # @return [TreeHaver::Tree] wrapped tree
443
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
397
444
# Parse +source+ from scratch.
#
# @param source [String] the source code to parse
# @return [Tree] backend tree wrapper; the TreeHaver::Tree wrapping is left
#   to the caller
def parse(source)
  Tree.new(@parser.parse(source))
end
402
449
 
403
450
  # Parse source code with optional incremental parsing
404
451
  #
452
+ # Note: old_tree is already unwrapped by TreeHaver::Parser before reaching this method.
453
+ # The backend receives the raw Tree wrapper's impl, not a TreeHaver::Tree.
454
+ #
405
455
  # When old_tree is provided and has been edited, tree-sitter will reuse
406
456
  # unchanged nodes for better performance.
407
457
  #
408
- # @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing
458
+ # @param old_tree [Tree, nil] previous backend tree for incremental parsing (already unwrapped)
409
459
  # @param source [String] the source code to parse
410
- # @return [TreeHaver::Tree] wrapped tree
460
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
411
461
  # @see https://tree-sitter.github.io/java-tree-sitter/io/github/treesitter/jtreesitter/Parser.html#parse(io.github.treesitter.jtreesitter.Tree,java.lang.String)
412
462
# Parse +source+, optionally reusing a previous tree for incremental parsing.
#
# @param old_tree [Tree, Object, nil] previous tree; a Tree wrapper is
#   reduced to its impl, anything else is handed to the parser as-is
# @param source [String] the source code to parse
# @return [Tree] backend tree wrapper; the TreeHaver::Tree wrapping is left
#   to the caller
def parse_string(old_tree, source)
  java_tree =
    if old_tree
      previous = old_tree.is_a?(Tree) ? old_tree.impl : old_tree
      @parser.parse(previous, source)
    else
      @parser.parse(source)
    end

  Tree.new(java_tree)
end
424
473
  end
425
474
 
@@ -0,0 +1,559 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Backends
5
+ # Markly backend using the Markly gem (cmark-gfm C library)
6
+ #
7
+ # This backend wraps Markly, a Ruby gem that provides bindings to
8
+ # cmark-gfm, GitHub's fork of the CommonMark C library with extensions.
9
+ #
10
+ # @note This backend only parses Markdown source code
11
+ # @see https://github.com/ioquatix/markly Markly gem
12
+ #
13
+ # @example Basic usage
14
+ # parser = TreeHaver::Parser.new
15
+ # parser.language = TreeHaver::Backends::Markly::Language.markdown(
16
+ # flags: Markly::DEFAULT,
17
+ # extensions: [:table, :strikethrough]
18
+ # )
19
+ # tree = parser.parse(markdown_source)
20
+ # root = tree.root_node
21
+ # puts root.type # => "document"
22
+ module Markly
23
# Load state for the markly gem: whether a require was attempted and whether
# it succeeded.
@load_attempted = false
@loaded = false

class << self
  # Check if the Markly backend is available.
  #
  # The markly gem is required lazily on the first call; the outcome is
  # remembered until {reset!} is called.
  #
  # @return [Boolean] true if the markly gem could be required
  def available?
    unless @load_attempted
      @load_attempted = true
      @loaded = begin
        require "markly"
        true
      rescue LoadError
        false
      end
    end
    @loaded
  end

  # Reset the load state (primarily for testing)
  #
  # @return [void]
  # @api private
  def reset!
    @loaded = false
    @load_attempted = false
  end

  # Get capabilities supported by this backend
  #
  # @return [Hash{Symbol => Object}] capability map, empty when the backend
  #   is not available
  def capabilities
    if available?
      {
        backend: :markly,
        query: false,
        bytes_field: false, # positions come from Markly as line/column
        incremental: false,
        pure_ruby: false, # backed by the cmark-gfm C library
        markdown_only: true,
        error_tolerant: true, # Markdown is forgiving
        gfm_extensions: true, # GitHub Flavored Markdown extensions
      }
    else
      {}
    end
  end
end
68
+
69
+ # Markly language wrapper
70
+ #
71
+ # Markly only parses Markdown. This class exists for API compatibility
72
+ # and to pass through Markly-specific options (flags, extensions).
73
+ #
74
+ # @example
75
+ # language = TreeHaver::Backends::Markly::Language.markdown(
76
+ # flags: Markly::DEFAULT | Markly::FOOTNOTES,
77
+ # extensions: [:table, :strikethrough]
78
+ # )
79
+ # parser.language = language
80
class Language
  include Comparable

  # The language name (always :markdown for Markly)
  # @return [Symbol]
  attr_reader :name

  # The backend this language is for
  # @return [Symbol]
  attr_reader :backend

  # Markly parse flags
  # @return [Integer, nil] nil means "use Markly::DEFAULT at parse time"
  attr_reader :flags

  # Markly extensions to enable
  # @return [Array<Symbol>]
  attr_reader :extensions

  # Create a new Markly language instance
  #
  # @param name [Symbol] Language name (should be :markdown)
  # @param flags [Integer] Markly parse flags (default: Markly::DEFAULT)
  # @param extensions [Array<Symbol>] Extensions to enable (default: [:table])
  def initialize(name = :markdown, flags: nil, extensions: [:table])
    @name = name.to_sym
    @backend = :markly
    @flags = flags # Will use Markly::DEFAULT if nil at parse time
    @extensions = extensions
  end

  class << self
    # Create a Markdown language instance
    #
    # @param flags [Integer] Markly parse flags
    # @param extensions [Array<Symbol>] Extensions to enable
    # @return [Language] Markdown language
    def markdown(flags: nil, extensions: [:table])
      new(:markdown, flags: flags, extensions: extensions)
    end
  end

  # Comparison for sorting/equality
  #
  # Only the name takes part in the comparison; flags and extensions are
  # not considered.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] comparison of the names, or nil when +other+ is
  #   not a Language
  def <=>(other)
    return unless other.is_a?(Language)
    name <=> other.name
  end

  # Hash value for this language (for use in Sets/Hashes)
  #
  # Derived from the same identity that == uses (the name, plus the fixed
  # backend tag) so that languages which compare equal also collide as
  # Hash keys and Set members.
  #
  # @return [Integer]
  def hash
    [@backend, @name].hash
  end

  # eql? follows ==, keeping Hash/Set lookups consistent with equality.
  alias_method :eql?, :==

  def inspect
    "#<TreeHaver::Backends::Markly::Language name=#{name} flags=#{flags} extensions=#{extensions}>"
  end
end
132
+
133
+ # Markly parser wrapper
134
class Parser
  # The language object whose flags and extensions are used when parsing
  attr_accessor :language

  def initialize
    @language = nil
  end

  # Parse Markdown source code
  #
  # Flags fall back to ::Markly::DEFAULT and extensions to [:table] when the
  # language leaves them unset.
  #
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  # @raise [RuntimeError] when no language is set or Markly is unavailable
  def parse(source)
    raise "Language not set" unless @language
    raise "Markly not available" unless Markly.available?

    document = ::Markly.parse(
      source,
      flags: @language.flags || ::Markly::DEFAULT,
      extensions: @language.extensions || [:table],
    )
    Tree.new(document, source)
  end

  # Alias for compatibility: the old tree is ignored and the source is
  # parsed from scratch.
  #
  # @param _old_tree [Object, nil] unused
  # @param source [String] Markdown source to parse
  # @return [Tree] Parsed tree
  def parse_string(_old_tree, source)
    parse(source)
  end
end
160
+
161
+ # Markly tree wrapper
162
class Tree
  # The parsed document handed to the constructor
  attr_reader :inner_tree

  # The source text the document was parsed from
  attr_reader :source

  # @param document [Object] parsed document returned by the parser
  # @param source [String] the source text
  def initialize(document, source)
    @inner_tree, @source = document, source
    # Split once so every Node created from this tree can share the lines.
    @lines = source.lines
  end

  # @return [Node] wrapper around the document root
  def root_node
    Node.new(@inner_tree, @source, @lines)
  end

  # This backend reports no errors, warnings or comments; each call returns
  # a new empty array.
  #
  # @return [Array] always empty
  def errors
    []
  end

  alias_method :warnings, :errors
  alias_method :comments, :errors

  def inspect
    "#<TreeHaver::Backends::Markly::Tree>"
  end
end
191
+
192
+ # Markly node wrapper
193
+ #
194
+ # Wraps Markly::Node to provide TreeHaver::Node-compatible interface.
195
+ #
196
+ # Note: Markly uses different type names than Commonmarker:
197
+ # - :header instead of :heading
198
+ # - :hrule instead of :thematic_break
199
+ # - :blockquote instead of :block_quote
200
+ # - :html instead of :html_block
201
class Node
  include Comparable

  # Type normalization map (Markly → canonical)
  #
  # Types that are not listed are passed through unchanged.
  # NOTE(review): blockquote is not mapped here — confirm that is the
  # intended canonical name.
  TYPE_MAP = {
    header: "heading",
    hrule: "thematic_break",
    html: "html_block",
  }.freeze

  # Default source position for nodes that don't have position info
  DEFAULT_SOURCE_POSITION = {
    start_line: 1,
    start_column: 1,
    end_line: 1,
    end_column: 1,
  }.freeze

  attr_reader :inner_node, :source

  # @param node [Object] the wrapped Markly node
  # @param source [String] full source text the node was parsed from
  # @param lines [Array<String>, nil] pre-split source lines; computed from
  #   +source+ when omitted
  def initialize(node, source, lines = nil)
    @inner_node = node
    @source = source
    @lines = lines || source.lines
  end

  # Get source position from the inner Markly node
  #
  # Expected to be a hash with :start_line, :start_column, :end_line and
  # :end_column (all 1-based). Falls back to DEFAULT_SOURCE_POSITION when
  # the inner node has no position.
  #
  # @return [Hash{Symbol => Integer}] Source position from Markly
  # @api private
  def inner_source_position
    @inner_source_position ||= if @inner_node.respond_to?(:source_position)
      @inner_node.source_position || DEFAULT_SOURCE_POSITION
    else
      DEFAULT_SOURCE_POSITION
    end
  end

  # Get the node type as a string
  #
  # Normalizes Markly types to canonical names for consistency.
  #
  # @return [String] Node type
  def type
    raw = raw_type
    TYPE_MAP.fetch(raw.to_sym, raw)
  end

  alias_method :kind, :type

  # Get the raw (non-normalized) type
  # @return [String]
  def raw_type
    @inner_node.type.to_s
  end

  # Get the text content of this node
  #
  # Tries, in order: the node's own string content, its plain-text
  # rendering, and finally the concatenated text of its children.
  #
  # Responding to string_content is not enough to know it will succeed:
  # it can raise for nodes that carry no literal text (container nodes).
  # A failure therefore falls through to the next strategy instead of
  # propagating.
  #
  # @return [String] Node text
  def text
    if @inner_node.respond_to?(:string_content)
      begin
        return @inner_node.string_content.to_s
      rescue StandardError
        # No literal content on this node; try the container strategies.
      end
    end

    if @inner_node.respond_to?(:to_plaintext)
      begin
        return @inner_node.to_plaintext
      rescue StandardError
        # Rendering failed; fall back to the children below.
      end
    end

    children.map(&:text).join
  end

  # Get child nodes
  #
  # Walks the inner node's first_child/next chain. A failing navigation
  # call ends the walk rather than raising.
  #
  # @return [Array<Node>] Child nodes
  def children
    nodes = []
    current = navigate(@inner_node, :first_child)
    while current
      nodes << wrap(current)
      current = navigate(current, :next)
    end
    nodes
  end

  # Iterate over the child nodes
  #
  # @yieldparam child [Node]
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(__method__) unless block
    children.each(&block)
  end

  # @return [Integer] number of children
  def child_count
    children.size
  end

  # @param index [Integer] position in the child list
  # @return [Node, nil] the child at +index+, or nil when out of range
  def child(index)
    children[index]
  end

  # Byte offset of the node start within the source
  #
  # @return [Integer] 0-based byte offset, never negative
  def start_byte
    pos = inner_source_position
    calculate_byte_offset(zero_based(pos[:start_line]), zero_based(pos[:start_column]))
  end

  # Byte offset derived from the node's end line/column
  #
  # NOTE(review): this converts end_column the same way as start_column
  # (minus one). If Markly's end column is inclusive, the result points at
  # the last byte rather than one past it — confirm against callers.
  #
  # @return [Integer] 0-based byte offset, never negative
  def end_byte
    pos = inner_source_position
    calculate_byte_offset(zero_based(pos[:end_line]), zero_based(pos[:end_column]))
  end

  # @return [Point] 0-based row/column of the node start
  def start_point
    pos = inner_source_position
    Point.new(zero_based(pos[:start_line]), zero_based(pos[:start_column]))
  end

  # @return [Point] 0-based row/column derived from the node end
  def end_point
    pos = inner_source_position
    Point.new(zero_based(pos[:end_line]), zero_based(pos[:end_column]))
  end

  # Get the 1-based line number where this node starts
  #
  # @return [Integer] 1-based line number
  def start_line
    inner_source_position[:start_line]
  end

  # Get the 1-based line number where this node ends
  #
  # @return [Integer] 1-based line number
  def end_line
    inner_source_position[:end_line]
  end

  # Get position information as a hash
  #
  # Returns a hash with 1-based line numbers and 0-based columns.
  #
  # @return [Hash{Symbol => Integer}] Position hash
  def source_position
    {
      start_line: start_line,
      end_line: end_line,
      start_column: start_point.column,
      end_column: end_point.column,
    }
  end

  # Get the first child node
  #
  # @return [Node, nil] First child or nil
  def first_child
    wrap(navigate(@inner_node, :first_child))
  end

  # Every node of this backend is reported as named.
  # @return [Boolean] always true
  def named?
    true
  end

  alias_method :structural?, :named?

  # @return [Boolean] always false for this backend
  def has_error?
    false
  end

  # @return [Boolean] always false for this backend
  def missing?
    false
  end

  # Order nodes by start byte, then by end byte
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] nil when +other+ has no start_byte
  def <=>(other)
    return unless other.respond_to?(:start_byte)
    cmp = start_byte <=> other.start_byte
    return cmp unless cmp&.zero?
    end_byte <=> other.end_byte
  end

  def inspect
    "#<TreeHaver::Backends::Markly::Node type=#{type} raw_type=#{raw_type}>"
  end

  # Convert node to CommonMark format
  #
  # Delegates to the inner Markly node's to_commonmark method.
  #
  # @return [String] CommonMark representation
  def to_commonmark
    @inner_node.to_commonmark
  end

  # Convert node to Markdown format
  #
  # Delegates to the inner Markly node's to_markdown method.
  #
  # @return [String] Markdown representation
  def to_markdown
    @inner_node.to_markdown
  end

  # Convert node to plain text
  #
  # Delegates to the inner Markly node's to_plaintext method.
  #
  # @return [String] Plain text representation
  def to_plaintext
    @inner_node.to_plaintext
  end

  # Convert node to HTML
  #
  # Delegates to the inner Markly node's to_html method.
  #
  # @return [String] HTML representation
  def to_html
    @inner_node.to_html
  end

  # Markly-specific methods

  # Get heading level
  # @return [Integer, nil] nil for non-header nodes or when the lookup fails
  def header_level
    return unless raw_type == "header"
    navigate(@inner_node, :header_level)
  end

  # Get fence info for code blocks
  # @return [String, nil] nil for other node types or when the lookup fails
  def fence_info
    return unless type == "code_block"
    navigate(@inner_node, :fence_info)
  end

  # Get URL for links/images
  # @return [String, nil] nil when the inner node cannot provide one
  def url
    navigate(@inner_node, :url)
  end

  # Get title for links/images
  # @return [String, nil] nil when the inner node cannot provide one
  def title
    navigate(@inner_node, :title)
  end

  # Get the next sibling (Markly uses .next)
  # @return [Node, nil]
  def next_sibling
    wrap(navigate(@inner_node, :next))
  end

  # Get the previous sibling
  # @return [Node, nil]
  def previous_sibling
    wrap(navigate(@inner_node, :previous))
  end

  # Get the parent node
  # @return [Node, nil]
  def parent
    wrap(navigate(@inner_node, :parent))
  end

  private

  # Call a zero-argument accessor on an inner node, treating any failure
  # as "not available".
  #
  # @param inner [Object] inner node to query
  # @param accessor [Symbol] method name to call
  # @return [Object, nil] the accessor result, or nil when it raised
  def navigate(inner, accessor)
    inner.public_send(accessor)
  rescue StandardError
    nil
  end

  # Wrap an inner node, sharing this node's source and line cache.
  #
  # @param inner [Object, nil]
  # @return [Node, nil] nil when +inner+ is nil/false
  def wrap(inner)
    inner ? Node.new(inner, @source, @lines) : nil
  end

  # Convert a 1-based position component to 0-based.
  #
  # A missing or zero component maps to 0 instead of going negative.
  #
  # @param value [Integer, nil]
  # @return [Integer] 0-based value, never negative
  def zero_based(value)
    [value.to_i - 1, 0].max
  end

  # Translate a 0-based line/column pair into a byte offset.
  #
  # Sums the byte size of every line before +line+, then adds +column+
  # capped at the byte size of the target line. A +line+ past the end of
  # the source yields the total byte size of all lines.
  #
  # @param line [Integer] 0-based line index
  # @param column [Integer] 0-based column, counted in bytes
  # @return [Integer] byte offset, never negative
  def calculate_byte_offset(line, column)
    line = [line, 0].max
    column = [column, 0].max

    preceding = @lines.first(line).sum(&:bytesize)
    target = @lines[line]
    target ? preceding + [column, target.bytesize].min : preceding
  end
end
535
+
536
+ # Point struct for position information
537
Point = Struct.new(:row, :column) do
  # Look up a coordinate by name.
  #
  # @param key [Symbol, String] :row / "row" or :column / "column"
  # @return [Integer, nil] the coordinate, or nil for any other key
  def [](key)
    return row if [:row, "row"].include?(key)
    return column if [:column, "column"].include?(key)

    nil
  end

  # @return [Hash{Symbol => Integer}] hash with :row and :column
  def to_h
    members.zip(to_a).to_h
  end

  # @return [String] "(row, column)"
  def to_s
    format("(%s, %s)", row, column)
  end

  def inspect
    format("#<TreeHaver::Backends::Markly::Point row=%s column=%s>", row, column)
  end
end
557
+ end
558
+ end
559
+ end