tree_haver 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +236 -3
- data/CONTRIBUTING.md +100 -0
- data/README.md +470 -85
- data/lib/tree_haver/backends/citrus.rb +423 -0
- data/lib/tree_haver/backends/ffi.rb +405 -150
- data/lib/tree_haver/backends/java.rb +63 -10
- data/lib/tree_haver/backends/mri.rb +154 -27
- data/lib/tree_haver/backends/rust.rb +58 -27
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +526 -0
- data/lib/tree_haver/path_validator.rb +47 -27
- data/lib/tree_haver/tree.rb +259 -0
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +741 -285
- data/sig/tree_haver/backends.rbs +68 -1
- data/sig/tree_haver/path_validator.rbs +1 -0
- data/sig/tree_haver.rbs +95 -9
- data.tar.gz.sig +0 -0
- metadata +12 -8
- metadata.gz.sig +0 -0
|
@@ -7,7 +7,7 @@ module TreeHaver
|
|
|
7
7
|
# This backend integrates with java-tree-sitter JARs on JRuby,
|
|
8
8
|
# leveraging JRuby's native Java integration for optimal performance.
|
|
9
9
|
#
|
|
10
|
-
# java-tree-sitter provides Java bindings to
|
|
10
|
+
# java-tree-sitter provides Java bindings to tree-sitter and supports:
|
|
11
11
|
# - Parsing source code into syntax trees
|
|
12
12
|
# - Incremental parsing via Parser.parse(Tree, String)
|
|
13
13
|
# - The Query API for pattern matching
|
|
@@ -228,13 +228,57 @@ module TreeHaver
|
|
|
228
228
|
# All Java backend implementation classes require JRuby and cannot be tested on MRI/CRuby.
|
|
229
229
|
# JRuby-specific CI jobs would test this code.
|
|
230
230
|
class Language
|
|
231
|
+
include Comparable
|
|
232
|
+
|
|
231
233
|
attr_reader :impl
|
|
232
234
|
|
|
235
|
+
# The backend this language is for
|
|
236
|
+
# @return [Symbol]
|
|
237
|
+
attr_reader :backend
|
|
238
|
+
|
|
239
|
+
# The path this language was loaded from (if known)
|
|
240
|
+
# @return [String, nil]
|
|
241
|
+
attr_reader :path
|
|
242
|
+
|
|
243
|
+
# The symbol name (if known)
|
|
244
|
+
# @return [String, nil]
|
|
245
|
+
attr_reader :symbol
|
|
246
|
+
|
|
233
247
|
# @api private
|
|
234
|
-
def initialize(impl)
|
|
248
|
+
def initialize(impl, path: nil, symbol: nil)
|
|
235
249
|
@impl = impl
|
|
250
|
+
@backend = :java
|
|
251
|
+
@path = path
|
|
252
|
+
@symbol = symbol
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
# Compare languages for ordering; Comparable derives == from this method
|
|
256
|
+
#
|
|
257
|
+
# Java languages are equal if they have the same backend, path, and symbol.
|
|
258
|
+
# Path and symbol uniquely identify a loaded language.
|
|
259
|
+
#
|
|
260
|
+
# @param other [Object] object to compare with
|
|
261
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
262
|
+
def <=>(other)
|
|
263
|
+
return unless other.is_a?(Language)
|
|
264
|
+
return unless other.backend == @backend
|
|
265
|
+
|
|
266
|
+
# Compare by path first, then symbol
|
|
267
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
268
|
+
return cmp unless cmp.zero?
|
|
269
|
+
|
|
270
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
274
|
+
# @return [Integer]
|
|
275
|
+
def hash
|
|
276
|
+
[@backend, @path, @symbol].hash
|
|
236
277
|
end
|
|
237
278
|
|
|
279
|
+
# Alias eql? to ==
|
|
280
|
+
alias_method :eql?, :==
|
|
281
|
+
|
|
238
282
|
# Load a language from a shared library
|
|
239
283
|
#
|
|
240
284
|
# There are three ways java-tree-sitter can load shared libraries:
|
|
@@ -298,7 +342,7 @@ module TreeHaver
|
|
|
298
342
|
combined_lookup = grammar_lookup.or(Java.runtime_lookup)
|
|
299
343
|
|
|
300
344
|
java_lang = Java.java_classes[:Language].load(combined_lookup, sym)
|
|
301
|
-
new(java_lang)
|
|
345
|
+
new(java_lang, path: path, symbol: symbol)
|
|
302
346
|
rescue ::Java::JavaLang::RuntimeException => e
|
|
303
347
|
cause = e.cause
|
|
304
348
|
root_cause = cause&.cause || cause
|
|
@@ -354,7 +398,7 @@ module TreeHaver
|
|
|
354
398
|
# java-tree-sitter's Language.load(String) searches for the language
|
|
355
399
|
# in the classpath using standard naming conventions
|
|
356
400
|
java_lang = Java.java_classes[:Language].load(name)
|
|
357
|
-
new(java_lang)
|
|
401
|
+
new(java_lang, symbol: "tree_sitter_#{name}")
|
|
358
402
|
rescue ::Java::JavaLang::RuntimeException => e
|
|
359
403
|
raise TreeHaver::NotAvailable,
|
|
360
404
|
"Failed to load language '#{name}': #{e.message}. " \
|
|
@@ -383,38 +427,47 @@ module TreeHaver
|
|
|
383
427
|
|
|
384
428
|
# Set the language for this parser
|
|
385
429
|
#
|
|
386
|
-
#
|
|
430
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
431
|
+
# This backend receives the Language wrapper's inner impl (java Language object).
|
|
432
|
+
#
|
|
433
|
+
# @param lang [Object] the Java language object (already unwrapped)
|
|
387
434
|
# @return [void]
|
|
388
435
|
def language=(lang)
|
|
389
|
-
|
|
390
|
-
@parser.language =
|
|
436
|
+
# lang is already unwrapped by TreeHaver::Parser
|
|
437
|
+
@parser.language = lang
|
|
391
438
|
end
|
|
392
439
|
|
|
393
440
|
# Parse source code
|
|
394
441
|
#
|
|
395
442
|
# @param source [String] the source code to parse
|
|
396
|
-
# @return [Tree]
|
|
443
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
397
444
|
def parse(source)
|
|
398
445
|
java_tree = @parser.parse(source)
|
|
446
|
+
# Return the Java tree wrapped in this backend's Tree - TreeHaver::Parser will wrap it
|
|
399
447
|
Tree.new(java_tree)
|
|
400
448
|
end
|
|
401
449
|
|
|
402
450
|
# Parse source code with optional incremental parsing
|
|
403
451
|
#
|
|
452
|
+
# Note: old_tree is already unwrapped by TreeHaver::Parser before reaching this method.
|
|
453
|
+
# The backend receives the raw Tree wrapper's impl, not a TreeHaver::Tree.
|
|
454
|
+
#
|
|
404
455
|
# When old_tree is provided and has been edited, tree-sitter will reuse
|
|
405
456
|
# unchanged nodes for better performance.
|
|
406
457
|
#
|
|
407
|
-
# @param old_tree [Tree, nil] previous tree for incremental parsing
|
|
458
|
+
# @param old_tree [Tree, nil] previous backend tree for incremental parsing (already unwrapped)
|
|
408
459
|
# @param source [String] the source code to parse
|
|
409
|
-
# @return [Tree]
|
|
460
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
410
461
|
# @see https://tree-sitter.github.io/java-tree-sitter/io/github/treesitter/jtreesitter/Parser.html#parse(io.github.treesitter.jtreesitter.Tree,java.lang.String)
|
|
411
462
|
def parse_string(old_tree, source)
|
|
463
|
+
# old_tree has already been unwrapped by TreeHaver::Parser; accept either this backend's Tree or a raw Java tree
|
|
412
464
|
if old_tree
|
|
413
465
|
java_old_tree = old_tree.is_a?(Tree) ? old_tree.impl : old_tree
|
|
414
466
|
java_tree = @parser.parse(java_old_tree, source)
|
|
415
467
|
else
|
|
416
468
|
java_tree = @parser.parse(source)
|
|
417
469
|
end
|
|
470
|
+
# Return the Java tree wrapped in this backend's Tree - TreeHaver::Parser will wrap it
|
|
418
471
|
Tree.new(java_tree)
|
|
419
472
|
end
|
|
420
473
|
end
|
|
@@ -5,7 +5,7 @@ module TreeHaver
|
|
|
5
5
|
# MRI backend using the ruby_tree_sitter gem
|
|
6
6
|
#
|
|
7
7
|
# This backend wraps the ruby_tree_sitter gem, which is a native C extension
|
|
8
|
-
# for MRI Ruby. It provides the most feature-complete
|
|
8
|
+
# for MRI Ruby. It provides the most feature-complete tree-sitter integration
|
|
9
9
|
# on MRI, including support for the Query API.
|
|
10
10
|
#
|
|
11
11
|
# @note This backend only works on MRI Ruby, not JRuby or TruffleRuby
|
|
@@ -28,7 +28,7 @@ module TreeHaver
|
|
|
28
28
|
return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
29
29
|
@load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
30
30
|
begin
|
|
31
|
-
require "ruby_tree_sitter"
|
|
31
|
+
require "tree_sitter" # Note: gem is ruby_tree_sitter but requires tree_sitter
|
|
32
32
|
|
|
33
33
|
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
34
34
|
rescue LoadError
|
|
@@ -37,6 +37,15 @@ module TreeHaver
|
|
|
37
37
|
@loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
# Reset the load state (primarily for testing)
|
|
41
|
+
#
|
|
42
|
+
# @return [void]
|
|
43
|
+
# @api private
|
|
44
|
+
def reset!
|
|
45
|
+
@load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
46
|
+
@loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
47
|
+
end
|
|
48
|
+
|
|
40
49
|
# Get capabilities supported by this backend
|
|
41
50
|
#
|
|
42
51
|
# @return [Hash{Symbol => Object}] capability map
|
|
@@ -56,19 +65,112 @@ module TreeHaver
|
|
|
56
65
|
|
|
57
66
|
# Wrapper for ruby_tree_sitter Language
|
|
58
67
|
#
|
|
59
|
-
#
|
|
68
|
+
# Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
|
|
69
|
+
# API across all backends.
|
|
60
70
|
class Language
|
|
61
|
-
|
|
71
|
+
include Comparable
|
|
72
|
+
|
|
73
|
+
# The wrapped TreeSitter::Language object
|
|
74
|
+
# @return [::TreeSitter::Language]
|
|
75
|
+
attr_reader :inner_language
|
|
76
|
+
|
|
77
|
+
# The backend this language is for
|
|
78
|
+
# @return [Symbol]
|
|
79
|
+
attr_reader :backend
|
|
80
|
+
|
|
81
|
+
# The path this language was loaded from (if known)
|
|
82
|
+
# @return [String, nil]
|
|
83
|
+
attr_reader :path
|
|
84
|
+
|
|
85
|
+
# The symbol name (if known)
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
attr_reader :symbol
|
|
88
|
+
|
|
89
|
+
# @api private
|
|
90
|
+
# @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
|
|
91
|
+
# @param path [String, nil] path language was loaded from
|
|
92
|
+
# @param symbol [String, nil] symbol name
|
|
93
|
+
def initialize(lang, path: nil, symbol: nil)
|
|
94
|
+
@inner_language = lang
|
|
95
|
+
@backend = :mri
|
|
96
|
+
@path = path
|
|
97
|
+
@symbol = symbol
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Compare languages for ordering; Comparable derives == from this method
|
|
101
|
+
#
|
|
102
|
+
# MRI languages are equal if they have the same backend, path, and symbol.
|
|
103
|
+
# Path and symbol uniquely identify a loaded language.
|
|
104
|
+
#
|
|
105
|
+
# @param other [Object] object to compare with
|
|
106
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
107
|
+
def <=>(other)
|
|
108
|
+
return unless other.is_a?(Language)
|
|
109
|
+
return unless other.backend == @backend
|
|
110
|
+
|
|
111
|
+
# Compare by path first, then symbol
|
|
112
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
113
|
+
return cmp unless cmp.zero?
|
|
114
|
+
|
|
115
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
119
|
+
# @return [Integer]
|
|
120
|
+
def hash
|
|
121
|
+
[@backend, @path, @symbol].hash
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Alias eql? to ==
|
|
125
|
+
alias_method :eql?, :==
|
|
126
|
+
|
|
127
|
+
# Convert to the underlying TreeSitter::Language for passing to parser
|
|
128
|
+
#
|
|
129
|
+
# @return [::TreeSitter::Language]
|
|
130
|
+
def to_language
|
|
131
|
+
@inner_language
|
|
132
|
+
end
|
|
133
|
+
alias_method :to_ts_language, :to_language
|
|
134
|
+
|
|
135
|
+
# Load a language from a shared library (preferred method)
|
|
62
136
|
#
|
|
63
137
|
# @param path [String] absolute path to the language shared library
|
|
64
|
-
# @
|
|
138
|
+
# @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
|
|
139
|
+
# @param name [String, nil] optional language name (unused by MRI backend)
|
|
140
|
+
# @return [Language] wrapped language handle
|
|
65
141
|
# @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
|
|
66
142
|
# @example
|
|
67
|
-
# lang = TreeHaver::Backends::MRI::Language.
|
|
143
|
+
# lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
|
|
68
144
|
class << self
|
|
69
|
-
def
|
|
145
|
+
def from_library(path, symbol: nil, name: nil)
|
|
70
146
|
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
|
|
71
|
-
|
|
147
|
+
|
|
148
|
+
# ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
|
|
149
|
+
# where language_name is the language identifier (e.g., "toml", "json")
|
|
150
|
+
# NOT the full symbol name (e.g., NOT "tree_sitter_toml")
|
|
151
|
+
# and path_to_so is the full path to the .so file
|
|
152
|
+
#
|
|
153
|
+
# If name is not provided, derive it from symbol by stripping "tree_sitter_" prefix
|
|
154
|
+
language_name = name || symbol&.sub(/\Atree_sitter_/, "")
|
|
155
|
+
ts_lang = ::TreeSitter::Language.load(language_name, path)
|
|
156
|
+
new(ts_lang, path: path, symbol: symbol)
|
|
157
|
+
rescue NameError => e
|
|
158
|
+
# TreeSitter constant doesn't exist - backend not loaded
|
|
159
|
+
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
|
|
160
|
+
rescue TreeSitter::TreeSitterError => e
|
|
161
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
162
|
+
# This includes: ParserNotFoundError, LanguageLoadError, SymbolNotFoundError, etc.
|
|
163
|
+
raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Load a language from a shared library path (legacy method)
|
|
167
|
+
#
|
|
168
|
+
# @param path [String] absolute path to the language shared library
|
|
169
|
+
# @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
|
|
170
|
+
# @return [Language] wrapped language handle
|
|
171
|
+
# @deprecated Use {from_library} instead
|
|
172
|
+
def from_path(path, symbol: nil)
|
|
173
|
+
from_library(path, symbol: symbol)
|
|
72
174
|
end
|
|
73
175
|
end
|
|
74
176
|
end
|
|
@@ -83,47 +185,72 @@ module TreeHaver
|
|
|
83
185
|
def initialize
|
|
84
186
|
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
|
|
85
187
|
@parser = ::TreeSitter::Parser.new
|
|
188
|
+
rescue NameError => e
|
|
189
|
+
# TreeSitter constant doesn't exist - backend not loaded
|
|
190
|
+
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
|
|
191
|
+
rescue TreeSitter::TreeSitterError => e
|
|
192
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
193
|
+
raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
|
|
86
194
|
end
|
|
87
195
|
|
|
88
196
|
# Set the language for this parser
|
|
89
197
|
#
|
|
90
|
-
#
|
|
198
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
199
|
+
# This backend receives raw ::TreeSitter::Language objects, never wrapped ones.
|
|
200
|
+
#
|
|
201
|
+
# @param lang [::TreeSitter::Language] the language to use (already unwrapped)
|
|
91
202
|
# @return [::TreeSitter::Language] the language that was set
|
|
203
|
+
# @raise [TreeHaver::NotAvailable] if setting language fails
|
|
92
204
|
def language=(lang)
|
|
205
|
+
# lang is already unwrapped by TreeHaver::Parser, use directly
|
|
93
206
|
@parser.language = lang
|
|
207
|
+
# Verify it was set
|
|
208
|
+
raise TreeHaver::NotAvailable, "Language not set correctly" if @parser.language.nil?
|
|
209
|
+
|
|
210
|
+
# Return the language object
|
|
211
|
+
lang
|
|
212
|
+
rescue TreeSitter::TreeSitterError => e
|
|
213
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
214
|
+
raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
|
|
94
215
|
end
|
|
95
216
|
|
|
96
217
|
# Parse source code
|
|
97
218
|
#
|
|
219
|
+
# ruby_tree_sitter provides parse_string for string input
|
|
220
|
+
#
|
|
98
221
|
# @param source [String] the source code to parse
|
|
99
|
-
# @return [::TreeSitter::Tree]
|
|
222
|
+
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
|
|
223
|
+
# @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
|
|
100
224
|
def parse(source)
|
|
101
|
-
|
|
225
|
+
# ruby_tree_sitter's parse_string(old_tree, string) method
|
|
226
|
+
# Pass nil for old_tree (initial parse)
|
|
227
|
+
# Return raw tree - TreeHaver::Parser will wrap it
|
|
228
|
+
tree = @parser.parse_string(nil, source)
|
|
229
|
+
raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
|
|
230
|
+
tree
|
|
231
|
+
rescue TreeSitter::TreeSitterError => e
|
|
232
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
233
|
+
raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
|
|
102
234
|
end
|
|
103
235
|
|
|
104
236
|
# Parse source code with optional incremental parsing
|
|
105
237
|
#
|
|
106
|
-
#
|
|
238
|
+
# Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
|
|
239
|
+
# The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
|
|
240
|
+
#
|
|
241
|
+
# @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
|
|
107
242
|
# @param source [String] the source code to parse
|
|
108
|
-
# @return [::TreeSitter::Tree]
|
|
243
|
+
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
|
|
244
|
+
# @raise [TreeHaver::NotAvailable] if parsing fails
|
|
109
245
|
def parse_string(old_tree, source)
|
|
246
|
+
# old_tree is already unwrapped by TreeHaver::Parser, pass it directly
|
|
247
|
+
# Return raw tree - TreeHaver::Parser will wrap it
|
|
110
248
|
@parser.parse_string(old_tree, source)
|
|
249
|
+
rescue TreeSitter::TreeSitterError => e
|
|
250
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
251
|
+
raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
|
|
111
252
|
end
|
|
112
253
|
end
|
|
113
|
-
|
|
114
|
-
# Wrapper for ruby_tree_sitter Tree
|
|
115
|
-
#
|
|
116
|
-
# Not used directly; TreeHaver passes through ::TreeSitter::Tree objects.
|
|
117
|
-
class Tree
|
|
118
|
-
# Not used directly; we pass through ruby_tree_sitter::Tree
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
# Wrapper for ruby_tree_sitter Node
|
|
122
|
-
#
|
|
123
|
-
# Not used directly; TreeHaver passes through ::TreeSitter::Node objects.
|
|
124
|
-
class Node
|
|
125
|
-
# Not used directly; we pass through ruby_tree_sitter::Node
|
|
126
|
-
end
|
|
127
254
|
end
|
|
128
255
|
end
|
|
129
256
|
end
|
|
@@ -5,7 +5,7 @@ module TreeHaver
|
|
|
5
5
|
# Rust backend using the tree_stump gem
|
|
6
6
|
#
|
|
7
7
|
# This backend wraps the tree_stump gem, which provides Ruby bindings to
|
|
8
|
-
#
|
|
8
|
+
# tree-sitter written in Rust. It offers native performance with Rust's
|
|
9
9
|
# safety guarantees and includes precompiled binaries for common platforms.
|
|
10
10
|
#
|
|
11
11
|
# tree_stump supports incremental parsing and the Query API, making it
|
|
@@ -54,14 +54,14 @@ module TreeHaver
|
|
|
54
54
|
# @return [Hash{Symbol => Object}] capability map
|
|
55
55
|
# @example
|
|
56
56
|
# TreeHaver::Backends::Rust.capabilities
|
|
57
|
-
# # => { backend: :rust, query: true, bytes_field: true, incremental: true }
|
|
57
|
+
# # => { backend: :rust, query: true, bytes_field: true, incremental: false }
|
|
58
58
|
def capabilities
|
|
59
59
|
return {} unless available?
|
|
60
60
|
{
|
|
61
61
|
backend: :rust,
|
|
62
62
|
query: true,
|
|
63
63
|
bytes_field: true,
|
|
64
|
-
incremental: true,
|
|
64
|
+
incremental: false, # TreeStump doesn't currently expose incremental parsing to Ruby
|
|
65
65
|
}
|
|
66
66
|
end
|
|
67
67
|
end
|
|
@@ -72,16 +72,52 @@ module TreeHaver
|
|
|
72
72
|
# tree_stump uses a registration-based API where languages are registered
|
|
73
73
|
# by name, then referenced by that name when setting parser language.
|
|
74
74
|
class Language
|
|
75
|
+
include Comparable
|
|
76
|
+
|
|
75
77
|
# The registered language name
|
|
76
78
|
# @return [String]
|
|
77
79
|
attr_reader :name
|
|
78
80
|
|
|
81
|
+
# The backend this language is for
|
|
82
|
+
# @return [Symbol]
|
|
83
|
+
attr_reader :backend
|
|
84
|
+
|
|
85
|
+
# The path this language was loaded from (if known)
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
attr_reader :path
|
|
88
|
+
|
|
79
89
|
# @api private
|
|
80
90
|
# @param name [String] the registered language name
|
|
81
|
-
|
|
91
|
+
# @param path [String, nil] path language was loaded from
|
|
92
|
+
def initialize(name, path: nil)
|
|
82
93
|
@name = name
|
|
94
|
+
@backend = :rust
|
|
95
|
+
@path = path
|
|
83
96
|
end
|
|
84
97
|
|
|
98
|
+
# Compare languages for ordering; Comparable derives == from this method
|
|
99
|
+
#
|
|
100
|
+
# Rust languages are equal if they have the same backend and name.
|
|
101
|
+
# Name uniquely identifies a registered language in TreeStump.
|
|
102
|
+
#
|
|
103
|
+
# @param other [Object] object to compare with
|
|
104
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
105
|
+
def <=>(other)
|
|
106
|
+
return unless other.is_a?(Language)
|
|
107
|
+
return unless other.backend == @backend
|
|
108
|
+
|
|
109
|
+
@name <=> other.name
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
113
|
+
# @return [Integer]
|
|
114
|
+
def hash
|
|
115
|
+
[@backend, @name].hash
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Alias eql? to ==
|
|
119
|
+
alias_method :eql?, :==
|
|
120
|
+
|
|
85
121
|
# Load a language from a shared library path
|
|
86
122
|
#
|
|
87
123
|
# @param path [String] absolute path to the language shared library
|
|
@@ -102,7 +138,7 @@ module TreeHaver
|
|
|
102
138
|
# The name is used to derive the symbol automatically (tree_sitter_<name>)
|
|
103
139
|
lang_name = name || File.basename(path, ".*").sub(/^libtree-sitter-/, "")
|
|
104
140
|
::TreeStump.register_lang(lang_name, path)
|
|
105
|
-
new(lang_name)
|
|
141
|
+
new(lang_name, path: path)
|
|
106
142
|
rescue RuntimeError => e
|
|
107
143
|
raise TreeHaver::NotAvailable, "Failed to load language from #{path}: #{e.message}"
|
|
108
144
|
end
|
|
@@ -128,11 +164,16 @@ module TreeHaver
|
|
|
128
164
|
|
|
129
165
|
# Set the language for this parser
|
|
130
166
|
#
|
|
131
|
-
#
|
|
167
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
168
|
+
# When called from TreeHaver::Parser, receives String (language name).
|
|
169
|
+
# For backward compatibility and backend tests, also handles Language wrapper.
|
|
170
|
+
#
|
|
171
|
+
# @param lang [Language, String] the language wrapper or name string
|
|
132
172
|
# @return [Language, String] the language that was set
|
|
133
173
|
def language=(lang)
|
|
134
|
-
#
|
|
174
|
+
# Extract language name (handle both wrapper and raw string)
|
|
135
175
|
lang_name = lang.respond_to?(:name) ? lang.name : lang.to_s
|
|
176
|
+
# tree_stump uses set_language with a string name
|
|
136
177
|
@parser.set_language(lang_name)
|
|
137
178
|
lang
|
|
138
179
|
end
|
|
@@ -140,36 +181,26 @@ module TreeHaver
|
|
|
140
181
|
# Parse source code
|
|
141
182
|
#
|
|
142
183
|
# @param source [String] the source code to parse
|
|
143
|
-
# @return [
|
|
184
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
144
185
|
def parse(source)
|
|
186
|
+
# Return raw tree_stump tree - TreeHaver::Parser will wrap it
|
|
145
187
|
@parser.parse(source)
|
|
146
188
|
end
|
|
147
189
|
|
|
148
190
|
# Parse source code with optional incremental parsing
|
|
149
191
|
#
|
|
150
|
-
#
|
|
192
|
+
# Note: TreeStump does not currently expose incremental parsing to Ruby.
|
|
193
|
+
# The parse method always does a full parse, ignoring old_tree.
|
|
194
|
+
#
|
|
195
|
+
# @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing (ignored)
|
|
151
196
|
# @param source [String] the source code to parse
|
|
152
|
-
# @return [
|
|
153
|
-
def parse_string(old_tree, source)
|
|
154
|
-
#
|
|
155
|
-
#
|
|
197
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
198
|
+
def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
|
|
199
|
+
# TreeStump's parse method only accepts source as a single argument
|
|
200
|
+
# and internally always passes None for the old tree (no incremental parsing support)
|
|
156
201
|
@parser.parse(source)
|
|
157
202
|
end
|
|
158
203
|
end
|
|
159
|
-
|
|
160
|
-
# Wrapper for tree_stump Tree
|
|
161
|
-
#
|
|
162
|
-
# Not used directly; TreeHaver passes through tree_stump Tree objects.
|
|
163
|
-
class Tree
|
|
164
|
-
# Not used directly; we pass through tree_stump::Tree
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
# Wrapper for tree_stump Node
|
|
168
|
-
#
|
|
169
|
-
# Not used directly; TreeHaver passes through tree_stump::Node objects.
|
|
170
|
-
class Node
|
|
171
|
-
# Not used directly; we pass through tree_stump::Node
|
|
172
|
-
end
|
|
173
204
|
end
|
|
174
205
|
end
|
|
175
206
|
end
|