tree_haver 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +190 -1
- data/CONTRIBUTING.md +100 -0
- data/README.md +342 -11
- data/lib/tree_haver/backends/citrus.rb +141 -20
- data/lib/tree_haver/backends/ffi.rb +338 -141
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/mri.rb +154 -17
- data/lib/tree_haver/backends/rust.rb +59 -16
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +150 -0
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +697 -56
- data.tar.gz.sig +0 -0
- metadata +5 -4
- metadata.gz.sig +0 -0
|
@@ -228,13 +228,57 @@ module TreeHaver
|
|
|
228
228
|
# All Java backend implementation classes require JRuby and cannot be tested on MRI/CRuby.
|
|
229
229
|
# JRuby-specific CI jobs would test this code.
|
|
230
230
|
class Language
|
|
231
|
+
include Comparable
|
|
232
|
+
|
|
231
233
|
attr_reader :impl
|
|
232
234
|
|
|
235
|
+
# The backend this language is for
|
|
236
|
+
# @return [Symbol]
|
|
237
|
+
attr_reader :backend
|
|
238
|
+
|
|
239
|
+
# The path this language was loaded from (if known)
|
|
240
|
+
# @return [String, nil]
|
|
241
|
+
attr_reader :path
|
|
242
|
+
|
|
243
|
+
# The symbol name (if known)
|
|
244
|
+
# @return [String, nil]
|
|
245
|
+
attr_reader :symbol
|
|
246
|
+
|
|
233
247
|
# @api private
|
|
234
|
-
def initialize(impl)
|
|
248
|
+
def initialize(impl, path: nil, symbol: nil)
|
|
235
249
|
@impl = impl
|
|
250
|
+
@backend = :java
|
|
251
|
+
@path = path
|
|
252
|
+
@symbol = symbol
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
# Compare languages for equality
|
|
256
|
+
#
|
|
257
|
+
# Java languages are equal if they have the same backend, path, and symbol.
|
|
258
|
+
# Path and symbol uniquely identify a loaded language.
|
|
259
|
+
#
|
|
260
|
+
# @param other [Object] object to compare with
|
|
261
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
262
|
+
def <=>(other)
|
|
263
|
+
return unless other.is_a?(Language)
|
|
264
|
+
return unless other.backend == @backend
|
|
265
|
+
|
|
266
|
+
# Compare by path first, then symbol
|
|
267
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
268
|
+
return cmp unless cmp.zero?
|
|
269
|
+
|
|
270
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
274
|
+
# @return [Integer]
|
|
275
|
+
def hash
|
|
276
|
+
[@backend, @path, @symbol].hash
|
|
236
277
|
end
|
|
237
278
|
|
|
279
|
+
# Alias eql? to ==
|
|
280
|
+
alias_method :eql?, :==
|
|
281
|
+
|
|
238
282
|
# Load a language from a shared library
|
|
239
283
|
#
|
|
240
284
|
# There are three ways java-tree-sitter can load shared libraries:
|
|
@@ -298,7 +342,7 @@ module TreeHaver
|
|
|
298
342
|
combined_lookup = grammar_lookup.or(Java.runtime_lookup)
|
|
299
343
|
|
|
300
344
|
java_lang = Java.java_classes[:Language].load(combined_lookup, sym)
|
|
301
|
-
new(java_lang)
|
|
345
|
+
new(java_lang, path: path, symbol: symbol)
|
|
302
346
|
rescue ::Java::JavaLang::RuntimeException => e
|
|
303
347
|
cause = e.cause
|
|
304
348
|
root_cause = cause&.cause || cause
|
|
@@ -354,7 +398,7 @@ module TreeHaver
|
|
|
354
398
|
# java-tree-sitter's Language.load(String) searches for the language
|
|
355
399
|
# in the classpath using standard naming conventions
|
|
356
400
|
java_lang = Java.java_classes[:Language].load(name)
|
|
357
|
-
new(java_lang)
|
|
401
|
+
new(java_lang, symbol: "tree_sitter_#{name}")
|
|
358
402
|
rescue ::Java::JavaLang::RuntimeException => e
|
|
359
403
|
raise TreeHaver::NotAvailable,
|
|
360
404
|
"Failed to load language '#{name}': #{e.message}. " \
|
|
@@ -383,43 +427,48 @@ module TreeHaver
|
|
|
383
427
|
|
|
384
428
|
# Set the language for this parser
|
|
385
429
|
#
|
|
386
|
-
#
|
|
430
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
431
|
+
# This backend receives the Language wrapper's inner impl (java Language object).
|
|
432
|
+
#
|
|
433
|
+
# @param lang [Object] the Java language object (already unwrapped)
|
|
387
434
|
# @return [void]
|
|
388
435
|
def language=(lang)
|
|
389
|
-
|
|
390
|
-
@parser.language =
|
|
436
|
+
# lang is already unwrapped by TreeHaver::Parser
|
|
437
|
+
@parser.language = lang
|
|
391
438
|
end
|
|
392
439
|
|
|
393
440
|
# Parse source code
|
|
394
441
|
#
|
|
395
442
|
# @param source [String] the source code to parse
|
|
396
|
-
# @return [
|
|
443
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
397
444
|
def parse(source)
|
|
398
445
|
java_tree = @parser.parse(source)
|
|
399
|
-
|
|
400
|
-
|
|
446
|
+
# Return raw Java::Tree - TreeHaver::Parser will wrap it
|
|
447
|
+
Tree.new(java_tree)
|
|
401
448
|
end
|
|
402
449
|
|
|
403
450
|
# Parse source code with optional incremental parsing
|
|
404
451
|
#
|
|
452
|
+
# Note: old_tree is already unwrapped by TreeHaver::Parser before reaching this method.
|
|
453
|
+
# The backend receives the raw Tree wrapper's impl, not a TreeHaver::Tree.
|
|
454
|
+
#
|
|
405
455
|
# When old_tree is provided and has been edited, tree-sitter will reuse
|
|
406
456
|
# unchanged nodes for better performance.
|
|
407
457
|
#
|
|
408
|
-
# @param old_tree [
|
|
458
|
+
# @param old_tree [Tree, nil] previous backend tree for incremental parsing (already unwrapped)
|
|
409
459
|
# @param source [String] the source code to parse
|
|
410
|
-
# @return [
|
|
460
|
+
# @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
411
461
|
# @see https://tree-sitter.github.io/java-tree-sitter/io/github/treesitter/jtreesitter/Parser.html#parse(io.github.treesitter.jtreesitter.Tree,java.lang.String)
|
|
412
462
|
def parse_string(old_tree, source)
|
|
463
|
+
# old_tree is already unwrapped to Tree wrapper's impl by TreeHaver::Parser
|
|
413
464
|
if old_tree
|
|
414
|
-
|
|
415
|
-
inner_old_tree = old_tree.respond_to?(:inner_tree) ? old_tree.inner_tree : old_tree
|
|
416
|
-
java_old_tree = inner_old_tree.is_a?(Tree) ? inner_old_tree.impl : inner_old_tree
|
|
465
|
+
java_old_tree = old_tree.is_a?(Tree) ? old_tree.impl : old_tree
|
|
417
466
|
java_tree = @parser.parse(java_old_tree, source)
|
|
418
467
|
else
|
|
419
468
|
java_tree = @parser.parse(source)
|
|
420
469
|
end
|
|
421
|
-
|
|
422
|
-
|
|
470
|
+
# Return raw Java::Tree - TreeHaver::Parser will wrap it
|
|
471
|
+
Tree.new(java_tree)
|
|
423
472
|
end
|
|
424
473
|
end
|
|
425
474
|
|
|
@@ -28,7 +28,7 @@ module TreeHaver
|
|
|
28
28
|
return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
29
29
|
@load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
30
30
|
begin
|
|
31
|
-
require "ruby_tree_sitter
|
|
31
|
+
require "tree_sitter" # Note: gem is ruby_tree_sitter but requires tree_sitter
|
|
32
32
|
|
|
33
33
|
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
34
34
|
rescue LoadError
|
|
@@ -37,6 +37,15 @@ module TreeHaver
|
|
|
37
37
|
@loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
# Reset the load state (primarily for testing)
|
|
41
|
+
#
|
|
42
|
+
# @return [void]
|
|
43
|
+
# @api private
|
|
44
|
+
def reset!
|
|
45
|
+
@load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
46
|
+
@loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
47
|
+
end
|
|
48
|
+
|
|
40
49
|
# Get capabilities supported by this backend
|
|
41
50
|
#
|
|
42
51
|
# @return [Hash{Symbol => Object}] capability map
|
|
@@ -56,19 +65,112 @@ module TreeHaver
|
|
|
56
65
|
|
|
57
66
|
# Wrapper for ruby_tree_sitter Language
|
|
58
67
|
#
|
|
59
|
-
#
|
|
68
|
+
# Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
|
|
69
|
+
# API across all backends.
|
|
60
70
|
class Language
|
|
61
|
-
|
|
71
|
+
include Comparable
|
|
72
|
+
|
|
73
|
+
# The wrapped TreeSitter::Language object
|
|
74
|
+
# @return [::TreeSitter::Language]
|
|
75
|
+
attr_reader :inner_language
|
|
76
|
+
|
|
77
|
+
# The backend this language is for
|
|
78
|
+
# @return [Symbol]
|
|
79
|
+
attr_reader :backend
|
|
80
|
+
|
|
81
|
+
# The path this language was loaded from (if known)
|
|
82
|
+
# @return [String, nil]
|
|
83
|
+
attr_reader :path
|
|
84
|
+
|
|
85
|
+
# The symbol name (if known)
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
attr_reader :symbol
|
|
88
|
+
|
|
89
|
+
# @api private
|
|
90
|
+
# @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
|
|
91
|
+
# @param path [String, nil] path language was loaded from
|
|
92
|
+
# @param symbol [String, nil] symbol name
|
|
93
|
+
def initialize(lang, path: nil, symbol: nil)
|
|
94
|
+
@inner_language = lang
|
|
95
|
+
@backend = :mri
|
|
96
|
+
@path = path
|
|
97
|
+
@symbol = symbol
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Compare languages for equality
|
|
101
|
+
#
|
|
102
|
+
# MRI languages are equal if they have the same backend, path, and symbol.
|
|
103
|
+
# Path and symbol uniquely identify a loaded language.
|
|
104
|
+
#
|
|
105
|
+
# @param other [Object] object to compare with
|
|
106
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
107
|
+
def <=>(other)
|
|
108
|
+
return unless other.is_a?(Language)
|
|
109
|
+
return unless other.backend == @backend
|
|
110
|
+
|
|
111
|
+
# Compare by path first, then symbol
|
|
112
|
+
cmp = (@path || "") <=> (other.path || "")
|
|
113
|
+
return cmp unless cmp.zero?
|
|
114
|
+
|
|
115
|
+
(@symbol || "") <=> (other.symbol || "")
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
119
|
+
# @return [Integer]
|
|
120
|
+
def hash
|
|
121
|
+
[@backend, @path, @symbol].hash
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Alias eql? to ==
|
|
125
|
+
alias_method :eql?, :==
|
|
126
|
+
|
|
127
|
+
# Convert to the underlying TreeSitter::Language for passing to parser
|
|
128
|
+
#
|
|
129
|
+
# @return [::TreeSitter::Language]
|
|
130
|
+
def to_language
|
|
131
|
+
@inner_language
|
|
132
|
+
end
|
|
133
|
+
alias_method :to_ts_language, :to_language
|
|
134
|
+
|
|
135
|
+
# Load a language from a shared library (preferred method)
|
|
62
136
|
#
|
|
63
137
|
# @param path [String] absolute path to the language shared library
|
|
64
|
-
# @
|
|
138
|
+
# @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
|
|
139
|
+
# @param name [String, nil] optional language name (unused by MRI backend)
|
|
140
|
+
# @return [Language] wrapped language handle
|
|
65
141
|
# @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
|
|
66
142
|
# @example
|
|
67
|
-
# lang = TreeHaver::Backends::MRI::Language.
|
|
143
|
+
# lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
|
|
68
144
|
class << self
|
|
69
|
-
def
|
|
145
|
+
def from_library(path, symbol: nil, name: nil)
|
|
70
146
|
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
|
|
71
|
-
|
|
147
|
+
|
|
148
|
+
# ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
|
|
149
|
+
# where language_name is the language identifier (e.g., "toml", "json")
|
|
150
|
+
# NOT the full symbol name (e.g., NOT "tree_sitter_toml")
|
|
151
|
+
# and path_to_so is the full path to the .so file
|
|
152
|
+
#
|
|
153
|
+
# If name is not provided, derive it from symbol by stripping "tree_sitter_" prefix
|
|
154
|
+
language_name = name || symbol&.sub(/\Atree_sitter_/, "")
|
|
155
|
+
ts_lang = ::TreeSitter::Language.load(language_name, path)
|
|
156
|
+
new(ts_lang, path: path, symbol: symbol)
|
|
157
|
+
rescue NameError => e
|
|
158
|
+
# TreeSitter constant doesn't exist - backend not loaded
|
|
159
|
+
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
|
|
160
|
+
rescue TreeSitter::TreeSitterError => e
|
|
161
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
162
|
+
# This includes: ParserNotFoundError, LanguageLoadError, SymbolNotFoundError, etc.
|
|
163
|
+
raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Load a language from a shared library path (legacy method)
|
|
167
|
+
#
|
|
168
|
+
# @param path [String] absolute path to the language shared library
|
|
169
|
+
# @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
|
|
170
|
+
# @return [Language] wrapped language handle
|
|
171
|
+
# @deprecated Use {from_library} instead
|
|
172
|
+
def from_path(path, symbol: nil)
|
|
173
|
+
from_library(path, symbol: symbol)
|
|
72
174
|
end
|
|
73
175
|
end
|
|
74
176
|
end
|
|
@@ -83,35 +185,70 @@ module TreeHaver
|
|
|
83
185
|
def initialize
|
|
84
186
|
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
|
|
85
187
|
@parser = ::TreeSitter::Parser.new
|
|
188
|
+
rescue NameError => e
|
|
189
|
+
# TreeSitter constant doesn't exist - backend not loaded
|
|
190
|
+
raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
|
|
191
|
+
rescue TreeSitter::TreeSitterError => e
|
|
192
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
193
|
+
raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
|
|
86
194
|
end
|
|
87
195
|
|
|
88
196
|
# Set the language for this parser
|
|
89
197
|
#
|
|
90
|
-
#
|
|
198
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
199
|
+
# This backend receives raw ::TreeSitter::Language objects, never wrapped ones.
|
|
200
|
+
#
|
|
201
|
+
# @param lang [::TreeSitter::Language] the language to use (already unwrapped)
|
|
91
202
|
# @return [::TreeSitter::Language] the language that was set
|
|
203
|
+
# @raise [TreeHaver::NotAvailable] if setting language fails
|
|
92
204
|
def language=(lang)
|
|
205
|
+
# lang is already unwrapped by TreeHaver::Parser, use directly
|
|
93
206
|
@parser.language = lang
|
|
207
|
+
# Verify it was set
|
|
208
|
+
raise TreeHaver::NotAvailable, "Language not set correctly" if @parser.language.nil?
|
|
209
|
+
|
|
210
|
+
# Return the language object
|
|
211
|
+
lang
|
|
212
|
+
rescue TreeSitter::TreeSitterError => e
|
|
213
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
214
|
+
raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
|
|
94
215
|
end
|
|
95
216
|
|
|
96
217
|
# Parse source code
|
|
97
218
|
#
|
|
219
|
+
# ruby_tree_sitter provides parse_string for string input
|
|
220
|
+
#
|
|
98
221
|
# @param source [String] the source code to parse
|
|
99
|
-
# @return [
|
|
222
|
+
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
|
|
223
|
+
# @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
|
|
100
224
|
def parse(source)
|
|
101
|
-
|
|
102
|
-
|
|
225
|
+
# ruby_tree_sitter's parse_string(old_tree, string) method
|
|
226
|
+
# Pass nil for old_tree (initial parse)
|
|
227
|
+
# Return raw tree - TreeHaver::Parser will wrap it
|
|
228
|
+
tree = @parser.parse_string(nil, source)
|
|
229
|
+
raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
|
|
230
|
+
tree
|
|
231
|
+
rescue TreeSitter::TreeSitterError => e
|
|
232
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
233
|
+
raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
|
|
103
234
|
end
|
|
104
235
|
|
|
105
236
|
# Parse source code with optional incremental parsing
|
|
106
237
|
#
|
|
107
|
-
#
|
|
238
|
+
# Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
|
|
239
|
+
# The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
|
|
240
|
+
#
|
|
241
|
+
# @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
|
|
108
242
|
# @param source [String] the source code to parse
|
|
109
|
-
# @return [
|
|
243
|
+
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
|
|
244
|
+
# @raise [TreeHaver::NotAvailable] if parsing fails
|
|
110
245
|
def parse_string(old_tree, source)
|
|
111
|
-
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
246
|
+
# old_tree is already unwrapped by TreeHaver::Parser, pass it directly
|
|
247
|
+
# Return raw tree - TreeHaver::Parser will wrap it
|
|
248
|
+
@parser.parse_string(old_tree, source)
|
|
249
|
+
rescue TreeSitter::TreeSitterError => e
|
|
250
|
+
# TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
|
|
251
|
+
raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
|
|
115
252
|
end
|
|
116
253
|
end
|
|
117
254
|
end
|
|
@@ -54,14 +54,14 @@ module TreeHaver
|
|
|
54
54
|
# @return [Hash{Symbol => Object}] capability map
|
|
55
55
|
# @example
|
|
56
56
|
# TreeHaver::Backends::Rust.capabilities
|
|
57
|
-
# # => { backend: :rust, query: true, bytes_field: true, incremental:
|
|
57
|
+
# # => { backend: :rust, query: true, bytes_field: true, incremental: false }
|
|
58
58
|
def capabilities
|
|
59
59
|
return {} unless available?
|
|
60
60
|
{
|
|
61
61
|
backend: :rust,
|
|
62
62
|
query: true,
|
|
63
63
|
bytes_field: true,
|
|
64
|
-
incremental:
|
|
64
|
+
incremental: false, # TreeStump doesn't currently expose incremental parsing to Ruby
|
|
65
65
|
}
|
|
66
66
|
end
|
|
67
67
|
end
|
|
@@ -72,16 +72,52 @@ module TreeHaver
|
|
|
72
72
|
# tree_stump uses a registration-based API where languages are registered
|
|
73
73
|
# by name, then referenced by that name when setting parser language.
|
|
74
74
|
class Language
|
|
75
|
+
include Comparable
|
|
76
|
+
|
|
75
77
|
# The registered language name
|
|
76
78
|
# @return [String]
|
|
77
79
|
attr_reader :name
|
|
78
80
|
|
|
81
|
+
# The backend this language is for
|
|
82
|
+
# @return [Symbol]
|
|
83
|
+
attr_reader :backend
|
|
84
|
+
|
|
85
|
+
# The path this language was loaded from (if known)
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
attr_reader :path
|
|
88
|
+
|
|
79
89
|
# @api private
|
|
80
90
|
# @param name [String] the registered language name
|
|
81
|
-
|
|
91
|
+
# @param path [String, nil] path language was loaded from
|
|
92
|
+
def initialize(name, path: nil)
|
|
82
93
|
@name = name
|
|
94
|
+
@backend = :rust
|
|
95
|
+
@path = path
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Compare languages for equality
|
|
99
|
+
#
|
|
100
|
+
# Rust languages are equal if they have the same backend and name.
|
|
101
|
+
# Name uniquely identifies a registered language in TreeStump.
|
|
102
|
+
#
|
|
103
|
+
# @param other [Object] object to compare with
|
|
104
|
+
# @return [Integer, nil] -1, 0, 1, or nil if not comparable
|
|
105
|
+
def <=>(other)
|
|
106
|
+
return unless other.is_a?(Language)
|
|
107
|
+
return unless other.backend == @backend
|
|
108
|
+
|
|
109
|
+
@name <=> other.name
|
|
83
110
|
end
|
|
84
111
|
|
|
112
|
+
# Hash value for this language (for use in Sets/Hashes)
|
|
113
|
+
# @return [Integer]
|
|
114
|
+
def hash
|
|
115
|
+
[@backend, @name].hash
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Alias eql? to ==
|
|
119
|
+
alias_method :eql?, :==
|
|
120
|
+
|
|
85
121
|
# Load a language from a shared library path
|
|
86
122
|
#
|
|
87
123
|
# @param path [String] absolute path to the language shared library
|
|
@@ -102,7 +138,7 @@ module TreeHaver
|
|
|
102
138
|
# The name is used to derive the symbol automatically (tree_sitter_<name>)
|
|
103
139
|
lang_name = name || File.basename(path, ".*").sub(/^libtree-sitter-/, "")
|
|
104
140
|
::TreeStump.register_lang(lang_name, path)
|
|
105
|
-
new(lang_name)
|
|
141
|
+
new(lang_name, path: path)
|
|
106
142
|
rescue RuntimeError => e
|
|
107
143
|
raise TreeHaver::NotAvailable, "Failed to load language from #{path}: #{e.message}"
|
|
108
144
|
end
|
|
@@ -128,11 +164,16 @@ module TreeHaver
|
|
|
128
164
|
|
|
129
165
|
# Set the language for this parser
|
|
130
166
|
#
|
|
131
|
-
#
|
|
167
|
+
# Note: TreeHaver::Parser unwraps language objects before calling this method.
|
|
168
|
+
# When called from TreeHaver::Parser, receives String (language name).
|
|
169
|
+
# For backward compatibility and backend tests, also handles Language wrapper.
|
|
170
|
+
#
|
|
171
|
+
# @param lang [Language, String] the language wrapper or name string
|
|
132
172
|
# @return [Language, String] the language that was set
|
|
133
173
|
def language=(lang)
|
|
134
|
-
#
|
|
174
|
+
# Extract language name (handle both wrapper and raw string)
|
|
135
175
|
lang_name = lang.respond_to?(:name) ? lang.name : lang.to_s
|
|
176
|
+
# tree_stump uses set_language with a string name
|
|
136
177
|
@parser.set_language(lang_name)
|
|
137
178
|
lang
|
|
138
179
|
end
|
|
@@ -140,22 +181,24 @@ module TreeHaver
|
|
|
140
181
|
# Parse source code
|
|
141
182
|
#
|
|
142
183
|
# @param source [String] the source code to parse
|
|
143
|
-
# @return [
|
|
184
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
144
185
|
def parse(source)
|
|
145
|
-
tree
|
|
146
|
-
|
|
186
|
+
# Return raw tree_stump tree - TreeHaver::Parser will wrap it
|
|
187
|
+
@parser.parse(source)
|
|
147
188
|
end
|
|
148
189
|
|
|
149
190
|
# Parse source code with optional incremental parsing
|
|
150
191
|
#
|
|
151
|
-
#
|
|
192
|
+
# Note: TreeStump does not currently expose incremental parsing to Ruby.
|
|
193
|
+
# The parse method always does a full parse, ignoring old_tree.
|
|
194
|
+
#
|
|
195
|
+
# @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing (ignored)
|
|
152
196
|
# @param source [String] the source code to parse
|
|
153
|
-
# @return [
|
|
154
|
-
def parse_string(old_tree, source)
|
|
155
|
-
#
|
|
156
|
-
#
|
|
157
|
-
|
|
158
|
-
TreeHaver::Tree.new(tree, source: source)
|
|
197
|
+
# @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
|
|
198
|
+
def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
|
|
199
|
+
# TreeStump's parse method only accepts source as a single argument
|
|
200
|
+
# and internally always passes None for the old tree (no incremental parsing support)
|
|
201
|
+
@parser.parse(source)
|
|
159
202
|
end
|
|
160
203
|
end
|
|
161
204
|
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Utility for finding and registering Citrus grammar gems.
|
|
5
|
+
#
|
|
6
|
+
# CitrusGrammarFinder provides language-agnostic discovery of Citrus grammar
|
|
7
|
+
# gems. Given a language name and gem information, it attempts to load the
|
|
8
|
+
# grammar and register it with tree_haver.
|
|
9
|
+
#
|
|
10
|
+
# Unlike tree-sitter grammars (which are .so files), Citrus grammars are
|
|
11
|
+
# Ruby modules that respond to .parse(source). This class handles the
|
|
12
|
+
# discovery and registration of these grammars.
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage with toml-rb
|
|
15
|
+
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
16
|
+
# language: :toml,
|
|
17
|
+
# gem_name: "toml-rb",
|
|
18
|
+
# grammar_const: "TomlRB::Document"
|
|
19
|
+
# )
|
|
20
|
+
# finder.register! if finder.available?
|
|
21
|
+
#
|
|
22
|
+
# @example With custom require path
|
|
23
|
+
# finder = TreeHaver::CitrusGrammarFinder.new(
|
|
24
|
+
# language: :json,
|
|
25
|
+
# gem_name: "json-rb",
|
|
26
|
+
# grammar_const: "JsonRB::Grammar",
|
|
27
|
+
# require_path: "json/rb"
|
|
28
|
+
# )
|
|
29
|
+
#
|
|
30
|
+
# @see GrammarFinder For tree-sitter grammar discovery
|
|
31
|
+
class CitrusGrammarFinder
|
|
32
|
+
# @return [Symbol] the language identifier
|
|
33
|
+
attr_reader :language_name
|
|
34
|
+
|
|
35
|
+
# @return [String] the gem name to require
|
|
36
|
+
attr_reader :gem_name
|
|
37
|
+
|
|
38
|
+
# @return [String] the constant path to the grammar (e.g., "TomlRB::Document")
|
|
39
|
+
attr_reader :grammar_const
|
|
40
|
+
|
|
41
|
+
# @return [String, nil] custom require path (defaults to gem_name with dashes to slashes)
|
|
42
|
+
attr_reader :require_path
|
|
43
|
+
|
|
44
|
+
# Initialize a Citrus grammar finder
|
|
45
|
+
#
|
|
46
|
+
# @param language [Symbol, String] the language name (e.g., :toml, :json)
|
|
47
|
+
# @param gem_name [String] the gem name (e.g., "toml-rb")
|
|
48
|
+
# @param grammar_const [String] constant path to grammar (e.g., "TomlRB::Document")
|
|
49
|
+
# @param require_path [String, nil] custom require path (defaults to gem_name with dashes→slashes)
|
|
50
|
+
def initialize(language:, gem_name:, grammar_const:, require_path: nil)
|
|
51
|
+
@language_name = language.to_sym
|
|
52
|
+
@gem_name = gem_name
|
|
53
|
+
@grammar_const = grammar_const
|
|
54
|
+
@require_path = require_path || gem_name.tr("-", "/")
|
|
55
|
+
@load_attempted = false
|
|
56
|
+
@available = false
|
|
57
|
+
@grammar_module = nil
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Check if the Citrus grammar is available
|
|
61
|
+
#
|
|
62
|
+
# Attempts to require the gem and resolve the grammar constant.
|
|
63
|
+
# Result is cached after first call.
|
|
64
|
+
#
|
|
65
|
+
# @return [Boolean] true if grammar is available
|
|
66
|
+
def available?
|
|
67
|
+
return @available if @load_attempted
|
|
68
|
+
|
|
69
|
+
@load_attempted = true
|
|
70
|
+
begin
|
|
71
|
+
# Try to require the gem
|
|
72
|
+
require @require_path
|
|
73
|
+
|
|
74
|
+
# Try to resolve the constant
|
|
75
|
+
@grammar_module = resolve_constant(@grammar_const)
|
|
76
|
+
|
|
77
|
+
# Verify it responds to parse
|
|
78
|
+
unless @grammar_module.respond_to?(:parse)
|
|
79
|
+
warn("#{@grammar_const} doesn't respond to :parse")
|
|
80
|
+
@available = false
|
|
81
|
+
return false
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
@available = true
|
|
85
|
+
rescue LoadError => e
|
|
86
|
+
# Always show LoadError for debugging
|
|
87
|
+
warn("CitrusGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
|
|
88
|
+
@available = false
|
|
89
|
+
rescue NameError => e
|
|
90
|
+
# Always show NameError for debugging
|
|
91
|
+
warn("CitrusGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
|
|
92
|
+
@available = false
|
|
93
|
+
rescue => e
|
|
94
|
+
# Catch any other errors
|
|
95
|
+
warn("CitrusGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
|
|
96
|
+
warn(e.backtrace.first(3).join("\n")) if ENV["TREE_HAVER_DEBUG"]
|
|
97
|
+
@available = false
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
@available
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Get the resolved grammar module
|
|
104
|
+
#
|
|
105
|
+
# @return [Module, nil] the grammar module if available
|
|
106
|
+
def grammar_module
|
|
107
|
+
available? # Ensure we've tried to load
|
|
108
|
+
@grammar_module
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Register this Citrus grammar with TreeHaver
|
|
112
|
+
#
|
|
113
|
+
# After registration, the language can be used via:
|
|
114
|
+
# TreeHaver::Language.{language_name}
|
|
115
|
+
#
|
|
116
|
+
# @param raise_on_missing [Boolean] if true, raises when grammar not available
|
|
117
|
+
# @return [Boolean] true if registration succeeded
|
|
118
|
+
# @raise [NotAvailable] if grammar not available and raise_on_missing is true
|
|
119
|
+
def register!(raise_on_missing: false)
|
|
120
|
+
unless available?
|
|
121
|
+
if raise_on_missing
|
|
122
|
+
raise NotAvailable, not_found_message
|
|
123
|
+
end
|
|
124
|
+
return false
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
TreeHaver.register_language(
|
|
128
|
+
@language_name,
|
|
129
|
+
grammar_module: @grammar_module,
|
|
130
|
+
gem_name: @gem_name,
|
|
131
|
+
)
|
|
132
|
+
true
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Get debug information about the search
|
|
136
|
+
#
|
|
137
|
+
# @return [Hash] diagnostic information
|
|
138
|
+
def search_info
|
|
139
|
+
{
|
|
140
|
+
language: @language_name,
|
|
141
|
+
gem_name: @gem_name,
|
|
142
|
+
grammar_const: @grammar_const,
|
|
143
|
+
require_path: @require_path,
|
|
144
|
+
available: available?,
|
|
145
|
+
grammar_module: @grammar_module&.name,
|
|
146
|
+
}
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Get a human-readable error message when grammar is not found
|
|
150
|
+
#
|
|
151
|
+
# @return [String] error message with installation hints
|
|
152
|
+
def not_found_message
|
|
153
|
+
"Citrus grammar for #{@language_name} not found. " \
|
|
154
|
+
"Install #{@gem_name} gem: gem install #{@gem_name}"
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
private
|
|
158
|
+
|
|
159
|
+
# Resolve a constant path like "TomlRB::Document"
|
|
160
|
+
#
|
|
161
|
+
# @param const_path [String] constant path
|
|
162
|
+
# @return [Object] the constant
|
|
163
|
+
# @raise [NameError] if constant not found
|
|
164
|
+
def resolve_constant(const_path)
|
|
165
|
+
const_path.split("::").reduce(Object) do |mod, const_name|
|
|
166
|
+
mod.const_get(const_name)
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|