tree_haver 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +285 -1
- data/CONTRIBUTING.md +132 -0
- data/README.md +529 -36
- data/lib/tree_haver/backends/citrus.rb +177 -20
- data/lib/tree_haver/backends/commonmarker.rb +490 -0
- data/lib/tree_haver/backends/ffi.rb +341 -142
- data/lib/tree_haver/backends/java.rb +65 -16
- data/lib/tree_haver/backends/markly.rb +559 -0
- data/lib/tree_haver/backends/mri.rb +183 -17
- data/lib/tree_haver/backends/prism.rb +624 -0
- data/lib/tree_haver/backends/psych.rb +597 -0
- data/lib/tree_haver/backends/rust.rb +60 -17
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +115 -11
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +220 -4
- data/lib/tree_haver/path_validator.rb +29 -24
- data/lib/tree_haver/tree.rb +63 -9
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +835 -75
- data/sig/tree_haver.rbs +18 -1
- data.tar.gz.sig +0 -0
- metadata +9 -4
- metadata.gz.sig +0 -0
|
@@ -28,7 +28,7 @@ module TreeHaver
|
|
|
28
28
|
return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
29
29
|
@load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
30
30
|
begin
|
|
31
|
-
require "ruby_tree_sitter
|
|
31
|
+
require "tree_sitter" # Note: gem is ruby_tree_sitter but requires tree_sitter
|
|
32
32
|
|
|
33
33
|
@loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
34
34
|
rescue LoadError
|
|
@@ -37,6 +37,15 @@ module TreeHaver
|
|
|
37
37
|
@loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
# Forget the cached outcome of the load attempt (primarily for testing).
#
# Clears both the "load was attempted" flag and the cached result so the
# two instance variables are back to false.
#
# @return [void]
# @api private
def reset!
  @load_attempted = @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
end
|
|
48
|
+
|
|
40
49
|
# Get capabilities supported by this backend
|
|
41
50
|
#
|
|
42
51
|
# @return [Hash{Symbol => Object}] capability map
|
|
@@ -56,19 +65,117 @@ module TreeHaver
|
|
|
56
65
|
|
|
57
66
|
# Wrapper for ruby_tree_sitter Language
|
|
58
67
|
#
|
|
59
|
-
#
|
|
68
|
+
# Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
|
|
69
|
+
# API across all backends.
|
|
60
70
|
class Language
  include Comparable

  # The wrapped TreeSitter::Language object
  # @return [::TreeSitter::Language]
  attr_reader :inner_language

  # The backend this language is for (always :mri for this class)
  # @return [Symbol]
  attr_reader :backend

  # The path this language was loaded from (if known)
  # @return [String, nil]
  attr_reader :path

  # The symbol name (if known)
  # @return [String, nil]
  attr_reader :symbol

  # @api private
  # @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
  # @param path [String, nil] path language was loaded from
  # @param symbol [String, nil] symbol name
  def initialize(lang, path: nil, symbol: nil)
    @inner_language = lang
    @backend = :mri
    @path = path
    @symbol = symbol
  end

  # Compare languages for ordering and equality
  #
  # MRI languages are equal if they have the same backend, path, and symbol.
  # A nil path or symbol is treated as the empty string for comparison.
  #
  # @param other [Object] object to compare with
  # @return [Integer, nil] -1, 0, 1, or nil if not comparable
  def <=>(other)
    return unless other.is_a?(Language)
    return unless other.backend == @backend

    # Compare by path first, then symbol. If the paths cannot be compared
    # (<=> yields nil) return nil instead of calling a method on nil.
    cmp = (@path || "") <=> (other.path || "")
    return cmp unless cmp&.zero?

    (@symbol || "") <=> (other.symbol || "")
  end

  # Hash value for this language (for use in Sets/Hashes)
  #
  # Uses the same nil-to-"" normalization as {#<=>} so that two languages
  # that are == / eql? always produce the same hash.
  #
  # @return [Integer]
  def hash
    [@backend, @path || "", @symbol || ""].hash
  end

  # eql? shares the Comparable-provided == (driven by <=>)
  alias_method :eql?, :==

  # Convert to the underlying TreeSitter::Language for passing to parser
  #
  # @return [::TreeSitter::Language]
  def to_language
    @inner_language
  end
  alias_method :to_ts_language, :to_language

  class << self
    # Load a language from a shared library (preferred method)
    #
    # @param path [String] absolute path to the language shared library
    # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
    # @param name [String, nil] language name passed to ruby_tree_sitter;
    #   when omitted it is derived from +symbol+
    # @return [Language] wrapped language handle
    # @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
    #   or the language could not be loaded
    # @example
    #   lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
    def from_library(path, symbol: nil, name: nil)
      raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?

      # ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
      # where language_name is the language identifier (e.g., "toml", "json"),
      # NOT the full symbol name (e.g., NOT "tree_sitter_toml"),
      # and path_to_so is the full path to the .so file.
      #
      # If name is not provided, derive it from symbol by stripping the "tree_sitter_" prefix.
      # NOTE(review): when both name and symbol are nil, nil is handed to
      # Language.load - confirm how ruby_tree_sitter reacts to that.
      language_name = name || symbol&.sub(/\Atree_sitter_/, "")
      ts_lang = ::TreeSitter::Language.load(language_name, path)
      new(ts_lang, path: path, symbol: symbol)
    rescue NameError => e
      # TreeSitter constant doesn't exist - backend not loaded
      raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
    rescue Exception => e # rubocop:disable Lint/RescueException
      # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+.
      # The error class is checked behind a defined? guard so that this handler
      # itself does not raise NameError when the TreeSitter constant is not loaded.
      if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
        raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
      else
        raise # Re-raise if it's not a TreeSitter error
      end
    end

    # Load a language from a shared library path (legacy method)
    #
    # @param path [String] absolute path to the language shared library
    # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
    # @return [Language] wrapped language handle
    # @deprecated Use {from_library} instead
    def from_path(path, symbol: nil)
      from_library(path, symbol: symbol)
    end
  end
end
|
|
@@ -83,35 +190,94 @@ module TreeHaver
|
|
|
83
190
|
# Build a parser backed by ruby_tree_sitter.
#
# @raise [TreeHaver::NotAvailable] if the MRI backend reports it is not
#   available, if a TreeSitter constant is missing, or if ruby_tree_sitter
#   raises one of its own errors while creating the parser
def initialize
  unless MRI.available?
    raise TreeHaver::NotAvailable, "ruby_tree_sitter not available"
  end

  @parser = ::TreeSitter::Parser.new
rescue NameError => missing
  # A NameError here means the TreeSitter constant could not be resolved
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{missing.message}"
rescue Exception => error # rubocop:disable Lint/RescueException
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+,
  # hence the broad rescue. Anything that is not a TreeSitter error is re-raised untouched.
  # The defined? guard keeps this check safe when the TreeSitter constant is not loaded.
  raise unless defined?(TreeSitter::TreeSitterError) && error.is_a?(TreeSitter::TreeSitterError)

  raise TreeHaver::NotAvailable, "Could not create parser: #{error.message}"
end
|
|
87
206
|
|
|
88
207
|
# Assign the language the underlying parser should use.
#
# Note: TreeHaver::Parser unwraps language objects before calling this method,
# so this backend is handed a raw ::TreeSitter::Language, never a wrapper.
#
# @param lang [::TreeSitter::Language] the language to use (already unwrapped)
# @return [::TreeSitter::Language] the language that was set
# @raise [TreeHaver::NotAvailable] if the parser reports no language after
#   assignment, or if ruby_tree_sitter raises one of its own errors
def language=(lang)
  @parser.language = lang

  # Read the value back to confirm the assignment took effect
  if @parser.language.nil?
    raise TreeHaver::NotAvailable, "Language not set correctly"
  end

  lang
rescue Exception => error # rubocop:disable Lint/RescueException
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+,
  # hence the broad rescue. Anything that is not a TreeSitter error is re-raised untouched.
  # The defined? guard keeps this check safe when the TreeSitter constant is not loaded.
  raise unless defined?(TreeSitter::TreeSitterError) && error.is_a?(TreeSitter::TreeSitterError)

  raise TreeHaver::NotAvailable, "Could not set language: #{error.message}"
end
|
|
95
233
|
|
|
96
234
|
# Parse source code from scratch (no previous tree).
#
# Delegates to ruby_tree_sitter's parse_string(old_tree, string), passing nil
# as the old tree.
#
# @param source [String] the source code to parse
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
# @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
#   or if ruby_tree_sitter raises one of its own errors
def parse(source)
  result = @parser.parse_string(nil, source)
  return result unless result.nil?

  raise TreeHaver::NotAvailable, "Parse returned nil - is language set?"
rescue Exception => error # rubocop:disable Lint/RescueException
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+,
  # hence the broad rescue. Anything that is not a TreeSitter error is re-raised untouched.
  # The defined? guard keeps this check safe when the TreeSitter constant is not loaded.
  raise unless defined?(TreeSitter::TreeSitterError) && error.is_a?(TreeSitter::TreeSitterError)

  raise TreeHaver::NotAvailable, "Could not parse source: #{error.message}"
end
|
|
104
258
|
|
|
105
259
|
# Parse source code with optional incremental parsing
#
# Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
# The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
#
# @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
# @param source [String] the source code to parse
# @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
# @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
#   or if ruby_tree_sitter raises one of its own errors
def parse_string(old_tree, source)
  # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
  tree = @parser.parse_string(old_tree, source)

  # Mirror #parse: a nil tree is reported as an error instead of being
  # returned to the caller as if it were a valid result.
  raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?

  # Return raw tree - TreeHaver::Parser will wrap it
  tree
rescue Exception => e # rubocop:disable Lint/RescueException
  # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+.
  # The error class is checked behind a defined? guard so that this handler
  # itself does not raise NameError when the TreeSitter constant is not loaded.
  if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
    raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
  else
    raise # Re-raise if it's not a TreeSitter error
  end
end
|
|
116
282
|
end
|
|
117
283
|
end
|