tree_haver 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +48 -0
- data/CITATION.cff +20 -0
- data/CODE_OF_CONDUCT.md +134 -0
- data/CONTRIBUTING.md +227 -0
- data/FUNDING.md +74 -0
- data/LICENSE.txt +21 -0
- data/README.md +1260 -0
- data/REEK +0 -0
- data/RUBOCOP.md +71 -0
- data/SECURITY.md +21 -0
- data/lib/tree_haver/backends/ffi.rb +410 -0
- data/lib/tree_haver/backends/java.rb +568 -0
- data/lib/tree_haver/backends/mri.rb +129 -0
- data/lib/tree_haver/backends/rust.rb +175 -0
- data/lib/tree_haver/compat.rb +43 -0
- data/lib/tree_haver/grammar_finder.rb +245 -0
- data/lib/tree_haver/language_registry.rb +139 -0
- data/lib/tree_haver/path_validator.rb +333 -0
- data/lib/tree_haver/version.rb +20 -0
- data/lib/tree_haver.rb +710 -0
- data/sig/tree_haver/backends.rbs +285 -0
- data/sig/tree_haver/grammar_finder.rbs +29 -0
- data/sig/tree_haver/path_validator.rbs +31 -0
- data/sig/tree_haver.rbs +131 -0
- data.tar.gz.sig +0 -0
- metadata +298 -0
- metadata.gz.sig +0 -0
data/lib/tree_haver.rb
ADDED
|
@@ -0,0 +1,710 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# External gems
|
|
4
|
+
require "version_gem"
|
|
5
|
+
|
|
6
|
+
# This gem
|
|
7
|
+
require_relative "tree_haver/version"
|
|
8
|
+
require_relative "tree_haver/language_registry"
|
|
9
|
+
|
|
10
|
+
# TreeHaver is a cross-Ruby adapter for the Tree-sitter parsing library.
|
|
11
|
+
#
|
|
12
|
+
# It provides a unified API for parsing source code using Tree-sitter grammars,
|
|
13
|
+
# working seamlessly across MRI Ruby, JRuby, and TruffleRuby.
|
|
14
|
+
#
|
|
15
|
+
# @example Basic usage with TOML
|
|
16
|
+
# # Load a language grammar
|
|
17
|
+
# language = TreeHaver::Language.from_library(
|
|
18
|
+
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
19
|
+
# symbol: "tree_sitter_toml"
|
|
20
|
+
# )
|
|
21
|
+
#
|
|
22
|
+
# # Create and configure a parser
|
|
23
|
+
# parser = TreeHaver::Parser.new
|
|
24
|
+
# parser.language = language
|
|
25
|
+
#
|
|
26
|
+
# # Parse source code
|
|
27
|
+
# tree = parser.parse("[package]\nname = \"my-app\"")
|
|
28
|
+
# root = tree.root_node
|
|
29
|
+
#
|
|
30
|
+
# # Traverse the AST
|
|
31
|
+
# root.each { |child| puts child.type }
|
|
32
|
+
#
|
|
33
|
+
# @example Using language registration
|
|
34
|
+
# TreeHaver.register_language(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
|
|
35
|
+
# language = TreeHaver::Language.toml
|
|
36
|
+
#
|
|
37
|
+
# @example Using GrammarFinder for automatic discovery
|
|
38
|
+
# # GrammarFinder automatically locates grammar libraries on the system
|
|
39
|
+
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
40
|
+
# finder.register! if finder.available?
|
|
41
|
+
# language = TreeHaver::Language.toml
|
|
42
|
+
#
|
|
43
|
+
# @example Using GrammarFinder in a *-merge gem
|
|
44
|
+
# # Each merge gem (toml-merge, json-merge, bash-merge) uses the same pattern
|
|
45
|
+
# finder = TreeHaver::GrammarFinder.new(:toml) # or :json, :bash, etc.
|
|
46
|
+
# if finder.available?
|
|
47
|
+
# finder.register!
|
|
48
|
+
# else
|
|
49
|
+
# warn finder.not_found_message
|
|
50
|
+
# end
|
|
51
|
+
#
|
|
52
|
+
# @example Selecting a backend
|
|
53
|
+
# TreeHaver.backend = :ffi # Force FFI backend
|
|
54
|
+
# TreeHaver.backend = :mri # Force MRI backend
|
|
55
|
+
# TreeHaver.backend = :auto # Auto-select (default)
|
|
56
|
+
#
|
|
57
|
+
# @see https://tree-sitter.github.io/tree-sitter/ Tree-sitter documentation
|
|
58
|
+
# @see GrammarFinder For automatic grammar library discovery
|
|
59
|
+
module TreeHaver
|
|
60
|
+
# Base error class for TreeHaver exceptions
|
|
61
|
+
#
|
|
62
|
+
# @abstract Subclass to create specific error types
|
|
63
|
+
class Error < StandardError
  # Intentionally empty: this class only gives TreeHaver-specific exceptions a common ancestor.
end
|
|
64
|
+
|
|
65
|
+
# Raised when a requested backend or feature is not available
|
|
66
|
+
#
|
|
67
|
+
# This can occur when:
|
|
68
|
+
# - Required native libraries are not installed
|
|
69
|
+
# - The selected backend is not compatible with the current Ruby implementation
|
|
70
|
+
# - A language grammar cannot be loaded
|
|
71
|
+
#
|
|
72
|
+
# @example Handling unavailable backends
|
|
73
|
+
# begin
|
|
74
|
+
# language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
75
|
+
# rescue TreeHaver::NotAvailable => e
|
|
76
|
+
# puts "Grammar not available: #{e.message}"
|
|
77
|
+
# end
|
|
78
|
+
class NotAvailable < Error
  # Intentionally empty: the type itself is the signal that a backend or feature is missing.
end
|
|
79
|
+
|
|
80
|
+
# Namespace for backend implementations
|
|
81
|
+
#
|
|
82
|
+
# TreeHaver provides multiple backends to support different Ruby implementations:
|
|
83
|
+
# - {Backends::MRI} - Uses ruby_tree_sitter (MRI C extension)
|
|
84
|
+
# - {Backends::Rust} - Uses tree_stump (Rust extension with precompiled binaries)
|
|
85
|
+
# - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
|
|
86
|
+
# - {Backends::Java} - Uses JRuby's Java integration (planned)
|
|
87
|
+
module Backends
  # Every backend lives in the same directory; each file is loaded lazily on first
  # reference to its constant, so unused backends are never required.
  backends_dir = File.join(__dir__, "tree_haver", "backends")

  autoload :MRI, File.join(backends_dir, "mri")
  autoload :Rust, File.join(backends_dir, "rust")
  autoload :FFI, File.join(backends_dir, "ffi")
  autoload :Java, File.join(backends_dir, "java")
end
|
|
93
|
+
|
|
94
|
+
# Security utilities for validating paths before loading shared libraries
|
|
95
|
+
#
|
|
96
|
+
# @example Validate a path
|
|
97
|
+
# TreeHaver::PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
|
|
98
|
+
# # => true
|
|
99
|
+
#
|
|
100
|
+
# @see PathValidator
|
|
101
|
+
autoload :PathValidator, File.join(__dir__, "tree_haver", "path_validator")
|
|
102
|
+
|
|
103
|
+
# Generic grammar finder utility with built-in security validations
|
|
104
|
+
#
|
|
105
|
+
# GrammarFinder provides platform-aware discovery of tree-sitter grammar
|
|
106
|
+
# libraries for any language. It validates paths from environment variables
|
|
107
|
+
# to prevent path traversal and other attacks.
|
|
108
|
+
#
|
|
109
|
+
# @example Find and register a language
|
|
110
|
+
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
111
|
+
# finder.register! if finder.available?
|
|
112
|
+
# language = TreeHaver::Language.toml
|
|
113
|
+
#
|
|
114
|
+
# @example Secure mode (trusted directories only)
|
|
115
|
+
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
116
|
+
# path = finder.find_library_path_safe # Ignores ENV, only trusted dirs
|
|
117
|
+
#
|
|
118
|
+
# @see GrammarFinder
|
|
119
|
+
# @see PathValidator
|
|
120
|
+
autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
|
|
121
|
+
|
|
122
|
+
# Get the current backend selection
|
|
123
|
+
#
|
|
124
|
+
# @return [Symbol] one of :auto, :mri, :rust, :ffi, or :java
|
|
125
|
+
# @note Can be set via ENV["TREE_HAVER_BACKEND"]
|
|
126
|
+
class << self
|
|
127
|
+
# @example
|
|
128
|
+
# TreeHaver.backend # => :auto
|
|
129
|
+
def backend
  # Memoized: ENV is consulted only while no backend has been chosen or assigned yet.
  @backend ||= begin # rubocop:disable ThreadSafety/ClassInstanceVariable
    requested = ENV.fetch("TREE_HAVER_BACKEND", "auto")
    # Anything other than the four known names falls back to :auto.
    %w[mri rust ffi java].include?(requested) ? requested.to_sym : :auto
  end
end
|
|
138
|
+
|
|
139
|
+
# Set the backend to use
|
|
140
|
+
#
|
|
141
|
+
# @param name [Symbol, String, nil] backend name (:auto, :mri, :rust, :ffi, :java)
|
|
142
|
+
# @return [Symbol, nil] the backend that was set
|
|
143
|
+
# @example Force FFI backend
|
|
144
|
+
# TreeHaver.backend = :ffi
|
|
145
|
+
# @example Force Rust backend
|
|
146
|
+
# TreeHaver.backend = :rust
|
|
147
|
+
def backend=(name)
  # nil is stored as-is, which lets the reader fall back to ENV on its next call.
  @backend = name.nil? ? nil : name.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
end
|
|
150
|
+
|
|
151
|
+
# Reset backend selection memoization
|
|
152
|
+
#
|
|
153
|
+
# Primarily useful in tests to switch backends without cross-example leakage.
|
|
154
|
+
#
|
|
155
|
+
# @param to [Symbol, String, nil] backend name or nil to clear (defaults to :auto)
|
|
156
|
+
# @return [void]
|
|
157
|
+
# @example Reset to auto-selection
|
|
158
|
+
# TreeHaver.reset_backend!
|
|
159
|
+
# @example Reset to specific backend
|
|
160
|
+
# TreeHaver.reset_backend!(to: :ffi)
|
|
161
|
+
def reset_backend!(to: :auto)
  # Overwrites the memoized selection; passing nil clears it entirely.
  @backend = to.nil? ? nil : to.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
end
|
|
164
|
+
|
|
165
|
+
# Determine the concrete backend module to use
|
|
166
|
+
#
|
|
167
|
+
# This method performs backend auto-selection when backend is :auto.
|
|
168
|
+
# On JRuby, prefers Java backend if available, then FFI.
|
|
169
|
+
# On MRI, prefers MRI backend if available, then Rust, then FFI.
|
|
170
|
+
#
|
|
171
|
+
# @return [Module, nil] the backend module (Backends::MRI, Backends::Rust, Backends::FFI, or Backends::Java), or nil if none available
|
|
172
|
+
# @example
|
|
173
|
+
# mod = TreeHaver.backend_module
|
|
174
|
+
# if mod
|
|
175
|
+
# puts "Using #{mod.capabilities[:backend]} backend"
|
|
176
|
+
# end
|
|
177
|
+
def backend_module
  # An explicit selection is honoured as-is; availability is not checked here.
  case backend
  when :mri then return Backends::MRI
  when :rust then return Backends::Rust
  when :ffi then return Backends::FFI
  when :java then return Backends::Java
  end

  # Any other value (normally :auto) triggers auto-selection by Ruby engine.
  engine = RUBY_ENGINE if defined?(RUBY_ENGINE)
  return Backends::Java if engine == "jruby" && Backends::Java.available?

  if engine == "ruby"
    return Backends::MRI if Backends::MRI.available?
    return Backends::Rust if Backends::Rust.available?
  end

  # FFI is the engine-independent fallback; nil means no backend is usable yet.
  Backends::FFI if Backends::FFI.available?
end
|
|
203
|
+
|
|
204
|
+
# Get capabilities of the current backend
|
|
205
|
+
#
|
|
206
|
+
# Returns a hash describing what features the selected backend supports.
|
|
207
|
+
# Common keys include:
|
|
208
|
+
# - :backend - Symbol identifying the backend (:mri, :rust, :ffi, :java)
|
|
209
|
+
# - :parse - Whether parsing is implemented
|
|
210
|
+
# - :query - Whether the Query API is available
|
|
211
|
+
# - :bytes_field - Whether byte position fields are available
|
|
212
|
+
# - :incremental - Whether incremental parsing is supported
|
|
213
|
+
#
|
|
214
|
+
# @return [Hash{Symbol => Object}] capability map, or empty hash if no backend available
|
|
215
|
+
# @example
|
|
216
|
+
# TreeHaver.capabilities
|
|
217
|
+
# # => { backend: :mri, query: true, bytes_field: true }
|
|
218
|
+
def capabilities
  selected = backend_module
  # With no backend resolved there is nothing to report, so return an empty map.
  selected ? selected.capabilities : {}
end
|
|
223
|
+
|
|
224
|
+
# -- Language registration API -------------------------------------------------
|
|
225
|
+
# Delegates to LanguageRegistry for thread-safe registration and lookup.
|
|
226
|
+
# Allows opting-in dynamic helpers like TreeHaver::Language.toml without
|
|
227
|
+
# advertising all names by default.
|
|
228
|
+
|
|
229
|
+
# Register a language helper by name
|
|
230
|
+
#
|
|
231
|
+
# After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
|
|
232
|
+
# to load the registered language.
|
|
233
|
+
#
|
|
234
|
+
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
|
|
235
|
+
# @param path [String] absolute path to the language shared library
|
|
236
|
+
# @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
|
|
237
|
+
# @return [void]
|
|
238
|
+
# @example Register TOML grammar
|
|
239
|
+
# TreeHaver.register_language(
|
|
240
|
+
# :toml,
|
|
241
|
+
# path: "/usr/local/lib/libtree-sitter-toml.so",
|
|
242
|
+
# symbol: "tree_sitter_toml"
|
|
243
|
+
# )
|
|
244
|
+
def register_language(name, path:, symbol: nil)
  # Storage is owned by LanguageRegistry; this method only forwards the registration.
  registration = {path: path, symbol: symbol}
  LanguageRegistry.register(name, **registration)
end
|
|
247
|
+
|
|
248
|
+
# Unregister a previously registered language helper
|
|
249
|
+
#
|
|
250
|
+
# @param name [Symbol, String] language identifier to unregister
|
|
251
|
+
# @return [void]
|
|
252
|
+
# @example
|
|
253
|
+
# TreeHaver.unregister_language(:toml)
|
|
254
|
+
def unregister_language(name)
  # Pure delegation: the registry decides what happens for unknown names.
  LanguageRegistry.unregister(name)
end
|
|
257
|
+
|
|
258
|
+
# Clear all registered languages
|
|
259
|
+
#
|
|
260
|
+
# Primarily intended for test cleanup and resetting state.
|
|
261
|
+
#
|
|
262
|
+
# @return [void]
|
|
263
|
+
# @example
|
|
264
|
+
# TreeHaver.clear_languages!
|
|
265
|
+
def clear_languages!
  # Pure delegation: the registry owns all registration state.
  LanguageRegistry.clear_registrations!
end
|
|
268
|
+
|
|
269
|
+
# Fetch a registered language entry
|
|
270
|
+
#
|
|
271
|
+
# @api private
|
|
272
|
+
# @param name [Symbol, String] language identifier
|
|
273
|
+
# @return [Hash, nil] registration hash with keys :path and :symbol, or nil if not registered
|
|
274
|
+
def registered_language(name)
  # Pure delegation: returns whatever the registry holds for this name.
  LanguageRegistry.registered(name)
end
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
# Represents a Tree-sitter language grammar
|
|
280
|
+
#
|
|
281
|
+
# A Language object is an opaque handle to a TSLanguage* that defines
|
|
282
|
+
# the grammar rules for parsing a specific programming language.
|
|
283
|
+
#
|
|
284
|
+
# @example Load a language from a shared library
|
|
285
|
+
# language = TreeHaver::Language.from_library(
|
|
286
|
+
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
287
|
+
# symbol: "tree_sitter_toml"
|
|
288
|
+
# )
|
|
289
|
+
#
|
|
290
|
+
# @example Use a registered language
|
|
291
|
+
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
292
|
+
# language = TreeHaver::Language.toml
|
|
293
|
+
class Language
  class << self
    # Load a grammar using the ruby_tree_sitter calling convention
    # `Language.load(name, path)`.
    #
    # The exported symbol is derived from the name as "tree_sitter_<name>".
    #
    # @param name [String] the language name (e.g., "toml")
    # @param path [String] absolute path to the language shared library
    # @param validate [Boolean] forwarded to {from_library} (default: true)
    # @return [Language] loaded language handle
    # @raise [NotAvailable] if no backend is available
    # @raise [ArgumentError] if the path or derived symbol fails validation
    # @example
    #   language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
    def load(name, path, validate: true)
      from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
    end

    # Load a language grammar from a shared library via the current backend.
    #
    # When `validate` is true the path (and the symbol, if given) are checked
    # with {PathValidator} before any backend is touched. Results are cached in
    # {LanguageRegistry}, keyed by backend, path, symbol, name and the value of
    # ENV["TREE_SITTER_LANG_SYMBOL"].
    #
    # @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
    # @param symbol [String, nil] name of the exported function, or nil to let the backend decide
    # @param name [String, nil] logical name for the language (part of the cache key)
    # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
    # @return [Language] loaded language handle as returned by the backend
    # @raise [NotAvailable] if no backend is available
    # @raise [ArgumentError] if path or symbol fails security validation
    # @example
    #   language = TreeHaver::Language.from_library(
    #     "/usr/local/lib/libtree-sitter-toml.so",
    #     symbol: "tree_sitter_toml",
    #     name: "toml"
    #   )
    def from_library(path, symbol: nil, name: nil, validate: true)
      if validate
        unless PathValidator.safe_library_path?(path)
          errors = PathValidator.validation_errors(path)
          raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
        end

        if symbol && !PathValidator.safe_symbol_name?(symbol)
          raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
            "Symbol names must be valid C identifiers."
        end
      end

      mod = TreeHaver.backend_module
      raise NotAvailable, "No TreeHaver backend is available" unless mod

      # The backend module is part of the key: a handle produced by one backend
      # must never be served from the cache after the backend has been switched.
      # The ENV value is included because, per the original note here, it affects
      # symbol resolution.
      key = [mod, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
      LanguageRegistry.fetch(key) do
        # Prefer .from_library; fall back to .from_path for backends that lack it.
        if mod::Language.respond_to?(:from_library)
          mod::Language.from_library(path, symbol: symbol, name: name)
        else
          mod::Language.from_path(path)
        end
      end
    end
    # Alias for {from_library}
    # @see from_library
    alias_method :from_path, :from_library

    # Dynamic helper that loads a registered language by name, e.g.
    # `TreeHaver::Language.toml` after `TreeHaver.register_language(:toml, ...)`.
    #
    # Per-call overrides win over the registered values. An explicit
    # `symbol: nil` is respected; otherwise the symbol defaults to the
    # registered one or "tree_sitter_<name>".
    #
    # @param method_name [Symbol] the registered language name
    # @param args [Array] positional arguments (first is used as path if provided)
    # @param kwargs [Hash] keyword arguments (:path, :symbol, :name)
    # @return [Language] loaded language handle
    # @raise [NoMethodError] if the language name is not registered
    # @raise [ArgumentError] if no path can be determined
    def method_missing(method_name, *args, **kwargs, &block)
      # Only names that were registered are treated as language helpers.
      reg = TreeHaver.registered_language(method_name)
      return super unless reg

      path = kwargs[:path] || args.first || reg[:path]
      raise ArgumentError, "path is required" unless path

      symbol = kwargs.key?(:symbol) ? kwargs[:symbol] : (reg[:symbol] || "tree_sitter_#{method_name}")
      name = kwargs[:name] || method_name.to_s
      from_library(path, symbol: symbol, name: name)
    end

    # @api private
    def respond_to_missing?(method_name, include_private = false)
      !!TreeHaver.registered_language(method_name) || super
    end
  end
end
|
|
402
|
+
|
|
403
|
+
# Represents a Tree-sitter parser instance
|
|
404
|
+
#
|
|
405
|
+
# A Parser is used to parse source code into a syntax tree. You must
|
|
406
|
+
# set a language before parsing.
|
|
407
|
+
#
|
|
408
|
+
# @example Basic parsing
|
|
409
|
+
# parser = TreeHaver::Parser.new
|
|
410
|
+
# parser.language = TreeHaver::Language.toml
|
|
411
|
+
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
412
|
+
class Parser
  # Build a parser backed by the currently selected backend.
  #
  # @raise [NotAvailable] if no backend is available
  def initialize
    backend = TreeHaver.backend_module
    raise NotAvailable, "No TreeHaver backend is available" unless backend

    @impl = backend::Parser.new
  end

  # Assign the grammar the backend parser should use.
  #
  # @param lang [Language] the language to use for parsing
  # @return [Language] the language that was set
  # @example
  #   parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
  def language=(lang)
    # The value is handed to the backend parser unchanged.
    @impl.language = lang
  end

  # Parse source code into a syntax tree.
  #
  # @param source [String] the source code to parse (should be UTF-8)
  # @return [Tree] the backend result wrapped in a {Tree}
  # @example
  #   tree = parser.parse("x = 1")
  #   puts tree.root_node.type
  def parse(source)
    Tree.new(@impl.parse(source))
  end

  # Parse source code, optionally reusing a previous tree (ruby_tree_sitter's
  # `parse_string(old_tree, source)` signature).
  #
  # If the backend parser implements `parse_string`, the call is passed
  # through to it; otherwise the old tree is ignored and {#parse} is used.
  # A {Tree} wrapper given as `old_tree` is unwrapped to its backend object
  # first; any other truthy value is passed to the backend as-is.
  #
  # @param old_tree [Tree, nil] previously parsed tree, or nil for a fresh parse
  # @param source [String] the source code to parse (should be UTF-8)
  # @return [Tree] the parsed syntax tree
  # @see Tree#edit For marking edits before incremental re-parsing
  # @example First parse (no old tree)
  #   tree = parser.parse_string(nil, "x = 1")
  # @example Incremental parse
  #   tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
  #   new_tree = parser.parse_string(tree, "x = 42")
  def parse_string(old_tree, source)
    # Backends without parse_string get a plain, non-incremental parse.
    return parse(source) unless @impl.respond_to?(:parse_string)

    previous =
      if !old_tree
        nil
      elsif old_tree.is_a?(Tree)
        # Reach into our own wrapper to get the object the backend understands.
        old_tree.instance_variable_get(:@impl)
      else
        old_tree
      end

    Tree.new(@impl.parse_string(previous, source))
  end
end
|
|
493
|
+
|
|
494
|
+
# Represents a parsed syntax tree
|
|
495
|
+
#
|
|
496
|
+
# A Tree is the result of parsing source code. It provides access to
|
|
497
|
+
# the root node of the AST and supports incremental parsing via the
|
|
498
|
+
# {#edit} method.
|
|
499
|
+
#
|
|
500
|
+
# @example Basic usage
|
|
501
|
+
# tree = parser.parse(source)
|
|
502
|
+
# root = tree.root_node
|
|
503
|
+
#
|
|
504
|
+
# @example Incremental parsing
|
|
505
|
+
# tree = parser.parse_string(nil, original_source)
|
|
506
|
+
# tree.edit(
|
|
507
|
+
# start_byte: 10,
|
|
508
|
+
# old_end_byte: 15,
|
|
509
|
+
# new_end_byte: 20,
|
|
510
|
+
# start_point: { row: 0, column: 10 },
|
|
511
|
+
# old_end_point: { row: 0, column: 15 },
|
|
512
|
+
# new_end_point: { row: 0, column: 20 }
|
|
513
|
+
# )
|
|
514
|
+
# new_tree = parser.parse_string(tree, edited_source)
|
|
515
|
+
class Tree
  # @api private
  # @param impl [Object] the backend's tree object being wrapped
  def initialize(impl)
    @impl = impl
  end

  # Fetch the root of the syntax tree.
  #
  # @return [Node] the backend root node wrapped in a {Node}
  # @example
  #   root = tree.root_node
  #   puts root.type # => "document" or similar
  def root_node
    backend_root = @impl.root_node
    Node.new(backend_root)
  end

  # Tell the backend tree that the source was edited, ahead of an
  # incremental re-parse. All arguments are forwarded to the backend unchanged.
  #
  # @param start_byte [Integer] byte offset where the edit starts
  # @param old_end_byte [Integer] byte offset where the old text ended
  # @param new_end_byte [Integer] byte offset where the new text ends
  # @param start_point [Hash] starting position as `{ row:, column: }`
  # @param old_end_point [Hash] old ending position as `{ row:, column: }`
  # @param new_end_point [Hash] new ending position as `{ row:, column: }`
  # @return [Object] whatever the backend's `edit` returns
  # @raise [NotAvailable] if the backend tree does not implement `edit`
  # @example Replacing "1" with "42" at byte 4 of "x = 1"
  #   tree.edit(
  #     start_byte: 4,
  #     old_end_byte: 5,
  #     new_end_byte: 6,
  #     start_point: { row: 0, column: 4 },
  #     old_end_point: { row: 0, column: 5 },
  #     new_end_point: { row: 0, column: 6 }
  #   )
  def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
    unless supports_editing?
      raise NotAvailable, "Incremental parsing not supported by current backend. " \
        "Use MRI (ruby_tree_sitter) or Rust (tree_stump) backend."
    end

    edit_args = {
      start_byte: start_byte,
      old_end_byte: old_end_byte,
      new_end_byte: new_end_byte,
      start_point: start_point,
      old_end_point: old_end_point,
      new_end_point: new_end_point,
    }
    @impl.edit(**edit_args)
  end

  # Whether {#edit} can be called on this tree.
  #
  # @return [Boolean] true if the backend tree responds to `edit`
  def supports_editing?
    @impl.respond_to?(:edit)
  end
end
|
|
581
|
+
|
|
582
|
+
# Represents a node in the syntax tree
|
|
583
|
+
#
|
|
584
|
+
# A Node represents a single element in the parsed AST. Each node has
|
|
585
|
+
# a type (like "string", "number", "table", etc.) and may have child nodes.
|
|
586
|
+
#
|
|
587
|
+
# @example Traversing nodes
|
|
588
|
+
# root = tree.root_node
|
|
589
|
+
# root.each do |child|
|
|
590
|
+
# puts "Child type: #{child.type}"
|
|
591
|
+
# child.each { |grandchild| puts " Grandchild: #{grandchild.type}" }
|
|
592
|
+
# end
|
|
593
|
+
class Node
  # @api private
  # @param impl [Object] the backend's node object being wrapped
  def initialize(impl)
    @impl = impl
  end

  # Type name of this node, as reported by the backend node.
  #
  # @return [String] the node type
  # @example
  #   node.type # => "table"
  def type
    @impl.type
  end

  # Iterate over child nodes, each wrapped in a {Node}.
  #
  # @yieldparam child [Node] each child node
  # @return [Enumerator, Object] an enumerator if no block is given, otherwise
  #   whatever the backend's `each` returns
  # @example With a block
  #   node.each { |child| puts child.type }
  # @example Without a block
  #   children = node.each.to_a
  def each
    return enum_for(:each) unless block_given?

    @impl.each { |child_impl| yield Node.new(child_impl) }
  end

  # Start position of this node in the source.
  #
  # @return [Object] the backend's point object
  def start_point
    @impl.start_point
  end

  # End position of this node in the source.
  #
  # @return [Object] the backend's point object
  def end_point
    @impl.end_point
  end

  # Start byte offset of this node in the source.
  #
  # @return [Integer] byte offset from beginning of source
  def start_byte
    @impl.start_byte
  end

  # End byte offset of this node in the source.
  #
  # @return [Integer] byte offset from beginning of source
  def end_byte
    @impl.end_byte
  end

  # Whether the backend reports a parse error for this node.
  #
  # @return [Boolean] false when the backend node has no `has_error?`
  def has_error?
    return false unless @impl.respond_to?(:has_error?)

    @impl.has_error?
  end

  # Whether the backend reports this node as MISSING.
  #
  # @return [Boolean] false when the backend node has no `missing?`
  def missing?
    return false unless @impl.respond_to?(:missing?)

    @impl.missing?
  end

  # String representation, taken from the backend node.
  #
  # @return [String] string representation
  def to_s
    @impl.to_s
  end

  # Report methods of the backend node as supported, matching the delegation
  # done in {#method_missing}.
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    return true if @impl.respond_to?(method_name, include_private)

    super
  end

  # Forward any method not wrapped explicitly to the backend node, so
  # backend-specific node APIs stay reachable through the wrapper.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] arguments to pass
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying implementation
  def method_missing(method_name, *args, **kwargs, &block)
    return super unless @impl.respond_to?(method_name)

    @impl.public_send(method_name, *args, **kwargs, &block)
  end
end
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
# Mix the VersionGem helpers into the Version namespace as singleton methods.
TreeHaver::Version.extend(VersionGem::Basic)
|