tree_haver 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +236 -3
- data/CONTRIBUTING.md +100 -0
- data/README.md +470 -85
- data/lib/tree_haver/backends/citrus.rb +423 -0
- data/lib/tree_haver/backends/ffi.rb +405 -150
- data/lib/tree_haver/backends/java.rb +63 -10
- data/lib/tree_haver/backends/mri.rb +154 -27
- data/lib/tree_haver/backends/rust.rb +58 -27
- data/lib/tree_haver/citrus_grammar_finder.rb +170 -0
- data/lib/tree_haver/grammar_finder.rb +42 -7
- data/lib/tree_haver/language_registry.rb +62 -71
- data/lib/tree_haver/node.rb +526 -0
- data/lib/tree_haver/path_validator.rb +47 -27
- data/lib/tree_haver/tree.rb +259 -0
- data/lib/tree_haver/version.rb +2 -2
- data/lib/tree_haver.rb +741 -285
- data/sig/tree_haver/backends.rbs +68 -1
- data/sig/tree_haver/path_validator.rbs +1 -0
- data/sig/tree_haver.rbs +95 -9
- data.tar.gz.sig +0 -0
- metadata +12 -8
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Unified Tree wrapper providing a consistent API across all backends
|
|
5
|
+
#
|
|
6
|
+
# This class wraps backend-specific tree objects and provides a unified interface.
|
|
7
|
+
# It stores the source text to enable text extraction from nodes.
|
|
8
|
+
#
|
|
9
|
+
# == Wrapping/Unwrapping Contract
|
|
10
|
+
#
|
|
11
|
+
# TreeHaver follows a consistent pattern for object wrapping:
|
|
12
|
+
#
|
|
13
|
+
# 1. **TreeHaver::Parser** (top level) handles ALL wrapping/unwrapping
|
|
14
|
+
# 2. **Backends** work exclusively with raw backend objects
|
|
15
|
+
# 3. **User-facing API** uses only TreeHaver wrapper classes
|
|
16
|
+
#
|
|
17
|
+
# Specifically for trees:
|
|
18
|
+
# - Backend Parser#parse returns raw backend tree (TreeSitter::Tree, TreeStump::Tree, etc.)
|
|
19
|
+
# - TreeHaver::Parser#parse wraps it in TreeHaver::Tree
|
|
20
|
+
# - TreeHaver::Parser#parse_string unwraps old_tree before passing to backend
|
|
21
|
+
# - Backend Parser#parse_string receives raw backend tree, returns raw backend tree
|
|
22
|
+
# - TreeHaver::Parser#parse_string wraps the returned tree
|
|
23
|
+
#
|
|
24
|
+
# This ensures:
|
|
25
|
+
# - Backends are simple and consistent
|
|
26
|
+
# - All complexity is in one place (TreeHaver top level)
|
|
27
|
+
# - Users always work with TreeHaver wrapper classes
|
|
28
|
+
#
|
|
29
|
+
# @example Basic usage
|
|
30
|
+
# parser = TreeHaver::Parser.new
|
|
31
|
+
# parser.language = TreeHaver::Language.toml
|
|
32
|
+
# tree = parser.parse(source)
|
|
33
|
+
# root = tree.root_node
|
|
34
|
+
# puts root.type
|
|
35
|
+
#
|
|
36
|
+
# @example Incremental parsing (if backend supports it)
|
|
37
|
+
# tree = parser.parse("x = 1")
|
|
38
|
+
# # Edit the source: "x = 1" → "x = 42"
|
|
39
|
+
# tree.edit(
|
|
40
|
+
# start_byte: 4,
|
|
41
|
+
# old_end_byte: 5,
|
|
42
|
+
# new_end_byte: 6,
|
|
43
|
+
# start_point: { row: 0, column: 4 },
|
|
44
|
+
# old_end_point: { row: 0, column: 5 },
|
|
45
|
+
# new_end_point: { row: 0, column: 6 }
|
|
46
|
+
# )
|
|
47
|
+
# new_tree = parser.parse_string(tree, "x = 42")
|
|
48
|
+
#
|
|
49
|
+
# @example Accessing backend-specific features
|
|
50
|
+
# # Via passthrough (method_missing delegates to inner_tree)
|
|
51
|
+
# tree.some_backend_specific_method # Automatically delegated
|
|
52
|
+
#
|
|
53
|
+
# # Or explicitly via inner_tree
|
|
54
|
+
# tree.inner_tree.some_backend_specific_method
|
|
55
|
+
class Tree
  # The wrapped backend-specific tree object
  #
  # This provides direct access to the underlying backend tree for advanced usage
  # when you need backend-specific features not exposed by the unified API.
  #
  # @return [Object] The underlying tree (TreeSitter::Tree, TreeStump::Tree, etc.)
  # @example Accessing backend-specific methods
  #   # Print DOT graph (TreeStump-specific)
  #   if tree.inner_tree.respond_to?(:print_dot_graph)
  #     File.open("tree.dot", "w") do |f|
  #       tree.inner_tree.print_dot_graph(f)
  #     end
  #   end
  attr_reader :inner_tree

  # The source text
  #
  # Stored to enable text extraction from nodes via byte offsets.
  #
  # @return [String, nil] The original source code, or nil when none was given
  attr_reader :source

  # @param tree [Object] Backend-specific tree object
  # @param source [String, nil] Source text for node text extraction
  def initialize(tree, source: nil)
    @inner_tree = tree
    @source = source
  end

  # Get the root node of the tree
  #
  # @return [Node, nil] Wrapped root node, or nil when the backend tree
  #   reports no root node
  def root_node
    root = @inner_tree.root_node
    return if root.nil?

    Node.new(root, source: @source)
  end

  # Mark the tree as edited for incremental re-parsing
  #
  # Call this method after the source code has been modified but before
  # re-parsing. This tells tree-sitter which parts of the tree are
  # invalidated so it can efficiently re-parse only the affected regions.
  #
  # Not all backends support incremental parsing. Use {#supports_editing?}
  # to check before calling this method.
  #
  # @param start_byte [Integer] byte offset where the edit starts
  # @param old_end_byte [Integer] byte offset where the old text ended
  # @param new_end_byte [Integer] byte offset where the new text ends
  # @param start_point [Hash, TreeSitter::Point] starting position as `{ row:, column: }`
  # @param old_end_point [Hash, TreeSitter::Point] old ending position as `{ row:, column: }`
  # @param new_end_point [Hash, TreeSitter::Point] new ending position as `{ row:, column: }`
  # @return [Object] whatever the backend tree's `edit` returns; callers
  #   should not rely on it
  # @raise [TreeHaver::NotAvailable] if the backend doesn't support incremental parsing
  # @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing
  #
  # @example Incremental parsing workflow
  #   # Original source: "x = 1"
  #   tree = parser.parse("x = 1")
  #
  #   # Edit the source: replace "1" with "42" at byte offset 4
  #   tree.edit(
  #     start_byte: 4,
  #     old_end_byte: 5,    # "1" ends at byte 5
  #     new_end_byte: 6,    # "42" ends at byte 6
  #     start_point: { row: 0, column: 4 },
  #     old_end_point: { row: 0, column: 5 },
  #     new_end_point: { row: 0, column: 6 }
  #   )
  #
  #   # Re-parse with the edited tree for incremental parsing
  #   new_tree = parser.parse_string(tree, "x = 42")
  def edit(start_byte:, old_end_byte:, new_end_byte:, start_point:, old_end_point:, new_end_point:)
    # MRI backend (ruby_tree_sitter) requires an InputEdit object
    if defined?(::TreeSitter::InputEdit) && @inner_tree.is_a?(::TreeSitter::Tree)
      input_edit = ::TreeSitter::InputEdit.new
      input_edit.start_byte = start_byte
      input_edit.old_end_byte = old_end_byte
      input_edit.new_end_byte = new_end_byte

      # Hash points are converted; ready-made Point objects pass through
      input_edit.start_point = make_point(start_point)
      input_edit.old_end_point = make_point(old_end_point)
      input_edit.new_end_point = make_point(new_end_point)

      @inner_tree.edit(input_edit)
    else
      # Other backends may accept keyword arguments directly
      @inner_tree.edit(
        start_byte: start_byte,
        old_end_byte: old_end_byte,
        new_end_byte: new_end_byte,
        start_point: start_point,
        old_end_point: old_end_point,
        new_end_point: new_end_point,
      )
    end
  rescue NoMethodError => e
    # Only a missing `edit` is translated into NotAvailable; any other
    # NoMethodError is re-raised untouched.
    # NOTE(review): the message substring match is a broad fallback and can
    # also catch unrelated errors whose text merely mentions "edit".
    raise unless e.name == :edit || e.message.include?("edit")
    raise TreeHaver::NotAvailable,
      "Incremental parsing not supported by current backend. " \
        "Use MRI (ruby_tree_sitter), Rust (tree_stump), or Java (java-tree-sitter) backend."
  end

  # Check if the current backend supports incremental parsing
  #
  # Incremental parsing allows tree-sitter to reuse unchanged nodes when
  # re-parsing edited source code, improving performance for large files
  # with small edits.
  #
  # @return [Boolean] true if {#edit} can be called on this tree
  # @example
  #   if tree.supports_editing?
  #     tree.edit(...)
  #     new_tree = parser.parse_string(tree, edited_source)
  #   else
  #     # Fall back to full re-parse
  #     new_tree = parser.parse(edited_source)
  #   end
  def supports_editing?
    # Try to get the edit method to verify it exists
    # This is more reliable than respond_to? with Delegator wrappers
    @inner_tree.method(:edit)
    true
  rescue NameError
    # NameError is the parent class of NoMethodError, so this catches both
    false
  end

  # String representation
  #
  # Shows the source size in bytes ("unknown" when no source was given) and
  # the class name of the wrapped tree ("nil" when there is none).
  #
  # @return [String]
  def inspect
    inner_class = @inner_tree ? @inner_tree.class.name : "nil"
    "#<#{self.class} source_length=#{@source&.bytesize || "unknown"} inner_tree=#{inner_class}>"
  end

  # Check if tree responds to a method (includes delegation to inner_tree)
  #
  # Only PUBLIC methods of the inner tree are reported, because
  # {#method_missing} forwards with `public_send` and therefore can never
  # reach a private backend method. Reporting private ones would make
  # `respond_to?(name, true)` and `method(name)` promise calls that fail.
  #
  # @param method_name [Symbol] method to check
  # @param include_private [Boolean] include private methods (applies to
  #   this object's own lookup via `super`, not to the inner tree)
  # @return [Boolean]
  def respond_to_missing?(method_name, include_private = false)
    @inner_tree.respond_to?(method_name) || super
  end

  # Delegate unknown methods to the underlying backend-specific tree
  #
  # This provides passthrough access for advanced usage when you need
  # backend-specific features not exposed by TreeHaver's unified API.
  #
  # The delegation is automatic and transparent - you can call backend-specific
  # methods directly on the TreeHaver::Tree and they'll be forwarded to the
  # underlying tree implementation.
  #
  # @param method_name [Symbol] method to call
  # @param args [Array] positional arguments to pass
  # @param kwargs [Hash] keyword arguments to pass
  # @param block [Proc] block to pass
  # @return [Object] result from the underlying tree
  # @raise [NoMethodError] if the inner tree has no such public method
  #
  # @example Using TreeStump-specific methods
  #   # print_dot_graph is TreeStump-specific
  #   File.open("tree.dot", "w") do |f|
  #     tree.print_dot_graph(f)  # Delegated to inner_tree
  #   end
  #
  # @example Safe usage with respond_to? check
  #   if tree.respond_to?(:print_dot_graph)
  #     File.open("tree.dot", "w") { |f| tree.print_dot_graph(f) }
  #   end
  #
  # @example Equivalent explicit access
  #   tree.print_dot_graph(file)              # Via passthrough (method_missing)
  #   tree.inner_tree.print_dot_graph(file)   # Explicit access (same result)
  #
  # @note This maintains backward compatibility with code written for
  #   specific backends while providing the benefits of the unified API
  def method_missing(method_name, *args, **kwargs, &block)
    if @inner_tree.respond_to?(method_name)
      @inner_tree.public_send(method_name, *args, **kwargs, &block)
    else
      super
    end
  end

  private

  # Convert a point hash to a TreeSitter::Point if available
  #
  # Returns the argument unchanged when `::TreeSitter::Point` is not defined
  # or when the argument already is a `::TreeSitter::Point`; otherwise builds
  # a new Point from the `:row` and `:column` entries.
  #
  # @api private
  # @param point_hash [Hash, TreeSitter::Point] position as `{ row:, column: }`
  # @return [Object] a TreeSitter::Point, or the argument itself
  def make_point(point_hash)
    return point_hash unless defined?(::TreeSitter::Point)
    return point_hash if point_hash.is_a?(::TreeSitter::Point)

    pt = ::TreeSitter::Point.new
    pt.row = point_hash[:row]
    pt.column = point_hash[:column]
    pt
  end
end
|
|
259
|
+
end
|
data/lib/tree_haver/version.rb
CHANGED
|
@@ -9,8 +9,8 @@ module TreeHaver
|
|
|
9
9
|
module Version
|
|
10
10
|
# Current version of the tree_haver gem
|
|
11
11
|
#
|
|
12
|
-
# @return [String] the version string (e.g., "
|
|
13
|
-
VERSION = "
|
|
12
|
+
# @return [String] the version string (e.g., "3.0.0")
|
|
13
|
+
VERSION = "3.0.0"
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
# Traditional location for VERSION constant
|