tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ module TreeHaver
7
7
  # This backend integrates with java-tree-sitter JARs on JRuby,
8
8
  # leveraging JRuby's native Java integration for optimal performance.
9
9
  #
10
- # java-tree-sitter provides Java bindings to Tree-sitter and supports:
10
+ # java-tree-sitter provides Java bindings to tree-sitter and supports:
11
11
  # - Parsing source code into syntax trees
12
12
  # - Incremental parsing via Parser.parse(Tree, String)
13
13
  # - The Query API for pattern matching
@@ -228,13 +228,57 @@ module TreeHaver
228
228
  # All Java backend implementation classes require JRuby and cannot be tested on MRI/CRuby.
229
229
  # JRuby-specific CI jobs would test this code.
230
230
  class Language
231
+ include Comparable
232
+
231
233
  attr_reader :impl
232
234
 
235
+ # The backend this language is for
236
+ # @return [Symbol]
237
+ attr_reader :backend
238
+
239
+ # The path this language was loaded from (if known)
240
+ # @return [String, nil]
241
+ attr_reader :path
242
+
243
+ # The symbol name (if known)
244
+ # @return [String, nil]
245
+ attr_reader :symbol
246
+
233
247
  # @api private
234
- def initialize(impl)
248
+ def initialize(impl, path: nil, symbol: nil)
235
249
  @impl = impl
250
+ @backend = :java
251
+ @path = path
252
+ @symbol = symbol
253
+ end
254
+
255
+ # Compare languages for equality
256
+ #
257
+ # Java languages are equal if they have the same backend, path, and symbol.
258
+ # Path and symbol uniquely identify a loaded language.
259
+ #
260
+ # @param other [Object] object to compare with
261
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
262
+ def <=>(other)
263
+ return unless other.is_a?(Language)
264
+ return unless other.backend == @backend
265
+
266
+ # Compare by path first, then symbol
267
+ cmp = (@path || "") <=> (other.path || "")
268
+ return cmp unless cmp.zero?
269
+
270
+ (@symbol || "") <=> (other.symbol || "")
271
+ end
272
+
273
+ # Hash value for this language (for use in Sets/Hashes)
274
+ # @return [Integer]
275
+ def hash
276
+ [@backend, @path, @symbol].hash
236
277
  end
237
278
 
279
+ # Alias eql? to ==
280
+ alias_method :eql?, :==
281
+
238
282
  # Load a language from a shared library
239
283
  #
240
284
  # There are three ways java-tree-sitter can load shared libraries:
@@ -298,7 +342,7 @@ module TreeHaver
298
342
  combined_lookup = grammar_lookup.or(Java.runtime_lookup)
299
343
 
300
344
  java_lang = Java.java_classes[:Language].load(combined_lookup, sym)
301
- new(java_lang)
345
+ new(java_lang, path: path, symbol: symbol)
302
346
  rescue ::Java::JavaLang::RuntimeException => e
303
347
  cause = e.cause
304
348
  root_cause = cause&.cause || cause
@@ -354,7 +398,7 @@ module TreeHaver
354
398
  # java-tree-sitter's Language.load(String) searches for the language
355
399
  # in the classpath using standard naming conventions
356
400
  java_lang = Java.java_classes[:Language].load(name)
357
- new(java_lang)
401
+ new(java_lang, symbol: "tree_sitter_#{name}")
358
402
  rescue ::Java::JavaLang::RuntimeException => e
359
403
  raise TreeHaver::NotAvailable,
360
404
  "Failed to load language '#{name}': #{e.message}. " \
@@ -383,38 +427,47 @@ module TreeHaver
383
427
 
384
428
  # Set the language for this parser
385
429
  #
386
- # @param lang [Language] the language to use
430
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
431
+ # This backend receives the Language wrapper's inner impl (java Language object).
432
+ #
433
+ # @param lang [Object] the Java language object (already unwrapped)
387
434
  # @return [void]
388
435
  def language=(lang)
389
- java_lang = lang.is_a?(Language) ? lang.impl : lang
390
- @parser.language = java_lang
436
+ # lang is already unwrapped by TreeHaver::Parser
437
+ @parser.language = lang
391
438
  end
392
439
 
393
440
  # Parse source code
394
441
  #
395
442
  # @param source [String] the source code to parse
396
- # @return [Tree] the parsed syntax tree
443
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
397
444
  def parse(source)
398
445
  java_tree = @parser.parse(source)
446
+ # Return this backend's Tree wrapper around the Java tree - TreeHaver::Parser will wrap it
399
447
  Tree.new(java_tree)
400
448
  end
401
449
 
402
450
  # Parse source code with optional incremental parsing
403
451
  #
452
+ # Note: old_tree is already unwrapped by TreeHaver::Parser before reaching this method.
453
+ # The backend receives the raw Tree wrapper's impl, not a TreeHaver::Tree.
454
+ #
404
455
  # When old_tree is provided and has been edited, tree-sitter will reuse
405
456
  # unchanged nodes for better performance.
406
457
  #
407
- # @param old_tree [Tree, nil] previous tree for incremental parsing
458
+ # @param old_tree [Tree, nil] previous backend tree for incremental parsing (already unwrapped)
408
459
  # @param source [String] the source code to parse
409
- # @return [Tree] the parsed syntax tree
460
+ # @return [Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
410
461
  # @see https://tree-sitter.github.io/java-tree-sitter/io/github/treesitter/jtreesitter/Parser.html#parse(io.github.treesitter.jtreesitter.Tree,java.lang.String)
411
462
  def parse_string(old_tree, source)
463
+ # old_tree is normally already unwrapped by TreeHaver::Parser; a backend Tree wrapper is still unwrapped below
412
464
  if old_tree
413
465
  java_old_tree = old_tree.is_a?(Tree) ? old_tree.impl : old_tree
414
466
  java_tree = @parser.parse(java_old_tree, source)
415
467
  else
416
468
  java_tree = @parser.parse(source)
417
469
  end
470
+ # Return this backend's Tree wrapper around the Java tree - TreeHaver::Parser will wrap it
418
471
  Tree.new(java_tree)
419
472
  end
420
473
  end
@@ -5,7 +5,7 @@ module TreeHaver
5
5
  # MRI backend using the ruby_tree_sitter gem
6
6
  #
7
7
  # This backend wraps the ruby_tree_sitter gem, which is a native C extension
8
- # for MRI Ruby. It provides the most feature-complete Tree-sitter integration
8
+ # for MRI Ruby. It provides the most feature-complete tree-sitter integration
9
9
  # on MRI, including support for the Query API.
10
10
  #
11
11
  # @note This backend only works on MRI Ruby, not JRuby or TruffleRuby
@@ -28,7 +28,7 @@ module TreeHaver
28
28
  return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
29
29
  @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
30
30
  begin
31
- require "ruby_tree_sitter"
31
+ require "tree_sitter" # Note: gem is ruby_tree_sitter but requires tree_sitter
32
32
 
33
33
  @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
34
34
  rescue LoadError
@@ -37,6 +37,15 @@ module TreeHaver
37
37
  @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
38
38
  end
39
39
 
40
+ # Reset the load state (primarily for testing)
41
+ #
42
+ # @return [void]
43
+ # @api private
44
+ def reset!
45
+ @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
46
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
47
+ end
48
+
40
49
  # Get capabilities supported by this backend
41
50
  #
42
51
  # @return [Hash{Symbol => Object}] capability map
@@ -56,19 +65,112 @@ module TreeHaver
56
65
 
57
66
  # Wrapper for ruby_tree_sitter Language
58
67
  #
59
- # This is a thin pass-through to ::TreeSitter::Language from ruby_tree_sitter.
68
+ # Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
69
+ # API across all backends.
60
70
  class Language
61
- # Load a language from a shared library path
71
+ include Comparable
72
+
73
+ # The wrapped TreeSitter::Language object
74
+ # @return [::TreeSitter::Language]
75
+ attr_reader :inner_language
76
+
77
+ # The backend this language is for
78
+ # @return [Symbol]
79
+ attr_reader :backend
80
+
81
+ # The path this language was loaded from (if known)
82
+ # @return [String, nil]
83
+ attr_reader :path
84
+
85
+ # The symbol name (if known)
86
+ # @return [String, nil]
87
+ attr_reader :symbol
88
+
89
+ # @api private
90
+ # @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
91
+ # @param path [String, nil] path language was loaded from
92
+ # @param symbol [String, nil] symbol name
93
+ def initialize(lang, path: nil, symbol: nil)
94
+ @inner_language = lang
95
+ @backend = :mri
96
+ @path = path
97
+ @symbol = symbol
98
+ end
99
+
100
+ # Compare languages for equality
101
+ #
102
+ # MRI languages are equal if they have the same backend, path, and symbol.
103
+ # Path and symbol uniquely identify a loaded language.
104
+ #
105
+ # @param other [Object] object to compare with
106
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
107
+ def <=>(other)
108
+ return unless other.is_a?(Language)
109
+ return unless other.backend == @backend
110
+
111
+ # Compare by path first, then symbol
112
+ cmp = (@path || "") <=> (other.path || "")
113
+ return cmp unless cmp.zero?
114
+
115
+ (@symbol || "") <=> (other.symbol || "")
116
+ end
117
+
118
+ # Hash value for this language (for use in Sets/Hashes)
119
+ # @return [Integer]
120
+ def hash
121
+ [@backend, @path, @symbol].hash
122
+ end
123
+
124
+ # Alias eql? to ==
125
+ alias_method :eql?, :==
126
+
127
+ # Convert to the underlying TreeSitter::Language for passing to parser
128
+ #
129
+ # @return [::TreeSitter::Language]
130
+ def to_language
131
+ @inner_language
132
+ end
133
+ alias_method :to_ts_language, :to_language
134
+
135
+ # Load a language from a shared library (preferred method)
62
136
  #
63
137
  # @param path [String] absolute path to the language shared library
64
- # @return [::TreeSitter::Language] the loaded language handle
138
+ # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
139
+ # @param name [String, nil] optional language name (e.g., "json"); when nil it is derived from symbol
140
+ # @return [Language] wrapped language handle
65
141
  # @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
66
142
  # @example
67
- # lang = TreeHaver::Backends::MRI::Language.from_path("/usr/local/lib/libtree-sitter-toml.so")
143
+ # lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
68
144
  class << self
69
- def from_path(path)
145
+ def from_library(path, symbol: nil, name: nil)
70
146
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
71
- ::TreeSitter::Language.load(path)
147
+
148
+ # ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
149
+ # where language_name is the language identifier (e.g., "toml", "json")
150
+ # NOT the full symbol name (e.g., NOT "tree_sitter_toml")
151
+ # and path_to_so is the full path to the .so file
152
+ #
153
+ # If name is not provided, derive it from symbol by stripping "tree_sitter_" prefix
154
+ language_name = name || symbol&.sub(/\Atree_sitter_/, "")
155
+ ts_lang = ::TreeSitter::Language.load(language_name, path)
156
+ new(ts_lang, path: path, symbol: symbol)
157
+ rescue NameError => e
158
+ # TreeSitter constant doesn't exist - backend not loaded
159
+ raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
160
+ rescue TreeSitter::TreeSitterError => e
161
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
162
+ # This includes: ParserNotFoundError, LanguageLoadError, SymbolNotFoundError, etc.
163
+ raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
164
+ end
165
+
166
+ # Load a language from a shared library path (legacy method)
167
+ #
168
+ # @param path [String] absolute path to the language shared library
169
+ # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
170
+ # @return [Language] wrapped language handle
171
+ # @deprecated Use {from_library} instead
172
+ def from_path(path, symbol: nil)
173
+ from_library(path, symbol: symbol)
72
174
  end
73
175
  end
74
176
  end
@@ -83,47 +185,72 @@ module TreeHaver
83
185
  def initialize
84
186
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
85
187
  @parser = ::TreeSitter::Parser.new
188
+ rescue NameError => e
189
+ # TreeSitter constant doesn't exist - backend not loaded
190
+ raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
191
+ rescue TreeSitter::TreeSitterError => e
192
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
193
+ raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
86
194
  end
87
195
 
88
196
  # Set the language for this parser
89
197
  #
90
- # @param lang [::TreeSitter::Language] the language to use
198
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
199
+ # This backend receives raw ::TreeSitter::Language objects, never wrapped ones.
200
+ #
201
+ # @param lang [::TreeSitter::Language] the language to use (already unwrapped)
91
202
  # @return [::TreeSitter::Language] the language that was set
203
+ # @raise [TreeHaver::NotAvailable] if setting language fails
92
204
  def language=(lang)
205
+ # lang is already unwrapped by TreeHaver::Parser, use directly
93
206
  @parser.language = lang
207
+ # Verify it was set
208
+ raise TreeHaver::NotAvailable, "Language not set correctly" if @parser.language.nil?
209
+
210
+ # Return the language object
211
+ lang
212
+ rescue TreeSitter::TreeSitterError => e
213
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
214
+ raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
94
215
  end
95
216
 
96
217
  # Parse source code
97
218
  #
219
+ # ruby_tree_sitter provides parse_string for string input
220
+ #
98
221
  # @param source [String] the source code to parse
99
- # @return [::TreeSitter::Tree] the parsed syntax tree
222
+ # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
223
+ # @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
100
224
  def parse(source)
101
- @parser.parse(source)
225
+ # ruby_tree_sitter's parse_string(old_tree, string) method
226
+ # Pass nil for old_tree (initial parse)
227
+ # Return raw tree - TreeHaver::Parser will wrap it
228
+ tree = @parser.parse_string(nil, source)
229
+ raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
230
+ tree
231
+ rescue TreeSitter::TreeSitterError => e
232
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
233
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
102
234
  end
103
235
 
104
236
  # Parse source code with optional incremental parsing
105
237
  #
106
- # @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing
238
+ # Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
239
+ # The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
240
+ #
241
+ # @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
107
242
  # @param source [String] the source code to parse
108
- # @return [::TreeSitter::Tree] the parsed syntax tree
243
+ # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
244
+ # @raise [TreeHaver::NotAvailable] if parsing fails
109
245
  def parse_string(old_tree, source)
246
+ # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
247
+ # Return raw tree - TreeHaver::Parser will wrap it
110
248
  @parser.parse_string(old_tree, source)
249
+ rescue TreeSitter::TreeSitterError => e
250
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
251
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
111
252
  end
112
253
  end
113
-
114
- # Wrapper for ruby_tree_sitter Tree
115
- #
116
- # Not used directly; TreeHaver passes through ::TreeSitter::Tree objects.
117
- class Tree
118
- # Not used directly; we pass through ruby_tree_sitter::Tree
119
- end
120
-
121
- # Wrapper for ruby_tree_sitter Node
122
- #
123
- # Not used directly; TreeHaver passes through ::TreeSitter::Node objects.
124
- class Node
125
- # Not used directly; we pass through ruby_tree_sitter::Node
126
- end
127
254
  end
128
255
  end
129
256
  end
@@ -5,7 +5,7 @@ module TreeHaver
5
5
  # Rust backend using the tree_stump gem
6
6
  #
7
7
  # This backend wraps the tree_stump gem, which provides Ruby bindings to
8
- # Tree-sitter written in Rust. It offers native performance with Rust's
8
+ # tree-sitter written in Rust. It offers native performance with Rust's
9
9
  # safety guarantees and includes precompiled binaries for common platforms.
10
10
  #
11
11
  # tree_stump supports incremental parsing and the Query API, making it
@@ -54,14 +54,14 @@ module TreeHaver
54
54
  # @return [Hash{Symbol => Object}] capability map
55
55
  # @example
56
56
  # TreeHaver::Backends::Rust.capabilities
57
- # # => { backend: :rust, query: true, bytes_field: true, incremental: true }
57
+ # # => { backend: :rust, query: true, bytes_field: true, incremental: false }
58
58
  def capabilities
59
59
  return {} unless available?
60
60
  {
61
61
  backend: :rust,
62
62
  query: true,
63
63
  bytes_field: true,
64
- incremental: true,
64
+ incremental: false, # TreeStump doesn't currently expose incremental parsing to Ruby
65
65
  }
66
66
  end
67
67
  end
@@ -72,16 +72,52 @@ module TreeHaver
72
72
  # tree_stump uses a registration-based API where languages are registered
73
73
  # by name, then referenced by that name when setting parser language.
74
74
  class Language
75
+ include Comparable
76
+
75
77
  # The registered language name
76
78
  # @return [String]
77
79
  attr_reader :name
78
80
 
81
+ # The backend this language is for
82
+ # @return [Symbol]
83
+ attr_reader :backend
84
+
85
+ # The path this language was loaded from (if known)
86
+ # @return [String, nil]
87
+ attr_reader :path
88
+
79
89
  # @api private
80
90
  # @param name [String] the registered language name
81
- def initialize(name)
91
+ # @param path [String, nil] path language was loaded from
92
+ def initialize(name, path: nil)
82
93
  @name = name
94
+ @backend = :rust
95
+ @path = path
83
96
  end
84
97
 
98
+ # Compare languages for equality
99
+ #
100
+ # Rust languages are equal if they have the same backend and name.
101
+ # Name uniquely identifies a registered language in TreeStump.
102
+ #
103
+ # @param other [Object] object to compare with
104
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
105
+ def <=>(other)
106
+ return unless other.is_a?(Language)
107
+ return unless other.backend == @backend
108
+
109
+ @name <=> other.name
110
+ end
111
+
112
+ # Hash value for this language (for use in Sets/Hashes)
113
+ # @return [Integer]
114
+ def hash
115
+ [@backend, @name].hash
116
+ end
117
+
118
+ # Alias eql? to ==
119
+ alias_method :eql?, :==
120
+
85
121
  # Load a language from a shared library path
86
122
  #
87
123
  # @param path [String] absolute path to the language shared library
@@ -102,7 +138,7 @@ module TreeHaver
102
138
  # The name is used to derive the symbol automatically (tree_sitter_<name>)
103
139
  lang_name = name || File.basename(path, ".*").sub(/^libtree-sitter-/, "")
104
140
  ::TreeStump.register_lang(lang_name, path)
105
- new(lang_name)
141
+ new(lang_name, path: path)
106
142
  rescue RuntimeError => e
107
143
  raise TreeHaver::NotAvailable, "Failed to load language from #{path}: #{e.message}"
108
144
  end
@@ -128,11 +164,16 @@ module TreeHaver
128
164
 
129
165
  # Set the language for this parser
130
166
  #
131
- # @param lang [Language, String] the language to use (Language wrapper or name string)
167
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
168
+ # When called from TreeHaver::Parser, receives String (language name).
169
+ # For backward compatibility and backend tests, also handles Language wrapper.
170
+ #
171
+ # @param lang [Language, String] the language wrapper or name string
132
172
  # @return [Language, String] the language that was set
133
173
  def language=(lang)
134
- # tree_stump uses set_language with a string name
174
+ # Extract language name (handle both wrapper and raw string)
135
175
  lang_name = lang.respond_to?(:name) ? lang.name : lang.to_s
176
+ # tree_stump uses set_language with a string name
136
177
  @parser.set_language(lang_name)
137
178
  lang
138
179
  end
@@ -140,36 +181,26 @@ module TreeHaver
140
181
  # Parse source code
141
182
  #
142
183
  # @param source [String] the source code to parse
143
- # @return [Object] the parsed syntax tree
184
+ # @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
144
185
  def parse(source)
186
+ # Return raw tree_stump tree - TreeHaver::Parser will wrap it
145
187
  @parser.parse(source)
146
188
  end
147
189
 
148
190
  # Parse source code with optional incremental parsing
149
191
  #
150
- # @param old_tree [Object, nil] previous tree for incremental parsing
192
+ # Note: TreeStump does not currently expose incremental parsing to Ruby.
193
+ # The parse method always does a full parse, ignoring old_tree.
194
+ #
195
+ # @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing (ignored)
151
196
  # @param source [String] the source code to parse
152
- # @return [Object] the parsed syntax tree
153
- def parse_string(old_tree, source)
154
- # tree_stump doesn't have parse_string, use parse instead
155
- # TODO: Check if tree_stump supports incremental parsing
197
+ # @return [TreeStump::Tree] raw backend tree (wrapping happens in TreeHaver::Parser)
198
+ def parse_string(old_tree, source) # rubocop:disable Lint/UnusedMethodArgument
199
+ # TreeStump's parse method only accepts source as a single argument
200
+ # and internally always passes None for the old tree (no incremental parsing support)
156
201
  @parser.parse(source)
157
202
  end
158
203
  end
159
-
160
- # Wrapper for tree_stump Tree
161
- #
162
- # Not used directly; TreeHaver passes through tree_stump Tree objects.
163
- class Tree
164
- # Not used directly; we pass through tree_stump::Tree
165
- end
166
-
167
- # Wrapper for tree_stump Node
168
- #
169
- # Not used directly; TreeHaver passes through tree_stump::Node objects.
170
- class Node
171
- # Not used directly; we pass through tree_stump::Node
172
- end
173
204
  end
174
205
  end
175
206
  end