tree_haver 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ module TreeHaver
28
28
  return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
29
29
  @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
30
30
  begin
31
- require "ruby_tree_sitter"
31
+ require "tree_sitter" # Note: gem is ruby_tree_sitter but requires tree_sitter
32
32
 
33
33
  @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
34
34
  rescue LoadError
@@ -37,6 +37,15 @@ module TreeHaver
37
37
  @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
38
38
  end
39
39
 
40
+ # Reset the load state (primarily for testing)
41
+ #
42
+ # @return [void]
43
+ # @api private
44
+ def reset!
45
+ @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
46
+ @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
47
+ end
48
+
40
49
  # Get capabilities supported by this backend
41
50
  #
42
51
  # @return [Hash{Symbol => Object}] capability map
@@ -56,19 +65,117 @@ module TreeHaver
56
65
 
57
66
  # Wrapper for ruby_tree_sitter Language
58
67
  #
59
- # This is a thin pass-through to ::TreeSitter::Language from ruby_tree_sitter.
68
+ # Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
69
+ # API across all backends.
60
70
  class Language
61
- # Load a language from a shared library path
71
+ include Comparable
72
+
73
+ # The wrapped TreeSitter::Language object
74
+ # @return [::TreeSitter::Language]
75
+ attr_reader :inner_language
76
+
77
+ # The backend this language is for
78
+ # @return [Symbol]
79
+ attr_reader :backend
80
+
81
+ # The path this language was loaded from (if known)
82
+ # @return [String, nil]
83
+ attr_reader :path
84
+
85
+ # The symbol name (if known)
86
+ # @return [String, nil]
87
+ attr_reader :symbol
88
+
89
+ # @api private
90
+ # @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
91
+ # @param path [String, nil] path language was loaded from
92
+ # @param symbol [String, nil] symbol name
93
+ def initialize(lang, path: nil, symbol: nil)
94
+ @inner_language = lang
95
+ @backend = :mri
96
+ @path = path
97
+ @symbol = symbol
98
+ end
99
+
100
+ # Compare languages for equality
101
+ #
102
+ # MRI languages are equal if they have the same backend, path, and symbol.
103
+ # Path and symbol uniquely identify a loaded language.
104
+ #
105
+ # @param other [Object] object to compare with
106
+ # @return [Integer, nil] -1, 0, 1, or nil if not comparable
107
+ def <=>(other)
108
+ return unless other.is_a?(Language)
109
+ return unless other.backend == @backend
110
+
111
+ # Compare by path first, then symbol
112
+ cmp = (@path || "") <=> (other.path || "")
113
+ return cmp if cmp.nonzero?
114
+
115
+ (@symbol || "") <=> (other.symbol || "")
116
+ end
117
+
118
+ # Hash value for this language (for use in Sets/Hashes)
119
+ # @return [Integer]
120
+ def hash
121
+ [@backend, @path, @symbol].hash
122
+ end
123
+
124
+ # Alias eql? to ==
125
+ alias_method :eql?, :==
126
+
127
+ # Convert to the underlying TreeSitter::Language for passing to parser
128
+ #
129
+ # @return [::TreeSitter::Language]
130
+ def to_language
131
+ @inner_language
132
+ end
133
+ alias_method :to_ts_language, :to_language
134
+
135
+ # Load a language from a shared library (preferred method)
62
136
  #
63
137
  # @param path [String] absolute path to the language shared library
64
- # @return [::TreeSitter::Language] the loaded language handle
138
+ # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
139
+ # @param name [String, nil] optional language name (e.g., "json"); when nil, it is derived from symbol by stripping the "tree_sitter_" prefix
140
+ # @return [Language] wrapped language handle
65
141
  # @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
66
142
  # @example
67
- # lang = TreeHaver::Backends::MRI::Language.from_path("/usr/local/lib/libtree-sitter-toml.so")
143
+ # lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
68
144
  class << self
69
- def from_path(path)
145
+ def from_library(path, symbol: nil, name: nil)
70
146
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
71
- ::TreeSitter::Language.load(path)
147
+
148
+ # ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
149
+ # where language_name is the language identifier (e.g., "toml", "json")
150
+ # NOT the full symbol name (e.g., NOT "tree_sitter_toml")
151
+ # and path_to_so is the full path to the .so file
152
+ #
153
+ # If name is not provided, derive it from symbol by stripping "tree_sitter_" prefix
154
+ language_name = name || symbol&.sub(/\Atree_sitter_/, "")
155
+ ts_lang = ::TreeSitter::Language.load(language_name, path)
156
+ new(ts_lang, path: path, symbol: symbol)
157
+ rescue NameError => e
158
+ # TreeSitter constant doesn't exist - backend not loaded
159
+ raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
160
+ rescue Exception => e # rubocop:disable Lint/RescueException
161
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
162
+ # We rescue Exception and test it with defined?/is_a? so that no rescue clause
163
+ # references the TreeSitter constant (a NameError if it isn't loaded yet)
164
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
165
+ raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
166
+ else
167
+ raise # Re-raise if it's not a TreeSitter error
168
+ end
169
+ end
170
+
171
+ # Load a language from a shared library path (legacy method)
172
+ #
173
+ # @param path [String] absolute path to the language shared library
174
+ # @param symbol [String, nil] the exported symbol name (e.g., "tree_sitter_json")
175
+ # @return [Language] wrapped language handle
176
+ # @deprecated Use {from_library} instead
177
+ def from_path(path, symbol: nil)
178
+ from_library(path, symbol: symbol)
72
179
  end
73
180
  end
74
181
  end
@@ -83,35 +190,94 @@ module TreeHaver
83
190
  def initialize
84
191
  raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
85
192
  @parser = ::TreeSitter::Parser.new
193
+ rescue NameError => e
194
+ # TreeSitter constant doesn't exist - backend not loaded
195
+ raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
196
+ rescue Exception => e # rubocop:disable Lint/RescueException
197
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
198
+ # We rescue Exception and test it with defined?/is_a? so that no rescue clause
199
+ # references the TreeSitter constant (a NameError if it isn't loaded yet)
200
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
201
+ raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
202
+ else
203
+ raise # Re-raise if it's not a TreeSitter error
204
+ end
86
205
  end
87
206
 
88
207
  # Set the language for this parser
89
208
  #
90
- # @param lang [::TreeSitter::Language] the language to use
209
+ # Note: TreeHaver::Parser unwraps language objects before calling this method.
210
+ # This backend receives raw ::TreeSitter::Language objects, never wrapped ones.
211
+ #
212
+ # @param lang [::TreeSitter::Language] the language to use (already unwrapped)
91
213
  # @return [::TreeSitter::Language] the language that was set
214
+ # @raise [TreeHaver::NotAvailable] if setting language fails
92
215
  def language=(lang)
216
+ # lang is already unwrapped by TreeHaver::Parser, use directly
93
217
  @parser.language = lang
218
+ # Verify it was set
219
+ raise TreeHaver::NotAvailable, "Language not set correctly" if @parser.language.nil?
220
+
221
+ # Return the language object
222
+ lang
223
+ rescue Exception => e # rubocop:disable Lint/RescueException
224
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
225
+ # We rescue Exception and test it with defined?/is_a? so that no rescue clause
226
+ # references the TreeSitter constant (a NameError if it isn't loaded yet)
227
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
228
+ raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
229
+ else
230
+ raise # Re-raise if it's not a TreeSitter error
231
+ end
94
232
  end
95
233
 
96
234
  # Parse source code
97
235
  #
236
+ # ruby_tree_sitter provides parse_string for string input
237
+ #
98
238
  # @param source [String] the source code to parse
99
- # @return [TreeHaver::Tree] wrapped tree
239
+ # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
240
+ # @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
100
241
  def parse(source)
101
- tree = @parser.parse(source)
102
- TreeHaver::Tree.new(tree, source: source)
242
+ # ruby_tree_sitter's parse_string(old_tree, string) method
243
+ # Pass nil for old_tree (initial parse)
244
+ # Return raw tree - TreeHaver::Parser will wrap it
245
+ tree = @parser.parse_string(nil, source)
246
+ raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
247
+ tree
248
+ rescue Exception => e # rubocop:disable Lint/RescueException
249
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
250
+ # We rescue Exception and test it with defined?/is_a? so that no rescue clause
251
+ # references the TreeSitter constant (a NameError if it isn't loaded yet)
252
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
253
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
254
+ else
255
+ raise # Re-raise if it's not a TreeSitter error
256
+ end
103
257
  end
104
258
 
105
259
  # Parse source code with optional incremental parsing
106
260
  #
107
- # @param old_tree [TreeHaver::Tree, nil] previous tree for incremental parsing
261
+ # Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
262
+ # The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
263
+ #
264
+ # @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
108
265
  # @param source [String] the source code to parse
109
- # @return [TreeHaver::Tree] wrapped tree
266
+ # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
267
+ # @raise [TreeHaver::NotAvailable] if parsing fails
110
268
  def parse_string(old_tree, source)
111
- # Unwrap if TreeHaver::Tree to get inner tree for incremental parsing
112
- inner_old_tree = old_tree.respond_to?(:inner_tree) ? old_tree.inner_tree : old_tree
113
- tree = @parser.parse_string(inner_old_tree, source)
114
- TreeHaver::Tree.new(tree, source: source)
269
+ # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
270
+ # Return raw tree - TreeHaver::Parser will wrap it
271
+ @parser.parse_string(old_tree, source)
272
+ rescue Exception => e # rubocop:disable Lint/RescueException
273
+ # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
274
+ # We rescue Exception and test it with defined?/is_a? so that no rescue clause
275
+ # references the TreeSitter constant (a NameError if it isn't loaded yet)
276
+ if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
277
+ raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
278
+ else
279
+ raise # Re-raise if it's not a TreeSitter error
280
+ end
115
281
  end
116
282
  end
117
283
  end