tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,362 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Backends
5
- # MRI backend using the ruby_tree_sitter gem
6
- #
7
- # This backend wraps the ruby_tree_sitter gem, which is a native C extension
8
- # for MRI Ruby. It provides the most feature-complete tree-sitter integration
9
- # on MRI, including support for the Query API.
10
- #
11
- # == Tree/Node Architecture
12
- #
13
- # This backend (like all tree-sitter backends: MRI, Rust, FFI, Java) does NOT
14
- # define its own Tree or Node classes. Instead:
15
- #
16
- # - Parser#parse returns raw `::TreeSitter::Tree` objects
17
- # - These are wrapped by `TreeHaver::Tree` (inherits from `Base::Tree`)
18
- # - `TreeHaver::Tree#root_node` wraps raw nodes in `TreeHaver::Node`
19
- #
20
- # This differs from pure-Ruby backends (Citrus, Prism, Psych) which define
21
- # their own `Backend::X::Tree` and `Backend::X::Node` classes.
22
- #
23
- # @see TreeHaver::Tree The wrapper class for tree-sitter Tree objects
24
- # @see TreeHaver::Node The wrapper class for tree-sitter Node objects
25
- # @see TreeHaver::Base::Tree Base class documenting the Tree API contract
26
- # @see TreeHaver::Base::Node Base class documenting the Node API contract
27
- #
28
- # == Platform Compatibility
29
- #
30
- # - MRI Ruby: ✓ Full support (fastest tree-sitter backend on MRI)
31
- # - JRuby: ✗ Cannot load native C extensions (runs on JVM)
32
- # - TruffleRuby: ✗ C extension not compatible with TruffleRuby
33
- #
34
- # @see https://github.com/Faveod/ruby-tree-sitter ruby_tree_sitter
35
- module MRI
36
- @load_attempted = false
37
- @loaded = false
38
-
39
- # Check if the MRI backend is available
40
- #
41
- # Attempts to require ruby_tree_sitter on first call and caches the result.
42
- #
43
- # @note When this method returns true, the FFI backend becomes permanently
44
- # unavailable for the remainder of the process. This is because loading
45
- # ruby_tree_sitter defines `::TreeSitter::Parser`, which the FFI backend
46
- # checks to detect conflicts. The MRI backend statically links tree-sitter,
47
- # while FFI dynamically links libtree-sitter.so - when both are loaded,
48
- # FFI will segfault when trying to set a language on a parser due to
49
- # incompatible pointer types from different tree-sitter builds.
50
- #
51
- # @return [Boolean] true if ruby_tree_sitter is available
52
- # @see TreeHaver::Backends::FFI.available? FFI availability check
53
- # @example
54
- # if TreeHaver::Backends::MRI.available?
55
- # puts "MRI backend is ready"
56
- # # Note: FFI backend is now blocked for this process
57
- # end
58
- class << self
59
- def available?
60
- return @loaded if @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
61
- @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
62
- begin
63
- # ruby_tree_sitter is a C extension that only works on MRI
64
- # It doesn't work on JRuby or TruffleRuby
65
- if RUBY_ENGINE == "ruby"
66
- require "tree_sitter"
67
- @loaded = true # rubocop:disable ThreadSafety/ClassInstanceVariable
68
- else
69
- # :nocov: only runs on non-MRI engines (JRuby, TruffleRuby)
70
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
71
- # :nocov:
72
- end
73
- rescue LoadError
74
- # :nocov: only runs when ruby_tree_sitter gem is not installed
75
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
76
- # :nocov:
77
- rescue StandardError
78
- # :nocov: defensive code - StandardError during require is extremely rare
79
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
80
- # :nocov:
81
- end
82
- @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
83
- end
84
-
85
- # Reset the load state (primarily for testing)
86
- #
87
- # @return [void]
88
- # @api private
89
- def reset!
90
- @load_attempted = false # rubocop:disable ThreadSafety/ClassInstanceVariable
91
- @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
92
- end
93
-
94
- # Get capabilities supported by this backend
95
- #
96
- # @return [Hash{Symbol => Object}] capability map
97
- # @example
98
- # TreeHaver::Backends::MRI.capabilities
99
- # # => { backend: :mri, query: true, bytes_field: true, incremental: true }
100
- def capabilities
101
- return {} unless available?
102
- {
103
- backend: :mri,
104
- query: true,
105
- bytes_field: true,
106
- incremental: true,
107
- }
108
- end
109
- end
110
-
111
- # Wrapper for ruby_tree_sitter Language
112
- #
113
- # Wraps ::TreeSitter::Language from ruby_tree_sitter to provide a consistent
114
- # API across all backends.
115
- class Language
116
- include Comparable
117
-
118
- # The wrapped TreeSitter::Language object
119
- # @return [::TreeSitter::Language]
120
- attr_reader :inner_language
121
-
122
- # The backend this language is for
123
- # @return [Symbol]
124
- attr_reader :backend
125
-
126
- # The path this language was loaded from (if known)
127
- # @return [String, nil]
128
- attr_reader :path
129
-
130
- # The symbol name (if known)
131
- # @return [String, nil]
132
- attr_reader :symbol
133
-
134
- # @api private
135
- # @param lang [::TreeSitter::Language] the language object from ruby_tree_sitter
136
- # @param path [String, nil] path language was loaded from
137
- # @param symbol [String, nil] symbol name
138
- def initialize(lang, path: nil, symbol: nil)
139
- @inner_language = lang
140
- @backend = :mri
141
- @path = path
142
- @symbol = symbol
143
- end
144
-
145
- # Get the language name
146
- #
147
- # Derives a name from the symbol or path.
148
- #
149
- # @return [Symbol] language name
150
- def language_name
151
- # Try to derive from symbol (e.g., "tree_sitter_toml" -> :toml)
152
- if @symbol
153
- name = @symbol.to_s.sub(/^tree_sitter_/, "")
154
- return name.to_sym
155
- end
156
-
157
- # Try to derive from path (e.g., "/path/to/libtree-sitter-toml.so" -> :toml)
158
- if @path
159
- name = LibraryPathUtils.derive_language_name_from_path(@path)
160
- return name.to_sym if name
161
- end
162
-
163
- :unknown
164
- end
165
-
166
- # Alias for language_name (API compatibility)
167
- alias_method :name, :language_name
168
-
169
- # Compare languages for equality
170
- #
171
- # MRI languages are equal if they have the same backend, path, and symbol.
172
- # Path and symbol uniquely identify a loaded language.
173
- #
174
- # @param other [Object] object to compare with
175
- # @return [Integer, nil] -1, 0, 1, or nil if not comparable
176
- def <=>(other)
177
- return unless other.is_a?(Language)
178
- return unless other.backend == @backend
179
-
180
- # Compare by path first, then symbol
181
- cmp = (@path || "") <=> (other.path || "")
182
- return cmp if cmp.nonzero?
183
-
184
- (@symbol || "") <=> (other.symbol || "")
185
- end
186
-
187
- # Hash value for this language (for use in Sets/Hashes)
188
- # @return [Integer]
189
- def hash
190
- [@backend, @path, @symbol].hash
191
- end
192
-
193
- # Alias eql? to ==
194
- alias_method :eql?, :==
195
-
196
- # Convert to the underlying TreeSitter::Language for passing to parser
197
- #
198
- # @return [::TreeSitter::Language]
199
- def to_language
200
- @inner_language
201
- end
202
- alias_method :to_ts_language, :to_language
203
-
204
- # Load a language from a shared library (preferred method)
205
- #
206
- # @param path [String] absolute path to the language shared library
207
- # @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
208
- # @param name [String, nil] optional language name (unused by MRI backend)
209
- # @return [Language] wrapped language handle
210
- # @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
211
- # @example
212
- # lang = TreeHaver::Backends::MRI::Language.from_library("/path/to/lib.so", symbol: "tree_sitter_json")
213
- class << self
214
- def from_library(path, symbol: nil, name: nil)
215
- # Derive symbol from path if not provided using shared utility
216
- symbol ||= LibraryPathUtils.derive_symbol_from_path(path)
217
- from_path(path, symbol: symbol, name: name)
218
- end
219
-
220
- private
221
-
222
- # Load a language from a shared library path (internal implementation)
223
- #
224
- # @param path [String] absolute path to the language shared library
225
- # @param symbol [String] the exported symbol name (e.g., "tree_sitter_json")
226
- # @param name [String, nil] optional language name
227
- # @return [Language] wrapped language handle
228
- # @api private
229
- def from_path(path, symbol: nil, name: nil)
230
- raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
231
-
232
- # ruby_tree_sitter's TreeSitter::Language.load takes (language_name, path_to_so)
233
- # where language_name is the language identifier (e.g., "toml", "json")
234
- # NOT the full symbol name (e.g., NOT "tree_sitter_toml")
235
- # and path_to_so is the full path to the .so file
236
- #
237
- # If name is not provided, derive it from symbol using shared utility
238
- language_name = name || LibraryPathUtils.derive_language_name_from_symbol(symbol)
239
- ts_lang = ::TreeSitter::Language.load(language_name, path)
240
- new(ts_lang, path: path, symbol: symbol)
241
- rescue NameError => e
242
- # TreeSitter constant doesn't exist - backend not loaded
243
- raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
244
- rescue Exception => e # rubocop:disable Lint/RescueException
245
- # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
246
- # We rescue Exception and check the class name dynamically to avoid NameError
247
- # at parse time when TreeSitter constant isn't loaded yet
248
- if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
249
- raise TreeHaver::NotAvailable, "Could not load language: #{e.message}"
250
- else
251
- raise # Re-raise if it's not a TreeSitter error
252
- end
253
- end
254
- end
255
- end
256
-
257
- # Wrapper for ruby_tree_sitter Parser
258
- #
259
- # This is a thin pass-through to ::TreeSitter::Parser from ruby_tree_sitter.
260
- class Parser
261
- # Create a new parser instance
262
- #
263
- # @raise [TreeHaver::NotAvailable] if ruby_tree_sitter is not available
264
- def initialize
265
- raise TreeHaver::NotAvailable, "ruby_tree_sitter not available" unless MRI.available?
266
- @parser = ::TreeSitter::Parser.new
267
- rescue NameError => e
268
- # TreeSitter constant doesn't exist - backend not loaded
269
- raise TreeHaver::NotAvailable, "ruby_tree_sitter not available: #{e.message}"
270
- rescue Exception => e # rubocop:disable Lint/RescueException
271
- # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
272
- # We rescue Exception and check the class name dynamically to avoid NameError
273
- # at parse time when TreeSitter constant isn't loaded yet
274
- if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
275
- raise TreeHaver::NotAvailable, "Could not create parser: #{e.message}"
276
- else
277
- raise # Re-raise if it's not a TreeSitter error
278
- end
279
- end
280
-
281
- # Set the language for this parser
282
- #
283
- # @param lang [::TreeSitter::Language, TreeHaver::Backends::MRI::Language] the language to use
284
- # @return [::TreeSitter::Language, TreeHaver::Backends::MRI::Language] the language that was set
285
- # @raise [TreeHaver::NotAvailable] if setting language fails
286
- def language=(lang)
287
- # Unwrap if it's a TreeHaver wrapper
288
- inner_lang = lang.respond_to?(:inner_language) ? lang.inner_language : lang
289
- @parser.language = inner_lang
290
- # Verify it was set
291
- raise TreeHaver::NotAvailable, "Language not set correctly" if @parser.language.nil?
292
-
293
- # Return the original language object (wrapped or unwrapped)
294
- lang
295
- rescue Exception => e # rubocop:disable Lint/RescueException
296
- # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
297
- # We rescue Exception and check the class name dynamically to avoid NameError
298
- # at parse time when TreeSitter constant isn't loaded yet
299
- if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
300
- raise TreeHaver::NotAvailable, "Could not set language: #{e.message}"
301
- else
302
- raise # Re-raise if it's not a TreeSitter error
303
- end
304
- end
305
-
306
- # Parse source code
307
- #
308
- # ruby_tree_sitter provides parse_string for string input
309
- #
310
- # @param source [String] the source code to parse
311
- # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
312
- # @raise [TreeHaver::NotAvailable] if parsing returns nil (usually means language not set)
313
- def parse(source)
314
- # ruby_tree_sitter's parse_string(old_tree, string) method
315
- # Pass nil for old_tree (initial parse)
316
- # Return raw tree - TreeHaver::Parser will wrap it
317
- tree = @parser.parse_string(nil, source)
318
- raise TreeHaver::NotAvailable, "Parse returned nil - is language set?" if tree.nil?
319
- tree
320
- rescue Exception => e # rubocop:disable Lint/RescueException
321
- # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
322
- # We rescue Exception and check the class name dynamically to avoid NameError
323
- # at parse time when TreeSitter constant isn't loaded yet
324
- if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
325
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
326
- else
327
- raise # Re-raise if it's not a TreeSitter error
328
- end
329
- end
330
-
331
- # Parse source code with optional incremental parsing
332
- #
333
- # Note: old_tree should already be unwrapped by TreeHaver::Parser before reaching this method.
334
- # The backend receives the raw inner tree (::TreeSitter::Tree or nil), not a wrapped TreeHaver::Tree.
335
- #
336
- # @param old_tree [::TreeSitter::Tree, nil] previous tree for incremental parsing (already unwrapped)
337
- # @param source [String] the source code to parse
338
- # @return [::TreeSitter::Tree] raw tree (NOT wrapped - wrapping happens in TreeHaver::Parser)
339
- # @raise [TreeHaver::NotAvailable] if parsing fails
340
- def parse_string(old_tree, source)
341
- # old_tree is already unwrapped by TreeHaver::Parser, pass it directly
342
- # Return raw tree - TreeHaver::Parser will wrap it
343
- @parser.parse_string(old_tree, source)
344
- rescue Exception => e # rubocop:disable Lint/RescueException
345
- # TreeSitter errors inherit from Exception (not StandardError) in ruby_tree_sitter v2+
346
- # We rescue Exception and check the class name dynamically to avoid NameError
347
- # at parse time when TreeSitter constant isn't loaded yet
348
- if defined?(TreeSitter::TreeSitterError) && e.is_a?(TreeSitter::TreeSitterError)
349
- raise TreeHaver::NotAvailable, "Could not parse source: #{e.message}"
350
- else
351
- raise # Re-raise if it's not a TreeSitter error
352
- end
353
- end
354
- end
355
-
356
- # Register the availability checker for RSpec dependency tags
357
- TreeHaver::BackendRegistry.register_availability_checker(:mri) do
358
- available?
359
- end
360
- end
361
- end
362
- end