tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
data/lib/tree_haver.rb CHANGED
@@ -1,1322 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # External gems
4
- require "version_gem"
5
-
6
- # Standard library
7
- require "set"
8
-
9
- # This gem - only version can be required (never autoloaded)
10
3
  require_relative "tree_haver/version"
4
+ require_relative "tree_haver/backend_registry"
5
+ require_relative "tree_haver/backend_context"
6
+ require_relative "tree_haver/contracts"
7
+ require_relative "tree_haver/peg_backends"
8
+ require_relative "tree_haver/kaitai_backend"
9
+ require_relative "tree_haver/language_pack"
11
10
 
12
- # TreeHaver is a cross-Ruby adapter for code parsing with 10 backends.
13
- #
14
- # Provides a unified API for parsing source code across MRI Ruby, JRuby, and TruffleRuby
15
- # using tree-sitter grammars or language-specific native parsers.
16
- #
17
- # == Backends
18
- #
19
- # Supports 9 backends:
20
- # - Tree-sitter: MRI (C), Rust, FFI, Java
21
- # - Native parsers: Prism (Ruby), Psych (YAML), Commonmarker (Markdown), Markly (GFM)
22
- # - Pure Ruby: Citrus (portable fallback)
23
- #
24
- # == Platform Compatibility
25
- #
26
- # Not all backends work on all Ruby platforms:
27
- #
28
- # | Backend | MRI | JRuby | TruffleRuby |
29
- # |--------------|-----|-------|-------------|
30
- # | MRI (C ext) | ✓ | ✗ | ✗ |
31
- # | Rust | ✓ | ✗ | ✗ |
32
- # | FFI | ✓ | ✓ | ✗ |
33
- # | Java | ✗ | ✓ | ✗ |
34
- # | Prism | ✓ | ✓ | ✓ |
35
- # | Psych | ✓ | ✓ | ✓ |
36
- # | Citrus | ✓ | ✓ | ✓ |
37
- # | Commonmarker | ✓ | ✗ | ? |
38
- # | Markly | ✓ | ✗ | ? |
39
- #
40
- # - JRuby: Cannot load native C/Rust extensions; use FFI, Java, or pure Ruby backends
41
- # - TruffleRuby: FFI doesn't support STRUCT_BY_VALUE; magnus/rb-sys incompatible with C API;
42
- # use Prism, Psych, Citrus, or potentially Commonmarker/Markly
43
- #
44
- # @example Basic usage with tree-sitter
45
- # # Load a language grammar
46
- # language = TreeHaver::Language.from_library(
47
- # "/usr/local/lib/libtree-sitter-toml.so",
48
- # symbol: "tree_sitter_toml"
49
- # )
50
- #
51
- # # Create and configure a parser
52
- # parser = TreeHaver::Parser.new
53
- # parser.language = language
54
- #
55
- # # Parse source code
56
- # tree = parser.parse("[package]\nname = \"my-app\"")
57
- # root = tree.root_node
58
- #
59
- # # Use unified Position API (works across all backends)
60
- # puts root.start_line # => 1 (1-based)
61
- # puts root.source_position # => {start_line:, end_line:, start_column:, end_column:}
62
- #
63
- # @example Using language-specific backends
64
- # # Parse Ruby with Prism
65
- # TreeHaver.backend = :prism
66
- # parser = TreeHaver::Parser.new
67
- # parser.language = TreeHaver::Backends::Prism::Language.ruby
68
- # tree = parser.parse("class Example; end")
69
- #
70
- # # Parse YAML with Psych
71
- # TreeHaver.backend = :psych
72
- # parser = TreeHaver::Parser.new
73
- # parser.language = TreeHaver::Backends::Psych::Language.yaml
74
- # tree = parser.parse("key: value")
75
- #
76
- # # Parse Markdown with Commonmarker
77
- # TreeHaver.backend = :commonmarker
78
- # parser = TreeHaver::Parser.new
79
- # parser.language = TreeHaver::Backends::Commonmarker::Language.markdown
80
- # tree = parser.parse("# Heading\nParagraph")
81
- #
82
- # @example Using language registration
83
- # TreeHaver.register_language(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
84
- # language = TreeHaver::Language.toml
85
- #
86
- # @example Using GrammarFinder for automatic discovery
87
- # # GrammarFinder automatically locates grammar libraries on the system
88
- # finder = TreeHaver::GrammarFinder.new(:toml)
89
- # finder.register! if finder.available?
90
- # language = TreeHaver::Language.toml
91
- #
92
- # @example Selecting a backend
93
- # TreeHaver.backend = :mri # Force MRI (ruby_tree_sitter)
94
- # TreeHaver.backend = :rust # Force Rust (tree_stump)
95
- # TreeHaver.backend = :ffi # Force FFI
96
- # TreeHaver.backend = :java # Force Java (JRuby)
97
- # TreeHaver.backend = :prism # Force Prism (Ruby)
98
- # TreeHaver.backend = :psych # Force Psych (YAML)
99
- # TreeHaver.backend = :commonmarker # Force Commonmarker (Markdown)
100
- # TreeHaver.backend = :markly # Force Markly (GFM)
101
- # TreeHaver.backend = :citrus # Force Citrus (pure Ruby)
102
- # TreeHaver.backend = :auto # Auto-select (default)
103
- #
104
- # @see https://tree-sitter.github.io/tree-sitter/ tree-sitter documentation
105
- # @see GrammarFinder For automatic grammar library discovery
106
- # @see Backends For available parsing backends
107
11
  module TreeHaver
108
- # Autoload internal modules
109
- autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
110
- autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
111
- autoload :BackendAPI, File.join(__dir__, "tree_haver", "backend_api")
112
- autoload :BackendRegistry, File.join(__dir__, "tree_haver", "backend_registry")
113
-
114
- # Base classes for backend implementations
115
- autoload :Base, File.join(__dir__, "tree_haver", "base")
116
-
117
- # Base error class for TreeHaver exceptions
118
- # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
119
- #
120
- # @abstract Subclass to create specific error types
121
- class Error < Exception; end # rubocop:disable Lint/InheritException
122
-
123
- # Raised when a requested backend or feature is not available
124
- # These are serious errors that extend Exception (not StandardError).
125
- # @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
126
- #
127
- # This can occur when:
128
- # - Required native libraries are not installed
129
- # - The selected backend is not compatible with the current Ruby implementation
130
- # - A language grammar cannot be loaded
131
- #
132
- # @example Handling unavailable backends
133
- # begin
134
- # language = TreeHaver::Language.from_library("/path/to/grammar.so")
135
- # rescue TreeHaver::NotAvailable => e
136
- # puts "Grammar not available: #{e.message}"
137
- # end
138
- class NotAvailable < Error; end
139
-
140
- # Raised when attempting to use backends that are known to conflict
141
- #
142
- # This is a serious error that extends Exception (not StandardError) because
143
- # it prevents a segmentation fault. The MRI backend (ruby_tree_sitter) and
144
- # FFI backend cannot coexist in the same process - once MRI loads, FFI will
145
- # segfault when trying to set a language on a parser.
146
- #
147
- # This protection can be disabled with `TreeHaver.backend_protect = false`
148
- # but doing so risks segfaults.
149
- #
150
- # @example Handling backend conflicts
151
- # begin
152
- # # This will raise if MRI was already used
153
- # TreeHaver.with_backend(:ffi) { parser.language = lang }
154
- # rescue TreeHaver::BackendConflict => e
155
- # puts "Backend conflict: #{e.message}"
156
- # # Fall back to a compatible backend
157
- # end
158
- #
159
- # @example Disabling protection (not recommended)
160
- # TreeHaver.backend_protect = false
161
- # # Now you can test backend conflicts (at risk of segfaults)
162
- class BackendConflict < Error; end
163
-
164
- # Default Citrus configurations for known languages
165
- #
166
- # These are used by {TreeHaver.parser_for} when no explicit citrus_config is provided
167
- # and tree-sitter backends are not available (e.g., on TruffleRuby).
168
- #
169
- # @api private
170
- CITRUS_DEFAULTS = {
171
- toml: {
172
- gem_name: "toml-rb",
173
- grammar_const: "TomlRB::Document",
174
- require_path: "toml-rb",
175
- },
176
- }.freeze
177
-
178
- # Default Parslet configurations for known languages
179
- #
180
- # These are used by {TreeHaver.parser_for} when no explicit parslet_config is provided
181
- # and tree-sitter backends are not available (e.g., on TruffleRuby).
182
- #
183
- # @api private
184
- PARSLET_DEFAULTS = {
185
- toml: {
186
- gem_name: "toml",
187
- grammar_const: "TOML::Parslet",
188
- require_path: "toml",
189
- },
190
- }.freeze
191
-
192
- # Namespace for backend implementations
193
- #
194
- # TreeHaver provides multiple backends to support different Ruby implementations:
195
- # - {Backends::MRI} - Uses ruby_tree_sitter (MRI C extension)
196
- # - {Backends::Rust} - Uses tree_stump (Rust extension with precompiled binaries)
197
- # - {Backends::FFI} - Uses Ruby FFI to call libtree-sitter directly
198
- # - {Backends::Java} - Uses JRuby's Java integration
199
- # - {Backends::Citrus} - Uses Citrus PEG parser (pure Ruby, portable)
200
- # - {Backends::Parslet} - Uses Parslet PEG parser (pure Ruby, portable)
201
- # - {Backends::Prism} - Uses Ruby's built-in Prism parser (Ruby-only, stdlib in 3.4+)
202
- # - {Backends::Psych} - Uses Ruby's built-in Psych parser (YAML-only, stdlib)
203
- module Backends
204
- autoload :MRI, File.join(__dir__, "tree_haver", "backends", "mri")
205
- autoload :Rust, File.join(__dir__, "tree_haver", "backends", "rust")
206
- autoload :FFI, File.join(__dir__, "tree_haver", "backends", "ffi")
207
- autoload :Java, File.join(__dir__, "tree_haver", "backends", "java")
208
- autoload :Citrus, File.join(__dir__, "tree_haver", "backends", "citrus")
209
- autoload :Parslet, File.join(__dir__, "tree_haver", "backends", "parslet")
210
- autoload :Prism, File.join(__dir__, "tree_haver", "backends", "prism")
211
- autoload :Psych, File.join(__dir__, "tree_haver", "backends", "psych")
212
-
213
- # Maps each backend to an array of backends that block it from working.
214
- # For example, :ffi is blocked by :mri because once ruby_tree_sitter loads,
215
- # FFI calls to ts_parser_set_language will segfault.
216
- #
217
- # @return [Hash{Symbol => Array<Symbol>}]
218
- BLOCKED_BY = {
219
- mri: [],
220
- rust: [],
221
- ffi: [:mri], # FFI segfaults if MRI (ruby_tree_sitter) has been loaded
222
- java: [],
223
- citrus: [],
224
- parslet: [], # Parslet has no conflicts with other backends
225
- prism: [], # Prism has no conflicts with other backends
226
- psych: [], # Psych has no conflicts with other backends
227
- }.freeze
228
-
229
- # Pure Ruby backends that parse specific languages
230
- # These are language-specific and register themselves via LanguageRegistry
231
- #
232
- # @return [Hash{Symbol => Hash}] Maps backend name to language and module info
233
- PURE_RUBY_BACKENDS = {
234
- prism: {language: :ruby, module_name: "Prism"},
235
- psych: {language: :yaml, module_name: "Psych"},
236
- }.freeze
237
- end
238
-
239
- # Security utilities for validating paths before loading shared libraries
240
- #
241
- # @example Validate a path
242
- # TreeHaver::PathValidator.safe_library_path?("/usr/lib/libtree-sitter-toml.so")
243
- # # => true
244
- #
245
- # @see PathValidator
246
- autoload :PathValidator, File.join(__dir__, "tree_haver", "path_validator")
247
-
248
- # Generic grammar finder utility with built-in security validations
249
- #
250
- # GrammarFinder provides platform-aware discovery of tree-sitter grammar
251
- # libraries for any language. It validates paths from environment variables
252
- # to prevent path traversal and other attacks.
253
- #
254
- # @example Find and register a language
255
- # finder = TreeHaver::GrammarFinder.new(:toml)
256
- # finder.register! if finder.available?
257
- # language = TreeHaver::Language.toml
258
- #
259
- # @example Secure mode (trusted directories only)
260
- # finder = TreeHaver::GrammarFinder.new(:toml)
261
- # path = finder.find_library_path_safe # Ignores ENV, only trusted dirs
262
- #
263
- # @see GrammarFinder
264
- # @see PathValidator
265
- autoload :GrammarFinder, File.join(__dir__, "tree_haver", "grammar_finder")
266
-
267
- # Citrus grammar finder for discovering and registering Citrus-based parsers
268
- #
269
- # @example Register toml-rb
270
- # finder = TreeHaver::CitrusGrammarFinder.new(
271
- # language: :toml,
272
- # gem_name: "toml-rb",
273
- # grammar_const: "TomlRB::Document"
274
- # )
275
- # finder.register! if finder.available?
276
- #
277
- # @see CitrusGrammarFinder
278
- autoload :CitrusGrammarFinder, File.join(__dir__, "tree_haver", "citrus_grammar_finder")
279
-
280
- # Parslet grammar finder for discovering and registering Parslet-based parsers
281
- #
282
- # @example Register toml gem
283
- # finder = TreeHaver::ParsletGrammarFinder.new(
284
- # language: :toml,
285
- # gem_name: "toml",
286
- # grammar_const: "TOML::Parslet"
287
- # )
288
- # finder.register! if finder.available?
289
- #
290
- # @see ParsletGrammarFinder
291
- autoload :ParsletGrammarFinder, File.join(__dir__, "tree_haver", "parslet_grammar_finder")
292
-
293
- # Point class for position information (row, column)
294
- autoload :Point, File.join(__dir__, "tree_haver", "point")
295
-
296
- # Unified Node wrapper providing consistent API across backends
297
- autoload :Node, File.join(__dir__, "tree_haver", "node")
298
-
299
- # Unified Tree wrapper providing consistent API across backends
300
- autoload :Tree, File.join(__dir__, "tree_haver", "tree")
301
-
302
- # Language class for loading grammar shared libraries
303
- autoload :Language, File.join(__dir__, "tree_haver", "language")
304
-
305
- # Parser class for parsing source code into syntax trees
306
- autoload :Parser, File.join(__dir__, "tree_haver", "parser")
307
-
308
- # Native tree-sitter backends that support loading shared libraries (.so files)
309
- # These backends wrap the tree-sitter C library via various bindings.
310
- # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) are excluded.
311
- NATIVE_BACKENDS = %i[mri rust ffi java].freeze
312
-
313
- # Get the current backend selection
314
- #
315
- # @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
316
- # @note Can be set via ENV["TREE_HAVER_BACKEND"]
317
- class << self
318
- # Whether backend conflict protection is enabled
319
- #
320
- # When true (default), TreeHaver will raise BackendConflict if you try to
321
- # use a backend that is known to conflict with a previously used backend.
322
- # For example, FFI will not work after MRI has been used.
323
- #
324
- # Set to false to disable protection (useful for testing compatibility).
325
- #
326
- # @return [Boolean]
327
- # @example Disable protection for testing
328
- # TreeHaver.backend_protect = false
329
- def backend_protect=(value)
330
- @backend_protect_mutex ||= Mutex.new
331
- @backend_protect_mutex.synchronize { @backend_protect = value }
332
- end
333
-
334
- # Check if backend conflict protection is enabled
335
- #
336
- # @return [Boolean] true if protection is enabled (default)
337
- def backend_protect?
338
- return @backend_protect if defined?(@backend_protect) # rubocop:disable ThreadSafety/ClassInstanceVariable
339
- true # Default is protected
340
- end
341
-
342
- # Alias for backend_protect?
343
- def backend_protect
344
- backend_protect?
345
- end
346
-
347
- # Track which backends have been used in this process
348
- #
349
- # @return [Set<Symbol>] set of backend symbols that have been used
350
- def backends_used
351
- @backends_used ||= Set.new # rubocop:disable ThreadSafety/ClassInstanceVariable
352
- end
353
-
354
- # Record that a backend has been used
355
- #
356
- # @param backend [Symbol] the backend that was used
357
- # @return [void]
358
- # @api private
359
- def record_backend_usage(backend)
360
- backends_used << backend
361
- end
362
-
363
- # Check if a backend would conflict with previously used backends
364
- #
365
- # @param backend [Symbol] the backend to check
366
- # @return [Array<Symbol>] list of previously used backends that block this one
367
- def conflicting_backends_for(backend)
368
- blockers = Backends::BLOCKED_BY[backend] || []
369
- blockers & backends_used.to_a
370
- end
371
-
372
- # Check if using a backend would cause a conflict
373
- #
374
- # @param backend [Symbol] the backend to check
375
- # @raise [BackendConflict] if protection is enabled and there's a conflict
376
- # @return [void]
377
- def check_backend_conflict!(backend)
378
- return unless backend_protect?
379
-
380
- conflicts = conflicting_backends_for(backend)
381
- return if conflicts.empty?
382
-
383
- raise BackendConflict,
384
- "Cannot use #{backend} backend: it is blocked by previously used backend(s): #{conflicts.join(", ")}. " \
385
- "The #{backend} backend will segfault when #{conflicts.first} has already loaded. " \
386
- "To disable this protection (at risk of segfaults), set TreeHaver.backend_protect = false"
387
- end
388
-
389
- # @example
390
- # TreeHaver.backend # => :auto
391
- def backend
392
- return @backend if defined?(@backend) && @backend # rubocop:disable ThreadSafety/ClassInstanceVariable
393
-
394
- @backend = parse_single_backend_env # rubocop:disable ThreadSafety/ClassInstanceVariable
395
- end
396
-
397
- # Valid native backend names (require native extensions)
398
- VALID_NATIVE_BACKENDS = %w[mri rust ffi java].freeze
399
-
400
- # Valid pure Ruby backend names (no native extensions)
401
- VALID_RUBY_BACKENDS = %w[citrus parslet prism psych commonmarker markly].freeze
402
-
403
- # All valid backend names
404
- VALID_BACKENDS = (VALID_NATIVE_BACKENDS + VALID_RUBY_BACKENDS + %w[auto none]).freeze
405
-
406
- # Get allowed native backends from TREE_HAVER_NATIVE_BACKEND environment variable
407
- #
408
- # Supports comma-separated values like "mri,ffi".
409
- # Special values:
410
- # - "auto" or empty/unset: automatically select from available native backends
411
- # - "none": no native backends allowed (pure Ruby only)
412
- #
413
- # @return [Array<Symbol>] list of allowed native backend symbols, or [:auto] or [:none]
414
- # @example Allow only MRI and FFI
415
- # # TREE_HAVER_NATIVE_BACKEND=mri,ffi
416
- # TreeHaver.allowed_native_backends # => [:mri, :ffi]
417
- # @example Auto-select native backends (default)
418
- # # TREE_HAVER_NATIVE_BACKEND not set, empty, or "auto"
419
- # TreeHaver.allowed_native_backends # => [:auto]
420
- # @example Disable all native backends
421
- # # TREE_HAVER_NATIVE_BACKEND=none
422
- # TreeHaver.allowed_native_backends # => [:none]
423
- def allowed_native_backends
424
- @allowed_native_backends ||= parse_backend_list_env("TREE_HAVER_NATIVE_BACKEND", VALID_NATIVE_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
425
- end
426
-
427
- # Get allowed Ruby backends from TREE_HAVER_RUBY_BACKEND environment variable
428
- #
429
- # Supports comma-separated values like "citrus,prism".
430
- # Special values:
431
- # - "auto" or empty/unset: automatically select from available Ruby backends
432
- # - "none": no Ruby backends allowed (native only)
433
- #
434
- # @return [Array<Symbol>] list of allowed Ruby backend symbols, or [:auto] or [:none]
435
- # @example Allow only Citrus
436
- # # TREE_HAVER_RUBY_BACKEND=citrus
437
- # TreeHaver.allowed_ruby_backends # => [:citrus]
438
- # @example Auto-select Ruby backends (default)
439
- # # TREE_HAVER_RUBY_BACKEND not set, empty, or "auto"
440
- # TreeHaver.allowed_ruby_backends # => [:auto]
441
- def allowed_ruby_backends
442
- @allowed_ruby_backends ||= parse_backend_list_env("TREE_HAVER_RUBY_BACKEND", VALID_RUBY_BACKENDS) # rubocop:disable ThreadSafety/ClassInstanceVariable
443
- end
444
-
445
- # Check if a specific backend is allowed based on environment variables
446
- #
447
- # Checks TREE_HAVER_NATIVE_BACKEND for native backends and
448
- # TREE_HAVER_RUBY_BACKEND for pure Ruby backends.
449
- #
450
- # @param backend_name [Symbol, String] the backend to check
451
- # @return [Boolean] true if the backend is allowed
452
- # @example
453
- # # TREE_HAVER_NATIVE_BACKEND=mri
454
- # TreeHaver.backend_allowed?(:mri) # => true
455
- # TreeHaver.backend_allowed?(:ffi) # => false
456
- # TreeHaver.backend_allowed?(:citrus) # => true (Ruby backends use separate env var)
457
- def backend_allowed?(backend_name)
458
- backend_sym = backend_name.to_sym
459
-
460
- # Check if it's a native backend
461
- if VALID_NATIVE_BACKENDS.include?(backend_sym.to_s)
462
- allowed = allowed_native_backends
463
- return true if allowed == [:auto]
464
- return false if allowed == [:none]
465
- return allowed.include?(backend_sym)
466
- end
467
-
468
- # Check if it's a Ruby backend
469
- if VALID_RUBY_BACKENDS.include?(backend_sym.to_s)
470
- allowed = allowed_ruby_backends
471
- return true if allowed == [:auto]
472
- return false if allowed == [:none]
473
- return allowed.include?(backend_sym)
474
- end
475
-
476
- # Unknown backend or :auto - allow
477
- true
478
- end
479
-
480
- # Set the backend to use
481
- #
482
- # @param name [Symbol, String, nil] backend name (:auto, :mri, :rust, :ffi, :java, :citrus)
483
- # @return [Symbol, nil] the backend that was set
484
- # @example Force FFI backend
485
- # TreeHaver.backend = :ffi
486
- # @example Force Rust backend
487
- # TreeHaver.backend = :rust
488
- def backend=(name)
489
- @backend = name&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
490
- end
491
-
492
- # Reset backend selection memoization
493
- #
494
- # Primarily useful in tests to switch backends without cross-example leakage.
495
- #
496
- # @param to [Symbol, String, nil] backend name or nil to clear (defaults to :auto)
497
- # @return [void]
498
- # @example Reset to auto-selection
499
- # TreeHaver.reset_backend!
500
- # @example Reset to specific backend
501
- # TreeHaver.reset_backend!(to: :ffi)
502
- def reset_backend!(to: :auto)
503
- @backend = to&.to_sym # rubocop:disable ThreadSafety/ClassInstanceVariable
504
- @allowed_native_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
505
- @allowed_ruby_backends = nil # rubocop:disable ThreadSafety/ClassInstanceVariable
506
- end
507
-
508
- # Register built-in pure Ruby backends in the LanguageRegistry
509
- #
510
- # This registers Prism, Psych, Commonmarker, and Markly using the same
511
- # registration API that external backends use. This ensures consistent
512
- # behavior whether a backend is built-in or provided by an external gem.
513
- #
514
- # Called automatically when TreeHaver is first used, but can be called
515
- # manually in tests or when reset! has cleared the registry.
516
- #
517
- # @return [void]
518
- # @example Manual registration (usually not needed)
519
- # TreeHaver.register_builtin_backends!
520
- def register_builtin_backends!
521
- Backends::PURE_RUBY_BACKENDS.each do |backend_type, info|
522
- language = info[:language]
523
- module_name = info[:module_name]
524
-
525
- # Get the backend module
526
- backend_mod = Backends.const_get(module_name)
527
- next unless backend_mod
528
-
529
- # Register if available (lazy check - doesn't require the gem yet)
530
- LanguageRegistry.register(
531
- language,
532
- backend_type,
533
- backend_module: backend_mod,
534
- gem_name: module_name.downcase,
535
- )
536
- end
537
- end
538
-
539
- # Check if built-in backends have been registered
540
- #
541
- # @return [Boolean]
542
- # @api private
543
- def builtin_backends_registered?
544
- @builtin_backends_registered ||= false # rubocop:disable ThreadSafety/ClassInstanceVariable
545
- end
546
-
547
- # Ensure built-in backends are registered (idempotent)
548
- #
549
- # @return [void]
550
- # @api private
551
- def ensure_builtin_backends_registered!
552
- return if builtin_backends_registered?
553
- register_builtin_backends!
554
- @builtin_backends_registered = true # rubocop:disable ThreadSafety/ClassInstanceVariable
555
- end
556
-
557
- # Parse TREE_HAVER_BACKEND environment variable (single backend)
558
- #
559
- # @return [Symbol] the backend symbol (:auto if not set or invalid)
560
- # @api private
561
- def parse_single_backend_env
562
- env_value = ENV["TREE_HAVER_BACKEND"]
563
- return :auto if env_value.nil? || env_value.strip.empty?
564
-
565
- name = env_value.strip.downcase
566
- return :auto unless VALID_BACKENDS.include?(name) && name != "all" && name != "none"
567
-
568
- name.to_sym
569
- end
570
-
571
- # Parse a backend list environment variable
572
- #
573
- # @param env_var [String] the environment variable name
574
- # @param valid_backends [Array<String>] list of valid backend names
575
- # @return [Array<Symbol>] list of backend symbols, or [:auto] or [:none]
576
- # @api private
577
- def parse_backend_list_env(env_var, valid_backends)
578
- env_value = ENV[env_var]
579
-
580
- # Empty or unset means "auto"
581
- return [:auto] if env_value.nil? || env_value.strip.empty?
582
-
583
- normalized = env_value.strip.downcase
584
-
585
- # Handle special values
586
- return [:auto] if normalized == "auto"
587
- return [:none] if normalized == "none"
588
-
589
- # Split on comma and parse each backend
590
- backends = normalized.split(",").map(&:strip).uniq
591
-
592
- # Convert to symbols, filtering out invalid ones
593
- parsed = backends.filter_map do |name|
594
- valid_backends.include?(name) ? name.to_sym : nil
595
- end
596
-
597
- # Return :auto if no valid backends found
598
- parsed.empty? ? [:auto] : parsed
599
- end
600
-
601
- # Thread-local backend context storage
602
- #
603
- # Returns a hash containing the thread-local backend context with keys:
604
- # - :backend - The backend name (Symbol) or nil if using global default
605
- # - :depth - The nesting depth (Integer) for proper cleanup
606
- #
607
- # @return [Hash{Symbol => Object}] context hash with :backend and :depth keys
608
- # @example
609
- # ctx = TreeHaver.current_backend_context
610
- # ctx[:backend] # => nil or :ffi, :mri, etc.
611
- # ctx[:depth] # => 0, 1, 2, etc.
612
- def current_backend_context
613
- Thread.current[:tree_haver_backend_context] ||= {
614
- backend: nil, # nil means "use global default"
615
- depth: 0, # Track nesting depth for proper cleanup
616
- }
617
- end
618
-
619
- # Get the effective backend for current context
620
- #
621
- # Priority: thread-local context → global @backend → :auto
622
- #
623
- # @return [Symbol] the backend to use
624
- # @example
625
- # TreeHaver.effective_backend # => :auto (default)
626
- # @example With thread-local context
627
- # TreeHaver.with_backend(:ffi) do
628
- # TreeHaver.effective_backend # => :ffi
629
- # end
630
- def effective_backend
631
- ctx = current_backend_context
632
- ctx[:backend] || backend || :auto
633
- end
634
-
635
- # Execute a block with a specific backend in thread-local context
636
- #
637
- # This method provides temporary, thread-safe backend switching for a block of code.
638
- # The backend setting is automatically restored when the block exits, even if
639
- # an exception is raised. Supports nesting—inner blocks override outer blocks,
640
- # and each level is properly unwound.
641
- #
642
- # Thread Safety: Each thread maintains its own backend context, so concurrent
643
- # threads can safely use different backends without interfering with each other.
644
- #
645
- # Use Cases:
646
- # - Testing: Test the same code path with different backends
647
- # - Performance comparison: Benchmark parsing with different backends
648
- # - Fallback scenarios: Try one backend, fall back to another on failure
649
- # - Thread isolation: Different threads can use different backends safely
650
- #
651
- # @param name [Symbol, String] backend name (:mri, :rust, :ffi, :java, :citrus, :auto)
652
- # @yield block to execute with the specified backend
653
- # @return [Object] the return value of the block
654
- # @raise [ArgumentError] if backend name is nil
655
- # @raise [BackendConflict] if the requested backend conflicts with a previously used backend
656
- #
657
- # @example Basic usage
658
- # TreeHaver.with_backend(:mri) do
659
- # parser = TreeHaver::Parser.new
660
- # tree = parser.parse(source)
661
- # end
662
- # # Backend is automatically restored here
663
- #
664
- # @example Nested blocks (inner overrides outer)
665
- # TreeHaver.with_backend(:rust) do
666
- # parser1 = TreeHaver::Parser.new # Uses :rust
667
- # TreeHaver.with_backend(:citrus) do
668
- # parser2 = TreeHaver::Parser.new # Uses :citrus
669
- # end
670
- # parser3 = TreeHaver::Parser.new # Back to :rust
671
- # end
672
- #
673
- # @example Testing multiple backends
674
- # [:mri, :rust, :citrus].each do |backend_name|
675
- # TreeHaver.with_backend(backend_name) do
676
- # parser = TreeHaver::Parser.new
677
- # result = parser.parse(source)
678
- # puts "#{backend_name}: #{result.root_node.type}"
679
- # end
680
- # end
681
- #
682
- # @example Exception safety (backend restored even on error)
683
- # TreeHaver.with_backend(:mri) do
684
- # raise "Something went wrong"
685
- # rescue
686
- # # Handle error
687
- # end
688
- # # Backend is still restored to its previous value
689
- #
690
- # @example Thread isolation
691
- # threads = [:mri, :rust].map do |backend_name|
692
- # Thread.new do
693
- # TreeHaver.with_backend(backend_name) do
694
- # # Each thread uses its own backend independently
695
- # TreeHaver::Parser.new
696
- # end
697
- # end
698
- # end
699
- # threads.each(&:join)
700
- #
701
- # @see #effective_backend
702
- # @see #current_backend_context
703
- def with_backend(name)
704
- raise ArgumentError, "Backend name required" if name.nil?
705
-
706
- # Get context FIRST to ensure it exists
707
- ctx = current_backend_context
708
- old_backend = ctx[:backend]
709
- old_depth = ctx[:depth]
710
-
711
- begin
712
- # Set new backend and increment depth
713
- ctx[:backend] = name.to_sym
714
- ctx[:depth] += 1
715
-
716
- # Execute block
717
- yield
718
- ensure
719
- # Restore previous backend and depth
720
- # This ensures proper unwinding even with exceptions
721
- ctx[:backend] = old_backend
722
- ctx[:depth] = old_depth
723
- end
724
- end
725
-
726
- # Resolve the effective backend considering explicit override
727
- #
728
- # Priority: explicit > thread context > global > :auto
729
- #
730
- # @param explicit_backend [Symbol, String, nil] explicitly requested backend
731
- # @return [Symbol] the backend to use
732
- # @example
733
- # TreeHaver.resolve_effective_backend(:ffi) # => :ffi
734
- # @example With thread-local context
735
- # TreeHaver.with_backend(:mri) do
736
- # TreeHaver.resolve_effective_backend(nil) # => :mri
737
- # TreeHaver.resolve_effective_backend(:ffi) # => :ffi (explicit wins)
738
- # end
739
- def resolve_effective_backend(explicit_backend = nil)
740
- return explicit_backend.to_sym if explicit_backend
741
- effective_backend
742
- end
743
-
744
- # Get backend module for a specific backend (with explicit override)
745
- #
746
- # @param explicit_backend [Symbol, String, nil] explicitly requested backend
747
- # @return [Module, nil] the backend module or nil if not available
748
- # @raise [BackendConflict] if the backend conflicts with previously used backends
749
- # @example
750
- # mod = TreeHaver.resolve_backend_module(:ffi)
751
- # mod.capabilities[:backend] # => :ffi
752
- def resolve_backend_module(explicit_backend = nil)
753
- # Temporarily override effective backend
754
- requested = resolve_effective_backend(explicit_backend)
755
-
756
- mod = case requested
757
- when :mri
758
- Backends::MRI
759
- when :rust
760
- Backends::Rust
761
- when :ffi
762
- Backends::FFI
763
- when :java
764
- Backends::Java
765
- when :citrus
766
- Backends::Citrus
767
- when :parslet
768
- Backends::Parslet
769
- when :prism
770
- Backends::Prism
771
- when :psych
772
- Backends::Psych
773
- when :auto
774
- backend_module # Fall back to normal resolution for :auto
775
- else
776
- # Check if this is a registered plugin backend
777
- registered = registered_backend(requested)
778
- return registered if registered
779
-
780
- # Unknown backend name - return nil to trigger error in caller
781
- nil
782
- end
783
-
784
- # Return nil if the module doesn't exist
785
- return unless mod
786
-
787
- # Check if the backend is allowed by environment variables FIRST
788
- # This enforces TREE_HAVER_NATIVE_BACKEND and TREE_HAVER_RUBY_BACKEND as hard restrictions
789
- return if requested && requested != :auto && !backend_allowed?(requested)
790
-
791
- # Check for backend conflicts, before checking availability
792
- # This is critical because the conflict causes the backend to report unavailable
793
- # We want to raise a clear error explaining WHY it's unavailable
794
- # Use the requested backend name directly (not capabilities) because
795
- # capabilities may be empty when the backend is blocked/unavailable
796
- check_backend_conflict!(requested) if requested && requested != :auto
797
-
798
- # Now check if the backend is available
799
- # Why assume modules without available? are available?
800
- # - Some backends might be mocked in tests without an available? method
801
- # - This makes the code more defensive and test-friendly
802
- # - It allows graceful degradation if a backend module is incomplete
803
- # - Backward compatibility: if a module doesn't declare availability, assume it works
804
- return if mod.respond_to?(:available?) && !mod.available?
805
-
806
- # Record that this backend is being used
807
- record_backend_usage(requested) if requested && requested != :auto
808
-
809
- mod
810
- end
811
-
812
- # Resolve a native tree-sitter backend module (for from_library)
813
- #
814
- # This method is similar to resolve_backend_module but ONLY considers
815
- # backends that support loading shared libraries (.so files):
816
- # - MRI (ruby_tree_sitter C extension)
817
- # - Rust (tree_stump)
818
- # - FFI (ffi gem with libtree-sitter)
819
- # - Java (jtreesitter on JRuby)
820
- #
821
- # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) are NOT
822
- # considered because they don't support from_library.
823
- #
824
- # @param explicit_backend [Symbol, String, nil] explicitly requested backend
825
- # @return [Module, nil] the backend module or nil if none available
826
- # @raise [BackendConflict] if the backend conflicts with previously used backends
827
- def resolve_native_backend_module(explicit_backend = nil)
828
- # Short-circuit on TruffleRuby: no native backends work
829
- # - MRI: C extension, MRI only
830
- # - Rust: magnus requires MRI's C API
831
- # - FFI: STRUCT_BY_VALUE not supported
832
- # - Java: requires JRuby's Java interop
833
- if defined?(RUBY_ENGINE) && RUBY_ENGINE == "truffleruby"
834
- return unless explicit_backend # Auto-select: no backends available
835
- # If explicit backend requested, let it fail with proper error below
836
- end
837
-
838
- # Get the effective backend (considers thread-local and global settings)
839
- requested = resolve_effective_backend(explicit_backend)
840
-
841
- # If the effective backend is a native backend, use it
842
- if NATIVE_BACKENDS.include?(requested)
843
- return resolve_backend_module(requested)
844
- end
845
-
846
- # If a specific non-native backend was explicitly requested, return nil
847
- # (from_library only works with native backends that load .so files)
848
- return if explicit_backend
849
-
850
- # If effective backend is :auto, auto-select from native backends in priority order
851
- # Note: non-native backends set via with_backend are NOT used here because
852
- # from_library only works with native backends
853
- native_priority = if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
854
- %i[java ffi] # JRuby: Java first, then FFI
855
- else
856
- %i[mri rust ffi] # MRI: MRI first, then Rust, then FFI
857
- end
858
-
859
- native_priority.each do |backend|
860
- # Rescue BackendConflict to allow iteration to continue
861
- # This enables graceful fallback when a backend is blocked
862
-
863
- mod = resolve_backend_module(backend)
864
- return mod if mod
865
- rescue BackendConflict
866
- # This backend is blocked by a previously used backend, try the next one
867
- next
868
- end
869
-
870
- nil # No native backend available
871
- end
872
-
873
- # Determine the concrete backend module to use
874
- #
875
- # This method performs backend auto-selection when backend is :auto.
876
- # On JRuby, prefers Java backend if available, then FFI, then Citrus.
877
- # On MRI, prefers MRI backend if available, then Rust, then FFI, then Citrus.
878
- # Citrus is the final fallback as it's pure Ruby and works everywhere.
879
- #
880
- # @return [Module, nil] the backend module (Backends::MRI, Backends::Rust, Backends::FFI, Backends::Java, or Backends::Citrus), or nil if none available
881
- # @example
882
- # mod = TreeHaver.backend_module
883
- # if mod
884
- # puts "Using #{mod.capabilities[:backend]} backend"
885
- # end
886
- def backend_module
887
- requested = effective_backend # Changed from: backend
888
-
889
- # For explicit backends (not :auto), check for conflicts first
890
- # If the backend is blocked, fall through to auto-select
891
- if requested != :auto && backend_protect?
892
- conflicts = conflicting_backends_for(requested)
893
- unless conflicts.empty?
894
- # The explicitly requested backend is blocked - fall through to auto-select
895
- requested = :auto
896
- end
897
- end
898
-
899
- case requested
900
- when :mri
901
- Backends::MRI
902
- when :rust
903
- Backends::Rust
904
- when :ffi
905
- Backends::FFI
906
- when :java
907
- Backends::Java
908
- when :citrus
909
- Backends::Citrus
910
- when :parslet
911
- Backends::Parslet
912
- when :prism
913
- Backends::Prism
914
- when :psych
915
- Backends::Psych
916
- else
917
- # auto-select: prefer native/fast backends, fall back to pure Ruby (Citrus)
918
- # Each backend must be both allowed (by ENV) and available (gem installed)
919
- if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" && backend_allowed?(:java) && Backends::Java.available?
920
- Backends::Java
921
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:mri) && Backends::MRI.available?
922
- Backends::MRI
923
- elsif defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && backend_allowed?(:rust) && Backends::Rust.available?
924
- Backends::Rust
925
- elsif backend_allowed?(:ffi) && Backends::FFI.available?
926
- Backends::FFI
927
- elsif backend_allowed?(:citrus) && Backends::Citrus.available?
928
- Backends::Citrus # Pure Ruby fallback
929
- else
930
- # No backend available
931
- nil
932
- end
933
- end
934
- end
935
-
936
- # Get capabilities of the current backend
937
- #
938
- # Returns a hash describing what features the selected backend supports.
939
- # Common keys include:
940
- # - :backend - Symbol identifying the backend (:mri, :rust, :ffi, :java)
941
- # - :parse - Whether parsing is implemented
942
- # - :query - Whether the Query API is available
943
- # - :bytes_field - Whether byte position fields are available
944
- # - :incremental - Whether incremental parsing is supported
945
- #
946
- # @return [Hash{Symbol => Object}] capability map, or empty hash if no backend available
947
- # @example
948
- # TreeHaver.capabilities
949
- # # => { backend: :mri, query: true, bytes_field: true }
950
- def capabilities
951
- mod = backend_module
952
- return {} unless mod
953
- mod.capabilities
954
- end
955
-
956
- # -- Language registration API -------------------------------------------------
957
- # Delegates to LanguageRegistry for thread-safe registration and lookup.
958
- # Allows opting-in dynamic helpers like TreeHaver::Language.toml without
959
- # advertising all names by default.
960
-
961
- # Register a language helper by name (backend-agnostic)
962
- #
963
- # After registration, you can use dynamic helpers like `TreeHaver::Language.toml`
964
- # to load the registered language. TreeHaver will automatically use the appropriate
965
- # grammar based on the active backend.
966
- #
967
- # The `name` parameter is an arbitrary identifier you choose - it doesn't need to
968
- # match the actual language name. This is useful for:
969
- # - Testing: Use unique names like `:toml_test` to avoid collisions
970
- # - Aliasing: Register the same grammar under multiple names
971
- # - Versioning: Register different grammar versions as `:ruby_2` and `:ruby_3`
972
- #
973
- # The actual grammar identity comes from `path`/`symbol` (tree-sitter) or
974
- # `grammar_module` (Citrus), not from the name.
975
- #
976
- # IMPORTANT: This method INTENTIONALLY allows registering BOTH a tree-sitter
977
- # library AND a Citrus grammar for the same language IN A SINGLE CALL.
978
- # This is achieved by using separate `if` statements (not `elsif`) and no early
979
- # returns. This design is deliberate and provides significant benefits:
980
- #
981
- # Why register both backends for one language?
982
- # - Backend flexibility: Code works regardless of which backend is active
983
- # - Performance testing: Compare tree-sitter vs Citrus performance
984
- # - Gradual migration: Transition between backends without breaking code
985
- # - Fallback scenarios: Use Citrus when tree-sitter library unavailable
986
- # - Platform portability: tree-sitter on Linux/Mac, Citrus on JRuby/Windows
987
- #
988
- # The active backend determines which registration is used automatically.
989
- # No code changes needed to switch backends - just change TreeHaver.backend.
990
- #
991
- # @param name [Symbol, String] identifier for this registration (can be any name you choose)
992
- # @param path [String, nil] absolute path to the language shared library (for tree-sitter)
993
- # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
994
- # @param grammar_module [Module, nil] Citrus grammar module that responds to .parse(source)
995
- # @param grammar_class [Class, nil] Parslet grammar class that inherits from Parslet::Parser
996
- # @param backend_module [Module, nil] pure Ruby backend module with Language/Parser classes
997
- # @param backend_type [Symbol, nil] backend type for backend_module (defaults to module name)
998
- # @param gem_name [String, nil] optional gem name for error messages
999
- # @return [void]
1000
- # @example Register tree-sitter grammar only
1001
- # TreeHaver.register_language(
1002
- # :toml,
1003
- # path: "/usr/local/lib/libtree-sitter-toml.so",
1004
- # symbol: "tree_sitter_toml"
1005
- # )
1006
- # @example Register Citrus grammar only
1007
- # TreeHaver.register_language(
1008
- # :toml,
1009
- # grammar_module: TomlRB::Document,
1010
- # gem_name: "toml-rb"
1011
- # )
1012
- # @example Register Parslet grammar only
1013
- # TreeHaver.register_language(
1014
- # :toml,
1015
- # grammar_class: TOML::Parslet,
1016
- # gem_name: "toml"
1017
- # )
1018
- # @example Register pure Ruby backend (external gem like rbs-merge)
1019
- # TreeHaver.register_language(
1020
- # :rbs,
1021
- # backend_module: Rbs::Merge::Backends::RbsBackend,
1022
- # backend_type: :rbs,
1023
- # gem_name: "rbs"
1024
- # )
1025
- # @example Register BOTH backends in separate calls
1026
- # TreeHaver.register_language(
1027
- # :toml,
1028
- # path: "/usr/local/lib/libtree-sitter-toml.so",
1029
- # symbol: "tree_sitter_toml"
1030
- # )
1031
- # TreeHaver.register_language(
1032
- # :toml,
1033
- # grammar_module: TomlRB::Document,
1034
- # gem_name: "toml-rb"
1035
- # )
1036
- # @example Register BOTH backends in ONE call (recommended for maximum flexibility)
1037
- # TreeHaver.register_language(
1038
- # :toml,
1039
- # path: "/usr/local/lib/libtree-sitter-toml.so",
1040
- # symbol: "tree_sitter_toml",
1041
- # grammar_module: TomlRB::Document,
1042
- # gem_name: "toml-rb"
1043
- # )
1044
- # # Now TreeHaver::Language.toml works with ANY backend!
1045
- def register_language(name, path: nil, symbol: nil, grammar_module: nil, grammar_class: nil, backend_module: nil, backend_type: nil, gem_name: nil)
1046
- # Register tree-sitter backend if path provided
1047
- # Note: Uses `if` not `elsif` so both backends can be registered in one call
1048
- if path
1049
- LanguageRegistry.register(name, :tree_sitter, path: path, symbol: symbol)
1050
- end
1051
-
1052
- # Register Citrus backend if grammar_module provided
1053
- # Note: Uses `if` not `elsif` so both backends can be registered in one call
1054
- # This allows maximum flexibility - register once, use with any backend
1055
- if grammar_module
1056
- unless grammar_module.respond_to?(:parse)
1057
- raise ArgumentError, "Grammar module must respond to :parse"
1058
- end
1059
-
1060
- LanguageRegistry.register(name, :citrus, grammar_module: grammar_module, gem_name: gem_name)
1061
- end
1062
-
1063
- # Register Parslet backend if grammar_class provided
1064
- # Note: Uses `if` not `elsif` so multiple backends can be registered in one call
1065
- if grammar_class
1066
- unless grammar_class.respond_to?(:new)
1067
- raise ArgumentError, "Grammar class must respond to :new"
1068
- end
1069
-
1070
- LanguageRegistry.register(name, :parslet, grammar_class: grammar_class, gem_name: gem_name)
1071
- end
1072
-
1073
- # Register pure Ruby backend if backend_module provided
1074
- # This is used by external gems (like rbs-merge) to register their own backends
1075
- if backend_module
1076
- # Derive backend_type from module name if not provided
1077
- type = backend_type || backend_module.name.split("::").last.downcase.to_sym
1078
- LanguageRegistry.register(name, type, backend_module: backend_module, gem_name: gem_name)
1079
- end
1080
-
1081
- # Require at least one backend to be registered
1082
- if path.nil? && grammar_module.nil? && grammar_class.nil? && backend_module.nil?
1083
- raise ArgumentError, "Must provide at least one of: path (tree-sitter), grammar_module (Citrus), grammar_class (Parslet), or backend_module (pure Ruby)"
1084
- end
1085
-
1086
- # Note: No early return! This method intentionally processes all `if` blocks
1087
- # above to allow registering multiple backends for the same language.
1088
- # tree-sitter, Citrus, and Parslet can be registered simultaneously for maximum
1089
- # flexibility. See method documentation for rationale.
1090
- nil
1091
- end
1092
-
1093
- # Register a backend module
1094
- #
1095
- # Allows external gems to register their backend implementation so it can be
1096
- # found by TreeHaver.backend = :name and other lookup methods.
1097
- #
1098
- # @param name [Symbol] backend name (e.g. :rbs, :commonmarker)
1099
- # @param mod [Module] the backend module
1100
- # @return [void]
1101
- def register_backend(name, mod)
1102
- @backend_registry ||= {}
1103
- @backend_registry[name.to_sym] = mod
1104
- end
1105
-
1106
- # Get a registered backend module
1107
- #
1108
- # @param name [Symbol] backend name
1109
- # @return [Module, nil] registered backend module
1110
- def registered_backend(name)
1111
- @backend_registry ||= {}
1112
- @backend_registry[name.to_sym]
1113
- end
1114
-
1115
- # Fetch a registered language entry
1116
- #
1117
- # @api private
1118
- # @param name [Symbol, String] language identifier
1119
- # @return [Hash, nil] registration hash with keys :path and :symbol, or nil if not registered
1120
- def registered_language(name)
1121
- LanguageRegistry.registered(name)
1122
- end
1123
-
1124
- # Create a parser configured for a specific language
1125
- #
1126
- # Respects the effective backend setting (via TREE_HAVER_BACKEND env var,
1127
- # TreeHaver.backend=, or with_backend block).
1128
- #
1129
- # Supports four types of backends:
1130
- # 1. Tree-sitter native backends (auto-discovered or explicit path)
1131
- # 2. Citrus grammars (pure Ruby, via CITRUS_DEFAULTS or explicit config)
1132
- # 3. Parslet grammars (pure Ruby, via PARSLET_DEFAULTS or explicit config)
1133
- # 4. Pure Ruby backends (registered via backend_module, e.g., Prism, Psych, RBS)
1134
- #
1135
- # @param language_name [Symbol, String] the language to parse (e.g., :toml, :json, :ruby, :yaml, :rbs)
1136
- # @param library_path [String, nil] optional explicit path to tree-sitter grammar library
1137
- # @param symbol [String, nil] optional tree-sitter symbol name (defaults to "tree_sitter_<name>")
1138
- # @param citrus_config [Hash, nil] optional Citrus fallback configuration
1139
- # @param parslet_config [Hash, nil] optional Parslet fallback configuration
1140
- # @return [TreeHaver::Parser] configured parser with language set
1141
- # @raise [TreeHaver::NotAvailable] if no parser backend is available for the language
1142
- #
1143
- # @example Basic usage (auto-discovers grammar)
1144
- # parser = TreeHaver.parser_for(:toml)
1145
- #
1146
- # @example Force Citrus backend
1147
- # TreeHaver.with_backend(:citrus) { TreeHaver.parser_for(:toml) }
1148
- #
1149
- # @example Force Parslet backend
1150
- # TreeHaver.with_backend(:parslet) { TreeHaver.parser_for(:toml) }
1151
- #
1152
- # @example Use registered pure Ruby backend (e.g., RBS)
1153
- # # First, rbs-merge registers its backend:
1154
- # # TreeHaver.register_language(:rbs, backend_module: Rbs::Merge::RbsBackend, backend_type: :rbs)
1155
- # parser = TreeHaver.parser_for(:rbs)
1156
- def parser_for(language_name, library_path: nil, symbol: nil, citrus_config: nil, parslet_config: nil)
1157
- # Ensure built-in pure Ruby backends are registered
1158
- ensure_builtin_backends_registered!
1159
-
1160
- name = language_name.to_sym
1161
- symbol ||= "tree_sitter_#{name}"
1162
- requested = effective_backend
1163
-
1164
- # Determine which backends to try based on effective_backend
1165
- # When a specific backend is requested, only try that backend
1166
- try_tree_sitter = (requested == :auto) || NATIVE_BACKENDS.include?(requested)
1167
- try_citrus = (requested == :auto) || (requested == :citrus)
1168
- try_parslet = (requested == :auto) || (requested == :parslet)
1169
-
1170
- # When Citrus or Parslet is explicitly requested, don't try tree-sitter
1171
- if requested == :citrus || requested == :parslet
1172
- try_tree_sitter = false
1173
- end
1174
-
1175
- language = nil
1176
-
1177
- # First, check for registered pure Ruby backends
1178
- # These take precedence when explicitly requested or when no other backend is available
1179
- registration = registered_language(name)
1180
- # Find any registered backend_module (not tree_sitter, citrus, or parslet)
1181
- registration&.each do |backend_type, config|
1182
- next if %i[tree_sitter citrus parslet].include?(backend_type)
1183
- next unless config[:backend_module]
1184
-
1185
- backend_mod = config[:backend_module]
1186
- # Check if this backend is available
1187
- next unless backend_mod.respond_to?(:available?) && backend_mod.available?
1188
-
1189
- # If a specific backend was requested, only use if it matches
1190
- next if requested != :auto && requested != backend_type
1191
-
1192
- # Create parser from the backend module
1193
- if backend_mod.const_defined?(:Parser)
1194
- parser = backend_mod::Parser.new
1195
- if backend_mod.const_defined?(:Language)
1196
- lang_class = backend_mod::Language
1197
- # Try to get language by name (e.g., Language.ruby, Language.yaml, Language.rbs)
1198
- if lang_class.respond_to?(name)
1199
- parser.language = lang_class.public_send(name)
1200
- elsif lang_class.respond_to?(:from_library)
1201
- parser.language = lang_class.from_library(nil, name: name)
1202
- end
1203
- end
1204
- return parser
1205
- end
1206
- end
1207
-
1208
- # Try tree-sitter if applicable
1209
- if try_tree_sitter && !language
1210
- language = load_tree_sitter_language(name, library_path: library_path, symbol: symbol)
1211
- end
1212
-
1213
- # Try Citrus if applicable
1214
- if try_citrus && !language
1215
- language = load_citrus_language(name, citrus_config: citrus_config)
1216
- end
1217
-
1218
- # Try Parslet if applicable
1219
- if try_parslet && !language
1220
- language = load_parslet_language(name, parslet_config: parslet_config)
1221
- end
1222
-
1223
- # Raise if nothing worked
1224
- raise NotAvailable, "No parser available for #{name}. " \
1225
- "Install tree-sitter-#{name} or configure a Citrus/Parslet grammar." unless language
1226
-
1227
- # Create and configure parser
1228
- parser = Parser.new
1229
- parser.language = language
1230
- parser
1231
- end
1232
-
1233
- private
1234
-
1235
- # Load a tree-sitter language, either from registry or via auto-discovery
1236
- # @return [Language, nil]
1237
- # @raise [NotAvailable] if explicit library_path is provided but doesn't exist or can't load
1238
- def load_tree_sitter_language(name, library_path: nil, symbol: nil)
1239
- # If explicit path provided, it must work - don't swallow errors
1240
- if library_path && !library_path.empty?
1241
- raise NotAvailable, "Specified parser path does not exist: #{library_path}" unless File.exist?(library_path)
1242
- register_language(name, path: library_path, symbol: symbol)
1243
- return Language.public_send(name)
1244
- end
1245
-
1246
- # Auto-discovery: errors are acceptable, just return nil
1247
- begin
1248
- # Try already-registered tree-sitter language (not Citrus)
1249
- # But only if the registered path actually exists - ignore stale/test registrations
1250
- registration = registered_language(name)
1251
- ts_reg = registration&.dig(:tree_sitter)
1252
- if ts_reg && ts_reg[:path] && File.exist?(ts_reg[:path])
1253
- return Language.public_send(name, symbol: symbol)
1254
- end
1255
-
1256
- # Auto-discover via GrammarFinder
1257
- finder = GrammarFinder.new(name)
1258
- if finder.available?
1259
- finder.register!
1260
- return Language.public_send(name)
1261
- end
1262
- rescue NotAvailable, ArgumentError, LoadError
1263
- # Auto-discovery failed, that's okay
1264
- end
1265
-
1266
- nil
1267
- end
1268
-
1269
- # Load a Citrus language from configuration or defaults
1270
- # @return [Language, nil]
1271
- def load_citrus_language(name, citrus_config: nil)
1272
- config = citrus_config || CITRUS_DEFAULTS[name] || {}
1273
- return unless config[:gem_name] && config[:grammar_const]
1274
-
1275
- finder = CitrusGrammarFinder.new(
1276
- language: name,
1277
- gem_name: config[:gem_name],
1278
- grammar_const: config[:grammar_const],
1279
- require_path: config[:require_path],
1280
- )
1281
- return unless finder.available?
1282
-
1283
- finder.register!
1284
- Language.public_send(name)
1285
- rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
1286
- nil
1287
- end
1288
-
1289
- # Load a Parslet language from configuration or defaults
1290
- # @return [Language, nil]
1291
- def load_parslet_language(name, parslet_config: nil)
1292
- config = parslet_config || PARSLET_DEFAULTS[name] || {}
1293
- return unless config[:gem_name] && config[:grammar_const]
1294
-
1295
- finder = ParsletGrammarFinder.new(
1296
- language: name,
1297
- gem_name: config[:gem_name],
1298
- grammar_const: config[:grammar_const],
1299
- require_path: config[:require_path],
1300
- )
1301
- return unless finder.available?
1302
-
1303
- finder.register!
1304
- Language.public_send(name)
1305
- rescue NotAvailable, ArgumentError, LoadError, NameError, TypeError
1306
- nil
1307
- end
1308
- end
1309
-
1310
- # Language and Parser classes have been moved to separate files:
1311
- # - tree_haver/language.rb: TreeHaver::Language - loads grammar shared libraries
1312
- # - tree_haver/parser.rb: TreeHaver::Parser - parses source code into syntax trees
1313
- # - tree_haver/tree.rb: TreeHaver::Tree - unified wrapper providing consistent API
1314
- # - tree_haver/node.rb: TreeHaver::Node - unified wrapper providing consistent API
1315
- #
1316
- # These provide a unified interface across all backends (MRI, Rust, FFI, Java, Citrus).
1317
- # All backends now return properly wrapped TreeHaver::Tree and TreeHaver::Node objects.
1318
- end # end module TreeHaver
1319
-
1320
- TreeHaver::Version.class_eval do
1321
- extend VersionGem::Basic
12
+ PACKAGE_NAME = "tree_haver"
1322
13
  end