tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
data/lib/tree_haver/language.rb (deleted)
@@ -1,295 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Factory module for loading language grammars
5
- #
6
- # Language is the entry point for loading and using grammars. It provides
7
- # a unified interface that works across all backends (MRI, Rust, FFI, Java, Citrus, Parslet).
8
- #
9
- # This is a module with only module methods (factory pattern), not a class.
10
- # Backend-specific Language classes (e.g., Backends::Citrus::Language,
11
- # Backends::Parslet::Language) inherit from Base::Language.
12
- #
13
- # For tree-sitter backends, languages are loaded from shared library files (.so/.dylib/.dll).
14
- # For pure-Ruby backends (Citrus, Parslet, Prism, Psych), languages are built-in or provided by gems.
15
- #
16
- # == Loading Languages
17
- #
18
- # The primary way to load a language is via registration:
19
- #
20
- # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
21
- # language = TreeHaver::Language.toml
22
- #
23
- # For explicit loading without registration:
24
- #
25
- # language = TreeHaver::Language.from_library(
26
- # "/path/to/libtree-sitter-toml.so",
27
- # symbol: "tree_sitter_toml"
28
- # )
29
- #
30
- # For ruby_tree_sitter compatibility:
31
- #
32
- # language = TreeHaver::Language.load("toml", "/path/to/libtree-sitter-toml.so")
33
- #
34
- # @example Register and load a language
35
- # TreeHaver.register_language(:toml, path: "/path/to/grammar.so")
36
- # language = TreeHaver::Language.toml
37
- #
38
- # @see Base::Language The base class that backend Language classes inherit from
39
- module Language
40
- class << self
41
- # Load a language grammar from a shared library (ruby_tree_sitter compatibility)
42
- #
43
- # This method provides API compatibility with ruby_tree_sitter which uses
44
- # `Language.load(name, path)`.
45
- #
46
- # @param name [String] the language name (e.g., "toml")
47
- # @param path [String] absolute path to the language shared library
48
- # @param validate [Boolean] if true, validates the path for safety (default: true)
49
- # @return [Language] loaded language handle
50
- # @raise [NotAvailable] if the library cannot be loaded
51
- # @raise [ArgumentError] if the path fails security validation
52
- # @example
53
- # language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
54
- def load(name, path, validate: true)
55
- from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
56
- end
57
-
58
- # Load a language grammar from a shared library
59
- #
60
- # The library must export a function that returns a pointer to a TSLanguage struct.
61
- # By default, TreeHaver looks for a symbol named "tree_sitter_<name>".
62
- #
63
- # == Security
64
- #
65
- # By default, paths are validated using {PathValidator} to prevent path traversal
66
- # and other attacks. Set `validate: false` to skip validation (not recommended
67
- # unless you've already validated the path).
68
- #
69
- # @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
70
- # @param symbol [String, nil] name of the exported function (defaults to auto-detection)
71
- # @param name [String, nil] logical name for the language (used in caching)
72
- # @param validate [Boolean] if true, validates path and symbol for safety (default: true)
73
- # @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
74
- # @return [Language] loaded language handle
75
- # @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
76
- # @raise [ArgumentError] if path or symbol fails security validation
77
- # @example
78
- # language = TreeHaver::Language.from_library(
79
- # "/usr/local/lib/libtree-sitter-toml.so",
80
- # symbol: "tree_sitter_toml",
81
- # name: "toml"
82
- # )
83
- # @example With explicit backend
84
- # language = TreeHaver::Language.from_library(
85
- # "/usr/local/lib/libtree-sitter-toml.so",
86
- # symbol: "tree_sitter_toml",
87
- # backend: :ffi
88
- # )
89
- def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
90
- if validate
91
- unless PathValidator.safe_library_path?(path)
92
- errors = PathValidator.validation_errors(path)
93
- raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
94
- end
95
-
96
- if symbol && !PathValidator.safe_symbol_name?(symbol)
97
- raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
98
- "Symbol names must be valid C identifiers."
99
- end
100
- end
101
-
102
- # from_library only works with tree-sitter backends that support .so files
103
- # Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) don't support from_library
104
- mod = TreeHaver.resolve_native_backend_module(backend)
105
-
106
- if mod.nil?
107
- if backend
108
- raise NotAvailable, "Requested backend #{backend.inspect} is not available or does not support shared libraries"
109
- else
110
- raise NotAvailable,
111
- "No native tree-sitter backend is available for loading shared libraries. " \
112
- "Available native backends (MRI, Rust, FFI, Java) require platform-specific setup. " \
113
- "For pure-Ruby parsing, use backend-specific Language classes directly (e.g., Prism, Psych, Citrus)."
114
- end
115
- end
116
-
117
- # Backend must implement .from_library; fallback to .from_path for older impls
118
- # Include effective backend AND ENV vars in cache key since they affect loading
119
- effective_b = TreeHaver.resolve_effective_backend(backend)
120
- key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
121
- LanguageRegistry.fetch(key) do
122
- if mod::Language.respond_to?(:from_library)
123
- mod::Language.from_library(path, symbol: symbol, name: name)
124
- else
125
- mod::Language.from_path(path)
126
- end
127
- end
128
- end
129
- # Alias for {from_library}
130
- # @see from_library
131
- alias_method :from_path, :from_library
132
-
133
- # Dynamic helper to load a registered language by name
134
- #
135
- # After registering a language with {TreeHaver.register_language},
136
- # you can load it using a method call. The appropriate backend will be
137
- # used based on registration and current backend.
138
- #
139
- # @example With tree-sitter
140
- # TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
141
- # language = TreeHaver::Language.toml
142
- #
143
- # @example With both backends
144
- # TreeHaver.register_language(:toml,
145
- # path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
146
- # TreeHaver.register_language(:toml,
147
- # grammar_module: TomlRB::Document)
148
- # language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
149
- #
150
- # @param method_name [Symbol] the registered language name
151
- # @param args [Array] positional arguments
152
- # @param kwargs [Hash] keyword arguments
153
- # @return [Language] loaded language handle
154
- # @raise [NoMethodError] if the language name is not registered
155
- def method_missing(method_name, *args, **kwargs, &block)
156
- # Resolve only if the language name was registered
157
- all_backends = TreeHaver.registered_language(method_name)
158
- return super unless all_backends
159
-
160
- # Check current backend
161
- current_backend = TreeHaver.backend_module
162
-
163
- # Determine which backend type to use
164
- backend_type = if current_backend == Backends::Citrus
165
- :citrus
166
- elsif current_backend == Backends::Parslet
167
- :parslet
168
- else
169
- :tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
170
- end
171
-
172
- # Get backend-specific registration
173
- reg = all_backends[backend_type]
174
-
175
- # If Citrus backend is active
176
- if backend_type == :citrus
177
- if reg && reg[:grammar_module]
178
- return Backends::Citrus::Language.new(reg[:grammar_module])
179
- end
180
-
181
- # Fall back to error if no Citrus grammar registered
182
- raise NotAvailable,
183
- "Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
184
- "Either register a Citrus grammar or use a tree-sitter backend. " \
185
- "Registered backends: #{all_backends.keys.inspect}"
186
- end
187
-
188
- # If Parslet backend is active
189
- if backend_type == :parslet
190
- if reg && reg[:grammar_class]
191
- return Backends::Parslet::Language.new(reg[:grammar_class])
192
- end
193
-
194
- # Fall back to error if no Parslet grammar registered
195
- raise NotAvailable,
196
- "Parslet backend is active but no Parslet grammar registered for :#{method_name}. " \
197
- "Either register a Parslet grammar or use a tree-sitter backend. " \
198
- "Registered backends: #{all_backends.keys.inspect}"
199
- end
200
-
201
- # For tree-sitter backends, try to load from path
202
- # If that fails, fall back to Citrus if available
203
- if reg && reg[:path]
204
- path = kwargs[:path] || args.first || reg[:path]
205
- # Symbol priority: kwargs override > registration > derive from method_name
206
- symbol = if kwargs.key?(:symbol)
207
- kwargs[:symbol]
208
- elsif reg[:symbol]
209
- reg[:symbol]
210
- else
211
- "tree_sitter_#{method_name}"
212
- end
213
- # Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
214
- # Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
215
- # e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
216
- name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
217
-
218
- begin
219
- return from_library(path, symbol: symbol, name: name)
220
- rescue NotAvailable, ArgumentError, LoadError => e
221
- # Tree-sitter failed to load - check for Citrus fallback
222
- # Note: FFI::NotFoundError inherits from LoadError, so it's caught here too
223
- handle_tree_sitter_load_failure(e, all_backends)
224
- end
225
- end
226
-
227
- # No tree-sitter path registered - check for Citrus or Parslet fallback
228
- # This enables auto-fallback when tree-sitter grammar is not installed
229
- # but a pure Ruby grammar (Citrus or Parslet) is available.
230
- # Only fall back when backend is :auto - explicit native backend requests should fail.
231
- if TreeHaver.effective_backend == :auto
232
- citrus_reg = all_backends[:citrus]
233
- if citrus_reg && citrus_reg[:grammar_module]
234
- return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
235
- end
236
-
237
- parslet_reg = all_backends[:parslet]
238
- if parslet_reg && parslet_reg[:grammar_class]
239
- return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
240
- end
241
- end
242
-
243
- # No appropriate registration found
244
- raise ArgumentError,
245
- "No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
246
- "Registered backends: #{all_backends.keys.inspect}"
247
- end
248
-
249
- # @api private
250
- def respond_to_missing?(method_name, include_private = false)
251
- !!TreeHaver.registered_language(method_name) || super
252
- end
253
-
254
- private
255
-
256
- # Handle tree-sitter load failure with optional Citrus/Parslet fallback
257
- #
258
- # This handles cases where:
259
- # - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
260
- # - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError inherits from LoadError)
261
- # - Invalid arguments were provided (ArgumentError)
262
- #
263
- # Fallback to Citrus/Parslet ONLY happens when:
264
- # - The effective backend is :auto (user didn't explicitly request a native backend)
265
- # - A Citrus or Parslet grammar is registered for the language
266
- #
267
- # If the user explicitly requested a native backend (:mri, :rust, :ffi, :java),
268
- # we should NOT silently fall back to pure Ruby - that would violate the user's intent.
269
- #
270
- # @param error [Exception] the original error
271
- # @param all_backends [Hash] all registered backends for the language
272
- # @return [Backends::Citrus::Language, Backends::Parslet::Language] if fallback available and allowed
273
- # @raise [Exception] re-raises original error if no fallback or fallback not allowed
274
- # @api private
275
- def handle_tree_sitter_load_failure(error, all_backends)
276
- # Only fall back to pure Ruby when backend is :auto
277
- # If user explicitly requested a native backend, respect that choice
278
- effective = TreeHaver.effective_backend
279
- if effective == :auto
280
- citrus_reg = all_backends[:citrus]
281
- if citrus_reg && citrus_reg[:grammar_module]
282
- return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
283
- end
284
-
285
- parslet_reg = all_backends[:parslet]
286
- if parslet_reg && parslet_reg[:grammar_class]
287
- return Backends::Parslet::Language.new(parslet_reg[:grammar_class])
288
- end
289
- end
290
- # No pure Ruby fallback allowed or available, re-raise the original error
291
- raise error
292
- end
293
- end
294
- end
295
- end
data/lib/tree_haver/language_registry.rb (deleted)
@@ -1,190 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Thread-safe language registrations and cache for loaded Language handles
5
- #
6
- # The LanguageRegistry provides two main functions:
7
- # 1. **Registrations**: Store mappings from language names to backend-specific configurations
8
- # 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
9
- #
10
- # The registry supports multiple backends for the same language, allowing runtime
11
- # switching, benchmarking, and fallback scenarios.
12
- #
13
- # == Supported Backend Types
14
- #
15
- # The registry is extensible and supports any backend type. Common types include:
16
- #
17
- # - `:tree_sitter` - Native tree-sitter grammars (.so files)
18
- # - `:citrus` - Citrus PEG parser grammars (pure Ruby)
19
- # - `:prism` - Ruby's Prism parser (Ruby source only)
20
- # - `:psych` - Ruby's Psych parser (YAML only)
21
- # - `:commonmarker` - Commonmarker gem (Markdown)
22
- # - `:markly` - Markly gem (Markdown/GFM)
23
- # - `:rbs` - RBS gem (RBS type signatures) - registered externally by rbs-merge
24
- #
25
- # External gems can register their own backend types using the same API.
26
- #
27
- # Registration structure:
28
- # ```ruby
29
- # @registrations = {
30
- # toml: {
31
- # tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
32
- # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
33
- # },
34
- # ruby: {
35
- # prism: { backend_module: TreeHaver::Backends::Prism }
36
- # },
37
- # yaml: {
38
- # psych: { backend_module: TreeHaver::Backends::Psych }
39
- # },
40
- # markdown: {
41
- # commonmarker: { backend_module: TreeHaver::Backends::Commonmarker },
42
- # markly: { backend_module: TreeHaver::Backends::Markly }
43
- # },
44
- # rbs: {
45
- # rbs: { backend_module: Rbs::Merge::Backends::RbsBackend } # External
46
- # }
47
- # }
48
- # ```
49
- #
50
- # @example Register tree-sitter grammar
51
- # ```ruby
52
- # TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
53
- # path: "/path/to/lib.so", symbol: "tree_sitter_toml")
54
- # ```
55
- #
56
- # @example Register Citrus grammar
57
- # ```ruby
58
- # TreeHaver::LanguageRegistry.register(:toml, :citrus,
59
- # grammar_module: TomlRB::Document, gem_name: "toml-rb")
60
- # ```
61
- #
62
- # @example Register a pure Ruby backend (internal or external)
63
- # ```ruby
64
- # TreeHaver::LanguageRegistry.register(:rbs, :rbs,
65
- # backend_module: Rbs::Merge::Backends::RbsBackend,
66
- # gem_name: "rbs")
67
- # ```
68
- #
69
- # @api private
70
- module LanguageRegistry
71
- @mutex = Mutex.new
72
- @cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
73
- @registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
74
-
75
- module_function
76
-
77
- # Register a language for a specific backend
78
- #
79
- # Stores backend-specific configuration for a language. Multiple backends
80
- # can be registered for the same language without conflict.
81
- #
82
- # @param name [Symbol, String] language identifier (e.g., :toml, :json, :ruby, :yaml, :rbs)
83
- # @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :prism, :psych, :commonmarker, :markly, or custom)
84
- # @param config [Hash] backend-specific configuration
85
- # @option config [String] :path tree-sitter library path (for tree-sitter backends)
86
- # @option config [String] :symbol exported symbol name (for tree-sitter backends)
87
- # @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
88
- # @option config [Module] :backend_module backend module with Language/Parser classes (for pure Ruby backends)
89
- # @option config [String] :gem_name gem name for error messages and availability checks
90
- # @return [void]
91
- # @example Register tree-sitter grammar
92
- # LanguageRegistry.register(:toml, :tree_sitter,
93
- # path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
94
- # @example Register Citrus grammar
95
- # LanguageRegistry.register(:toml, :citrus,
96
- # grammar_module: TomlRB::Document, gem_name: "toml-rb")
97
- # @example Register pure Ruby backend (external gem)
98
- # LanguageRegistry.register(:rbs, :rbs,
99
- # backend_module: Rbs::Merge::Backends::RbsBackend, gem_name: "rbs")
100
- def register(name, backend_type, **config)
101
- key = name.to_sym
102
- backend_key = backend_type.to_sym
103
-
104
- @mutex.synchronize do
105
- @registrations[key] ||= {}
106
- @registrations[key][backend_key] = config.compact
107
- end
108
- nil
109
- end
110
-
111
- # Fetch registration entries for a language
112
- #
113
- # Returns all backend-specific configurations for a language.
114
- #
115
- # @param name [Symbol, String] language identifier
116
- # @param backend_type [Symbol, nil] optional backend type to filter by
117
- # @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config
118
- # @example Get all backends
119
- # entries = LanguageRegistry.registered(:toml)
120
- # # => {
121
- # # tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
122
- # # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
123
- # # }
124
- # @example Get specific backend
125
- # entry = LanguageRegistry.registered(:toml, :citrus)
126
- # # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
127
- def registered(name, backend_type = nil)
128
- @mutex.synchronize do
129
- lang_config = @registrations[name.to_sym]
130
- return unless lang_config
131
-
132
- if backend_type
133
- lang_config[backend_type.to_sym]
134
- else
135
- lang_config
136
- end
137
- end
138
- end
139
-
140
- # Fetch a cached language by key or compute and store it
141
- #
142
- # This method provides thread-safe memoization for loaded Language objects.
143
- # If the key exists in the cache, the cached value is returned immediately.
144
- # Otherwise, the block is called to compute the value, which is then cached.
145
- #
146
- # @param key [Array] cache key, typically [path, symbol, name]
147
- # @yieldreturn [Object] the computed language handle (called only on cache miss)
148
- # @return [Object] the cached or computed language handle
149
- # @example
150
- # language = LanguageRegistry.fetch(["/path/lib.so", "symbol", "toml"]) do
151
- # expensive_language_load_operation
152
- # end
153
- def fetch(key)
154
- @mutex.synchronize do
155
- return @cache[key] if @cache.key?(key)
156
- value = yield
157
- @cache[key] = value
158
- end
159
- end
160
-
161
- # Clear the language cache
162
- #
163
- # Removes all cached Language objects. The next call to {fetch} for any key
164
- # will recompute the value. Does not clear registrations.
165
- #
166
- # @return [void]
167
- # @example
168
- # LanguageRegistry.clear_cache!
169
- def clear_cache!
170
- @mutex.synchronize { @cache.clear }
171
- nil
172
- end
173
-
174
- # Clear all registrations and cache
175
- #
176
- # Removes all language registrations and cached Language objects.
177
- # Primarily used in tests to reset state between test cases.
178
- #
179
- # @return [void]
180
- # @example
181
- # LanguageRegistry.clear
182
- def clear
183
- @mutex.synchronize do
184
- @registrations.clear
185
- @cache.clear
186
- end
187
- nil
188
- end
189
- end
190
- end
data/lib/tree_haver/library_path_utils.rb (deleted)
@@ -1,80 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Utility methods for deriving tree-sitter symbol and language names from library paths
5
- #
6
- # This module provides consistent path parsing across all backends that load
7
- # tree-sitter grammar libraries from shared object files (.so/.dylib/.dll).
8
- #
9
- # @example
10
- # TreeHaver::LibraryPathUtils.derive_symbol_from_path("/usr/lib/libtree-sitter-toml.so")
11
- # # => "tree_sitter_toml"
12
- #
13
- # TreeHaver::LibraryPathUtils.derive_language_name_from_path("/usr/lib/libtree-sitter-toml.so")
14
- # # => "toml"
15
- module LibraryPathUtils
16
- module_function
17
-
18
- # Derive the tree-sitter symbol name from a library path
19
- #
20
- # Symbol names are the exported C function names (e.g., "tree_sitter_toml")
21
- # that return a pointer to the TSLanguage struct.
22
- #
23
- # Handles various naming conventions:
24
- # - libtree-sitter-toml.so → tree_sitter_toml
25
- # - libtree_sitter_toml.so → tree_sitter_toml
26
- # - tree-sitter-toml.so → tree_sitter_toml
27
- # - tree_sitter_toml.so → tree_sitter_toml
28
- # - toml.so → tree_sitter_toml (assumes simple language name)
29
- #
30
- # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
31
- # @return [String, nil] symbol like "tree_sitter_toml", or nil if path is nil
32
- def derive_symbol_from_path(path)
33
- return unless path
34
-
35
- # Extract filename without extension: "libtree-sitter-toml" or "toml"
36
- filename = File.basename(path, ".*")
37
-
38
- # Handle multi-part extensions like .so.0.24
39
- filename = filename.sub(/\.so(\.\d+)*\z/, "")
40
-
41
- # Match patterns and normalize to tree_sitter_<lang>
42
- case filename
43
- when /\Alib[-_]?tree[-_]sitter[-_](.+)\z/
44
- "tree_sitter_#{Regexp.last_match(1).tr("-", "_")}"
45
- when /\Atree[-_]sitter[-_](.+)\z/
46
- "tree_sitter_#{Regexp.last_match(1).tr("-", "_")}"
47
- else
48
- # Assume filename is just the language name (e.g., "toml.so" -> "tree_sitter_toml")
49
- # Also strip "lib" prefix if present (e.g., "libtoml.so" -> "tree_sitter_toml")
50
- lang = filename.sub(/\Alib/, "").tr("-", "_")
51
- "tree_sitter_#{lang}"
52
- end
53
- end
54
-
55
- # Derive the language name from a library path
56
- #
57
- # Language names are the short identifiers (e.g., "toml", "json", "ruby")
58
- # used by some backends (like tree_stump/Rust) to register grammars.
59
- #
60
- # @param path [String, nil] path like "/usr/lib/libtree-sitter-toml.so"
61
- # @return [String, nil] language name like "toml", or nil if path is nil
62
- def derive_language_name_from_path(path)
63
- symbol = derive_symbol_from_path(path)
64
- return unless symbol
65
-
66
- # Strip the "tree_sitter_" prefix to get the language name
67
- symbol.sub(/\Atree_sitter_/, "")
68
- end
69
-
70
- # Derive language name from a symbol
71
- #
72
- # @param symbol [String, nil] symbol like "tree_sitter_toml"
73
- # @return [String, nil] language name like "toml", or nil if symbol is nil
74
- def derive_language_name_from_symbol(symbol)
75
- return unless symbol
76
-
77
- symbol.sub(/\Atree_sitter_/, "")
78
- end
79
- end
80
- end