tree_haver 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +48 -0
- data/CITATION.cff +20 -0
- data/CODE_OF_CONDUCT.md +134 -0
- data/CONTRIBUTING.md +227 -0
- data/FUNDING.md +74 -0
- data/LICENSE.txt +21 -0
- data/README.md +1260 -0
- data/REEK +0 -0
- data/RUBOCOP.md +71 -0
- data/SECURITY.md +21 -0
- data/lib/tree_haver/backends/ffi.rb +410 -0
- data/lib/tree_haver/backends/java.rb +568 -0
- data/lib/tree_haver/backends/mri.rb +129 -0
- data/lib/tree_haver/backends/rust.rb +175 -0
- data/lib/tree_haver/compat.rb +43 -0
- data/lib/tree_haver/grammar_finder.rb +245 -0
- data/lib/tree_haver/language_registry.rb +139 -0
- data/lib/tree_haver/path_validator.rb +333 -0
- data/lib/tree_haver/version.rb +20 -0
- data/lib/tree_haver.rb +710 -0
- data/sig/tree_haver/backends.rbs +285 -0
- data/sig/tree_haver/grammar_finder.rbs +29 -0
- data/sig/tree_haver/path_validator.rbs +31 -0
- data/sig/tree_haver.rbs +131 -0
- data.tar.gz.sig +0 -0
- metadata +298 -0
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
module Backends
|
|
5
|
+
# Rust backend using the tree_stump gem
|
|
6
|
+
#
|
|
7
|
+
# This backend wraps the tree_stump gem, which provides Ruby bindings to
|
|
8
|
+
# Tree-sitter written in Rust. It offers native performance with Rust's
|
|
9
|
+
# safety guarantees and includes precompiled binaries for common platforms.
|
|
10
|
+
#
|
|
11
|
+
# tree_stump supports incremental parsing and the Query API, making it
|
|
12
|
+
# suitable for editor/IDE use cases where performance is critical.
|
|
13
|
+
#
|
|
14
|
+
# @note This backend works on MRI Ruby. JRuby/TruffleRuby support is unknown.
|
|
15
|
+
# @see https://github.com/anthropics/tree_stump tree_stump
|
|
16
|
+
module Rust
|
|
17
|
+
@load_attempted = false
|
|
18
|
+
@loaded = false
|
|
19
|
+
|
|
20
|
+
# Check if the Rust backend is available
|
|
21
|
+
#
|
|
22
|
+
# Attempts to require tree_stump on first call and caches the result.
|
|
23
|
+
#
|
|
24
|
+
# @return [Boolean] true if tree_stump is available
|
|
25
|
+
# @example
|
|
26
|
+
# if TreeHaver::Backends::Rust.available?
|
|
27
|
+
# puts "Rust backend is ready"
|
|
28
|
+
# end
|
|
29
|
+
class << self
  # Report whether the tree_stump gem can be loaded.
  #
  # The first call tries `require "tree_stump"`. The outcome is remembered in
  # @load_attempted / @loaded, so later calls return the memoized answer
  # without retrying the require.
  #
  # @return [Boolean] true if tree_stump was required successfully
  def available?
    unless @load_attempted # rubocop:disable ThreadSafety/ClassInstanceVariable
      @load_attempted = true # rubocop:disable ThreadSafety/ClassInstanceVariable
      @loaded = # rubocop:disable ThreadSafety/ClassInstanceVariable
        begin
          require "tree_stump"
          true
        rescue LoadError
          false
        end
    end

    @loaded # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Forget the memoized load result so the next {available?} call retries
  # the require (primarily for testing).
  #
  # @return [void]
  # @api private
  def reset!
    @load_attempted = @loaded = false # rubocop:disable ThreadSafety/ClassInstanceVariable
  end

  # Describe what this backend advertises.
  #
  # @return [Hash{Symbol => Object}] capability map; empty when the backend
  #   is not available
  # @example
  #   TreeHaver::Backends::Rust.capabilities
  #   # => { backend: :rust, query: true, bytes_field: true, incremental: true }
  def capabilities
    if available?
      {backend: :rust, query: true, bytes_field: true, incremental: true}
    else
      {}
    end
  end
end
|
|
68
|
+
|
|
69
|
+
# Wrapper for tree_stump Language
|
|
70
|
+
#
|
|
71
|
+
# Provides TreeHaver-compatible interface to tree_stump's language loading.
|
|
72
|
+
# tree_stump uses a registration-based API where languages are registered
|
|
73
|
+
# by name, then referenced by that name when setting parser language.
|
|
74
|
+
class Language
  # The registered language name
  # @return [String]
  attr_reader :name

  # @api private
  # @param name [String] the registered language name
  def initialize(name)
    @name = name
  end

  class << self
    # Load a language from a shared library path
    #
    # Registers the library with tree_stump via `TreeStump.register_lang(name, path)`
    # and returns a wrapper that remembers the registered name.
    #
    # @param path [String] path to the language shared library
    # @param symbol [String, nil] accepted for API consistency; not used by this method
    # @param name [String, nil] logical name for the language; when omitted it is
    #   derived from the file's basename with the extension and a leading
    #   "libtree-sitter-" prefix removed
    # @return [Language] a wrapper holding the registered language name
    # @raise [TreeHaver::NotAvailable] if tree_stump is not available, the file
    #   does not exist, or registration raises a RuntimeError
    # @example
    #   lang = TreeHaver::Backends::Rust::Language.from_library("/usr/local/lib/libtree-sitter-toml.so")
    def from_library(path, symbol: nil, name: nil) # rubocop:disable Lint/UnusedMethodArgument
      raise TreeHaver::NotAvailable, "tree_stump not available" unless Rust.available?

      # Check for the file up front so a missing library yields a clear error
      # instead of whatever register_lang would report.
      raise TreeHaver::NotAvailable, "Language library not found: #{path}" unless File.exist?(path)

      # \A anchors at the start of the whole basename. The previous ^ anchor
      # matches after any newline, so the prefix could be stripped mid-name.
      lang_name = name || File.basename(path, ".*").sub(/\Alibtree-sitter-/, "")
      ::TreeStump.register_lang(lang_name, path)
      new(lang_name)
    rescue RuntimeError => e
      raise TreeHaver::NotAvailable, "Failed to load language from #{path}: #{e.message}"
    end

    # Alias for compatibility
    #
    # @see from_library
    alias_method :from_path, :from_library
  end
end
|
|
116
|
+
|
|
117
|
+
# Wrapper for tree_stump Parser
|
|
118
|
+
#
|
|
119
|
+
# Provides TreeHaver-compatible interface to tree_stump's parser.
|
|
120
|
+
class Parser
  # Build a parser backed by a fresh ::TreeStump::Parser.
  #
  # @raise [TreeHaver::NotAvailable] if tree_stump is not available
  def initialize
    unless Rust.available?
      raise TreeHaver::NotAvailable, "tree_stump not available"
    end

    @parser = ::TreeStump::Parser.new
  end

  # Select the language for this parser.
  #
  # The underlying parser's set_language is called with a name string: the
  # argument's #name when it responds to it, otherwise its #to_s.
  #
  # @param lang [Language, String] Language wrapper or language name
  # @return [Language, String] the argument that was passed in
  def language=(lang)
    lang_name =
      if lang.respond_to?(:name)
        lang.name
      else
        lang.to_s
      end
    @parser.set_language(lang_name)
    lang
  end

  # Parse source code with the wrapped parser.
  #
  # @param source [String] the source code to parse
  # @return [Object] whatever the wrapped parser's #parse returns
  def parse(source)
    tree = @parser.parse(source)
    tree
  end

  # Parse source code; present for interface compatibility.
  #
  # NOTE: old_tree is currently ignored — every call is a full parse through
  # the wrapped parser's #parse.
  # TODO: Check if tree_stump supports incremental parsing
  #
  # @param old_tree [Object, nil] previous tree (unused)
  # @param source [String] the source code to parse
  # @return [Object] whatever the wrapped parser's #parse returns
  def parse_string(old_tree, source)
    tree = @parser.parse(source)
    tree
  end
end
|
|
159
|
+
|
|
160
|
+
# Wrapper for tree_stump Tree
|
|
161
|
+
#
|
|
162
|
+
# Not used directly; TreeHaver passes through tree_stump Tree objects.
|
|
163
|
+
class Tree
  # Intentionally empty placeholder: tree_stump's own Tree objects are
  # passed through as-is instead of being wrapped here.
end
|
|
166
|
+
|
|
167
|
+
# Wrapper for tree_stump Node
|
|
168
|
+
#
|
|
169
|
+
# Not used directly; TreeHaver passes through tree_stump::Node objects.
|
|
170
|
+
class Node
  # Intentionally empty placeholder: tree_stump's own Node objects are
  # passed through as-is instead of being wrapped here.
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Compatibility shim for code that expects TreeSitter constants
|
|
4
|
+
#
|
|
5
|
+
# When required, this file creates a TreeSitter module that maps to TreeHaver
|
|
6
|
+
# equivalents, allowing code written for ruby_tree_sitter to work with TreeHaver
|
|
7
|
+
# without modification.
|
|
8
|
+
#
|
|
9
|
+
# This shim is safe and idempotent:
|
|
10
|
+
# - If TreeSitter is already defined (real ruby_tree_sitter is loaded), this does nothing
|
|
11
|
+
# - If TreeSitter is not defined, it creates aliases to TreeHaver
|
|
12
|
+
#
|
|
13
|
+
# @example Using the compatibility shim
|
|
14
|
+
# require "tree_haver/compat"
|
|
15
|
+
#
|
|
16
|
+
# # Now code expecting TreeSitter will work
|
|
17
|
+
# parser = TreeSitter::Parser.new # Actually creates TreeHaver::Parser
|
|
18
|
+
# tree = parser.parse(source)
|
|
19
|
+
#
|
|
20
|
+
# @note This is an opt-in feature. Only require this file if you need compatibility
|
|
21
|
+
# @see TreeHaver The main module this aliases to
|
|
22
|
+
|
|
23
|
+
unless defined?(TreeSitter)
  # Compatibility namespace whose constants point at the TreeHaver equivalents.
  #
  # @note Only defined if TreeSitter doesn't already exist
  module TreeSitter
    # Each constant is an alias for, not a copy of, the TreeHaver constant.
    Error = TreeHaver::Error
    Parser = TreeHaver::Parser
    Tree = TreeHaver::Tree
    Node = TreeHaver::Node
    Language = TreeHaver::Language
  end
end
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rbconfig"
|
|
4
|
+
|
|
5
|
+
module TreeHaver
|
|
6
|
+
# Generic utility for finding tree-sitter grammar shared libraries.
|
|
7
|
+
#
|
|
8
|
+
# GrammarFinder provides platform-aware discovery of tree-sitter grammar
|
|
9
|
+
# libraries. Given a language name, it searches common installation paths
|
|
10
|
+
# and supports environment variable overrides.
|
|
11
|
+
#
|
|
12
|
+
# This class is designed to be used by language-specific merge gems
|
|
13
|
+
# (toml-merge, json-merge, bash-merge, etc.) without requiring TreeHaver
|
|
14
|
+
# to have knowledge of each specific language.
|
|
15
|
+
#
|
|
16
|
+
# == Security Considerations
|
|
17
|
+
#
|
|
18
|
+
# Loading shared libraries is inherently dangerous as it executes arbitrary
|
|
19
|
+
# native code. GrammarFinder performs the following security validations:
|
|
20
|
+
#
|
|
21
|
+
# - Language names are validated to contain only safe characters
|
|
22
|
+
# - Paths from environment variables are validated before use
|
|
23
|
+
# - Path traversal attempts (../) are rejected
|
|
24
|
+
# - Only files with expected extensions (.so, .dylib, .dll) are accepted
|
|
25
|
+
#
|
|
26
|
+
# For additional security, use {#find_library_path_safe} which only returns
|
|
27
|
+
# paths from trusted system directories.
|
|
28
|
+
#
|
|
29
|
+
# @example Basic usage
|
|
30
|
+
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
31
|
+
# path = finder.find_library_path
|
|
32
|
+
# # => "/usr/lib/libtree-sitter-toml.so"
|
|
33
|
+
#
|
|
34
|
+
# @example Check availability
|
|
35
|
+
# finder = TreeHaver::GrammarFinder.new(:json)
|
|
36
|
+
# if finder.available?
|
|
37
|
+
# language = TreeHaver::Language.load(finder.language_name, finder.find_library_path)
|
|
38
|
+
# end
|
|
39
|
+
#
|
|
40
|
+
# @example Register with TreeHaver
|
|
41
|
+
# finder = TreeHaver::GrammarFinder.new(:bash)
|
|
42
|
+
# finder.register! if finder.available?
|
|
43
|
+
# # Now you can use: TreeHaver::Language.bash
|
|
44
|
+
#
|
|
45
|
+
# @example With custom search paths
|
|
46
|
+
# finder = TreeHaver::GrammarFinder.new(:toml, extra_paths: ["/opt/custom/lib"])
|
|
47
|
+
#
|
|
48
|
+
# @example Secure mode (trusted directories only)
|
|
49
|
+
# finder = TreeHaver::GrammarFinder.new(:toml)
|
|
50
|
+
# path = finder.find_library_path_safe # Only returns paths in trusted dirs
|
|
51
|
+
#
|
|
52
|
+
# @see PathValidator For details on security validations
|
|
53
|
+
class GrammarFinder
  # Common base directories where tree-sitter libraries are installed.
  # The platform-specific library filename is appended to each entry.
  BASE_SEARCH_DIRS = [
    "/usr/lib",
    "/usr/lib64",
    "/usr/local/lib",
    "/opt/homebrew/lib",
  ].freeze

  # @return [Symbol] the language identifier (downcased)
  attr_reader :language_name

  # @return [Array<String>] additional search directories provided at initialization
  attr_reader :extra_paths

  # Initialize a grammar finder for a specific language
  #
  # @param language_name [Symbol, String] the tree-sitter language name (e.g., :toml, :json, :bash)
  # @param extra_paths [Array<String>, String, nil] additional directories to search
  #   (searched before the common system directories)
  # @param validate [Boolean] if true, validates the language name (default: true)
  # @raise [ArgumentError] if language_name is invalid and validate is true
  def initialize(language_name, extra_paths: [], validate: true)
    name_str = language_name.to_s.downcase

    if validate && !PathValidator.safe_language_name?(name_str)
      raise ArgumentError, "Invalid language name: #{language_name.inspect}. " \
        "Language names must start with a letter and contain only lowercase letters, numbers, and underscores."
    end

    @language_name = name_str.to_sym
    @extra_paths = Array(extra_paths)
  end

  # Get the environment variable name for this language
  #
  # @return [String] the ENV var name (e.g., "TREE_SITTER_TOML_PATH")
  def env_var_name
    "TREE_SITTER_#{@language_name.to_s.upcase}_PATH"
  end

  # Get the expected symbol name exported by the grammar library
  #
  # @return [String] the symbol name (e.g., "tree_sitter_toml")
  def symbol_name
    "tree_sitter_#{@language_name}"
  end

  # Get the library filename for the current platform
  #
  # @return [String] the library filename (e.g., "libtree-sitter-toml.so")
  def library_filename
    "libtree-sitter-#{@language_name}#{platform_extension}"
  end

  # Generate the list of candidate file paths for this language
  #
  # Order: extra_paths first, then the common system directories. The
  # environment-variable override is NOT part of this list; it is consulted
  # separately by {#find_library_path}.
  #
  # @return [Array<String>] all candidate paths, in search order
  def search_paths
    filename = library_filename # same for every directory, so compute once
    (@extra_paths + BASE_SEARCH_DIRS).map { |dir| File.join(dir, filename) }
  end

  # Find the grammar library path
  #
  # Searches in order:
  # 1. Environment variable override (validated for safety)
  # 2. Extra paths provided at initialization
  # 3. Common system installation paths
  #
  # @note Paths from ENV are checked with {PathValidator.safe_library_path?};
  #   an ENV path that fails the check or does not exist is ignored.
  #
  # @return [String, nil] the path to the library, or nil if not found
  # @see #find_library_path_safe For stricter validation (trusted directories only)
  def find_library_path
    # Environment variable has the highest priority.
    env_path = ENV[env_var_name]
    if env_path && PathValidator.safe_library_path?(env_path) && File.exist?(env_path)
      return env_path
    end

    # Candidates come from extra_paths (caller-supplied, not validated here)
    # followed by BASE_SEARCH_DIRS.
    search_paths.find { |path| File.exist?(path) }
  end

  # Find the grammar library path with strict security validation
  #
  # Only returns a candidate from {#search_paths} that exists and that
  # {PathValidator.in_trusted_directory?} accepts. The environment variable
  # is not consulted.
  #
  # @return [String, nil] the path to the library, or nil if not found
  # @see PathValidator::TRUSTED_DIRECTORIES For the list of trusted directories
  def find_library_path_safe
    search_paths.find do |path|
      File.exist?(path) && PathValidator.in_trusted_directory?(path)
    end
  end

  # Check if the grammar library is available
  #
  # @return [Boolean] true if {#find_library_path} finds a library
  def available?
    !find_library_path.nil?
  end

  # Check if the grammar library is available in a trusted directory
  #
  # @return [Boolean] true if {#find_library_path_safe} finds a library
  # @see #find_library_path_safe
  def available_safe?
    !find_library_path_safe.nil?
  end

  # Register this language with TreeHaver
  #
  # Calls TreeHaver.register_language with the language name, the discovered
  # path and {#symbol_name}.
  #
  # @param raise_on_missing [Boolean] if true, raises when library not found
  # @return [Boolean] true if registration succeeded, false if no library was found
  # @raise [NotAvailable] if library not found and raise_on_missing is true
  def register!(raise_on_missing: false)
    path = find_library_path
    unless path
      raise NotAvailable, not_found_message if raise_on_missing

      return false
    end

    TreeHaver.register_language(@language_name, path: path, symbol: symbol_name)
    true
  end

  # Get debug information about the search
  #
  # The library is resolved once, so :found_path and :available always
  # describe the same lookup.
  #
  # @return [Hash] diagnostic information
  def search_info
    found = find_library_path
    {
      language: @language_name,
      env_var: env_var_name,
      env_value: ENV[env_var_name],
      symbol: symbol_name,
      library_filename: library_filename,
      search_paths: search_paths,
      found_path: found,
      available: !found.nil?,
    }
  end

  # Get a human-readable error message when library is not found
  #
  # @return [String] error message listing the searched paths and the ENV override name
  def not_found_message
    "Tree-sitter #{@language_name} grammar not found. " \
      "Searched: #{search_paths.join(", ")}. " \
      "Install tree-sitter-#{@language_name} or set #{env_var_name}."
  end

  private

  # Get the platform-appropriate shared library extension
  #
  # @return [String] ".dylib" when host_os matches darwin, ".dll" when it
  #   matches mswin/mingw/cygwin, ".so" otherwise
  def platform_extension
    case RbConfig::CONFIG["host_os"]
    when /darwin/i
      ".dylib"
    when /mswin|mingw|cygwin/i
      ".dll"
    else
      ".so"
    end
  end
end
|
|
245
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TreeHaver
|
|
4
|
+
# Thread-safe language registrations and cache for loaded Language handles
|
|
5
|
+
#
|
|
6
|
+
# The LanguageRegistry provides two main functions:
|
|
7
|
+
# 1. **Registrations**: Store mappings from language names to shared library paths
|
|
8
|
+
# 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
|
|
9
|
+
#
|
|
10
|
+
# All operations are thread-safe and protected by a mutex.
|
|
11
|
+
#
|
|
12
|
+
# @example Register and cache a language
|
|
13
|
+
# TreeHaver::LanguageRegistry.register(:toml, path: "/path/to/lib.so", symbol: "tree_sitter_toml")
|
|
14
|
+
# lang = TreeHaver::LanguageRegistry.fetch(["/path/to/lib.so", "tree_sitter_toml", "toml"]) do
|
|
15
|
+
# # This block is called only if not cached
|
|
16
|
+
# load_language_from_library(...)
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# @api private
|
|
20
|
+
module LanguageRegistry
  # Every read and write of the two hashes below goes through this mutex.
  @mutex = Mutex.new
  # Memoized language handles, keyed by whatever the caller passes to fetch.
  @cache = {}
  # Language name (Symbol) => {path:, symbol:}
  @registrations = {}

  module_function

  # Record where a language's shared library lives.
  #
  # @param name [Symbol, String] language identifier (e.g., :toml, :json)
  # @param path [String] path to the language shared library
  # @param symbol [String, nil] optional exported factory symbol (e.g., "tree_sitter_toml")
  # @return [void]
  # @example
  #   LanguageRegistry.register(:toml, path: "/usr/local/lib/libtree-sitter-toml.so")
  def register(name, path:, symbol: nil)
    sym = name.to_sym
    @mutex.synchronize { @registrations[sym] = {path: path, symbol: symbol} }
    nil
  end

  # Drop the registration for a language name.
  #
  # Only the registration entry is removed; the cache is left untouched.
  #
  # @param name [Symbol, String] language identifier to unregister
  # @return [void]
  # @example
  #   LanguageRegistry.unregister(:toml)
  def unregister(name)
    sym = name.to_sym
    @mutex.synchronize { @registrations.delete(sym) }
    nil
  end

  # Look up a registration entry.
  #
  # @param name [Symbol, String] language identifier
  # @return [Hash{Symbol => String, nil}, nil] hash with :path and :symbol keys, or nil if not registered
  # @example
  #   entry = LanguageRegistry.registered(:toml)
  #   # => { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" }
  def registered(name)
    @mutex.synchronize do
      @registrations[name.to_sym]
    end
  end

  # Remove every registration; the cache is not cleared.
  #
  # @return [void]
  # @example
  #   LanguageRegistry.clear_registrations!
  def clear_registrations!
    @mutex.synchronize do
      @registrations.clear
    end
    nil
  end

  # Return the cached value for key, computing and storing it on a miss.
  #
  # The block runs while the mutex is held, and only when the key is not yet
  # in the cache. A cached nil or false counts as a hit.
  #
  # @param key [Array] cache key, typically [path, symbol, name]
  # @yieldreturn [Object] the computed language handle (called only on cache miss)
  # @return [Object] the cached or computed language handle
  # @example
  #   language = LanguageRegistry.fetch(["/path/lib.so", "symbol", "toml"]) do
  #     expensive_language_load_operation
  #   end
  def fetch(key)
    @mutex.synchronize do
      @cache.fetch(key) { @cache[key] = yield }
    end
  end

  # Remove every cached value; registrations are not cleared.
  #
  # @return [void]
  # @example
  #   LanguageRegistry.clear_cache!
  def clear_cache!
    @mutex.synchronize do
      @cache.clear
    end
    nil
  end

  # Remove all registrations and all cached values in one locked step.
  #
  # @return [void]
  # @example
  #   LanguageRegistry.clear_all!
  def clear_all!
    @mutex.synchronize do
      [@registrations, @cache].each(&:clear)
    end
    nil
  end
end
|
|
139
|
+
end
|