tree_haver 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
module TreeHaver
  # Utility for finding and registering Citrus grammar gems.
  #
  # CitrusGrammarFinder provides language-agnostic discovery of Citrus grammar
  # gems. Given a language name and gem information, it attempts to load the
  # grammar and register it with tree_haver.
  #
  # Unlike tree-sitter grammars (which are .so files), Citrus grammars are
  # Ruby modules that respond to .parse(source). This class handles the
  # discovery and registration of these grammars.
  #
  # @example Basic usage with toml-rb
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :toml,
  #     gem_name: "toml-rb",
  #     grammar_const: "TomlRB::Document"
  #   )
  #   finder.register! if finder.available?
  #
  # @example With custom require path
  #   finder = TreeHaver::CitrusGrammarFinder.new(
  #     language: :json,
  #     gem_name: "json-rb",
  #     grammar_const: "JsonRB::Grammar",
  #     require_path: "json/rb"
  #   )
  #
  # @see GrammarFinder For tree-sitter grammar discovery
  class CitrusGrammarFinder
    # @return [Symbol] the language identifier
    attr_reader :language_name

    # @return [String] the gem name to require
    attr_reader :gem_name

    # @return [String] the constant path to the grammar (e.g., "TomlRB::Document")
    attr_reader :grammar_const

    # @return [String] the require path tried first: the custom path when one
    #   was given, otherwise gem_name with dashes turned into slashes
    attr_reader :require_path

    # Initialize a Citrus grammar finder
    #
    # Nothing is loaded here; the gem is required lazily by {#available?}.
    #
    # @param language [Symbol, String] the language name (e.g., :toml, :json)
    # @param gem_name [String] the gem name (e.g., "toml-rb")
    # @param grammar_const [String] constant path to grammar (e.g., "TomlRB::Document");
    #   a leading "::" is accepted
    # @param require_path [String, nil] custom require path (defaults to gem_name with dashes→slashes)
    def initialize(language:, gem_name:, grammar_const:, require_path: nil)
      @language_name = language.to_sym
      @gem_name = gem_name
      @grammar_const = grammar_const
      @require_path = require_path || gem_name.tr("-", "/")
      # A caller-supplied path is authoritative. The derived default is only a
      # guess, so the literal gem name is kept as a second candidate for gems
      # whose entry file keeps the dash (lib/foo-bar.rb).
      @require_candidates = require_path ? [@require_path] : [@require_path, gem_name].uniq
      @load_attempted = false
      @available = false
      @grammar_module = nil
    end

    # Check if the Citrus grammar is available
    #
    # Attempts to require the gem and resolve the grammar constant.
    # Result is cached after first call, including a negative result.
    #
    # @return [Boolean] true if grammar is available
    def available?
      return @available if @load_attempted

      # Mark the attempt before loading so a failure is cached too.
      @load_attempted = true
      @available = load_grammar
    end

    # Get the resolved grammar module
    #
    # @return [Module, nil] the grammar module if available, nil otherwise
    def grammar_module
      available? # Ensure we've tried to load
      @grammar_module
    end

    # Register this Citrus grammar with TreeHaver
    #
    # After registration, the language can be used via:
    #   TreeHaver::Language.{language_name}
    #
    # @param raise_on_missing [Boolean] if true, raises when grammar not available
    # @return [Boolean] true if registration succeeded
    # @raise [NotAvailable] if grammar not available and raise_on_missing is true
    def register!(raise_on_missing: false)
      unless available?
        raise NotAvailable, not_found_message if raise_on_missing

        return false
      end

      TreeHaver.register_language(
        @language_name,
        grammar_module: @grammar_module,
        gem_name: @gem_name,
      )
      true
    end

    # Get debug information about the search
    #
    # Triggers the load attempt if it has not happened yet.
    #
    # @return [Hash] diagnostic information
    def search_info
      loaded = available?
      {
        language: @language_name,
        gem_name: @gem_name,
        grammar_const: @grammar_const,
        require_path: @require_path,
        available: loaded,
        grammar_module: grammar_module_label,
      }
    end

    # Get a human-readable error message when grammar is not found
    #
    # @return [String] error message with installation hints
    def not_found_message
      "Citrus grammar for #{@language_name} not found. " \
        "Install #{@gem_name} gem: gem install #{@gem_name}"
    end

    private

    # Require the gem, resolve the constant and check that it can parse.
    #
    # Every failure is reported through +warn+ and turned into +false+ so that
    # probing for an optional grammar never raises.
    #
    # @return [Boolean] true when a usable grammar was stored in @grammar_module
    def load_grammar
      require_grammar_gem
      candidate = resolve_constant(@grammar_const)

      unless candidate.respond_to?(:parse)
        warn("#{@grammar_const} doesn't respond to :parse")
        return false
      end

      # Stored only after the :parse check so #grammar_module stays nil for
      # an unusable constant.
      @grammar_module = candidate
      true
    rescue LoadError => e
      # Always show LoadError for debugging
      warn("CitrusGrammarFinder: Failed to load '#{@require_path}': #{e.class}: #{e.message}")
      false
    rescue NameError => e
      # Always show NameError for debugging
      warn("CitrusGrammarFinder: Failed to resolve '#{@grammar_const}': #{e.class}: #{e.message}")
      false
    rescue => e
      # Catch any other errors
      warn("CitrusGrammarFinder: Unexpected error: #{e.class}: #{e.message}")
      warn(e.backtrace.first(3).join("\n")) if ENV["TREE_HAVER_DEBUG"]
      false
    end

    # Require the first candidate path that loads.
    #
    # @return [void]
    # @raise [LoadError] the error from the first candidate when none loads
    def require_grammar_gem
      first_error = nil
      @require_candidates.each do |candidate|
        begin
          require candidate
          return
        rescue LoadError => e
          # Keep the failure for the primary path; it is the one reported.
          first_error ||= e
        end
      end
      raise first_error
    end

    # Resolve a constant path like "TomlRB::Document"
    #
    # @param const_path [String] constant path, optionally prefixed with "::"
    # @return [Object] the constant
    # @raise [NameError] if constant not found
    def resolve_constant(const_path)
      const_path.sub(/\A::/, "").split("::").reduce(Object) do |scope, const_name|
        scope.const_get(const_name)
      end
    end

    # Describe the resolved grammar for diagnostics without assuming it is a Module.
    #
    # @return [String, nil] the module name, +inspect+ output for objects that
    #   have no +name+ method, or nil when nothing was resolved
    def grammar_module_label
      return if @grammar_module.nil?

      @grammar_module.respond_to?(:name) ? @grammar_module.name : @grammar_module.inspect
    end
  end
end
@@ -142,14 +142,37 @@ module TreeHaver
142
142
  def find_library_path
143
143
  # Check environment variable first (highest priority)
144
144
  env_path = ENV[env_var_name]
145
- if env_path && PathValidator.safe_library_path?(env_path) && File.exist?(env_path)
146
- return env_path
145
+ if env_path
146
+ # Store why env path was rejected for better error messages
147
+ @env_rejection_reason = validate_env_path(env_path)
148
+ return env_path if @env_rejection_reason.nil?
147
149
  end
148
150
 
149
151
  # Search all paths (these are constructed from trusted base dirs)
150
152
  search_paths.find { |path| File.exist?(path) }
151
153
  end
152
154
 
155
# Validate an environment variable path and return reason if invalid
#
# Checks stop at the first failure. The returned text is phrased so it can
# follow "... is set to <value> but " in an error message.
#
# @param path [String] raw value read from the environment variable
# @return [String, nil] rejection reason or nil if valid
def validate_env_path(path)
  trimmed = path.strip

  # An empty or blank value has no corrected form worth suggesting, so it is
  # reported on its own instead of as a whitespace or security problem.
  return "is empty or contains only whitespace" if trimmed.empty?

  # Check for leading/trailing whitespace
  return "contains leading or trailing whitespace (use #{trimmed.inspect})" if path != trimmed

  # Check if path is safe
  unless PathValidator.safe_library_path?(path)
    return "failed security validation (may contain path traversal or suspicious characters)"
  end

  # Check if file exists
  return "file does not exist" unless File.exist?(path)

  nil # Valid!
end
175
+
153
176
  # Find the grammar library path with strict security validation
154
177
  #
155
178
  # This method only returns paths that are in trusted system directories.
@@ -205,15 +228,17 @@ module TreeHaver
205
228
  #
206
229
  # @return [Hash] diagnostic information
207
230
  def search_info
231
+ found = find_library_path # This populates @env_rejection_reason
208
232
  {
209
233
  language: @language_name,
210
234
  env_var: env_var_name,
211
235
  env_value: ENV[env_var_name],
236
+ env_rejection_reason: @env_rejection_reason,
212
237
  symbol: symbol_name,
213
238
  library_filename: library_filename,
214
239
  search_paths: search_paths,
215
- found_path: find_library_path,
216
- available: available?,
240
+ found_path: found,
241
+ available: !found.nil?,
217
242
  }
218
243
  end
219
244
 
@@ -221,9 +246,19 @@ module TreeHaver
221
246
  #
222
247
  # @return [String] error message with installation hints
223
248
  def not_found_message
224
- "Tree-sitter #{@language_name} grammar not found. " \
225
- "Searched: #{search_paths.join(", ")}. " \
226
- "Install tree-sitter-#{@language_name} or set #{env_var_name}."
249
+ msg = "tree-sitter #{@language_name} grammar not found."
250
+
251
+ # Check if env var is set but rejected
252
+ env_value = ENV[env_var_name]
253
+ msg += if env_value && @env_rejection_reason
254
+ " #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
255
+ elsif env_value
256
+ " #{env_var_name} is set but was not used (file may have been removed)."
257
+ else
258
+ " Searched: #{search_paths.join(", ")}."
259
+ end
260
+
261
+ msg + " Install tree-sitter-#{@language_name} or set #{env_var_name} to a valid path."
227
262
  end
228
263
 
229
264
  private
@@ -4,86 +4,93 @@ module TreeHaver
4
4
  # Thread-safe language registrations and cache for loaded Language handles
5
5
  #
6
6
  # The LanguageRegistry provides two main functions:
7
- # 1. **Registrations**: Store mappings from language names to shared library paths
7
+ # 1. **Registrations**: Store mappings from language names to backend-specific configurations
8
8
  # 2. **Cache**: Memoize loaded Language objects to avoid repeated dlopen calls
9
9
  #
10
- # All operations are thread-safe and protected by a mutex.
10
+ # The registry supports multiple backends for the same language, allowing runtime
11
+ # switching, benchmarking, and fallback scenarios.
11
12
  #
12
- # @example Register and cache a language
13
- # TreeHaver::LanguageRegistry.register(:toml, path: "/path/to/lib.so", symbol: "tree_sitter_toml")
14
- # lang = TreeHaver::LanguageRegistry.fetch(["/path/to/lib.so", "tree_sitter_toml", "toml"]) do
15
- # # This block is called only if not cached
16
- # load_language_from_library(...)
17
- # end
13
+ # Registration structure:
14
+ # @registrations = {
15
+ # toml: {
16
+ # tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
17
+ # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
18
+ # }
19
+ # }
20
+ #
21
+ # @example Register tree-sitter grammar
22
+ # TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
23
+ # path: "/path/to/lib.so", symbol: "tree_sitter_toml")
24
+ #
25
+ # @example Register Citrus grammar
26
+ # TreeHaver::LanguageRegistry.register(:toml, :citrus,
27
+ # grammar_module: TomlRB::Document, gem_name: "toml-rb")
18
28
  #
19
29
  # @api private
20
30
  module LanguageRegistry
21
31
  @mutex = Mutex.new
22
- @cache = {}
23
- @registrations = {}
32
+ @cache = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
33
+ @registrations = {} # rubocop:disable ThreadSafety/MutableClassInstanceVariable
24
34
 
25
35
  module_function
26
36
 
27
# Register a language for a specific backend
#
# Stores backend-specific configuration for a language. Multiple backends
# can be registered for the same language without conflict; registering the
# same language/backend pair again replaces the earlier configuration.
#
# @param name [Symbol, String] language identifier (e.g., :toml, :json)
# @param backend_type [Symbol] backend type (:tree_sitter, :citrus, :mri, :rust, :ffi, :java)
# @param config [Hash] backend-specific configuration (nil values are dropped)
# @option config [String] :path tree-sitter library path (for tree-sitter backends)
# @option config [String] :symbol exported symbol name (for tree-sitter backends)
# @option config [Module] :grammar_module Citrus grammar module (for Citrus backend)
# @option config [String] :gem_name gem name for error messages (for Citrus backend)
# @return [void]
# @example Register tree-sitter grammar
#   LanguageRegistry.register(:toml, :tree_sitter,
#     path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
# @example Register Citrus grammar
#   LanguageRegistry.register(:toml, :citrus,
#     grammar_module: TomlRB::Document, gem_name: "toml-rb")
def register(name, backend_type, **config)
  language_key = name.to_sym
  backend_key = backend_type.to_sym

  @mutex.synchronize do
    # Create the per-language bucket on first use, then set this backend's entry.
    backends = (@registrations[language_key] ||= {})
    backends[backend_key] = config.compact
  end
  nil
end
62
66
 
63
# Fetch registration entries for a language
#
# Returns all backend-specific configurations for a language, or the
# configuration of a single backend when backend_type is given.
#
# @param name [Symbol, String] language identifier
# @param backend_type [Symbol, nil] optional backend type to filter by
# @return [Hash{Symbol => Hash}, Hash, nil] all backends or specific backend config;
#   nil when the language (or the requested backend) is not registered
# @example Get all backends
#   entries = LanguageRegistry.registered(:toml)
#   # => {
#   #   tree_sitter: { path: "/usr/local/lib/libtree-sitter-toml.so", symbol: "tree_sitter_toml" },
#   #   citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
#   # }
# @example Get specific backend
#   entry = LanguageRegistry.registered(:toml, :citrus)
#   # => { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
def registered(name, backend_type = nil)
  @mutex.synchronize do
    backends = @registrations[name.to_sym]
    next unless backends

    # NOTE(review): the stored hashes are returned as-is (no copy), so a
    # caller that mutates the result changes the registry outside the lock.
    backend_type ? backends[backend_type.to_sym] : backends
  end
end
88
95
 
89
96
  # Fetch a cached language by key or compute and store it
@@ -119,21 +126,5 @@ module TreeHaver
119
126
  @mutex.synchronize { @cache.clear }
120
127
  nil
121
128
  end
122
-
123
- # Clear everything (registrations and cache)
124
- #
125
- # Removes all registered languages and all cached Language objects.
126
- # Useful for complete teardown in tests.
127
- #
128
- # @return [void]
129
- # @example
130
- # LanguageRegistry.clear_all!
131
- def clear_all!
132
- @mutex.synchronize do
133
- @registrations.clear
134
- @cache.clear
135
- end
136
- nil
137
- end
138
129
  end
139
130
  end