tree_haver 3.0.0 → 3.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -135,17 +135,49 @@ module TreeHaver
135
135
  # 3. Common system installation paths
136
136
  #
137
137
  # @note Paths from ENV are validated using {PathValidator.safe_library_path?}
138
- # to prevent path traversal and other attacks. Invalid ENV paths are ignored.
138
+ # to prevent path traversal and other attacks. Invalid ENV paths cause
139
+ # an error to be raised (Principle of Least Surprise - explicit paths must work).
140
+ #
141
+ # @note Setting the ENV variable to an empty string explicitly disables
142
+ # this grammar. This allows fallback to alternative backends (e.g., Citrus).
139
143
  #
140
144
  # @return [String, nil] the path to the library, or nil if not found
145
+ # @raise [TreeHaver::NotAvailable] if ENV variable is set to an invalid path
141
146
  # @see #find_library_path_safe For stricter validation (trusted directories only)
142
147
  def find_library_path
143
148
  # Check environment variable first (highest priority)
144
- env_path = ENV[env_var_name]
145
- if env_path
149
+ # Use key? to distinguish between "not set" and "set to empty"
150
+ if ENV.key?(env_var_name)
151
+ env_path = ENV[env_var_name]
152
+
153
+ # :nocov: defensive - ENV.key? true with nil value is rare edge case
154
+ if env_path.nil?
155
+ @env_rejection_reason = "explicitly disabled (set to nil)"
156
+ return
157
+ end
158
+ # :nocov:
159
+
160
+ # Empty string means "explicitly skip this grammar"
161
+ # This allows users to disable tree-sitter for specific languages
162
+ # and fall back to alternative backends like Citrus
163
+ if env_path.empty?
164
+ @env_rejection_reason = "explicitly disabled (set to empty string)"
165
+ return
166
+ end
167
+
146
168
  # Store why env path was rejected for better error messages
147
169
  @env_rejection_reason = validate_env_path(env_path)
148
- return env_path if @env_rejection_reason.nil?
170
+
171
+ # Principle of Least Surprise: If user explicitly sets an ENV variable
172
+ # to a path, that path MUST work. Don't silently fall back to auto-discovery.
173
+ if @env_rejection_reason
174
+ raise TreeHaver::NotAvailable,
175
+ "#{env_var_name} is set to #{env_path.inspect} but #{@env_rejection_reason}. " \
176
+ "Either fix the path, unset the variable to use auto-discovery, " \
177
+ "or set it to empty string to explicitly disable this grammar."
178
+ end
179
+
180
+ return env_path
149
181
  end
150
182
 
151
183
  # Search all paths (these are constructed from trusted base dirs)
@@ -188,11 +220,67 @@ module TreeHaver
188
220
  end
189
221
  end
190
222
 
191
- # Check if the grammar library is available
223
+ # Check if the grammar library is available AND usable
192
224
  #
193
- # @return [Boolean] true if the library can be found
225
+ # This checks:
226
+ # 1. The grammar library file exists
227
+ # 2. The tree-sitter runtime is functional (can create a parser)
228
+ #
229
+ # This prevents registering grammars when tree-sitter isn't actually usable,
230
+ # allowing clean fallback to alternative backends like Citrus.
231
+ #
232
+ # @return [Boolean] true if the library can be found AND tree-sitter runtime works
194
233
  def available?
195
- !find_library_path.nil?
234
+ path = find_library_path
235
+ return false if path.nil?
236
+
237
+ # Check if tree-sitter runtime is actually functional
238
+ # This is cached at the class level since it's the same for all grammars
239
+ self.class.tree_sitter_runtime_usable?
240
+ end
241
+
242
+ # Backends that use tree-sitter (require native runtime libraries)
243
+ # Other backends (Citrus, Prism, Psych, etc.) don't use tree-sitter
244
+ TREE_SITTER_BACKENDS = [
245
+ TreeHaver::Backends::MRI,
246
+ TreeHaver::Backends::FFI,
247
+ TreeHaver::Backends::Rust,
248
+ TreeHaver::Backends::Java,
249
+ ].freeze
250
+
251
+ class << self
252
+ # Check if the tree-sitter runtime is usable
253
+ #
254
+ # Tests whether we can actually create a tree-sitter parser.
255
+ # Result is cached since this is expensive and won't change during runtime.
256
+ #
257
+ # @return [Boolean] true if tree-sitter runtime is functional
258
+ def tree_sitter_runtime_usable?
259
+ return @tree_sitter_runtime_usable if defined?(@tree_sitter_runtime_usable)
260
+
261
+ @tree_sitter_runtime_usable = begin
262
+ # Try to create a parser using the current backend
263
+ mod = TreeHaver.resolve_backend_module(nil)
264
+
265
+ # Only tree-sitter backends are relevant here
266
+ # Non-tree-sitter backends (Citrus, Prism, Psych, etc.) don't use grammar files
267
+ return false if mod.nil?
268
+ return false unless TREE_SITTER_BACKENDS.include?(mod)
269
+
270
+ # Try to instantiate a parser - this will fail if runtime isn't available
271
+ mod::Parser.new
272
+ true
273
+ rescue NoMethodError, FFI::NotFoundError, LoadError, NotAvailable => _e
274
+ false
275
+ end
276
+ end
277
+
278
+ # Reset the cached tree-sitter runtime check (for testing)
279
+ #
280
+ # @api private
281
+ def reset_runtime_check!
282
+ remove_instance_variable(:@tree_sitter_runtime_usable) if defined?(@tree_sitter_runtime_usable)
283
+ end
196
284
  end
197
285
 
198
286
  # Check if the grammar library is available in a trusted directory
@@ -252,6 +340,9 @@ module TreeHaver
252
340
  env_value = ENV[env_var_name]
253
341
  msg += if env_value && @env_rejection_reason
254
342
  " #{env_var_name} is set to #{env_value.inspect} but #{@env_rejection_reason}."
343
+ elsif env_value && File.exist?(env_value) && !self.class.tree_sitter_runtime_usable?
344
+ " #{env_var_name} is set and file exists, but no tree-sitter runtime is available. " \
345
+ "Add ruby_tree_sitter, ffi, or tree_stump gem to your Gemfile."
255
346
  elsif env_value
256
347
  " #{env_var_name} is set but was not used (file may have been removed)."
257
348
  else
@@ -11,20 +11,26 @@ module TreeHaver
11
11
  # switching, benchmarking, and fallback scenarios.
12
12
  #
13
13
  # Registration structure:
14
+ # ```ruby
14
15
  # @registrations = {
15
16
  # toml: {
16
17
  # tree_sitter: { path: "/path/to/lib.so", symbol: "tree_sitter_toml" },
17
18
  # citrus: { grammar_module: TomlRB::Document, gem_name: "toml-rb" }
18
19
  # }
19
20
  # }
21
+ # ```
20
22
  #
21
23
  # @example Register tree-sitter grammar
24
+ # ```ruby
22
25
  # TreeHaver::LanguageRegistry.register(:toml, :tree_sitter,
23
26
  # path: "/path/to/lib.so", symbol: "tree_sitter_toml")
27
+ # ```
24
28
  #
25
29
  # @example Register Citrus grammar
30
+ # ```ruby
26
31
  # TreeHaver::LanguageRegistry.register(:toml, :citrus,
27
32
  # grammar_module: TomlRB::Document, gem_name: "toml-rb")
33
+ # ```
28
34
  #
29
35
  # @api private
30
36
  module LanguageRegistry
@@ -1,40 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TreeHaver
4
- # Point class that works as both a Hash and an object with row/column accessors
5
- #
6
- # This provides compatibility with code expecting either:
7
- # - Hash access: point[:row], point[:column]
8
- # - Method access: point.row, point.column
9
- class Point
10
- attr_reader :row, :column
11
-
12
- def initialize(row, column)
13
- @row = row
14
- @column = column
15
- end
16
-
17
- # Hash-like access for compatibility
18
- def [](key)
19
- case key
20
- when :row, "row" then @row
21
- when :column, "column" then @column
22
- end
23
- end
24
-
25
- def to_h
26
- {row: @row, column: @column}
27
- end
28
-
29
- def to_s
30
- "(#{@row}, #{@column})"
31
- end
32
-
33
- def inspect
34
- "#<TreeHaver::Point row=#{@row} column=#{@column}>"
35
- end
36
- end
37
-
38
4
  # Unified Node wrapper providing a consistent API across all backends
39
5
  #
40
6
  # This class wraps backend-specific node objects (TreeSitter::Node, TreeStump::Node, etc.)
@@ -95,6 +61,7 @@ module TreeHaver
95
61
  # @note This is the key to tree_haver's "write once, run anywhere" promise
96
62
  class Node
97
63
  include Comparable
64
+ include Enumerable
98
65
 
99
66
  # The wrapped backend-specific node object
100
67
  #
@@ -165,10 +132,20 @@ module TreeHaver
165
132
  def start_point
166
133
  if @inner_node.respond_to?(:start_point)
167
134
  point = @inner_node.start_point
168
- Point.new(point.row, point.column)
135
+ # Handle both Point objects and hashes
136
+ if point.is_a?(Hash)
137
+ Point.new(point[:row], point[:column])
138
+ else
139
+ Point.new(point.row, point.column)
140
+ end
169
141
  elsif @inner_node.respond_to?(:start_position)
170
142
  point = @inner_node.start_position
171
- Point.new(point.row, point.column)
143
+ # Handle both Point objects and hashes
144
+ if point.is_a?(Hash)
145
+ Point.new(point[:row], point[:column])
146
+ else
147
+ Point.new(point.row, point.column)
148
+ end
172
149
  else
173
150
  raise TreeHaver::Error, "Backend node does not support start_point/start_position"
174
151
  end
@@ -180,15 +157,71 @@ module TreeHaver
180
157
  def end_point
181
158
  if @inner_node.respond_to?(:end_point)
182
159
  point = @inner_node.end_point
183
- Point.new(point.row, point.column)
160
+ # Handle both Point objects and hashes
161
+ if point.is_a?(Hash)
162
+ Point.new(point[:row], point[:column])
163
+ else
164
+ Point.new(point.row, point.column)
165
+ end
184
166
  elsif @inner_node.respond_to?(:end_position)
185
167
  point = @inner_node.end_position
186
- Point.new(point.row, point.column)
168
+ # Handle both Point objects and hashes
169
+ if point.is_a?(Hash)
170
+ Point.new(point[:row], point[:column])
171
+ else
172
+ Point.new(point.row, point.column)
173
+ end
187
174
  else
188
175
  raise TreeHaver::Error, "Backend node does not support end_point/end_position"
189
176
  end
190
177
  end
191
178
 
179
+ # Get the 1-based line number where this node starts
180
+ #
181
+ # Convenience method that converts 0-based row to 1-based line number.
182
+ # This is useful for error messages and matching with editor line numbers.
183
+ #
184
+ # @return [Integer] 1-based line number
185
+ def start_line
186
+ start_point.row + 1
187
+ end
188
+
189
+ # Get the 1-based line number where this node ends
190
+ #
191
+ # Convenience method that converts 0-based row to 1-based line number.
192
+ #
193
+ # @return [Integer] 1-based line number
194
+ def end_line
195
+ end_point.row + 1
196
+ end
197
+
198
+ # Get position information as a hash
199
+ #
200
+ # Returns a hash with 1-based line numbers and 0-based columns.
201
+ # This format is compatible with *-merge gems' FileAnalysisBase.
202
+ #
203
+ # @return [Hash{Symbol => Integer}] Position hash
204
+ # @example
205
+ # node.source_position
206
+ # # => { start_line: 1, end_line: 3, start_column: 0, end_column: 10 }
207
+ def source_position
208
+ {
209
+ start_line: start_line,
210
+ end_line: end_line,
211
+ start_column: start_point.column,
212
+ end_column: end_point.column,
213
+ }
214
+ end
215
+
216
+ # Get the first child node
217
+ #
218
+ # Convenience method for iteration patterns that expect first_child.
219
+ #
220
+ # @return [Node, nil] First child node or nil if no children
221
+ def first_child
222
+ child(0)
223
+ end
224
+
192
225
  # Get the node's text content
193
226
  #
194
227
  # @return [String]
@@ -435,10 +468,10 @@ module TreeHaver
435
468
 
436
469
  # Compare by position first (start_byte, then end_byte)
437
470
  cmp = start_byte <=> other.start_byte
438
- return cmp unless cmp.zero?
471
+ return cmp if cmp.nonzero?
439
472
 
440
473
  cmp = end_byte <=> other.end_byte
441
- return cmp unless cmp.zero?
474
+ return cmp if cmp.nonzero?
442
475
 
443
476
  # For nodes at the same position with same span, compare by type
444
477
  type <=> other.type
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Point class that works as both a Hash and an object with row/column accessors
5
+ #
6
+ # This provides compatibility with code expecting either:
7
+ # - Hash access: point[:row], point[:column]
8
+ # - Method access: point.row, point.column
9
+ #
10
+ # @example Method access
11
+ # point = TreeHaver::Point.new(5, 10)
12
+ # point.row # => 5
13
+ # point.column # => 10
14
+ #
15
+ # @example Hash-like access
16
+ # point[:row] # => 5
17
+ # point[:column] # => 10
18
+ #
19
+ # @example Converting to hash
20
+ # point.to_h # => {row: 5, column: 10}
21
+ class Point
22
+ attr_reader :row, :column
23
+
24
+ # Create a new Point
25
+ #
26
+ # @param row [Integer] the row (line) number, 0-indexed
27
+ # @param column [Integer] the column number, 0-indexed
28
+ def initialize(row, column)
29
+ @row = row
30
+ @column = column
31
+ end
32
+
33
+ # Hash-like access for compatibility
34
+ #
35
+ # @param key [Symbol, String] :row or :column
36
+ # @return [Integer, nil] the value or nil if key not recognized
37
+ def [](key)
38
+ case key
39
+ when :row, "row" then @row
40
+ when :column, "column" then @column
41
+ end
42
+ end
43
+
44
+ # Convert to a hash
45
+ #
46
+ # @return [Hash{Symbol => Integer}]
47
+ def to_h
48
+ {row: @row, column: @column}
49
+ end
50
+
51
+ # String representation
52
+ #
53
+ # @return [String]
54
+ def to_s
55
+ "(#{@row}, #{@column})"
56
+ end
57
+
58
+ # Inspect representation
59
+ #
60
+ # @return [String]
61
+ def inspect
62
+ "#<TreeHaver::Point row=#{@row} column=#{@column}>"
63
+ end
64
+ end
65
+ end