tree_haver 3.2.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Base
5
+ # Base class for backend Language implementations
6
+ #
7
+ # This class defines the API contract for all language implementations.
8
+ # Backend-specific Language classes should inherit from this and implement
9
+ # the required interface.
10
+ #
11
+ # @abstract Subclasses must implement .from_library; #name and #backend are set by #initialize
12
class Language
  include Comparable

  # The language name (e.g., :markdown, :ruby, :json)
  # @return [Symbol] Language name
  attr_reader :name

  # The backend this language is for
  # @return [Symbol] Backend identifier (e.g., :commonmarker, :markly, :prism)
  attr_reader :backend

  # Language-specific options
  # @return [Hash] Options hash (stored as given, not copied)
  attr_reader :options

  # Create a new Language instance
  #
  # @param name [Symbol, String] Language name (converted with #to_sym)
  # @param backend [Symbol, String] Backend identifier (converted with #to_sym)
  # @param options [Hash] Backend-specific options
  def initialize(name, backend:, options: {})
    @name = name.to_sym
    @backend = backend.to_sym
    @options = options
  end

  # Alias for name (tree-sitter compatibility)
  alias_method :language_name, :name

  # -- Shared Implementation ------------------------------------------------

  # Order languages of the same backend by name.
  #
  # Languages that belong to different backends, and objects that are not
  # Languages at all, are not comparable and yield nil. Because this class
  # includes Comparable and does not define #==, `==` is derived from this
  # method and therefore ignores #options (unlike #eql?).
  #
  # @param other [Object]
  # @return [Integer, nil] -1, 0 or 1 for same-backend languages, nil otherwise
  def <=>(other)
    return unless other.is_a?(Language)
    return unless other.backend == backend

    name <=> other.name
  end

  # Hash value for use in Sets/Hashes
  #
  # The options Hash is hashed directly: Hash#hash does not depend on key
  # order, and unlike sorting the key/value pairs it cannot raise when the
  # option keys are not mutually comparable (e.g. Symbols mixed with Strings).
  #
  # @return [Integer]
  def hash
    [backend, name, options].hash
  end

  # Equality check for Hash keys
  #
  # @param other [Object]
  # @return [Boolean] true when backend, name and options all match
  def eql?(other)
    return false unless other.is_a?(Language)

    backend == other.backend && name == other.name && options == other.options
  end

  # Human-readable representation
  #
  # Anonymous subclasses are shown as "<superclass name>(anonymous)".
  #
  # @return [String]
  def inspect
    opts = options.empty? ? "" : " options=#{options}"
    class_name = self.class.name || "#{self.class.superclass.name}(anonymous)"
    "#<#{class_name} name=#{name} backend=#{backend}#{opts}>"
  end

  # -- Class Methods --------------------------------------------------------

  class << self
    # Load a language from a library path (factory method)
    #
    # For pure-Ruby backends (Commonmarker, Markly, Prism, Psych), this
    # typically ignores the path and returns the single supported language.
    #
    # For tree-sitter backends (MRI, Rust, FFI, Java), this loads the
    # language from the shared library file.
    #
    # @param _path [String, nil] Path to shared library (optional for pure-Ruby)
    # @param symbol [String, nil] Symbol name to load (optional)
    # @param name [String, nil] Language name hint (optional)
    # @return [Language] Loaded language instance
    # @raise [NotImplementedError] If not implemented by subclass
    def from_library(_path = nil, symbol: nil, name: nil)
      raise NotImplementedError, "#{self}.from_library must be implemented"
    end
  end
end
97
+ end
98
+ end
@@ -0,0 +1,315 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Base
5
+ # Base class for all backend Node implementations
6
+ #
7
+ # This class defines the API contract for Node objects across all backends.
8
+ # It provides shared implementation for common behaviors and documents
9
+ # required/optional methods that subclasses must implement.
10
+ #
11
+ # == Backend Architecture
12
+ #
13
+ # TreeHaver supports two categories of backends:
14
+ #
15
+ # === Tree-sitter Backends (MRI, Rust, FFI, Java)
16
+ #
17
+ # These backends use the native tree-sitter library (via different bindings).
18
+ # They return raw `::TreeSitter::Node` objects which are wrapped by
19
+ # `TreeHaver::Node` (which inherits from this class).
20
+ #
21
+ # - Backend Tree#root_node returns: `::TreeSitter::Node` (raw)
22
+ # - TreeHaver::Tree#root_node wraps it in: `TreeHaver::Node`
23
+ # - These backends do NOT define their own Tree/Node classes
24
+ #
25
+ # === Pure-Ruby/Plugin Backends (Citrus, Prism, Psych, Commonmarker, Markly)
26
+ #
27
+ # These backends define their own complete implementations:
28
+ # - `Backend::X::Node` - wraps parser-specific node objects
29
+ # - `Backend::X::Tree` - wraps parser-specific tree objects
30
+ #
31
+ # For consistency, these should also inherit from `Base::Node` and `Base::Tree`.
32
+ #
33
+ # @abstract Subclasses must implement #type, #start_byte, #end_byte, and #children
34
+ # @see TreeHaver::Node The main wrapper class that inherits from this
35
+ # @see TreeHaver::Backends::Citrus::Node Example of a backend-specific Node
36
class Node
  include Comparable
  include Enumerable

  # The underlying backend-specific node object
  # @return [Object] Backend node
  attr_reader :inner_node

  # The source text
  # @return [String, nil] Source code
  attr_reader :source

  # Source lines for byte offset calculations
  # @return [Array<String>] Lines of source
  attr_reader :lines

  # Create a new Node wrapper
  #
  # @param node [Object] The backend-specific node object
  # @param source [String, nil] The source code
  # @param lines [Array<String>, nil] Pre-split lines; when omitted they are
  #   derived from +source+ with String#lines (empty array without a source)
  def initialize(node, source: nil, lines: nil)
    @inner_node = node
    @source = source
    @lines = lines || source&.lines || []
  end

  # -- Required API Methods ------------------------------------------------

  # Get the node type as a string
  # @return [String] Node type
  # @raise [NotImplementedError] unless overridden by the subclass
  def type
    raise NotImplementedError, "#{self.class}#type must be implemented"
  end

  # Get byte offset where the node starts
  # @return [Integer] Start byte offset
  # @raise [NotImplementedError] unless overridden by the subclass
  def start_byte
    raise NotImplementedError, "#{self.class}#start_byte must be implemented"
  end

  # Get byte offset where the node ends
  # @return [Integer] End byte offset
  # @raise [NotImplementedError] unless overridden by the subclass
  def end_byte
    raise NotImplementedError, "#{self.class}#end_byte must be implemented"
  end

  # Get all children as an array
  # @return [Array<Node>]
  # @raise [NotImplementedError] unless overridden by the subclass
  def children
    raise NotImplementedError, "#{self.class}#children must be implemented"
  end

  # -- Derived Methods (use #children) -------------------------------------

  # Get the number of child nodes
  # @return [Integer] Number of children
  def child_count
    children.size
  end

  # Get a child node by index
  # @param index [Integer] Child index
  # @return [Node, nil] The child node or nil
  def child(index)
    children[index]
  end

  # Iterate over children (this is what Enumerable builds on)
  # @yield [Node] Child node
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(__method__) unless block

    children.each(&block)
  end

  # Retrieve the first child
  # @return [Node, nil]
  def first_child
    children.first
  end

  # Retrieve the last child
  # @return [Node, nil]
  def last_child
    children.last
  end

  # -- Optional API Methods (with default implementations) -----------------

  # Get the parent node
  # @return [Node, nil] Parent node or nil (always nil in this default)
  def parent
    nil
  end

  # Get the next sibling node
  # @return [Node, nil] Next sibling or nil (always nil in this default)
  def next_sibling
    nil
  end

  # Get the previous sibling node
  # @return [Node, nil] Previous sibling or nil (always nil in this default)
  def prev_sibling
    nil
  end

  # Check if this node is named (structural)
  # @return [Boolean] true if named (always true in this default)
  def named?
    true
  end

  # Alias for named?
  alias_method :structural?, :named?

  # Check if this node represents a syntax error
  # @return [Boolean] true on error (always false in this default)
  def has_error?
    false
  end

  # Check if this node was inserted for error recovery
  # @return [Boolean] true if missing (always false in this default)
  def missing?
    false
  end

  # Get the text content of this node
  #
  # #start_byte and #end_byte are byte offsets, so the source is sliced by
  # bytes. Slicing with String#[] would interpret them as character indices
  # and return the wrong text after any multibyte character.
  #
  # @return [String] Node text ("" without a source or when out of range)
  def text
    return "" unless source

    source.byteslice(start_byte...end_byte) || ""
  end

  # Get a child by field name
  # @param _name [String, Symbol] Field name
  # @return [Node, nil] Child node or nil (always nil in this default)
  def child_by_field_name(_name)
    nil
  end

  # Get start position (row/col) - 0-based
  # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
  def start_point
    {row: 0, column: 0}
  end

  # Get end position (row/col) - 0-based
  # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
  def end_point
    {row: 0, column: 0}
  end

  # -- Shared Implementation -----------------------------------------------

  # Comparison based on byte range (start byte first, then end byte)
  # @param other [Object]
  # @return [Integer, nil] nil when +other+ has no byte range
  def <=>(other)
    return unless other.respond_to?(:start_byte) && other.respond_to?(:end_byte)

    cmp = start_byte <=> other.start_byte
    return cmp unless cmp == 0

    end_byte <=> other.end_byte
  end

  # Get 1-based start line
  # @return [Integer]
  def start_line
    point_component(start_point, :row) + 1
  end

  # Get 1-based end line
  # @return [Integer]
  def end_line
    point_component(end_point, :row) + 1
  end

  # Get unified source position hash
  #
  # Lines are 1-based; columns are passed through from the points unchanged.
  #
  # @return [Hash{Symbol => Integer}]
  def source_position
    first = start_point
    last = end_point

    {
      start_line: point_component(first, :row) + 1,
      end_line: point_component(last, :row) + 1,
      start_column: point_component(first, :column),
      end_column: point_component(last, :column),
    }
  end

  # Human-readable representation
  #
  # Safe to call on the abstract base: a missing #type is shown as
  # "(not implemented)" instead of raising.
  #
  # @return [String]
  def inspect
    class_name = self.class.name || "#{self.class.superclass&.name}(anonymous)"
    node_type = begin
      type
    rescue NotImplementedError
      "(not implemented)"
    end
    "#<#{class_name} type=#{node_type}>"
  end

  # String conversion returns the text content
  # @return [String]
  def to_s
    text
  end

  # Equality based on type and byte range
  #
  # Defined explicitly, so it takes precedence over Comparable#== and, unlike
  # #<=>, also requires the node types to match.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return false unless other.respond_to?(:type) && other.respond_to?(:start_byte) && other.respond_to?(:end_byte)

    type == other.type && start_byte == other.start_byte && end_byte == other.end_byte
  end

  protected

  # Calculate byte offset from line and column
  #
  # Sums the byte size of every line before +line+, then adds +column+
  # clamped to the byte size of the target line. A +line+ past the last line
  # yields the total byte size of all lines.
  #
  # @param line [Integer] 0-based line number
  # @param column [Integer] 0-based column number
  # @return [Integer] Byte offset (0 when no lines are available)
  def calculate_byte_offset(line, column)
    return 0 if lines.empty?

    offset = 0
    lines.each_with_index do |line_content, idx|
      if idx < line
        offset += line_content.bytesize
      else
        offset += [column, line_content.bytesize].min
        break
      end
    end
    offset
  end

  private

  # Read :row or :column from a point that is either a Hash or an object
  # exposing a reader of that name; anything else counts as 0.
  #
  # @param point [Hash, Object] Value returned by #start_point / #end_point
  # @param key [Symbol] :row or :column
  # @return [Integer]
  def point_component(point, key)
    return point[key] if point.is_a?(Hash)

    point.respond_to?(key) ? point.public_send(key) : 0
  end
end
314
+ end
315
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Base
5
+ # Base class for backend Parser implementations
6
+ # Used by wrapper backends (Commonmarker, Markly, etc.)
7
+ # Raw backends (MRI/Rust) do not inherit from this.
8
class Parser
  # The language this parser is configured for
  # @return [Object, nil] nil until assigned
  attr_accessor :language

  # Create a parser with no language assigned
  def initialize
    @language = nil
  end

  # Parse source code into a tree
  #
  # @param source [String] Source code to parse
  # @return [Object] Backend-specific tree
  # @raise [NotImplementedError] unless overridden by the subclass
  def parse(source)
    raise NotImplementedError, "#{self.class}#parse must be implemented"
  end

  # Parse source code, accepting a previous tree for API compatibility
  #
  # The previous tree is ignored here: this default always delegates to
  # #parse with the new source.
  #
  # @param _old_tree [Object, nil] Previously parsed tree (unused)
  # @param source [String] Source code to parse
  # @return [Object] Whatever #parse returns
  def parse_string(_old_tree, source)
    parse(source)
  end
end
23
+ end
24
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Base
5
+ # Point struct for position information (row/column)
6
+ #
7
+ # Provides a consistent interface for 0-based row/column positions.
8
+ # Compatible with both hash-style access and method access.
9
+ #
10
+ # @example
11
+ # point = TreeHaver::Base::Point.new(5, 10)
12
+ # point.row # => 5
13
+ # point.column # => 10
14
+ # point[:row] # => 5
15
+ # point[:column] # => 10
16
Point = Struct.new(:row, :column) do
  # Hash-style access for compatibility
  #
  # Unlike Struct#[], an unknown key yields nil instead of raising.
  #
  # @param key [Symbol, String, Integer] :row / "row" / 0 or :column / "column" / 1
  # @return [Integer, nil]
  def [](key)
    case key
    when :row, "row", 0
      row
    when :column, "column", 1
      column
    end
  end

  # Convert to hash
  #
  # With a block, behaves like Struct#to_h and builds the hash from the
  # [key, value] pairs the block returns, rather than ignoring the block.
  #
  # @yield [Symbol, Integer] member name and value
  # @return [Hash{Symbol => Integer}] {row:, column:} when no block is given
  def to_h(&block)
    return super if block

    {row: row, column: column}
  end

  # String representation
  # @return [String] e.g. "(5, 10)"
  def to_s
    "(#{row}, #{column})"
  end

  # Human-readable representation
  # @return [String]
  def inspect
    "#<TreeHaver::Base::Point row=#{row} column=#{column}>"
  end
end
47
+ end
48
+ end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ module Base
5
+ # Base class for all backend Tree implementations
6
+ #
7
+ # This class defines the API contract for Tree objects across all backends.
8
+ # It provides shared implementation and documents required/optional methods.
9
+ #
10
+ # == Backend Architecture
11
+ #
12
+ # TreeHaver supports two categories of backends:
13
+ #
14
+ # === Tree-sitter Backends (MRI, Rust, FFI, Java)
15
+ #
16
+ # These backends use the native tree-sitter library (via different bindings).
17
+ # They return raw `::TreeSitter::Tree` objects which are wrapped by
18
+ # `TreeHaver::Tree` (which inherits from this class).
19
+ #
20
+ # - Backend Parser returns: `::TreeSitter::Tree` (raw)
21
+ # - TreeHaver::Parser wraps it in: `TreeHaver::Tree`
22
+ # - These backends do NOT define their own Tree/Node classes
23
+ #
24
+ # === Pure-Ruby/Plugin Backends (Citrus, Prism, Psych, Commonmarker, Markly)
25
+ #
26
+ # These backends define their own complete implementations:
27
+ # - `Backend::X::Tree` - wraps parser-specific tree objects
28
+ # - `Backend::X::Node` - wraps parser-specific node objects
29
+ #
30
+ # For consistency, these should also inherit from `Base::Tree` and `Base::Node`.
31
+ #
32
+ # @abstract Subclasses must implement #root_node
33
+ # @see TreeHaver::Tree The main wrapper class that inherits from this
34
+ # @see TreeHaver::Backends::Citrus::Tree Example of a backend-specific Tree
35
class Tree
  # The underlying backend-specific tree object
  # @return [Object] Backend tree
  attr_reader :inner_tree

  # The source text
  # @return [String, nil] The original source code
  attr_reader :source

  # Source lines for byte offset calculations
  # @return [Array<String>] Lines of source
  attr_reader :lines

  # Create a new Tree
  #
  # @param inner_tree [Object] The backend-specific tree object
  # @param source [String, nil] The source code
  # @param lines [Array<String>, nil] Pre-split lines (optional, derived from source if not provided)
  def initialize(inner_tree = nil, source: nil, lines: nil)
    @inner_tree = inner_tree
    @source = source
    @lines = lines || source&.lines || []
  end

  # -- Required API Methods ------------------------------------------------

  # Get the root node of the tree
  # @return [Node] Root node
  # @raise [NotImplementedError] unless overridden by the subclass
  def root_node
    raise NotImplementedError, "#{self.class}#root_node must be implemented"
  end

  # -- Optional API Methods (with defaults) --------------------------------

  # Get parse errors
  # @return [Array] Errors (always empty in this default)
  def errors
    []
  end

  # Get parse warnings
  # @return [Array] Warnings (always empty in this default)
  def warnings
    []
  end

  # Get comments from the document
  # @return [Array] Comments (always empty in this default)
  def comments
    []
  end

  # Mark the tree as edited for incremental re-parsing
  #
  # Default implementation is a no-op (incremental parsing not supported);
  # backends that support it should override this. All keywords are required
  # so that callers use one signature regardless of backend.
  #
  # @return [void]
  def edit(
    start_byte:,
    old_end_byte:,
    new_end_byte:,
    start_point:,
    old_end_point:,
    new_end_point:
  )
  end

  # Check if this tree has syntax errors
  #
  # Walks the tree breadth-first with an explicit queue (no recursion) and
  # reports true as soon as any node is an error or a missing node.
  #
  # @return [Boolean] false when there is no root node
  def has_error?
    root = root_node
    return false unless root

    pending = [root]
    until pending.empty?
      node = pending.shift
      # A nil entry must not end the walk while other nodes are still queued
      next if node.nil?
      return true if node.has_error? || node.missing?

      node.each { |child| pending << child }
    end

    false
  end

  # Human-readable representation
  #
  # Anonymous subclasses are shown as "<superclass name>(anonymous)".
  #
  # @return [String]
  def inspect
    class_name = self.class.name || "#{self.class.superclass&.name}(anonymous)"
    "#<#{class_name}>"
  end
end
127
+ end
128
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TreeHaver
4
+ # Base classes for backend implementation
5
module Base
  # Every base class lives in the "base" directory next to this file and is
  # loaded lazily on first reference to its constant.
  base_dir = File.join(__dir__, "base")

  autoload :Node, File.join(base_dir, "node")
  autoload :Tree, File.join(base_dir, "tree")
  autoload :Parser, File.join(base_dir, "parser")
  autoload :Language, File.join(base_dir, "language")
  autoload :Point, File.join(base_dir, "point")
end
12
+ end
@@ -59,10 +59,7 @@ module TreeHaver
59
59
  # end
60
60
  #
61
61
  # @note This is the key to tree_haver's "write once, run anywhere" promise
62
- class Node
63
- include Comparable
64
- include Enumerable
65
-
62
+ class Node < Base::Node
66
63
  # The wrapped backend-specific node object
67
64
  #
68
65
  # This provides direct access to the underlying backend node for advanced usage
@@ -83,17 +80,16 @@ module TreeHaver
83
80
  # when /TreeSitter/
84
81
  # # ruby_tree_sitter-specific code
85
82
  # end
86
- attr_reader :inner_node
83
+ # NOTE: inner_node is inherited from Base::Node
87
84
 
88
85
  # The source text for text extraction
89
86
  # @return [String]
90
- attr_reader :source
87
+ # NOTE: source is inherited from Base::Node
91
88
 
92
89
  # @param node [Object] Backend-specific node object
93
90
  # @param source [String] Source text for text extraction
94
91
  def initialize(node, source: nil)
95
- @inner_node = node
96
- @source = source
92
+ super(node, source: source)
97
93
  end
98
94
 
99
95
  # Get the node's type/kind as a string
@@ -114,8 +110,16 @@ module TreeHaver
114
110
  end
115
111
  end
116
112
 
117
- # Get the node's start byte offset
118
- # @return [Integer]
113
+ # Alias for type (tree_stump compatibility)
114
+ #
115
+ # tree_stump uses `kind` instead of `type` for node types.
116
+ # This method delegates to `type` so either can be used.
117
+ #
118
+ # @return [String] The node type
119
+ def kind
+ # Calls #type at invocation time, so a subclass overriding #type changes #kind as well.
120
+ type
121
+ end
122
+
119
123
  def start_byte
120
124
  @inner_node.start_byte
121
125
  end