tree_haver 5.0.4 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/lib/tree_haver/backend_context.rb +28 -0
  4. data/lib/tree_haver/backend_registry.rb +19 -432
  5. data/lib/tree_haver/contracts.rb +460 -0
  6. data/lib/tree_haver/kaitai_backend.rb +30 -0
  7. data/lib/tree_haver/language_pack.rb +190 -0
  8. data/lib/tree_haver/peg_backends.rb +76 -0
  9. data/lib/tree_haver/version.rb +1 -12
  10. data/lib/tree_haver.rb +7 -1316
  11. data.tar.gz.sig +0 -0
  12. metadata +34 -245
  13. metadata.gz.sig +0 -0
  14. data/CHANGELOG.md +0 -1366
  15. data/CITATION.cff +0 -20
  16. data/CODE_OF_CONDUCT.md +0 -134
  17. data/CONTRIBUTING.md +0 -359
  18. data/FUNDING.md +0 -74
  19. data/LICENSE.txt +0 -21
  20. data/README.md +0 -2347
  21. data/REEK +0 -0
  22. data/RUBOCOP.md +0 -71
  23. data/SECURITY.md +0 -21
  24. data/lib/tree_haver/backend_api.rb +0 -349
  25. data/lib/tree_haver/backends/citrus.rb +0 -487
  26. data/lib/tree_haver/backends/ffi.rb +0 -1009
  27. data/lib/tree_haver/backends/java.rb +0 -893
  28. data/lib/tree_haver/backends/mri.rb +0 -362
  29. data/lib/tree_haver/backends/parslet.rb +0 -560
  30. data/lib/tree_haver/backends/prism.rb +0 -471
  31. data/lib/tree_haver/backends/psych.rb +0 -375
  32. data/lib/tree_haver/backends/rust.rb +0 -239
  33. data/lib/tree_haver/base/language.rb +0 -98
  34. data/lib/tree_haver/base/node.rb +0 -322
  35. data/lib/tree_haver/base/parser.rb +0 -24
  36. data/lib/tree_haver/base/point.rb +0 -48
  37. data/lib/tree_haver/base/tree.rb +0 -128
  38. data/lib/tree_haver/base.rb +0 -12
  39. data/lib/tree_haver/citrus_grammar_finder.rb +0 -218
  40. data/lib/tree_haver/compat.rb +0 -43
  41. data/lib/tree_haver/grammar_finder.rb +0 -374
  42. data/lib/tree_haver/language.rb +0 -295
  43. data/lib/tree_haver/language_registry.rb +0 -190
  44. data/lib/tree_haver/library_path_utils.rb +0 -80
  45. data/lib/tree_haver/node.rb +0 -579
  46. data/lib/tree_haver/parser.rb +0 -438
  47. data/lib/tree_haver/parslet_grammar_finder.rb +0 -224
  48. data/lib/tree_haver/path_validator.rb +0 -353
  49. data/lib/tree_haver/point.rb +0 -27
  50. data/lib/tree_haver/rspec/dependency_tags.rb +0 -1392
  51. data/lib/tree_haver/rspec/testable_node.rb +0 -217
  52. data/lib/tree_haver/rspec.rb +0 -33
  53. data/lib/tree_haver/tree.rb +0 -258
  54. data/sig/tree_haver/backends.rbs +0 -352
  55. data/sig/tree_haver/grammar_finder.rbs +0 -29
  56. data/sig/tree_haver/path_validator.rbs +0 -32
  57. data/sig/tree_haver.rbs +0 -234
@@ -1,98 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Base
5
- # Base class for backend Language implementations
6
- #
7
- # This class defines the API contract for all language implementations.
8
- # Backend-specific Language classes should inherit from this and implement
9
- # the required interface.
10
- #
11
- # @abstract Subclasses must implement #name and #backend at minimum
12
- class Language
13
- include Comparable
14
-
15
- # The language name (e.g., :markdown, :ruby, :json)
16
- # @return [Symbol] Language name
17
- attr_reader :name
18
-
19
- # The backend this language is for
20
- # @return [Symbol] Backend identifier (e.g., :commonmarker, :markly, :prism)
21
- attr_reader :backend
22
-
23
- # Language-specific options
24
- # @return [Hash] Options hash
25
- attr_reader :options
26
-
27
- # Create a new Language instance
28
- #
29
- # @param name [Symbol, String] Language name
30
- # @param backend [Symbol] Backend identifier
31
- # @param options [Hash] Backend-specific options
32
- def initialize(name, backend:, options: {})
33
- @name = name.to_sym
34
- @backend = backend.to_sym
35
- @options = options
36
- end
37
-
38
- # Alias for name (tree-sitter compatibility)
39
- alias_method :language_name, :name
40
-
41
- # -- Shared Implementation ------------------------------------------------
42
-
43
- # Comparison based on backend then name
44
- # @param other [Object]
45
- # @return [Integer, nil]
46
- def <=>(other)
47
- return unless other.is_a?(Language)
48
- return unless other.respond_to?(:backend) && other.backend == backend
49
-
50
- name <=> other.name
51
- end
52
-
53
- # Hash value for use in Sets/Hashes
54
- # @return [Integer]
55
- def hash
56
- [backend, name, options.to_a.sort].hash
57
- end
58
-
59
- # Equality check for Hash keys
60
- # @param other [Object]
61
- # @return [Boolean]
62
- def eql?(other)
63
- return false unless other.is_a?(Language)
64
-
65
- backend == other.backend && name == other.name && options == other.options
66
- end
67
-
68
- # Human-readable representation
69
- # @return [String]
70
- def inspect
71
- opts = options.empty? ? "" : " options=#{options}"
72
- class_name = self.class.name || "#{self.class.superclass.name}(anonymous)"
73
- "#<#{class_name} name=#{name} backend=#{backend}#{opts}>"
74
- end
75
-
76
- # -- Class Methods --------------------------------------------------------
77
-
78
- class << self
79
- # Load a language from a library path (factory method)
80
- #
81
- # For pure-Ruby backends (Commonmarker, Markly, Prism, Psych), this
82
- # typically ignores the path and returns the single supported language.
83
- #
84
- # For tree-sitter backends (MRI, Rust, FFI, Java), this loads the
85
- # language from the shared library file.
86
- #
87
- # @param _path [String, nil] Path to shared library (optional for pure-Ruby)
88
- # @param symbol [String, nil] Symbol name to load (optional)
89
- # @param name [String, nil] Language name hint (optional)
90
- # @return [Language] Loaded language instance
91
- # @raise [NotImplementedError] If not implemented by subclass
92
- def from_library(_path = nil, symbol: nil, name: nil)
93
- raise NotImplementedError, "#{self}.from_library must be implemented"
94
- end
95
- end
96
- end
97
- end
98
- end
@@ -1,322 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Base
5
- # Base class for all backend Node implementations
6
- #
7
- # This class defines the API contract for Node objects across all backends.
8
- # It provides shared implementation for common behaviors and documents
9
- # required/optional methods that subclasses must implement.
10
- #
11
- # == Backend Architecture
12
- #
13
- # TreeHaver supports two categories of backends:
14
- #
15
- # === Tree-sitter Backends (MRI, Rust, FFI, Java)
16
- #
17
- # These backends use the native tree-sitter library (via different bindings).
18
- # They return raw `::TreeSitter::Node` objects which are wrapped by
19
- # `TreeHaver::Node` (which inherits from this class).
20
- #
21
- # - Backend Tree#root_node returns: `::TreeSitter::Node` (raw)
22
- # - TreeHaver::Tree#root_node wraps it in: `TreeHaver::Node`
23
- # - These backends do NOT define their own Tree/Node classes
24
- #
25
- # === Pure-Ruby/Plugin Backends (Citrus, Prism, Psych, Commonmarker, Markly)
26
- #
27
- # These backends define their own complete implementations:
28
- # - `Backend::X::Node` - wraps parser-specific node objects
29
- # - `Backend::X::Tree` - wraps parser-specific tree objects
30
- #
31
- # For consistency, these should also inherit from `Base::Node` and `Base::Tree`.
32
- #
33
- # @abstract Subclasses must implement #type, #start_byte, #end_byte, and #children
34
- # @see TreeHaver::Node The main wrapper class that inherits from this
35
- # @see TreeHaver::Backends::Citrus::Node Example of a backend-specific Node
36
- class Node
37
- include Comparable
38
- include Enumerable
39
-
40
- # The underlying backend-specific node object
41
- # @return [Object] Backend node
42
- attr_reader :inner_node
43
-
44
- # The source text
45
- # @return [String] Source code
46
- attr_reader :source
47
-
48
- # Source lines for byte offset calculations
49
- # @return [Array<String>] Lines of source
50
- attr_reader :lines
51
-
52
- # Create a new Node wrapper
53
- #
54
- # @param node [Object] The backend-specific node object
55
- # @param source [String, nil] The source code
56
- # @param lines [Array<String>, nil] Pre-split lines (optional optimization)
57
- def initialize(node, source: nil, lines: nil)
58
- @inner_node = node
59
- @source = source
60
- @lines = lines || source&.lines || []
61
- end
62
-
63
- # -- Required API Methods ------------------------------------------------
64
-
65
- # Get the node type as a string
66
- # @return [String] Node type
67
- def type
68
- raise NotImplementedError, "#{self.class}#type must be implemented"
69
- end
70
-
71
- # Get byte offset where the node starts
72
- # @return [Integer] Start byte offset
73
- def start_byte
74
- raise NotImplementedError, "#{self.class}#start_byte must be implemented"
75
- end
76
-
77
- # Get byte offset where the node ends
78
- # @return [Integer] End byte offset
79
- def end_byte
80
- raise NotImplementedError, "#{self.class}#end_byte must be implemented"
81
- end
82
-
83
- # Get all children as an array
84
- # @return [Array<Node>]
85
- def children
86
- raise NotImplementedError, "#{self.class}#children must be implemented"
87
- end
88
-
89
- # -- Derived Methods (use #children) -------------------------------------
90
-
91
- # Get the number of child nodes
92
- # @return [Integer] Number of children
93
- def child_count
94
- children.size
95
- end
96
-
97
- # Get a child node by index
98
- #
99
- # Returns nil for negative indices or indices out of bounds.
100
- # This matches tree-sitter behavior where negative indices are invalid.
101
- #
102
- # @param index [Integer] Child index (0-based, non-negative)
103
- # @return [Node, nil] The child node or nil
104
- def child(index)
105
- return if index.negative?
106
- return if index >= child_count
107
-
108
- children[index]
109
- end
110
-
111
- # Iterate over children
112
- # @yield [Node] Child node
113
- def each(&block)
114
- return to_enum(__method__) unless block
115
-
116
- children.each(&block)
117
- end
118
-
119
- # Retrieve the first child
120
- # @return [Node, nil]
121
- def first_child
122
- children.first
123
- end
124
-
125
- # Retrieve the last child
126
- # @return [Node, nil]
127
- def last_child
128
- children.last
129
- end
130
-
131
- # -- Optional API Methods (with default implementations) -----------------
132
-
133
- # Get the parent node
134
- # @return [Node, nil] Parent node or nil
135
- def parent
136
- nil
137
- end
138
-
139
- # Get the next sibling node
140
- # @return [Node, nil] Next sibling or nil
141
- def next_sibling
142
- nil
143
- end
144
-
145
- # Get the previous sibling node
146
- # @return [Node, nil] Previous sibling or nil
147
- def prev_sibling
148
- nil
149
- end
150
-
151
- # Check if this node is named (structural)
152
- # @return [Boolean] true if named
153
- def named?
154
- true
155
- end
156
-
157
- # Alias for named?
158
- alias_method :structural?, :named?
159
-
160
- # Check if this node represents a syntax error
161
- # @return [Boolean] true on error
162
- def has_error?
163
- false
164
- end
165
-
166
- # Check if this node was inserted for error recovery
167
- # @return [Boolean] true if missing
168
- def missing?
169
- false
170
- end
171
-
172
- # Get the text content of this node
173
- # @return [String] Node text
174
- def text
175
- return "" unless source
176
-
177
- source[start_byte...end_byte] || ""
178
- end
179
-
180
- # Get a child by field name
181
- # @param _name [String, Symbol] Field name
182
- # @return [Node, nil] Child node or nil
183
- def child_by_field_name(_name)
184
- nil
185
- end
186
-
187
- # Get start position (row/col) - 0-based
188
- # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
189
- def start_point
190
- {row: 0, column: 0}
191
- end
192
-
193
- # Get end position (row/col) - 0-based
194
- # @return [Hash{Symbol => Integer}] {row: 0, column: 0}
195
- def end_point
196
- {row: 0, column: 0}
197
- end
198
-
199
- # -- Shared Implementation -----------------------------------------------
200
-
201
- # Comparison based on byte range
202
- # @param other [Object]
203
- # @return [Integer, nil]
204
- def <=>(other)
205
- return unless other.respond_to?(:start_byte) && other.respond_to?(:end_byte)
206
-
207
- cmp = start_byte <=> other.start_byte
208
- return cmp unless cmp == 0
209
-
210
- end_byte <=> other.end_byte
211
- end
212
-
213
- # Get 1-based start line
214
- # @return [Integer]
215
- def start_line
216
- sp = start_point
217
- row = if sp.is_a?(Hash)
218
- sp[:row]
219
- else
220
- (sp.respond_to?(:row) ? sp.row : 0)
221
- end
222
- row + 1
223
- end
224
-
225
- # Get 1-based end line
226
- # @return [Integer]
227
- def end_line
228
- ep = end_point
229
- row = if ep.is_a?(Hash)
230
- ep[:row]
231
- else
232
- (ep.respond_to?(:row) ? ep.row : 0)
233
- end
234
- row + 1
235
- end
236
-
237
- # Get unified source position hash
238
- # @return [Hash{Symbol => Integer}]
239
- def source_position
240
- sp = start_point
241
- ep = end_point
242
-
243
- sp_row = if sp.is_a?(Hash)
244
- sp[:row]
245
- else
246
- (sp.respond_to?(:row) ? sp.row : 0)
247
- end
248
- sp_col = if sp.is_a?(Hash)
249
- sp[:column]
250
- else
251
- (sp.respond_to?(:column) ? sp.column : 0)
252
- end
253
- ep_row = if ep.is_a?(Hash)
254
- ep[:row]
255
- else
256
- (ep.respond_to?(:row) ? ep.row : 0)
257
- end
258
- ep_col = if ep.is_a?(Hash)
259
- ep[:column]
260
- else
261
- (ep.respond_to?(:column) ? ep.column : 0)
262
- end
263
-
264
- {
265
- start_line: sp_row + 1,
266
- end_line: ep_row + 1,
267
- start_column: sp_col,
268
- end_column: ep_col,
269
- }
270
- end
271
-
272
- # Human-readable representation
273
- # @return [String]
274
- def inspect
275
- class_name = self.class.name || "#{self.class.superclass&.name}(anonymous)"
276
- node_type = begin
277
- type
278
- rescue NotImplementedError
279
- "(not implemented)"
280
- end
281
- "#<#{class_name} type=#{node_type}>"
282
- end
283
-
284
- # String conversion returns the text content
285
- # @return [String]
286
- def to_s
287
- text
288
- end
289
-
290
- # Equality based on type and byte range
291
- # @param other [Object]
292
- # @return [Boolean]
293
- def ==(other)
294
- return false unless other.respond_to?(:type) && other.respond_to?(:start_byte) && other.respond_to?(:end_byte)
295
-
296
- type == other.type && start_byte == other.start_byte && end_byte == other.end_byte
297
- end
298
-
299
- protected
300
-
301
- # Calculate byte offset from line and column
302
- #
303
- # @param line [Integer] 0-based line number
304
- # @param column [Integer] 0-based column number
305
- # @return [Integer] Byte offset
306
- def calculate_byte_offset(line, column)
307
- return 0 if lines.empty?
308
-
309
- offset = 0
310
- lines.each_with_index do |line_content, idx|
311
- if idx < line
312
- offset += line_content.bytesize
313
- else
314
- offset += [column, line_content.bytesize].min
315
- break
316
- end
317
- end
318
- offset
319
- end
320
- end
321
- end
322
- end
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Base
5
- # Base class for backend Parser implementations
6
- # Used by wrapper backends (Commonmarker, Markly, etc.)
7
- # Raw backends (MRI/Rust) do not inherit from this.
8
- class Parser
9
- attr_accessor :language
10
-
11
- def initialize
12
- @language = nil
13
- end
14
-
15
- def parse(source)
16
- raise NotImplementedError
17
- end
18
-
19
- def parse_string(_old_tree, source)
20
- parse(source)
21
- end
22
- end
23
- end
24
- end
@@ -1,48 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Base
5
- # Point struct for position information (row/column)
6
- #
7
- # Provides a consistent interface for 0-based row/column positions.
8
- # Compatible with both hash-style access and method access.
9
- #
10
- # @example
11
- # point = TreeHaver::Base::Point.new(5, 10)
12
- # point.row # => 5
13
- # point.column # => 10
14
- # point[:row] # => 5
15
- # point[:column] # => 10
16
- Point = Struct.new(:row, :column) do
17
- # Hash-style access for compatibility
18
- # @param key [Symbol, String] :row or :column
19
- # @return [Integer, nil]
20
- def [](key)
21
- case key
22
- when :row, "row", 0
23
- row
24
- when :column, "column", 1
25
- column
26
- end
27
- end
28
-
29
- # Convert to hash
30
- # @return [Hash{Symbol => Integer}]
31
- def to_h
32
- {row: row, column: column}
33
- end
34
-
35
- # String representation
36
- # @return [String]
37
- def to_s
38
- "(#{row}, #{column})"
39
- end
40
-
41
- # Human-readable representation
42
- # @return [String]
43
- def inspect
44
- "#<TreeHaver::Base::Point row=#{row} column=#{column}>"
45
- end
46
- end
47
- end
48
- end
@@ -1,128 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- module Base
5
- # Base class for all backend Tree implementations
6
- #
7
- # This class defines the API contract for Tree objects across all backends.
8
- # It provides shared implementation and documents required/optional methods.
9
- #
10
- # == Backend Architecture
11
- #
12
- # TreeHaver supports two categories of backends:
13
- #
14
- # === Tree-sitter Backends (MRI, Rust, FFI, Java)
15
- #
16
- # These backends use the native tree-sitter library (via different bindings).
17
- # They return raw `::TreeSitter::Tree` objects which are wrapped by
18
- # `TreeHaver::Tree` (which inherits from this class).
19
- #
20
- # - Backend Parser returns: `::TreeSitter::Tree` (raw)
21
- # - TreeHaver::Parser wraps it in: `TreeHaver::Tree`
22
- # - These backends do NOT define their own Tree/Node classes
23
- #
24
- # === Pure-Ruby/Plugin Backends (Citrus, Prism, Psych, Commonmarker, Markly)
25
- #
26
- # These backends define their own complete implementations:
27
- # - `Backend::X::Tree` - wraps parser-specific tree objects
28
- # - `Backend::X::Node` - wraps parser-specific node objects
29
- #
30
- # For consistency, these should also inherit from `Base::Tree` and `Base::Node`.
31
- #
32
- # @abstract Subclasses must implement #root_node
33
- # @see TreeHaver::Tree The main wrapper class that inherits from this
34
- # @see TreeHaver::Backends::Citrus::Tree Example of a backend-specific Tree
35
- class Tree
36
- # The underlying backend-specific tree object
37
- # @return [Object] Backend tree
38
- attr_reader :inner_tree
39
-
40
- # The source text
41
- # @return [String] The original source code
42
- attr_reader :source
43
-
44
- # Source lines for byte offset calculations
45
- # @return [Array<String>] Lines of source
46
- attr_reader :lines
47
-
48
- # Create a new Tree
49
- #
50
- # @param inner_tree [Object] The backend-specific tree object
51
- # @param source [String, nil] The source code
52
- # @param lines [Array<String>, nil] Pre-split lines (optional, derived from source if not provided)
53
- def initialize(inner_tree = nil, source: nil, lines: nil)
54
- @inner_tree = inner_tree
55
- @source = source
56
- @lines = lines || source&.lines || []
57
- end
58
-
59
- # -- Required API Methods ------------------------------------------------
60
-
61
- # Get the root node of the tree
62
- # @return [Node] Root node
63
- def root_node
64
- raise NotImplementedError, "#{self.class}#root_node must be implemented"
65
- end
66
-
67
- # -- Optional API Methods (with defaults) --------------------------------
68
-
69
- # Get parse errors
70
- # @return [Array] Errors (empty for most pure-Ruby backends)
71
- def errors
72
- []
73
- end
74
-
75
- # Get parse warnings
76
- # @return [Array] Warnings (empty for most pure-Ruby backends)
77
- def warnings
78
- []
79
- end
80
-
81
- # Get comments from the document
82
- # @return [Array] Comments (empty for most pure-Ruby backends)
83
- def comments
84
- []
85
- end
86
-
87
- # Mark the tree as edited for incremental re-parsing
88
- # @return [void]
89
- def edit(
90
- start_byte:,
91
- old_end_byte:,
92
- new_end_byte:,
93
- start_point:,
94
- old_end_point:,
95
- new_end_point:
96
- )
97
- # Default implementation: no-op (incremental parsing not supported)
98
- # Backends that support it should override this
99
- end
100
-
101
- # Check if this tree has syntax errors
102
- # @return [Boolean]
103
- def has_error?
104
- root = root_node
105
- return false unless root
106
- return true if root.has_error?
107
-
108
- # Deep check: traverse tree looking for error nodes
109
- # Use queue-based traversal to avoid deep recursion
110
- queue = [root]
111
- while (node = queue.shift)
112
- return true if node.has_error? || node.missing?
113
-
114
- # Add children to queue
115
- node.each { |child| queue.push(child) }
116
- end
117
-
118
- false
119
- end
120
-
121
- # Human-readable representation
122
- # @return [String]
123
- def inspect
124
- "#<#{self.class.name}>"
125
- end
126
- end
127
- end
128
- end
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module TreeHaver
4
- # Base classes for backend implementation
5
- module Base
6
- autoload :Node, File.join(__dir__, "base", "node")
7
- autoload :Tree, File.join(__dir__, "base", "tree")
8
- autoload :Parser, File.join(__dir__, "base", "parser")
9
- autoload :Language, File.join(__dir__, "base", "language")
10
- autoload :Point, File.join(__dir__, "base", "point")
11
- end
12
- end