parsanol 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +546 -0
- data/Cargo.toml +9 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/ext/parsanol_native/Cargo.toml +34 -0
- data/ext/parsanol_native/extconf.rb +15 -0
- data/ext/parsanol_native/src/lib.rs +17 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +293 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::SourceLocation - Source Location Tracking
|
|
4
|
+
#
|
|
5
|
+
# Track source positions (line, column, offset) through the parsing and
|
|
6
|
+
# transformation pipeline. This is useful for error reporting, IDE integration,
|
|
7
|
+
# and source mapping.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# # Parse with source tracking
|
|
11
|
+
# result = parser.parse_with_spans("hello world")
|
|
12
|
+
# tree = result.tree
|
|
13
|
+
# spans = result.spans
|
|
14
|
+
#
|
|
15
|
+
# # Access span for a node
|
|
16
|
+
# span = spans[node_id]
|
|
17
|
+
# puts "Matched at line #{span.start.line}, column #{span.start.column}"
|
|
18
|
+
#
|
|
19
|
+
# Requires native extension for full functionality.
|
|
20
|
+
|
|
21
|
+
module Parsanol
|
|
22
|
+
# A single point in the source text: an offset plus the line and column
# it was created with. The three values are stored as given; nothing in
# this class validates or derives them.
class SourcePosition
  attr_reader :offset, :line, :column

  # @param offset [Integer] offset of the position within the input
  # @param line [Integer] line number of the position
  # @param column [Integer] column number of the position
  def initialize(offset:, line:, column:)
    @offset, @line, @column = offset, line, column
  end

  # @return [String] human-readable description, e.g.
  #   "line 2, column 3 (offset 5)"
  def to_s
    format('line %s, column %s (offset %s)', @line, @column, @offset)
  end

  # @return [Hash] the three fields keyed by :offset, :line and :column
  def to_h
    { offset: @offset, line: @line, column: @column }
  end

  # Two positions are equal when all three fields are equal; anything that
  # is not a SourcePosition is never equal.
  #
  # @param other [Object] object to compare with
  # @return [Boolean]
  def ==(other)
    other.is_a?(SourcePosition) &&
      @offset == other.offset &&
      @line == other.line &&
      @column == other.column
  end

  # Hash-key equality; defers to #== so that equal positions collide.
  def eql?(other)
    self == other
  end

  # @return [Integer] hash code derived from the same fields #== compares
  def hash
    [@offset, @line, @column].hash
  end
end
|
|
54
|
+
|
|
55
|
+
# Represents a span in source code (from start to end position).
#
# Offsets are byte offsets into the input: #length reports bytes and
# #extract slices with String#byteslice.
class SourceSpan
  attr_reader :start, :end

  # @param start_pos [SourcePosition, Hash] start of the span; a Hash is
  #   splatted into SourcePosition.new (keys :offset, :line, :column)
  # @param end_pos [SourcePosition, Hash] end of the span, same forms
  def initialize(start_pos:, end_pos:)
    @start = start_pos.is_a?(SourcePosition) ? start_pos : SourcePosition.new(**start_pos)
    @end = end_pos.is_a?(SourcePosition) ? end_pos : SourcePosition.new(**end_pos)
  end

  # Create a span from offsets (computes line/column from input).
  #
  # @param input [String] the source text the offsets refer to
  # @param start_offset [Integer] byte offset where the span starts
  # @param end_offset [Integer] byte offset where the span ends
  # @return [SourceSpan]
  def self.from_offsets(input, start_offset, end_offset)
    new(
      start_pos: compute_position(input, start_offset),
      end_pos: compute_position(input, end_offset)
    )
  end

  # Compute line and column for a byte offset.
  #
  # Lines and columns start at 1. Columns count characters, while the
  # offset counts bytes, so the running offset must advance by each
  # character's byte size; advancing by one per character would misplace
  # every position that follows a multi-byte character.
  #
  # @param input [String] the source text
  # @param offset [Integer] byte offset into input
  # @return [SourcePosition]
  def self.compute_position(input, offset)
    line = 1
    column = 1
    consumed_bytes = 0

    input.each_char do |char|
      break if consumed_bytes >= offset

      if char == "\n"
        line += 1
        column = 1
      else
        column += 1
      end

      consumed_bytes += char.bytesize
    end

    SourcePosition.new(offset: offset, line: line, column: column)
  end

  # Merge two spans (returns a new span covering both).
  #
  # @param other [SourceSpan, nil] span to merge with
  # @return [SourceSpan] self when other is nil, otherwise a new span from
  #   the smallest start offset to the largest end offset
  def merge(other)
    return self if other.nil?

    SourceSpan.new(
      start_pos: [@start, other.start].min_by(&:offset),
      end_pos: [@end, other.end].max_by(&:offset)
    )
  end

  # Check if this span overlaps with another. The comparison is strict, so
  # spans that merely touch at a boundary do not overlap.
  #
  # @param other [SourceSpan, nil]
  # @return [Boolean]
  def overlaps?(other)
    return false if other.nil?

    @start.offset < other.end.offset && @end.offset > other.start.offset
  end

  # Check if this span is adjacent to another (one ends exactly where the
  # other starts).
  #
  # @param other [SourceSpan, nil]
  # @return [Boolean]
  def adjacent?(other)
    return false if other.nil?

    @end.offset == other.start.offset || other.end.offset == @start.offset
  end

  # Check if a position is within this span. Both ends are inclusive.
  #
  # @param position [SourcePosition, Integer] position or raw offset
  # @return [Boolean]
  def contains?(position)
    offset = position.is_a?(SourcePosition) ? position.offset : position
    offset.between?(@start.offset, @end.offset)
  end

  # @return [Integer] length of the span in bytes
  def length
    @end.offset - @start.offset
  end

  # Extract the source text covered by this span.
  #
  # @param input [String] the source text the span refers to
  # @return [String, nil] result of String#byteslice for the span
  def extract(input)
    input.byteslice(@start.offset, length)
  end

  # @return [String] "<start> - <end>" using each position's #to_s
  def to_s
    "#{@start} - #{@end}"
  end

  # @return [Hash] { start: ..., end: ... } with each position as a Hash
  def to_h
    { start: @start.to_h, end: @end.to_h }
  end

  # @param other [Object]
  # @return [Boolean] true when other is a SourceSpan with equal endpoints
  def ==(other)
    return false unless other.is_a?(SourceSpan)

    @start == other.start && @end == other.end
  end

  # Hash-key equality, kept in step with #== so equal spans behave as the
  # same key in a Hash or Set.
  def eql?(other)
    self == other
  end

  # @return [Integer] hash code derived from the endpoints #== compares
  def hash
    [@start, @end].hash
  end
end
|
|
147
|
+
|
|
148
|
+
# Pairs the tree returned by parse_with_spans with the table of spans
# recorded for its nodes.
class ParseResultWithSpans
  attr_reader :tree, :spans

  # @param tree [Object] the parse tree
  # @param spans [Hash] span table; looked up by node id in #span_for and
  #   enumerated through #values in #spans_at
  def initialize(tree:, spans:)
    @tree, @spans = tree, spans
  end

  # Look up the span recorded for one node.
  #
  # @param node_id [Object] key into the span table
  # @return [Object, nil] whatever the table holds for that key
  def span_for(node_id)
    @spans[node_id]
  end

  # Collect every recorded span that reports containing the given offset.
  #
  # @param offset [Integer] offset passed to each span's #contains?
  # @return [Array] the matching spans
  def spans_at(offset)
    recorded = @spans.values
    recorded.select { |candidate| candidate.contains?(offset) }
  end
end
|
|
167
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::StreamingParser - Streaming Parser for Large Inputs
|
|
4
|
+
#
|
|
5
|
+
# Parse large inputs in chunks without loading the entire input into memory.
|
|
6
|
+
# Useful for file parsing, network streams, or very large documents.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# parser = Parsanol::StreamingParser.new(json_grammar)
|
|
10
|
+
#
|
|
11
|
+
# File.open("large.json") do |f|
|
|
12
|
+
# parser.parse_stream(f) do |partial_result|
|
|
13
|
+
# # Process each complete element as it's parsed
|
|
14
|
+
# process_item(partial_result)
|
|
15
|
+
# end
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# Requires native extension for full functionality.
|
|
19
|
+
|
|
20
|
+
module Parsanol
|
|
21
|
+
class StreamingParser
  # Default chunk size (4KB)
  DEFAULT_CHUNK_SIZE = 4096

  # @return [String] every chunk passed to #add_chunk since construction or
  #   the last #reset, concatenated
  attr_reader :buffer

  # @return [Integer] default number of bytes #parse_stream reads at a time
  attr_reader :chunk_size

  # Create a new streaming parser.
  #
  # When Parsanol::Native reports itself available, the grammar's root is
  # serialized and handed to the native streaming parser; otherwise the
  # instance runs without one.
  #
  # @param grammar [Parsanol::Parser, Parsanol::Atoms::Base] Grammar to use
  #   (must respond to #root when the native extension is available)
  # @param chunk_size [Integer] Size of chunks to read (default: 4096)
  def initialize(grammar, chunk_size: DEFAULT_CHUNK_SIZE)
    @grammar = grammar
    @chunk_size = chunk_size
    @native_parser = Parsanol::Native.available? ? build_native_parser : nil
    clear_input_state
  end

  # Add a chunk of input.
  #
  # The chunk is always appended to #buffer. With a native parser it is
  # also forwarded there and the native call's result is returned.
  #
  # @param chunk [String] Input chunk to add
  # @return [Boolean] True if more chunks needed, false if ready for parsing;
  #   always false without the native extension
  def add_chunk(chunk)
    @buffer << chunk
    return false unless @native_parser

    Parsanol::Native.streaming_parser_add_chunk(@native_parser, chunk)
  end

  # Parse what we have so far.
  #
  # @return [Object, nil] Parsed result or nil if need more data
  # @raise [NotImplementedError] when no native parser is present
  def parse_chunk
    unless @native_parser
      # Pure Ruby fallback - not supported
      raise NotImplementedError,
            'Streaming parser requires native extension for full functionality.'
    end

    Parsanol::Native.streaming_parser_parse_chunk(@native_parser)
  end

  # Check if we have enough data to make progress.
  #
  # NOTE(review): this issues the same native call as #parse_chunk and only
  # inspects whether the result is nil; if that call consumes a result, the
  # result is dropped here - confirm against the native extension.
  #
  # @return [Boolean] True if parser can make progress; always false without
  #   the native extension
  def enough_data?
    return false unless @native_parser

    !Parsanol::Native.streaming_parser_parse_chunk(@native_parser).nil?
  end

  # Parse entire stream (yields partial results).
  #
  # Reads from io until it returns nil or an empty string. After each read
  # the chunk is added and #parse_chunk is called repeatedly until it
  # returns a falsy value.
  #
  # @param io [IO, StringIO] Input source to read from
  # @param chunk_size [Integer] Size of chunks to read
  # @yield [Object] Each complete element as it's parsed
  # @return [Array] All parsed results
  # @raise [NotImplementedError] from #parse_chunk when no native parser is
  #   present and io yields at least one chunk
  def parse_stream(io, chunk_size: @chunk_size)
    collected = []

    loop do
      piece = io.read(chunk_size)
      break if piece.nil? || piece.empty?

      add_chunk(piece)

      while (parsed = parse_chunk)
        collected.push(parsed)
        yield parsed if block_given?
      end
    end

    collected
  end

  # Reset the parser for reuse.
  #
  # Clears the buffer and, if a native parser was in use, replaces it with
  # a freshly built one for the same grammar.
  def reset
    clear_input_state
    @native_parser = build_native_parser if @native_parser
  end

  private

  # Serializes the grammar root and builds a native streaming parser from it.
  def build_native_parser
    serialized = Parsanol::Native.serialize_grammar(@grammar.root)
    Parsanol::Native.streaming_parser_new(serialized)
  end

  # Starts over with an empty buffer and a zero position.
  def clear_input_state
    @buffer = String.new
    @position = 0
  end
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
# Zero-copy string view that references original input.
|
|
5
|
+
#
|
|
6
|
+
# StringView avoids string copies by maintaining a reference to the
|
|
7
|
+
# original string with offset and length. Strings are only materialized
|
|
8
|
+
# when explicitly requested via #to_s.
|
|
9
|
+
#
|
|
10
|
+
# == Usage
|
|
11
|
+
#
|
|
12
|
+
# view = StringView.new(input_string, offset: 10, length: 5)
|
|
13
|
+
# view.to_s # Materializes string only when needed
|
|
14
|
+
# view[0] # Direct character access without copying
|
|
15
|
+
#
|
|
16
|
+
# == Performance
|
|
17
|
+
#
|
|
18
|
+
# - No string allocation until to_s called
|
|
19
|
+
# - Direct character access without copying
|
|
20
|
+
# - Reduced GC pressure from intermediate strings
|
|
21
|
+
# - Caches materialized strings for reuse
|
|
22
|
+
#
|
|
23
|
+
# == Design Principles
|
|
24
|
+
#
|
|
25
|
+
# 1. Zero-Copy: Reference original string, don't copy
|
|
26
|
+
# 2. Lazy Materialization: Create strings only when to_s called
|
|
27
|
+
# 3. Caching: Cache materialized strings for reuse
|
|
28
|
+
# 4. Compatibility: Acts like String where needed
|
|
29
|
+
# 5. Extensibility: Foundation for Rope (Phase 3.2)
|
|
30
|
+
#
|
|
31
|
+
class StringView
  include Resettable

  # @return [String] Original input string
  # @return [Integer] Byte offset into string
  # @return [Integer] Length in bytes
  attr_reader :string, :offset, :length

  # Byte size of the view; same value as #length.
  alias bytesize length
  alias size length

  # Initialize a new StringView.
  #
  # @param string [String] Original input string
  # @param offset [Integer] Byte offset (default: 0)
  # @param length [Integer] Length in bytes (default: everything from
  #   offset to the end of the string, i.e. string.bytesize - offset)
  #
  def initialize(string, offset: 0, length: nil)
    @string = string
    @offset = offset
    @length = length || (string.bytesize - offset)
    @materialized = nil
  end

  # Materialize to string (with caching).
  #
  # The byte slice is taken on the first call and remembered; later calls
  # hand back the remembered string.
  #
  # @return [String] Materialized string
  #
  def to_s
    return @materialized if @materialized

    @materialized = @string.byteslice(@offset, @length)
  end

  # Get the one-byte slice at index, relative to the start of the view.
  #
  # NOTE(review): the slice is exactly one byte wide, so for multi-byte
  # characters this presumably returns a partial character - confirm that
  # callers only index single-byte data.
  #
  # @param index [Integer] Zero-based index
  # @return [String, nil] One-byte string at index, or nil when index is
  #   negative or not below the view's length
  #
  def [](index)
    return nil unless index >= 0 && index < @length

    @string.byteslice(@offset + index, 1)
  end

  # Check if empty.
  #
  # @return [Boolean] true if length is 0
  #
  def empty?
    @length.zero?
  end

  # Compare with another object.
  #
  # Against a String the materialized content is compared. Against another
  # StringView the views are equal only when they reference the very same
  # string object and have the same offset and length. Any other object
  # falls through to the inherited comparison.
  #
  # @param other [Object] Object to compare with
  # @return [Boolean] true if equal
  #
  def ==(other)
    return to_s == other if other.is_a?(String)
    return super unless other.is_a?(StringView)

    @string.equal?(other.string) &&
      @offset == other.offset &&
      @length == other.length
  end

  alias eql? ==

  # Hash code for hashing.
  #
  # Built from the string's object_id plus offset and length, so the view
  # does not have to be materialized.
  #
  # @return [Integer] Hash code
  #
  def hash
    [@string.object_id, @offset, @length].hash
  end

  # Create substring view (zero-copy).
  #
  # Returns a new view onto the same string. A non-positive len, or a start
  # at or past the end of this view, gives an empty view at this view's
  # offset. A negative start is treated as 0 and len is cut down to what
  # remains after start.
  #
  # @param start [Integer] Start offset (relative to view)
  # @param len [Integer] Length
  # @return [StringView] New view of substring
  #
  def slice(start, len)
    return self.class.new(@string, offset: @offset, length: 0) if len <= 0 || start >= @length

    # Keep the relative start inside [0, @length]
    relative_start = [start, 0].max
    relative_start = @length if relative_start > @length

    # Never hand out more bytes than remain after the start
    remaining = @length - relative_start
    span = [len, remaining].min

    self.class.new(@string, offset: @offset + relative_start, length: span)
  end

  # Inspect for debugging.
  #
  # Includes the cached string once the view has been materialized.
  #
  # @return [String] Inspection string
  #
  def inspect
    cached = @materialized ? " cached=#{@materialized.inspect}" : ''
    "#<StringView:#{object_id} @offset=#{@offset} @length=#{@length}#{cached}>"
  end

  # Reset for pooling (if needed in future phases).
  #
  # Points this object at a new string/range and drops any cached
  # materialized string.
  #
  # @param string [String] New string
  # @param offset [Integer] New offset
  # @param length [Integer] New length
  # @return [self]
  #
  def reset!(string, offset, length)
    @string, @offset, @length = string, offset, length
    @materialized = nil
    self
  end
end
|
|
195
|
+
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'parsanol/pattern'
|
|
4
|
+
|
|
5
|
+
# Tree transformation engine for converting parse trees into abstract syntax trees.
|
|
6
|
+
#
|
|
7
|
+
# Transforms expression trees through depth-first post-order traversal.
|
|
8
|
+
# When a rule pattern matches a node, that node is replaced by the result
|
|
9
|
+
# of the rule's transformation block. Unmatched nodes pass through unchanged.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic transformation class
|
|
12
|
+
# class NumberTransform < Parsanol::Transform
|
|
13
|
+
# rule(int: simple(:value)) { Integer(value) }
|
|
14
|
+
# rule(float: simple(:value)) { Float(value) }
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# transform = NumberTransform.new
|
|
18
|
+
# transform.apply({ int: '42' }) # => 42
|
|
19
|
+
#
|
|
20
|
+
# @example Inline transformation definition
|
|
21
|
+
# transform = Parsanol::Transform.new do
|
|
22
|
+
# rule(a: simple(:x)) { x.upcase }
|
|
23
|
+
# end
|
|
24
|
+
# transform.apply({ a: 'hello' }) # => 'HELLO'
|
|
25
|
+
#
|
|
26
|
+
# @example Using context for external dependencies
|
|
27
|
+
# builder = AstBuilder.new
|
|
28
|
+
# transform = Parsanol::Transform.new do
|
|
29
|
+
# rule(expr: simple(:e)) { builder.build_node(e) }
|
|
30
|
+
# rule(expr: simple(:e)) { |ctx| ctx[:builder].build_node(ctx[:e]) }
|
|
31
|
+
# end
|
|
32
|
+
# transform.apply(tree, builder: builder)
|
|
33
|
+
#
|
|
34
|
+
# Rule blocks can have two forms:
|
|
35
|
+
# - Zero-arity: executed in a context where pattern bindings are local variables
|
|
36
|
+
# - Arity-1: receives a hash of bindings as the argument
|
|
37
|
+
#
|
|
38
|
+
# Inspired by tree transformation patterns in parser combinators.
|
|
39
|
+
#
|
|
40
|
+
module Parsanol
|
|
41
|
+
class Transform
  include Parsanol

  # Class-level rule definition for subclass inheritance.
  class << self
    include Parsanol

    # Defines a transformation rule at the class level.
    # Each new rule is placed at the front of the list, so the most
    # recently defined rule is tried first.
    #
    # @param expression [Object] pattern to match against tree nodes
    # @yield block to execute when pattern matches, receives bindings
    # @return [Array] the updated rules list
    #
    def rule(expression, &transformer)
      class_rules.unshift([Parsanol::Pattern.new(expression), transformer])
    end

    # Returns all class-level rules defined for this transform.
    #
    # @return [Array<Array>] array of [pattern, block] pairs
    #
    def class_rules
      @class_rules ||= []
    end

    # Gives each subclass its own copy of the rules defined so far, so
    # rules added to the subclass do not leak back into the parent.
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@class_rules, class_rules.dup)
    end
  end

  # Creates a new transform instance.
  #
  # @param strict [Boolean] if true, raises on unmatched hash nodes
  # @yield optional block for inline rule definition; evaluated with the
  #   new instance as self, so `rule` inside it adds instance rules
  #
  def initialize(strict = false, &definition)
    @strict_mode = strict
    @instance_rules = []

    instance_eval(&definition) if definition
  end

  # Defines an instance-level transformation rule.
  # Instance rules are checked before class rules.
  #
  # @param expression [Object] pattern to match
  # @yield transformation block
  #
  def rule(expression, &transformer)
    @instance_rules.unshift([Parsanol::Pattern.new(expression), transformer])
  end

  # Applies transformation to a parse tree.
  #
  # Performs depth-first post-order traversal: children are transformed
  # first, then the resulting node is matched against the rules.
  #
  # @param tree [Object] parse tree to transform
  # @param context [Hash, nil] optional context bindings, passed to every
  #   pattern match
  # @return [Object] transformed tree
  #
  def apply(tree, context = nil)
    transformed = transform_children(tree, context)
    attempt_transformation(transformed, context)
  end

  # Returns combined class and instance rules.
  # Instance rules take precedence over class rules.
  #
  # @return [Array<Array>] all applicable rules
  #
  def all_rules
    @instance_rules + self.class.class_rules
  end

  # Executes a transformation block with the given bindings.
  # Public API for testing and advanced usage; #apply routes every matched
  # rule through this method, so overriding it customizes how rule blocks
  # are invoked.
  #
  # @param bindings [Hash] pattern bindings
  # @param block [Proc] transformation block
  # @return [Object] block result, or nil when there is no block
  #
  def call_on_match(bindings, block)
    return nil unless block

    if block.arity == 1
      # Block expects the bindings hash as its argument
      block.call(bindings)
    else
      # Block is evaluated inside a Context built from the bindings
      Context.new(bindings).instance_eval(&block)
    end
  end

  private

  # Recursively transforms child nodes based on tree type.
  # Hashes and Arrays are descended into; anything else is a leaf and is
  # returned untouched.
  #
  # @param node [Object] current tree node
  # @param ctx [Hash, nil] context bindings
  # @return [Object] node with transformed children
  #
  def transform_children(node, ctx)
    case node
    when Hash
      transform_hash_children(node, ctx)
    when Array
      transform_array_children(node, ctx)
    else
      node
    end
  end

  # Builds a new hash with the same keys and every value transformed.
  #
  def transform_hash_children(hash, ctx)
    hash.each_with_object({}) do |(key, val), result|
      result[key] = apply(val, ctx)
    end
  end

  # Builds a new array with every element transformed.
  #
  def transform_array_children(array, ctx)
    array.map { |element| apply(element, ctx) }
  end

  # Attempts to match a node against all rules and transform if matched.
  # The first rule whose pattern matches wins.
  #
  # @param node [Object] node to potentially transform
  # @param ctx [Hash, nil] context bindings
  # @return [Object] transformed node or original if no match
  #
  def attempt_transformation(node, ctx)
    all_rules.each do |pattern, block|
      bindings = pattern.match(node, ctx)
      next unless bindings

      return execute_block(block, bindings)
    end

    # No rule matched
    handle_unmatched(node)
  end

  # Executes a transformation block for a matched rule.
  #
  # Delegates to the public #call_on_match instead of repeating its logic,
  # so there is a single place that decides how blocks are invoked and
  # subclasses overriding #call_on_match are honoured during #apply.
  #
  # @param block [Proc] transformation block
  # @param bindings [Hash] matched pattern bindings
  # @return [Object] block result
  #
  def execute_block(block, bindings)
    call_on_match(bindings, block)
  end

  # Handles nodes that didn't match any rule.
  #
  # @param node [Object] unmatched node
  # @return [Object] the node (or raises in strict mode)
  # @raise [NotImplementedError] if strict mode and node is a Hash
  #
  def handle_unmatched(node)
    return node unless @strict_mode
    return node unless node.is_a?(Hash)

    # In strict mode, provide helpful error about what wasn't matched
    signature = node.transform_values(&:class)
    raise NotImplementedError, "Failed to match #{signature.inspect}"
  end
end
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
require 'parsanol/context'
|