parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Repetition - matches a parser multiple times.
|
|
4
|
+
# Supports min/max bounds for various quantifier patterns.
|
|
5
|
+
#
|
|
6
|
+
# @example Quantifiers
|
|
7
|
+
# str('a').repeat(1,3) # 1 to 3 'a's
|
|
8
|
+
# str('a').maybe # optional 'a' (0 or 1)
|
|
9
|
+
# str('a').repeat # zero or more
|
|
10
|
+
#
|
|
11
|
+
module Parsanol
|
|
12
|
+
module Atoms
|
|
13
|
+
class Repetition < Parsanol::Atoms::Base
|
|
14
|
+
# @return [Integer] minimum matches required
|
|
15
|
+
attr_reader :min
|
|
16
|
+
|
|
17
|
+
# @return [Integer, nil] maximum matches allowed
|
|
18
|
+
attr_reader :max
|
|
19
|
+
|
|
20
|
+
# @return [Parsanol::Atoms::Base] repeated parser
|
|
21
|
+
attr_reader :parslet
|
|
22
|
+
|
|
23
|
+
# @return [Symbol] result tag
|
|
24
|
+
attr_reader :result_tag
|
|
25
|
+
|
|
26
|
+
# Alias for compatibility
|
|
27
|
+
alias tag result_tag
|
|
28
|
+
|
|
29
|
+
# Builds a repetition atom around +parser+.
#
# @param parser [Parsanol::Atoms::Base] atom that is matched repeatedly
# @param min_count [Integer] lowest number of matches that counts as success
# @param max_count [Integer, nil] highest number of matches; nil means no upper bound
# @param tag [Symbol] tag placed at the head of the result (defaults to :repetition)
# @raise [ArgumentError] when max_count is zero
def initialize(parser, min_count, max_count, tag = :repetition)
  super()

  # A zero upper bound is rejected; a nil upper bound skips this check.
  raise ArgumentError, "Cannot repeat zero times: #{parser.inspect}" if max_count && max_count.zero?

  @parslet = parser
  @min = min_count
  @max = max_count
  @result_tag = tag

  # Numeric stand-in for the upper bound: a missing bound is stored as infinity.
  @max_internal = max_count
  @max_internal ||= Float::INFINITY

  # Failure messages are built once here rather than on every failed parse.
  @min_error = "Expected at least #{min_count} of #{parser.inspect}"
  @extra_error = 'Extra input after last repetition'
end
|
|
55
|
+
|
|
56
|
+
# Exposes the pre-built failure messages as a hash (compatibility accessor).
#
# @return [Hash{Symbol => String}] messages under :minrep and :unconsumed
def error_msgs
  messages = {}
  messages[:minrep] = @min_error
  messages[:unconsumed] = @extra_error
  messages
end
|
|
60
|
+
|
|
61
|
+
# Runs the repetition against +source+, picking one of four strategies.
#
# Order of preference: the tree-memoized path when the context both offers
# and enables it, the optional (0..1) path, the exact-count path for counts
# up to three, and finally the general loop.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)] success flag plus value or error
def try(source, context, consume_all)
  memoized = context.respond_to?(:use_tree_memoization?) && context.use_tree_memoization?

  if memoized
    with_tree_cache(source, context, consume_all)
  elsif @min.zero? && @max == 1
    try_maybe(source, context, consume_all)
  elsif @min == @max && @max && @max <= 3
    try_exact(source, context, consume_all)
  else
    try_general(source, context, consume_all)
  end
end
|
|
82
|
+
|
|
83
|
+
precedence REPETITION
|
|
84
|
+
|
|
85
|
+
# Renders the repeated atom followed by its quantifier.
#
# The optional case (min 0, max 1) is written as "?"; every other bound pair
# is written as "{min, max}".
#
# @param prec [Integer] precedence passed through to the repeated atom
# @return [String]
def to_s_inner(prec)
  optional = @min.zero? && @max == 1
  quantifier = optional ? '?' : "{#{@min}, #{@max}}"
  @parslet.to_s(prec) + quantifier
end
|
|
97
|
+
|
|
98
|
+
# FIRST set of the repetition: a copy of the repeated atom's FIRST set,
# plus EPSILON when zero matches are allowed.
#
# @return [Set]
def compute_first_set
  symbols = @parslet.first_set.dup
  return symbols unless @min.zero?

  symbols.add(Parsanol::FirstSet::EPSILON)
  symbols
end
|
|
106
|
+
|
|
107
|
+
private
|
|
108
|
+
|
|
109
|
+
# Optional match (zero or one occurrence); this path always succeeds.
#
# The repeated atom is applied without the consume-all requirement. When it
# does not match, the result is the shared REP_TAG value for the default
# :repetition tag, or a one-element array holding the custom tag.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param _consume_all [Boolean] not used by this path
# @return [Array(Boolean, Object)] result built by +ok+
def try_maybe(source, context, _consume_all)
  matched, result = @parslet.apply(source, context, false)

  if matched
    ok([@result_tag, result])
  elsif @result_tag == :repetition
    ok(Parsanol::Atoms::Base::REP_TAG)
  else
    ok([@result_tag])
  end
end
|
|
116
|
+
|
|
117
|
+
# Exact-count match for one, two or three occurrences.
#
# Hands off to the unrolled matcher for the configured count. Any other
# upper bound falls through and yields nil.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object), nil]
def try_exact(source, context, consume_all)
  if @max == 1
    single_match(source, context, consume_all)
  elsif @max == 2
    double_match(source, context, consume_all)
  elsif @max == 3
    triple_match(source, context, consume_all)
  end
end
|
|
128
|
+
|
|
129
|
+
# Matches the repeated atom exactly once.
#
# The consume-all flag is forwarded to the atom itself. On failure the
# minimum-count error is reported at the source's current byte position,
# with the atom's failure as its only child.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] forwarded to the repeated atom
# @return [Array(Boolean, Object)]
def single_match(source, context, consume_all)
  matched, result = @parslet.apply(source, context, consume_all)

  if matched
    ok([@result_tag, result])
  else
    context.err_at(self, source, @min_error, source.bytepos, [result])
  end
end
|
|
135
|
+
|
|
136
|
+
# Matches the repeated atom exactly twice (unrolled fast path).
#
# Only the second application receives the consume-all flag. When either
# application fails, the source is rewound to where the repetition started
# and the minimum-count error is reported at that start position. This is
# the same position the general repetition loop reports, so an exact count
# of two and a larger count fail identically on the same input.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def double_match(source, context, consume_all)
  start_pos = source.bytepos

  success, v1 = @parslet.apply(source, context, false)
  unless success
    source.bytepos = start_pos
    return context.err_at(self, source, @min_error, start_pos, [v1])
  end

  success, v2 = @parslet.apply(source, context, consume_all)
  return ok([@result_tag, v1, v2]) if success

  # The first match already advanced the source; undo it before reporting.
  source.bytepos = start_pos
  context.err_at(self, source, @min_error, start_pos, [v2])
end
|
|
145
|
+
|
|
146
|
+
# Matches the repeated atom exactly three times (unrolled fast path).
#
# Only the third application receives the consume-all flag. When any
# application fails, the source is rewound to where the repetition started
# and the minimum-count error is reported at that start position, matching
# what the general repetition loop reports for larger counts.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def triple_match(source, context, consume_all)
  start_pos = source.bytepos

  success, v1 = @parslet.apply(source, context, false)
  unless success
    source.bytepos = start_pos
    return context.err_at(self, source, @min_error, start_pos, [v1])
  end

  success, v2 = @parslet.apply(source, context, false)
  unless success
    # Undo the first match before reporting.
    source.bytepos = start_pos
    return context.err_at(self, source, @min_error, start_pos, [v2])
  end

  success, v3 = @parslet.apply(source, context, consume_all)
  return ok([@result_tag, v1, v2, v3]) if success

  # Undo the first two matches before reporting.
  source.bytepos = start_pos
  context.err_at(self, source, @min_error, start_pos, [v3])
end
|
|
158
|
+
|
|
159
|
+
# General repetition loop backed by a pooled result buffer.
#
# Applies the repeated atom until it fails or the upper bound is reached,
# collecting each value behind the result tag. Fewer matches than the
# minimum rewinds the source and yields the minimum-count error; leftover
# input under consume_all yields the extra-input error. In both failure
# cases the buffer is handed back to the context first.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def try_general(source, context, consume_all)
  origin = source.bytepos
  upper = @max

  # Requested buffer size: one slot for the tag plus min(max, 10) values.
  capacity = [upper || 10, 10].min + 1
  results = context.acquire_buffer(size: capacity)
  results.push(@result_tag)

  matches = 0
  # Value of the most recent application, whether it matched or failed.
  last_value = nil

  loop do
    matched, last_value = @parslet.apply(source, context, false)
    break unless matched

    matches += 1
    results.push(last_value)

    break if upper && matches >= upper
  end

  if matches < @min
    context.release_buffer(results)
    source.bytepos = origin
    return context.err_at(self, source, @min_error, origin, [last_value])
  end

  leftover = consume_all && source.chars_left.positive?
  return ok(Parsanol::LazyResult.new(results, context)) unless leftover

  context.release_buffer(results)
  context.err(self, source, @extra_error, [last_value])
end
|
|
198
|
+
|
|
199
|
+
# Repetition with tree memoization (GPEG-style caching).
#
# A memo entry is keyed by this atom's object_id and the start position.
# On a hit, the stored values are replayed and the source is moved to the
# stored end position. On a miss, the repeated atom is applied in a loop as
# in the general path, and the matched values are stored once the minimum
# bound is satisfied.
#
# Storing only after the minimum check keeps a failed, under-minimum parse
# from being replayed as a success on the next visit. A hit also enforces
# consume_all, so a cached and an uncached call at the same position agree.
#
# NOTE(review): assumes query_tree_memo returns the [values, end_pos] pair
# given to store_tree_memo - confirm against the context implementation.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context with tree memo support
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def with_tree_cache(source, context, consume_all)
  start_pos = source.bytepos
  cache_key = object_id

  # Check cache
  cached = context.query_tree_memo(cache_key, start_pos)
  if cached
    values, end_pos = cached
    source.bytepos = end_pos

    # Same full-consumption rule as the uncached path below.
    return context.err(self, source, @extra_error) if consume_all && source.chars_left.positive?

    return ok([@result_tag] + values)
  end

  # Parse and cache
  occurrence = 0
  estimate = [@max || 10, 10].min
  buffer = context.acquire_buffer(size: estimate + 1)
  buffer.push(@result_tag)

  # Byte position just past the last successful match.
  end_pos = start_pos
  last_error = nil

  loop do
    success, value = @parslet.apply(source, context, false)
    last_error = value

    break unless success

    occurrence += 1
    buffer.push(value)
    end_pos = source.bytepos

    break if @max && occurrence >= @max
  end

  # Check minimum before caching, so only valid matches are ever replayed.
  if occurrence < @min
    context.release_buffer(buffer)
    source.bytepos = start_pos
    return context.err_at(self, source, @min_error, start_pos, [last_error])
  end

  # Cache the matched values (tag excluded) together with where they ended.
  context.store_tree_memo(cache_key, start_pos, buffer.to_a[1..], end_pos) if occurrence.positive?

  # Check consumption
  if consume_all && source.chars_left.positive?
    context.release_buffer(buffer)
    return context.err(self, source, @extra_error, [last_error])
  end

  ok(Parsanol::LazyResult.new(buffer, context))
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Starts a new scope in the parsing process. Please also see the #captures
|
|
4
|
+
# method.
|
|
5
|
+
#
|
|
6
|
+
module Parsanol
|
|
7
|
+
module Atoms
|
|
8
|
+
class Scope < Parsanol::Atoms::Base
|
|
9
|
+
attr_reader :block
|
|
10
|
+
|
|
11
|
+
# Wraps a callable whose result is the atom to run inside a fresh scope.
#
# @param block [#call] invoked later to obtain the atom to apply
def initialize(block)
  super()
  @block = block
end
|
|
16
|
+
|
|
17
|
+
# Scope atoms always report themselves as not cached.
#
# @return [Boolean] always false
def cached?
  false
end
|
|
20
|
+
|
|
21
|
+
# Applies the atom produced by the stored block inside +context.scope+.
#
# The block is read into a local before entering the scope, and the result
# of the inner atom is returned directly from within the scope block.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context providing #scope
# @param consume_all [Boolean] forwarded to the inner atom
# @return [Array(Boolean, Object)] the inner atom's result
def apply(source, context, consume_all)
  producer = @block

  context.scope do
    return producer.call.apply(source, context, consume_all)
  end
end
|
|
29
|
+
|
|
30
|
+
# Renders the scope as "scope { ... }" around the inner atom's string form.
# The block is invoked to obtain that atom.
#
# @param prec [Integer] precedence passed through to the inner atom
# @return [String]
def to_s_inner(prec)
  inner = block.call.to_s(prec)
  "scope { #{inner} }"
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Sequential composition - matches parsers in left-to-right order.
|
|
4
|
+
# All parsers must succeed for the sequence to succeed.
|
|
5
|
+
#
|
|
6
|
+
# @example Sequence of matches
|
|
7
|
+
# str('a') >> str('b') # matches 'a' then 'b'
|
|
8
|
+
#
|
|
9
|
+
module Parsanol
|
|
10
|
+
module Atoms
|
|
11
|
+
class Sequence < Parsanol::Atoms::Base
|
|
12
|
+
# @return [Array<Parsanol::Atoms::Base>] sequence members
|
|
13
|
+
attr_reader :parslets
|
|
14
|
+
|
|
15
|
+
# Builds a sequence from the given atoms, kept in the order supplied.
#
# @param components [Array<Parsanol::Atoms::Base>] atoms to match one after another
def initialize(*components)
  super()
  @parslets = components

  # The failure message is built once here and embeds this atom's #inspect.
  description = inspect
  @fail_msg = "Failed to match sequence (#{description})"
end
|
|
25
|
+
|
|
26
|
+
# Exposes the pre-built failure message as a hash (compatibility accessor).
#
# @return [Hash{Symbol => String}] message under :failed
def error_msgs
  messages = {}
  messages[:failed] = @fail_msg
  messages
end
|
|
30
|
+
|
|
31
|
+
# Appends +other+ to this sequence and returns a new, flat sequence.
#
# When +other+ is itself a sequence its members are spliced in rather than
# nested. Adjacent string literals in the combined list are then merged.
#
# @param other [Parsanol::Atoms::Base] atom or sequence to append
# @return [Parsanol::Atoms::Sequence] new sequence of the receiver's class
def >>(other)
  tail = other.is_a?(Parsanol::Atoms::Sequence) ? other.parslets : [other]
  combined = @parslets + tail

  self.class.new(*merge_adjacent_strings(combined))
end
|
|
48
|
+
|
|
49
|
+
# Runs every member in order, dispatching on the number of members.
#
# Sequences of one, two or three members use dedicated matchers; any other
# size (including zero) goes through the general matcher.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)] success flag plus value or error
def try(source, context, consume_all)
  parts = @parslets
  size = parts.size

  if size == 1
    match_single(parts[0], source, context, consume_all)
  elsif size == 2
    match_pair(parts[0], parts[1], source, context, consume_all)
  elsif size == 3
    match_triple(parts[0], parts[1], parts[2], source, context, consume_all)
  else
    match_general(parts, source, context, consume_all)
  end
end
|
|
71
|
+
|
|
72
|
+
precedence SEQUENCE
|
|
73
|
+
|
|
74
|
+
# Renders the members' string forms separated by single spaces.
#
# @param prec [Integer] precedence passed through to each member
# @return [String]
def to_s_inner(prec)
  rendered = @parslets.map { |member| member.to_s(prec) }
  rendered.join(' ')
end
|
|
81
|
+
|
|
82
|
+
# FIRST set of the sequence, with epsilon propagation.
#
# Members are visited left to right. Each contributes its non-EPSILON
# entries, and the walk stops at the first member whose FIRST set lacks
# EPSILON (that member cannot match empty input, so later members cannot
# start the sequence).
#
# When every member can match empty input, the sequence itself can too, so
# EPSILON is added to the result. Without it, an enclosing sequence would
# stop at this one and miss the members that follow it.
#
# An empty sequence keeps reporting an empty set (existing behaviour).
#
# @return [Set]
def compute_first_set
  return Set.new if @parslets.empty?

  epsilon = Parsanol::FirstSet::EPSILON
  result = Set.new

  # all? stops at the first non-nullable member, like the original break.
  nullable = @parslets.all? do |parser|
    first = parser.first_set
    first.each { |entry| result.add(entry) unless entry == epsilon }
    first.include?(epsilon)
  end

  result.add(epsilon) if nullable
  result
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Single-member sequence: applies the member with the caller's consume-all
# flag and wraps its value behind the :sequence tag.
#
# @param parser [Parsanol::Atoms::Base] the only member
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] forwarded to the member
# @return [Array(Boolean, Object)]
def match_single(parser, source, context, consume_all)
  matched, result = parser.apply(source, context, consume_all)

  if matched
    ok([:sequence, result])
  else
    context.err(self, source, @fail_msg, [result])
  end
end
|
|
106
|
+
|
|
107
|
+
# Two-member sequence using a pooled buffer for the result.
#
# Only the second member receives the consume-all flag. The buffer is
# acquired only after both members have matched, so failures never hold one.
#
# @param p1 [Parsanol::Atoms::Base] first member
# @param p2 [Parsanol::Atoms::Base] second member
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def match_pair(p1, p2, source, context, consume_all)
  first_ok, first = p1.apply(source, context, false)
  return context.err(self, source, @fail_msg, [first]) unless first_ok

  second_ok, second = p2.apply(source, context, consume_all)
  return context.err(self, source, @fail_msg, [second]) unless second_ok

  results = context.acquire_buffer(size: 3)
  results.push(:sequence)
  results.push(first)
  results.push(second)

  ok(Parsanol::LazyResult.new(results, context))
end
|
|
121
|
+
|
|
122
|
+
# Three-member sequence using a pooled buffer for the result.
#
# Only the third member receives the consume-all flag. The buffer is
# acquired only after all three members have matched.
#
# @param p1 [Parsanol::Atoms::Base] first member
# @param p2 [Parsanol::Atoms::Base] second member
# @param p3 [Parsanol::Atoms::Base] third member
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def match_triple(p1, p2, p3, source, context, consume_all)
  first_ok, first = p1.apply(source, context, false)
  return context.err(self, source, @fail_msg, [first]) unless first_ok

  second_ok, second = p2.apply(source, context, false)
  return context.err(self, source, @fail_msg, [second]) unless second_ok

  third_ok, third = p3.apply(source, context, consume_all)
  return context.err(self, source, @fail_msg, [third]) unless third_ok

  results = context.acquire_buffer(size: 4)
  results.push(:sequence)
  results.push(first)
  results.push(second)
  results.push(third)

  ok(Parsanol::LazyResult.new(results, context))
end
|
|
140
|
+
|
|
141
|
+
# General matcher for any number of members.
#
# A buffer is acquired up front and seeded with the :sequence tag. Members
# are applied in order; only the last one receives the consume-all flag.
# The first failure hands the buffer back to the context and yields the
# sequence failure with that member's error as its child.
#
# @param components [Array<Parsanol::Atoms::Base>] members to apply
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param consume_all [Boolean] whether the whole input must be consumed
# @return [Array(Boolean, Object)]
def match_general(components, source, context, consume_all)
  results = context.acquire_buffer(size: components.size + 1)
  results.push(:sequence)

  final = components.size - 1

  components.each_with_index do |member, position|
    matched, result = member.apply(source, context, consume_all && (position == final))

    unless matched
      context.release_buffer(results)
      return context.err(self, source, @fail_msg, [result])
    end

    results.push(result)
  end

  ok(Parsanol::LazyResult.new(results, context))
end
|
|
164
|
+
|
|
165
|
+
# Collapses each run of neighbouring Str atoms into a single Str atom.
#
# The literals of a run are concatenated through a Rope. A Str atom with no
# Str neighbour is kept as the same object, and non-Str atoms pass through
# unchanged in their original order.
#
# @param components [Array<Parsanol::Atoms::Base>] atoms to scan
# @return [Array<Parsanol::Atoms::Base>] new list with runs merged
def merge_adjacent_strings(components)
  literal = Parsanol::Atoms::Str
  runs = components.chunk_while { |left, right| left.is_a?(literal) && right.is_a?(literal) }

  runs.map do |run|
    head = run.first
    next head unless head.is_a?(literal)

    rope = Parsanol::Rope.new.append(head.str)
    run[1..].each { |atom| rope.append(atom.str) }

    run.size > 1 ? literal.new(rope.to_s) : head
  end
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Literal string matcher. Matches an exact sequence of characters.
|
|
4
|
+
#
|
|
5
|
+
# @example Match literal text
|
|
6
|
+
# str('hello') # matches exactly 'hello'
|
|
7
|
+
#
|
|
8
|
+
module Parsanol
|
|
9
|
+
module Atoms
|
|
10
|
+
class Str < Parsanol::Atoms::Base
|
|
11
|
+
# @return [String] the literal to match
|
|
12
|
+
attr_reader :str
|
|
13
|
+
|
|
14
|
+
# Builds a matcher for one literal string.
#
# The argument is converted with #to_s, and its byte and character lengths
# are recorded once. Failure messages are built here rather than per parse.
#
# @param text [String, Object] literal to match; anything responding to #to_s
def initialize(text)
  super()

  literal = text.to_s
  @str = literal
  @byte_size = literal.bytesize
  @char_count = literal.length

  # Pre-built failure messages
  @early_eof_msg = 'Unexpected end of input'
  @mismatch_msg = "Expected #{literal.inspect}, but got "

  # Holds the literal only when it is exactly one character long; the
  # matcher uses this to pick its single-character path.
  @single_char = @char_count == 1 ? literal : nil
end
|
|
30
|
+
|
|
31
|
+
# Tries to match the literal at the current position.
#
# One-character literals take the single-character path; everything else
# takes the multi-character path.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @param _consume_all [Boolean] not used by this atom
# @return [Array(Boolean, Object)] success flag plus slice or error
def try(source, context, _consume_all)
  if @single_char
    single_char_match(source, context)
  else
    multi_char_match(source, context)
  end
end
|
|
44
|
+
|
|
45
|
+
# Renders the literal wrapped in single quotes.
#
# @param _prec [Integer] precedence; not used for literals
# @return [String]
def to_s_inner(_prec)
  quote = "'"
  quote + @str + quote
end
|
|
52
|
+
|
|
53
|
+
# Literal atoms opt out of result caching; the check is cheap enough that
# caching brings no benefit.
#
# @return [Boolean] always false
def cached?
  false
end
|
|
59
|
+
|
|
60
|
+
# Literal atoms yield a flat result (a slice of the input).
#
# @return [Boolean] always true
def flat?
  true
end
|
|
66
|
+
|
|
67
|
+
# FIRST set of a literal: a set whose only member is this atom.
#
# @return [Set]
def compute_first_set
  Set[self]
end
|
|
73
|
+
|
|
74
|
+
private
|
|
75
|
+
|
|
76
|
+
# Fast path for one-character literals.
#
# Fails with the end-of-input message when nothing is left. Otherwise one
# character is consumed and compared with the literal; on a mismatch the
# source is moved back and the error carries the mismatch message together
# with the consumed slice.
#
# NOTE(review): the rewind assigns the value of source.pos to
# source.bytepos - confirm both use the same unit for multibyte input.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @return [Array(Boolean, Object)]
def single_char_match(source, context)
  return context.err(self, source, @early_eof_msg) if source.chars_left < 1

  origin = source.pos
  taken = source.consume(1)

  if taken.content == @single_char
    ok(taken)
  else
    source.bytepos = origin
    context.err_at(self, source, [@mismatch_msg, taken], origin)
  end
end
|
|
88
|
+
|
|
89
|
+
# Standard path for literals longer than one character.
#
# Fails with the end-of-input message when fewer characters remain than the
# literal needs. Otherwise that many characters are consumed and compared
# with the literal; on a mismatch the source is moved back and the error
# carries the mismatch message together with the consumed slice.
#
# NOTE(review): the rewind assigns the value of source.pos to
# source.bytepos - confirm both use the same unit for multibyte input.
#
# @param source [Parsanol::Source] input being parsed
# @param context [Parsanol::Atoms::Context] parse context
# @return [Array(Boolean, Object)]
def multi_char_match(source, context)
  return context.err(self, source, @early_eof_msg) if source.chars_left < @char_count

  origin = source.pos
  taken = source.consume(@char_count)

  if taken.content == @str
    ok(taken)
  else
    source.bytepos = origin
    context.err_at(self, source, [@mismatch_msg, taken], origin)
  end
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Visitor pattern for traversing parser atom trees.
|
|
4
|
+
# Each atom type dispatches to a corresponding visitor method.
|
|
5
|
+
module Parsanol
|
|
6
|
+
module Atoms
|
|
7
|
+
class Base
  # Visitor entry point. The base implementation always raises; atom classes
  # that take part in visiting provide their own #accept.
  #
  # @param visitor [Object] object implementing the visit_* methods
  # @raise [NotImplementedError] always, naming the receiver's class
  def accept(visitor)
    message = "Missing #accept in #{self.class.name}"
    raise NotImplementedError, message
  end
end
|
|
18
|
+
|
|
19
|
+
class Str
  # Visitor hook for literal atoms: passes the literal to +visit_str+.
  #
  # @param visitor [Object] object responding to +visit_str+
  # @return [Object] whatever +visit_str+ returns
  def accept(visitor)
    literal = str
    visitor.visit_str(literal)
  end
end
|
|
27
|
+
|
|
28
|
+
class Entity
  # Visitor hook for rule entities: passes the rule name and the stored
  # body to +visit_entity+.
  #
  # @param visitor [Object] object responding to +visit_entity+
  # @return [Object] whatever +visit_entity+ returns
  def accept(visitor)
    label = rule_name
    visitor.visit_entity(label, @body)
  end
end
|
|
36
|
+
|
|
37
|
+
class Named
  # Visitor hook for named atoms: passes the name and the wrapped atom to
  # +visit_named+.
  #
  # @param visitor [Object] object responding to +visit_named+
  # @return [Object] whatever +visit_named+ returns
  def accept(visitor)
    label = name
    wrapped = parslet
    visitor.visit_named(label, wrapped)
  end
end
|
|
45
|
+
|
|
46
|
+
class Sequence
  # Visitor hook for sequences: passes the member list to +visit_sequence+.
  #
  # @param visitor [Object] object responding to +visit_sequence+
  # @return [Object] whatever +visit_sequence+ returns
  def accept(visitor)
    members = parslets
    visitor.visit_sequence(members)
  end
end
|
|
54
|
+
|
|
55
|
+
class Repetition
  # Visitor hook for repetitions: passes the result tag, both bounds and the
  # repeated atom to +visit_repetition+.
  #
  # @param visitor [Object] object responding to +visit_repetition+
  # @return [Object] whatever +visit_repetition+ returns
  def accept(visitor)
    details = [result_tag, min, max, parslet]
    visitor.visit_repetition(*details)
  end
end
|
|
63
|
+
|
|
64
|
+
class Alternative
  # Visitor hook for alternatives: passes the list of choices to
  # +visit_alternative+.
  #
  # @param visitor [Object] object responding to +visit_alternative+
  # @return [Object] whatever +visit_alternative+ returns
  def accept(visitor)
    choices = alternatives
    visitor.visit_alternative(choices)
  end
end
|
|
72
|
+
|
|
73
|
+
class Lookahead
  # Visitor hook for lookaheads: passes the +positive+ flag and the bound
  # atom to +visit_lookahead+.
  #
  # @param visitor [Object] object responding to +visit_lookahead+
  # @return [Object] whatever +visit_lookahead+ returns
  def accept(visitor)
    polarity = positive
    target = bound_parslet
    visitor.visit_lookahead(polarity, target)
  end
end
|
|
81
|
+
|
|
82
|
+
class Re
  # Visitor hook for regular-expression atoms: passes the value of +match+
  # to +visit_re+.
  #
  # @param visitor [Object] object responding to +visit_re+
  # @return [Object] whatever +visit_re+ returns
  def accept(visitor)
    pattern = match
    visitor.visit_re(pattern)
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|