parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module Atoms
|
|
5
|
+
# Parsing context that coordinates memoization caching, error reporting,
|
|
6
|
+
# and resource pooling. Created fresh for each parse operation.
|
|
7
|
+
#
|
|
8
|
+
# Key responsibilities:
|
|
9
|
+
# - Packrat-style memoization (caching parse results by position+atom)
|
|
10
|
+
# - Pluggable error reporting through reporter interface
|
|
11
|
+
# - Object pooling for arrays and buffers to reduce GC pressure
|
|
12
|
+
# - Adaptive caching based on input size
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage
|
|
15
|
+
# ctx = Context.new(reporter)
|
|
16
|
+
# result = ctx.try_with_cache(parser, source, true)
|
|
17
|
+
#
|
|
18
|
+
# Inspired by packrat parsing memoization and incremental parsing techniques.
|
|
19
|
+
#
|
|
20
|
+
class Context
|
|
21
|
+
# Per-parser cache size thresholds based on profiling different grammar types
|
|
22
|
+
# Different grammars benefit from caching at different input sizes
|
|
23
|
+
# Input-length thresholds used to decide whether memoization is worth its
# overhead. String keys are unqualified parser class names; the :default
# entry is the fallback for every other parser.
PARSER_CACHE_LIMITS = {
  # JSON needs large inputs to benefit
  'JsonParser' => 10_000,
  # ERB benefits earlier
  'ErbParser' => 800,
  # Calculator has low repetition
  'CalcParser' => 2000,
  # Linear grammar, minimal benefit
  'SentenceParser' => 5000,
  # Fallback when no parser-specific entry exists
  :default => 1000
}.freeze
|
|
30
|
+
|
|
31
|
+
# Creates a new parsing context.
|
|
32
|
+
#
|
|
33
|
+
# @param error_reporter [#err, #err_at] error reporter instance
|
|
34
|
+
# @param interval_cache: [Boolean] enable GPeg-style interval caching
|
|
35
|
+
# @param adaptive_cache_threshold: [Integer, nil] minimum input size for caching
|
|
36
|
+
# @param parser_class: [Class, nil] parser class for threshold selection
|
|
37
|
+
#
|
|
38
|
+
# Sets up all per-parse state: memo table, error reporter, capture scope,
# eviction bookkeeping, object pools, hit/miss statistics, optional
# interval caching, and the adaptive caching threshold.
#
# @param error_reporter [#err, #err_at, #succ] reporter that parse outcomes
#   are delegated to
# @param interval_cache [Boolean] when true, interval trees are used
#   instead of the position-keyed memo table
# @param adaptive_cache_threshold [Integer, nil] explicit minimum input size
#   for caching; takes precedence over any parser-specific limit
# @param parser_class [Class, nil] parser class whose unqualified name is
#   looked up in PARSER_CACHE_LIMITS when no explicit threshold is given
def initialize(error_reporter = Parsanol::ErrorReporter::Tree.new,
               interval_cache: false,
               adaptive_cache_threshold: nil,
               parser_class: nil)
  # Memo table: position -> { atom object_id -> [result, advance] }
  @memo = Hash.new { |table, position| table[position] = {} }

  @reporter = error_reporter
  @captures = Parsanol::Scope.new

  # Eviction bookkeeping read by #try_with_cache
  @furthest_pos = 0
  @evict_threshold = 200
  @evict_counter = 0
  @evict_interval = 100

  # Pools handed out through #acquire_array / #acquire_buffer
  @array_pool = Parsanol::Pools::ArrayPool.new(size: 10_000)
  @buffer_pool = Parsanol::Pools::BufferPool.new(pool_size: 100)

  # Per-atom hit/miss counters feeding the "is caching worthwhile" heuristic
  @hit_stats = Hash.new(0)
  @miss_stats = Hash.new(0)
  @min_hits_for_cache = 2

  # Interval caching support is only loaded when it was asked for
  @use_intervals = interval_cache
  if @use_intervals
    require 'parsanol/interval_tree'
    require 'parsanol/edit_tracker'
    @interval_trees = Hash.new { |trees, atom_id| trees[atom_id] = Parsanol::IntervalTree.new }
    @edits = Parsanol::EditTracker.new
  end

  # Last position reported through #cut!
  @cut_pos = 0

  # Threshold precedence: explicit argument, then per-parser limit, then default
  limit = adaptive_cache_threshold
  if limit.nil? && parser_class
    short_name = parser_class.name&.split('::')&.last
    limit = PARSER_CACHE_LIMITS.fetch(short_name) { PARSER_CACHE_LIMITS[:default] }
  end
  @adaptive_threshold = limit || PARSER_CACHE_LIMITS[:default]

  # Both are filled in lazily by the first cacheable #try_with_cache call
  @input_len = nil
  @caching_active = nil
end
|
|
90
|
+
|
|
91
|
+
# Attempts to parse using memoization. Returns cached result if available,
|
|
92
|
+
# otherwise executes the parser and caches the result.
|
|
93
|
+
#
|
|
94
|
+
# @param atom [Parsanol::Atoms::Base] parser to apply
|
|
95
|
+
# @param src [Parsanol::Source] input source
|
|
96
|
+
# @param must_consume_all [Boolean] require complete consumption
|
|
97
|
+
# @return [Array(Boolean, Object)] parse result tuple
|
|
98
|
+
#
|
|
99
|
+
# Applies +atom+ at the current position of +src+, reusing a memoized
# outcome when one is stored for this atom at this position.
#
# @param atom [Parsanol::Atoms::Base] parser to apply
# @param src [Parsanol::Source] input source
# @param must_consume_all [Boolean] require complete consumption
# @return [Array(Boolean, Object)] parse result tuple
def try_with_cache(atom, src, must_consume_all)
  # Atoms that opt out of memoization are simply executed.
  return atom.try(src, self, must_consume_all) unless atom.cached?

  # On the first cacheable call, measure the input once and decide whether
  # caching is switched on for the rest of this context's lifetime.
  if @caching_active.nil?
    @input_len = src.bytepos + src.chars_left
    @caching_active = @input_len >= @adaptive_threshold
  end

  # Inputs below the threshold bypass the cache entirely.
  return atom.try(src, self, must_consume_all) unless @caching_active

  # Interval-tree mode has its own implementation.
  return try_with_interval(atom, src, must_consume_all) if @use_intervals

  start = src.bytepos
  atom_id = atom.object_id

  # Each time a new furthest position is reached a counter is bumped; once
  # it reaches @evict_interval, entries more than @evict_threshold behind
  # the current position are dropped.
  if start > @furthest_pos
    @furthest_pos = start
    @evict_counter += 1

    if @evict_counter >= @evict_interval
      @evict_counter = 0
      oldest_kept = start - @evict_threshold
      @memo.delete_if { |position, _| position < oldest_kept }
    end
  end

  # Cache hit: replay the stored result and re-apply the stored advance.
  entries = @memo[start]
  if entries.key?(atom_id)
    @hit_stats[atom_id] += 1
    result, advance = entries[atom_id]
    src.bytepos = start + advance
    return result
  end

  # Cache miss: run the atom and record how far it moved the source.
  @miss_stats[atom_id] += 1
  result = atom.try(src, self, must_consume_all)
  advance = src.bytepos - start

  # Store only while the atom is still within its first few attempts or has
  # already produced a hit. The slot is looked up again here because the
  # nested parse above may have evicted it from @memo.
  hits = @hit_stats[atom_id]
  worthwhile = hits.positive? || hits + @miss_stats[atom_id] <= @min_hits_for_cache
  @memo[start][atom_id] = [result, advance] if worthwhile

  result
end
|
|
150
|
+
|
|
151
|
+
# GPeg-style interval-based caching for incremental parsing.
|
|
152
|
+
#
|
|
153
|
+
# @param atom [Parsanol::Atoms::Base] parser to apply
|
|
154
|
+
# @param src [Parsanol::Source] input source
|
|
155
|
+
# @param must_consume_all [Boolean] require complete consumption
|
|
156
|
+
# @return [Array(Boolean, Object)] parse result tuple
|
|
157
|
+
#
|
|
158
|
+
# Interval-tree flavoured variant of #try_with_cache: each atom has its own
# tree in @interval_trees, keyed by the atom's object_id.
#
# @param atom [Parsanol::Atoms::Base] parser to apply
# @param src [Parsanol::Source] input source
# @param must_consume_all [Boolean] require complete consumption
# @return [Array(Boolean, Object)] parse result tuple
def try_with_interval(atom, src, must_consume_all)
  start = src.bytepos
  atom_id = atom.object_id
  tree = @interval_trees[atom_id]

  # NOTE(review): lookups use query_exact(start, start) while results are
  # stored under (start, start + advance). Whether a lookup can ever match
  # an entry that consumed input depends on IntervalTree#query_exact, which
  # is not visible here -- confirm.
  stored = tree.query_exact(start, start)
  if stored
    @hit_stats[atom_id] += 1
    result, advance = stored
    src.bytepos = start + advance
    return result
  end

  # Miss: run the atom and measure how far the source moved.
  @miss_stats[atom_id] += 1
  result = atom.try(src, self, must_consume_all)
  advance = src.bytepos - start

  # Same storing heuristic as the position-keyed cache.
  hits = @hit_stats[atom_id]
  worthwhile = hits.positive? || hits + @miss_stats[atom_id] <= @min_hits_for_cache
  tree.insert(start, start + advance, [result, advance]) if worthwhile

  result
end
|
|
182
|
+
|
|
183
|
+
# Pre-allocated result constants
|
|
184
|
+
SUCCESS_RESULT = [true, nil].freeze
|
|
185
|
+
ERROR_RESULT = [false, nil].freeze
|
|
186
|
+
|
|
187
|
+
# Reports an error at a specific position.
|
|
188
|
+
#
|
|
189
|
+
# @return [Array(Boolean, Object)] error result tuple
|
|
190
|
+
#
|
|
191
|
+
# Builds a failure tuple, forwarding all arguments to the reporter's
# #err_at when a reporter is set; otherwise the shared ERROR_RESULT tuple
# is returned.
#
# @return [Array(Boolean, Object)] error result tuple
def err_at(*args)
  @reporter ? [false, @reporter.err_at(*args)] : ERROR_RESULT
end
|
|
196
|
+
|
|
197
|
+
# Reports an error at the current position.
|
|
198
|
+
#
|
|
199
|
+
# @return [Array(Boolean, Object)] error result tuple
|
|
200
|
+
#
|
|
201
|
+
# Builds a failure tuple, forwarding all arguments to the reporter's #err
# when a reporter is set; otherwise the shared ERROR_RESULT tuple is
# returned.
#
# @return [Array(Boolean, Object)] error result tuple
def err(*args)
  @reporter ? [false, @reporter.err(*args)] : ERROR_RESULT
end
|
|
206
|
+
|
|
207
|
+
# Reports a successful parse.
|
|
208
|
+
#
|
|
209
|
+
# @return [Array(Boolean, Object)] success result tuple
|
|
210
|
+
#
|
|
211
|
+
# Builds a success tuple. The reporter's #succ is consulted when a reporter
# is set; if there is no reporter, or it returns nil, the shared
# SUCCESS_RESULT tuple is returned instead of allocating a new array.
#
# @return [Array(Boolean, Object)] success result tuple
def succ(*args)
  value = @reporter ? @reporter.succ(*args) : nil
  value.nil? ? SUCCESS_RESULT : [true, value]
end
|
|
219
|
+
|
|
220
|
+
# @return [Parsanol::Scope] capture variable bindings
|
|
221
|
+
attr_reader :captures
|
|
222
|
+
|
|
223
|
+
# @return [Parsanol::Pools::ArrayPool] array object pool
|
|
224
|
+
attr_reader :array_pool
|
|
225
|
+
|
|
226
|
+
# @return [Parsanol::Pools::BufferPool] buffer object pool
|
|
227
|
+
attr_reader :buffer_pool
|
|
228
|
+
|
|
229
|
+
# Acquires an empty array from the pool.
|
|
230
|
+
#
|
|
231
|
+
# @return [Array] cleared array ready for use
|
|
232
|
+
#
|
|
233
|
+
# Hands out an array by delegating to the array pool's #acquire.
#
# @return [Array] whatever the pool's #acquire returns
def acquire_array = @array_pool.acquire
|
|
236
|
+
|
|
237
|
+
# Returns an array to the pool for reuse.
|
|
238
|
+
#
|
|
239
|
+
# @param arr [Array] array to release
|
|
240
|
+
# @return [Boolean] true if pooled, false if discarded
|
|
241
|
+
#
|
|
242
|
+
# Gives an array back by delegating to the array pool's #release.
#
# @param arr [Array] array to release
# @return [Object] whatever the pool's #release returns
def release_array(arr) = @array_pool.release(arr)
|
|
245
|
+
|
|
246
|
+
# Acquires a buffer with minimum capacity from the pool.
|
|
247
|
+
#
|
|
248
|
+
# @param size: [Integer] minimum required capacity
|
|
249
|
+
# @return [Parsanol::Buffer] buffer with capacity >= size
|
|
250
|
+
#
|
|
251
|
+
# Hands out a buffer by delegating to the buffer pool's #acquire, passing
# the requested size through unchanged.
#
# @param size [Integer] minimum required capacity
# @return [Parsanol::Buffer] whatever the pool's #acquire returns
def acquire_buffer(size:) = @buffer_pool.acquire(size: size)
|
|
254
|
+
|
|
255
|
+
# Returns a buffer to the pool for reuse.
|
|
256
|
+
#
|
|
257
|
+
# @param buf [Parsanol::Buffer] buffer to release
|
|
258
|
+
# @return [Boolean] true if pooled, false if discarded
|
|
259
|
+
#
|
|
260
|
+
# Gives a buffer back by delegating to the buffer pool's #release.
#
# @param buf [Parsanol::Buffer] buffer to release
# @return [Object] whatever the pool's #release returns
def release_buffer(buf) = @buffer_pool.release(buf)
|
|
263
|
+
|
|
264
|
+
# Creates a new capture scope for the duration of the block.
|
|
265
|
+
#
|
|
266
|
+
# @yield block executed in new scope
|
|
267
|
+
#
|
|
268
|
+
# Runs the given block inside a fresh capture scope: a scope is pushed
# before yielding and popped afterwards, including when the block raises.
#
# @yield block executed in the new scope
# @return [Object] the block's return value
def scope
  begin
    captures.push
    yield
  ensure
    captures.pop
  end
end
|
|
274
|
+
|
|
275
|
+
# Checks if interval-based caching is active.
|
|
276
|
+
#
|
|
277
|
+
# @return [Boolean] true if interval caching enabled
|
|
278
|
+
#
|
|
279
|
+
# Reports the interval-caching flag this context was created with.
#
# @return [Boolean] the value given as +interval_cache+ at construction
def use_tree_memoization? = @use_intervals
|
|
282
|
+
|
|
283
|
+
# Queries interval cache for a cached result.
|
|
284
|
+
#
|
|
285
|
+
# @param key [Integer] cache key (atom object_id)
|
|
286
|
+
# @param start_pos [Integer] starting position
|
|
287
|
+
# @return [Array, nil] cached [values, end_pos] or nil
|
|
288
|
+
#
|
|
289
|
+
# Looks up the interval cache for an entry of +key+ that begins exactly at
# +start_pos+.
#
# @param key [Integer] cache key (atom object_id)
# @param start_pos [Integer] starting position
# @return [Object, nil] payload stored with the first matching interval, or
#   nil when interval caching is off or nothing starts at +start_pos+
def query_tree_memo(key, start_pos)
  return nil unless @use_intervals

  # Ask for everything overlapping the one-position window at start_pos,
  # then keep only an interval whose start is exactly start_pos.
  candidates = @interval_trees[key].query_overlapping(start_pos, start_pos + 1)
  candidates.each do |interval, payload|
    return payload if interval[0] == start_pos
  end

  nil
end
|
|
297
|
+
|
|
298
|
+
# Stores a result in the interval cache.
|
|
299
|
+
#
|
|
300
|
+
# @param key [Integer] cache key
|
|
301
|
+
# @param start_pos [Integer] start position
|
|
302
|
+
# @param values [Array] parsed values
|
|
303
|
+
# @param end_pos [Integer] end position
|
|
304
|
+
#
|
|
305
|
+
# Records a result in the interval cache for +key+, covering
# +start_pos+..+end_pos+. Does nothing when interval caching is off.
#
# @param key [Integer] cache key
# @param start_pos [Integer] start position
# @param values [Array] parsed values
# @param end_pos [Integer] end position
def store_tree_memo(key, start_pos, values, end_pos)
  return unless @use_intervals

  # The payload repeats end_pos next to the values.
  tree = @interval_trees[key]
  tree.insert(start_pos, end_pos, [values, end_pos])
end
|
|
310
|
+
|
|
311
|
+
# Marks a cut position for aggressive cache eviction.
|
|
312
|
+
# Called when a cut operator succeeds.
|
|
313
|
+
#
|
|
314
|
+
# @param position [Integer] cut position
|
|
315
|
+
#
|
|
316
|
+
# Records +position+ as the latest cut point and drops every memo entry
# stored for an earlier position. Only the position-keyed memo table is
# pruned here.
#
# @param position [Integer] cut position
# @return [Hash] the memo table after pruning
def cut!(position)
  @cut_pos = position
  @memo.keep_if { |memo_pos, _entries| memo_pos >= position }
end
|
|
320
|
+
|
|
321
|
+
private
|
|
322
|
+
|
|
323
|
+
# Lookup cached result (uses object_id for speed)
|
|
324
|
+
# Reads the memo entry stored for +atom+ (by object_id) at +pos+.
#
# @return [Object, nil] the stored entry, or nil when there is none
def lookup(atom, pos)
  per_position = @memo[pos]
  per_position[atom.object_id]
end
|
|
327
|
+
|
|
328
|
+
# Store result in cache
|
|
329
|
+
# Writes +val+ as the memo entry for +atom+ (by object_id) at +pos+.
#
# @return [Object] +val+
def set(atom, pos, val)
  @memo[pos].store(atom.object_id, val)
end
|
|
332
|
+
end
|
|
333
|
+
end
|
|
334
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Experimental: Position-based cache eviction for Context
|
|
4
|
+
# Based on PEG theory: in linear parsing, positions behind current position
|
|
5
|
+
# will never be revisited, so we can evict them to reduce memory
|
|
6
|
+
|
|
7
|
+
module Parsanol
|
|
8
|
+
module Atoms
|
|
9
|
+
class Context
  # @return [Integer, nil] position at which the most recent cacheable atom
  #   was attempted
  attr_reader :current_position

  # Experimental replacement for the memoizing entry point that keeps one
  # position-keyed cache per atom and prunes old positions as it grows.
  #
  # Each cache entry stores the result together with the number of bytes the
  # atom advanced, so that a cache hit leaves the source at the same
  # position a real parse would have.
  #
  # @param obj [#try, #cached?] atom to apply
  # @param source [#bytepos, #bytepos=] input source
  # @param consume_all [Boolean] require complete consumption
  # @return [Array(Boolean, Object)] parse result tuple
  def try_with_cache(obj, source, consume_all)
    return obj.try(source, self, consume_all) unless obj.cached?

    start = source.bytepos
    @current_position = start

    # The primary constructor sets up @memo rather than @cache, so the
    # per-atom table is created here on first use.
    @cache ||= Hash.new { |table, atom_id| table[atom_id] = {} }
    atom_cache = @cache[obj.object_id]

    # Cache hit: replay the result and re-apply the recorded advance.
    if atom_cache.key?(start)
      result, advance = atom_cache[start]
      source.bytepos = start + advance
      return result
    end

    # Cache miss: compute the result and remember how far the source moved.
    result = obj.try(source, self, consume_all)
    atom_cache[start] = [result, source.bytepos - start]

    # Once an atom's cache exceeds 100 entries, keep only positions within
    # 50 of the position just attempted.
    if atom_cache.size > 100
      oldest_kept = start - 50
      atom_cache.delete_if { |pos, _| pos < oldest_kept }
    end

    result
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module Atoms
|
|
5
|
+
# Base class for creating custom parser atoms.
|
|
6
|
+
#
|
|
7
|
+
# Custom atoms allow extending Parsanol with domain-specific matching logic
|
|
8
|
+
# that cannot be expressed with the built-in combinators.
|
|
9
|
+
#
|
|
10
|
+
# @example Custom atom for matching indentation-sensitive content
|
|
11
|
+
# class IndentAtom < Parsanol::Atoms::Custom
|
|
12
|
+
# def initialize(expected_indent)
|
|
13
|
+
# @expected_indent = expected_indent
|
|
14
|
+
# super()
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# # Required: Implement try_match
|
|
18
|
+
# def try_match(source, context, consume_all)
|
|
19
|
+
# pos = source.pos
|
|
20
|
+
# indent = count_indent(source)
|
|
21
|
+
#
|
|
22
|
+
# if indent == @expected_indent
|
|
23
|
+
# content = read_until_newline(source)
|
|
24
|
+
# [true, content]
|
|
25
|
+
# else
|
|
26
|
+
# source.pos = pos # Restore position on failure
|
|
27
|
+
# [false, nil]
|
|
28
|
+
# end
|
|
29
|
+
# end
|
|
30
|
+
#
|
|
31
|
+
# private
|
|
32
|
+
#
|
|
33
|
+
# def count_indent(source)
|
|
34
|
+
# # ... implementation ...
|
|
35
|
+
# end
|
|
36
|
+
# end
|
|
37
|
+
#
|
|
38
|
+
# # Usage in parser
|
|
39
|
+
# class MyParser < Parsanol::Parser
|
|
40
|
+
# rule(:indented_line) { IndentAtom.new(2) }
|
|
41
|
+
# end
|
|
42
|
+
#
|
|
43
|
+
class Custom < Base
  # Required: implement this method to define matching behavior.
  #
  # @param source [Parsanol::Source] the input source with position tracking
  # @param context [Parsanol::Atoms::Context] parse context for memoization
  # @param consume_all [Boolean] if true, must consume entire input
  # @return [Array<Boolean, Object>] tuple of [success, result]
  #   - success: true if match succeeded, false otherwise
  #   - result: matched value on success, nil on failure
  # @raise [NotImplementedError] always, unless overridden by a subclass
  #
  # @note You MUST restore source.bytepos on failure for proper backtracking
  def try_match(source, context, consume_all)
    raise NotImplementedError,
          'Custom atoms must implement #try_match(source, context, consume_all)'
  end

  # Override of Base#try that delegates to #try_match. A truthy success
  # flag yields [true, result]; anything else is reported through
  # context.err with a message naming the concrete atom class.
  #
  # @api private
  def try(source, context, consume_all)
    success, result = try_match(source, context, consume_all)
    return [true, result] if success

    # Generate error cause for reporting
    context.err(
      self,
      source,
      "Failed to match custom atom: #{self.class.name}"
    )
  end

  # Optional: override to provide a first set for optimization, i.e. the
  # set of characters/strings this atom can match at its start.
  #
  # @return [Set<String>, nil] first set, or nil if not determinable
  def first_set
    nil # Unknown by default
  end

  # Optional: override to control caching for this atom. Return false for
  # context-dependent matching (e.g., indentation).
  #
  # @return [Boolean] true if atom can be cached
  def cacheable?
    true
  end

  # Predicate the parse context consults before memoizing an atom. It is
  # routed through #cacheable? so that overriding the documented hook above
  # actually switches caching off for a custom atom.
  #
  # @return [Boolean] result of #cacheable?
  def cached?
    cacheable?
  end

  # Optional: override to provide custom serialization for the native
  # parser. Return nil if the atom cannot be serialized (must use pure Ruby
  # mode).
  #
  # @return [Hash, nil] JSON-serializable representation
  def to_native_format
    nil # Not serializable by default
  end

  # Debug representation naming the concrete custom atom class.
  #
  # @api private
  def to_s_inner(_prec = nil)
    "custom(#{self.class.name})"
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Cut operator for PEG grammars
|
|
4
|
+
#
|
|
5
|
+
# A cut operator (↑) instructs the parser to discard backtrack information
|
|
6
|
+
# at a specific point. This enables more aggressive cache eviction and can
|
|
7
|
+
# reduce space complexity from O(n) to O(1).
|
|
8
|
+
#
|
|
9
|
+
# Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
|
|
10
|
+
# Grammars in Mostly Constant Space"
|
|
11
|
+
#
|
|
12
|
+
# Example:
|
|
13
|
+
#
|
|
14
|
+
# rule(:statement) {
|
|
15
|
+
# str('if').cut >> condition >> then_clause |
|
|
16
|
+
# str('while').cut >> condition >> body |
|
|
17
|
+
# str('print').cut >> expression
|
|
18
|
+
# }
|
|
19
|
+
#
|
|
20
|
+
# After 'if' succeeds, the cut discards backtrack info for 'while' and 'print'.
|
|
21
|
+
# This means if the parse fails later in the 'if' branch, we won't try the
|
|
22
|
+
# other alternatives.
|
|
23
|
+
#
|
|
24
|
+
module Parsanol
|
|
25
|
+
module Atoms
|
|
26
|
+
# Wraps another atom and, whenever that atom matches, notifies the parse
# context of a cut at the position reached.
class Cut < Parsanol::Atoms::Base
  # @return [Parsanol::Atoms::Base] the wrapped atom
  attr_reader :parslet

  # @param parslet [Parsanol::Atoms::Base] atom whose success triggers the cut
  def initialize(parslet)
    super()
    @parslet = parslet
  end

  # Applies the wrapped atom. On success the context's #cut! is called with
  # the source's current byte position, provided the context implements it.
  # The wrapped atom's outcome is passed through either way.
  #
  # @return [Array(Boolean, Object)] parse result tuple
  def try(source, context, consume_all)
    matched, payload = parslet.apply(source, context, consume_all)

    # Only a successful match commits; failures pass straight through.
    context.cut!(source.bytepos) if matched && context.respond_to?(:cut!)

    [matched, payload]
  end

  # Cut is a thin wrapper, so it is never memoized itself.
  def cached? = false

  # Renders the wrapped atom followed by the cut marker.
  def to_s_inner(prec)
    "#{parslet.to_s(prec)}↑"
  end

  # Cut does not alter what is matched, so its FIRST set is the wrapped
  # atom's FIRST set.
  def compute_first_set = parslet.first_set
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parser composition DSL - chainable methods for building parser atoms.
|
|
4
|
+
# All atoms can use these methods to combine into larger parsers.
|
|
5
|
+
#
|
|
6
|
+
# Inspired by Parslet (MIT License).
|
|
7
|
+
|
|
8
|
+
module Parsanol
|
|
9
|
+
module Atoms
|
|
10
|
+
# Chainable combinators. Every method wraps the receiver (and possibly a
# second atom) in a new atom object.
module DSL
  # Repeats the current atom between +min+ and +max+ times; a nil +max+
  # means no upper limit.
  #
  # @example
  #   str('a').repeat        # match zero or more 'a's
  #   str('a').repeat(1, 3)  # match 1-3 'a's
  def repeat(min = 0, max = nil) = Parsanol::Atoms::Repetition.new(self, min, max)

  # Matches the atom optionally (0 or 1 times).
  #
  # @example
  #   str('foo').maybe  # => nil or 'foo'
  def maybe = Parsanol::Atoms::Repetition.new(self, 0, 1, :maybe)

  # Ignores the result of a match - returns nil always.
  #
  # @example
  #   str('foo').ignore  # => nil (not 'foo')
  def ignore = Parsanol::Atoms::Ignored.new(self)

  # Chains two atoms in sequence.
  #
  # @example
  #   str('a') >> str('b')
  def >>(other) = Parsanol::Atoms::Sequence.new(self, other)

  # Chains two atoms as alternatives (ordered choice).
  #
  # @example
  #   str('a') | str('b')  # matches 'a' or 'b'
  def |(other) = Parsanol::Atoms::Alternative.new(self, other)

  # Negative lookahead - succeeds only if the atom is absent.
  #
  # @example
  #   str('a').absent?
  def absent? = Parsanol::Atoms::Lookahead.new(self, false)

  # Positive lookahead - succeeds only if the atom is present.
  #
  # @example
  #   str('a').present?
  def present? = Parsanol::Atoms::Lookahead.new(self, true)

  # Labels a match for tree output.
  #
  # @example
  #   str('a').as(:b)  # => {:b => 'a'}
  def as(name) = Parsanol::Atoms::Named.new(self, name)

  # Captures the match result for later reference.
  #
  # @example
  #   str('a').capture(:first) >> dynamic { str(ctx.captures[:first]) }
  def capture(name) = Parsanol::Atoms::Capture.new(self, name)

  # Commit point - prevents backtracking after a successful match.
  # Use with caution: cuts prevent backtracking to alternatives.
  #
  # @example
  #   str('if').cut >> condition >> body
  def cut = Parsanol::Atoms::Cut.new(self)
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Evaluates a block at parse time. The result from the block must be a parser
|
|
4
|
+
# (something which implements #apply). The returned parser will then
|
|
5
|
+
# be applied to the input, creating the result.
|
|
6
|
+
#
|
|
7
|
+
# Dynamic parses are never cached.
|
|
8
|
+
#
|
|
9
|
+
# Example:
|
|
10
|
+
# dynamic { rand < 0.5 ? str('a') : str('b') }
|
|
11
|
+
#
|
|
12
|
+
module Parsanol
|
|
13
|
+
module Atoms
|
|
14
|
+
# Atom whose actual parser is produced by a block each time it is tried.
class Dynamic < Parsanol::Atoms::Base
  # @return [#call] callable invoked with (source, context) on every attempt
  attr_reader :block

  # @param block [#call] callable returning the atom to apply
  def initialize(block)
    # Run the base-class initializer like the other wrapper atoms do, so
    # any state it sets up also exists on dynamic atoms.
    super()
    @block = block
  end

  # The block may return a different parser on every call, so results are
  # never memoized.
  #
  # @return [Boolean] always false
  def cached?
    false
  end

  # Calls the block with the source and context, then applies whatever
  # atom it returned to the same input.
  #
  # @param source [Parsanol::Source] input source
  # @param context [Parsanol::Atoms::Context] parse context
  # @param consume_all [Boolean] require complete consumption
  # @return [Array(Boolean, Object)] parse result tuple of the produced atom
  def try(source, context, consume_all)
    produced = @block.call(source, context)
    produced.apply(source, context, consume_all)
  end

  # Fixed placeholder, since the block's content cannot be printed.
  def to_s_inner(_prec)
    'dynamic { ... }'
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|