parsanol 1.0.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +12 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +487 -0
  5. data/Rakefile +135 -0
  6. data/lib/parsanol/3.2/parsanol_native.so +0 -0
  7. data/lib/parsanol/3.3/parsanol_native.so +0 -0
  8. data/lib/parsanol/3.4/parsanol_native.so +0 -0
  9. data/lib/parsanol/4.0/parsanol_native.so +0 -0
  10. data/lib/parsanol/ast_visitor.rb +122 -0
  11. data/lib/parsanol/atoms/alternative.rb +122 -0
  12. data/lib/parsanol/atoms/base.rb +202 -0
  13. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  14. data/lib/parsanol/atoms/capture.rb +38 -0
  15. data/lib/parsanol/atoms/context.rb +334 -0
  16. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  17. data/lib/parsanol/atoms/custom.rb +110 -0
  18. data/lib/parsanol/atoms/cut.rb +66 -0
  19. data/lib/parsanol/atoms/dsl.rb +96 -0
  20. data/lib/parsanol/atoms/dynamic.rb +39 -0
  21. data/lib/parsanol/atoms/entity.rb +75 -0
  22. data/lib/parsanol/atoms/ignored.rb +37 -0
  23. data/lib/parsanol/atoms/infix.rb +162 -0
  24. data/lib/parsanol/atoms/lookahead.rb +82 -0
  25. data/lib/parsanol/atoms/named.rb +74 -0
  26. data/lib/parsanol/atoms/re.rb +83 -0
  27. data/lib/parsanol/atoms/repetition.rb +259 -0
  28. data/lib/parsanol/atoms/scope.rb +35 -0
  29. data/lib/parsanol/atoms/sequence.rb +194 -0
  30. data/lib/parsanol/atoms/str.rb +103 -0
  31. data/lib/parsanol/atoms/visitor.rb +91 -0
  32. data/lib/parsanol/atoms.rb +46 -0
  33. data/lib/parsanol/buffer.rb +133 -0
  34. data/lib/parsanol/builder_callbacks.rb +353 -0
  35. data/lib/parsanol/cause.rb +122 -0
  36. data/lib/parsanol/context.rb +39 -0
  37. data/lib/parsanol/convenience.rb +36 -0
  38. data/lib/parsanol/edit_tracker.rb +111 -0
  39. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  40. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  41. data/lib/parsanol/error_reporter/tree.rb +63 -0
  42. data/lib/parsanol/error_reporter.rb +100 -0
  43. data/lib/parsanol/expression/treetop.rb +154 -0
  44. data/lib/parsanol/expression.rb +106 -0
  45. data/lib/parsanol/fast_mode.rb +149 -0
  46. data/lib/parsanol/first_set.rb +79 -0
  47. data/lib/parsanol/grammar_builder.rb +177 -0
  48. data/lib/parsanol/incremental_parser.rb +177 -0
  49. data/lib/parsanol/interval_tree.rb +217 -0
  50. data/lib/parsanol/lazy_result.rb +179 -0
  51. data/lib/parsanol/lexer.rb +144 -0
  52. data/lib/parsanol/mermaid.rb +139 -0
  53. data/lib/parsanol/native/parser.rb +612 -0
  54. data/lib/parsanol/native/serializer.rb +248 -0
  55. data/lib/parsanol/native/transformer.rb +435 -0
  56. data/lib/parsanol/native/types.rb +42 -0
  57. data/lib/parsanol/native.rb +217 -0
  58. data/lib/parsanol/optimizer.rb +85 -0
  59. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  60. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  61. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  62. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  63. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  64. data/lib/parsanol/options/ruby_transform.rb +107 -0
  65. data/lib/parsanol/options/serialized.rb +94 -0
  66. data/lib/parsanol/options/zero_copy.rb +128 -0
  67. data/lib/parsanol/options.rb +20 -0
  68. data/lib/parsanol/parallel.rb +133 -0
  69. data/lib/parsanol/parser.rb +182 -0
  70. data/lib/parsanol/parslet.rb +151 -0
  71. data/lib/parsanol/pattern/binding.rb +91 -0
  72. data/lib/parsanol/pattern.rb +159 -0
  73. data/lib/parsanol/pool.rb +219 -0
  74. data/lib/parsanol/pools/array_pool.rb +75 -0
  75. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  76. data/lib/parsanol/pools/position_pool.rb +92 -0
  77. data/lib/parsanol/pools/slice_pool.rb +64 -0
  78. data/lib/parsanol/position.rb +94 -0
  79. data/lib/parsanol/resettable.rb +29 -0
  80. data/lib/parsanol/result.rb +46 -0
  81. data/lib/parsanol/result_builder.rb +208 -0
  82. data/lib/parsanol/result_stream.rb +261 -0
  83. data/lib/parsanol/rig/rspec.rb +71 -0
  84. data/lib/parsanol/rope.rb +81 -0
  85. data/lib/parsanol/scope.rb +104 -0
  86. data/lib/parsanol/slice.rb +146 -0
  87. data/lib/parsanol/source/line_cache.rb +109 -0
  88. data/lib/parsanol/source.rb +180 -0
  89. data/lib/parsanol/source_location.rb +167 -0
  90. data/lib/parsanol/streaming_parser.rb +124 -0
  91. data/lib/parsanol/string_view.rb +195 -0
  92. data/lib/parsanol/transform.rb +226 -0
  93. data/lib/parsanol/version.rb +5 -0
  94. data/lib/parsanol/wasm/README.md +80 -0
  95. data/lib/parsanol/wasm/package.json +51 -0
  96. data/lib/parsanol/wasm/parsanol.js +252 -0
  97. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  98. data/lib/parsanol/wasm_parser.rb +240 -0
  99. data/lib/parsanol.rb +280 -0
  100. data/parsanol-ruby.gemspec +67 -0
  101. metadata +280 -0
@@ -0,0 +1,334 @@
# frozen_string_literal: true

module Parsanol
  module Atoms
    # Parsing context that coordinates memoization caching, error reporting,
    # and resource pooling. Created fresh for each parse operation.
    #
    # Key responsibilities:
    # - Packrat-style memoization (caching parse results by position+atom)
    # - Pluggable error reporting through a reporter interface
    # - Object pooling for arrays and buffers to reduce GC pressure
    # - Adaptive caching based on input size
    #
    # @example Basic usage
    #   ctx = Context.new(reporter)
    #   result = ctx.try_with_cache(parser, source, true)
    #
    # Inspired by packrat parsing memoization and incremental parsing techniques.
    #
    class Context
      # Per-parser cache size thresholds. The lookup key is the last segment
      # of the parser class name; :default is used when no entry matches.
      PARSER_CACHE_LIMITS = {
        'JsonParser' => 10_000,    # JSON needs large inputs to benefit
        'ErbParser' => 800,        # ERB benefits earlier
        'CalcParser' => 2000,      # Calculator has low repetition
        'SentenceParser' => 5000,  # Linear grammar, minimal benefit
        :default => 1000
      }.freeze

      # Pre-allocated result tuples returned when no reporter value is available.
      SUCCESS_RESULT = [true, nil].freeze
      ERROR_RESULT = [false, nil].freeze

      # @return [Parsanol::Scope] capture variable bindings
      attr_reader :captures

      # @return [Parsanol::Pools::ArrayPool] array object pool
      attr_reader :array_pool

      # @return [Parsanol::Pools::BufferPool] buffer object pool
      attr_reader :buffer_pool

      # Creates a new parsing context.
      #
      # @param error_reporter [#err, #err_at, #succ, nil] error reporter
      #   instance; when nil, the shared constant tuples are returned instead
      # @param interval_cache: [Boolean] enable GPeg-style interval caching
      # @param adaptive_cache_threshold: [Integer, nil] minimum input size for caching
      # @param parser_class: [Class, nil] parser class for threshold selection
      #
      def initialize(error_reporter = Parsanol::ErrorReporter::Tree.new,
                     interval_cache: false,
                     adaptive_cache_threshold: nil,
                     parser_class: nil)
        # Core memoization cache: position -> { atom_id -> [result, advance] }
        @memo = Hash.new { |h, k| h[k] = {} }

        # Error reporting delegate
        @reporter = error_reporter

        # Capture scope for variable bindings
        @captures = Parsanol::Scope.new

        # Cache eviction state
        @furthest_pos = 0
        @evict_threshold = 200
        @evict_counter = 0
        @evict_interval = 100

        # Object pools for reducing allocations
        @array_pool = Parsanol::Pools::ArrayPool.new(size: 10_000)
        @buffer_pool = Parsanol::Pools::BufferPool.new(pool_size: 100)

        # Selective memoization tracking (per atom object_id)
        @hit_stats = Hash.new(0)
        @miss_stats = Hash.new(0)
        @min_hits_for_cache = 2

        # Optional GPeg-style interval caching; its dependencies are loaded
        # lazily so the default path does not pay for them.
        @use_intervals = interval_cache
        if @use_intervals
          require 'parsanol/interval_tree'
          require 'parsanol/edit_tracker'
          @interval_trees = Hash.new { |h, k| h[k] = Parsanol::IntervalTree.new }
          @edits = Parsanol::EditTracker.new
        end

        # Cut operator support for aggressive eviction
        @cut_pos = 0

        # Determine adaptive cache threshold: an explicit value wins, then the
        # per-parser table, then the table default.
        threshold = adaptive_cache_threshold
        if threshold.nil? && parser_class
          name = parser_class.name&.split('::')&.last
          threshold = PARSER_CACHE_LIMITS[name] || PARSER_CACHE_LIMITS[:default]
        end
        threshold ||= PARSER_CACHE_LIMITS[:default]

        @adaptive_threshold = threshold
        @input_len = nil
        @caching_active = nil
      end

      # Attempts to parse using memoization. Returns the cached result if one
      # is available, otherwise executes the parser and (subject to a
      # heuristic) caches the result.
      #
      # @param atom [Parsanol::Atoms::Base] parser to apply
      # @param src [Parsanol::Source] input source
      # @param must_consume_all [Boolean] require complete consumption
      # @return [Array(Boolean, Object)] parse result tuple
      #
      def try_with_cache(atom, src, must_consume_all)
        # Skip caching for atoms that don't benefit from it
        return atom.try(src, self, must_consume_all) unless atom.cached?

        # Decide once per context whether caching is active, based on the
        # input size observed at the first cached call.
        if @caching_active.nil?
          total_len = src.bytepos + src.chars_left
          @input_len = total_len
          @caching_active = total_len >= @adaptive_threshold
        end

        # For small inputs, skip caching overhead
        return atom.try(src, self, must_consume_all) unless @caching_active

        # Use interval-based caching if enabled
        return try_with_interval(atom, src, must_consume_all) if @use_intervals

        pos = src.bytepos
        key = atom.object_id

        evict_stale_entries(pos)

        # Probe with #fetch so that a miss does not trigger the default proc
        # and leave an empty per-position hash behind.
        slot = @memo.fetch(pos, nil)
        if slot&.key?(key)
          @hit_stats[key] += 1
          outcome, delta = slot[key]
          src.bytepos = pos + delta
          return outcome
        end

        # Cache miss - execute and store
        @miss_stats[key] += 1
        outcome = atom.try(src, self, must_consume_all)
        delta = src.bytepos - pos

        # Re-resolve the slot here: nested parsing may have evicted it.
        @memo[pos][key] = [outcome, delta] if worth_caching?(key)

        outcome
      end

      # GPeg-style interval-based caching for incremental parsing.
      #
      # NOTE(review): the lookup asks the tree for the exact interval
      # (pos, pos) while results are inserted as (pos, end_pos). Depending on
      # IntervalTree#query_exact semantics this may only ever hit zero-width
      # results - confirm against the interval tree implementation.
      #
      # @param atom [Parsanol::Atoms::Base] parser to apply
      # @param src [Parsanol::Source] input source
      # @param must_consume_all [Boolean] require complete consumption
      # @return [Array(Boolean, Object)] parse result tuple
      #
      def try_with_interval(atom, src, must_consume_all)
        pos = src.bytepos
        key = atom.object_id

        tree = @interval_trees[key]
        cached = tree.query_exact(pos, pos)

        if cached
          @hit_stats[key] += 1
          outcome, delta = cached
          src.bytepos = pos + delta
          return outcome
        end

        @miss_stats[key] += 1
        outcome = atom.try(src, self, must_consume_all)
        delta = src.bytepos - pos
        end_pos = pos + delta

        tree.insert(pos, end_pos, [outcome, delta]) if worth_caching?(key)

        outcome
      end

      # Reports an error at a specific position by delegating to the reporter.
      #
      # @return [Array(Boolean, Object)] error result tuple
      #
      def err_at(*args)
        return [false, @reporter.err_at(*args)] if @reporter

        ERROR_RESULT
      end

      # Reports an error by delegating to the reporter's #err.
      #
      # @return [Array(Boolean, Object)] error result tuple
      #
      def err(*args)
        return [false, @reporter.err(*args)] if @reporter

        ERROR_RESULT
      end

      # Reports a successful parse. The shared SUCCESS_RESULT tuple is
      # returned when there is no reporter or the reporter yields nil.
      #
      # @return [Array(Boolean, Object)] success result tuple
      #
      def succ(*args)
        return SUCCESS_RESULT unless @reporter

        val = @reporter.succ(*args)
        return SUCCESS_RESULT if val.nil?

        [true, val]
      end

      # Acquires an array from the array pool.
      #
      # @return [Array] whatever the pool's #acquire returns
      #
      def acquire_array
        @array_pool.acquire
      end

      # Returns an array to the pool for reuse.
      #
      # @param arr [Array] array to release
      # @return [Object] the pool's #release result
      #
      def release_array(arr)
        @array_pool.release(arr)
      end

      # Acquires a buffer from the buffer pool.
      #
      # @param size: [Integer] capacity requested from the pool
      # @return [Object] whatever the pool's #acquire returns
      #
      def acquire_buffer(size:)
        @buffer_pool.acquire(size: size)
      end

      # Returns a buffer to the pool for reuse.
      #
      # @param buf [Object] buffer to release
      # @return [Object] the pool's #release result
      #
      def release_buffer(buf)
        @buffer_pool.release(buf)
      end

      # Creates a new capture scope for the duration of the block.
      #
      # The scope is popped only after it has been pushed successfully, so a
      # failing push does not unbalance the scope stack.
      #
      # @yield block executed in new scope
      # @return [Object] the block's value
      #
      def scope
        captures.push
        begin
          yield
        ensure
          captures.pop
        end
      end

      # Checks if interval-based caching is active.
      #
      # @return [Boolean] the interval_cache flag given at construction
      #
      def use_tree_memoization?
        @use_intervals
      end

      # Queries interval cache for a cached result starting at start_pos.
      #
      # @param key [Integer] cache key (atom object_id)
      # @param start_pos [Integer] starting position
      # @return [Object, nil] the value stored for the first overlapping
      #   interval that begins at start_pos, or nil
      #
      def query_tree_memo(key, start_pos)
        return nil unless @use_intervals

        tree = @interval_trees[key]
        matches = tree.query_overlapping(start_pos, start_pos + 1)
        found = matches.find { |interval, _| interval[0] == start_pos }
        found ? found[1] : nil
      end

      # Stores a result in the interval cache as [values, end_pos].
      #
      # @param key [Integer] cache key
      # @param start_pos [Integer] start position
      # @param values [Array] parsed values
      # @param end_pos [Integer] end position
      #
      def store_tree_memo(key, start_pos, values, end_pos)
        return unless @use_intervals

        @interval_trees[key].insert(start_pos, end_pos, [values, end_pos])
      end

      # Marks a cut position and drops every memo entry before it.
      # Called when a cut operator succeeds.
      #
      # @param position [Integer] cut position
      #
      def cut!(position)
        @cut_pos = position
        @memo.delete_if { |pos, _| pos < position }
      end

      private

      # Tracks the furthest position seen and, every @evict_interval advances,
      # drops memo entries more than @evict_threshold behind that position.
      def evict_stale_entries(pos)
        return unless pos > @furthest_pos

        @furthest_pos = pos
        @evict_counter += 1
        return if @evict_counter < @evict_interval

        @evict_counter = 0
        cutoff = pos - @evict_threshold
        @memo.delete_if { |p, _| p < cutoff }
      end

      # Heuristic: store results for an atom during its first few attempts,
      # and afterwards only if the atom has produced at least one cache hit.
      def worth_caching?(key)
        attempts = @hit_stats[key] + @miss_stats[key]
        attempts <= @min_hits_for_cache || @hit_stats[key].positive?
      end

      # Lookup cached result (uses object_id for speed). Reads through #fetch
      # so a miss does not create an empty per-position entry.
      def lookup(atom, pos)
        slot = @memo.fetch(pos, nil)
        slot && slot[atom.object_id]
      end

      # Store result in cache
      def set(atom, pos, val)
        @memo[pos][atom.object_id] = val
      end
    end
  end
end
@@ -0,0 +1,38 @@
# frozen_string_literal: true

# Experimental: Position-based cache eviction for Context
# Based on PEG theory: in linear parsing, positions behind current position
# will never be revisited, so we can evict them to reduce memory

module Parsanol
  module Atoms
    class Context
      # Position at which the most recent cached parse attempt started.
      attr_reader :current_position

      # Applies +obj+ at the current source position, memoizing the outcome
      # per atom and per start position.
      #
      # Each cache entry records the result together with how far the source
      # advanced, so that a cache hit leaves the source where the original
      # parse left it. The per-atom cache is created on first use because this
      # file does not control Context#initialize.
      #
      # @param obj [#try, #cached?] atom to apply
      # @param source [#bytepos, #bytepos=] input source
      # @param consume_all [Boolean] forwarded to the atom
      # @return [Object] whatever the atom's #try returned
      def try_with_cache(obj, source, consume_all)
        return obj.try(source, self, consume_all) unless obj.cached?

        start = source.bytepos
        @current_position = start
        @cache ||= Hash.new { |store, atom_id| store[atom_id] = {} }
        atom_cache = @cache[obj.object_id]

        # Cache hit: replay the stored result and advance
        if atom_cache.key?(start)
          result, advance = atom_cache[start]
          source.bytepos = start + advance
          return result
        end

        # Cache miss - compute result and remember how far we got
        result = obj.try(source, self, consume_all)
        atom_cache[start] = [result, source.bytepos - start]

        # Evict old positions if cache is getting large
        # Keep only positions within a window of current position
        if atom_cache.size > 100
          min_pos = start - 50 # Keep 50 positions behind
          atom_cache.delete_if { |pos, _| pos < min_pos }
        end

        result
      end
    end
  end
end
@@ -0,0 +1,110 @@
# frozen_string_literal: true

module Parsanol
  module Atoms
    # Base class for creating custom parser atoms.
    #
    # Custom atoms allow extending Parsanol with domain-specific matching logic
    # that cannot be expressed with the built-in combinators.
    #
    # @example Custom atom for matching indentation-sensitive content
    #   class IndentAtom < Parsanol::Atoms::Custom
    #     def initialize(expected_indent)
    #       @expected_indent = expected_indent
    #       super()
    #     end
    #
    #     # Required: Implement try_match
    #     def try_match(source, context, consume_all)
    #       start = source.bytepos
    #       indent = count_indent(source)
    #
    #       if indent == @expected_indent
    #         content = read_until_newline(source)
    #         [true, content]
    #       else
    #         source.bytepos = start  # Restore position on failure
    #         [false, nil]
    #       end
    #     end
    #
    #     # Indentation depends on surrounding state, so opt out of caching
    #     def cacheable?
    #       false
    #     end
    #
    #     private
    #
    #     def count_indent(source)
    #       # ... implementation ...
    #     end
    #   end
    #
    #   # Usage in parser
    #   class MyParser < Parsanol::Parser
    #     rule(:indented_line) { IndentAtom.new(2) }
    #   end
    #
    class Custom < Base
      # Required: Implement this method to define matching behavior
      #
      # @param source [Parsanol::Source] The input source with position tracking
      # @param context [Parsanol::Atoms::Context] Parse context for memoization
      # @param consume_all [Boolean] If true, must consume entire input
      # @return [Array<Boolean, Object>] Tuple of [success, result]
      #   - success: true if match succeeded, false otherwise
      #   - result: matched value on success, nil on failure
      # @raise [NotImplementedError] always, unless overridden by a subclass
      #
      # @note You MUST restore source.bytepos on failure for proper backtracking
      #
      def try_match(source, context, consume_all)
        raise NotImplementedError,
              'Custom atoms must implement #try_match(source, context, consume_all)'
      end

      # Override of Base#try that delegates to try_match.
      #
      # On success the matched value is wrapped as [true, value]; on failure
      # the value from try_match is dropped and the context's #err is asked
      # to produce the failure result.
      #
      # @api private
      def try(source, context, consume_all)
        matched, value = try_match(source, context, consume_all)
        return [true, value] if matched

        # Generate error cause for reporting
        context.err(
          self,
          source,
          "Failed to match custom atom: #{self.class.name}"
        )
      end

      # Optional: Override to provide first set for optimization
      # Returns the set of characters/strings this atom can match at start
      #
      # @return [Set<String>, nil] First set, or nil if not determinable
      def first_set
        nil # Unknown by default
      end

      # Optional: Override to control caching for this atom
      # Return false for context-dependent matching (e.g., indentation)
      #
      # @return [Boolean] true if atom can be cached
      def cacheable?
        true
      end

      # Predicate consulted by Context#try_with_cache. Delegates to
      # #cacheable? so that subclasses opting out through the documented hook
      # are actually excluded from memoization.
      #
      # @return [Boolean] true if results of this atom may be memoized
      def cached?
        cacheable?
      end

      # Optional: Override to provide custom serialization for native parser
      # Return nil if atom cannot be serialized (must use pure Ruby mode)
      #
      # @return [Hash, nil] JSON-serializable representation
      def to_native_format
        nil # Not serializable by default
      end

      # Override to_s_inner for debug printing
      # @api private
      def to_s_inner(_prec = nil)
        "custom(#{self.class.name})"
      end
    end
  end
end
@@ -0,0 +1,66 @@
# frozen_string_literal: true

# Cut operator for PEG grammars
#
# A cut operator (↑) marks a point after which the parser is not expected to
# backtrack. The context can use that signal to evict cache entries more
# aggressively, which can reduce space complexity from O(n) to O(1).
#
# Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
# Grammars in Mostly Constant Space"
#
# Example:
#
#   rule(:statement) {
#     str('if').cut >> condition >> then_clause |
#     str('while').cut >> condition >> body |
#     str('print').cut >> expression
#   }
#
# After 'if' succeeds, the cut discards backtrack info for 'while' and 'print'.
# This means if the parse fails later in the 'if' branch, we won't try the
# other alternatives.
#
module Parsanol
  module Atoms
    class Cut < Parsanol::Atoms::Base
      # @return [Parsanol::Atoms::Base] the wrapped atom
      attr_reader :parslet

      # @param parslet [Parsanol::Atoms::Base] atom whose success marks the cut
      def initialize(parslet)
        super()
        @parslet = parslet
      end

      # Applies the wrapped atom; when it matches, tells the context (if it
      # supports #cut!) that a cut occurred at the position reached.
      #
      # @return [Array(Boolean, Object)] the wrapped atom's result tuple
      def try(source, context, consume_all)
        matched, payload = parslet.apply(source, context, consume_all)

        # Only a successful match is a commit point. Contexts without #cut!
        # are left untouched.
        context.cut!(source.bytepos) if matched && context.respond_to?(:cut!)

        [matched, payload]
      end

      # Cut is a thin wrapper, so memoizing it separately is not useful.
      def cached?
        false
      end

      # Renders the wrapped atom followed by the cut marker.
      def to_s_inner(prec)
        "#{parslet.to_s(prec)}↑"
      end

      # The FIRST set of a cut is that of the wrapped atom: cutting does not
      # change what is matched, only how backtracking behaves afterwards.
      def compute_first_set
        parslet.first_set
      end
    end
  end
end
@@ -0,0 +1,96 @@
# frozen_string_literal: true

# Parser composition DSL - chainable methods for building parser atoms.
# All atoms can use these methods to combine into larger parsers.
#
# Inspired by Parslet (MIT License).

module Parsanol
  module Atoms
    # Combinator methods. Every method wraps the receiver (and, for binary
    # operators, the argument) in a new atom; the receiver is not modified.
    module DSL
      # Builds a repetition of the receiver, matched between +min+ and +max+
      # times. A nil +max+ means no upper limit.
      #
      # @example
      #   str('a').repeat        # match zero or more 'a's
      #   str('a').repeat(1, 3)  # match 1-3 'a's
      def repeat(min = 0, max = nil)
        Repetition.new(self, min, max)
      end

      # Builds an optional match (0 or 1 times) of the receiver, tagged :maybe.
      # Per the upstream description the result is nil when absent, otherwise
      # the matched value.
      #
      # @example
      #   str('foo').maybe  # => nil or 'foo'
      def maybe
        Repetition.new(self, 0, 1, :maybe)
      end

      # Wraps the receiver so that its match result is ignored.
      #
      # @example
      #   str('foo').ignore  # => nil (not 'foo')
      def ignore
        Ignored.new(self)
      end

      # Sequence: the receiver followed by +other+.
      #
      # @example
      #   str('a') >> str('b')
      def >>(other)
        Sequence.new(self, other)
      end

      # Ordered choice: the receiver, or else +other+.
      #
      # @example
      #   str('a') | str('b')  # matches 'a' or 'b'
      def |(other)
        Alternative.new(self, other)
      end

      # Negative lookahead - succeeds only if the receiver is absent.
      #
      # @example
      #   str('a').absent?
      def absent?
        Lookahead.new(self, false)
      end

      # Positive lookahead - succeeds only if the receiver is present.
      #
      # @example
      #   str('a').present?
      def present?
        Lookahead.new(self, true)
      end

      # Labels the match under +name+ for tree output.
      #
      # @example
      #   str('a').as(:b)  # => {:b => 'a'}
      def as(name)
        Named.new(self, name)
      end

      # Captures the match result under +name+ for later reference.
      #
      # @example
      #   str('a').capture(:first) >> dynamic { str(ctx.captures[:first]) }
      def capture(name)
        Capture.new(self, name)
      end

      # Commit point - prevents backtracking after a successful match.
      # Use with caution: cuts prevent backtracking to alternatives.
      #
      # @example
      #   str('if').cut >> condition >> body |
      #     str('while').cut >> condition >> body
      def cut
        Cut.new(self)
      end
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

# Evaluates a block at parse time. The result from the block must be a parser
# (something which implements #apply). That parser is then applied to the
# input, creating the result.
#
# Dynamic parses are never cached.
#
# Example:
#   dynamic { rand < 0.5 ? str('a') : str('b') }
#
module Parsanol
  module Atoms
    class Dynamic < Parsanol::Atoms::Base
      # @return [#call] callable evaluated on every parse attempt
      attr_reader :block

      # @param block [#call] callable receiving (source, context) and
      #   returning an object that responds to #apply
      def initialize(block)
        # Run the base initializer first, as the sibling Cut atom does, so
        # any state set up by Base is present on Dynamic atoms too.
        super()
        @block = block
      end

      # The block may return a different parser on every call, so results
      # must not be memoized.
      def cached?
        false
      end

      # Calls the block with the source and context, then applies the parser
      # it returns.
      #
      # @return [Object] the result of the returned parser's #apply
      def try(source, context, consume_all)
        @block.call(source, context).apply(source, context, consume_all)
      end

      def to_s_inner(_prec)
        'dynamic { ... }'
      end
    end
  end
end