parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
module Parsanol
  module Atoms
    # Helper class that implements a transient cache that maps position and
    # parslet object to results. This is used for memoization in the packrat
    # style.
    #
    # Also, error reporter is stored here and error reporting happens through
    # this class. This makes the reporting pluggable.
    #
    class Context
      # Parser-specific cache thresholds (Session 13)
      # Based on profiling: different parsers have different cache benefit points
      # - JSON: High recursion on large files, but medium files (5KB) see overhead
      # - ERB: Moderate repetition, benefits from cache earlier
      # - Calc: Lower repetition, needs larger input
      # - Sentence: Simple linear grammar, minimal cache benefit
      #
      # Keys are the last segment of the parser class name; :default is used
      # when no entry matches.
      PARSER_CACHE_THRESHOLDS = {
        'JsonParser' => 10_000,    # High threshold - json/medium regressed at 1000
        'ErbParser' => 800,        # Moderate - working well
        'CalcParser' => 2000,      # Low repetition
        'SentenceParser' => 5000,  # Linear grammar
        :default => 1000
      }.freeze

      # Pre-allocated constants to avoid repeated array allocations.
      # These are the most common return values during parsing.
      SUCCESS_NIL = [true, nil].freeze
      ERROR_NIL = [false, nil].freeze

      # Returns the current captures made on the input (see
      # Parsanol::Atoms::Base#capture). Use as follows:
      #
      #   context.captures[:foobar] # => returns capture :foobar
      #
      attr_reader :captures

      # Phase 1.3: Expose ArrayPool for array acquisition/release
      # @return [Parsanol::Pools::ArrayPool] The array pool instance
      attr_reader :array_pool

      # Phase 2.1: Expose BufferPool for buffer acquisition/release
      # @return [Parsanol::Pools::BufferPool] The buffer pool instance
      attr_reader :buffer_pool

      # @param reporter [#err, #err_at] Error reporter (leave empty for default
      #   reporter)
      # @param interval_cache [Boolean] Use GPeg-style interval tree caching
      # @param adaptive_cache_threshold [Integer] Disable caching for inputs smaller than this (bytes)
      # @param parser_class [Class] Parser class for per-parser threshold selection
      def initialize(reporter=Parsanol::ErrorReporter::Tree.new, interval_cache: false, adaptive_cache_threshold: nil, parser_class: nil)
        # position => { parslet object_id => [result, advance] }
        @cache = Hash.new { |h, k| h[k] = {} }
        @reporter = reporter
        @captures = Parsanol::Scope.new
        @max_position = 0         # Track furthest position for cache eviction
        @eviction_threshold = 200 # Evict positions more than 200 bytes behind
        @eviction_counter = 0     # Counter for periodic eviction
        @eviction_frequency = 100 # Only evict every N position advances

        # Phase 1.3: ArrayPool for reducing GC pressure from array allocations
        @array_pool = Parsanol::Pools::ArrayPool.new(size: 10_000)

        # Phase 2.1: BufferPool for fixed-size buffer pre-allocation
        @buffer_pool = Parsanol::Pools::BufferPool.new(pool_size: 100)

        # Selective memoization: track hit/miss rates per parslet object_id
        @hit_counts = Hash.new(0)
        @miss_counts = Hash.new(0)
        @cache_threshold = 2 # Always store during the first 2 attempts of a parslet

        # GPeg-style interval tree caching (optional); dependencies are only
        # loaded when the feature is requested.
        @use_interval_cache = interval_cache
        if @use_interval_cache
          require 'parsanol/interval_tree'
          require 'parsanol/edit_tracker'
          # Map parslet object_id to interval tree
          @interval_cache = Hash.new { |h, k| h[k] = Parsanol::IntervalTree.new }
          # Track edits for lazy position shifts
          @edit_tracker = Parsanol::EditTracker.new
        end

        # Cut operator support (Phase 46b)
        @last_cut_position = 0

        # Adaptive caching (Session 12-13): caching is skipped for inputs
        # smaller than this threshold.
        @adaptive_cache_threshold = resolve_cache_threshold(adaptive_cache_threshold, parser_class)
        @input_size = nil      # Will be set on first parse attempt
        @caching_enabled = nil # Will be determined based on input size
      end

      # Caches a parse answer for obj at source.bytepos. Applying the same
      # parslet at one position of input always yields the same result, unless
      # the input has changed.
      #
      # We need the entire source here so we can ask for how many bytes were
      # consumed by a successful parse. Imitation of such a parse must advance
      # the input position by the same amount.
      #
      # @param obj [#cached?, #try] the parslet to apply
      # @param source [#bytepos, #bytepos=, #chars_left] the input being parsed
      # @param consume_all [Boolean] forwarded unchanged to obj.try
      # @return [Array] whatever obj.try returned (possibly replayed from cache)
      def try_with_cache(obj, source, consume_all)
        # Skip caching entirely for atoms that don't benefit from it
        return obj.try(source, self, consume_all) unless obj.cached?

        # Session 12: decide once, on the first cacheable call, whether this
        # input is large enough for the cache to pay off.
        if @caching_enabled.nil?
          # NOTE(review): adds a byte offset to chars_left; if chars_left counts
          # characters these units differ for multibyte input - confirm.
          @input_size = source.bytepos + source.chars_left
          @caching_enabled = @input_size >= @adaptive_cache_threshold
        end
        return obj.try(source, self, consume_all) unless @caching_enabled

        # Use interval-based caching if enabled (GPeg-style)
        return try_with_interval_cache(obj, source, consume_all) if @use_interval_cache

        # Phase 55: keep hot ivars in locals
        cache = @cache
        hit_counts = @hit_counts
        miss_counts = @miss_counts

        beg = source.bytepos
        cache_key = obj.object_id

        # Track furthest position and evict old cache entries PERIODICALLY.
        # Phase 42: eviction runs only every @eviction_frequency advances.
        if beg > @max_position
          @max_position = beg
          @eviction_counter += 1
          if @eviction_counter >= @eviction_frequency
            @eviction_counter = 0
            min_keep_pos = beg - @eviction_threshold
            cache.delete_if { |pos, _| pos < min_keep_pos }
          end
        end

        # Probe with fetch so a miss does not trigger the default proc and
        # leave an empty per-position Hash behind.
        bucket = cache.fetch(beg, nil)
        if bucket&.key?(cache_key)
          hit_counts[cache_key] += 1
          result, advance = bucket[cache_key]
          source.bytepos = beg + advance
          return result
        end

        # Cache miss - execute the parslet
        miss_counts[cache_key] += 1
        result = obj.try(source, self, consume_all)
        advance = source.bytepos - beg

        # Only store while we are still learning about this parslet, or once
        # it has produced at least one hit.
        hits = hit_counts[cache_key]
        if hits + miss_counts[cache_key] <= @cache_threshold || hits > 0
          cache[beg][cache_key] = [result, advance]
        end

        result
      end

      # GPeg-style interval-based caching.
      # Stores results in a per-parslet interval tree keyed by [start, end).
      #
      # @param obj [#try] the parslet to apply
      # @param source [#bytepos, #bytepos=] the input being parsed
      # @param consume_all [Boolean] forwarded unchanged to obj.try
      # @return [Array] whatever obj.try returned (possibly replayed from cache)
      def try_with_interval_cache(obj, source, consume_all)
        beg = source.bytepos
        cache_key = obj.object_id

        tree = @interval_cache[cache_key]
        # NOTE(review): the lookup asks for the exact interval (beg, beg) while
        # results are inserted below as (beg, end_pos). Depending on
        # IntervalTree#query_exact semantics this may only ever match
        # zero-width results - confirm against IntervalTree.
        result_data = tree.query_exact(beg, beg)

        if result_data
          # Match found - replay result and consumed length
          @hit_counts[cache_key] += 1
          result, advance = result_data
          source.bytepos = beg + advance
          return result
        end

        # No match - execute the parslet
        @miss_counts[cache_key] += 1
        result = obj.try(source, self, consume_all)
        advance = source.bytepos - beg
        end_pos = beg + advance

        # Same selective-memoization rule as #try_with_cache
        hits = @hit_counts[cache_key]
        if hits + @miss_counts[cache_key] <= @cache_threshold || hits > 0
          tree.insert(beg, end_pos, [result, advance])
        end

        result
      end

      # Report an error at a given position.
      # @see ErrorReporter
      #
      # @return [Array] [false, cause], or ERROR_NIL when no reporter is set
      def err_at(*args)
        return [false, @reporter.err_at(*args)] if @reporter
        ERROR_NIL
      end

      # Report an error.
      # @see ErrorReporter
      #
      # @return [Array] [false, cause], or ERROR_NIL when no reporter is set
      def err(*args)
        return [false, @reporter.err(*args)] if @reporter
        ERROR_NIL
      end

      # Report a successful parse.
      # @see ErrorReporter::Contextual
      #
      # @return [Array] [true, value]; the shared SUCCESS_NIL when there is no
      #   reporter or the reporter returns nil
      def succ(*args)
        return SUCCESS_NIL unless @reporter
        result = @reporter.succ(*args)
        return SUCCESS_NIL if result.nil?
        [true, result]
      end

      # Acquire an array from the pool.
      #
      # @return [Array] whatever the pool hands out
      def acquire_array
        @array_pool.acquire
      end

      # Release an array back to the pool.
      #
      # @param array [Array] The array to return to the pool
      # @return the pool's return value for #release
      def release_array(array)
        @array_pool.release(array)
      end

      # Acquire a buffer from the pool with specified minimum capacity.
      #
      # @param size [Integer] Minimum required capacity
      # @return whatever the pool hands out for that size
      def acquire_buffer(size:)
        @buffer_pool.acquire(size: size)
      end

      # Release a buffer back to the pool.
      #
      # @param buffer [Parsanol::Buffer] The buffer to return to the pool
      # @return the pool's return value for #release
      def release_buffer(buffer)
        @buffer_pool.release(buffer)
      end

      # Starts a new scope. Use the #scope method of Parsanol::Atoms::DSL
      # to call this. The scope is popped again even if the block raises.
      #
      def scope
        captures.push
        yield
      ensure
        captures.pop
      end

      # GPeg-style tree memoization support.
      # @return [Boolean] whether interval (tree) caching was requested
      def use_tree_memoization?
        @use_interval_cache
      end

      # Query tree memo cache for a given key and position.
      #
      # @param cache_key [Object] key the memo was stored under
      # @param start_pos [Integer] position the memoized interval must start at
      # @return the stored data ([values, end_pos] when written by
      #   #store_tree_memo) if found, nil otherwise
      def query_tree_memo(cache_key, start_pos)
        return nil unless @use_interval_cache
        # A read must not go through the default proc: that would allocate
        # and retain an empty tree for every key that is only ever queried.
        return nil unless @interval_cache.key?(cache_key)

        tree = @interval_cache[cache_key]
        # Intervals overlapping [start_pos, start_pos + 1) are candidates;
        # keep only the one that starts exactly at start_pos.
        overlapping = tree.query_overlapping(start_pos, start_pos + 1)
        match = overlapping.find { |interval, _data| interval[0] == start_pos }
        match ? match[1] : nil
      end

      # Store tree memo: cache array of values for repetition.
      #
      # @param cache_key [Object] key to store the memo under
      # @param start_pos [Integer] start of the memoized interval
      # @param values [Object] values to memoize
      # @param end_pos [Integer] end of the memoized interval
      def store_tree_memo(cache_key, start_pos, values, end_pos)
        return unless @use_interval_cache
        tree = @interval_cache[cache_key]
        tree.insert(start_pos, end_pos, [values, end_pos])
      end

      # Cut operator support (Phase 46b).
      # Called when a cut operator succeeds. Records the cut position and
      # drops every position-keyed cache entry before it, on the premise that
      # the parser will not backtrack past a cut. Only @cache is evicted here;
      # the interval cache is left untouched.
      #
      # @param position [Integer] The position where the cut occurred
      def cut!(position)
        @last_cut_position = position
        @cache.delete_if { |pos, _| pos < position }
      end

      private

      # NOTE These methods use #object_id directly, since that seems to bring the
      # most performance benefit. This is a hot spot; going through
      # Atoms::Base#hash doesn't yield as much.
      #
      def lookup(obj, pos)
        @cache[pos][obj.object_id]
      end

      def set(obj, pos, val)
        @cache[pos][obj.object_id] = val
      end

      # Picks the adaptive cache threshold: explicit > parser-specific > default.
      def resolve_cache_threshold(explicit, parser_class)
        threshold = explicit
        if threshold.nil? && parser_class
          # Extract simple class name (e.g., "MyJson::Parser" -> "Parser")
          parser_name = parser_class.name&.split('::')&.last
          threshold = PARSER_CACHE_THRESHOLDS[parser_name]
        end
        threshold || PARSER_CACHE_THRESHOLDS[:default]
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Experimental: Position-based cache eviction for Context
|
|
4
|
+
# Based on PEG theory: in linear parsing, positions behind current position
|
|
5
|
+
# will never be revisited, so we can evict them to reduce memory
|
|
6
|
+
|
|
7
|
+
module Parsanol
  module Atoms
    class Context
      # Value of source.pos at the start of the most recent cacheable
      # #try_with_cache call.
      attr_reader :current_position

      # Number of cached positions above which old positions are evicted.
      POSITION_CACHE_LIMIT = 100
      # How far behind the current position cached entries are kept.
      POSITION_CACHE_WINDOW = 50

      # Applies obj at the current source position, memoizing the outcome.
      #
      # Entries are stored as @cache[bytepos][obj.object_id] = [result, advance],
      # the same position-major layout the rest of Context (#cut!, #lookup,
      # #set) uses, so position-based eviction there keeps working.
      #
      # On a hit the source is moved forward by the recorded advance; returning
      # the memoized result without doing so would leave the caller at the
      # position before the match.
      #
      # @param obj [#cached?, #try] the parslet to apply
      # @param source [#pos, #bytepos, #bytepos=] the input being parsed
      # @param consume_all [Boolean] forwarded unchanged to obj.try
      # @return [Array] whatever obj.try returned (possibly replayed from cache)
      def try_with_cache(obj, source, consume_all)
        return obj.try(source, self, consume_all) unless obj.cached?

        @current_position = source.pos
        beg = source.bytepos
        cache_key = obj.object_id

        # fetch bypasses any default proc, so a probe never creates a bucket
        bucket = @cache.fetch(beg, nil)
        if bucket&.key?(cache_key)
          result, advance = bucket[cache_key]
          source.bytepos = beg + advance
          return result
        end

        # Cache miss - compute result and remember how far it advanced
        result = obj.try(source, self, consume_all)
        (@cache[beg] ||= {})[cache_key] = [result, source.bytepos - beg]

        # Evict old positions if cache is getting large: keep only positions
        # within a window behind the position this attempt started at.
        if @cache.size > POSITION_CACHE_LIMIT
          min_pos = beg - POSITION_CACHE_WINDOW
          @cache.delete_if { |pos, _| pos < min_pos }
        end

        result
      end
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  module Atoms
    # Base class for creating custom parser atoms.
    #
    # Custom atoms allow extending Parsanol with domain-specific matching logic
    # that cannot be expressed with the built-in combinators. Subclasses
    # implement #try_match; #try wraps it with error reporting.
    #
    # @example Custom atom for matching indentation-sensitive content
    #   class IndentAtom < Parsanol::Atoms::Custom
    #     def initialize(expected_indent)
    #       @expected_indent = expected_indent
    #       super()
    #     end
    #
    #     # Required: Implement try_match
    #     def try_match(source, context, consume_all)
    #       start = source.bytepos
    #       indent = count_indent(source)
    #
    #       if indent == @expected_indent
    #         content = read_until_newline(source)
    #         [true, content]
    #       else
    #         source.bytepos = start  # Restore position on failure
    #         [false, nil]
    #       end
    #     end
    #
    #     private
    #
    #     def count_indent(source)
    #       # ... implementation ...
    #     end
    #   end
    #
    #   # Usage in parser
    #   class MyParser < Parsanol::Parser
    #     rule(:indented_line) { IndentAtom.new(2) }
    #   end
    #
    class Custom < Base
      # Required: Implement this method to define matching behavior.
      #
      # @param source [Parsanol::Source] The input source with position tracking
      # @param context [Parsanol::Atoms::Context] Parse context for memoization
      # @param consume_all [Boolean] If true, must consume entire input
      # @return [Array<Boolean, Object>] Tuple of [success, result]
      #   - success: true if match succeeded, false otherwise
      #   - result: matched value on success, nil on failure
      # @raise [NotImplementedError] always, unless overridden by a subclass
      #
      # @note You MUST restore source.bytepos on failure for proper backtracking
      #
      def try_match(source, context, consume_all)
        raise NotImplementedError,
              "Custom atoms must implement #try_match(source, context, consume_all)"
      end

      # Override of Base#try that delegates to #try_match.
      #
      # On success the result is re-wrapped as [true, result]; on failure the
      # return value is whatever context.err produces for this atom.
      #
      # @api private
      def try(source, context, consume_all)
        success, result = try_match(source, context, consume_all)
        return [true, result] if success

        # Generate error cause for reporting
        context.err(
          self,
          source,
          "Failed to match custom atom: #{self.class.name}"
        )
      end

      # Optional: Override to provide first set for optimization.
      # Returns the set of characters/strings this atom can match at start.
      #
      # @return [Set<String>, nil] First set, or nil if not determinable
      def first_set
        nil # Unknown by default
      end

      # Optional: Override to control caching for this atom.
      # Return false for context-dependent matching (e.g., indentation).
      #
      # @return [Boolean] true if atom can be cached
      def cacheable?
        true
      end

      # Connects the #cacheable? hook to #cached?, the predicate the parse
      # context and other atoms use to decide on memoisation. A subclass that
      # returns false from #cacheable? is therefore never cached; otherwise
      # the decision is left to the superclass.
      #
      # NOTE(review): assumes Base defines #cached? — confirm.
      #
      # @return [Boolean]
      def cached?
        cacheable? && super
      end

      # Optional: Override to provide custom serialization for native parser.
      # Return nil if atom cannot be serialized (must use pure Ruby mode).
      #
      # @return [Hash, nil] JSON-serializable representation
      def to_native_format
        nil # Not serializable by default
      end

      # Debug representation; the precedence argument is accepted for
      # interface compatibility but not used.
      #
      # @api private
      def to_s_inner(_prec = nil)
        "custom(#{self.class.name})"
      end
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Cut operator for PEG grammars
|
|
2
|
+
#
|
|
3
|
+
# A cut operator (↑) instructs the parser to discard backtrack information
|
|
4
|
+
# at a specific point. This enables more aggressive cache eviction and can
|
|
5
|
+
# reduce space complexity from O(n) to O(1).
|
|
6
|
+
#
|
|
7
|
+
# Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
|
|
8
|
+
# Grammars in Mostly Constant Space"
|
|
9
|
+
#
|
|
10
|
+
# Example:
|
|
11
|
+
#
|
|
12
|
+
# rule(:statement) {
|
|
13
|
+
# str('if').cut >> condition >> then_clause |
|
|
14
|
+
# str('while').cut >> condition >> body |
|
|
15
|
+
# str('print').cut >> expression
|
|
16
|
+
# }
|
|
17
|
+
#
|
|
18
|
+
# After 'if' succeeds, the cut discards backtrack info for 'while' and 'print'.
|
|
19
|
+
# This means if the parse fails later in the 'if' branch, we won't try the
|
|
20
|
+
# other alternatives.
|
|
21
|
+
#
|
|
22
|
+
class Parsanol::Atoms::Cut < Parsanol::Atoms::Base
  # The wrapped atom whose success triggers the cut.
  attr_reader :parslet

  # @param parslet [Parsanol::Atoms::Base] atom to wrap
  def initialize(parslet)
    super()
    @parslet = parslet
  end

  # Applies the wrapped atom and, if it matched, notifies the context.
  #
  # The context is only notified when it implements #cut!; it receives the
  # source's byte position after the match. The wrapped atom's
  # success flag and value are returned either way.
  #
  # @return [Array] [success, value] as produced by the wrapped atom
  def try(source, context, consume_all)
    success, value = parslet.apply(source, context, consume_all)

    context.cut!(source.bytepos) if success && context.respond_to?(:cut!)

    [success, value]
  end

  # Cut is a thin wrapper, so it is never cached itself.
  def cached?
    false
  end

  # Renders the wrapped atom followed by the cut marker.
  def to_s_inner(prec)
    "#{parslet.to_s(prec)}↑"
  end

  # A cut does not alter what is matched, only backtracking, so its FIRST
  # set is that of the wrapped atom.
  def compute_first_set
    parslet.first_set
  end
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# A mixin module that defines operations that can be called on any subclass
|
|
5
|
+
# of Parsanol::Atoms::Base. These operations make parslets atoms chainable and
|
|
6
|
+
# allow combination of parslet atoms to form bigger parsers.
|
|
7
|
+
#
|
|
8
|
+
# Example:
|
|
9
|
+
#
|
|
10
|
+
# str('foo') >> str('bar')
|
|
11
|
+
# str('f').repeat
|
|
12
|
+
# any.absent? # also called The Epsilon
|
|
13
|
+
#
|
|
14
|
+
module Parsanol::Atoms::DSL
  # Builds a Repetition of this atom: at least +min+ and at most +max+
  # occurrences. A nil +max+ means there is no upper bound.
  #
  # Example:
  #   str('a').repeat        # any number of 'a's
  #   str('a').repeat(1, 3)  # between 1 and 3 'a's
  #
  def repeat(min = 0, max = nil)
    Parsanol::Atoms::Repetition.new(self, min, max)
  end

  # Makes this atom optional. Built as a Repetition with bounds 0..1 that is
  # additionally tagged :maybe; the generated tree value is either nil (atom
  # not present in the input) or the matched subtree.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parsanol::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Wraps this atom in an Ignored atom so that it does not show up in the
  # output.
  #
  # Example:
  #   str('foo').ignore
  #
  def ignore
    Parsanol::Atoms::Ignored.new(self)
  end

  # Sequence: this atom followed by +parslet+.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parsanol::Atoms::Sequence.new(self, parslet)
  end

  # Alternation: this atom is always attempted first; +parslet+ is tried only
  # if this one does not match.
  #
  # Example:
  #   str('a') | str('b')  # matches either 'a' OR 'b'
  #
  def |(parslet)
    Parsanol::Atoms::Alternative.new(self, parslet)
  end

  # Negative lookahead: tests that this atom is absent from the input without
  # consuming anything.
  #
  # Example:
  #   str('a').absent?  # only proceed if 'a' is absent
  #
  def absent?
    Parsanol::Atoms::Lookahead.new(self, false)
  end

  # Positive lookahead: tests that this atom is present in the input without
  # consuming anything.
  #
  # Example:
  #   str('a').present?  # only proceed if 'a' is present
  #
  def present?
    Parsanol::Atoms::Lookahead.new(self, true)
  end

  # Names this atom's match so that it appears in the tree output. Needed to
  # get meaningful output from #parse.
  #
  # Example:
  #   str('a').as(:b)  # will produce {:b => 'a'}
  #
  def as(name)
    Parsanol::Atoms::Named.new(self, name)
  end

  # Captures this atom's parse result under +name+ when it matches, which is
  # useful for self-referential parses. The stored result may be complex.
  #
  # Example:
  #   str('a').capture(:b)  # will store captures[:b] == 'a'
  #
  def capture(name)
    Parsanol::Atoms::Capture.new(self, name)
  end

  # Marks this atom as a cut point. Once it succeeds the parser discards
  # backtrack information. Use with caution: a cut prevents falling back to
  # alternative branches.
  #
  # Example:
  #   str('if').cut >> condition >> then_clause |
  #   str('while') >> condition >> body
  #
  # If 'if' matches, the first branch is committed to; should condition or
  # then_clause fail afterwards, the 'while' alternative is not tried.
  #
  def cut
    Parsanol::Atoms::Cut.new(self)
  end
end
|