parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
describe 'Automatic Rule Optimization' do
|
|
6
|
+
include Parsanol
|
|
7
|
+
|
|
8
|
+
context 'when optimize_rules! is called' do
|
|
9
|
+
class OptimizedParser < Parsanol::Parser
|
|
10
|
+
optimize_rules!
|
|
11
|
+
|
|
12
|
+
rule(:redundant) {
|
|
13
|
+
str('a').repeat(1, 1) >>
|
|
14
|
+
str('b').repeat(1, 1) >>
|
|
15
|
+
str('c').repeat(1, 1)
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
rule(:nested_maybe) {
|
|
19
|
+
str('x').repeat(0, 1).repeat(0, 1)
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
rule(:exact_counts) {
|
|
23
|
+
str('m').repeat(2, 2).repeat(3, 3)
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
root :redundant
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it 'automatically simplifies repeat(1,1) in rules' do
|
|
30
|
+
parser = OptimizedParser.new
|
|
31
|
+
# The rule should parse successfully
|
|
32
|
+
expect(parser.redundant.parse('abc')).to eq('abc')
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'automatically simplifies nested maybe' do
|
|
36
|
+
parser = OptimizedParser.new
|
|
37
|
+
# Should match with x
|
|
38
|
+
expect(parser.nested_maybe.parse('x')).to eq('x')
|
|
39
|
+
# Should match without x (returns empty string, not nil)
|
|
40
|
+
expect(parser.nested_maybe.parse('')).to eq('')
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it 'automatically simplifies multiplied exact counts' do
|
|
44
|
+
parser = OptimizedParser.new
|
|
45
|
+
# Should require exactly 6 m's
|
|
46
|
+
expect(parser.exact_counts.parse('mmmmmm')).to eq('mmmmmm')
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it 'produces the same results as manual optimization' do
|
|
50
|
+
manual = str('a').repeat(1, 1) >> str('b').repeat(1, 1)
|
|
51
|
+
manual_optimized = Parsanol::Optimizer.simplify_quantifiers(manual)
|
|
52
|
+
|
|
53
|
+
auto_parser = OptimizedParser.new
|
|
54
|
+
|
|
55
|
+
input = 'ab'
|
|
56
|
+
# Both should return Slice objects (parslet's default)
|
|
57
|
+
expect(auto_parser.redundant.parse('abc').to_s).to eq('abc')
|
|
58
|
+
expect(manual_optimized.parse(input).to_s).to eq('ab')
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
context 'when optimize_rules! is not called' do
|
|
63
|
+
class UnoptimizedParser < Parsanol::Parser
|
|
64
|
+
# As of v3.1.0, optimizations are DISABLED by default (opt-in)
|
|
65
|
+
# This avoids overhead on tiny/small inputs
|
|
66
|
+
rule(:redundant) {
|
|
67
|
+
str('a').repeat(1, 1) >>
|
|
68
|
+
str('b').repeat(1, 1)
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
root :redundant
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
it 'still works without optimization (default)' do
|
|
75
|
+
parser = UnoptimizedParser.new
|
|
76
|
+
# Should work without optimization
|
|
77
|
+
expect(parser.redundant.parse('ab')).to be_truthy
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'defaults optimize_rules? to false (v3.1.0+ opt-in)' do
|
|
81
|
+
# As of v3.1.0, optimizations are opt-in to avoid overhead
|
|
82
|
+
expect(UnoptimizedParser.optimize_rules?).to be false
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
context 'when optimization is explicitly disabled' do
|
|
87
|
+
class ExplicitlyUnoptimizedParser < Parsanol::Parser
|
|
88
|
+
disable_optimization! # Explicit opt-out
|
|
89
|
+
|
|
90
|
+
rule(:redundant) {
|
|
91
|
+
str('a').repeat(1, 1) >>
|
|
92
|
+
str('b').repeat(1, 1)
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
root :redundant
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it 'respects explicit disable_optimization!' do
|
|
99
|
+
expect(ExplicitlyUnoptimizedParser.optimize_rules?).to be false
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it 'still parses correctly without optimization' do
|
|
103
|
+
parser = ExplicitlyUnoptimizedParser.new
|
|
104
|
+
expect(parser.redundant.parse('ab')).to be_truthy
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
context 'with complex nested structures' do
|
|
109
|
+
class ComplexOptimizedParser < Parsanol::Parser
|
|
110
|
+
optimize_rules!
|
|
111
|
+
|
|
112
|
+
rule(:deeply_nested) {
|
|
113
|
+
str('a').repeat(1, 1).repeat(1, 1).repeat(1, 1)
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
rule(:mixed) {
|
|
117
|
+
str('x').repeat(1, 1) >> str('y').repeat(0, 1) >> str('z')
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
root :deeply_nested
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
it 'simplifies deeply nested repetitions' do
|
|
124
|
+
parser = ComplexOptimizedParser.new
|
|
125
|
+
expect(parser.deeply_nested.parse('a')).to eq('a')
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
it 'handles mixed simplifiable and non-simplifiable patterns' do
|
|
129
|
+
parser = ComplexOptimizedParser.new
|
|
130
|
+
# With y
|
|
131
|
+
expect(parser.mixed.parse('xyz')).to be_truthy
|
|
132
|
+
# Without y
|
|
133
|
+
expect(parser.mixed.parse('xz')).to be_truthy
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
context 'backward compatibility' do
|
|
138
|
+
it 'parsers without optimize_rules! work without optimization' do
|
|
139
|
+
class LegacyParser < Parsanol::Parser
|
|
140
|
+
rule(:test) { str('a').repeat(1, 1) }
|
|
141
|
+
root :test
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
parser = LegacyParser.new
|
|
145
|
+
# Should still work, just not optimized (opt-in model)
|
|
146
|
+
expect(parser.test.parse('a')).to be_truthy
|
|
147
|
+
expect(LegacyParser.optimize_rules?).to be false
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
it 'does not break existing test suite' do
|
|
151
|
+
# Run a sample from existing tests to ensure compatibility
|
|
152
|
+
parser = str('hello').repeat(1, 1)
|
|
153
|
+
expect(parser.parse('hello')).to eq('hello')
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
context 'combined optimizations' do
|
|
158
|
+
class CombinedOptParser < Parsanol::Parser
|
|
159
|
+
optimize_rules!
|
|
160
|
+
|
|
161
|
+
rule(:combined) {
|
|
162
|
+
# Has both quantifier and sequence issues
|
|
163
|
+
(str('h') >> str('e') >> str('l') >> str('l') >> str('o')).repeat(1, 1) >>
|
|
164
|
+
str(' ') >>
|
|
165
|
+
(str('w') >> str('o') >> str('r') >> str('l') >> str('d')).repeat(1, 1)
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
root :combined
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
it 'applies both quantifier and sequence optimizations' do
|
|
172
|
+
parser = CombinedOptParser.new
|
|
173
|
+
# Should merge strings and unwrap repeat(1,1)
|
|
174
|
+
# Original: (Str('h') >> Str('e') >> ... >> Str('o')).repeat(1,1) >> Str(' ') >> (Str('w') >> ... >> Str('d')).repeat(1,1)
|
|
175
|
+
# After quantifier: Sequence(Str('h'), Str('e'), ..., Str('o')) >> Str(' ') >> Sequence(Str('w'), ..., Str('d'))
|
|
176
|
+
# After sequence: Str('hello') >> Str(' ') >> Str('world')
|
|
177
|
+
# Final merge: Str('hello world')
|
|
178
|
+
|
|
179
|
+
result = parser.combined.parse('hello world')
|
|
180
|
+
expect(result).to eq('hello world')
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it 'produces same results as manual optimization' do
|
|
184
|
+
parser = CombinedOptParser.new
|
|
185
|
+
|
|
186
|
+
# Manual construction without optimization
|
|
187
|
+
manual = (str('h') >> str('e') >> str('l') >> str('l') >> str('o')).repeat(1, 1) >>
|
|
188
|
+
str(' ') >>
|
|
189
|
+
(str('w') >> str('o') >> str('r') >> str('l') >> str('d')).repeat(1, 1)
|
|
190
|
+
|
|
191
|
+
input = 'hello world'
|
|
192
|
+
expect(parser.combined.parse(input)).to eq(manual.parse(input))
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
context 'edge cases' do
|
|
197
|
+
class EdgeCaseParser < Parsanol::Parser
|
|
198
|
+
optimize_rules!
|
|
199
|
+
|
|
200
|
+
rule(:normal_repeat) {
|
|
201
|
+
str('a').repeat(0, 3) # Should not be simplified
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
rule(:variable_repeat) {
|
|
205
|
+
str('b').repeat(1) # Should not be simplified (unbounded)
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
root :normal_repeat
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
it 'does not simplify non-trivial repetitions' do
|
|
212
|
+
parser = EdgeCaseParser.new
|
|
213
|
+
expect(parser.normal_repeat.parse('a')).to be_truthy
|
|
214
|
+
expect(parser.normal_repeat.parse('aa')).to be_truthy
|
|
215
|
+
expect(parser.normal_repeat.parse('aaa')).to be_truthy
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
it 'does not simplify unbounded repetitions' do
|
|
219
|
+
parser = EdgeCaseParser.new
|
|
220
|
+
expect(parser.variable_repeat.parse('b')).to be_truthy
|
|
221
|
+
expect(parser.variable_repeat.parse('bbb')).to be_truthy
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
context 'choice optimizations' do
|
|
226
|
+
class ChoiceOptParser < Parsanol::Parser
|
|
227
|
+
optimize_rules!
|
|
228
|
+
|
|
229
|
+
rule(:duplicate_choices) {
|
|
230
|
+
str('a') | str('b') | str('a') | str('c') | str('b')
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
rule(:nested_alternatives) {
|
|
234
|
+
(str('x') | str('y')) | (str('z') | str('w'))
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
root :duplicate_choices
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
it 'deduplicates alternative choices' do
|
|
241
|
+
parser = ChoiceOptParser.new
|
|
242
|
+
# All three unique options should still parse
|
|
243
|
+
expect(parser.duplicate_choices.parse('a')).to eq('a')
|
|
244
|
+
expect(parser.duplicate_choices.parse('b')).to eq('b')
|
|
245
|
+
expect(parser.duplicate_choices.parse('c')).to eq('c')
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
it 'flattens nested alternatives' do
|
|
249
|
+
parser = ChoiceOptParser.new
|
|
250
|
+
# All four flattened options should parse
|
|
251
|
+
expect(parser.nested_alternatives.parse('x')).to eq('x')
|
|
252
|
+
expect(parser.nested_alternatives.parse('y')).to eq('y')
|
|
253
|
+
expect(parser.nested_alternatives.parse('z')).to eq('z')
|
|
254
|
+
expect(parser.nested_alternatives.parse('w')).to eq('w')
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
context 'lookahead optimizations' do
|
|
259
|
+
class LookaheadOptParser < Parsanol::Parser
|
|
260
|
+
optimize_rules!
|
|
261
|
+
|
|
262
|
+
rule(:double_negation) {
|
|
263
|
+
str('a').absent?.absent? >> str('a')
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
rule(:idempotent_positive) {
|
|
267
|
+
str('b').present?.present? >> str('b')
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
rule(:negative_of_positive) {
|
|
271
|
+
str('c').present?.absent? >> str('d')
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
root :double_negation
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
it 'simplifies double negation !(!x) to &x' do
|
|
278
|
+
parser = LookaheadOptParser.new
|
|
279
|
+
# Double negation becomes positive lookahead
|
|
280
|
+
expect(parser.double_negation.parse('a')).to eq('a')
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
it 'simplifies idempotent positive &(&x) to &x' do
|
|
284
|
+
parser = LookaheadOptParser.new
|
|
285
|
+
# Nested positive lookaheads are idempotent
|
|
286
|
+
expect(parser.idempotent_positive.parse('b')).to eq('b')
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
it 'simplifies negative of positive !(&x) to !x' do
|
|
290
|
+
parser = LookaheadOptParser.new
|
|
291
|
+
# !(&x) becomes !x
|
|
292
|
+
expect(parser.negative_of_positive.parse('d')).to eq('d')
|
|
293
|
+
end
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
context 'all optimizations combined' do
|
|
297
|
+
class AllOptimizationsParser < Parsanol::Parser
|
|
298
|
+
optimize_rules!
|
|
299
|
+
|
|
300
|
+
rule(:everything) {
|
|
301
|
+
# Quantifiers: repeat(1,1)
|
|
302
|
+
# Sequences: adjacent strings
|
|
303
|
+
# Choices: duplicate alternatives
|
|
304
|
+
((str('a') >> str('b')).repeat(1, 1) | (str('a') >> str('b')).repeat(1, 1)) >>
|
|
305
|
+
(str('c') | str('c') | str('d'))
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
rule(:with_lookahead) {
|
|
309
|
+
# Add lookahead optimization test
|
|
310
|
+
str('x').absent?.absent? >> str('y')
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
root :everything
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
it 'applies all four optimizers together' do
|
|
317
|
+
parser = AllOptimizationsParser.new
|
|
318
|
+
# Should optimize:
|
|
319
|
+
# 1. Remove repeat(1,1) with quantifier optimizer
|
|
320
|
+
# 2. Merge str('a') >> str('b') with sequence optimizer
|
|
321
|
+
# 3. Deduplicate alternatives with choice optimizer
|
|
322
|
+
expect(parser.everything.parse('abc')).to be_truthy
|
|
323
|
+
expect(parser.everything.parse('abd')).to be_truthy
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
it 'optimizes lookaheads in combination with other optimizers' do
|
|
327
|
+
parser = AllOptimizationsParser.new
|
|
328
|
+
# 4. Simplify lookaheads: !(!x) becomes &x
|
|
329
|
+
# The other 3 lookahead tests already verify functional correctness
|
|
330
|
+
# This test just confirms lookahead optimization is integrated
|
|
331
|
+
expect(parser).to respond_to(:with_lookahead)
|
|
332
|
+
end
|
|
333
|
+
end
|
|
334
|
+
end
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Parsanol::Buffer do
|
|
4
|
+
let(:buffer) { described_class.new(capacity: 10) }
|
|
5
|
+
|
|
6
|
+
describe '#initialize' do
|
|
7
|
+
it 'creates buffer with specified capacity' do
|
|
8
|
+
expect(buffer.capacity).to eq(10)
|
|
9
|
+
expect(buffer.size).to eq(0)
|
|
10
|
+
expect(buffer.storage).to be_a(Array)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it 'uses default capacity if not specified' do
|
|
14
|
+
default_buffer = described_class.new
|
|
15
|
+
expect(default_buffer.capacity).to eq(10)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
describe '#push' do
|
|
20
|
+
it 'adds elements to buffer' do
|
|
21
|
+
buffer.push("a")
|
|
22
|
+
buffer.push("b")
|
|
23
|
+
|
|
24
|
+
expect(buffer.size).to eq(2)
|
|
25
|
+
expect(buffer[0]).to eq("a")
|
|
26
|
+
expect(buffer[1]).to eq("b")
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it 'returns self for method chaining' do
|
|
30
|
+
result = buffer.push("a")
|
|
31
|
+
expect(result).to eq(buffer)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it 'grows buffer when capacity is exceeded' do
|
|
35
|
+
original_capacity = buffer.capacity
|
|
36
|
+
|
|
37
|
+
# Push one element past capacity to force the buffer to grow
|
|
38
|
+
(original_capacity + 1).times { |i| buffer.push(i) }
|
|
39
|
+
|
|
40
|
+
expect(buffer.size).to eq(original_capacity + 1)
|
|
41
|
+
expect(buffer.capacity).to be > original_capacity
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
it 'preserves all elements when growing' do
|
|
45
|
+
11.times { |i| buffer.push(i) }
|
|
46
|
+
|
|
47
|
+
arr = buffer.to_a
|
|
48
|
+
expect(arr.size).to eq(11)
|
|
49
|
+
expect(arr).to eq((0..10).to_a)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
describe '#<<' do
|
|
54
|
+
it 'is an alias for push' do
|
|
55
|
+
buffer << "a" << "b"
|
|
56
|
+
|
|
57
|
+
expect(buffer.size).to eq(2)
|
|
58
|
+
expect(buffer.to_a).to eq(["a", "b"])
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe '#[]' do
|
|
63
|
+
before do
|
|
64
|
+
buffer.push("a")
|
|
65
|
+
buffer.push("b")
|
|
66
|
+
buffer.push("c")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it 'returns element at index' do
|
|
70
|
+
expect(buffer[0]).to eq("a")
|
|
71
|
+
expect(buffer[1]).to eq("b")
|
|
72
|
+
expect(buffer[2]).to eq("c")
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it 'returns nil for out of bounds index' do
|
|
76
|
+
expect(buffer[3]).to be_nil
|
|
77
|
+
expect(buffer[100]).to be_nil
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
describe '#[]=' do
|
|
82
|
+
before do
|
|
83
|
+
buffer.push("a")
|
|
84
|
+
buffer.push("b")
|
|
85
|
+
buffer.push("c")
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it 'sets element at index' do
|
|
89
|
+
buffer[1] = "x"
|
|
90
|
+
expect(buffer[1]).to eq("x")
|
|
91
|
+
expect(buffer.to_a).to eq(["a", "x", "c"])
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it 'does not set element beyond size' do
|
|
95
|
+
buffer[10] = "x"
|
|
96
|
+
expect(buffer.size).to eq(3)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
describe '#to_a' do
|
|
101
|
+
it 'returns array of logical size' do
|
|
102
|
+
buffer.push("a")
|
|
103
|
+
buffer.push("b")
|
|
104
|
+
buffer.push("c")
|
|
105
|
+
|
|
106
|
+
arr = buffer.to_a
|
|
107
|
+
expect(arr).to be_a(Array)
|
|
108
|
+
expect(arr.size).to eq(3)
|
|
109
|
+
expect(arr).to eq(["a", "b", "c"])
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
it 'returns empty array for empty buffer' do
|
|
113
|
+
expect(buffer.to_a).to eq([])
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it 'returns new array instance' do
|
|
117
|
+
buffer.push("a")
|
|
118
|
+
arr1 = buffer.to_a
|
|
119
|
+
arr2 = buffer.to_a
|
|
120
|
+
|
|
121
|
+
expect(arr1.object_id).not_to eq(arr2.object_id)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
describe '#clear!' do
|
|
126
|
+
before do
|
|
127
|
+
buffer.push("a")
|
|
128
|
+
buffer.push("b")
|
|
129
|
+
buffer.push("c")
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
it 'resets size to zero' do
|
|
133
|
+
buffer.clear!
|
|
134
|
+
expect(buffer.size).to eq(0)
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
it 'keeps capacity unchanged' do
|
|
138
|
+
original_capacity = buffer.capacity
|
|
139
|
+
buffer.clear!
|
|
140
|
+
expect(buffer.capacity).to eq(original_capacity)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
it 'clears references for GC' do
|
|
144
|
+
buffer.clear!
|
|
145
|
+
expect(buffer.to_a).to eq([])
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
it 'returns self for method chaining' do
|
|
149
|
+
result = buffer.clear!
|
|
150
|
+
expect(result).to eq(buffer)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it 'allows reuse after clear' do
|
|
154
|
+
buffer.clear!
|
|
155
|
+
buffer.push("x")
|
|
156
|
+
buffer.push("y")
|
|
157
|
+
|
|
158
|
+
expect(buffer.size).to eq(2)
|
|
159
|
+
expect(buffer.to_a).to eq(["x", "y"])
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
describe '#empty?' do
|
|
164
|
+
it 'returns true for new buffer' do
|
|
165
|
+
expect(buffer.empty?).to be true
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
it 'returns false after adding elements' do
|
|
169
|
+
buffer.push("a")
|
|
170
|
+
expect(buffer.empty?).to be false
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
it 'returns true after clear' do
|
|
174
|
+
buffer.push("a")
|
|
175
|
+
buffer.clear!
|
|
176
|
+
expect(buffer.empty?).to be true
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
describe '#reset!' do
|
|
181
|
+
it 'is an alias for clear!' do
|
|
182
|
+
buffer.push("a")
|
|
183
|
+
buffer.push("b")
|
|
184
|
+
|
|
185
|
+
buffer.reset!
|
|
186
|
+
|
|
187
|
+
expect(buffer.size).to eq(0)
|
|
188
|
+
expect(buffer.empty?).to be true
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
describe 'buffer reuse' do
|
|
193
|
+
it 'efficiently reuses buffer without reallocation' do
|
|
194
|
+
# First use
|
|
195
|
+
5.times { |i| buffer.push(i) }
|
|
196
|
+
expect(buffer.size).to eq(5)
|
|
197
|
+
original_capacity = buffer.capacity
|
|
198
|
+
|
|
199
|
+
# Clear and reuse
|
|
200
|
+
buffer.clear!
|
|
201
|
+
5.times { |i| buffer.push(i + 10) }
|
|
202
|
+
|
|
203
|
+
expect(buffer.size).to eq(5)
|
|
204
|
+
expect(buffer.capacity).to eq(original_capacity)
|
|
205
|
+
expect(buffer.to_a).to eq([10, 11, 12, 13, 14])
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
describe 'growth strategy' do
|
|
210
|
+
it 'doubles capacity when growing' do
|
|
211
|
+
small_buffer = described_class.new(capacity: 4)
|
|
212
|
+
|
|
213
|
+
# Push one element past capacity (5 into a capacity-4 buffer) to trigger growth
|
|
214
|
+
5.times { |i| small_buffer.push(i) }
|
|
215
|
+
|
|
216
|
+
expect(small_buffer.capacity).to eq(8) # 4 * 2
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|