parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for Parsanol::ErrorReporter::Deepest.
#
# The #err / #err_at examples stub #deepest, so they only pin that each entry
# point returns whatever #deepest returns. The #deepest examples drive the
# reporter with doubles that expose just #pos and #children.
describe Parsanol::ErrorReporter::Deepest do
  let(:reporter) { described_class.new }
  let(:fake_source) { double('source') }

  # Source stubs used by the #err and #err_at examples. They are plain
  # `allow`s, so examples that never touch the source are unaffected.
  before do
    allow(fake_source).to receive(:pos).and_return(13)
    allow(fake_source).to receive(:line_and_column).and_return([1, 1])
  end

  # Builds a stand-in cause located at +pos+ with the given +children+.
  def fake_cause(pos = 13, children = nil)
    double("cause#{pos}", pos: pos, children: children)
  end

  describe '#err' do
    it 'returns the deepest cause' do
      expect(reporter).to receive(:deepest).and_return(:deepest)
      expect(reporter.err('parslet', fake_source, 'message')).to eq(:deepest)
    end
  end

  describe '#err_at' do
    it 'returns the deepest cause' do
      expect(reporter).to receive(:deepest).and_return(:deepest)
      # NOTE(review): assumes #err_at takes (atom, source, message, pos), as
      # parslet's reporter does - confirm against lib/parsanol/error_reporter.
      expect(reporter.err_at('parslet', fake_source, 'message', 13)).to eq(:deepest)
    end
  end

  describe '#deepest(cause)' do
    context 'when there is no deepest cause yet' do
      let(:cause) { fake_cause }

      it 'returns the given cause' do
        expect(reporter.deepest(cause)).to eq(cause)
      end
    end

    context 'when the previous cause is deeper (no relationship)' do
      let(:previous) { fake_cause }

      before { reporter.deepest(previous) }

      it 'returns the previous cause' do
        expect(reporter.deepest(fake_cause(12))).to eq(previous)
      end
    end

    context 'when the previous cause is deeper (child)' do
      let(:previous) { fake_cause }

      before { reporter.deepest(previous) }

      it 'returns the given cause' do
        # The new cause sits at a smaller position but lists the previous
        # deepest cause among its children.
        given = fake_cause(12, [previous])
        expect(reporter.deepest(given)).to eq(given)
      end
    end

    context 'when the previous cause is shallower' do
      before { reporter.deepest(fake_cause) }

      it 'stores the cause as deepest' do
        deeper = fake_cause(14)
        reporter.deepest(deeper)
        expect(reporter.deepest_cause).to eq(deeper)
      end
    end
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Acceptance-style checks that a small Lisp grammar exports to the Citrus and
# Treetop grammar formats exactly as recorded in the fixture files that live
# next to this spec.
describe Parsanol::Parser, "exporting to other lingos" do
  # Grammar under export: a minimal s-expression language.
  class MiniLisp < Parsanol::Parser
    root :expression

    rule(:expression) do
      space? >> str('(') >> space? >> body >> str(')')
    end

    rule(:body) do
      (expression | identifier | float | integer | string).repeat.as(:exp)
    end

    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    rule(:identifier) do
      (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
    end

    rule(:float) do
      (
        integer >> (
          str('.') >> match('[0-9]').repeat(1) |
          str('e') >> match('[0-9]').repeat(1)
        ).as(:e)
      ).as(:float) >> space?
    end

    rule(:integer) do
      ((str('+') | str('-')).maybe >> match('[0-9]').repeat(1)).as(:integer) >> space?
    end

    rule(:string) do
      str('"') >> (
        str('\\') >> any |
        str('"').absent? >> any
      ).repeat.as(:string) >> str('"') >> space?
    end
  end

  # The fixture files are only refreshed after the new syntax has been checked
  # against the respective tools, so this is closer to an acceptance test than
  # to a real spec.

  describe "<- #to_citrus" do
    let(:citrus) do
      fixture_path = File.join(File.dirname(__FILE__), 'minilisp.citrus')
      File.read(fixture_path)
    end

    it "should be valid citrus syntax" do
      exported = MiniLisp.new.to_citrus
      expect(exported).to eq(citrus)
    end
  end

  describe "<- #to_treetop" do
    let(:treetop) do
      fixture_path = File.join(File.dirname(__FILE__), 'minilisp.tt')
      File.read(fixture_path)
    end

    it "should be valid treetop syntax" do
      exported = MiniLisp.new.to_treetop
      expect(exported).to eq(treetop)
    end
  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'parsanol/parslet'
|
|
4
|
+
require 'parsanol/expression/treetop'
|
|
5
|
+
|
|
6
|
+
# Table-driven checks for the treetop-style expression syntax: every
# [pattern, input] pair below becomes its own example group.
describe Parsanol::Expression::Treetop do
  include Parsanol

  describe "positive samples" do
    # Each pattern must parse its input, and so must the parslet rebuilt from
    # the first parslet's string representation.
    accepted = [
      # pattern      input
      ["'abc'",     'abc'],
      ["...",       'abc'],
      ["[1-4]",     '3'],

      ["'abc'?",    'abc'],
      ["'abc'?",    ''],

      ["('abc')",   'abc'],

      ["'a' 'b'",   'ab'],
      ["'a' ('b')", 'ab'],

      ["'a' / 'b'", 'a'],
      ["'a' / 'b'", 'b'],

      ["'a'*",      'aaa'],
      ["'a'*",      ''],

      ["'a'+",      'aa'],
      ["'a'+",      'a'],

      ["'a'{1,2}",  'a'],
      ["'a'{1,2}",  'aa'],

      ["'a'{1,}",   'a'],
      ["'a'{1,}",   'aa'],

      ["'a'{,2}",   ''],
      ["'a'{,2}",   'a'],
      ["'a'{,2}",   'aa']
    ]

    accepted.each do |pattern, input|
      context "exp(#{pattern.inspect})" do
        let(:parslet) { exp(pattern) }

        subject { parslet }

        it { should parse(input) }

        context "string representation" do
          # Round trip: print the parslet, then parse the printed form again.
          subject { exp(parslet.to_s) }

          it { should parse(input, trace: true) }
        end
      end
    end
  end

  describe "negative samples" do
    # Each pattern must reject its input.
    rejected = [
      # pattern      input
      ["'abc'",     'cba'],
      ["[1-4]",     '5'],

      ["'a' / 'b'", 'c'],

      ["'a'+",      ''],

      ["'a'{1,2}",  ''],
      ["'a'{1,2}",  'aaa'],

      ["'a'{1,}",   ''],

      ["'a'{,2}",   'aaa']
    ]

    rejected.each do |pattern, input|
      context "exp(#{pattern.inspect})" do
        subject { exp(pattern) }

        it { should_not parse(input) }
      end
    end
  end
end
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for FIRST-set computation on Parsanol atoms (#first_set and
# #clear_first_set_cache) and for the Parsanol::FirstSet.disjoint? /
# .all_disjoint? helpers.
describe "FIRST set computation" do
  include Parsanol

  # Returns the Str atoms contained in +first_set+ as an Array.
  def str_atoms(first_set)
    first_set.grep(Parsanol::Atoms::Str)
  end

  # Returns the literal strings of the Str atoms contained in +first_set+.
  def str_values(first_set)
    str_atoms(first_set).map(&:str)
  end

  describe "Str atom" do
    it "returns itself as FIRST set" do
      first = str('foo').first_set

      expect(first.size).to eq(1)
      expect(first.first).to be_a(Parsanol::Atoms::Str)
      expect(first.first.str).to eq('foo')
    end

    it "different strings have different FIRST sets" do
      foo_first = str('foo').first_set
      bar_first = str('bar').first_set

      expect(foo_first.to_a & bar_first.to_a).to be_empty
    end
  end

  describe "Re atom" do
    it "returns itself as FIRST set" do
      first = match('[a-z]').first_set

      expect(first.size).to eq(1)
      expect(first.first).to be_a(Parsanol::Atoms::Re)
    end
  end

  describe "Sequence atom" do
    it "returns FIRST of first element" do
      first = (str('a') >> str('b')).first_set

      expect(first.size).to eq(1)
      expect(first.first).to be_a(Parsanol::Atoms::Str)
      expect(first.first.str).to eq('a')
    end

    it "handles sequences of more than 2 elements" do
      # Note: Due to Phase 24 string concatenation optimization,
      # str('x') >> str('y') >> str('z') becomes str('xyz')
      first = (str('x') >> str('y') >> str('z')).first_set

      expect(first.size).to eq(1)
      expect(first.first.str).to eq('xyz') # Optimized to single string
    end

    it "propagates through EPSILON when first element can match empty" do
      first = (str('a').maybe >> str('b')).first_set

      # Should include both 'a' and 'b' since 'a'.maybe can match empty
      expect(first.size).to eq(2)
      expect(str_values(first)).to include('a', 'b')
    end
  end

  describe "Alternative atom" do
    it "returns union of all alternatives" do
      first = (str('a') | str('b')).first_set

      expect(first.size).to eq(2)
      expect(str_values(first)).to contain_exactly('a', 'b')
    end

    it "handles three alternatives" do
      first = (str('x') | str('y') | str('z')).first_set

      expect(first.size).to eq(3)
      expect(str_values(first)).to contain_exactly('x', 'y', 'z')
    end

    it "detects disjoint FIRST sets" do
      first = (str('if') | str('while') | str('for')).first_set

      # All three keywords are disjoint, so each one shows up separately.
      expect(str_values(first)).to contain_exactly('if', 'while', 'for')
    end
  end

  describe "Repetition atom" do
    it "includes EPSILON for min=0 (maybe)" do
      first = str('a').maybe.first_set

      expect(first).to include(Parsanol::FirstSet::EPSILON)
      expect(str_values(first)).to eq(['a'])
    end

    it "includes EPSILON for min=0 (repeat)" do
      first = str('a').repeat(0, 3).first_set

      expect(first).to include(Parsanol::FirstSet::EPSILON)
    end

    it "does not include EPSILON for min=1" do
      first = str('a').repeat(1, 3).first_set

      expect(first).not_to include(Parsanol::FirstSet::EPSILON)
    end

    it "includes parslet's FIRST set" do
      first = str('x').repeat(0, 5).first_set

      expect(str_atoms(first).first.str).to eq('x')
    end
  end

  describe "Lookahead atom" do
    it "returns EPSILON for positive lookahead" do
      first = str('foo').present?.first_set

      expect(first).to eq(Set.new([Parsanol::FirstSet::EPSILON]))
    end

    it "returns EPSILON for negative lookahead" do
      first = str('foo').absent?.first_set

      expect(first).to eq(Set.new([Parsanol::FirstSet::EPSILON]))
    end
  end

  describe "Named atom" do
    it "delegates to wrapped parslet" do
      first = str('hello').as(:greeting).first_set

      expect(first.size).to eq(1)
      expect(first.first).to be_a(Parsanol::Atoms::Str)
      expect(first.first.str).to eq('hello')
    end
  end

  describe "Complex grammars" do
    it "computes FIRST for statement-like pattern" do
      # Simulates: if_stmt | while_stmt | print_stmt
      first = (str('if') | str('while') | str('print')).first_set

      expect(str_values(first)).to contain_exactly('if', 'while', 'print')
    end

    it "computes FIRST for expression-like pattern" do
      # Simulates: '(' expr ')' | number
      # The middle element is a match rather than a str because, per the
      # Phase 24 string concatenation note, str('(') >> str('x') >> str(')')
      # would get optimized to str('(x)').
      atom = (str('(') >> match('[a-z]') >> str(')')) | match('[0-9]')
      first = atom.first_set

      # FIRST should include '(' and [0-9]
      expect(first.size).to eq(2)
      has_paren = first.any? { |x| x.is_a?(Parsanol::Atoms::Str) && x.str == '(' }
      has_digit = first.any? { |x| x.is_a?(Parsanol::Atoms::Re) }
      expect(has_paren).to be true
      expect(has_digit).to be true
    end
  end

  describe "FIRST set caching" do
    it "caches computed FIRST sets" do
      atom = str('test')

      # The second call must hand back the very same object (cached).
      expect(atom.first_set).to equal(atom.first_set)
    end

    it "can clear cache" do
      atom = str('test')
      before_clear = atom.first_set
      atom.clear_first_set_cache
      after_clear = atom.first_set

      # After clearing, the set is computed afresh (different object) ...
      expect(after_clear).not_to equal(before_clear)
      # ... but its content is the same.
      expect(after_clear).to eq(before_clear)
    end
  end

  describe "Disjoint detection (for cut operator insertion)" do
    it "detects disjoint alternatives" do
      if_first = str('if').first_set
      while_first = str('while').first_set

      # Disjoint: intersection is empty
      expect(if_first.to_a & while_first.to_a).to be_empty
    end

    it "treats distinct strings that share a prefix as disjoint" do
      # Both literals start with 'a', but their FIRST sets hold different Str
      # atoms, so the intersection is still empty.
      apple_first = str('apple').first_set
      apricot_first = str('apricot').first_set

      expect(apple_first.to_a & apricot_first.to_a).to be_empty
    end

    it "treats different character classes as disjoint" do
      # Two different Re atoms do not compare equal, so their FIRST sets do
      # not intersect.
      lower_first = match('[a-z]').first_set
      upper_first = match('[A-Z]').first_set

      expect(lower_first.to_a & upper_first.to_a).to be_empty
    end
  end

  describe "Parsanol::FirstSet class methods" do
    describe ".disjoint?" do
      it "returns true for disjoint sets" do
        set1 = Set.new([str('if')])
        set2 = Set.new([str('while')])

        expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
      end

      it "returns false for overlapping sets" do
        # The same atom object is a member of both sets.
        atom = str('same')
        set1 = Set.new([atom])
        set2 = Set.new([atom])

        expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be false
      end

      it "ignores EPSILON when checking disjointness" do
        set1 = Set.new([str('a'), Parsanol::FirstSet::EPSILON])
        set2 = Set.new([str('b'), Parsanol::FirstSet::EPSILON])

        # Should be disjoint despite both having EPSILON
        expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
      end

      it "ignores nil when checking disjointness" do
        set1 = Set.new([str('a'), nil])
        set2 = Set.new([str('b'), nil])

        # Should be disjoint despite both having nil
        expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
      end

      it "returns true for empty sets" do
        # set1 holds nothing but EPSILON, so nothing is left to overlap once
        # EPSILON is ignored.
        set1 = Set.new([Parsanol::FirstSet::EPSILON])
        set2 = Set.new([str('a')])

        expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
      end
    end

    describe ".all_disjoint?" do
      it "returns true for mutually disjoint sets" do
        sets = %w[if while print].map { |keyword| Set.new([str(keyword)]) }

        expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
      end

      it "returns false when any two sets overlap" do
        # The last two sets share the same atom object.
        atom = str('same')
        sets = [
          Set.new([str('if')]),
          Set.new([atom]),
          Set.new([atom])
        ]

        expect(Parsanol::FirstSet.all_disjoint?(sets)).to be false
      end

      it "returns true for less than 2 sets" do
        sets = [Set.new([str('a')])]

        expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
      end

      it "returns true for empty array" do
        expect(Parsanol::FirstSet.all_disjoint?([])).to be true
      end

      it "handles sets with EPSILON correctly" do
        sets = %w[a b c].map do |literal|
          Set.new([str(literal), Parsanol::FirstSet::EPSILON])
        end

        # All disjoint despite all having EPSILON
        expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
      end
    end
  end
end
|