parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
|
|
6
|
+
# A reporter that tries to improve on the deepest error reporter by
|
|
7
|
+
# using heuristics to find the most relevant error and provide more
|
|
8
|
+
# context.
|
|
9
|
+
# The heuristic chooses the deepest error when parsing a sequence for which
|
|
10
|
+
# no alternative parsed successfully.
|
|
11
|
+
#
|
|
12
|
+
# Given the following parser:
|
|
13
|
+
#
|
|
14
|
+
# root(:call)
|
|
15
|
+
#
|
|
16
|
+
# rule(:call, label: 'call') {
|
|
17
|
+
# identifier >> str('.') >> method
|
|
18
|
+
# }
|
|
19
|
+
#
|
|
20
|
+
# rule(:method, label: 'method call') {
|
|
21
|
+
# identifier >> str('(') >> arguments.maybe >> str(')')
|
|
22
|
+
# }
|
|
23
|
+
#
|
|
24
|
+
# rule(:identifier, label: 'identifier') {
|
|
25
|
+
# match['[:alnum:]'].repeat(1)
|
|
26
|
+
# }
|
|
27
|
+
#
|
|
28
|
+
# rule(:arguments, label: 'method call arguments') {
|
|
29
|
+
# argument >> str(',') >> arguments | argument
|
|
30
|
+
# }
|
|
31
|
+
#
|
|
32
|
+
# rule(:argument) {
|
|
33
|
+
# call | identifier
|
|
34
|
+
# }
|
|
35
|
+
#
|
|
36
|
+
# and the following source:
|
|
37
|
+
#
|
|
38
|
+
# foo.bar(a,goo.baz(),c,)
|
|
39
|
+
#
|
|
40
|
+
# The contextual reporter returns the following causes:
|
|
41
|
+
#
|
|
42
|
+
# 0: Failed to match sequence (identifier '.' method call) at line 1 char 5
|
|
43
|
+
# when parsing method call arguments.
|
|
44
|
+
# 1: Failed to match sequence (identifier '(' method call arguments? ')') at
|
|
45
|
+
# line 1 char 22 when parsing method call arguments.
|
|
46
|
+
# 2: Failed to match [[:alnum:]] at line 1 char 23 when parsing method call
|
|
47
|
+
# arguments.
|
|
48
|
+
#
|
|
49
|
+
# (where 2 is a child cause of 1 and 1 a child cause of 0)
|
|
50
|
+
#
|
|
51
|
+
# The last piece used by the reporter is the (newly introduced) ability
|
|
52
|
+
# to attach a label to rules that describe a sequence in the grammar. The
|
|
53
|
+
# labels are used in two places:
|
|
54
|
+
# - In the "to_s" of Atoms::Base so that any error message uses labels to
|
|
55
|
+
# refer to atoms
|
|
56
|
+
# - In the cause error messages to give information about which expression
|
|
57
|
+
# failed to parse
|
|
58
|
+
#
|
|
59
|
+
class Contextual < Deepest
  # Starts with no successful match recorded (position 0) and a cleared
  # deepest cause / label position. Does not call super; #reset performs
  # the same @deepest_cause initialisation.
  def initialize
    @last_reset_pos = 0
    reset
  end

  # Notification that an expression parsed successfully.
  #
  # The stored errors are discarded unless the success happened strictly
  # before the position of the last success that triggered a reset; a
  # success at the same or a later byte position resets the reporter and
  # becomes the new reference position.
  #
  # @param source [Source] source being parsed; only source.pos.bytepos
  #   is read
  #
  def succ(source)
    matched_at = source.pos.bytepos
    unless matched_at < @last_reset_pos
      @last_reset_pos = matched_at
      reset
    end
  end

  # Forgets the deepest cause and rewinds the label position to -1 so that
  # the next label reported through #update_label is always accepted.
  #
  def reset
    @deepest_cause = nil
    @label_pos = -1
  end

  # Reports an error through the parent reporter and, when the failing
  # atom exposes a truthy label, records that label and stamps the most
  # relevant label seen so far onto the returned cause.
  #
  # @param atom [Parsanol::Atoms::Base] parslet that failed
  # @param source [Source] Source that we're using for this parse. (line
  #   number information...)
  # @param message [String, Array] Error message at this level.
  # @param children [Array] A list of errors from a deeper level (or nil).
  # @return [Cause] the cause returned by the parent reporter, labelled
  #   when the atom carries a label
  #
  def err(atom, source, message, children = nil)
    cause = super(atom, source, message, children)

    label = atom.label if atom.respond_to?(:label)
    return cause unless label

    update_label(label, source.pos.bytepos)
    cause.set_label(@label)
    cause
  end

  # Remembers the given label when it was reported at a byte position at
  # or beyond the position of the currently remembered label.
  #
  # @param label [String] label to apply if more relevant
  # @param bytepos [Integer] position in source code of matched source
  #
  def update_label(label, bytepos)
    return if bytepos < @label_pos

    @label_pos = bytepos
    @label = label
  end
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
# Instead of reporting the latest error that happens like {Tree} does,
|
|
6
|
+
# this class reports the deepest error. Depth is defined here as how
|
|
7
|
+
# advanced into the input an error happens. The errors close to the
|
|
8
|
+
# greatest depth tend to be more relevant to the end user, since they
|
|
9
|
+
# specify what could be done to make them go away.
|
|
10
|
+
#
|
|
11
|
+
# More specifically, errors produced by this reporter won't be related to
|
|
12
|
+
# the structure of the grammar at all. The positions of the errors will
|
|
13
|
+
# be advanced and convey at every grammar level what the deepest rule
|
|
14
|
+
# was to fail.
|
|
15
|
+
#
|
|
16
|
+
# @example Using Deepest reporter
|
|
17
|
+
# parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
|
|
18
|
+
#
|
|
19
|
+
class Deepest < Base
  # Returns the cause that is currently deepest. Mainly for specs.
  attr_reader :deepest_cause

  def initialize
    @deepest_cause = nil
  end

  # Produces an error cause at the current source position that combines
  # the message at the current level with the errors that happened at a
  # level below (children), then filters it through #deepest.
  #
  # @param atom [Parsanol::Atoms::Base] parslet that failed
  # @param source [Source] Source that we're using for this parse. (line
  #   number information...)
  # @param message [String, Array] Error message at this level.
  # @param children [Array] A list of errors from a deeper level (or nil).
  # @return [Cause] the new cause, or the stored deepest cause when the
  #   new one does not reach as far
  #
  def err(atom, source, message, children = nil)
    deepest(Cause.format(source, source.pos, message, children))
  end

  # Same as #err, but the error is reported at an explicitly given
  # position instead of the current source position.
  #
  # @param atom [Parsanol::Atoms::Base] parslet that failed
  # @param source [Source] Source that we're using for this parse. (line
  #   number information...)
  # @param message [String, Array] Error message at this level.
  # @param pos [Integer] The real position of the error.
  # @param children [Array] A list of errors from a deeper level (or nil).
  # @return [Cause] the new cause, or the stored deepest cause when the
  #   new one does not reach as far
  #
  def err_at(atom, source, message, pos, children = nil)
    deepest(Cause.format(source, pos, message, children))
  end

  # Notification that an expression successfully parsed
  # not used, see ErrorReporter::Contextual
  def succ(source)
    # No-op for Deepest reporter
  end

  # Checks to see if the lineage of the cause given includes a cause with
  # an error position at or beyond the current deepest cause stored. If
  # yes, it stores that leaf as the new reference and passes the cause
  # through to the caller. If no, it returns the current deepest error
  # that was saved as a reference.
  #
  # @param cause [Cause] freshly built error tree
  # @return [Cause] either +cause+ or the previously stored deepest cause
  #
  def deepest(cause)
    _rank, leaf = deepest_child(cause)

    if !deepest_cause || leaf.pos >= deepest_cause.pos
      # This error reaches at least as deep into the input, save its leaf
      # as the reference.
      @deepest_cause = leaf
      cause
    else
      deepest_cause
    end
  end

  private

  # Returns the node of the given error tree with the biggest rank, where
  # rank is the nesting level below +cause+ (each level of children adds
  # one). On equal rank the node found first is kept.
  #
  # @param cause [Cause] root of the (sub)tree to search
  # @param rank [Integer] rank of +cause+ itself
  # @return [Array] two elements: the biggest rank found and the cause
  #   at that rank
  #
  def deepest_child(cause, rank = 0)
    best_rank = rank
    best_cause = cause

    # A nil or empty children list simply leaves the node itself as best.
    cause.children&.each do |child|
      child_rank, child_cause = deepest_child(child, rank + 1)
      next unless child_rank > best_rank

      best_rank = child_rank
      best_cause = child_cause
    end

    [best_rank, best_cause]
  end
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
# An error reporter has two central methods, one for reporting errors at
|
|
6
|
+
# the current parse position (#err) and one for reporting errors at a
|
|
7
|
+
# given parse position (#err_at). The reporter can return an object (a
|
|
8
|
+
# 'cause') that will be returned to the caller along with the information
|
|
9
|
+
# that the parse failed.
|
|
10
|
+
#
|
|
11
|
+
# When reporting errors on the outer levels of your parser, these methods
|
|
12
|
+
# get passed a list of error objects ('causes') from the inner levels. In
|
|
13
|
+
# this default implementation, the inner levels are considered error
|
|
14
|
+
# subtrees and are appended to the generated tree node at each level,
|
|
15
|
+
# thereby constructing an error tree.
|
|
16
|
+
#
|
|
17
|
+
# This error tree will report in parallel with the grammar structure that
|
|
18
|
+
# failed. A one-to-one correspondence exists between each error in the
|
|
19
|
+
# tree and the parslet atom that produced that error.
|
|
20
|
+
#
|
|
21
|
+
# The implementor is really free to use these return values as they see
|
|
22
|
+
# fit. One example would be to return an error state object from these
|
|
23
|
+
# methods that is then updated as errors cascade up the parse derivation
|
|
24
|
+
# tree.
|
|
25
|
+
#
|
|
26
|
+
# @example Using Tree reporter
|
|
27
|
+
# parser.parse(input, reporter: Parsanol::ErrorReporter::Tree.new)
|
|
28
|
+
#
|
|
29
|
+
class Tree < Base
  # Builds an error cause at the current source position, combining the
  # message at this level with the errors from the level below (children).
  #
  # @param atom [Parsanol::Atoms::Base] parslet that failed
  # @param source [Source] Source that we're using for this parse. (line
  #   number information...)
  # @param message [String, Array] Error message at this level.
  # @param children [Array] A list of errors from a deeper level (or nil).
  # @return [Cause] An error tree combining children with message.
  #
  def err(atom, source, message, children = nil)
    Cause.format(source, source.pos, message, children)
  end

  # Builds an error cause at the given position, combining the message at
  # this level with the errors from the level below (children).
  #
  # @param atom [Parsanol::Atoms::Base] parslet that failed
  # @param source [Source] Source that we're using for this parse. (line
  #   number information...)
  # @param message [String, Array] Error message at this level.
  # @param pos [Integer] The real position of the error.
  # @param children [Array] A list of errors from a deeper level (or nil).
  # @return [Cause] An error tree combining children with message.
  #
  def err_at(atom, source, message, pos, children = nil)
    Cause.format(source, pos, message, children)
  end

  # Notification that an expression successfully parsed.
  # Deliberately empty here; see ErrorReporter::Contextual for a reporter
  # that uses it.
  def succ(source)
    # No-op for Tree reporter
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# A namespace for all error reporters.
|
|
4
|
+
#
|
|
5
|
+
# Error reporters collect and format parse errors. The parsing engine
|
|
6
|
+
# calls reporter methods as it attempts to match atoms, building up
|
|
7
|
+
# an error structure that can be presented to the user.
|
|
8
|
+
#
|
|
9
|
+
# @example Using a specific error reporter
|
|
10
|
+
# parser = MyParser.new
|
|
11
|
+
# parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
|
|
12
|
+
#
|
|
13
|
+
# @example Creating a custom error reporter
|
|
14
|
+
# class MyReporter < Parsanol::ErrorReporter::Base
|
|
15
|
+
# def initialize
|
|
16
|
+
# @errors = []
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# def err(atom, source, message, children = nil)
|
|
20
|
+
# @errors << { position: source.pos, message: message }
|
|
21
|
+
# @errors.last
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# def err_at(atom, source, message, pos, children = nil)
|
|
25
|
+
# @errors << { position: pos, message: message }
|
|
26
|
+
# @errors.last
|
|
27
|
+
# end
|
|
28
|
+
# end
|
|
29
|
+
#
|
|
30
|
+
module Parsanol::ErrorReporter
|
|
31
|
+
# Base class for error reporters.
|
|
32
|
+
#
|
|
33
|
+
# Error reporters collect and format parse errors. The parsing engine
|
|
34
|
+
# calls reporter methods as it attempts to match atoms, building up
|
|
35
|
+
# an error structure that can be presented to the user.
|
|
36
|
+
#
|
|
37
|
+
# Subclasses must implement {#err} and {#err_at} methods.
|
|
38
|
+
#
|
|
39
|
+
class Base
  # Report an error at the current parse position.
  #
  # @abstract Subclasses must implement this method
  #
  # @param atom [Parsanol::Atoms::Base] The atom that failed to match
  # @param source [Parsanol::Source] The input source
  # @param message [String, Array<String>] Error message(s)
  # @param children [Array<Cause>, nil] Child errors from deeper levels
  # @return [Object] An error cause object (implementation-specific)
  # @raise [NotImplementedError] always, in this base implementation
  #
  def err(atom, source, message, children = nil)
    raise NotImplementedError,
          'Error reporters must implement #err(atom, source, message, children)'
  end

  # Report an error at a specific position.
  #
  # @abstract Subclasses must implement this method
  #
  # @param atom [Parsanol::Atoms::Base] The atom that failed to match
  # @param source [Parsanol::Source] The input source
  # @param message [String, Array<String>] Error message(s)
  # @param pos [Integer] The byte position of the error
  # @param children [Array<Cause>, nil] Child errors from deeper levels
  # @return [Object] An error cause object (implementation-specific)
  # @raise [NotImplementedError] always, in this base implementation
  #
  def err_at(atom, source, message, pos, children = nil)
    raise NotImplementedError,
          'Error reporters must implement #err_at(atom, source, message, pos, children)'
  end

  # Called when an expression successfully parses.
  #
  # Hook that lets reporters track successful parses for better error
  # context. This base implementation does nothing and returns nil.
  #
  # @param source [Parsanol::Source] The input source at success position
  # @return [void]
  #
  def succ(source)
    nil
  end

  # Called after parse completes for finalization.
  #
  # Override to perform cleanup or generate final reports. This base
  # implementation does nothing and returns nil.
  #
  # @return [void]
  #
  def finalize
    nil
  end
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
require 'parsanol/error_reporter/tree'
|
|
97
|
+
require 'parsanol/error_reporter/deepest'
|
|
98
|
+
require 'parsanol/error_reporter/contextual'
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Allows exporting parslet grammars to other lingos.
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
require 'parsanol/atoms/visitor'
|
|
5
|
+
|
|
6
|
+
class Parsanol::Parser
|
|
7
|
+
module Visitors
|
|
8
|
+
# Visitor that renders a parslet tree as Citrus grammar text.
#
# Each +visit_*+ method returns the String for one node kind. The
# +context+ object must respond to +deferred(name, block)+ and
# +mangle_name(name)+; both are called from #visit_entity.
class Citrus
  attr_reader :context, :output

  # @param context [#deferred, #mangle_name] receiver of deferred rules
  def initialize(context)
    @context = context
  end

  # Renders a literal string atom as a double-quoted, escaped literal.
  def visit_str(str)
    "\"#{str.inspect[1..-2]}\""
  end

  # Renders a regular-expression atom using its own string form.
  def visit_re(match)
    match.to_s
  end

  # Registers the referenced rule with the context so it can be printed
  # later, and renders a parenthesised reference to its mangled name.
  def visit_entity(name, block)
    context.deferred(name, block)

    "(#{context.mangle_name(name)})"
  end

  # A name annotation has no grammar representation: render the wrapped
  # parslet only.
  def visit_named(_name, parslet)
    parslet.accept(self)
  end

  # Renders a sequence as space-separated elements in parentheses.
  def visit_sequence(parslets)
    grouped(parslets, ' ')
  end

  # Renders a repetition as the parslet followed by "min*max".
  def visit_repetition(_tag, min, max, parslet)
    "#{parslet.accept(self)}#{min}*#{max}"
  end

  # Renders alternatives separated by ' | ' in parentheses.
  def visit_alternative(alternatives)
    grouped(alternatives, ' | ')
  end

  # Renders a lookahead: '&' prefix when positive, '!' when negative.
  def visit_lookahead(positive, bound_parslet)
    "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
  end

  private

  # Renders every parslet with this visitor and joins the results with
  # +separator+ inside one pair of parentheses.
  def grouped(parslets, separator)
    "(#{parslets.map { |el| el.accept(self) }.join(separator)})"
  end
end
|
|
53
|
+
|
|
54
|
+
# Visitor for the Treetop dialect. It inherits everything from Citrus and
# overrides only the two node kinds that are rendered differently here.
class Treetop < Citrus
  # Renders a repetition as the parslet followed by "min..max".
  def visit_repetition(_tag, min, max, parslet)
    "#{parslet.accept(self)}#{min}..#{max}"
  end

  # Renders alternatives separated by ' / ' in parentheses.
  def visit_alternative(alternatives)
    rendered = alternatives.map { |el| el.accept(self) }
    "(#{rendered.join(' / ')})"
  end
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# A helper class that formats Citrus and Treetop grammars as a string.
|
|
70
|
+
#
|
|
71
|
+
class PrettyPrinter
  attr_reader :visitor

  # @param visitor_klass [Class] visitor class; it is instantiated with this
  #   printer as its only argument so that it can call #deferred and
  #   #mangle_name back on it
  def initialize(visitor_klass)
    # The queue must exist before any rule is printed: a grammar that never
    # references another rule never calls #deferred, and #pretty_print would
    # otherwise call #empty? on nil.
    @todo = []
    @visitor = visitor_klass.new(self)
  end

  # Pretty prints the given parslet using the visitor that has been
  # configured in initialize. Returns the string representation of the
  # Citrus or Treetop grammar.
  #
  # @param name [#to_s] grammar name printed in the header line
  # @param parslet [#accept] root parslet of the grammar
  # @return [String]
  #
  def pretty_print(name, parslet)
    output = ["grammar #{name}\n"]

    output << rule('root', parslet)

    seen = Set.new
    # @todo is constantly filled by the visitor (see #deferred) while rules
    # are being rendered. We keep going until it is empty.
    until @todo.empty?
      rule_name, block = @todo.shift

      # Track what rules we've already seen. This breaks loops.
      # Set#add? returns nil when the name was already present.
      next unless seen.add?(rule_name)

      output << rule(rule_name, block.call)
    end

    output << "end\n"
    output.join
  end

  # Formats a rule in either dialect.
  #
  # NOTE(review): the indentation inside these literals is reproduced as it
  # appears in the reviewed text; confirm the intended widths.
  #
  def rule(name, parslet)
    " rule #{mangle_name(name)}\n #{parslet.accept(visitor)}\n end\n"
  end

  # Whenever the visitor encounters a rule in a parslet, it defers the
  # pretty printing of the rule by calling this method.
  #
  # @param name [Symbol, String] rule name
  # @param content [#call] callable that returns the rule's parslet
  #
  def deferred(name, content)
    @todo ||= []
    @todo << [name, content]
  end

  # Mangles names so that Citrus and Treetop can live with it. This mostly
  # transforms some of the things that Ruby allows into other patterns. If
  # there is collision, we will not detect it for now.
  #
  # Currently only a trailing '?' is rewritten (to '_p'). The pattern is
  # anchored with \z so that only the very end of the name is considered.
  #
  def mangle_name(str)
    str.to_s.sub(/\?\z/, '_p')
  end
end
|
|
128
|
+
|
|
129
|
+
# Exports the current parser instance as a string in the Citrus dialect.
|
|
130
|
+
#
|
|
131
|
+
# Example:
|
|
132
|
+
#
|
|
133
|
+
# require 'parsanol/export'
|
|
134
|
+
# class MyParser < Parsanol::Parser
|
|
135
|
+
# root(:expression)
|
|
136
|
+
# rule(:expression) { str('foo') }
|
|
137
|
+
# end
|
|
138
|
+
#
|
|
139
|
+
# MyParser.new.to_citrus # => a citrus grammar as a string
|
|
140
|
+
#
|
|
141
|
+
def to_citrus
  # The grammar is named after the parser class and starts at its root.
  printer = PrettyPrinter.new(Visitors::Citrus)
  printer.pretty_print(self.class.name, root)
end
|
|
145
|
+
|
|
146
|
+
# Exports the current parser instance as a string in the Treetop dialect.
|
|
147
|
+
#
|
|
148
|
+
# Example:
|
|
149
|
+
#
|
|
150
|
+
# require 'parsanol/export'
|
|
151
|
+
# class MyParser < Parsanol::Parser
|
|
152
|
+
# root(:expression)
|
|
153
|
+
# rule(:expression) { str('foo') }
|
|
154
|
+
# end
|
|
155
|
+
#
|
|
156
|
+
# MyParser.new.to_treetop # => a treetop grammar as a string
|
|
157
|
+
#
|
|
158
|
+
def to_treetop
  # The grammar is named after the parser class and starts at its root.
  printer = PrettyPrinter.new(Visitors::Treetop)
  printer.pretty_print(self.class.name, root)
end
|
|
162
|
+
end
|
|
163
|
+
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require 'parsanol'
|
|
2
|
+
|
|
3
|
+
class Parsanol::Expression::Treetop
|
|
4
|
+
# Grammar for the subset of the Treetop expression language handled here:
# alternatives, sequences, occurrence modifiers, groups, strings,
# character classes and the any-character dot.
class Parser < Parsanol::Parser
  root(:expression)

  rule(:expression) { alternatives }

  # alternative 'a' / 'b'
  rule(:alternatives) {
    (simple >> (spaced('/') >> simple).repeat).as(:alt)
  }

  # sequence by simple concatenation 'a' 'b'
  rule(:simple) { occurrence.repeat(1).as(:seq) }

  # occurrence modifiers
  rule(:occurrence) {
    atom.as(:repetition) >> spaced('*').as(:sign) |
    atom.as(:repetition) >> spaced('+').as(:sign) |
    atom.as(:repetition) >> repetition_spec |

    atom.as(:maybe) >> spaced('?') |
    atom
  }

  rule(:atom) {
    spaced('(') >> expression.as(:unwrap) >> spaced(')') |
    dot |
    string |
    char_class
  }

  # a character class
  rule(:char_class) {
    (str('[') >>
      (str('\\') >> any |
      str(']').absent? >> any).repeat(1) >>
    str(']')).as(:match) >> space?
  }

  # anything at all
  rule(:dot) { spaced('.').as(:any) }

  # recognizing strings
  rule(:string) {
    str('\'') >>
    (
      (str('\\') >> any) |
      (str("'").absent? >> any)
    ).repeat.as(:string) >>
    str('\'') >> space?
  }

  # repetition specification like {1, 2}
  rule(:repetition_spec) {
    spaced('{') >>
    integer.maybe.as(:min) >> spaced(',') >>
    integer.maybe.as(:max) >> spaced('}')
  }
  rule(:integer) {
    match['0-9'].repeat(1)
  }

  # whitespace handling
  #
  # The pattern is single-quoted on purpose: inside a double-quoted Ruby
  # string "\s" is a literal space character, which would make tabs and
  # newlines between tokens a parse error.
  rule(:space) { match('\s').repeat(1) }
  rule(:space?) { space.maybe }

  # Matches the literal +text+ followed by optional whitespace.
  def spaced(text)
    str(text) >> space?
  end
end
|
|
73
|
+
|
|
74
|
+
# Turns the tree produced by the expression parser into parser atoms.
class Transform < Parsanol::Transform

  # 'a'* and 'a'+ : the sign decides the minimum count, with no upper bound.
  rule(:repetition => simple(:rep), :sign => simple(:sign)) {
    # The grammar captures the sign together with any whitespace that
    # follows it, so an exact comparison with '+' would treat "+ " as '*'.
    min = sign.to_s.start_with?('+') ? 1 : 0
    Parsanol::Atoms::Repetition.new(rep, min, nil) }

  # 'a'{min,max} : a missing min means 0, a missing max means unbounded.
  rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) {
    Parsanol::Atoms::Repetition.new(rep,
      Integer(min || 0),
      max && Integer(max)) }

  rule(:alt => subtree(:alt)) { Parsanol::Atoms::Alternative.new(*alt) }
  rule(:seq => sequence(:s)) { Parsanol::Atoms::Sequence.new(*s) }
  rule(:unwrap => simple(:u)) { u }
  rule(:maybe => simple(:m)) { |d| d[:m].maybe }
  rule(:string => simple(:s)) { Parsanol::Atoms::Str.new(s) }
  rule(:match => simple(:m)) { Parsanol::Atoms::Re.new(m) }
  rule(:any => simple(:a)) { Parsanol::Atoms::Re.new('.') }
end
|
|
92
|
+
|
|
93
|
+
end
|
|
94
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
|
|
2
|
+
# Allows specifying rules as strings using the exact same grammar that treetop
|
|
3
|
+
# does, minus the actions. This is on one hand a good example of a fully
|
|
4
|
+
# fledged parser and on the other hand might even turn out really useful.
|
|
5
|
+
#
|
|
6
|
+
# This can be viewed as an extension to parslet and might even be hosted in
|
|
7
|
+
# its own gem one fine day.
|
|
8
|
+
#
|
|
9
|
+
class Parsanol::Expression
  include Parsanol

  autoload :Treetop, 'parsanol/expression/treetop'

  # Creates a parslet from a foreign language expression.
  #
  # Example:
  #
  #   Parsanol::Expression.new("'a' 'b'")
  #
  # @param str [String] the expression source
  # @param opts [Hash] options; :type is stored (default :treetop) but the
  #   expression is always parsed with the Treetop parser
  # @param context [Object] accepted but not used by this method
  #
  def initialize(str, opts = {}, context = self)
    @type = opts[:type] || :treetop
    @exp = str
    @parslet = transform(parse(str))
  end

  # Transforms the parse tree into a parslet expression.
  #
  # On failure the offending tree is written to stderr via +warn+ and the
  # original exception is re-raised unchanged.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue
    warn "Could not transform: " + tree.inspect
    raise
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet.
  #
  # @return [Object] the parslet built in the constructor
  #
  def to_parslet
    @parslet
  end
end
|