parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Base class for AST visitors following the Visitor pattern
|
|
4
|
+
# This separates tree traversal logic from transformation logic
|
|
5
|
+
# making the code more maintainable and extensible.
|
|
6
|
+
module Parsanol
  # Base visitor that walks a tree of parslet atoms.
  #
  # #visit dispatches on the atom's class to a visit_* hook. The default
  # hooks for composite atoms visit the children and rebuild the node only
  # when at least one child was replaced by a *different object*; leaf hooks
  # return the atom unchanged. Subclasses override individual visit_* hooks
  # to perform transformations.
  class ASTVisitor
    # Visits a parslet by dispatching to the matching visit_* hook.
    #
    # @param parslet [Parsanol::Atoms::Base] parslet to visit
    # @return [Parsanol::Atoms::Base] transformed parslet; atoms of a type
    #   not listed below are returned as-is
    def visit(parslet)
      case parslet
      when Parsanol::Atoms::Sequence
        visit_sequence(parslet)
      when Parsanol::Atoms::Alternative
        visit_alternative(parslet)
      when Parsanol::Atoms::Repetition
        visit_repetition(parslet)
      when Parsanol::Atoms::Lookahead
        visit_lookahead(parslet)
      when Parsanol::Atoms::Named
        visit_named(parslet)
      when Parsanol::Atoms::Str
        visit_str(parslet)
      when Parsanol::Atoms::Re
        visit_re(parslet)
      else
        # Leaf nodes or unknown types - return as-is
        parslet
      end
    end

    # Visits every element of a sequence and rebuilds the sequence if any
    # element was replaced.
    #
    # @param parslet [Parsanol::Atoms::Sequence] sequence to visit
    # @return [Parsanol::Atoms::Base] the original sequence, or a new one
    #   holding the visited elements
    def visit_sequence(parslet)
      originals = parslet.parslets
      visited = originals.map { |child| visit(child) }
      return parslet if same_nodes?(visited, originals)

      Parsanol::Atoms::Sequence.new(*visited)
    end

    # Visits every branch of an alternative and rebuilds the alternative if
    # any branch was replaced.
    #
    # @param parslet [Parsanol::Atoms::Alternative] alternative to visit
    # @return [Parsanol::Atoms::Base] the original alternative, or a new one
    #   holding the visited branches
    def visit_alternative(parslet)
      originals = parslet.alternatives
      visited = originals.map { |child| visit(child) }
      return parslet if same_nodes?(visited, originals)

      Parsanol::Atoms::Alternative.new(*visited)
    end

    # Visits the repeated parslet and rebuilds the repetition if it was
    # replaced, carrying over min, max and the tag.
    #
    # @param parslet [Parsanol::Atoms::Repetition] repetition to visit
    # @return [Parsanol::Atoms::Base] transformed repetition
    def visit_repetition(parslet)
      new_parslet = visit(parslet.parslet)
      return parslet if new_parslet.equal?(parslet.parslet)

      Parsanol::Atoms::Repetition.new(
        new_parslet,
        parslet.min,
        parslet.max,
        # The tag is read straight from the instance variable.
        # NOTE(review): presumably Repetition exposes no reader for it - confirm.
        parslet.instance_variable_get(:@tag)
      )
    end

    # Visits the bound parslet of a lookahead and rebuilds the lookahead if
    # it was replaced, keeping its polarity.
    #
    # @param parslet [Parsanol::Atoms::Lookahead] lookahead to visit
    # @return [Parsanol::Atoms::Base] transformed lookahead
    def visit_lookahead(parslet)
      new_bound = visit(parslet.bound_parslet)
      return parslet if new_bound.equal?(parslet.bound_parslet)

      Parsanol::Atoms::Lookahead.new(new_bound, parslet.positive)
    end

    # Visits the wrapped parslet of a named node and rebuilds the node under
    # the same name if it was replaced.
    #
    # @param parslet [Parsanol::Atoms::Named] named to visit
    # @return [Parsanol::Atoms::Base] transformed named
    def visit_named(parslet)
      new_parslet = visit(parslet.parslet)
      return parslet if new_parslet.equal?(parslet.parslet)

      Parsanol::Atoms::Named.new(new_parslet, parslet.name)
    end

    # Visits a string literal node. Leaf node: returned unchanged by default.
    #
    # @param parslet [Parsanol::Atoms::Str] string to visit
    # @return [Parsanol::Atoms::Base] transformed string
    def visit_str(parslet)
      parslet
    end

    # Visits a regex node. Leaf node: returned unchanged by default.
    #
    # @param parslet [Parsanol::Atoms::Re] regex to visit
    # @return [Parsanol::Atoms::Base] transformed regex
    def visit_re(parslet)
      parslet
    end

    private

    # True when both lists hold the very same objects in the same order.
    #
    # Identity (equal?) is used instead of Array#== so that list-shaped nodes
    # follow the same "was the child replaced?" rule as the single-child
    # hooks above. With Array#== a replacement that merely compares == to the
    # original would be thrown away.
    #
    # @param visited [Array] children returned by #visit
    # @param originals [Array] children of the node being visited
    # @return [Boolean]
    def same_nodes?(visited, originals)
      visited.zip(originals).all? { |node, original| node.equal?(original) }
    end
  end
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Alternative during matching. Holds a list of parslets that are tried one
|
|
4
|
+
# after another, in order. Only fails if all alternatives fail.
|
|
5
|
+
#
|
|
6
|
+
# Example:
|
|
7
|
+
#
|
|
8
|
+
# str('a') | str('b') # matches either 'a' or 'b'
|
|
9
|
+
#
|
|
10
|
+
class Parsanol::Atoms::Alternative < Parsanol::Atoms::Base
  attr_reader :alternatives

  # Builds an Alternative from the given parslets, keeping their order.
  # This is what `str('a') | str('b')` ends up calling.
  #
  # @param alternatives [Array<Parsanol::Atoms::Base>] branches to try
  def initialize(*alternatives)
    super()

    @alternatives = alternatives

    # The failure message is built once here and frozen so that #try never
    # has to construct it.
    @error_msg = "Expected one of #{alternatives.inspect}".freeze
  end

  #---
  # Keep alternatives flat: (A | B) | C yields one Alternative(A, B, C)
  # rather than Alternative(Alternative(A, B), C). When the right-hand side
  # is itself an Alternative its branches are appended individually.
  #+++
  def |(parslet)
    appended =
      if parslet.is_a?(Parsanol::Atoms::Alternative)
        parslet.alternatives
      else
        [parslet]
      end

    self.class.new(*(@alternatives + appended))
  end

  # Applies each branch in order and returns the first success. If every
  # branch fails, reports an error through the context carrying the values
  # returned by all failed branches.
  #
  # @return [Array] success pair of the first matching branch, or whatever
  #   context.err returns
  def try(source, context, consume_all)
    # Local copy of the ivar, read once.
    branches = @alternatives
    count = branches.size

    # Unrolled paths for two and three branches (no block iteration).
    if count == 2
      ok_a, val_a = branches[0].apply(source, context, consume_all)
      return [ok_a, val_a] if ok_a

      ok_b, val_b = branches[1].apply(source, context, consume_all)
      return [ok_b, val_b] if ok_b

      return context.err(self, source, @error_msg, [val_a, val_b])
    elsif count == 3
      ok_a, val_a = branches[0].apply(source, context, consume_all)
      return [ok_a, val_a] if ok_a

      ok_b, val_b = branches[1].apply(source, context, consume_all)
      return [ok_b, val_b] if ok_b

      ok_c, val_c = branches[2].apply(source, context, consume_all)
      return [ok_c, val_c] if ok_c

      return context.err(self, source, @error_msg, [val_a, val_b, val_c])
    end

    # Any other size: the list of failures is only allocated once a branch
    # has actually failed (it stays nil when there are no branches at all).
    failures = nil

    branches.each do |branch|
      outcome = branch.apply(source, context, consume_all)
      matched, payload = outcome
      return outcome if matched

      (failures ||= []) << payload
    end

    # Every branch failed.
    context.err(self, source, @error_msg, failures)
  end

  precedence ALTERNATE
  # Renders the branches separated by ' / '.
  def to_s_inner(prec)
    rendered = alternatives.map { |branch| branch.to_s(prec) }
    rendered.join(' / ')
  end

  # FIRST set of an alternative: the union of the FIRST sets of all of its
  # branches (an empty Set when there are no branches). Cut insertion relies
  # on this: if FIRST(alt1) ∩ FIRST(alt2) = ∅, a cut can go after alt1.
  def compute_first_set
    branch_sets = alternatives.map(&:first_set)
    return Set.new if branch_sets.empty?

    branch_sets.reduce { |merged, set| merged.union(set) }
  end
end
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Base class for all parslets, handles orchestration of calls and implements
|
|
4
|
+
# a lot of the operator and chaining methods.
|
|
5
|
+
#
|
|
6
|
+
# Also see Parsanol::Atoms::DSL for chaining parslet atoms together.
|
|
7
|
+
#
|
|
8
|
+
class Parsanol::Atoms::Base
|
|
9
|
+
include Parsanol::Atoms::Precedence
|
|
10
|
+
include Parsanol::Atoms::DSL
|
|
11
|
+
include Parsanol::Atoms::CanFlatten
|
|
12
|
+
include Parsanol::FirstSet
|
|
13
|
+
|
|
14
|
+
# Parslet label as provided in grammar
|
|
15
|
+
attr_accessor :label
|
|
16
|
+
|
|
17
|
+
# Phase 61: Frozen error message for unknown input
|
|
18
|
+
ERROR_UNKNOWN_INPUT = "Don't know what to do with ".freeze
|
|
19
|
+
|
|
20
|
+
# Given a string or an IO object, this will attempt a parse of its contents
|
|
21
|
+
# and return a result. If the parse fails, a Parsanol::ParseFailed exception
|
|
22
|
+
# will be thrown.
|
|
23
|
+
#
|
|
24
|
+
# @param io [String, Source] input for the parse process
|
|
25
|
+
# @option options [Parsanol::ErrorReporter] :reporter error reporter to use,
|
|
26
|
+
# defaults to Parsanol::ErrorReporter::Tree
|
|
27
|
+
# @option options [Boolean] :prefix Should a prefix match be accepted?
|
|
28
|
+
# (default: false)
|
|
29
|
+
# @return [Hash, Array, Parsanol::Slice] PORO (Plain old Ruby object) result
|
|
30
|
+
# tree
|
|
31
|
+
#
|
|
32
|
+
def parse(io, options={})
  # Anything that answers #line_and_column is used as the source directly;
  # everything else is wrapped in a Parsanol::Source.
  source = io.respond_to?(:line_and_column) ? io : Parsanol::Source.new(io)

  # Remember where this parse starts. A caller-supplied source may already
  # be positioned past 0, and the error-reporting rerun below must look at
  # exactly the same input as the first attempt.
  start_pos = source.bytepos

  # Try to cheat. Assuming that we'll be able to parse the input, don't
  # run error reporting code.
  success, value = setup_and_apply(source, nil, !options[:prefix])

  # If we didn't succeed the parse, raise an exception for the user.
  # Stack trace will be off, but the error tree should explain the reason
  # it failed.
  unless success
    # Cheating has not paid off. Now pay the cost: rerun the parse from the
    # same starting position, gathering error information in the process.
    reporter = options[:reporter] || Parsanol::ErrorReporter::Tree.new
    source.bytepos = start_pos
    success, value = setup_and_apply(source, reporter, !options[:prefix])

    fail "Assertion failed: success was true when parsing with reporter" \
      if success

    # Value is a Parsanol::Cause, which can be turned into an exception:
    value.raise

    fail "NEVER REACHED"
  end

  # assert: success is true

  # Extra input is now handled inline with the rest of the parsing. If
  # really we have success == true, prefix: false and still some input
  # is left dangling, that is a BUG.
  if !options[:prefix] && source.chars_left > 0
    fail "BUG: New error strategy should not reach this point."
  end

  flatten(value)
end
|
|
71
|
+
|
|
72
|
+
# Creates a context for parsing and applies the current atom to the input.
|
|
73
|
+
# Returns the parse result.
|
|
74
|
+
#
|
|
75
|
+
# @return [<Boolean, Object>] Result of the parse. If the first member is
|
|
76
|
+
# true, the parse has succeeded.
|
|
77
|
+
def setup_and_apply(source, error_reporter, consume_all)
  # Only Parser instances hand their class to the context (it is passed as
  # the parser_class: keyword); any other atom passes nil.
  owner_class = (self.class if is_a?(Parsanol::Parser))

  parse_context = Parsanol::Atoms::Context.new(error_reporter, parser_class: owner_class)
  apply(source, parse_context, consume_all)
end
|
|
84
|
+
|
|
85
|
+
# Calls the #try method of this parslet. Success consumes input, error will
|
|
86
|
+
# rewind the input.
|
|
87
|
+
#
|
|
88
|
+
# @param source [Parsanol::Source] source to read input from
|
|
89
|
+
# @param context [Parsanol::Atoms::Context] context to use for the parsing
|
|
90
|
+
# @param consume_all [Boolean] true if the current parse must consume
|
|
91
|
+
# all input by itself.
|
|
92
|
+
def apply(source, context, consume_all=false)
  start_pos = source.bytepos

  outcome = context.try_with_cache(self, source, consume_all)
  matched, = outcome

  unless matched
    # Failed parse: put the input back where it was and hand the failure on.
    source.bytepos = start_pos
    return outcome
  end

  # Tell the context about the success before looking at leftover input.
  context.succ(source)

  # Success stands unless the caller demanded that all input be consumed
  # and some is still left. The input is not rewound in that case.
  return outcome unless consume_all && source.chars_left > 0

  # Leftover input under consume_all counts as an error. Grab up to 10
  # characters of it for the message (10 is an arbitrary amount).
  leftover_pos = source.bytepos
  leftover = source.consume(10)

  # Rewind, as for every other error.
  source.bytepos = start_pos

  context.err_at(
    self,
    source,
    ERROR_UNKNOWN_INPUT + leftover.to_s.inspect,
    leftover_pos
  )
end
|
|
126
|
+
|
|
127
|
+
# Override this in your Atoms::Base subclasses to implement parsing
|
|
128
|
+
# behaviour.
|
|
129
|
+
#
|
|
130
|
+
def try(source, context, consume_all)
  # The base class has no parsing behaviour of its own; subclasses supply it.
  complaint = "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, complaint
end
|
|
134
|
+
|
|
135
|
+
# Returns true if this atom can be cached in the packrat cache. Most parslet
|
|
136
|
+
# atoms are cached, so this always returns true, unless overridden.
|
|
137
|
+
#
|
|
138
|
+
def cached?
  # Default answer for every atom; subclasses override to opt out.
  true
end
|
|
141
|
+
|
|
142
|
+
# Returns true if this atom produces flat results by construction
|
|
143
|
+
# (no nested arrays/hashes that need flattening). This allows flatten
|
|
144
|
+
# to skip processing for atoms that are known to produce simple values.
|
|
145
|
+
#
|
|
146
|
+
# Session 13: Flatten optimization to reduce 5.27% overhead
|
|
147
|
+
# Atoms like Str and Re always produce strings (Parsanol::Slice),
|
|
148
|
+
# which don't need flattening.
|
|
149
|
+
#
|
|
150
|
+
# @return [Boolean] true if results are flat by construction
|
|
151
|
+
def flat?
  # Conservative default: results are assumed to need flattening.
  false
end
|
|
154
|
+
|
|
155
|
+
# Debug printing - in Treetop syntax.
|
|
156
|
+
#
|
|
157
|
+
def self.precedence(prec)
|
|
158
|
+
define_method(:precedence) { prec }
|
|
159
|
+
end
|
|
160
|
+
precedence BASE
|
|
161
|
+
def to_s(outer_prec=OUTER)
  # The label, when set, replaces the atom's own rendering.
  rendered = label || to_s_inner(precedence)

  # Parenthesize when the surrounding precedence is lower than this atom's.
  outer_prec < precedence ? "(#{rendered})" : rendered
end
|
|
169
|
+
def inspect
|
|
170
|
+
to_s(OUTER)
|
|
171
|
+
end
|
|
172
|
+
private

# Frozen, pre-allocated values for the most common results. They are shared
# across calls so that these outcomes do not allocate a new array each
# time. Because they are frozen and shared, they must never be mutated.
#
SUCCESS_NIL = [true, nil].freeze

# Untagged empty result and its ready-made success tuple.
EMPTY_ARRAY = [].freeze
SUCCESS_EMPTY_ARRAY = [true, EMPTY_ARRAY].freeze

# Tagged empty results, format [tag]. Per the original notes these are
# common for repetitions that match zero times (.maybe, .repeat(0, n)).
EMPTY_REPETITION_ARRAY = [:repetition].freeze
SUCCESS_EMPTY_REPETITION = [true, EMPTY_REPETITION_ARRAY].freeze

EMPTY_SEQUENCE_ARRAY = [:sequence].freeze
SUCCESS_EMPTY_SEQUENCE = [true, EMPTY_SEQUENCE_ARRAY].freeze

# Empty hash result.
EMPTY_HASH = {}.freeze
SUCCESS_EMPTY_HASH = [true, EMPTY_HASH].freeze

# Single-element array for captures and its success tuple.
EMPTY_CAPTURE_ARRAY = [:capture].freeze
SUCCESS_EMPTY_CAPTURE = [true, EMPTY_CAPTURE_ARRAY].freeze

# Produces a success result, a two-element array [true, result], and
# returns it.
#
# When result is nil or is one of the shared EMPTY_* constants (checked by
# identity, not by equality), the matching pre-built frozen tuple is
# returned instead of a new array. Any other value gets a fresh, unfrozen
# tuple.
#
# @param result the value carried by the success
# @return [Array] [true, result]
#
def succ(result)
  return SUCCESS_NIL if result.nil?
  return SUCCESS_EMPTY_ARRAY if result.equal?(EMPTY_ARRAY)
  return SUCCESS_EMPTY_HASH if result.equal?(EMPTY_HASH)
  return SUCCESS_EMPTY_REPETITION if result.equal?(EMPTY_REPETITION_ARRAY)
  return SUCCESS_EMPTY_SEQUENCE if result.equal?(EMPTY_SEQUENCE_ARRAY)
  return SUCCESS_EMPTY_CAPTURE if result.equal?(EMPTY_CAPTURE_ARRAY)

  [true, result]
end
|
|
214
|
+
end
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Parsanol::Atoms
|
|
5
|
+
# A series of helper functions that have the common topic of flattening
|
|
6
|
+
# result values into the intermediary tree that consists of Ruby Hashes and
|
|
7
|
+
# Arrays.
|
|
8
|
+
#
|
|
9
|
+
# This module has one main function, #flatten, that takes an annotated
|
|
10
|
+
# structure as input and returns the reduced form that users expect from
|
|
11
|
+
# Atom#parse.
|
|
12
|
+
#
|
|
13
|
+
# NOTE: Since all of these functions are just that, functions without
|
|
14
|
+
# side effects, they are in a module and not in a class. It's hard to draw
|
|
15
|
+
# the line sometimes, but this is beyond.
|
|
16
|
+
#
|
|
17
|
+
module CanFlatten
|
|
18
|
+
# Takes a mixed value coming out of a parslet and converts it to a return
# value for the user by dropping things and merging hashes.
#
# Anything that is not an Array is returned untouched. An Array is read as
# an s-expression: the first element is the tag (:sequence, :maybe or
# :repetition) and the remaining elements are children, which are
# flattened recursively (always with named=false) before being combined
# according to the tag.
#
# Named is set to true if this result will be embedded in a Hash result
# from naming something using <code>.as(...)</code>. It changes the folding
# semantics of repetition, and makes an empty :maybe yield nil instead
# of ''.
#
# @param value the annotated structure to reduce
# @param named [Boolean] whether the result is being named
# @raise [RuntimeError] if value is an Array whose first element is not a
#   known tag (this includes the empty Array)
#
def flatten(value, named=false)
  # Passes through everything that isn't an array of things.
  return value unless value.is_a?(Array)

  # Extracts the s-expression tag.
  tag = value[0]

  # Reject malformed input before touching any child. This covers the empty
  # array (tag is nil) and avoids flattening children of a node that is
  # going to be rejected anyway.
  case tag
  when :sequence, :maybe, :repetition
    # known tag, carry on
  else
    fail "BUG: Unknown tag #{tag.inspect}."
  end

  if value.size == 2
    # Single child (common case): handled directly, without building an
    # intermediate array. Note that a one-element sequence yields the child
    # as is, even when it is nil.
    only = flatten(value[1])
    case tag
    when :sequence then only
    when :maybe then named ? only : (only || '')
    else flatten_repetition([only], named)
    end
  else
    # Zero or several children: flatten each one, then combine.
    children = Array.new(value.size - 1) { |i| flatten(value[i + 1]) }
    case tag
    when :sequence then flatten_sequence(children)
    when :maybe then named ? children.first : (children.first || '')
    else flatten_repetition(children, named)
    end
  end
end
|
|
67
|
+
|
|
68
|
+
# Left fold in the Lisp tradition: the first element seeds the accumulator
# and the block combines the accumulator with each following element.
#
# An empty list yields ''. A single-element list yields that element and
# the block is never called.
#
# @param list [Array] elements to fold
# @yield [accumulator, element] combines the running result with the next
#   element and returns the new running result
#
def foldl(list, &block)
  return '' if list.size == 0

  # reduce without an initial value starts from the first element, which
  # also covers the single-element case.
  list.reduce { |acc, item| block.call(acc, item) }
end
|
|
84
|
+
|
|
85
|
+
# Flatten results from a sequence of parslets: nil entries are dropped and
# the remaining elements are merged pairwise, left to right, using
# #merge_fold.
#
# @api private
#
def flatten_sequence(list)
  present = list.compact
  foldl(present) do |merged, element|
    merge_fold(merged, element)
  end
end
|
|
94
|
+
# Combines two neighbouring sequence results into one.
#
# * Same class: Hashes are merged (after warning about duplicate keys),
#   anything else is joined with +.
# * String and Slice together: the Slice is kept.
# * Exactly one side string-like: the other side is kept.
# * Exactly one side a Hash: it is put into an array and joined to the
#   other side.
#
# Classes are compared exactly (no subclass matching).
#
# @api private
# @raise [RuntimeError] if none of the cases above applies
#
def merge_fold(l, r)
  left_class = l.class
  right_class = r.class

  # equal pairs: merge. ----------------------------------------------------
  if left_class == right_class
    return l + r unless left_class == Hash

    warn_about_duplicate_keys(l, r)
    return l.merge(r)
  end

  # unequal pairs: hoist to same level. ------------------------------------
  left_slice = l.instance_of?(Parsanol::Slice)
  right_slice = r.instance_of?(Parsanol::Slice)
  left_text = left_class == String || left_slice
  right_text = right_class == String || right_slice

  if left_text
    if right_text
      # Both are string-like but of different classes, so one of them has
      # to be a Slice - and the Slice wins.
      return r if right_slice
      return l if left_slice

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # Only the left side is string-like: the right side is more important.
    return r
  end

  # Only the right side is string-like: the left side is more important.
  return l if right_text

  # otherwise just create an array for one of them to live in
  return l + [r] if right_class == Hash
  return [l] + r if left_class == Hash

  fail "Unhandled case when foldr'ing sequence."
end
|
|
136
|
+
|
|
137
|
+
# Flatten results from a repetition of a single parslet. named indicates
# whether the user has named the result or not. If the user has named
# the results, we want to leave an empty list alone - otherwise it is
# turned into an empty string.
#
# Outcome, in order of priority:
# 1. any Hash present   -> only the Hashes are kept
# 2. any Array present  -> only the Arrays are kept, flattened one level
# 3. named, empty list  -> []
# 4. otherwise          -> non-nil elements joined with +
#
# Hash and Array are matched by exact class.
#
# @api private
#
def flatten_repetition(list, named)
  # If keyed subtrees are in the array, we'll want to discard all
  # strings inbetween. To keep them, name them.
  if list.any? { |e| e.instance_of?(Hash) }
    return list.select { |e| e.instance_of?(Hash) }
  end

  # If any arrays are nested in this array, flatten all arrays to this
  # level.
  if list.any? { |e| e.instance_of?(Array) }
    nested = list.select { |e| e.instance_of?(Array) }
    return nested.flatten(1)
  end

  # Consistent handling of empty lists, when we act on a named result
  return [] if named && list.empty?

  # If there are only strings, concatenate them and return that.
  foldl(list.compact) { |joined, piece| joined + piece }
end
|
|
180
|
+
|
|
181
|
+
# That annoying warning 'Duplicate subtrees while merging result' comes
# from here. You should add more '.as(...)' names to your intermediary tree.
#
# Emits the warning when the two hashes have at least one key in common;
# does nothing otherwise.
#
# @param h1 [Hash] left-hand hash of the merge
# @param h2 [Hash] right-hand hash of the merge
# @return [nil]
#
def warn_about_duplicate_keys(h1, h2)
  shared = h1.keys & h2.keys
  return if shared.empty?

  warn "Duplicate subtrees while merging result of \n #{inspect}\nonly the values " \
       "of the latter will be kept. (keys: #{shared.inspect})"
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
|
|
2
|
+
# Wraps another atom and, whenever that atom matches, stores the flattened
# match result in the #captures of the parse context under the given name.
# This makes it possible to pull parts of the ongoing parse out later and
# use them to match other pieces of input.
#
# Example:
#   # After this, context.captures[:an_a] returns 'a'
#   str('a').capture(:an_a)
#
#   # Capture and use of the capture: (matches either 'aa' or 'bb')
#   match['ab'].capture(:first) >>
#     dynamic { |src, ctx| str(ctx.captures[:first]) }
#
class Parsanol::Atoms::Capture < Parsanol::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet the atom whose result is captured
  # @param name key for the capture; converted with #to_sym when stored
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped atom. On success the flattened value is recorded in
  # context.captures; the wrapped atom's result is returned unchanged in
  # either case.
  def apply(source, context, consume_all)
    outcome = @parslet.apply(source, context, consume_all)
    matched, captured = outcome

    context.captures[@name.to_sym] = flatten(captured) if matched

    outcome
  end

  # Debug representation: "(<name> = <wrapped atom>)".
  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end
|