parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
# Module for streaming builder callbacks.
|
|
5
|
+
# Include this module in your builder class to receive callbacks
|
|
6
|
+
# during single-pass parsing with the streaming builder API.
|
|
7
|
+
#
|
|
8
|
+
# The streaming builder API allows maximum performance by eliminating
|
|
9
|
+
# intermediate AST construction. Instead, callbacks are invoked as
|
|
10
|
+
# parsing progresses, allowing you to construct custom output directly.
|
|
11
|
+
#
|
|
12
|
+
# @example Basic string collector
|
|
13
|
+
# class StringCollector
|
|
14
|
+
# include Parsanol::BuilderCallbacks
|
|
15
|
+
#
|
|
16
|
+
# def initialize
|
|
17
|
+
# @strings = []
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
# def on_string(value, offset, length)
|
|
21
|
+
# @strings << value
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# def finish
|
|
25
|
+
# @strings
|
|
26
|
+
# end
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
# grammar = Parsanol::Native.serialize_grammar(parser.root)
|
|
30
|
+
# builder = StringCollector.new
|
|
31
|
+
# result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
32
|
+
# # result: ["hello", "world"]
|
|
33
|
+
#
|
|
34
|
+
# @example Building a typed AST
|
|
35
|
+
# class AstBuilder
|
|
36
|
+
# include Parsanol::BuilderCallbacks
|
|
37
|
+
#
|
|
38
|
+
# def initialize
|
|
39
|
+
# @stack = []
|
|
40
|
+
# @current_hash = nil
|
|
41
|
+
# @current_key = nil
|
|
42
|
+
# end
|
|
43
|
+
#
|
|
44
|
+
# def on_hash_start(size = nil)
|
|
45
|
+
# @stack.push(@current_hash) if @current_hash
|
|
46
|
+
# @current_hash = {}
|
|
47
|
+
# end
|
|
48
|
+
#
|
|
49
|
+
# def on_hash_end(size)
|
|
50
|
+
# finished = @current_hash
|
|
51
|
+
# @current_hash = @stack.pop
|
|
52
|
+
# if @current_hash && @current_key
|
|
53
|
+
# @current_hash[@current_key] = finished
|
|
54
|
+
# @current_key = nil
|
|
55
|
+
# end
|
|
56
|
+
# finished
|
|
57
|
+
# end
|
|
58
|
+
#
|
|
59
|
+
# def on_hash_key(key)
|
|
60
|
+
# @current_key = key
|
|
61
|
+
# end
|
|
62
|
+
#
|
|
63
|
+
# def on_string(value, offset, length)
|
|
64
|
+
# if @current_hash && @current_key
|
|
65
|
+
# @current_hash[@current_key] = value
|
|
66
|
+
# @current_key = nil
|
|
67
|
+
# end
|
|
68
|
+
# end
|
|
69
|
+
#
|
|
70
|
+
# def finish
|
|
71
|
+
# @current_hash
|
|
72
|
+
# end
|
|
73
|
+
# end
|
|
74
|
+
#
|
|
75
|
+
module BuilderCallbacks
  # Hook invoked when parsing starts. Does nothing by default.
  #
  # @param input [String] the input being parsed
  # @return [void]
  def on_start(input)
  end

  # Hook invoked when parsing succeeds. Does nothing by default.
  #
  # @return [void]
  def on_success
  end

  # Hook invoked when parsing fails. Does nothing by default.
  #
  # @param message [String] the error message
  # @return [void]
  def on_error(message)
  end

  # Hook invoked for every matched string value.
  #
  # @param value [String] the matched string
  # @param offset [Integer] byte offset of the match in the original input
  # @param length [Integer] length of the match in bytes
  # @return [void]
  def on_string(value, offset, length)
  end

  # Hook invoked for every matched integer value.
  #
  # @param value [Integer] the matched integer
  # @return [void]
  def on_int(value)
  end

  # Hook invoked for every matched float value.
  #
  # @param value [Float] the matched float
  # @return [void]
  def on_float(value)
  end

  # Hook invoked for every matched boolean value.
  #
  # @param value [Boolean] the matched boolean
  # @return [void]
  def on_bool(value)
  end

  # Hook invoked for every matched nil/null value.
  #
  # @return [void]
  def on_nil
  end

  # Hook invoked when a hash/object begins; a good place to set up state
  # for collecting key-value pairs.
  #
  # @param size [Integer, nil] expected number of entries (may be nil)
  # @return [void]
  def on_hash_start(size = nil)
  end

  # Hook invoked when a hash/object has been fully parsed.
  #
  # @param size [Integer] actual number of entries
  # @return [void]
  def on_hash_end(size)
  end

  # Hook invoked when a hash key is encountered. The value callback(s)
  # that follow provide the value for this key.
  #
  # @param key [String] the hash key name
  # @return [void]
  def on_hash_key(key)
  end

  # Hook invoked after on_hash_key, right before the corresponding value
  # is parsed.
  #
  # @param key [String] the hash key name
  # @return [void]
  def on_hash_value(key)
  end

  # Hook invoked when an array begins; a good place to set up state for
  # collecting elements.
  #
  # @param size [Integer, nil] expected number of elements (may be nil)
  # @return [void]
  def on_array_start(size = nil)
  end

  # Hook invoked right before an array element is parsed.
  #
  # @param index [Integer] index of the element
  # @return [void]
  def on_array_element(index)
  end

  # Hook invoked when an array has been fully parsed.
  #
  # @param size [Integer] actual number of elements
  # @return [void]
  def on_array_end(size)
  end

  # Hook invoked when a named rule begins.
  #
  # @param name [String] the rule name
  # @return [void]
  def on_named_start(name)
  end

  # Hook invoked when a named rule has been fully parsed.
  #
  # @param name [String] the rule name
  # @return [void]
  def on_named_end(name)
  end

  # Hook invoked once parsing is complete. Override it to hand back the
  # result your builder constructed; the default returns nil.
  #
  # @return [Object] the final result of the builder
  def finish
  end
end
|
|
188
|
+
|
|
189
|
+
# Built-in builders for common use cases
|
|
190
|
+
module Builders
|
|
191
|
+
# Debug builder that collects all events as strings.
|
|
192
|
+
# Useful for understanding the parsing flow.
|
|
193
|
+
class DebugBuilder
  include BuilderCallbacks

  # @return [Array<String>] one human-readable entry per callback received
  attr_reader :events

  def initialize
    @events = []
  end

  # Each callback below appends one descriptive line to #events.

  def on_start(input)
    @events.push("start: #{input.inspect}")
  end

  def on_success
    @events.push("success")
  end

  def on_error(message)
    @events.push("error: #{message}")
  end

  def on_string(value, offset, length)
    @events.push("string: #{value.inspect} @ #{offset}(#{length})")
  end

  def on_int(value)
    @events.push("int: #{value}")
  end

  def on_float(value)
    @events.push("float: #{value}")
  end

  def on_bool(value)
    @events.push("bool: #{value}")
  end

  def on_nil
    @events.push("nil")
  end

  def on_hash_start(size = nil)
    @events.push("hash_start(#{size.inspect})")
  end

  def on_hash_end(size)
    @events.push("hash_end(#{size})")
  end

  def on_hash_key(key)
    @events.push("hash_key: #{key.inspect}")
  end

  def on_hash_value(key)
    @events.push("hash_value: #{key.inspect}")
  end

  def on_array_start(size = nil)
    @events.push("array_start(#{size.inspect})")
  end

  def on_array_element(index)
    @events.push("array_element[#{index}]")
  end

  def on_array_end(size)
    @events.push("array_end(#{size})")
  end

  def on_named_start(name)
    @events.push("named_start: #{name}")
  end

  def on_named_end(name)
    @events.push("named_end: #{name}")
  end

  # @return [String] all recorded events, one per line
  def finish
    @events.join("\n")
  end
end
|
|
274
|
+
|
|
275
|
+
# Builder that collects all string values.
|
|
276
|
+
class StringCollector
  include BuilderCallbacks

  # @return [Array<String>] string values in the order they were reported
  attr_reader :strings

  def initialize
    @strings = []
  end

  # Lifecycle notifications are deliberately ignored by this builder.
  def on_start(input)
  end

  def on_success
  end

  def on_error(message)
  end

  # Remembers the matched string; offset and length are not used.
  def on_string(value, offset, length)
    @strings.push(value)
  end

  # @return [Array<String>] every string collected during the parse
  def finish
    @strings
  end
end
|
|
299
|
+
|
|
300
|
+
# Builder that counts nodes by type.
|
|
301
|
+
class NodeCounter
  include BuilderCallbacks

  # @return [Hash{Symbol => Integer}] number of nodes seen per type;
  #   types that were never seen read as 0
  attr_reader :counts

  def initialize
    @counts = Hash.new(0)
  end

  # Lifecycle notifications are deliberately ignored by this builder.
  def on_start(input)
  end

  def on_success
  end

  def on_error(message)
  end

  def on_string(value, offset, length)
    bump(:string)
  end

  def on_int(value)
    bump(:int)
  end

  def on_float(value)
    bump(:float)
  end

  def on_bool(value)
    bump(:bool)
  end

  def on_nil
    bump(:nil)
  end

  def on_hash_start(size = nil)
    bump(:hash)
  end

  def on_array_start(size = nil)
    bump(:array)
  end

  def on_named_start(name)
    bump(:named)
  end

  # @return [Hash{Symbol => Integer}] the per-type counters
  def finish
    @counts
  end

  private

  # Adds one to the counter for +kind+ and returns the new count.
  def bump(kind)
    @counts[kind] += 1
  end
end
|
|
352
|
+
end
|
|
353
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
module Parsanol
|
|
2
|
+
# Represents the reason why a parse failed. A lot of these objects are
|
|
3
|
+
# constructed - not all of the causes turn out to be failures for the whole
|
|
4
|
+
# parse.
|
|
5
|
+
#
|
|
6
|
+
class Cause
  # @param message [String, Array] message, or list of message pieces
  # @param source [Parsanol::Source] source that was being parsed
  # @param pos [Integer] position of the failure in the source
  # @param children [Array<Parsanol::Cause>, nil] causes that led to this one
  def initialize(message, source, pos, children)
    @message = message
    @source = source
    @pos = pos
    @children = children
    @context = nil
  end

  # @return [String, Array] A string or an array of message pieces that
  #   provide failure information. Use #to_s to get a formatted string.
  attr_reader :message

  # @return [Parsanol::Source] Source that was parsed when this error
  #   happened. Mainly used for line number information.
  attr_reader :source

  # Location of the error.
  #
  # @return [Integer] Position where the error happened. (character offset)
  attr_reader :pos

  # When this cause is part of a tree of error causes: child nodes for this
  # node. Very often carries the reasons for this cause.
  #
  # @return [Array<Parsanol::Cause>] A list of reasons for this cause.
  def children
    @children ||= []
  end

  # Builds a cause for the given source position. The 'at line LINE char
  # CHAR' suffix is added when the returned object is turned into a string
  # with #to_s.
  #
  # @param source [Parsanol::Source] source that was parsed when this error
  #   happened
  # @param pos [Integer] position of error
  # @param str [String, Array<String>] message parts
  # @param children [Array<Parsanol::Cause>] child nodes for this error tree
  # @return [Parsanol::Cause] a new instance of {Parsanol::Cause}
  def self.format(source, pos, str, children = [])
    new(str, source, pos, children)
  end

  # Updates the error message to include the context provided by a label.
  # The same context is applied to every child cause as well.
  #
  # @param l [#to_s] label naming what was being parsed
  def set_label(l)
    @context = " when parsing #{l}"
    children.each { |c| c.set_label(l) }
  end

  # @return [String] the message followed by line/column information and,
  #   if a label was set, the parsing context
  def to_s
    line, column = source.line_and_column(pos)
    # The message may be a list of objects; they are only joined here,
    # where the text is really needed. Slice-like parts are shown quoted.
    text = Array(message).map { |part|
      part.respond_to?(:to_slice) ? part.str.inspect : part.to_s
    }.join
    "#{text} at line #{line} char #{column}#{@context}."
  end

  # Signals to the outside that the parse has failed. Use this in
  # conjunction with .format for nice error messages.
  #
  # @param exception_klass [Class] exception class, instantiated with the
  #   formatted message and this cause
  def raise(exception_klass = Parsanol::ParseFailed)
    exception = exception_klass.new(to_s, self)
    Kernel.raise exception
  end

  # Returns an ascii tree representation of the causes of this node and its
  # children.
  #
  # @return [String] one line per cause, children indented below parents
  def ascii_tree
    io = StringIO.new
    recursive_ascii_tree(self, io, [true])
    io.string
  end

  private

  # Writes +node+ and, recursively, its children to +stream+. +curved+
  # holds one flag per ancestry level telling whether the node on that level
  # is the last child of its parent.
  def recursive_ascii_tree(node, stream, curved)
    append_prefix(stream, curved)
    stream.puts node.to_s

    kids = node.children
    final_index = kids.size - 1
    # Compare positions rather than objects so that a cause listed twice is
    # only drawn as "last" at its final occurrence.
    kids.each_with_index do |child, index|
      recursive_ascii_tree(child, stream, curved + [index == final_index])
    end
  end

  # Writes the indentation and connector in front of a tree line. The root
  # (a single flag) gets no prefix.
  def append_prefix(stream, curved)
    return if curved.size < 2

    # Ancestor columns are three characters wide so that they line up with
    # the three-character connectors printed below.
    curved[1..-2].each do |c|
      stream.print c ? "   " : "|  "
    end
    stream.print curved.last ? "`- " : "|- "
  end
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Provides a context for tree transformations to run in. The context allows
|
|
4
|
+
# accessing each of the bindings in the bindings hash as local method.
|
|
5
|
+
#
|
|
6
|
+
# Example:
|
|
7
|
+
#
|
|
8
|
+
# ctx = Context.new(:a => :b)
|
|
9
|
+
# ctx.instance_eval do
|
|
10
|
+
# a # => :b
|
|
11
|
+
# end
|
|
12
|
+
#
|
|
13
|
+
# @api private
|
|
14
|
+
class Parsanol::Context
  include Parsanol

  # Exposes every binding both as a reader method on this instance only
  # and as an instance variable of the same name.
  #
  # @param bindings [Hash] binding names mapped to their values
  def initialize(bindings)
    eigen = singleton_class
    bindings.each do |name, bound|
      eigen.send(:define_method, name) { bound }
      instance_variable_set("@#{name}", bound)
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Parsanol::Atoms::Base
  # Wraps the common idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parsanol::ParseFailed => error
  #     puts error.parse_failure_cause.ascii_tree
  #   end
  #
  # in a single call.
  #
  # Usage:
  #
  #   require 'parsanol'
  #   require 'parsanol/convenience'
  #
  #   class FooParser < Parsanol::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # @param str input handed to #parse
  # @param opts [Hash] options handed to #parse unchanged
  # @return whatever #parse returns, or nil once the failure tree has been
  #   printed
  # @see Parsanol::Atoms::Base#parse
  #
  def parse_with_debug(str, opts = {})
    parse(str, opts)
  rescue Parsanol::ParseFailed => failure
    puts failure.parse_failure_cause.ascii_tree
  end
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Edit tracking for GPeg-style incremental parsing
|
|
2
|
+
# Based on the GPeg paper: "Fast Incremental PEG Parsing" (Yedidia, SLE 2021)
|
|
3
|
+
#
|
|
4
|
+
# Tracks edits to the input as [position, delta] pairs and enables lazy shifting
|
|
5
|
+
# of cached intervals without rebuilding the entire cache (O(1) edit cost).
|
|
6
|
+
#
|
|
7
|
+
class Parsanol::EditTracker
  # An edit operation: insertion (+delta) or deletion (-delta) at a position
  class Edit
    attr_reader :position, :delta

    # @param position [Integer] where the edit occurred
    # @param delta [Integer] characters added (positive) or removed (negative)
    def initialize(position, delta)
      @position = position
      @delta = delta
    end

    # @return [String] human-readable description of the edit
    def to_s
      if @delta > 0
        "Insert(#{@delta} chars at #{@position})"
      else
        "Delete(#{-@delta} chars at #{@position})"
      end
    end
  end

  # Get all edits (for debugging)
  attr_reader :edits

  def initialize
    @edits = [] # List of edits in chronological order
  end

  # Record an insertion at position
  # @param position [Integer] Where the insertion occurred
  # @param length [Integer] Number of characters inserted
  def insert(position, length)
    @edits << Edit.new(position, length)
  end

  # Record a deletion at position
  # @param position [Integer] Where the deletion occurred
  # @param length [Integer] Number of characters deleted
  def delete(position, length)
    @edits << Edit.new(position, -length)
  end

  # Shift an interval based on accumulated edits
  # Returns the shifted interval [low', high') or nil if interval is invalidated
  #
  # Edits are replayed in the order they were recorded, and each edit is
  # compared against the interval as already shifted by the earlier edits.
  #
  # An interval is invalidated if any edit overlaps with it, as the cached
  # parse result is no longer valid. An edit overlaps when its position lies
  # within [low, high), or when it is a deletion that starts before the
  # interval and removes characters past the interval's start.
  #
  # @param low [Integer] Interval start position
  # @param high [Integer] Interval end position (exclusive)
  # @return [Array<Integer>, nil] Shifted [low, high) or nil if invalidated
  def shift_interval(low, high)
    shifted_low = low
    shifted_high = high

    @edits.each do |edit|
      # Skip zero-length edits (no-ops)
      next if edit.delta == 0

      if edit.position >= shifted_low && edit.position < shifted_high
        # Edit starts inside the interval - invalidate
        return nil
      elsif edit.position < shifted_low
        # A deletion removes [position, position - delta). If that range
        # reaches past the interval start, cached content was removed, so the
        # interval must be invalidated rather than shifted.
        return nil if edit.delta < 0 && edit.position - edit.delta > shifted_low

        # Edit lies entirely before the interval - shift both boundaries
        shifted_low += edit.delta
        shifted_high += edit.delta
      end
      # Otherwise the edit is at or after the interval end - no shift needed

      # Sanity check: ensure interval remains valid
      return nil if shifted_low < 0 || shifted_high < shifted_low
    end

    [shifted_low, shifted_high]
  end

  # Check if interval needs invalidation (overlaps with any edit)
  # @param low [Integer] Interval start position
  # @param high [Integer] Interval end position (exclusive)
  # @return [Boolean] true if interval should be invalidated
  def invalidates?(low, high)
    shift_interval(low, high).nil?
  end

  # Clear all recorded edits
  def clear
    @edits.clear
  end

  # Number of edits tracked
  def size
    @edits.size
  end

  # Check if any edits have been recorded
  def empty?
    @edits.empty?
  end
end
|