parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '01840daa27539f714d2270ca715a635a783f72adcd4370d3e1196fbcd05015c3'
|
|
4
|
+
data.tar.gz: a54323f2b2da9bbc65f8a984411eb67c44495af2530f19e5b57a845e832d7648
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: debaafa0bf490766dc6d9b4e871c444c532b9882a9074e2603d779339c4706b68afc9b94d6a476a8d84ea3c458ebe1538fb67e9bd9ff0c153a8c64eda71387ee
|
|
7
|
+
data.tar.gz: 5efa3a1e216953644811211e48bb0aeaed5ef2106b6a387f3a8b27a9a858e60b95c23f063c549ba236fc5e5ef8b75f7de0e1a98f61836624a2533ea7c0335f74
|
data/HISTORY.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## Unreleased
|
|
4
|
+
|
|
5
|
+
## 3.0.0 (2025-02-28)
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- Updated to use parsanol 0.1.2 from crates.io
|
|
9
|
+
- Fixed repetition with separator pattern in Parslet compatibility layer
|
|
10
|
+
- Added spec for repetition pattern handling
|
|
11
|
+
|
|
12
|
+
### Changes
|
|
13
|
+
- Updated Cargo.toml to use published parsanol crate instead of local path
|
|
14
|
+
|
|
15
|
+
## 2.0.0
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- Initial release as Parsanol (renamed from Parslet)
|
|
19
|
+
- Ruby native extension support
|
|
20
|
+
- Multiple parser modes: RubyTransform, JsonOutput, DirectObjects
|
|
21
|
+
|
|
22
|
+
## 1.0.0
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
- Original Parslet parser library
|
data/LICENSE
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Copyright (c) 2025 Ribose Inc.
|
|
2
|
+
Copyright (c) 2010-2018 Kaspar Schiess
|
|
3
|
+
|
|
4
|
+
Permission is hereby granted, free of charge, to any person
|
|
5
|
+
obtaining a copy of this software and associated documentation
|
|
6
|
+
files (the "Software"), to deal in the Software without
|
|
7
|
+
restriction, including without limitation the rights to use,
|
|
8
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the
|
|
10
|
+
Software is furnished to do so, subject to the following
|
|
11
|
+
conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
18
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
20
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
21
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
22
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
23
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.adoc
ADDED
|
@@ -0,0 +1,643 @@
|
|
|
1
|
+
= Parsanol
|
|
2
|
+
|
|
3
|
+
image:https://img.shields.io/gem/v/parsanol.svg[RubyGems Version]
|
|
4
|
+
image:https://img.shields.io/github/license/parsanol/parsanol-ruby.svg[License]
|
|
5
|
+
image:https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml/badge.svg["Build", link="https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml"]
|
|
6
|
+
|
|
7
|
+
A high-performance PEG (Parsing Expression Grammar) parser construction library for Ruby, with support for static frozen parsers and dynamic parsers.
|
|
8
|
+
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
Parsanol provides a DSL for constructing parsers using PEG semantics. It offers excellent error reporting and supports both pure Ruby parsing and optional Rust native extensions for maximum performance.
|
|
12
|
+
|
|
13
|
+
== Features
|
|
14
|
+
|
|
15
|
+
* PEG-based parser construction
|
|
16
|
+
* Great error reporting with detailed parse failure information
|
|
17
|
+
* Optional Rust native extension for up to 29x faster parsing
|
|
18
|
+
* **Slice support** - preserves source positions for linters and IDEs
|
|
19
|
+
* WebAssembly support for browser/Node.js environments
|
|
20
|
+
* Tree transformation for converting parse results to AST
|
|
21
|
+
* Grammar optimization passes
|
|
22
|
+
* Memory-efficient parsing with object pooling
|
|
23
|
+
* **Streaming Builder API** - single-pass parsing with Ruby callbacks
|
|
24
|
+
* **Parallel Parsing** - batch processing with multi-core speedup
|
|
25
|
+
* **Infix Expression Parsing** - built-in operator precedence support
|
|
26
|
+
* **Security Features** - input size and recursion limits for untrusted data
|
|
27
|
+
* **Debug Tools** - tracing and grammar visualization
|
|
28
|
+
|
|
29
|
+
== Installation
|
|
30
|
+
|
|
31
|
+
[source,shell]
|
|
32
|
+
----
|
|
33
|
+
gem install parsanol
|
|
34
|
+
----
|
|
35
|
+
|
|
36
|
+
Or add to your Gemfile:
|
|
37
|
+
|
|
38
|
+
[source,ruby]
|
|
39
|
+
----
|
|
40
|
+
gem 'parsanol'
|
|
41
|
+
----
|
|
42
|
+
|
|
43
|
+
== Usage
|
|
44
|
+
|
|
45
|
+
=== Basic Parser
|
|
46
|
+
|
|
47
|
+
[source,ruby]
|
|
48
|
+
----
|
|
49
|
+
require 'parsanol'
|
|
50
|
+
|
|
51
|
+
class MyParser < Parsanol::Parser
|
|
52
|
+
rule(:keyword) { str('if') | str('while') }
|
|
53
|
+
rule(:expression) { keyword >> str('(') >> expression >> str(')') }
|
|
54
|
+
root(:expression)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
parser = MyParser.new
|
|
58
|
+
result = parser.parse('if(x)')
|
|
59
|
+
----
|
|
60
|
+
|
|
61
|
+
=== Parslet Compatibility
|
|
62
|
+
|
|
63
|
+
Parsanol is fully compatible with the Parslet API:
|
|
64
|
+
|
|
65
|
+
[source,ruby]
|
|
66
|
+
----
|
|
67
|
+
require 'parslet' # Works exactly like original Parslet
|
|
68
|
+
|
|
69
|
+
class MyParser < Parslet::Parser
|
|
70
|
+
rule(:hello) { str('hello') }
|
|
71
|
+
root(:hello)
|
|
72
|
+
end
|
|
73
|
+
----
|
|
74
|
+
|
|
75
|
+
=== Transformation
|
|
76
|
+
|
|
77
|
+
[source,ruby]
|
|
78
|
+
----
|
|
79
|
+
class MyTransform < Parsanol::Transform
|
|
80
|
+
rule(keyword: simple(:k)) { Keyword.new(k) }
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
MyTransform.apply(parse_tree)
|
|
84
|
+
----
|
|
85
|
+
|
|
86
|
+
== Migrating from Parslet
|
|
87
|
+
|
|
88
|
+
Parsanol provides full Parslet API compatibility with two migration modes.
|
|
89
|
+
|
|
90
|
+
=== Mode 1: Drop-in Replacement (Zero Code Changes)
|
|
91
|
+
|
|
92
|
+
Simply replace the parslet gem with parsanol in your Gemfile:
|
|
93
|
+
|
|
94
|
+
[source,diff]
|
|
95
|
+
----
|
|
96
|
+
# Gemfile
|
|
97
|
+
- gem 'parslet'
|
|
98
|
+
+ gem 'parsanol'
|
|
99
|
+
----
|
|
100
|
+
|
|
101
|
+
Your existing code works without modification:
|
|
102
|
+
|
|
103
|
+
[source,ruby]
|
|
104
|
+
----
|
|
105
|
+
# No changes needed!
|
|
106
|
+
require 'parslet' # Parsanol aliases itself
|
|
107
|
+
|
|
108
|
+
class MyParser < Parslet::Parser
|
|
109
|
+
rule(:number) { match('[0-9]').repeat(1) }
|
|
110
|
+
root(:number)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
parser = MyParser.new
|
|
114
|
+
parser.parse('123') # Works exactly the same
|
|
115
|
+
----
|
|
116
|
+
|
|
117
|
+
*What you get:*
|
|
118
|
+
- Up to 29x performance improvement (measured with Expressir)
|
|
119
|
+
- Slice support for source position tracking
|
|
120
|
+
- 99.5% fewer allocations
|
|
121
|
+
- Same error messages
|
|
122
|
+
- 100% API compatibility (including Parslet::Slice)
|
|
123
|
+
|
|
124
|
+
=== Mode 2: Native Parsanol API (Enhanced Features)
|
|
125
|
+
|
|
126
|
+
For maximum performance and new features:
|
|
127
|
+
|
|
128
|
+
[source,ruby]
|
|
129
|
+
----
|
|
130
|
+
require 'parsanol' # Use Parsanol namespace
|
|
131
|
+
|
|
132
|
+
class MyParser < Parsanol::Parser
|
|
133
|
+
rule(:number) { match('[0-9]').repeat(1) }
|
|
134
|
+
root(:number)
|
|
135
|
+
end
|
|
136
|
+
----
|
|
137
|
+
|
|
138
|
+
*Additional features in native mode:*
|
|
139
|
+
- Direct Rust FFI (fastest)
|
|
140
|
+
- JSON grammar export
|
|
141
|
+
- Expression parser with precedence climbing
|
|
142
|
+
- Streaming parsing (planned)
|
|
143
|
+
|
|
144
|
+
=== API Compatibility Matrix
|
|
145
|
+
|
|
146
|
+
[cols="2,1,3"]
|
|
147
|
+
|===
|
|
148
|
+
| Parslet API | Status | Notes
|
|
149
|
+
|
|
150
|
+
| `str('foo')` | ✅ | Literal string match
|
|
151
|
+
| `match('[0-9]')` | ✅ | Character class
|
|
152
|
+
| `any` | ✅ | Any single character
|
|
153
|
+
| `>>` (sequence) | ✅ | Sequential composition
|
|
154
|
+
| `\|` (choice) | ✅ | Ordered choice
|
|
155
|
+
| `.repeat(n, m)` | ✅ | Repetition with bounds
|
|
156
|
+
| `.repeat(1)` | ✅ | One or more
|
|
157
|
+
| `.repeat` | ✅ | Zero or more
|
|
158
|
+
| `.maybe` | ✅ | Optional
|
|
159
|
+
| `.as(:name)` | ✅ | Label capture
|
|
160
|
+
| `.absent?` | ✅ | Negative lookahead
|
|
161
|
+
| `.present?` | ✅ | Positive lookahead
|
|
162
|
+
| `infix_expression` | ✅ | Precedence climbing
|
|
163
|
+
| `parse()` | ✅ | Parse and return tree
|
|
164
|
+
| `parse_with_debug()` | ✅ | Parse with error output
|
|
165
|
+
| `Parslet::Transform` | ✅ | Tree transformation
|
|
166
|
+
| `simple(:x)` | ✅ | Match simple value
|
|
167
|
+
| `sequence(:x)` | ✅ | Match array of values
|
|
168
|
+
| `subtree(:x)` | ✅ | Match any subtree
|
|
169
|
+
| `Parslet::Slice` | ✅ | Parsanol::Slice compatible
|
|
170
|
+
|===
|
|
171
|
+
|
|
172
|
+
=== Migration Checklist
|
|
173
|
+
|
|
174
|
+
. **Back up your project**
|
|
175
|
+
|
|
176
|
+
. **Update Gemfile**
|
|
177
|
+
+
|
|
178
|
+
[source,ruby]
|
|
179
|
+
----
|
|
180
|
+
gem 'parsanol' # Replace parslet
|
|
181
|
+
----
|
|
182
|
+
|
|
183
|
+
. **Run tests**
|
|
184
|
+
+
|
|
185
|
+
[source,shell]
|
|
186
|
+
----
|
|
187
|
+
bundle install
|
|
188
|
+
bundle exec rspec
|
|
189
|
+
----
|
|
190
|
+
|
|
191
|
+
. **Verify performance** (optional)
|
|
192
|
+
+
|
|
193
|
+
[source,ruby]
|
|
194
|
+
----
|
|
195
|
+
require 'benchmark'
|
|
196
|
+
|
|
197
|
+
input = "your test input"
|
|
198
|
+
Benchmark.bm do |x|
|
|
199
|
+
x.report("parse") { 1000.times { parser.parse(input) } }
|
|
200
|
+
end
|
|
201
|
+
----
|
|
202
|
+
|
|
203
|
+
=== Common Gotchas
|
|
204
|
+
|
|
205
|
+
. **Native extension requires Rust**
|
|
206
|
+
+
|
|
207
|
+
Parsanol falls back to pure Ruby if Rust is unavailable. For maximum performance, install Rust:
|
|
208
|
+
+
|
|
209
|
+
[source,shell]
|
|
210
|
+
----
|
|
211
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
212
|
+
----
|
|
213
|
+
|
|
214
|
+
. **Error message formatting**
|
|
215
|
+
+
|
|
216
|
+
Error messages have slightly different formatting but contain the same information.
|
|
217
|
+
|
|
218
|
+
. **Grammar introspection**
|
|
219
|
+
+
|
|
220
|
+
Works the same way in both modes.
|
|
221
|
+
|
|
222
|
+
=== Performance Comparison
|
|
223
|
+
|
|
224
|
+
[cols="3,2,2,2"]
|
|
225
|
+
|===
|
|
226
|
+
| Operation | Parslet | Parsanol (Ruby) | Parsanol (ZeroCopy+Slice)
|
|
227
|
+
|
|
228
|
+
| EXPRESS parsing (22KB) | 3036 ms | - | 106 ms (28.7x)
|
|
229
|
+
| JSON parsing | 1x | 1x | 29x
|
|
230
|
+
| Calculator | 1x | 1x | 25x
|
|
231
|
+
| Memory allocations | 100% | 100% | 0.5%
|
|
232
|
+
|===
|
|
233
|
+
|
|
234
|
+
== Performance
|
|
235
|
+
|
|
236
|
+
Parsanol offers 5 different approaches for parsing in Ruby, each moving more work from Ruby to Rust:
|
|
237
|
+
|
|
238
|
+
=== The 5 Approaches
|
|
239
|
+
|
|
240
|
+
[cols="5,2,2,3"]
|
|
241
|
+
|===
|
|
242
|
+
| Approach | Speed | Use Case | How It Works
|
|
243
|
+
|
|
244
|
+
| 1. parslet-ruby | 1x (baseline) | Compatibility, debugging | Pure Ruby parsing
|
|
245
|
+
| 2. parsanol-ruby | ~1x | Learning, prototyping | Parsanol Ruby backend
|
|
246
|
+
| 3. parsanol-native (Batch) | ~20x | Need Ruby objects | Rust parsing, AST via u64
|
|
247
|
+
| 4. parsanol-native (ZeroCopy) | ~25x | Maximum performance | Direct FFI construction
|
|
248
|
+
| 5. parsanol-native (ZeroCopy + Slice) | ~29x | Linters, IDEs, Expressir | Zero-copy + source positions
|
|
249
|
+
|===
|
|
250
|
+
|
|
251
|
+
=== Slice Support (New)
|
|
252
|
+
|
|
253
|
+
The ZeroCopy + Slice mode preserves source position information for each parsed element:
|
|
254
|
+
|
|
255
|
+
[source,ruby]
|
|
256
|
+
----
|
|
257
|
+
# Before (plain strings - no position info):
|
|
258
|
+
[{"word"=>"hello"}, " ", {"name"=>"world"}]
|
|
259
|
+
|
|
260
|
+
# After (Slice objects with position info):
|
|
261
|
+
[{"word"=>"hello"@0}, " "@5, {"name"=>"world"@6}]
|
|
262
|
+
|
|
263
|
+
# The @N notation shows the byte offset in the original input
|
|
264
|
+
# Parsanol::Slice is compatible with Parslet::Slice
|
|
265
|
+
----
|
|
266
|
+
|
|
267
|
+
This is essential for tools like linters, IDEs, and Expressir that need to track where in the source code each element came from.
|
|
268
|
+
|
|
269
|
+
=== Evidence-Based Results
|
|
270
|
+
|
|
271
|
+
Actual benchmark results from Expressir parsing EXPRESS schemas (22KB file, 733 lines):
|
|
272
|
+
|
|
273
|
+
[cols="3,2,2,3"]
|
|
274
|
+
|===
|
|
275
|
+
| Mode | Time | Speedup | Notes
|
|
276
|
+
|
|
277
|
+
| Ruby (Parslet) | 3036 ms | 1x (baseline) | Pure Ruby parsing
|
|
278
|
+
| Native Batch (u64) | 153 ms | 19.9x faster | AST via u64 array transfer
|
|
279
|
+
| Native ZeroCopy (Slice) | 106 ms | 28.7x faster | Zero-copy with source positions
|
|
280
|
+
|===
|
|
281
|
+
|
|
282
|
+
*Run `bundle exec ruby benchmark/run_all.rb` to see results on YOUR machine.*
|
|
283
|
+
|
|
284
|
+
=== Running Benchmarks
|
|
285
|
+
|
|
286
|
+
Verify these results yourself:
|
|
287
|
+
|
|
288
|
+
[source,shell]
|
|
289
|
+
----
|
|
290
|
+
cd parsanol-ruby
|
|
291
|
+
bundle install
|
|
292
|
+
bundle exec rake compile # Build native extension
|
|
293
|
+
bundle exec ruby benchmark/run_all.rb --quick
|
|
294
|
+
----
|
|
295
|
+
|
|
296
|
+
See `benchmark/APPROACHES.md` for detailed diagrams explaining each approach.
|
|
297
|
+
|
|
298
|
+
== Architecture
|
|
299
|
+
|
|
300
|
+
Parsanol consists of two main components:
|
|
301
|
+
|
|
302
|
+
* *parsanol-ruby*: The Ruby gem with parser DSL and transformation engine
|
|
303
|
+
* *parsanol-rs*: Rust crate providing native parsing acceleration
|
|
304
|
+
|
|
305
|
+
=== Three Transformation Modes
|
|
306
|
+
|
|
307
|
+
Parsanol supports three transformation modes:
|
|
308
|
+
|
|
309
|
+
1. **Ruby Transform** (Parslet-compatible): Parse in Rust, transform in Ruby
|
|
310
|
+
2. **Serialized Output**: Parse and transform in Rust, return JSON
|
|
311
|
+
3. **Native FFI**: Parse, transform, and return Ruby objects directly
|
|
312
|
+
|
|
313
|
+
=== Static vs Dynamic Parsers
|
|
314
|
+
|
|
315
|
+
Parsanol supports two parser modes:
|
|
316
|
+
|
|
317
|
+
* *Dynamic Parsers*: Can be modified during parsing, more flexible but slower
|
|
318
|
+
* *Static/Frozen Parsers*: Pre-compiled grammar, fastest path for production use
|
|
319
|
+
|
|
320
|
+
=== Streaming Builder API
|
|
321
|
+
|
|
322
|
+
For maximum performance, use the streaming builder API which eliminates intermediate AST construction. Your Ruby callbacks receive parsed values directly during parsing:
|
|
323
|
+
|
|
324
|
+
[IMPORTANT]
|
|
325
|
+
====
|
|
326
|
+
The streaming builder API requires the native extension. If the native extension is not available, use the pure Ruby parser instead.
|
|
327
|
+
====
|
|
328
|
+
|
|
329
|
+
[source,ruby]
|
|
330
|
+
----
|
|
331
|
+
require 'parsanol'
|
|
332
|
+
|
|
333
|
+
# Define a custom builder by including Parsanol::BuilderCallbacks
|
|
334
|
+
class StringCollector
|
|
335
|
+
include Parsanol::BuilderCallbacks
|
|
336
|
+
|
|
337
|
+
def initialize
|
|
338
|
+
@strings = []
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
def on_string(value, offset, length)
|
|
342
|
+
@strings << value
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def finish
|
|
346
|
+
@strings
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# Serialize grammar from a parser
|
|
351
|
+
grammar = Parsanol::Native.serialize_grammar(MyParser.new.root)
|
|
352
|
+
|
|
353
|
+
# Parse with the builder
|
|
354
|
+
builder = StringCollector.new
|
|
355
|
+
result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
356
|
+
# result: ["hello", "world"]
|
|
357
|
+
----
|
|
358
|
+
|
|
359
|
+
==== Available Callback Methods
|
|
360
|
+
|
|
361
|
+
Include `Parsanol::BuilderCallbacks` in your class and override these methods:
|
|
362
|
+
|
|
363
|
+
[cols="1,3,2"]
|
|
364
|
+
|===
|
|
365
|
+
| Method | Description | Default
|
|
366
|
+
|
|
367
|
+
| `on_start(input)` | Parsing started | No-op
|
|
368
|
+
| `on_success` | Parsing succeeded | No-op
|
|
369
|
+
| `on_error(message)` | Parsing failed | No-op
|
|
370
|
+
| `on_string(value, offset, length)` | String/slice matched | No-op
|
|
371
|
+
| `on_int(value)` | Integer matched | No-op
|
|
372
|
+
| `on_float(value)` | Float matched | No-op
|
|
373
|
+
| `on_bool(value)` | Boolean matched | No-op
|
|
374
|
+
| `on_nil` | Nil matched | No-op
|
|
375
|
+
| `on_hash_start(size)` | Entering a hash/object | No-op
|
|
376
|
+
| `on_hash_key(key)` | Hash key encountered | No-op
|
|
377
|
+
| `on_hash_value(key)` | About to parse hash value | No-op
|
|
378
|
+
| `on_hash_end(size)` | Exiting a hash/object | No-op
|
|
379
|
+
| `on_array_start(size)` | Entering an array | No-op
|
|
380
|
+
| `on_array_element(index)` | About to parse array element | No-op
|
|
381
|
+
| `on_array_end(size)` | Exiting an array | No-op
|
|
382
|
+
| `on_named_start(name)` | Starting named rule | No-op
|
|
383
|
+
| `on_named_end(name)` | Finished named rule | No-op
|
|
384
|
+
| `finish` | Parsing complete, return result | Returns nil
|
|
385
|
+
|===
|
|
386
|
+
|
|
387
|
+
The `size` parameter in `on_array_start`, `on_hash_start`, `on_array_end`, and `on_hash_end` indicates the number of elements (may be `nil` for start callbacks).
|
|
388
|
+
|
|
389
|
+
==== Built-in Builders
|
|
390
|
+
|
|
391
|
+
Parsanol provides three built-in builders for common use cases:
|
|
392
|
+
|
|
393
|
+
* `Parsanol::Builders::DebugBuilder` - Collects all parsing events as strings for debugging
|
|
394
|
+
* `Parsanol::Builders::StringCollector` - Collects all string values into an array
|
|
395
|
+
* `Parsanol::Builders::NodeCounter` - Counts nodes by type (strings, ints, arrays, hashes, etc.)
|
|
396
|
+
|
|
397
|
+
[source,ruby]
|
|
398
|
+
----
|
|
399
|
+
# DebugBuilder - see all parsing events
|
|
400
|
+
debug = Parsanol::Builders::DebugBuilder.new
|
|
401
|
+
Parsanol::Native.parse_with_builder(grammar, input, debug)
|
|
402
|
+
puts debug.events # => ["start: ...", "string: ...", "hash_start", ...]
|
|
403
|
+
|
|
404
|
+
# StringCollector - extract all strings
|
|
405
|
+
collector = Parsanol::Builders::StringCollector.new
|
|
406
|
+
Parsanol::Native.parse_with_builder(grammar, input, collector)
|
|
407
|
+
puts collector.strings # => ["hello", "world", ...]
|
|
408
|
+
|
|
409
|
+
# NodeCounter - count node types
|
|
410
|
+
counter = Parsanol::Builders::NodeCounter.new
|
|
411
|
+
Parsanol::Native.parse_with_builder(grammar, input, counter)
|
|
412
|
+
puts counter.counts # => {:string=>5, :int=>3, :hash=>2, :array=>1}
|
|
413
|
+
----
|
|
414
|
+
|
|
415
|
+
==== Advanced Example: Building Custom Objects
|
|
416
|
+
|
|
417
|
+
[source,ruby]
|
|
418
|
+
----
|
|
419
|
+
class JsonBuilder
|
|
420
|
+
include Parsanol::BuilderCallbacks
|
|
421
|
+
|
|
422
|
+
def initialize
|
|
423
|
+
@stack = []
|
|
424
|
+
@current_key = nil
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def on_string(value, offset, length)
|
|
428
|
+
add_value(value)
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
def on_int(value)
|
|
432
|
+
add_value(value)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
def on_float(value)
|
|
436
|
+
add_value(value)
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
def on_bool(value)
|
|
440
|
+
add_value(value)
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
def on_nil
|
|
444
|
+
add_value(nil)
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
def on_hash_start(size = nil)
|
|
448
|
+
@stack.push({})
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
def on_hash_key(key)
|
|
452
|
+
@current_key = key
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def on_hash_end(size)
|
|
456
|
+
finished = @stack.pop
|
|
457
|
+
add_value(finished) unless @stack.empty?
|
|
458
|
+
@result = finished if @stack.empty?
|
|
459
|
+
end
|
|
460
|
+
|
|
461
|
+
def on_array_start(size = nil)
|
|
462
|
+
@stack.push([])
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
def on_array_element(index)
|
|
466
|
+
# Called before each array element
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
def on_array_end(size)
|
|
470
|
+
finished = @stack.pop
|
|
471
|
+
add_value(finished) unless @stack.empty?
|
|
472
|
+
@result = finished if @stack.empty?
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
def on_named_start(name)
|
|
476
|
+
# Called when entering a named rule (e.g., .as(:name))
|
|
477
|
+
end
|
|
478
|
+
|
|
479
|
+
def on_named_end(name)
|
|
480
|
+
# Called when exiting a named rule
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
def finish
|
|
484
|
+
@result
|
|
485
|
+
end
|
|
486
|
+
|
|
487
|
+
private
|
|
488
|
+
|
|
489
|
+
def add_value(value)
|
|
490
|
+
return if @stack.empty?
|
|
491
|
+
case @stack.last
|
|
492
|
+
when Hash
|
|
493
|
+
@stack.last[@current_key] = value
|
|
494
|
+
@current_key = nil
|
|
495
|
+
when Array
|
|
496
|
+
@stack.last << value
|
|
497
|
+
end
|
|
498
|
+
end
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
builder = JsonBuilder.new
|
|
502
|
+
result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
503
|
+
# result: Parsed JSON as Ruby objects
|
|
504
|
+
----
|
|
505
|
+
|
|
506
|
+
=== Parallel Parsing
|
|
507
|
+
|
|
508
|
+
Parse multiple inputs in parallel using all CPU cores:
|
|
509
|
+
|
|
510
|
+
[source,ruby]
|
|
511
|
+
----
|
|
512
|
+
require 'parsanol/parallel'
|
|
513
|
+
|
|
514
|
+
grammar = MyParser.new.serialize_grammar
|
|
515
|
+
inputs = Dir.glob("*.json").map { |f| File.read(f) }
|
|
516
|
+
|
|
517
|
+
# Parse all files in parallel (up to 8x faster on 8 cores)
|
|
518
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs)
|
|
519
|
+
|
|
520
|
+
# With configuration
|
|
521
|
+
config = Parsanol::Parallel::Config.new
|
|
522
|
+
.with_num_threads(4)
|
|
523
|
+
.with_min_chunk_size(50)
|
|
524
|
+
|
|
525
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
|
|
526
|
+
----
|
|
527
|
+
|
|
528
|
+
=== Infix Expression Parsing
|
|
529
|
+
|
|
530
|
+
Built-in support for parsing infix expressions with operator precedence:
|
|
531
|
+
|
|
532
|
+
[source,ruby]
|
|
533
|
+
----
|
|
534
|
+
class CalculatorParser < Parsanol::Parser
|
|
535
|
+
rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
536
|
+
rule(:primary) { number | str('(') >> expr >> str(')') }
|
|
537
|
+
|
|
538
|
+
# Define operators with precedence and associativity
|
|
539
|
+
rule(:expr) {
|
|
540
|
+
infix_expression(primary,
|
|
541
|
+
[str('*'), 2, :left],
|
|
542
|
+
[str('/'), 2, :left],
|
|
543
|
+
[str('+'), 1, :left],
|
|
544
|
+
[str('-'), 1, :left],
|
|
545
|
+
[str('^'), 3, :right] # Right-associative
|
|
546
|
+
)
|
|
547
|
+
}
|
|
548
|
+
root(:expr)
|
|
549
|
+
end
|
|
550
|
+
----
|
|
551
|
+
|
|
552
|
+
=== Security Features
|
|
553
|
+
|
|
554
|
+
For parsing untrusted input, use built-in limits:
|
|
555
|
+
|
|
556
|
+
[source,ruby]
|
|
557
|
+
----
|
|
558
|
+
# Configure limits for untrusted input
|
|
559
|
+
result = Parsanol::Native.parse_with_limits(
|
|
560
|
+
grammar_json,
|
|
561
|
+
untrusted_input,
|
|
562
|
+
max_input_size: 10 * 1024 * 1024, # 10 MB max
|
|
563
|
+
max_recursion_depth: 100 # Limit recursion
|
|
564
|
+
)
|
|
565
|
+
----
|
|
566
|
+
|
|
567
|
+
=== Debug Tools
|
|
568
|
+
|
|
569
|
+
Enable tracing for debugging grammars:
|
|
570
|
+
|
|
571
|
+
[source,ruby]
|
|
572
|
+
----
|
|
573
|
+
# Parse with trace
|
|
574
|
+
result, trace = Parsanol::Native.parse_with_trace(grammar_json, input)
|
|
575
|
+
puts trace
|
|
576
|
+
|
|
577
|
+
# Generate grammar visualization
|
|
578
|
+
mermaid = Parsanol::Native.grammar_to_mermaid(grammar_json)
|
|
579
|
+
dot = Parsanol::Native.grammar_to_dot(grammar_json)
|
|
580
|
+
----
|
|
581
|
+
|
|
582
|
+
== Development
|
|
583
|
+
|
|
584
|
+
=== Setup
|
|
585
|
+
|
|
586
|
+
[source,shell]
|
|
587
|
+
----
|
|
588
|
+
bundle install
|
|
589
|
+
----
|
|
590
|
+
|
|
591
|
+
=== Testing
|
|
592
|
+
|
|
593
|
+
[source,shell]
|
|
594
|
+
----
|
|
595
|
+
# Run all tests
|
|
596
|
+
bundle exec rake spec
|
|
597
|
+
|
|
598
|
+
# Run unit tests only
|
|
599
|
+
bundle exec rake spec:unit
|
|
600
|
+
|
|
601
|
+
# Run specific test file
|
|
602
|
+
bundle exec rspec spec/parsanol/atoms/str_spec.rb
|
|
603
|
+
----
|
|
604
|
+
|
|
605
|
+
=== Compiling Native Extension
|
|
606
|
+
|
|
607
|
+
The native extension requires Rust 1.75+:
|
|
608
|
+
|
|
609
|
+
[source,shell]
|
|
610
|
+
----
|
|
611
|
+
# Install Rust (if not already installed)
|
|
612
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
613
|
+
|
|
614
|
+
# Compile the native extension
|
|
615
|
+
bundle exec rake compile
|
|
616
|
+
|
|
617
|
+
# Verify native extension is working
|
|
618
|
+
ruby -I lib -e "require 'parsanol'; puts Parsanol::Native.available?"
|
|
619
|
+
# => true
|
|
620
|
+
----
|
|
621
|
+
|
|
622
|
+
=== Running Benchmarks
|
|
623
|
+
|
|
624
|
+
[source,shell]
|
|
625
|
+
----
|
|
626
|
+
# Quick benchmarks
|
|
627
|
+
bundle exec rake benchmark
|
|
628
|
+
|
|
629
|
+
# Comprehensive benchmark suite
|
|
630
|
+
bundle exec rake benchmark:all
|
|
631
|
+
|
|
632
|
+
# Run the benchmark runner script directly (quick mode)
|
|
633
|
+
bundle exec ruby benchmark/run_all.rb --quick
|
|
634
|
+
----
|
|
635
|
+
|
|
636
|
+
== License
|
|
637
|
+
|
|
638
|
+
MIT License - see LICENSE file for details.
|
|
639
|
+
|
|
640
|
+
== Resources
|
|
641
|
+
|
|
642
|
+
* https://github.com/parsanol/parsanol-ruby[GitHub Repository]
|
|
643
|
+
* https://github.com/parsanol/parsanol-rs[Rust Crate]
|