parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Markup Parser - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/markup
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### Document Structure
|
|
13
|
+
|
|
14
|
+
A document is a sequence of blocks:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule(:document) { block.repeat(1).as(:document) }
|
|
18
|
+
|
|
19
|
+
rule(:block) {
|
|
20
|
+
heading |
|
|
21
|
+
unordered_list |
|
|
22
|
+
paragraph |
|
|
23
|
+
blank_line.as(:blank)
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Blank lines are captured separately and filtered during transformation.
|
|
28
|
+
|
|
29
|
+
### Heading Rule
|
|
30
|
+
|
|
31
|
+
Equal signs define heading level (1-3):
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
rule(:heading) {
|
|
35
|
+
(str('=').repeat(1, 3).as(:level) >>
|
|
36
|
+
space >>
|
|
37
|
+
heading_content.as(:text) >>
|
|
38
|
+
newline).as(:heading)
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
`=` is H1, `==` is H2, `===` is H3.
|
|
43
|
+
|
|
44
|
+
### Paragraph Rule
|
|
45
|
+
|
|
46
|
+
Consecutive lines form paragraphs:
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
rule(:paragraph) {
|
|
50
|
+
(paragraph_line >> newline).repeat(1).as(:paragraph)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
rule(:paragraph_line) {
|
|
54
|
+
(blank_line.absent? >> (str('=').absent? | space.absent?) >> any).repeat(1)
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Paragraph lines don't start with `=` followed by space (which would be a heading).
|
|
59
|
+
|
|
60
|
+
### List Rule
|
|
61
|
+
|
|
62
|
+
Hyphen-prefixed items:
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
rule(:unordered_list) {
|
|
66
|
+
list_item.repeat(1).as(:unordered_list)
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
rule(:list_item) {
|
|
70
|
+
(str('-') >>
|
|
71
|
+
space >>
|
|
72
|
+
list_content.as(:text) >>
|
|
73
|
+
newline).as(:item)
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Each item must be on its own line with hyphen and space.
|
|
78
|
+
|
|
79
|
+
## Output Types
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
# Document with blocks
|
|
83
|
+
MarkupDocument.new([
|
|
84
|
+
MarkupHeading.new("=", "Title"),
|
|
85
|
+
MarkupParagraph.new(["text"]),
|
|
86
|
+
MarkupList.new([{text: "item"}, {text: "item2"}])
|
|
87
|
+
])
|
|
88
|
+
|
|
89
|
+
# Heading
|
|
90
|
+
MarkupHeading.new("==", "Section")
|
|
91
|
+
# to_html => "<h2>Section</h2>"
|
|
92
|
+
|
|
93
|
+
# Paragraph
|
|
94
|
+
MarkupParagraph.new(["Line one", "Line two"])
|
|
95
|
+
# to_html => "<p>Line one Line two</p>"
|
|
96
|
+
|
|
97
|
+
# List
|
|
98
|
+
MarkupList.new([{text: "First"}, {text: "Second"}])
|
|
99
|
+
# to_html => "<ul>\n<li>First</li>\n<li>Second</li>\n</ul>"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Design Decisions
|
|
103
|
+
|
|
104
|
+
### Why Equal Signs for Headings?
|
|
105
|
+
|
|
106
|
+
Equal signs are visually intuitive and don't conflict with common text. They're also easier to type than `#` on some keyboards.
|
|
107
|
+
|
|
108
|
+
### Why Filter Blank Lines in Transform?
|
|
109
|
+
|
|
110
|
+
Blank lines separate blocks but aren't content. Filtering them during transformation keeps the AST clean.
|
|
111
|
+
|
|
112
|
+
### Why Separate Paragraph Lines?
|
|
113
|
+
|
|
114
|
+
Keeping lines separate allows joining with spaces during HTML generation, preserving word boundaries across line breaks.
|
|
115
|
+
|
|
116
|
+
### Why Limit Heading Levels to 3?
|
|
117
|
+
|
|
118
|
+
This markup language is intentionally simple. Real-world use might extend to 6 levels, matching HTML.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Demonstrates that we have a compatibility fix to mathn's weird idea of
|
|
2
|
+
# integer mathematics.
|
|
3
|
+
# Originally contributed to Parslet, ported to Parsanol as an example.
|
|
4
|
+
|
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
|
6
|
+
|
|
7
|
+
require 'parsanol/parslet'
|
|
8
|
+
require 'parsanol/convenience'
|
|
9
|
+
include Parsanol::Parslet
|
|
10
|
+
|
|
11
|
+
def attempt_parse
|
|
12
|
+
possible_whitespace = match['\s'].repeat
|
|
13
|
+
|
|
14
|
+
cephalopod =
|
|
15
|
+
str('octopus') |
|
|
16
|
+
str('squid')
|
|
17
|
+
|
|
18
|
+
parenthesized_cephalopod =
|
|
19
|
+
str('(') >>
|
|
20
|
+
possible_whitespace >>
|
|
21
|
+
cephalopod >>
|
|
22
|
+
possible_whitespace >>
|
|
23
|
+
str(')')
|
|
24
|
+
|
|
25
|
+
parser =
|
|
26
|
+
possible_whitespace >>
|
|
27
|
+
parenthesized_cephalopod >>
|
|
28
|
+
possible_whitespace
|
|
29
|
+
|
|
30
|
+
# This parse fails, but that is not the point. When mathn is in the current
|
|
31
|
+
# ruby environment, it modifies integer division in a way that makes
|
|
32
|
+
# parslet loop indefinitely.
|
|
33
|
+
parser.parse %{(\nsqeed)\n}
|
|
34
|
+
rescue Parsanol::ParseFailed
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
attempt_parse
|
|
38
|
+
puts 'it terminates before we require mathn'
|
|
39
|
+
|
|
40
|
+
puts "requiring mathn now"
|
|
41
|
+
# mathn was removed from the standard library in Ruby 2.5 (it had been deprecated since 2.2)
|
|
42
|
+
if RUBY_VERSION.gsub(/[^\d]/, '').to_i < 250
|
|
43
|
+
require 'mathn'
|
|
44
|
+
end
|
|
45
|
+
puts "and trying again (will hang without the fix)"
|
|
46
|
+
attempt_parse # without the compatibility fix, this call would never terminate once mathn is loaded
|
|
47
|
+
puts "okay!"
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Mathn Compatibility - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/mathn
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### The Mathn Problem
|
|
13
|
+
|
|
14
|
+
Ruby's deprecated `mathn` library changed integer division behavior:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
# Without mathn:
|
|
18
|
+
3 / 2 # => 1 (integer division)
|
|
19
|
+
|
|
20
|
+
# With mathn:
|
|
21
|
+
3 / 2 # => (3/2) (Rational)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
This broke Parslet's internal calculations.
|
|
25
|
+
|
|
26
|
+
### The Grammar
|
|
27
|
+
|
|
28
|
+
Simple cephalopod matching:
|
|
29
|
+
|
|
30
|
+
```ruby
|
|
31
|
+
cephalopod =
|
|
32
|
+
str('octopus') |
|
|
33
|
+
str('squid')
|
|
34
|
+
|
|
35
|
+
parenthesized_cephalopod =
|
|
36
|
+
str('(') >>
|
|
37
|
+
possible_whitespace >>
|
|
38
|
+
cephalopod >>
|
|
39
|
+
possible_whitespace >>
|
|
40
|
+
str(')')
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### The Compatibility Fix
|
|
44
|
+
|
|
45
|
+
Parsanol includes a fix that works regardless of mathn:
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
def attempt_parse
|
|
49
|
+
parser = possible_whitespace >>
|
|
50
|
+
parenthesized_cephalopod >>
|
|
51
|
+
possible_whitespace
|
|
52
|
+
|
|
53
|
+
# This would hang without the fix
|
|
54
|
+
parser.parse %{(\nsqeed)\n}
|
|
55
|
+
rescue Parsanol::ParseFailed
|
|
56
|
+
end
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Version Check
|
|
60
|
+
|
|
61
|
+
The example checks Ruby version:
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
if RUBY_VERSION.gsub(/[^\d]/, '').to_i < 250
|
|
65
|
+
require 'mathn'
|
|
66
|
+
end
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
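Mathn was removed from the standard library in Ruby 2.5 (after being deprecated in 2.2), so the example only loads it on older Rubies. Note that the check compares the concatenated digits of `RUBY_VERSION`, so a release with a two-digit patch level such as 2.4.10 (which becomes 2410) is not recognized as older than 2.5.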
|
|
70
|
+
|
|
71
|
+
## Output Types
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
it terminates before we require mathn
|
|
75
|
+
requiring mathn now
|
|
76
|
+
and trying again (will hang without the fix)
|
|
77
|
+
okay!
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Design Decisions
|
|
81
|
+
|
|
82
|
+
### Why This Example?
|
|
83
|
+
|
|
84
|
+
It documents a historical compatibility issue. Users encountering similar problems can find this reference.
|
|
85
|
+
|
|
86
|
+
### Why Keep Mathn Support?
|
|
87
|
+
|
|
88
|
+
Some legacy systems still use mathn. Parsanol aims for broad Ruby version compatibility.
|
|
89
|
+
|
|
90
|
+
### Ruby-Only Feature
|
|
91
|
+
|
|
92
|
+
This is purely about Ruby library compatibility. Rust has no equivalent issue.
|
|
93
|
+
|
|
94
|
+
### Modern Relevance
|
|
95
|
+
|
|
96
|
+
As of Ruby 2.5, mathn is no longer part of the standard library. This example is mostly historical, but it demonstrates that Parsanol keeps working even when integer division has been redefined.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "mathn",
|
|
3
|
+
"title": "Mathn Compatibility",
|
|
4
|
+
"description": "Demonstrate compatibility with Ruby's deprecated mathn library which changed integer division behavior.",
|
|
5
|
+
"category": "conceptual",
|
|
6
|
+
"tags": ["mathn", "compatibility", "ruby", "integer-division"],
|
|
7
|
+
"difficulty": "beginner",
|
|
8
|
+
"concepts": ["compatibility", "integer division", "Ruby libraries", "legacy support"],
|
|
9
|
+
|
|
10
|
+
"motivation": {
|
|
11
|
+
"why": "Documents handling of Ruby library compatibility issues that affect parsing behavior. The deprecated mathn library changed integer division to return Rationals, which broke Parslet's internal calculations.",
|
|
12
|
+
"useCases": [
|
|
13
|
+
"Legacy system support",
|
|
14
|
+
"Ruby version compatibility",
|
|
15
|
+
"Handling library conflicts"
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
|
|
19
|
+
"inputFormat": {
|
|
20
|
+
"description": "Simple parser input that works regardless of mathn.",
|
|
21
|
+
"examples": [
|
|
22
|
+
{ "input": "(squid)", "description": "Parenthesized cephalopod", "valid": true },
|
|
23
|
+
{ "input": "(sqeed)", "description": "Invalid - should error gracefully", "valid": false }
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
|
|
27
|
+
"outputFormat": {
|
|
28
|
+
"description": "Parse results that work with or without mathn.",
|
|
29
|
+
"structure": {
|
|
30
|
+
"result": { "description": "The parse result" }
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
|
|
34
|
+
"rubyOnly": true,
|
|
35
|
+
"parsletCompatible": true,
|
|
36
|
+
"implementations": {
|
|
37
|
+
"ruby": { "basic": "basic.rb" }
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Reproduces [1] using parslet.
|
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
|
3
|
+
|
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
|
5
|
+
|
|
6
|
+
require 'pp'
|
|
7
|
+
require 'parsanol/parslet'
|
|
8
|
+
require 'parsanol/convenience'
|
|
9
|
+
|
|
10
|
+
module MiniLisp
|
|
11
|
+
class Parser < Parsanol::Parser
|
|
12
|
+
root :expression
|
|
13
|
+
rule(:expression) {
|
|
14
|
+
space? >> str('(') >> space? >> body >> str(')') >> space?
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
rule(:body) {
|
|
18
|
+
(expression | identifier | float | integer | string).repeat.as(:exp)
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
rule(:space) {
|
|
22
|
+
match('\s').repeat(1)
|
|
23
|
+
}
|
|
24
|
+
rule(:space?) {
|
|
25
|
+
space.maybe
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
rule(:identifier) {
|
|
29
|
+
(match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
rule(:float) {
|
|
33
|
+
(
|
|
34
|
+
integer >> (
|
|
35
|
+
str('.') >> match('[0-9]').repeat(1) |
|
|
36
|
+
str('e') >> match('[0-9]').repeat(1)
|
|
37
|
+
).as(:e)
|
|
38
|
+
).as(:float) >> space?
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
rule(:integer) {
|
|
42
|
+
((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
rule(:string) {
|
|
46
|
+
str('"') >> (
|
|
47
|
+
str('\\') >> any |
|
|
48
|
+
str('"').absent? >> any
|
|
49
|
+
).repeat.as(:string) >> str('"') >> space?
|
|
50
|
+
}
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
class Transform
|
|
54
|
+
include Parsanol::Parslet
|
|
55
|
+
|
|
56
|
+
attr_reader :t
|
|
57
|
+
def initialize
|
|
58
|
+
@t = Parsanol::Transform.new
|
|
59
|
+
|
|
60
|
+
# To understand these, take a look at what comes out of the parser.
|
|
61
|
+
t.rule(:identifier => simple(:ident)) { ident.to_sym }
|
|
62
|
+
|
|
63
|
+
t.rule(:string => simple(:str)) { str }
|
|
64
|
+
|
|
65
|
+
t.rule(:integer => simple(:int)) { Integer(int) }
|
|
66
|
+
|
|
67
|
+
t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
|
|
68
|
+
|
|
69
|
+
t.rule(:exp => subtree(:exp)) { exp }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def do(tree)
|
|
73
|
+
t.apply(tree)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
parser = MiniLisp::Parser.new
|
|
79
|
+
transform = MiniLisp::Transform.new
|
|
80
|
+
|
|
81
|
+
result = parser.parse_with_debug %Q{
|
|
82
|
+
(define test (lambda ()
|
|
83
|
+
(begin
|
|
84
|
+
(display "something")
|
|
85
|
+
(display 1)
|
|
86
|
+
(display 3.08))))
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
# Transform the result
|
|
90
|
+
pp transform.do(result) if result
|
|
91
|
+
|
|
92
|
+
# Thereby reducing it to the earlier problem:
|
|
93
|
+
# http://github.com/kschiess/toylisp
|
|
94
|
+
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Mini Lisp Parser - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/minilisp
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### Expression Rule
|
|
13
|
+
|
|
14
|
+
S-expressions are recursively defined:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule(:expression) {
|
|
18
|
+
space? >> str('(') >> space? >> body >> str(')') >> space?
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Whitespace is optional around parentheses for flexible formatting.
|
|
23
|
+
|
|
24
|
+
### Body Rule
|
|
25
|
+
|
|
26
|
+
Body contains multiple expressions:
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
rule(:body) {
|
|
30
|
+
(expression | identifier | float | integer | string).repeat.as(:exp)
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
The repeat allows empty lists `()` and nested structures.
|
|
35
|
+
|
|
36
|
+
### Identifier Rule
|
|
37
|
+
|
|
38
|
+
Identifiers allow operator characters:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
rule(:identifier) {
|
|
42
|
+
(match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
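`=` and `*` are included in the identifier character class so that Lisp operators such as `=` and `*` parse as ordinary identifiers. Other operator characters (`+`, `-`, `/`) are not part of the class and are therefore not accepted as identifiers by this grammar.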
|
|
47
|
+
|
|
48
|
+
### Float Rule
|
|
49
|
+
|
|
50
|
+
Floats have decimal or exponent parts:
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
rule(:float) {
|
|
54
|
+
(
|
|
55
|
+
integer >> (
|
|
56
|
+
str('.') >> match('[0-9]').repeat(1) |
|
|
57
|
+
str('e') >> match('[0-9]').repeat(1)
|
|
58
|
+
).as(:e)
|
|
59
|
+
).as(:float) >> space?
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Captures the integer part and the exponent/fraction separately.
|
|
64
|
+
|
|
65
|
+
### String Rule
|
|
66
|
+
|
|
67
|
+
Strings handle escape sequences:
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
rule(:string) {
|
|
71
|
+
str('"') >> (
|
|
72
|
+
str('\\') >> any |
|
|
73
|
+
str('"').absent? >> any
|
|
74
|
+
).repeat.as(:string) >> str('"') >> space?
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
`str('\\') >> any` handles any escaped character including `\"`.
|
|
79
|
+
|
|
80
|
+
### Transform Class
|
|
81
|
+
|
|
82
|
+
Transforms convert parse trees to Ruby objects:
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
class Transform
|
|
86
|
+
t.rule(:identifier => simple(:ident)) { ident.to_sym }
|
|
87
|
+
t.rule(:string => simple(:str)) { str }
|
|
88
|
+
t.rule(:integer => simple(:int)) { Integer(int) }
|
|
89
|
+
t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
|
|
90
|
+
t.rule(:exp => subtree(:exp)) { exp }
|
|
91
|
+
end
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
`simple(:x)` matches single values; `subtree(:x)` matches nested structures.
|
|
95
|
+
|
|
96
|
+
## Output Types
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
# Parse tree
|
|
100
|
+
{:exp=>[
|
|
101
|
+
{:identifier=>"*"@s},
|
|
102
|
+
{:integer=>"1"@s},
|
|
103
|
+
{:integer=>"2"@s}
|
|
104
|
+
]}
|
|
105
|
+
|
|
106
|
+
# After transform
|
|
107
|
+
[:*, 1, 2]
|
|
108
|
+
|
|
109
|
+
# Nested expression
|
|
110
|
+
[:define, :test, [:lambda, [], [:begin,
|
|
111
|
+
[:display, "something"],
|
|
112
|
+
[:display, 1],
|
|
113
|
+
[:display, 3.08]
|
|
114
|
+
]]]
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Design Decisions
|
|
118
|
+
|
|
119
|
+
### Why Symbol for Identifiers?
|
|
120
|
+
|
|
121
|
+
Ruby symbols are immutable and efficient for identifiers. They're commonly used for code representation.
|
|
122
|
+
|
|
123
|
+
### Why Float Assembly in Transform?
|
|
124
|
+
|
|
125
|
+
The float rule captures integer and exponent parts separately. The transform combines them into a Ruby Float.
|
|
126
|
+
|
|
127
|
+
### Why subtree for Expressions?
|
|
128
|
+
|
|
129
|
+
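`subtree(:exp)` matches any value, including an array of nested lists. Because the transform is applied bottom-up, the inner expressions have already been transformed by the time the rule fires, so arbitrary nesting depth is handled automatically.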
|
|
130
|
+
|
|
131
|
+
### Why Separate Parser and Transform Classes?
|
|
132
|
+
|
|
133
|
+
Separation keeps grammar definition clean. The transform can evolve independently of parsing rules.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
|
2
|
+
|
|
3
|
+
require 'pp'
|
|
4
|
+
require "parsanol/parslet"
|
|
5
|
+
|
|
6
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
|
7
|
+
# ../ip-address/basic.rb as well.
|
|
8
|
+
|
|
9
|
+
module ALanguage
|
|
10
|
+
include Parsanol::Parslet
|
|
11
|
+
|
|
12
|
+
# Parslet rules are really a special kind of method. Mix them into your
|
|
13
|
+
# classes!
|
|
14
|
+
rule(:a_language) { str('aaa') }
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Parslet parsers are parslet atoms as well. Create an instance and chain them
|
|
18
|
+
# to your other rules.
|
|
19
|
+
#
|
|
20
|
+
class BLanguage < Parsanol::Parser
|
|
21
|
+
root :blang
|
|
22
|
+
|
|
23
|
+
rule(:blang) { str('bbb') }
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Parslet atoms are really Ruby values, pass them around.
|
|
27
|
+
c_language = Parsanol.str('ccc')
|
|
28
|
+
|
|
29
|
+
class Language < Parsanol::Parser
|
|
30
|
+
def initialize(c_language)
|
|
31
|
+
@c_language = c_language
|
|
32
|
+
super()
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
root :root
|
|
36
|
+
|
|
37
|
+
include ALanguage
|
|
38
|
+
|
|
39
|
+
rule(:root) { str('a(') >> a_language >> str(')') >> space |
|
|
40
|
+
str('b(') >> BLanguage.new >> str(')') >> space |
|
|
41
|
+
str('c(') >> @c_language >> str(')') >> space }
|
|
42
|
+
rule(:space) { str(' ').maybe }
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
Language.new(c_language).parse('a(aaa)')
|
|
46
|
+
Language.new(c_language).parse('b(bbb)')
|
|
47
|
+
Language.new(c_language).parse('c(ccc)')
|