parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# String Literal Parser - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/string-literal
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### String Literal Rule
|
|
13
|
+
|
|
14
|
+
Strings are quoted with escape support:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule :string do
|
|
18
|
+
str('"') >>
|
|
19
|
+
(
|
|
20
|
+
(str('\\') >> any) |
|
|
21
|
+
(str('"').absent? >> any)
|
|
22
|
+
).repeat.as(:string) >>
|
|
23
|
+
str('"')
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
A backslash escapes the character that follows it (including a quote), so only an unescaped quote terminates the string.
|
|
28
|
+
|
|
29
|
+
### Integer Literal Rule
|
|
30
|
+
|
|
31
|
+
Integers are sequences of digits:
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
rule :integer do
|
|
35
|
+
match('[0-9]').repeat(1).as(:integer)
|
|
36
|
+
end
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Simple repetition captures multi-digit numbers.
|
|
40
|
+
|
|
41
|
+
### Literal Alternation
|
|
42
|
+
|
|
43
|
+
Literals can be strings or integers:
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
rule :literal do
|
|
47
|
+
(integer | string).as(:literal) >> space.maybe
|
|
48
|
+
end
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Order matters: try more specific patterns first.
|
|
52
|
+
|
|
53
|
+
### File-Level Grammar
|
|
54
|
+
|
|
55
|
+
Multiple literals separated by newlines:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
rule :literals do
|
|
59
|
+
(literal >> eol).repeat
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
rule :eol do
|
|
63
|
+
line_end.repeat(1)
|
|
64
|
+
end
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Each line contains one literal definition.
|
|
68
|
+
|
|
69
|
+
### AST Node Classes
|
|
70
|
+
|
|
71
|
+
Ruby structs represent AST nodes:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
class Lit < Struct.new(:text)
|
|
75
|
+
def to_s
|
|
76
|
+
text.inspect
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
class StringLit < Lit
|
|
80
|
+
end
|
|
81
|
+
class IntLit < Lit
|
|
82
|
+
def to_s
|
|
83
|
+
text
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Inheritance allows shared behavior with type-specific formatting.
|
|
89
|
+
|
|
90
|
+
### Transform Rules
|
|
91
|
+
|
|
92
|
+
Transform creates typed AST nodes:
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
transform = Parsanol::Transform.new do
|
|
96
|
+
rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
|
|
97
|
+
rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
|
|
98
|
+
end
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Pattern matching on content type determines node class.
|
|
102
|
+
|
|
103
|
+
## Output Types
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
# Parse tree:
|
|
107
|
+
[
|
|
108
|
+
{:literal=>{:integer=>"42"@s}},
|
|
109
|
+
{:literal=>{:string=>"hello world"@s}}
|
|
110
|
+
]
|
|
111
|
+
|
|
112
|
+
# After transform:
|
|
113
|
+
[IntLit.new("42"), StringLit.new("hello world")]
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Design Decisions
|
|
117
|
+
|
|
118
|
+
### Why Handle Escapes in Strings?
|
|
119
|
+
|
|
120
|
+
Real-world string literals support escape sequences. `\"` allows quotes inside strings.
|
|
121
|
+
|
|
122
|
+
### Why Separate StringLit and IntLit?
|
|
123
|
+
|
|
124
|
+
Type-specific classes enable different behavior (formatting, validation, evaluation).
|
|
125
|
+
|
|
126
|
+
### Why Read from File?
|
|
127
|
+
|
|
128
|
+
Demonstrates parsing external input, common in compiler frontends.
|
data/example/test.lit
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# TOML Parser - Ruby Implementation
|
|
2
|
+
#
|
|
3
|
+
# Parse TOML configuration files: key-value pairs, tables, arrays.
|
|
4
|
+
#
|
|
5
|
+
# Run with: ruby example/toml/basic.rb
|
|
6
|
+
|
|
7
|
+
# Put the in-repo library on the load path when running from a source
# checkout. This file lives in example/toml/, so lib/ is two directories up;
# expand_path also normalises the entry to an absolute path.
$LOAD_PATH.unshift File.expand_path('../../lib', __dir__)
|
|
8
|
+
|
|
9
|
+
require 'parsanol/parslet'
|
|
10
|
+
|
|
11
|
+
# TOML parser
|
|
12
|
+
# Grammar for a small subset of TOML: comments, [table] headers and
# `key = value` lines whose values are strings, integers, floats, booleans,
# arrays or inline tables.
#
# Labels are chosen so that each captured node is a single-key hash holding
# the matched text (e.g. `{integer: "42"}`), which is the shape the patterns
# in TomlTransform are written against.
class TomlParser < Parsanol::Parser
  root :document

  # Document is a sequence of entries, captured in source order.
  rule(:document) { (comment | table | key_value | newline).repeat.as(:document) }

  # Comment: # to end of line (the captured text includes the leading '#').
  rule(:comment) {
    (str('#') >> (newline.absent? >> any).repeat).as(:comment) >> newline
  }

  # Table: [name] or [name.sub]
  rule(:table) {
    (str('[') >>
     table_name.as(:name) >>
     str(']') >>
     newline).as(:table)
  }

  rule(:table_name) {
    (match('[a-zA-Z0-9_]') | str('.') | str('-')).repeat(1)
  }

  # Key-value pair: key = value
  rule(:key_value) {
    (key.as(:key) >>
     space? >>
     str('=') >>
     space? >>
     value.as(:value) >>
     newline).as(:key_value)
  }

  # Bare key. Deliberately unlabelled: every caller applies `.as(:key)`
  # itself, and labelling here as well would nest the capture one level too
  # deep for the transform's `key: simple(:k)` pattern.
  rule(:key) {
    match('[a-zA-Z_]') >> match('[a-zA-Z0-9_]').repeat
  }

  # Value types. Choice is ordered, so `float` must be tried before
  # `integer`: otherwise "30.5" is accepted as the integer "30" and the
  # enclosing rule then fails on the leftover ".5".
  rule(:value) {
    string |
    float |
    integer |
    boolean |
    array |
    inline_table
  }

  # String: basic "..." or literal '...'
  rule(:string) {
    basic_string | literal_string
  }

  # Basic string: a backslash is dropped and the character after it is kept
  # verbatim. Only the content between the quotes is captured.
  rule(:basic_string) {
    str('"') >>
      (str('\\').ignore >> any | str('"').absent? >> any).repeat.as(:basic_string) >>
      str('"')
  }

  # Literal string: no escape processing; only the content is captured.
  rule(:literal_string) {
    str("'") >>
      (str("'").absent? >> any).repeat.as(:literal_string) >>
      str("'")
  }

  # Integer: +/-digits. The optional sign is inside the capture so that
  # negative numbers keep their sign.
  rule(:integer) {
    ((str('+') | str('-')).maybe >>
     match('[0-9]').repeat(1)).as(:integer)
  }

  # Float: digits.digits with an optional exponent
  rule(:float) {
    ((str('+') | str('-')).maybe >>
     match('[0-9]').repeat(1) >>
     str('.') >>
     match('[0-9]').repeat(1) >>
     (match('[eE]') >> (str('+') | str('-')).maybe >> match('[0-9]').repeat(1)).maybe).as(:float)
  }

  # Boolean: true or false
  rule(:boolean) {
    (str('true') | str('false')).as(:boolean)
  }

  # Array: [...]
  rule(:array) {
    (str('[') >>
     space? >>
     (value >> (comma >> value).repeat).maybe.as(:elements) >>
     space? >>
     str(']')).as(:array)
  }

  # Inline table: {...}
  rule(:inline_table) {
    (str('{') >>
     space? >>
     (key_value_inline >> (comma >> key_value_inline).repeat).maybe.as(:pairs) >>
     space? >>
     str('}')).as(:inline_table)
  }

  rule(:key_value_inline) {
    key.as(:key) >> space? >> str('=') >> space? >> value.as(:value)
  }

  # Helpers
  rule(:space?) { match('\s').repeat }
  rule(:comma) { str(',') >> space? }
  # CRLF is listed first and matched with `str`, because `match` is a
  # single-character matcher and cannot consume a two-character line ending.
  rule(:newline) { str("\r\n") | str("\n") }
end
|
|
123
|
+
|
|
124
|
+
# TOML result classes
|
|
125
|
+
# Root AST node: an ordered list of TomlTable, TomlKeyValue and TomlComment
# entries.
TomlDocument = Struct.new(:entries) do
  # Folds the entries into a nested Hash.
  #
  # Key/value entries are stored in the table opened by the most recent
  # TomlTable entry, or at the top level if no table has been opened yet.
  # A dotted table name such as "database.replica" opens (creating as
  # needed) the nested table result["database"]["replica"]. Entries of any
  # other kind (e.g. comments) are skipped.
  #
  # @return [Hash] string keys mapped to values or nested hashes
  def to_h
    result = {}
    target = result # hash that receives the next key/value entry

    entries.each do |entry|
      case entry
      when TomlTable
        # Walk down from the root one name segment at a time.
        target = entry.name.split('.').reduce(result) { |node, part| node[part] ||= {} }
      when TomlKeyValue
        target[entry.key] = entry.value
      end
    end

    result
  end
end

# A `[name]` table header.
TomlTable = Struct.new(:name)
# A `key = value` assignment.
TomlKeyValue = Struct.new(:key, :value)
# A `#` comment line.
TomlComment = Struct.new(:text)
|
|
151
|
+
|
|
152
|
+
# Transform parse tree to AST
|
|
153
|
+
# Rewrites the parse tree into TomlDocument / TomlTable / TomlKeyValue /
# TomlComment nodes and plain Ruby values.
#
# NOTE(review): the patterns assume Parslet-compatible matching, where
# `simple` binds anything except a Hash or Array, `sequence` binds an array
# of simple values, and `subtree` binds anything — confirm against
# Parsanol::Transform.
class TomlTransform < Parsanol::Transform
  rule(document: sequence(:entries)) { TomlDocument.new(entries) }

  rule(table: { name: simple(:n) }) { TomlTable.new(n.to_s) }

  # `subtree` rather than `simple`: by the time this rule runs the value may
  # already be an Array or Hash (array / inline table), which `simple`
  # would refuse to bind.
  rule(key_value: { key: simple(:k), value: subtree(:v) }) {
    TomlKeyValue.new(k.to_s, v)
  }

  rule(comment: simple(:c)) { TomlComment.new(c.to_s) }

  # Value transformations
  rule(basic_string: simple(:s)) { s.to_s }
  rule(literal_string: simple(:s)) { s.to_s }
  # An empty string capture may arrive as an empty array instead of a
  # string (presumably, as with Parslet's named repetitions); joining it
  # yields "".
  rule(basic_string: sequence(:chars)) { chars.join }
  rule(literal_string: sequence(:chars)) { chars.join }
  rule(integer: simple(:i)) { i.to_s.to_i }
  rule(float: simple(:f)) { f.to_s.to_f }
  rule(boolean: simple(:b)) { b.to_s == 'true' }

  # One rule for every element count: nil (no elements) becomes [], a lone
  # element is wrapped, and a list is passed through.
  # NOTE(review): an array holding exactly one nested array is
  # indistinguishable from that nested array in this tree shape.
  rule(array: { elements: subtree(:es) }) {
    if es.nil?
      []
    elsif es.is_a?(Array)
      es
    else
      [es]
    end
  }

  # Pairs are `{key:, value:}` hashes (none, one, or an array of them), so
  # they are bound with `subtree`. Keys are stringified to match
  # TomlKeyValue keys.
  rule(inline_table: { pairs: subtree(:ps) }) {
    pair_list = ps.is_a?(Array) ? ps : [ps].compact
    pair_list.each_with_object({}) { |pair, table| table[pair[:key].to_s] = pair[:value] }
  }
end
|
|
177
|
+
|
|
178
|
+
# Parse TOML string
|
|
179
|
+
# Parses +str+ with TomlParser and runs the resulting tree through
# TomlTransform.
#
# Returns whatever the transform produces; if Parsanol::ParseError is
# raised, prints the error message to stdout and returns nil.
def parse_toml(str)
  parse_tree = TomlParser.new.parse(str)
  TomlTransform.new.apply(parse_tree)
rescue Parsanol::ParseError => e
  puts "Parse error: #{e.message}"
  nil
end
|
|
189
|
+
|
|
190
|
+
# Main demo
|
|
191
|
+
# Demo: runs only when this file is executed directly. Prints a sample
# document, then the parsed AST and its hash form if parsing succeeded.
if __FILE__ == $0
  divider = "-" * 50

  sample = <<~TOML
    # This is a comment
    title = "TOML Example"

    [database]
    host = "localhost"
    port = 5432
    enabled = true
    connection_timeout = 30.5

    [server]
    hosts = ["alpha", "beta", "gamma"]
    ports = [8080, 8081, 8082]
  TOML

  puts "TOML Parser"
  puts "=" * 50
  puts

  puts "Input:"
  puts divider
  puts sample
  puts divider
  puts

  document = parse_toml(sample)

  if document
    puts "Parsed AST:"
    pp document
    puts
    puts "Hash Output:"
    pp document.to_h
  end
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# TOML Parser - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/toml
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### Document Structure
|
|
13
|
+
|
|
14
|
+
A TOML document contains entries in sequence:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule(:document) { (comment | table | key_value | newline).repeat.as(:document) }
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Entries are captured in source order, so comments, tables, and key-value pairs appear in the result in the same sequence as in the input.
|
|
21
|
+
|
|
22
|
+
### Comment Rule
|
|
23
|
+
|
|
24
|
+
Hash-prefixed comments to end of line:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
rule(:comment) {
|
|
28
|
+
(str('#') >> (newline.absent? >> any).repeat).as(:comment) >> newline
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Comments are captured but typically ignored in output.
|
|
33
|
+
|
|
34
|
+
### Table Rule
|
|
35
|
+
|
|
36
|
+
Square-bracketed section headers:
|
|
37
|
+
|
|
38
|
+
```ruby
|
|
39
|
+
rule(:table) {
|
|
40
|
+
(str('[') >>
|
|
41
|
+
table_name.as(:name) >>
|
|
42
|
+
str(']') >>
|
|
43
|
+
newline).as(:table)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
rule(:table_name) {
|
|
47
|
+
(match('[a-zA-Z0-9_]') | str('.') | str('-')).repeat(1)
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Dotted names like `[database.server]` create nested tables.
|
|
52
|
+
|
|
53
|
+
### Key-Value Rule
|
|
54
|
+
|
|
55
|
+
Assignment with various value types:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
rule(:key_value) {
|
|
59
|
+
(key.as(:key) >>
|
|
60
|
+
space? >>
|
|
61
|
+
str('=') >>
|
|
62
|
+
space? >>
|
|
63
|
+
value.as(:value) >>
|
|
64
|
+
newline).as(:key_value)
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Keys are alphanumeric with underscores.
|
|
69
|
+
|
|
70
|
+
### String Rules
|
|
71
|
+
|
|
72
|
+
Basic strings with escapes and literal strings:
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
rule(:basic_string) {
|
|
76
|
+
(str('"') >>
|
|
77
|
+
(str('\\').ignore >> any | str('"').absent? >> any).repeat.as(:string) >>
|
|
78
|
+
str('"')).as(:basic_string)
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
rule(:literal_string) {
|
|
82
|
+
(str("'") >>
|
|
83
|
+
(str("'").absent? >> any).repeat.as(:string) >>
|
|
84
|
+
str("'")).as(:literal_string)
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Basic strings process escapes; literal strings don't.
|
|
89
|
+
|
|
90
|
+
### Numeric Rules
|
|
91
|
+
|
|
92
|
+
Integers and floating-point numbers:
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
rule(:integer) {
|
|
96
|
+
(str('+') | str('-')).maybe >>
|
|
97
|
+
match('[0-9]').repeat(1).as(:integer)
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
rule(:float) {
|
|
101
|
+
((str('+') | str('-')).maybe >>
|
|
102
|
+
match('[0-9]').repeat(1) >>
|
|
103
|
+
str('.') >>
|
|
104
|
+
match('[0-9]').repeat(1) >>
|
|
105
|
+
(match('[eE]') >> ...).maybe).as(:float)
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Scientific notation is supported for floats.
|
|
110
|
+
|
|
111
|
+
### Array Rule
|
|
112
|
+
|
|
113
|
+
Square-bracketed value lists:
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
rule(:array) {
|
|
117
|
+
(str('[') >>
|
|
118
|
+
space? >>
|
|
119
|
+
(value >> (comma >> value).repeat).maybe.as(:elements) >>
|
|
120
|
+
space? >>
|
|
121
|
+
str(']')).as(:array)
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Arrays can contain any value type, mixed.
|
|
126
|
+
|
|
127
|
+
### Inline Table Rule
|
|
128
|
+
|
|
129
|
+
Curly-braced key-value pairs:
|
|
130
|
+
|
|
131
|
+
```ruby
|
|
132
|
+
rule(:inline_table) {
|
|
133
|
+
(str('{') >>
|
|
134
|
+
space? >>
|
|
135
|
+
(key_value_inline >> (comma >> key_value_inline).repeat).maybe.as(:pairs) >>
|
|
136
|
+
space? >>
|
|
137
|
+
str('}')).as(:inline_table)
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Inline tables are compact single-line objects.
|
|
142
|
+
|
|
143
|
+
## Output Types
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
# Document with entries
|
|
147
|
+
TomlDocument.new([
|
|
148
|
+
TomlComment.new(" comment"),
|
|
149
|
+
TomlKeyValue.new("title", "TOML Example"),
|
|
150
|
+
TomlTable.new("database"),
|
|
151
|
+
TomlKeyValue.new("host", "localhost")
|
|
152
|
+
])
|
|
153
|
+
|
|
154
|
+
# to_h produces:
|
|
155
|
+
{
|
|
156
|
+
"title" => "TOML Example",
|
|
157
|
+
"database" => { "host" => "localhost" }
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Design Decisions
|
|
162
|
+
|
|
163
|
+
### Why Track Current Table in to_h?
|
|
164
|
+
|
|
165
|
+
TOML key-value pairs belong to the most recent table. The transformation tracks this context.
|
|
166
|
+
|
|
167
|
+
### Why Two String Types?
|
|
168
|
+
|
|
169
|
+
TOML specification defines basic strings (with escapes) and literal strings (raw). Different rules handle them correctly.
|
|
170
|
+
|
|
171
|
+
### Why Separate Integer and Float?
|
|
172
|
+
|
|
173
|
+
Type distinction matters for configuration values. Separate rules preserve type information.
|