parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Comments Parser - Ruby Implementation
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/comments
|
|
7
|
+
ruby basic.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### Line Comment Rule
|
|
13
|
+
|
|
14
|
+
Single-line comments start with `//`:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule(:line_comment) {
|
|
18
|
+
(str('//') >> (newline.absent? >> any).repeat).as(:line)
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Content extends to end of line; newline is not consumed.
|
|
23
|
+
|
|
24
|
+
### Multiline Comment Rule
|
|
25
|
+
|
|
26
|
+
Block comments use `/* */` delimiters:
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
rule(:multiline_comment) {
|
|
30
|
+
(str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi)
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Negative lookahead prevents early termination.
|
|
35
|
+
|
|
36
|
+
### Space Rule with Comments
|
|
37
|
+
|
|
38
|
+
Comments are treated as whitespace:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
rule(:spaces) { space.repeat }
|
|
42
|
+
rule(:space) { multiline_comment | line_comment | str(' ') }
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
This allows comments anywhere whitespace is permitted.
|
|
46
|
+
|
|
47
|
+
### Expression Rule
|
|
48
|
+
|
|
49
|
+
Simple expressions demonstrate comment handling:
|
|
50
|
+
|
|
51
|
+
```ruby
|
|
52
|
+
rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The `spaces` rule consumes any trailing comments.
|
|
56
|
+
|
|
57
|
+
### Lines and Line Endings
|
|
58
|
+
|
|
59
|
+
Input is structured as lines:
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
rule(:lines) { line.repeat }
|
|
63
|
+
rule(:line) { spaces >> expression.repeat >> newline }
|
|
64
|
+
rule(:newline) { str("\n") >> str("\r").maybe }
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
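Each line ends with a newline: LF, optionally followed by CR. As written, the rule matches `\n` or `\n\r`; it does not match a CRLF (`\r\n`) sequence.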
|
|
68
|
+
|
|
69
|
+
### parse_with_debug
|
|
70
|
+
|
|
71
|
+
Debug output shows the complete parse tree:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
pp ALanguage.new.parse_with_debug(code)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Useful for understanding how comments integrate with the grammar.
|
|
78
|
+
|
|
79
|
+
## Output Types
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
# Parse tree for:
|
|
83
|
+
# a // comment
|
|
84
|
+
# a a a /* inline */ a
|
|
85
|
+
#
|
|
86
|
+
[
|
|
87
|
+
{:exp=>[{:a=>"a"}]},
|
|
88
|
+
{:exp=>[{:a=>"a"}, {:a=>"a"}, {:a=>"a"}, {:a=>"a"}]}
|
|
89
|
+
]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Comments are consumed by the `spaces` rule and don't appear in output.
|
|
93
|
+
|
|
94
|
+
## Design Decisions
|
|
95
|
+
|
|
96
|
+
### Why Treat Comments as Whitespace?
|
|
97
|
+
|
|
98
|
+
Comments should be allowed anywhere whitespace is. Making them part of the `space` rule achieves this elegantly.
|
|
99
|
+
|
|
100
|
+
### Why Not Include Newline in Line Comments?
|
|
101
|
+
|
|
102
|
+
Newlines are handled separately by the line structure. This keeps comment content clean.
|
|
103
|
+
|
|
104
|
+
### Why Use Negative Lookahead for Multiline Comments?
|
|
105
|
+
|
|
106
|
+
`str('*/').absent?` stops `any` from consuming the closing delimiter, so the repetition ends at the first `*/`. This is cleaner than trying to enumerate valid characters.
|
|
107
|
+
|
|
108
|
+
### Why parse_with_debug?
|
|
109
|
+
|
|
110
|
+
When building grammars, seeing the full parse tree helps debug unexpected matches or failures.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# CSV Parser Example - Ruby Transform: Ruby Transform (Parslet-Compatible)
|
|
2
|
+
#
|
|
3
|
+
# This example demonstrates Ruby Transform for parsing CSV:
|
|
4
|
+
# 1. Rust parser (parsanol-rs) does the fast parsing
|
|
5
|
+
# 2. Returns a generic tree (hash/array/string structure)
|
|
6
|
+
# 3. Ruby transform converts tree to Ruby objects
|
|
7
|
+
|
|
8
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
|
9
|
+
|
|
10
|
+
require 'parsanol'
|
|
11
|
+
|
|
12
|
+
# Step 1: Define the CSV parser grammar
|
|
13
|
+
class CsvParser < Parsanol::Parser
|
|
14
|
+
root :csv
|
|
15
|
+
|
|
16
|
+
rule(:csv) {
|
|
17
|
+
space? >> (row >> (newline >> row).repeat).maybe >> space?
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
rule(:row) {
|
|
21
|
+
(field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
rule(:field) {
|
|
25
|
+
quoted_field | simple_field
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
# Quoted field: "value with ""escaped"" quotes"
|
|
29
|
+
rule(:quoted_field) {
|
|
30
|
+
str('"') >> (
|
|
31
|
+
str('""') | str('"').absent? >> any
|
|
32
|
+
).repeat.as(:quoted) >> str('"')
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
# Simple field: value without commas or quotes
|
|
36
|
+
rule(:simple_field) {
|
|
37
|
+
(comma.absent? >> newline.absent? >> any).repeat.as(:simple)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
# Helpers
|
|
41
|
+
rule(:comma) { str(',') }
|
|
42
|
+
rule(:newline) { str("\n") | str("\r\n") | str("\r") }
|
|
43
|
+
rule(:space) { match('\s').repeat(1) }
|
|
44
|
+
rule(:space?) { space.maybe }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Step 2: Define the transform (Parslet-style)
|
|
48
|
+
class CsvTransform < Parsanol::Transform
|
|
49
|
+
# Transform a row (sequence of fields)
|
|
50
|
+
rule(row: sequence(:fields)) {
|
|
51
|
+
fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
# Transform quoted field
|
|
55
|
+
rule(quoted: simple(:q)) {
|
|
56
|
+
q.to_s.gsub('""', '"')
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
# Transform simple field
|
|
60
|
+
rule(simple: simple(:s)) {
|
|
61
|
+
s.to_s.strip
|
|
62
|
+
}
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Step 3: Parse and transform
|
|
66
|
+
def parse_csv(input)
|
|
67
|
+
parser = CsvParser.new
|
|
68
|
+
transform = CsvTransform.new
|
|
69
|
+
|
|
70
|
+
# Ruby Transform: Parse in Rust, transform in Ruby
|
|
71
|
+
tree = parser.parse(input)
|
|
72
|
+
puts "Parse tree: #{tree.inspect[0..200]}..."
|
|
73
|
+
|
|
74
|
+
result = transform.apply(tree)
|
|
75
|
+
puts "Result: #{result.inspect[0..200]}..."
|
|
76
|
+
|
|
77
|
+
result
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Step 4: Convert to array of hashes (for CSV with headers)
|
|
81
|
+
def parse_csv_with_headers(input)
|
|
82
|
+
rows = parse_csv(input)
|
|
83
|
+
|
|
84
|
+
return [] if rows.empty?
|
|
85
|
+
|
|
86
|
+
# First row is headers
|
|
87
|
+
headers = rows.first
|
|
88
|
+
data = rows[1..]
|
|
89
|
+
|
|
90
|
+
data.map { |row| headers.zip(row).to_h }
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Example usage
|
|
94
|
+
if __FILE__ == $0
|
|
95
|
+
puts "=" * 60
|
|
96
|
+
puts "CSV Parser Example - Ruby Transform: Ruby Transform"
|
|
97
|
+
puts "=" * 60
|
|
98
|
+
|
|
99
|
+
# Simple CSV
|
|
100
|
+
simple_csv = <<~CSV
|
|
101
|
+
name,age,city
|
|
102
|
+
Alice,30,New York
|
|
103
|
+
Bob,25,San Francisco
|
|
104
|
+
CSV
|
|
105
|
+
|
|
106
|
+
puts
|
|
107
|
+
puts "Simple CSV:"
|
|
108
|
+
puts "-" * 40
|
|
109
|
+
result = parse_csv(simple_csv)
|
|
110
|
+
puts result.inspect
|
|
111
|
+
|
|
112
|
+
# CSV with headers parsed to hashes
|
|
113
|
+
puts
|
|
114
|
+
puts "CSV with headers:"
|
|
115
|
+
puts "-" * 40
|
|
116
|
+
result = parse_csv_with_headers(simple_csv)
|
|
117
|
+
result.each { |row| puts row.inspect }
|
|
118
|
+
|
|
119
|
+
# CSV with quoted fields
|
|
120
|
+
quoted_csv = <<~CSV
|
|
121
|
+
name,description,city
|
|
122
|
+
Alice,"Hello, World",New York
|
|
123
|
+
Bob,"Test ""quoted"" text",Boston
|
|
124
|
+
CSV
|
|
125
|
+
|
|
126
|
+
puts
|
|
127
|
+
puts "CSV with quoted fields:"
|
|
128
|
+
puts "-" * 40
|
|
129
|
+
result = parse_csv_with_headers(quoted_csv)
|
|
130
|
+
result.each { |row| puts row.inspect }
|
|
131
|
+
|
|
132
|
+
# Empty CSV
|
|
133
|
+
empty_csv = ""
|
|
134
|
+
|
|
135
|
+
puts
|
|
136
|
+
puts "Empty CSV:"
|
|
137
|
+
puts "-" * 40
|
|
138
|
+
result = parse_csv(empty_csv)
|
|
139
|
+
puts result.inspect
|
|
140
|
+
|
|
141
|
+
puts
|
|
142
|
+
puts "=" * 60
|
|
143
|
+
puts "Ruby Transform Benefits for CSV:"
|
|
144
|
+
puts "- Flexible transform logic"
|
|
145
|
+
puts "- Easy to add custom processing"
|
|
146
|
+
puts "- Compatible with existing Parslet code"
|
|
147
|
+
puts "=" * 60
|
|
148
|
+
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# CSV Parser - Ruby Implementation (Transform)
|
|
2
|
+
|
|
3
|
+
## How to Run
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd parsanol-ruby/example/csv
|
|
7
|
+
ruby ruby_transform.rb
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Code Walkthrough
|
|
11
|
+
|
|
12
|
+
### CSV Grammar Definition
|
|
13
|
+
|
|
14
|
+
The grammar handles rows, fields, and quoted content:
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
rule(:csv) {
|
|
18
|
+
space? >> (row >> (newline >> row).repeat).maybe >> space?
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
rule(:row) {
|
|
22
|
+
(field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Each row captures multiple fields labeled `:f`, wrapped in `:row`.
|
|
27
|
+
|
|
28
|
+
### Quoted Field Handling
|
|
29
|
+
|
|
30
|
+
Quoted fields support escaped quotes:
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
rule(:quoted_field) {
|
|
34
|
+
str('"') >> (
|
|
35
|
+
str('""') | str('"').absent? >> any
|
|
36
|
+
).repeat.as(:quoted) >> str('"')
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Double quotes (`""`) inside quoted fields represent literal quotes.
|
|
41
|
+
|
|
42
|
+
### Simple Field Handling
|
|
43
|
+
|
|
44
|
+
Simple fields exclude commas and newlines:
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
rule(:simple_field) {
|
|
48
|
+
(comma.absent? >> newline.absent? >> any).repeat.as(:simple)
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Negative lookahead prevents field content from including delimiters.
|
|
53
|
+
|
|
54
|
+
### Field Rule Selection
|
|
55
|
+
|
|
56
|
+
The field rule tries quoted first:
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
rule(:field) {
|
|
60
|
+
quoted_field | simple_field
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Quoted fields have priority to correctly handle fields starting with `"`.
|
|
65
|
+
|
|
66
|
+
### Transform Rules
|
|
67
|
+
|
|
68
|
+
The transform converts parse tree to Ruby arrays:
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
class CsvTransform < Parsanol::Transform
|
|
72
|
+
# Transform a row (sequence of fields)
|
|
73
|
+
rule(row: sequence(:fields)) {
|
|
74
|
+
fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
# Transform quoted field
|
|
78
|
+
rule(quoted: simple(:q)) {
|
|
79
|
+
q.to_s.gsub('""', '"')
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
# Transform simple field
|
|
83
|
+
rule(simple: simple(:s)) {
|
|
84
|
+
s.to_s.strip
|
|
85
|
+
}
|
|
86
|
+
end
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Pattern matching extracts field content and converts to strings.
|
|
90
|
+
|
|
91
|
+
### Header-Based Parsing
|
|
92
|
+
|
|
93
|
+
CSV with headers converts to array of hashes:
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
def parse_csv_with_headers(input)
|
|
97
|
+
rows = parse_csv(input)
|
|
98
|
+
return [] if rows.empty?
|
|
99
|
+
|
|
100
|
+
headers = rows.first
|
|
101
|
+
data = rows[1..]
|
|
102
|
+
|
|
103
|
+
data.map { |row| headers.zip(row).to_h }
|
|
104
|
+
end
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
First row becomes keys; subsequent rows become values.
|
|
108
|
+
|
|
109
|
+
## Output Types
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
# Without headers:
|
|
113
|
+
[["name", "age", "city"], ["Alice", "30", "New York"], ...]
|
|
114
|
+
|
|
115
|
+
# With headers:
|
|
116
|
+
[{"name"=>"Alice", "age"=>"30", "city"=>"New York"}, ...]
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Design Decisions
|
|
120
|
+
|
|
121
|
+
### Why Ruby Transform Over Rust?
|
|
122
|
+
|
|
123
|
+
Ruby transform allows custom processing logic without modifying Rust code. Useful for domain-specific transformations and data enrichment.
|
|
124
|
+
|
|
125
|
+
### Why Sequence Pattern for Rows?
|
|
126
|
+
|
|
127
|
+
`sequence(:fields)` handles both single and multiple fields uniformly, avoiding special cases for one-field rows.
|
|
128
|
+
|
|
129
|
+
### Why Priority for Quoted Fields?
|
|
130
|
+
|
|
131
|
+
If simple field were first, `"hello"` would match as a simple field with the quote characters kept in the value, and a quoted field containing a comma would be split at that comma. Quoted field priority ensures correct parsing.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# CSV Parser Example - Serialized: JSON Serialization
|
|
2
|
+
#
|
|
3
|
+
# This example demonstrates Serialized for parsing CSV:
|
|
4
|
+
# 1. Rust parser (parsanol-rs) does the parsing
|
|
5
|
+
# 2. Rust transform converts to typed structs
|
|
6
|
+
# 3. Result is serialized to JSON
|
|
7
|
+
# 4. Ruby deserializes JSON to Ruby objects
|
|
8
|
+
#
|
|
9
|
+
# This option is useful when you need to validate/process CSV
|
|
10
|
+
# and get structured output for other tools.
|
|
11
|
+
|
|
12
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
|
13
|
+
|
|
14
|
+
require 'parsanol'
|
|
15
|
+
require 'json'
|
|
16
|
+
|
|
17
|
+
# NOTE: This example requires the native extension to support parse_to_json
|
|
18
|
+
# which is planned but not yet implemented. This serves as an API preview.
|
|
19
|
+
|
|
20
|
+
# Step 1: Define the CSV parser grammar (same as Option A)
|
|
21
|
+
class CsvParser < Parsanol::Parser
|
|
22
|
+
root :csv
|
|
23
|
+
|
|
24
|
+
rule(:csv) {
|
|
25
|
+
space? >> (row >> (newline >> row).repeat).maybe >> space?
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
rule(:row) {
|
|
29
|
+
(field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
rule(:field) {
|
|
33
|
+
quoted_field | simple_field
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
rule(:quoted_field) {
|
|
37
|
+
str('"') >> (
|
|
38
|
+
str('""') | str('"').absent? >> any
|
|
39
|
+
).repeat.as(:quoted) >> str('"')
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
rule(:simple_field) {
|
|
43
|
+
(comma.absent? >> newline.absent? >> any).repeat.as(:simple)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
rule(:comma) { str(',') }
|
|
47
|
+
rule(:newline) { str("\n") | str("\r\n") | str("\r") }
|
|
48
|
+
rule(:space) { match('\s').repeat(1) }
|
|
49
|
+
rule(:space?) { space.maybe }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Step 2: Define typed classes for CSV data
|
|
53
|
+
class CsvRow
|
|
54
|
+
attr_reader :fields
|
|
55
|
+
|
|
56
|
+
def initialize(fields)
|
|
57
|
+
@fields = fields
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def to_a = @fields
|
|
61
|
+
|
|
62
|
+
def [](index)
|
|
63
|
+
@fields[index]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def each(&block)
|
|
67
|
+
@fields.each(&block)
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
class CsvDocument
|
|
72
|
+
attr_reader :rows
|
|
73
|
+
|
|
74
|
+
def initialize(rows)
|
|
75
|
+
@rows = rows
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def to_a
|
|
79
|
+
@rows.map(&:to_a)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def headers
|
|
83
|
+
@rows.first&.fields
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def data
|
|
87
|
+
@rows[1..] || []
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def to_hashes
|
|
91
|
+
return [] unless headers && !data.empty?
|
|
92
|
+
|
|
93
|
+
headers = self.headers
|
|
94
|
+
data.map { |row| headers.zip(row.fields).to_h }
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Step 3: Deserializer
|
|
99
|
+
class CsvDeserializer
|
|
100
|
+
def self.from_json(json_string)
|
|
101
|
+
data = JSON.parse(json_string)
|
|
102
|
+
|
|
103
|
+
case data
|
|
104
|
+
when Array
|
|
105
|
+
rows = data.map { |row_data| CsvRow.new(row_data) }
|
|
106
|
+
CsvDocument.new(rows)
|
|
107
|
+
else
|
|
108
|
+
raise "Expected array of rows, got #{data.class}"
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Step 4: Parse with JSON output
|
|
114
|
+
def parse_csv(input)
|
|
115
|
+
parser = CsvParser.new
|
|
116
|
+
|
|
117
|
+
# Serialized: Parse and get JSON from Rust
|
|
118
|
+
# NOTE: This requires native extension support
|
|
119
|
+
# output_json = parser.parse_to_json(input)
|
|
120
|
+
|
|
121
|
+
# For now, simulate by using Option A then serializing
|
|
122
|
+
require_relative 'csv_option_a'
|
|
123
|
+
tree = parser.parse(input)
|
|
124
|
+
transform = CsvTransform.new
|
|
125
|
+
result = transform.apply(tree)
|
|
126
|
+
|
|
127
|
+
# This would come from Rust in Serialized
|
|
128
|
+
# Convert to array format for JSON
|
|
129
|
+
output_json = result.to_json
|
|
130
|
+
puts "Output JSON (first 200 chars): #{output_json[0..200]}..."
|
|
131
|
+
|
|
132
|
+
# Deserialize to typed objects
|
|
133
|
+
csv_doc = CsvDeserializer.from_json(output_json)
|
|
134
|
+
puts "Parsed: #{csv_doc.class} with #{csv_doc.rows.size} rows"
|
|
135
|
+
|
|
136
|
+
csv_doc
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Transform class (needed for simulation)
|
|
140
|
+
class CsvTransform < Parsanol::Transform
|
|
141
|
+
rule(row: sequence(:fields)) {
|
|
142
|
+
fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
rule(quoted: simple(:q)) { unescape_quoted(q.to_s) }
|
|
146
|
+
rule(simple: simple(:s)) { s.to_s.strip }
|
|
147
|
+
|
|
148
|
+
private
|
|
149
|
+
|
|
150
|
+
def unescape(field)
|
|
151
|
+
if field.is_a?(Hash) && field[:quoted]
|
|
152
|
+
unescape_quoted(field[:quoted])
|
|
153
|
+
elsif field.is_a?(Hash) && field[:simple]
|
|
154
|
+
field[:simple].to_s.strip
|
|
155
|
+
else
|
|
156
|
+
field
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def unescape_quoted(str)
|
|
161
|
+
str.gsub('""', '"')
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Example usage
|
|
166
|
+
if __FILE__ == $0
|
|
167
|
+
puts "=" * 60
|
|
168
|
+
puts "CSV Parser Example - Serialized: JSON Serialization"
|
|
169
|
+
puts "=" * 60
|
|
170
|
+
puts
|
|
171
|
+
puts "NOTE: This example shows the planned API for Serialized."
|
|
172
|
+
puts "The native extension support for parse_to_json is coming soon."
|
|
173
|
+
puts
|
|
174
|
+
|
|
175
|
+
simple_csv = <<~CSV
|
|
176
|
+
name,age,city
|
|
177
|
+
Alice,30,New York
|
|
178
|
+
Bob,25,San Francisco
|
|
179
|
+
CSV
|
|
180
|
+
|
|
181
|
+
puts "Simple CSV:"
|
|
182
|
+
puts "-" * 40
|
|
183
|
+
csv_doc = parse_csv(simple_csv)
|
|
184
|
+
|
|
185
|
+
puts
|
|
186
|
+
puts "As arrays:"
|
|
187
|
+
csv_doc.to_a.each { |row| puts row.inspect }
|
|
188
|
+
|
|
189
|
+
puts
|
|
190
|
+
puts "As hashes:"
|
|
191
|
+
csv_doc.to_hashes.each { |row| puts row.inspect }
|
|
192
|
+
|
|
193
|
+
puts
|
|
194
|
+
puts "=" * 60
|
|
195
|
+
puts "Serialized Benefits for CSV:"
|
|
196
|
+
puts "- Structured JSON output for other tools"
|
|
197
|
+
puts "- Easy to cache/store results"
|
|
198
|
+
puts "- Type-safe access via CsvRow/CsvDocument classes"
|
|
199
|
+
puts "- Cross-language compatibility"
|
|
200
|
+
puts "=" * 60
|
|
201
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# CSV (Serialized - Option B)
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
This implementation demonstrates full Rust processing with JSON output
|
|
6
|
+
for CSV parsing.
|
|
7
|
+
|
|
8
|
+
## When to Use
|
|
9
|
+
|
|
10
|
+
- Cross-language compatibility
|
|
11
|
+
- Structured output required
|
|
12
|
+
- Performance-critical applications
|
|
13
|
+
|
|
14
|
+
## Key Concepts
|
|
15
|
+
|
|
16
|
+
1. **Rust Parsing + Transform**: All processing in Rust
|
|
17
|
+
2. **JSON Serialization**: Language-agnostic output
|
|
18
|
+
3. **Type Safety**: Schema-driven structure
|
|
19
|
+
|
|
20
|
+
## Running
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
ruby example/csv/serialized.rb
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Output
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Input: a,b,c
|
|
30
|
+
JSON: [["a","b","c"]]
|
|
31
|
+
```
|