parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,110 @@
1
+ # Comments Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/comments
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Line Comment Rule
13
+
14
+ Single-line comments start with `//`:
15
+
16
+ ```ruby
17
+ rule(:line_comment) {
18
+ (str('//') >> (newline.absent? >> any).repeat).as(:line)
19
+ }
20
+ ```
21
+
22
+ Content extends to end of line; newline is not consumed.
23
+
24
+ ### Multiline Comment Rule
25
+
26
+ Block comments use `/* */` delimiters:
27
+
28
+ ```ruby
29
+ rule(:multiline_comment) {
30
+ (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi)
31
+ }
32
+ ```
33
+
34
+ Negative lookahead prevents early termination.
35
+
36
+ ### Space Rule with Comments
37
+
38
+ Comments are treated as whitespace:
39
+
40
+ ```ruby
41
+ rule(:spaces) { space.repeat }
42
+ rule(:space) { multiline_comment | line_comment | str(' ') }
43
+ ```
44
+
45
+ This allows comments anywhere whitespace is permitted.
46
+
47
+ ### Expression Rule
48
+
49
+ Simple expressions demonstrate comment handling:
50
+
51
+ ```ruby
52
+ rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
53
+ ```
54
+
55
+ The `spaces` rule consumes any trailing comments.
56
+
57
+ ### Lines and Line Endings
58
+
59
+ Input is structured as lines:
60
+
61
+ ```ruby
62
+ rule(:lines) { line.repeat }
63
+ rule(:line) { spaces >> expression.repeat >> newline }
64
+ rule(:newline) { str("\n") >> str("\r").maybe }
65
+ ```
66
+
67
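+ Each line ends with a line feed (`\n`), optionally followed by a carriage return (`\r`). Note that the rule as written accepts LF or LF+CR, not the Windows CRLF (`\r\n`) sequence.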
68
+
69
+ ### parse_with_debug
70
+
71
+ Debug output shows the complete parse tree:
72
+
73
+ ```ruby
74
+ pp ALanguage.new.parse_with_debug(code)
75
+ ```
76
+
77
+ Useful for understanding how comments integrate with the grammar.
78
+
79
+ ## Output Types
80
+
81
+ ```ruby
82
+ # Parse tree for:
83
+ # a // comment
84
+ # a a a /* inline */ a
85
+ #
86
+ [
87
+ {:exp=>[{:a=>"a"}]},
88
+ {:exp=>[{:a=>"a"}, {:a=>"a"}, {:a=>"a"}, {:a=>"a"}]}
89
+ ]
90
+ ```
91
+
92
+ Comments are consumed by the `spaces` rule and don't appear in output.
93
+
94
+ ## Design Decisions
95
+
96
+ ### Why Treat Comments as Whitespace?
97
+
98
+ Comments should be allowed anywhere whitespace is. Making them part of the `space` rule achieves this elegantly.
99
+
100
+ ### Why Not Include Newline in Line Comments?
101
+
102
+ Newlines are handled separately by the line structure. This keeps comment content clean.
103
+
104
+ ### Why Use Negative Lookahead for Multiline Comments?
105
+
106
+ `str('*/').absent?` ensures we don't prematurely match the closing delimiter. This is cleaner than trying to enumerate valid characters.
107
+
108
+ ### Why parse_with_debug?
109
+
110
+ When building grammars, seeing the full parse tree helps debug unexpected matches or failures.
@@ -0,0 +1,148 @@
1
+ # CSV Parser Example - Ruby Transform (Parslet-Compatible)
2
+ #
3
+ # This example demonstrates Ruby Transform for parsing CSV:
4
+ # 1. Rust parser (parsanol-rs) does the fast parsing
5
+ # 2. Returns a generic tree (hash/array/string structure)
6
+ # 3. Ruby transform converts tree to Ruby objects
7
+
8
+ $:.unshift File.expand_path('../../lib', __dir__) # this script lives in example/csv/, so the gem's lib/ is two levels up
9
+
10
+ require 'parsanol'
11
+
12
+ # Step 1: Define the CSV parser grammar
13
+ class CsvParser < Parsanol::Parser
14
+ root :csv
15
+
16
+ rule(:csv) {
17
+ space? >> (row >> (newline >> row).repeat).maybe >> space?
18
+ }
19
+
20
+ rule(:row) {
21
+ (field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
22
+ }
23
+
24
+ rule(:field) {
25
+ quoted_field | simple_field
26
+ }
27
+
28
+ # Quoted field: "value with ""escaped"" quotes"
29
+ rule(:quoted_field) {
30
+ str('"') >> (
31
+ str('""') | str('"').absent? >> any
32
+ ).repeat.as(:quoted) >> str('"')
33
+ }
34
+
35
+ # Simple field: value without commas or quotes
36
+ rule(:simple_field) {
37
+ (comma.absent? >> newline.absent? >> any).repeat.as(:simple)
38
+ }
39
+
40
+ # Helpers
41
+ rule(:comma) { str(',') }
42
+ rule(:newline) { str("\n") | str("\r\n") | str("\r") }
43
+ rule(:space) { match('\s').repeat(1) }
44
+ rule(:space?) { space.maybe }
45
+ end
46
+
47
+ # Step 2: Define the transform (Parslet-style)
48
+ class CsvTransform < Parsanol::Transform
49
+ # Transform a row (sequence of fields). NOTE(review): `unescape` is not defined anywhere in this file, so the Hash branch below would raise NoMethodError if it is ever reached - confirm it is unreachable or add the helper.
50
+ rule(row: sequence(:fields)) {
51
+ fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
52
+ }
53
+
54
+ # Transform quoted field: collapse doubled quotes ("") into a single quote character
55
+ rule(quoted: simple(:q)) {
56
+ q.to_s.gsub('""', '"')
57
+ }
58
+
59
+ # Transform simple field: convert to a String and strip surrounding whitespace
60
+ rule(simple: simple(:s)) {
61
+ s.to_s.strip
62
+ }
63
+ end
64
+
65
+ # Step 3: Parse and transform
66
+ def parse_csv(input)
67
+ parser = CsvParser.new
68
+ transform = CsvTransform.new
69
+
70
+ # Ruby Transform: Parse in Rust, transform in Ruby
71
+ tree = parser.parse(input)
72
+ puts "Parse tree: #{tree.inspect[0..200]}..."
73
+
74
+ result = transform.apply(tree)
75
+ puts "Result: #{result.inspect[0..200]}..."
76
+
77
+ result
78
+ end
79
+
80
+ # Step 4: Convert to array of hashes (for CSV with headers)
81
+ def parse_csv_with_headers(input)
82
+ rows = parse_csv(input)
83
+
84
+ return [] if rows.empty?
85
+
86
+ # First row is headers
87
+ headers = rows.first
88
+ data = rows[1..]
89
+
90
+ data.map { |row| headers.zip(row).to_h }
91
+ end
92
+
93
+ # Example usage
94
+ if __FILE__ == $0
95
+ puts "=" * 60
96
+ puts "CSV Parser Example - Ruby Transform: Ruby Transform"
97
+ puts "=" * 60
98
+
99
+ # Simple CSV
100
+ simple_csv = <<~CSV
101
+ name,age,city
102
+ Alice,30,New York
103
+ Bob,25,San Francisco
104
+ CSV
105
+
106
+ puts
107
+ puts "Simple CSV:"
108
+ puts "-" * 40
109
+ result = parse_csv(simple_csv)
110
+ puts result.inspect
111
+
112
+ # CSV with headers parsed to hashes
113
+ puts
114
+ puts "CSV with headers:"
115
+ puts "-" * 40
116
+ result = parse_csv_with_headers(simple_csv)
117
+ result.each { |row| puts row.inspect }
118
+
119
+ # CSV with quoted fields
120
+ quoted_csv = <<~CSV
121
+ name,description,city
122
+ Alice,"Hello, World",New York
123
+ Bob,"Test ""quoted"" text",Boston
124
+ CSV
125
+
126
+ puts
127
+ puts "CSV with quoted fields:"
128
+ puts "-" * 40
129
+ result = parse_csv_with_headers(quoted_csv)
130
+ result.each { |row| puts row.inspect }
131
+
132
+ # Empty CSV
133
+ empty_csv = ""
134
+
135
+ puts
136
+ puts "Empty CSV:"
137
+ puts "-" * 40
138
+ result = parse_csv(empty_csv)
139
+ puts result.inspect
140
+
141
+ puts
142
+ puts "=" * 60
143
+ puts "Ruby Transform Benefits for CSV:"
144
+ puts "- Flexible transform logic"
145
+ puts "- Easy to add custom processing"
146
+ puts "- Compatible with existing Parslet code"
147
+ puts "=" * 60
148
+ end
@@ -0,0 +1,131 @@
1
+ # CSV Parser - Ruby Implementation (Transform)
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/csv
7
+ ruby ruby_transform.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### CSV Grammar Definition
13
+
14
+ The grammar handles rows, fields, and quoted content:
15
+
16
+ ```ruby
17
+ rule(:csv) {
18
+ space? >> (row >> (newline >> row).repeat).maybe >> space?
19
+ }
20
+
21
+ rule(:row) {
22
+ (field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
23
+ }
24
+ ```
25
+
26
+ Each row captures multiple fields labeled `:f`, wrapped in `:row`.
27
+
28
+ ### Quoted Field Handling
29
+
30
+ Quoted fields support escaped quotes:
31
+
32
+ ```ruby
33
+ rule(:quoted_field) {
34
+ str('"') >> (
35
+ str('""') | str('"').absent? >> any
36
+ ).repeat.as(:quoted) >> str('"')
37
+ }
38
+ ```
39
+
40
+ Double quotes (`""`) inside quoted fields represent literal quotes.
41
+
42
+ ### Simple Field Handling
43
+
44
+ Simple fields exclude commas and newlines:
45
+
46
+ ```ruby
47
+ rule(:simple_field) {
48
+ (comma.absent? >> newline.absent? >> any).repeat.as(:simple)
49
+ }
50
+ ```
51
+
52
+ Negative lookahead prevents field content from including delimiters.
53
+
54
+ ### Field Rule Selection
55
+
56
+ The field rule tries quoted first:
57
+
58
+ ```ruby
59
+ rule(:field) {
60
+ quoted_field | simple_field
61
+ }
62
+ ```
63
+
64
+ Quoted fields have priority to correctly handle fields starting with `"`.
65
+
66
+ ### Transform Rules
67
+
68
+ The transform converts parse tree to Ruby arrays:
69
+
70
+ ```ruby
71
+ class CsvTransform < Parsanol::Transform
72
+ # Transform a row (sequence of fields)
73
+ rule(row: sequence(:fields)) {
74
+ fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
75
+ }
76
+
77
+ # Transform quoted field
78
+ rule(quoted: simple(:q)) {
79
+ q.to_s.gsub('""', '"')
80
+ }
81
+
82
+ # Transform simple field
83
+ rule(simple: simple(:s)) {
84
+ s.to_s.strip
85
+ }
86
+ end
87
+ ```
88
+
89
+ Pattern matching extracts field content and converts to strings.
90
+
91
+ ### Header-Based Parsing
92
+
93
+ CSV with headers converts to array of hashes:
94
+
95
+ ```ruby
96
+ def parse_csv_with_headers(input)
97
+ rows = parse_csv(input)
98
+ return [] if rows.empty?
99
+
100
+ headers = rows.first
101
+ data = rows[1..]
102
+
103
+ data.map { |row| headers.zip(row).to_h }
104
+ end
105
+ ```
106
+
107
+ First row becomes keys; subsequent rows become values.
108
+
109
+ ## Output Types
110
+
111
+ ```ruby
112
+ # Without headers:
113
+ [["name", "age", "city"], ["Alice", "30", "New York"], ...]
114
+
115
+ # With headers:
116
+ [{"name"=>"Alice", "age"=>"30", "city"=>"New York"}, ...]
117
+ ```
118
+
119
+ ## Design Decisions
120
+
121
+ ### Why Ruby Transform Over Rust?
122
+
123
+ Ruby transform allows custom processing logic without modifying Rust code. Useful for domain-specific transformations and data enrichment.
124
+
125
+ ### Why Sequence Pattern for Rows?
126
+
127
+ `sequence(:fields)` handles both single and multiple fields uniformly, avoiding special cases for one-field rows.
128
+
129
+ ### Why Priority for Quoted Fields?
130
+
131
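+ If simple field were first, `"hello"` would match entirely as a simple field with the quote characters kept as literal content, and a quoted value containing a comma (such as `"Hello, World"`) would be split at the comma. Quoted field priority ensures correct parsing.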
@@ -0,0 +1,201 @@
1
+ # CSV Parser Example - Serialized: JSON Serialization
2
+ #
3
+ # This example demonstrates Serialized for parsing CSV:
4
+ # 1. Rust parser (parsanol-rs) does the parsing
5
+ # 2. Rust transform converts to typed structs
6
+ # 3. Result is serialized to JSON
7
+ # 4. Ruby deserializes JSON to Ruby objects
8
+ #
9
+ # This option is useful when you need to validate/process CSV
10
+ # and get structured output for other tools.
11
+
12
+ $:.unshift File.expand_path('../../lib', __dir__) # this script lives in example/csv/, so the gem's lib/ is two levels up
13
+
14
+ require 'parsanol'
15
+ require 'json'
16
+
17
+ # NOTE: This example requires the native extension to support parse_to_json
18
+ # which is planned but not yet implemented. This serves as an API preview.
19
+
20
+ # Step 1: Define the CSV parser grammar (same as the Ruby Transform example, "Option A")
21
+ class CsvParser < Parsanol::Parser
22
+ root :csv
23
+
24
+ rule(:csv) {
25
+ space? >> (row >> (newline >> row).repeat).maybe >> space?
26
+ }
27
+
28
+ rule(:row) {
29
+ (field.as(:f) >> (comma >> field.as(:f)).repeat).as(:row)
30
+ }
31
+
32
+ rule(:field) {
33
+ quoted_field | simple_field
34
+ }
35
+
36
+ rule(:quoted_field) {
37
+ str('"') >> (
38
+ str('""') | str('"').absent? >> any
39
+ ).repeat.as(:quoted) >> str('"')
40
+ }
41
+
42
+ rule(:simple_field) {
43
+ (comma.absent? >> newline.absent? >> any).repeat.as(:simple)
44
+ }
45
+
46
+ rule(:comma) { str(',') }
47
+ rule(:newline) { str("\n") | str("\r\n") | str("\r") }
48
+ rule(:space) { match('\s').repeat(1) }
49
+ rule(:space?) { space.maybe }
50
+ end
51
+
52
+ # Step 2: Define typed classes for CSV data
53
+ class CsvRow
54
+ attr_reader :fields
55
+
56
+ def initialize(fields)
57
+ @fields = fields
58
+ end
59
+
60
+ def to_a = @fields
61
+
62
+ def [](index)
63
+ @fields[index]
64
+ end
65
+
66
+ def each(&block)
67
+ @fields.each(&block)
68
+ end
69
+ end
70
+
71
+ class CsvDocument
72
+ attr_reader :rows
73
+
74
+ def initialize(rows)
75
+ @rows = rows
76
+ end
77
+
78
+ def to_a
79
+ @rows.map(&:to_a)
80
+ end
81
+
82
+ def headers
83
+ @rows.first&.fields
84
+ end
85
+
86
+ def data
87
+ @rows[1..] || []
88
+ end
89
+
90
+ def to_hashes
91
+ return [] unless headers && !data.empty?
92
+
93
+ headers = self.headers
94
+ data.map { |row| headers.zip(row.fields).to_h }
95
+ end
96
+ end
97
+
98
+ # Step 3: Deserializer
99
+ class CsvDeserializer
100
+ def self.from_json(json_string)
101
+ data = JSON.parse(json_string)
102
+
103
+ case data
104
+ when Array
105
+ rows = data.map { |row_data| CsvRow.new(row_data) }
106
+ CsvDocument.new(rows)
107
+ else
108
+ raise "Expected array of rows, got #{data.class}"
109
+ end
110
+ end
111
+ end
112
+
113
+ # Step 4: Parse with JSON output
114
+ def parse_csv(input)
115
+ parser = CsvParser.new
116
+
117
+ # Serialized: Parse and get JSON from Rust
118
+ # NOTE: This requires native extension support
119
+ # output_json = parser.parse_to_json(input)
120
+
121
+ # For now, simulate by using Option A then serializing
122
+ require_relative 'csv_option_a'
123
+ tree = parser.parse(input)
124
+ transform = CsvTransform.new
125
+ result = transform.apply(tree)
126
+
127
+ # This would come from Rust in Serialized
128
+ # Convert to array format for JSON
129
+ output_json = result.to_json
130
+ puts "Output JSON (first 200 chars): #{output_json[0..200]}..."
131
+
132
+ # Deserialize to typed objects
133
+ csv_doc = CsvDeserializer.from_json(output_json)
134
+ puts "Parsed: #{csv_doc.class} with #{csv_doc.rows.size} rows"
135
+
136
+ csv_doc
137
+ end
138
+
139
+ # Transform class (needed for simulation)
140
+ class CsvTransform < Parsanol::Transform
141
+ rule(row: sequence(:fields)) {
142
+ fields.map { |f| f.is_a?(Hash) ? unescape(f) : f }
143
+ }
144
+
145
+ rule(quoted: simple(:q)) { unescape_quoted(q.to_s) }
146
+ rule(simple: simple(:s)) { s.to_s.strip }
147
+
148
+ private
149
+
150
+ def unescape(field)
151
+ if field.is_a?(Hash) && field[:quoted]
152
+ unescape_quoted(field[:quoted])
153
+ elsif field.is_a?(Hash) && field[:simple]
154
+ field[:simple].to_s.strip
155
+ else
156
+ field
157
+ end
158
+ end
159
+
160
+ def unescape_quoted(str)
161
+ str.gsub('""', '"')
162
+ end
163
+ end
164
+
165
+ # Example usage
166
+ if __FILE__ == $0
167
+ puts "=" * 60
168
+ puts "CSV Parser Example - Serialized: JSON Serialization"
169
+ puts "=" * 60
170
+ puts
171
+ puts "NOTE: This example shows the planned API for Serialized."
172
+ puts "The native extension support for parse_to_json is coming soon."
173
+ puts
174
+
175
+ simple_csv = <<~CSV
176
+ name,age,city
177
+ Alice,30,New York
178
+ Bob,25,San Francisco
179
+ CSV
180
+
181
+ puts "Simple CSV:"
182
+ puts "-" * 40
183
+ csv_doc = parse_csv(simple_csv)
184
+
185
+ puts
186
+ puts "As arrays:"
187
+ csv_doc.to_a.each { |row| puts row.inspect }
188
+
189
+ puts
190
+ puts "As hashes:"
191
+ csv_doc.to_hashes.each { |row| puts row.inspect }
192
+
193
+ puts
194
+ puts "=" * 60
195
+ puts "Serialized Benefits for CSV:"
196
+ puts "- Structured JSON output for other tools"
197
+ puts "- Easy to cache/store results"
198
+ puts "- Type-safe access via CsvRow/CsvDocument classes"
199
+ puts "- Cross-language compatibility"
200
+ puts "=" * 60
201
+ end
@@ -0,0 +1,31 @@
1
+ # CSV (Serialized - Option B)
2
+
3
+ ## Purpose
4
+
5
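+ This implementation previews full Rust processing with JSON output
6
+ for CSV parsing. Native `parse_to_json` support is not implemented yet, so `serialized.rb` currently simulates it by transforming in Ruby and serializing the result with `to_json`.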
7
+
8
+ ## When to Use
9
+
10
+ - Cross-language compatibility
11
+ - Structured output required
12
+ - Performance-critical applications
13
+
14
+ ## Key Concepts
15
+
16
+ 1. **Rust Parsing + Transform**: All processing in Rust
17
+ 2. **JSON Serialization**: Language-agnostic output
18
+ 3. **Type Safety**: Schema-driven structure
19
+
20
+ ## Running
21
+
22
+ ```bash
23
+ ruby example/csv/serialized.rb
24
+ ```
25
+
26
+ ## Output
27
+
28
+ ```
29
+ Input: a,b,c
30
+ JSON: [["a","b","c"]]
31
+ ```