parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,97 @@
1
+ # README Example - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/documentation
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### String Parser Construction
13
+
14
+ This example demonstrates the minimal parser from the README:
15
+
16
+ ```ruby
17
+ parser = str('"') >>
18
+ (
19
+ str('\\') >> any |
20
+ str('"').absent? >> any
21
+ ).repeat.as(:string) >>
22
+ str('"')
23
+ ```
24
+
25
+ The parser matches quoted strings with escape sequences.
26
+
27
+ ### Escape Sequence Handling
28
+
29
+ The pattern `str('\\') >> any` handles escaped characters:
30
+
31
+ ```ruby
32
+ # Matches: \" \\ \n etc.
33
+ str('\\') >> any
34
+ ```
35
+
36
+ Any character following a backslash is accepted.
37
+
38
+ ### String Content Matching
39
+
40
+ The pattern `str('"').absent? >> any` matches non-quote characters:
41
+
42
+ ```ruby
43
+ # Matches any character except "
44
+ str('"').absent? >> any
45
+ ```
46
+
47
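+ This branch is combined with the escape-handling branch via alternation (`|`); the escape branch is listed first, so a backslash always consumes the character that follows it.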
48
+
49
+ ### Named Capture
50
+
51
+ `.as(:string)` labels the matched content:
52
+
53
+ ```ruby
54
+ tree = parser.parse('"Hello"')
55
+ # => {:string=>"Hello"}
56
+ ```
57
+
58
+ The result is a hash with the captured content.
59
+
60
+ ### Transform Application
61
+
62
+ Transforms extract and process captured content:
63
+
64
+ ```ruby
65
+ transform = Parsanol::Transform.new do
66
+ rule(:string => simple(:x)) {
67
+ puts "String contents: #{x}"
68
+ }
69
+ end
70
+ transform.apply(tree)
71
+ ```
72
+
73
+ Each rule is selected by pattern matching on the tree structure.
74
+
75
+ ## Output Types
76
+
77
+ ```ruby
78
+ # Parse result:
79
+ {:string=>"This is a \"String\" in which you can escape stuff"}
80
+
81
+ # Transform output:
82
+ String contents: This is a "String" in which you can escape stuff
83
+ ```
84
+
85
+ ## Design Decisions
86
+
87
+ ### Why This Example?
88
+
89
+ This is the canonical "hello world" for Parslet-style parsing. It demonstrates the core concepts in minimal form.
90
+
91
+ ### Why Transform?
92
+
93
+ Transforms separate parsing from processing. The parser creates structure; transforms interpret it.
94
+
95
+ ### Ruby-Only Feature
96
+
97
+ This example uses the Parslet-compatible API for demonstration purposes.
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Email address parser with sanitization support.
4
+ # Originally contributed to Parslet, ported to Parsanol as an example.
5
+
6
+ $:.unshift File.dirname(__FILE__) + "/../../lib" # lib/ sits two levels up from example/email/
7
+ require 'parsanol/parslet'
8
+ require 'parsanol/convenience'
9
+
10
+ class EmailParser < Parsanol::Parser
11
+ rule(:space) { match('\s').repeat(1) }
12
+ rule(:space?) { space.maybe }
13
+ rule(:dash?) { match['_-'].maybe }
14
+
15
+ rule(:at) {
16
+ str('@') |
17
+ (dash? >> (str('at') | str('AT')) >> dash?)
18
+ }
19
+ rule(:dot) {
20
+ str('.') |
21
+ (dash? >> (str('dot') | str('DOT')) >> dash?)
22
+ }
23
+
24
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
25
+ rule(:separator) { dot.as(:dot) >> space? | space }
26
+ rule(:words) { word >> (separator >> word).repeat }
27
+
28
+ rule(:email) {
29
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email)
30
+ }
31
+
32
+ root(:email)
33
+ end
34
+
35
+ class EmailSanitizer < Parsanol::Transform
36
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
37
+ rule(:word => simple(:word)) { word }
38
+
39
+ rule(:username => sequence(:username)) { username.join + "@" }
40
+ rule(:username => simple(:username)) { username.to_s + "@" }
41
+
42
+ rule(:email => sequence(:email)) { email.join }
43
+ end
44
+
45
+ parser = EmailParser.new
46
+ sanitizer = EmailSanitizer.new
47
+
48
+ input = ARGV[0] || begin
49
+ default = "a.b.c.d@gmail.com"
50
+ STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
51
+ STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using #{default}"
52
+ default
53
+ end
54
+
55
+ p sanitizer.apply(parser.parse_with_debug(input))
@@ -0,0 +1,102 @@
1
+ # Email Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/email
7
+ ruby basic.rb "user@example.com"
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Flexible At Symbol
13
+
14
+ The parser handles obfuscated email formats:
15
+
16
+ ```ruby
17
+ rule(:at) {
18
+ str('@') |
19
+ (dash? >> (str('at') | str('AT')) >> dash?)
20
+ }
21
+ ```
22
+
23
+ Supports both a literal `@` and the words `at`/`AT` (optionally wrapped in `_` or `-`), a common way of obfuscating addresses against spam harvesters.
24
+
25
+ ### Flexible Dot
26
+
27
+ Similar flexibility for the dot separator:
28
+
29
+ ```ruby
30
+ rule(:dot) {
31
+ str('.') |
32
+ (dash? >> (str('dot') | str('DOT')) >> dash?)
33
+ }
34
+ ```
35
+
36
+ Handles `user@example.com` and `user at example dot com`.
37
+
38
+ ### Word and Words Rules
39
+
40
+ Email parts are sequences of words:
41
+
42
+ ```ruby
43
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
44
+ rule(:separator) { dot.as(:dot) >> space? | space }
45
+ rule(:words) { word >> (separator >> word).repeat }
46
+ ```
47
+
48
+ Words are runs of lowercase letters and digits (`[a-z0-9]`); separators are dots (a literal `.` or the word `dot`/`DOT`) or whitespace.
49
+
50
+ ### Email Structure
51
+
52
+ The complete email combines username and domain:
53
+
54
+ ```ruby
55
+ rule(:email) {
56
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email)
57
+ }
58
+ ```
59
+
60
+ Only the local part is labeled (`:username`); the domain words are left unlabeled inside the enclosing `:email` node.
61
+
62
+ ### Sanitizing Transform
63
+
64
+ The transform normalizes obfuscated emails:
65
+
66
+ ```ruby
67
+ class EmailSanitizer < Parsanol::Transform
68
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
69
+ rule(:word => simple(:word)) { word }
70
+
71
+ rule(:username => sequence(:username)) { username.join + "@" }
72
+ rule(:username => simple(:username)) { username.to_s + "@" }
73
+
74
+ rule(:email => sequence(:email)) { email.join }
75
+ end
76
+ ```
77
+
78
+ Converts "user at example dot com" to "user@example.com".
79
+
80
+ ## Output Types
81
+
82
+ ```ruby
83
+ # Parse tree for "a.b.c.d@gmail.com":
84
+ {:email=>{:username=>[{:word=>"a"}, {:dot=>".", :word=>"b"}, ...], ...}}
85
+
86
+ # After transform:
87
+ "a.b.c.d@gmail.com"
88
+ ```
89
+
90
+ ## Design Decisions
91
+
92
+ ### Why Handle Obfuscated Formats?
93
+
94
+ Email addresses are often obfuscated to prevent spam harvesting. This parser can extract and normalize them.
95
+
96
+ ### Why Sequence vs Simple in Transform?
97
+
98
+ `sequence(:x)` handles arrays of values; `simple(:x)` handles single values. Both cases occur depending on email complexity.
99
+
100
+ ### Why Add @ in Username Transform?
101
+
102
+ The `at` rule is matched but never captured with `.as`, so the `@` does not appear in the parse tree; the username transform appends it again during reconstruction.
@@ -0,0 +1,106 @@
1
+ # Email Parser Example - RubyTransform
2
+ #
3
+ # This example demonstrates parsing email addresses with validation.
4
+ # Shows character classes, repetition, and structured output.
5
+ #
6
+ # Run with: ruby -Ilib example/email/ruby_transform.rb
7
+
8
+ $:.unshift File.dirname(__FILE__) + "/../../lib" # lib/ sits two levels up from example/email/
9
+
10
+ require 'parsanol'
11
+
12
+ # Step 1: Define the email grammar
13
+ class EmailParser < Parsanol::Parser
14
+ root :email
15
+
16
+ rule(:email) {
17
+ local_part.as(:local) >>
18
+ str('@') >>
19
+ domain.as(:domain)
20
+ }
21
+
22
+ rule(:local_part) {
23
+ (alphanumeric | match('[._%+-]')).repeat(1)
24
+ }
25
+
26
+ rule(:domain) {
27
+ label >> (str('.') >> label).repeat
28
+ }
29
+
30
+ rule(:label) {
31
+ alphanumeric.repeat(1)
32
+ }
33
+
34
+ rule(:alphanumeric) { match('[a-zA-Z0-9]') }
35
+ end
36
+
37
+ # Step 2: Email address class
38
+ class EmailAddress
39
+ attr_reader :local, :domain
40
+
41
+ def initialize(local, domain)
42
+ @local = local.to_s
43
+ @domain = domain.to_s
44
+ end
45
+
46
+ def to_s
47
+ "#{@local}@#{@domain}"
48
+ end
49
+
50
+ def eql?(other)
51
+ other.is_a?(EmailAddress) && to_s == other.to_s
52
+ end
53
+
54
+ alias == eql?
55
+
56
+ def hash
57
+ to_s.hash
58
+ end
59
+ end
60
+
61
+ def parse_email(input)
62
+ parser = EmailParser.new
63
+ tree = parser.parse(input)
64
+
65
+ puts "Parse tree: #{tree.inspect}"
66
+
67
+ # Extract local and domain from tree
68
+ local = tree[:local].to_s
69
+ domain = tree[:domain].to_s
70
+
71
+ email = EmailAddress.new(local, domain)
72
+
73
+ email
74
+ rescue Parsanol::ParseFailed => e
75
+ puts "Parse failed: #{e.message}"
76
+ nil
77
+ end
78
+
79
+ # Example usage
80
+ if __FILE__ == $0
81
+ puts "=" * 60
82
+ puts "Email Parser - RubyTransform"
83
+ puts "=" * 60
84
+ puts
85
+
86
+ test_emails = [
87
+ "user@example.com",
88
+ "john.doe@example.org",
89
+ "test123@subdomain.example.co.uk",
90
+ "invalid-email",
91
+ "@missing-local.com",
92
+ "no-at-sign.com",
93
+ ]
94
+
95
+ test_emails.each do |email_str|
96
+ puts "-" * 40
97
+ puts "Input: #{email_str}"
98
+ email = parse_email(email_str)
99
+ if email
100
+ puts " Email: #{email.to_s}"
101
+ puts " Local: #{email.local}"
102
+ puts " Domain: #{email.domain}"
103
+ end
104
+ puts
105
+ end
106
+ end
@@ -0,0 +1,13 @@
1
+ # Basically just demonstrates that you can leave rules empty and get a nice
2
+ # NotImplementedError. A way to quickly spec out your parser rules?
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../../lib" # lib/ sits two levels up from example/empty/
5
+
6
+ require 'parsanol/parslet'
7
+
8
+ class MyParser < Parsanol::Parser
9
+ rule(:empty) { }
10
+ end
11
+
12
+
13
+ MyParser.new.empty.parslet
@@ -0,0 +1,73 @@
1
+ # Empty Rule Handling - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/empty
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Empty Rule Definition
13
+
14
+ Rules can be defined without bodies:
15
+
16
+ ```ruby
17
+ class MyParser < Parsanol::Parser
18
+ rule(:empty) { }
19
+ end
20
+ ```
21
+
22
+ This creates a rule placeholder without an implementation.
23
+
24
+ ### Error on Use
25
+
26
+ Calling an empty rule raises `NotImplementedError`:
27
+
28
+ ```ruby
29
+ MyParser.new.empty.parslet
30
+ # => NotImplementedError: rule :empty not implemented
31
+ ```
32
+
33
+ ### Use Case: Grammar Sketching
34
+
35
+ Empty rules help sketch out grammar structure:
36
+
37
+ ```ruby
38
+ class MyParser < Parsanol::Parser
39
+ rule(:expression) { term >> (operator >> term).repeat }
40
+ rule(:term) { } # TODO: implement
41
+ rule(:operator) { } # TODO: implement
42
+ end
43
+ ```
44
+
45
+ This lets you plan the structure before filling in details.
46
+
47
+ ## Output Types
48
+
49
+ ```ruby
50
+ # No parse output - raises error
51
+ NotImplementedError: rule :empty not implemented
52
+ ```
53
+
54
+ ## Design Decisions
55
+
56
+ ### Why Allow Empty Rules?
57
+
58
+ Empty rules support incremental grammar development. They provide a way to stub out parts of a grammar.
59
+
60
+ ### Why NotImplementedError?
61
+
62
+ Using `NotImplementedError` clearly indicates the issue is a missing implementation, not a parse error.
63
+
64
+ ### Ruby-Only Feature
65
+
66
+ This is a Parslet-specific convenience for Ruby development. In Rust, you would use option types or placeholder patterns.
67
+
68
+ ### When to Use
69
+
70
+ - Prototyping grammars
71
+ - Planning rule structure
72
+ - Documenting intended grammar layout
73
+ - Teaching parser concepts
@@ -0,0 +1,38 @@
1
+ {
2
+ "id": "empty",
3
+ "title": "Empty Rule Stub",
4
+ "description": "Demonstrate the empty rule pattern for grammar prototyping and incremental development.",
5
+ "category": "conceptual",
6
+ "tags": ["empty", "stub", "prototyping", "parslet"],
7
+ "difficulty": "beginner",
8
+ "concepts": ["empty rule", "prototyping", "incremental development", "stubs"],
9
+
10
+ "motivation": {
11
+ "why": "Empty rules allow prototyping grammars incrementally by stubbing out incomplete parts. This enables testing partial grammars before all rules are implemented.",
12
+ "useCases": [
13
+ "Incremental grammar development",
14
+ "Testing partial grammars",
15
+ "Prototyping complex languages"
16
+ ]
17
+ },
18
+
19
+ "inputFormat": {
20
+ "description": "Any input that matches the partial grammar.",
21
+ "examples": [
22
+ { "input": "test", "description": "Input to match against stubbed grammar", "valid": true }
23
+ ]
24
+ },
25
+
26
+ "outputFormat": {
27
+ "description": "Partial parse results from incomplete grammar.",
28
+ "structure": {
29
+ "partial": { "description": "Partial match from stubbed rules" }
30
+ }
31
+ },
32
+
33
+ "rubyOnly": true,
34
+ "parsletCompatible": true,
35
+ "implementations": {
36
+ "ruby": { "basic": "basic.rb" }
37
+ }
38
+ }
@@ -0,0 +1,47 @@
1
+ # Example that demonstrates how a simple erb-like parser could be constructed.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../../lib" # lib/ sits two levels up from example/erb/
4
+
5
+ require 'parsanol/parslet'
6
+
7
+ class ErbParser < Parsanol::Parser
8
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
9
+
10
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
11
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
12
+ rule(:code) { ruby.as(:code) }
13
+ rule(:erb) { expression | comment | code }
14
+
15
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
16
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
17
+
18
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
19
+ root(:text_with_ruby)
20
+ end
21
+
22
+ parser = ErbParser.new
23
+ p parser.parse "The value of x is <%= x %>."
24
+ p parser.parse "<% 1 + 2 %>"
25
+ p parser.parse "<%# commented %>"
26
+
27
+
28
+ evaluator = Parsanol::Transform.new do
29
+
30
+ erb_binding = binding
31
+
32
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
33
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
34
+ rule(:comment => { :ruby => simple(:ruby) }) { '' }
35
+
36
+ rule(:text => simple(:text)) { text }
37
+ rule(:text => sequence(:texts)) { texts.join }
38
+
39
+ end
40
+
41
+ puts evaluator.apply(parser.parse(<<-ERB
42
+ The <% a = 2 %>not printed result of "a = 2".
43
+ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
44
+ The <%= 'nicely' %> printed result.
45
+ The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
46
+ ERB
47
+ ))
@@ -0,0 +1,103 @@
1
+ # ERB Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/erb
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### ERB Tag Content Rule
13
+
14
+ The ruby content inside ERB tags excludes the closing delimiter:
15
+
16
+ ```ruby
17
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
18
+ ```
19
+
20
+ Negative lookahead prevents premature termination when parsing embedded Ruby code.
21
+
22
+ ### ERB Expression Types
23
+
24
+ Three types of ERB tags are supported:
25
+
26
+ ```ruby
27
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
28
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
29
+ rule(:code) { ruby.as(:code) }
30
+ rule(:erb) { expression | comment | code }
31
+ ```
32
+
33
+ - Expression (`<%= %>`) outputs values
34
+ - Comment (`<%# %>`) is ignored
35
+ - Code (`<% %>`) executes without output
36
+
37
+ ### Complete ERB Tag
38
+
39
+ Tags combine opening delimiter, content, and closing delimiter:
40
+
41
+ ```ruby
42
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
43
+ ```
44
+
45
+ The `erb` rule handles the type prefix (=, #, or nothing).
46
+
47
+ ### Text and Template Body
48
+
49
+ Text content excludes ERB opening tags:
50
+
51
+ ```ruby
52
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
53
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
54
+ ```
55
+
56
+ Alternation between text and ERB tags allows interleaving.
57
+
58
+ ### Transform for Evaluation
59
+
60
+ The transform evaluates Ruby code:
61
+
62
+ ```ruby
63
+ evaluator = Parsanol::Transform.new do
64
+ erb_binding = binding
65
+
66
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
67
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
68
+ rule(:comment => { :ruby => simple(:ruby) }) { '' }
69
+
70
+ rule(:text => simple(:text)) { text }
71
+ rule(:text => sequence(:texts)) { texts.join }
72
+ end
73
+ ```
74
+
75
+ Code blocks execute silently; expressions return values; comments produce empty strings.
76
+
77
+ ## Output Types
78
+
79
+ ```ruby
80
+ # Parse tree:
81
+ {:text=>[
82
+ {:text=>"The value of x is "@s},
83
+ {:expression=>{:ruby=>" x "@s}},
84
+ {:text=>"."@s}
85
+ ]}
86
+
87
+ # After transform (evaluated):
88
+ "The value of x is 42."
89
+ ```
90
+
91
+ ## Design Decisions
92
+
93
+ ### Why Separate Expression and Code Tags?
94
+
95
+ `<%= %>` outputs the result, `<% %>` executes for side effects. This distinction is fundamental to template evaluation.
96
+
97
+ ### Why Use Binding in Transform?
98
+
99
+ A shared binding allows code in one tag to affect later expressions (e.g., setting a variable).
100
+
101
+ ### Why Comment as Separate Type?
102
+
103
+ Comments should be completely ignored during evaluation, not parsed as Ruby code.
@@ -0,0 +1,42 @@
1
+ # Please also look at the more naive 'basic.rb' in this directory. This shows how to optimize an
2
+ # ERB-like parser using Parsanol::Accelerator.
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__), "..", "..", "lib") # lib/ sits two levels up from example/erb/
5
+
6
+ require 'parsanol/parslet'
7
+ require './qed/applique/gobbleup'
8
+ require 'parsanol/accelerator'
9
+
10
+ class ErbParser < Parsanol::Parser
11
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
12
+
13
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
14
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
15
+ rule(:code) { ruby.as(:code) }
16
+ rule(:erb) { expression | comment | code }
17
+
18
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
19
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
20
+
21
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
22
+ root(:text_with_ruby)
23
+ end
24
+
25
+ parser = ErbParser.new
26
+
27
+ A = Parsanol::Accelerator
28
+ optimized = A.apply(parser,
29
+ A.rule((A.str(:x).absent? >> A.any).repeat(1)) { GobbleUp.new(x, 1) },
30
+ A.rule((A.str(:x).absent? >> A.any).repeat(0)) { GobbleUp.new(x, 0) })
31
+
32
+ input = File.read(File.dirname(__FILE__) + "/../big.erb") # big.erb is shipped one directory up, in example/
33
+
34
+ # Remove the comment marks here to see what difference the optimisation makes.
35
+ # Commented out for the acceptance tests to run.
36
+ #
37
+ # require 'benchmark'
38
+ # Benchmark.bm(7) do |bm|
39
+ # bm.report('original') { parser.parse(input) }
40
+ # bm.report('gobble') { optimized.parse(input) }
41
+ # end
42
+ p optimized.parse(input)