parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,118 @@
1
+ # Markup Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/markup
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Document Structure
13
+
14
+ A document is a sequence of blocks:
15
+
16
+ ```ruby
17
+ rule(:document) { block.repeat(1).as(:document) }
18
+
19
+ rule(:block) {
20
+ heading |
21
+ unordered_list |
22
+ paragraph |
23
+ blank_line.as(:blank)
24
+ }
25
+ ```
26
+
27
+ Blank lines are captured separately and filtered during transformation.
28
+
29
+ ### Heading Rule
30
+
31
+ Equal signs define heading level (1-3):
32
+
33
+ ```ruby
34
+ rule(:heading) {
35
+ (str('=').repeat(1, 3).as(:level) >>
36
+ space >>
37
+ heading_content.as(:text) >>
38
+ newline).as(:heading)
39
+ }
40
+ ```
41
+
42
+ `=` is H1, `==` is H2, `===` is H3.
43
+
44
+ ### Paragraph Rule
45
+
46
+ Consecutive lines form paragraphs:
47
+
48
+ ```ruby
49
+ rule(:paragraph) {
50
+ (paragraph_line >> newline).repeat(1).as(:paragraph)
51
+ }
52
+
53
+ rule(:paragraph_line) {
54
+ (blank_line.absent? >> (str('=').absent? | space.absent?) >> any).repeat(1)
55
+ }
56
+ ```
57
+
58
+ Paragraph lines don't start with `=` followed by space (which would be a heading).
59
+
60
+ ### List Rule
61
+
62
+ Hyphen-prefixed items:
63
+
64
+ ```ruby
65
+ rule(:unordered_list) {
66
+ list_item.repeat(1).as(:unordered_list)
67
+ }
68
+
69
+ rule(:list_item) {
70
+ (str('-') >>
71
+ space >>
72
+ list_content.as(:text) >>
73
+ newline).as(:item)
74
+ }
75
+ ```
76
+
77
+ Each item must be on its own line with hyphen and space.
78
+
79
+ ## Output Types
80
+
81
+ ```ruby
82
+ # Document with blocks
83
+ MarkupDocument.new([
84
+ MarkupHeading.new("=", "Title"),
85
+ MarkupParagraph.new(["text"]),
86
+ MarkupList.new([{text: "item"}, {text: "item2"}])
87
+ ])
88
+
89
+ # Heading
90
+ MarkupHeading.new("==", "Section")
91
+ # to_html => "<h2>Section</h2>"
92
+
93
+ # Paragraph
94
+ MarkupParagraph.new(["Line one", "Line two"])
95
+ # to_html => "<p>Line one Line two</p>"
96
+
97
+ # List
98
+ MarkupList.new([{text: "First"}, {text: "Second"}])
99
+ # to_html => "<ul>\n<li>First</li>\n<li>Second</li>\n</ul>"
100
+ ```
101
+
102
+ ## Design Decisions
103
+
104
+ ### Why Equal Signs for Headings?
105
+
106
+ Equal signs are visually intuitive and don't conflict with common text. They're also easier to type than `#` on some keyboards.
107
+
108
+ ### Why Filter Blank Lines in Transform?
109
+
110
+ Blank lines separate blocks but aren't content. Filtering them during transformation keeps the AST clean.
111
+
112
+ ### Why Separate Paragraph Lines?
113
+
114
+ Keeping lines separate allows joining with spaces during HTML generation, preserving word boundaries across line breaks.
115
+
116
+ ### Why Limit Heading Levels to 3?
117
+
118
+ This markup language is intentionally simple. Real-world use might extend to 6 levels, matching HTML.
@@ -0,0 +1,47 @@
1
+ # Demonstrates that we have a compatibility fix to mathn's weird idea of
2
+ # integer mathematics.
3
+ # Originally contributed to Parslet, ported to Parsanol as an example.
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+
7
+ require 'parsanol/parslet'
8
+ require 'parsanol/convenience'
9
+ include Parsanol::Parslet
10
+
11
+ def attempt_parse
12
+ possible_whitespace = match['\s'].repeat
13
+
14
+ cephalopod =
15
+ str('octopus') |
16
+ str('squid')
17
+
18
+ parenthesized_cephalopod =
19
+ str('(') >>
20
+ possible_whitespace >>
21
+ cephalopod >>
22
+ possible_whitespace >>
23
+ str(')')
24
+
25
+ parser =
26
+ possible_whitespace >>
27
+ parenthesized_cephalopod >>
28
+ possible_whitespace
29
+
30
+ # This parse fails, but that is not the point. When mathn is in the current
31
+ # ruby environment, it modifies integer division in a way that makes
32
+ # parslet loop indefinitely.
33
+ parser.parse %{(\nsqeed)\n}
34
+ rescue Parsanol::ParseFailed
35
+ end
36
+
37
+ attempt_parse
38
+ puts 'it terminates before we require mathn'
39
+
40
+ puts "requiring mathn now"
41
+ # mathn was removed from the standard library in Ruby 2.5 (deprecated since 2.2)
42
+ if RUBY_VERSION.gsub(/[^\d]/, '').to_i < 250
43
+ require 'mathn'
44
+ end
45
+ puts "and trying again (will hang without the fix)"
46
+ attempt_parse # without the fix, this call would not terminate once mathn is loaded
47
+ puts "okay!"
@@ -0,0 +1,96 @@
1
+ # Mathn Compatibility - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/mathn
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### The Mathn Problem
13
+
14
+ Ruby's deprecated `mathn` library changed integer division behavior:
15
+
16
+ ```ruby
17
+ # Without mathn:
18
+ 3 / 2 # => 1 (integer division)
19
+
20
+ # With mathn:
21
+ 3 / 2 # => (3/2) (Rational)
22
+ ```
23
+
24
+ This broke Parslet's internal calculations.
25
+
26
+ ### The Grammar
27
+
28
+ Simple cephalopod matching:
29
+
30
+ ```ruby
31
+ cephalopod =
32
+ str('octopus') |
33
+ str('squid')
34
+
35
+ parenthesized_cephalopod =
36
+ str('(') >>
37
+ possible_whitespace >>
38
+ cephalopod >>
39
+ possible_whitespace >>
40
+ str(')')
41
+ ```
42
+
43
+ ### The Compatibility Fix
44
+
45
+ Parsanol includes a fix that works regardless of mathn:
46
+
47
+ ```ruby
48
+ def attempt_parse
49
+ parser = possible_whitespace >>
50
+ parenthesized_cephalopod >>
51
+ possible_whitespace
52
+
53
+ # This would hang without the fix
54
+ parser.parse %{(\nsqeed)\n}
55
+ rescue Parsanol::ParseFailed
56
+ end
57
+ ```
58
+
59
+ ### Version Check
60
+
61
+ The example checks Ruby version:
62
+
63
+ ```ruby
64
+ if RUBY_VERSION.gsub(/[^\d]/, '').to_i < 250
65
+ require 'mathn'
66
+ end
67
+ ```
68
+
69
+ Mathn was deprecated in Ruby 2.2 and removed from the standard library in Ruby 2.5, so it's only loaded on older Rubies.
70
+
71
+ ## Output Types
72
+
73
+ ```
74
+ it terminates before we require mathn
75
+ requiring mathn now
76
+ and trying again (will hang without the fix)
77
+ okay!
78
+ ```
79
+
80
+ ## Design Decisions
81
+
82
+ ### Why This Example?
83
+
84
+ It documents a historical compatibility issue. Users encountering similar problems can find this reference.
85
+
86
+ ### Why Keep Mathn Support?
87
+
88
+ Some legacy systems still use mathn. Parsanol aims for broad Ruby version compatibility.
89
+
90
+ ### Ruby-Only Feature
91
+
92
+ This is purely about Ruby library compatibility. Rust has no equivalent issue.
93
+
94
+ ### Modern Relevance
95
+
96
+ As of Ruby 2.5, mathn is no longer part of the standard library. This example is mostly historical but demonstrates Parsanol's robustness against the integer-division change that used to make Parslet hang.
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "mathn",
3
+ "title": "Mathn Compatibility",
4
+ "description": "Demonstrate compatibility with Ruby's deprecated mathn library which changed integer division behavior.",
5
+ "category": "conceptual",
6
+ "tags": ["mathn", "compatibility", "ruby", "integer-division"],
7
+ "difficulty": "beginner",
8
+ "concepts": ["compatibility", "integer division", "Ruby libraries", "legacy support"],
9
+
10
+ "motivation": {
11
+ "why": "Documents handling of Ruby library compatibility issues that affect parsing behavior. The deprecated mathn library changed integer division to return Rationals, which broke Parslet's internal calculations.",
12
+ "useCases": [
13
+ "Legacy system support",
14
+ "Ruby version compatibility",
15
+ "Handling library conflicts"
16
+ ]
17
+ },
18
+
19
+ "inputFormat": {
20
+ "description": "Simple parser input that works regardless of mathn.",
21
+ "examples": [
22
+ { "input": "(squid)", "description": "Parenthesized cephalopod", "valid": true },
23
+ { "input": "(sqeed)", "description": "Invalid - should error gracefully", "valid": false }
24
+ ]
25
+ },
26
+
27
+ "outputFormat": {
28
+ "description": "Parse results that work with or without mathn.",
29
+ "structure": {
30
+ "result": { "description": "The parse result" }
31
+ }
32
+ },
33
+
34
+ "rubyOnly": true,
35
+ "parsletCompatible": true,
36
+ "implementations": {
37
+ "ruby": { "basic": "basic.rb" }
38
+ }
39
+ }
@@ -0,0 +1,94 @@
1
+ # Reproduces [1] using parslet.
2
+ # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parsanol/parslet'
8
+ require 'parsanol/convenience'
9
+
10
+ module MiniLisp
11
+ class Parser < Parsanol::Parser
12
+ root :expression
13
+ rule(:expression) {
14
+ space? >> str('(') >> space? >> body >> str(')') >> space?
15
+ }
16
+
17
+ rule(:body) {
18
+ (expression | identifier | float | integer | string).repeat.as(:exp)
19
+ }
20
+
21
+ rule(:space) {
22
+ match('\s').repeat(1)
23
+ }
24
+ rule(:space?) {
25
+ space.maybe
26
+ }
27
+
28
+ rule(:identifier) {
29
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
30
+ }
31
+
32
+ rule(:float) {
33
+ (
34
+ integer >> (
35
+ str('.') >> match('[0-9]').repeat(1) |
36
+ str('e') >> match('[0-9]').repeat(1)
37
+ ).as(:e)
38
+ ).as(:float) >> space?
39
+ }
40
+
41
+ rule(:integer) {
42
+ ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
43
+ }
44
+
45
+ rule(:string) {
46
+ str('"') >> (
47
+ str('\\') >> any |
48
+ str('"').absent? >> any
49
+ ).repeat.as(:string) >> str('"') >> space?
50
+ }
51
+ end
52
+
53
+ class Transform
54
+ include Parsanol::Parslet
55
+
56
+ attr_reader :t
57
+ def initialize
58
+ @t = Parsanol::Transform.new
59
+
60
+ # To understand these, take a look at what comes out of the parser.
61
+ t.rule(:identifier => simple(:ident)) { ident.to_sym }
62
+
63
+ t.rule(:string => simple(:str)) { str }
64
+
65
+ t.rule(:integer => simple(:int)) { Integer(int) }
66
+
67
+ t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
68
+
69
+ t.rule(:exp => subtree(:exp)) { exp }
70
+ end
71
+
72
+ def do(tree)
73
+ t.apply(tree)
74
+ end
75
+ end
76
+ end
77
+
78
+ parser = MiniLisp::Parser.new
79
+ transform = MiniLisp::Transform.new
80
+
81
+ result = parser.parse_with_debug %Q{
82
+ (define test (lambda ()
83
+ (begin
84
+ (display "something")
85
+ (display 1)
86
+ (display 3.08))))
87
+ }
88
+
89
+ # Transform the result
90
+ pp transform.do(result) if result
91
+
92
+ # Thereby reducing it to the earlier problem:
93
+ # http://github.com/kschiess/toylisp
94
+
@@ -0,0 +1,133 @@
1
+ # Mini Lisp Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/minilisp
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Expression Rule
13
+
14
+ S-expressions are recursively defined:
15
+
16
+ ```ruby
17
+ rule(:expression) {
18
+ space? >> str('(') >> space? >> body >> str(')') >> space?
19
+ }
20
+ ```
21
+
22
+ Whitespace is optional around parentheses for flexible formatting.
23
+
24
+ ### Body Rule
25
+
26
+ Body contains multiple expressions:
27
+
28
+ ```ruby
29
+ rule(:body) {
30
+ (expression | identifier | float | integer | string).repeat.as(:exp)
31
+ }
32
+ ```
33
+
34
+ The repeat allows empty lists `()` and nested structures.
35
+
36
+ ### Identifier Rule
37
+
38
+ Identifiers allow operator characters:
39
+
40
+ ```ruby
41
+ rule(:identifier) {
42
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
43
+ }
44
+ ```
45
+
46
+ `=` and `*` are valid identifier chars for Lisp operators like `=` and `*`.
47
+
48
+ ### Float Rule
49
+
50
+ Floats have decimal or exponent parts:
51
+
52
+ ```ruby
53
+ rule(:float) {
54
+ (
55
+ integer >> (
56
+ str('.') >> match('[0-9]').repeat(1) |
57
+ str('e') >> match('[0-9]').repeat(1)
58
+ ).as(:e)
59
+ ).as(:float) >> space?
60
+ }
61
+ ```
62
+
63
+ Captures the integer part and the exponent/fraction separately.
64
+
65
+ ### String Rule
66
+
67
+ Strings handle escape sequences:
68
+
69
+ ```ruby
70
+ rule(:string) {
71
+ str('"') >> (
72
+ str('\\') >> any |
73
+ str('"').absent? >> any
74
+ ).repeat.as(:string) >> str('"') >> space?
75
+ }
76
+ ```
77
+
78
+ `str('\\') >> any` handles any escaped character including `\"`.
79
+
80
+ ### Transform Class
81
+
82
+ Transforms convert parse trees to Ruby objects:
83
+
84
+ ```ruby
85
+ class Transform
86
+ t.rule(:identifier => simple(:ident)) { ident.to_sym }
87
+ t.rule(:string => simple(:str)) { str }
88
+ t.rule(:integer => simple(:int)) { Integer(int) }
89
+ t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
90
+ t.rule(:exp => subtree(:exp)) { exp }
91
+ end
92
+ ```
93
+
94
+ `simple(:x)` matches single values; `subtree(:x)` matches nested structures.
95
+
96
+ ## Output Types
97
+
98
+ ```ruby
99
+ # Parse tree
100
+ {:exp=>[
101
+ {:identifier=>"*"@s},
102
+ {:integer=>"1"@s},
103
+ {:integer=>"2"@s}
104
+ ]}
105
+
106
+ # After transform
107
+ [:*, 1, 2]
108
+
109
+ # Nested expression
110
+ [:define, :test, [:lambda, [], [:begin,
111
+ [:display, "something"],
112
+ [:display, 1],
113
+ [:display, 3.08]
114
+ ]]]
115
+ ```
116
+
117
+ ## Design Decisions
118
+
119
+ ### Why Symbol for Identifiers?
120
+
121
+ Ruby symbols are immutable and efficient for identifiers. They're commonly used for code representation.
122
+
123
+ ### Why Float Assembly in Transform?
124
+
125
+ The float rule captures integer and exponent parts separately. The transform combines them into a Ruby Float.
126
+
127
+ ### Why subtree for Expressions?
128
+
129
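+ `subtree(:exp)` matches whatever is stored under `:exp`, including nested lists. Because the transform is applied depth-first, inner expressions have already been converted by the time the rule unwraps the outer one, so arbitrary nesting depth is handled automatically.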
130
+
131
+ ### Why Separate Parser and Transform Classes?
132
+
133
+ Separation keeps grammar definition clean. The transform can evolve independently of parsing rules.
@@ -0,0 +1,47 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'pp'
4
+ require "parsanol/parslet"
5
+
6
+ # Demonstrates modular parsers, split out over many classes. Please look at
7
+ # the ip-address example (example/ip-address/basic.rb) as well.
8
+
9
+ module ALanguage
10
+ include Parsanol::Parslet
11
+
12
+ # Parslet rules are really a special kind of method. Mix them into your
13
+ # classes!
14
+ rule(:a_language) { str('aaa') }
15
+ end
16
+
17
+ # Parslet parsers are parslet atoms as well. Create an instance and chain them
18
+ # to your other rules.
19
+ #
20
+ class BLanguage < Parsanol::Parser
21
+ root :blang
22
+
23
+ rule(:blang) { str('bbb') }
24
+ end
25
+
26
+ # Parslet atoms are really Ruby values, pass them around.
27
+ c_language = Parsanol.str('ccc')
28
+
29
+ class Language < Parsanol::Parser
30
+ def initialize(c_language)
31
+ @c_language = c_language
32
+ super()
33
+ end
34
+
35
+ root :root
36
+
37
+ include ALanguage
38
+
39
+ rule(:root) { str('a(') >> a_language >> str(')') >> space |
40
+ str('b(') >> BLanguage.new >> str(')') >> space |
41
+ str('c(') >> @c_language >> str(')') >> space }
42
+ rule(:space) { str(' ').maybe }
43
+ end
44
+
45
+ Language.new(c_language).parse('a(aaa)')
46
+ Language.new(c_language).parse('b(bbb)')
47
+ Language.new(c_language).parse('c(ccc)')