parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '01840daa27539f714d2270ca715a635a783f72adcd4370d3e1196fbcd05015c3'
4
+ data.tar.gz: a54323f2b2da9bbc65f8a984411eb67c44495af2530f19e5b57a845e832d7648
5
+ SHA512:
6
+ metadata.gz: debaafa0bf490766dc6d9b4e871c444c532b9882a9074e2603d779339c4706b68afc9b94d6a476a8d84ea3c458ebe1538fb67e9bd9ff0c153a8c64eda71387ee
7
+ data.tar.gz: 5efa3a1e216953644811211e48bb0aeaed5ef2106b6a387f3a8b27a9a858e60b95c23f063c549ba236fc5e5ef8b75f7de0e1a98f61836624a2533ea7c0335f74
data/HISTORY.txt ADDED
@@ -0,0 +1,25 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ ## 3.0.0 (2025-02-28)
6
+
7
+ ### Added
8
+ - Updated to use parsanol 0.1.2 from crates.io
9
+ - Fixed repetition with separator pattern in Parslet compatibility layer
10
+ - Added spec for repetition pattern handling
11
+
12
+ ### Changes
13
+ - Updated Cargo.toml to use published parsanol crate instead of local path
14
+
15
+ ## 2.0.0
16
+
17
+ ### Added
18
+ - Initial release as Parsanol (renamed from Parslet)
19
+ - Ruby native extension support
20
+ - Multiple parser modes: RubyTransform, JsonOutput, DirectObjects
21
+
22
+ ## 1.0.0
23
+
24
+ ### Added
25
+ - Original Parslet parser library
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2025 Ribose Inc.
2
+ Copyright (c) 2010-2018 Kaspar Schiess
3
+
4
+ Permission is hereby granted, free of charge, to any person
5
+ obtaining a copy of this software and associated documentation
6
+ files (the "Software"), to deal in the Software without
7
+ restriction, including without limitation the rights to use,
8
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following
11
+ conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.adoc ADDED
@@ -0,0 +1,643 @@
1
+ = Parsanol
2
+
3
+ image:https://img.shields.io/gem/v/parsanol.svg[RubyGems Version]
4
+ image:https://img.shields.io/github/license/parsanol/parsanol-ruby.svg[License]
5
+ image:https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml/badge.svg["Build", link="https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml"]
6
+
7
+ A high-performance PEG (Parsing Expression Grammar) parser construction library for Ruby, with support for static frozen parsers and dynamic parsers.
8
+
9
+ == Purpose
10
+
11
+ Parsanol provides a DSL for constructing parsers using PEG semantics. It offers excellent error reporting and supports both pure Ruby parsing and optional Rust native extensions for maximum performance.
12
+
13
+ == Features
14
+
15
+ * PEG-based parser construction
16
+ * Great error reporting with detailed parse failure information
17
+ * Optional Rust native extension for up to 29x faster parsing
18
+ * **Slice support** - preserves source positions for linters and IDEs
19
+ * WebAssembly support for browser/Node.js environments
20
+ * Tree transformation for converting parse results to AST
21
+ * Grammar optimization passes
22
+ * Memory-efficient parsing with object pooling
23
+ * **Streaming Builder API** - single-pass parsing with Ruby callbacks
24
+ * **Parallel Parsing** - batch processing with multi-core speedup
25
+ * **Infix Expression Parsing** - built-in operator precedence support
26
+ * **Security Features** - input size and recursion limits for untrusted data
27
+ * **Debug Tools** - tracing and grammar visualization
28
+
29
+ == Installation
30
+
31
+ [source,shell]
32
+ ----
33
+ gem install parsanol
34
+ ----
35
+
36
+ Or add to your Gemfile:
37
+
38
+ [source,ruby]
39
+ ----
40
+ gem 'parsanol'
41
+ ----
42
+
43
+ == Usage
44
+
45
+ === Basic Parser
46
+
47
+ [source,ruby]
48
+ ----
49
+ require 'parsanol'
50
+
51
+ class MyParser < Parsanol::Parser
52
+ rule(:keyword) { str('if') | str('while') }
53
+ rule(:expression) { keyword >> str('(') >> (expression | match('[a-z]').repeat(1)) >> str(')') }
54
+ root(:expression)
55
+ end
56
+
57
+ parser = MyParser.new
58
+ result = parser.parse('if(x)')
59
+ ----
60
+
61
+ === Parslet Compatibility
62
+
63
+ Parsanol is fully compatible with the Parslet API:
64
+
65
+ [source,ruby]
66
+ ----
67
+ require 'parslet' # Works exactly like original Parslet
68
+
69
+ class MyParser < Parslet::Parser
70
+ rule(:hello) { str('hello') }
71
+ root(:hello)
72
+ end
73
+ ----
74
+
75
+ === Transformation
76
+
77
+ [source,ruby]
78
+ ----
79
+ class MyTransform < Parsanol::Transform
80
+ rule(keyword: simple(:k)) { Keyword.new(k) }
81
+ end
82
+
83
+ MyTransform.apply(parse_tree)
84
+ ----
85
+
86
+ == Migrating from Parslet
87
+
88
+ Parsanol provides full Parslet API compatibility with two migration modes.
89
+
90
+ === Mode 1: Drop-in Replacement (Zero Code Changes)
91
+
92
+ Simply replace the parslet gem with parsanol in your Gemfile:
93
+
94
+ [source,diff]
95
+ ----
96
+ # Gemfile
97
+ - gem 'parslet'
98
+ + gem 'parsanol'
99
+ ----
100
+
101
+ Your existing code works without modification:
102
+
103
+ [source,ruby]
104
+ ----
105
+ # No changes needed!
106
+ require 'parslet' # Parsanol aliases itself
107
+
108
+ class MyParser < Parslet::Parser
109
+ rule(:number) { match('[0-9]').repeat(1) }
110
+ root(:number)
111
+ end
112
+
113
+ parser = MyParser.new
114
+ parser.parse('123') # Works exactly the same
115
+ ----
116
+
117
+ .What you get
118
+ - Up to 29x performance improvement (measured with Expressir)
119
+ - Slice support for source position tracking
120
+ - 99.5% fewer allocations
121
+ - Same error messages
122
+ - 100% API compatibility (including Parslet::Slice)
123
+
124
+ === Mode 2: Native Parsanol API (Enhanced Features)
125
+
126
+ For maximum performance and new features:
127
+
128
+ [source,ruby]
129
+ ----
130
+ require 'parsanol' # Use Parsanol namespace
131
+
132
+ class MyParser < Parsanol::Parser
133
+ rule(:number) { match('[0-9]').repeat(1) }
134
+ root(:number)
135
+ end
136
+ ----
137
+
138
+ .Additional features in native mode
139
+ - Direct Rust FFI (fastest)
140
+ - JSON grammar export
141
+ - Expression parser with precedence climbing
142
+ - Streaming parsing (planned)
143
+
144
+ === API Compatibility Matrix
145
+
146
+ [cols="2,1,3"]
147
+ |===
148
+ | Parslet API | Status | Notes
149
+
150
+ | `str('foo')` | ✅ | Literal string match
151
+ | `match('[0-9]')` | ✅ | Character class
152
+ | `any` | ✅ | Any single character
153
+ | `>>` (sequence) | ✅ | Sequential composition
154
+ | `\|` (choice) | ✅ | Ordered choice
155
+ | `.repeat(n, m)` | ✅ | Repetition with bounds
156
+ | `.repeat(1)` | ✅ | One or more
157
+ | `.repeat` | ✅ | Zero or more
158
+ | `.maybe` | ✅ | Optional
159
+ | `.as(:name)` | ✅ | Label capture
160
+ | `.absent?` | ✅ | Negative lookahead
161
+ | `.present?` | ✅ | Positive lookahead
162
+ | `infix_expression` | ✅ | Precedence climbing
163
+ | `parse()` | ✅ | Parse and return tree
164
+ | `parse_with_debug()` | ✅ | Parse with error output
165
+ | `Parslet::Transform` | ✅ | Tree transformation
166
+ | `simple(:x)` | ✅ | Match simple value
167
+ | `sequence(:x)` | ✅ | Match array of values
168
+ | `subtree(:x)` | ✅ | Match any subtree
169
+ | `Parslet::Slice` | ✅ | Parsanol::Slice compatible
170
+ |===
171
+
172
+ === Migration Checklist
173
+
174
+ . **Back up your project**
175
+
176
+ . **Update Gemfile**
177
+ +
178
+ [source,ruby]
179
+ ----
180
+ gem 'parsanol' # Replace parslet
181
+ ----
182
+
183
+ . **Run tests**
184
+ +
185
+ [source,shell]
186
+ ----
187
+ bundle install
188
+ bundle exec rspec
189
+ ----
190
+
191
+ . **Verify performance** (optional)
192
+ +
193
+ [source,ruby]
194
+ ----
195
+ require 'benchmark'
196
+
197
+ input = "your test input"
198
+ Benchmark.bm do |x|
199
+ x.report("parse") { 1000.times { parser.parse(input) } }
200
+ end
201
+ ----
202
+
203
+ === Common Gotchas
204
+
205
+ . **Native extension requires Rust**
206
+ +
207
+ Parsanol falls back to pure Ruby if Rust is unavailable. For maximum performance, install Rust:
208
+ +
209
+ [source,shell]
210
+ ----
211
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
212
+ ----
213
+
214
+ . **Error message formatting**
215
+ +
216
+ Error messages have slightly different formatting but contain the same information.
217
+
218
+ . **Grammar introspection**
219
+ +
220
+ Works the same way in both modes.
221
+
222
+ === Performance Comparison
223
+
224
+ [cols="3,2,2,2"]
225
+ |===
226
+ | Operation | Parslet | Parsanol (Ruby) | Parsanol (ZeroCopy+Slice)
227
+
228
+ | EXPRESS parsing (22KB) | 3036 ms | - | 106 ms (28.7x)
229
+ | JSON parsing | 1x | 1x | 29x
230
+ | Calculator | 1x | 1x | 25x
231
+ | Memory allocations | 100% | 100% | 0.5%
232
+ |===
233
+
234
+ == Performance
235
+
236
+ Parsanol offers 5 different approaches for parsing in Ruby, each moving more work from Ruby to Rust:
237
+
238
+ === The 5 Approaches
239
+
240
+ [cols="5,2,2,3"]
241
+ |===
242
+ | Approach | Speed | Use Case | How It Works
243
+
244
+ | 1. parslet-ruby | 1x (baseline) | Compatibility, debugging | Pure Ruby parsing
245
+ | 2. parsanol-ruby | ~1x | Learning, prototyping | Parsanol Ruby backend
246
+ | 3. parsanol-native (Batch) | ~20x | Need Ruby objects | Rust parsing, AST via u64
247
+ | 4. parsanol-native (ZeroCopy) | ~25x | Maximum performance | Direct FFI construction
248
+ | 5. parsanol-native (ZeroCopy + Slice) | ~29x | Linters, IDEs, Expressir | Zero-copy + source positions
249
+ |===
250
+
251
+ === Slice Support (New)
252
+
253
+ The ZeroCopy + Slice mode preserves source position information for each parsed element:
254
+
255
+ [source,ruby]
256
+ ----
257
+ # Before (plain strings - no position info):
258
+ [{"word"=>"hello"}, " ", {"name"=>"world"}]
259
+
260
+ # After (Slice objects with position info):
261
+ [{"word"=>"hello"@0}, " "@5, {"name"=>"world"@6}]
262
+
263
+ # The @N notation shows the byte offset in the original input
264
+ # Parsanol::Slice is compatible with Parslet::Slice
265
+ ----
266
+
267
+ This is essential for tools like linters, IDEs, and Expressir that need to track where in the source code each element came from.
268
+
269
+ === Evidence-Based Results
270
+
271
+ Actual benchmark results from Expressir parsing EXPRESS schemas (22KB file, 733 lines):
272
+
273
+ [cols="3,2,2,3"]
274
+ |===
275
+ | Mode | Time | Speedup | Notes
276
+
277
+ | Ruby (Parslet) | 3036 ms | 1x (baseline) | Pure Ruby parsing
278
+ | Native Batch (u64) | 153 ms | 19.9x faster | AST via u64 array transfer
279
+ | Native ZeroCopy (Slice) | 106 ms | 28.7x faster | Zero-copy with source positions
280
+ |===
281
+
282
+ *Run `bundle exec ruby benchmark/run_all.rb` to see results on YOUR machine.*
283
+
284
+ === Running Benchmarks
285
+
286
+ Verify these results yourself:
287
+
288
+ [source,shell]
289
+ ----
290
+ cd parsanol-ruby
291
+ bundle install
292
+ bundle exec rake compile # Build native extension
293
+ bundle exec ruby benchmark/run_all.rb --quick
294
+ ----
295
+
296
+ See `benchmark/APPROACHES.md` for detailed diagrams explaining each approach.
297
+
298
+ == Architecture
299
+
300
+ Parsanol consists of two main components:
301
+
302
+ * *parsanol-ruby*: The Ruby gem with parser DSL and transformation engine
303
+ * *parsanol-rs*: Rust crate providing native parsing acceleration
304
+
305
+ === Three Transformation Modes
306
+
307
+ Parsanol supports three transformation modes:
308
+
309
+ 1. **Ruby Transform** (Parslet-compatible): Parse in Rust, transform in Ruby
310
+ 2. **Serialized Output**: Parse and transform in Rust, return JSON
311
+ 3. **Native FFI**: Parse, transform, and return Ruby objects directly
312
+
313
+ === Static vs Dynamic Parsers
314
+
315
+ Parsanol supports two parser modes:
316
+
317
+ * *Dynamic Parsers*: Can be modified during parsing, more flexible but slower
318
+ * *Static/Frozen Parsers*: Pre-compiled grammar, fastest path for production use
319
+
320
+ === Streaming Builder API
321
+
322
+ For maximum performance, use the streaming builder API which eliminates intermediate AST construction. Your Ruby callbacks receive parsed values directly during parsing:
323
+
324
+ [IMPORTANT]
325
+ ====
326
+ The streaming builder API requires the native extension. If the native extension is not available, use the pure Ruby parser instead.
327
+ ====
328
+
329
+ [source,ruby]
330
+ ----
331
+ require 'parsanol'
332
+
333
+ # Define a custom builder by including Parsanol::BuilderCallbacks
334
+ class StringCollector
335
+ include Parsanol::BuilderCallbacks
336
+
337
+ def initialize
338
+ @strings = []
339
+ end
340
+
341
+ def on_string(value, offset, length)
342
+ @strings << value
343
+ end
344
+
345
+ def finish
346
+ @strings
347
+ end
348
+ end
349
+
350
+ # Serialize grammar from a parser
351
+ grammar = Parsanol::Native.serialize_grammar(MyParser.new.root)
352
+
353
+ # Parse with the builder
354
+ builder = StringCollector.new
355
+ result = Parsanol::Native.parse_with_builder(grammar, input, builder)
356
+ # result: ["hello", "world"]
357
+ ----
358
+
359
+ ==== Available Callback Methods
360
+
361
+ Include `Parsanol::BuilderCallbacks` in your class and override these methods:
362
+
363
+ [cols="1,3,2"]
364
+ |===
365
+ | Method | Description | Default
366
+
367
+ | `on_start(input)` | Parsing started | No-op
368
+ | `on_success` | Parsing succeeded | No-op
369
+ | `on_error(message)` | Parsing failed | No-op
370
+ | `on_string(value, offset, length)` | String/slice matched | No-op
371
+ | `on_int(value)` | Integer matched | No-op
372
+ | `on_float(value)` | Float matched | No-op
373
+ | `on_bool(value)` | Boolean matched | No-op
374
+ | `on_nil` | Nil matched | No-op
375
+ | `on_hash_start(size)` | Entering a hash/object | No-op
376
+ | `on_hash_key(key)` | Hash key encountered | No-op
377
+ | `on_hash_value(key)` | About to parse hash value | No-op
378
+ | `on_hash_end(size)` | Exiting a hash/object | No-op
379
+ | `on_array_start(size)` | Entering an array | No-op
380
+ | `on_array_element(index)` | About to parse array element | No-op
381
+ | `on_array_end(size)` | Exiting an array | No-op
382
+ | `on_named_start(name)` | Starting named rule | No-op
383
+ | `on_named_end(name)` | Finished named rule | No-op
384
+ | `finish` | Parsing complete, return result | Returns nil
385
+ |===
386
+
387
+ The `size` parameter passed to `on_array_start`, `on_hash_start`, `on_array_end`, and `on_hash_end` is the number of elements in the array or entries in the hash. For the start callbacks (`on_array_start` and `on_hash_start`) it may be `nil`.
388
+
389
+ ==== Built-in Builders
390
+
391
+ Parsanol provides three built-in builders for common use cases:
392
+
393
+ * `Parsanol::Builders::DebugBuilder` - Collects all parsing events as strings for debugging
394
+ * `Parsanol::Builders::StringCollector` - Collects all string values into an array
395
+ * `Parsanol::Builders::NodeCounter` - Counts nodes by type (strings, ints, arrays, hashes, etc.)
396
+
397
+ [source,ruby]
398
+ ----
399
+ # DebugBuilder - see all parsing events
400
+ debug = Parsanol::Builders::DebugBuilder.new
401
+ Parsanol::Native.parse_with_builder(grammar, input, debug)
402
+ puts debug.events # => ["start: ...", "string: ...", "hash_start", ...]
403
+
404
+ # StringCollector - extract all strings
405
+ collector = Parsanol::Builders::StringCollector.new
406
+ Parsanol::Native.parse_with_builder(grammar, input, collector)
407
+ puts collector.strings # => ["hello", "world", ...]
408
+
409
+ # NodeCounter - count node types
410
+ counter = Parsanol::Builders::NodeCounter.new
411
+ Parsanol::Native.parse_with_builder(grammar, input, counter)
412
+ puts counter.counts # => {:string=>5, :int=>3, :hash=>2, :array=>1}
413
+ ----
414
+
415
+ ==== Advanced Example: Building Custom Objects
416
+
417
+ [source,ruby]
418
+ ----
419
+ class JsonBuilder
420
+ include Parsanol::BuilderCallbacks
421
+
422
+ def initialize
423
+ @stack = []
424
+ @current_key = nil
425
+ end
426
+
427
+ def on_string(value, offset, length)
428
+ add_value(value)
429
+ end
430
+
431
+ def on_int(value)
432
+ add_value(value)
433
+ end
434
+
435
+ def on_float(value)
436
+ add_value(value)
437
+ end
438
+
439
+ def on_bool(value)
440
+ add_value(value)
441
+ end
442
+
443
+ def on_nil
444
+ add_value(nil)
445
+ end
446
+
447
+ def on_hash_start(size = nil)
448
+ @stack.push({})
449
+ end
450
+
451
+ def on_hash_key(key)
452
+ @current_key = key
453
+ end
454
+
455
+ def on_hash_end(size)
456
+ finished = @stack.pop
457
+ add_value(finished) unless @stack.empty?
458
+ @result = finished if @stack.empty?
459
+ end
460
+
461
+ def on_array_start(size = nil)
462
+ @stack.push([])
463
+ end
464
+
465
+ def on_array_element(index)
466
+ # Called before each array element
467
+ end
468
+
469
+ def on_array_end(size)
470
+ finished = @stack.pop
471
+ add_value(finished) unless @stack.empty?
472
+ @result = finished if @stack.empty?
473
+ end
474
+
475
+ def on_named_start(name)
476
+ # Called when entering a named rule (e.g., .as(:name))
477
+ end
478
+
479
+ def on_named_end(name)
480
+ # Called when exiting a named rule
481
+ end
482
+
483
+ def finish
484
+ @result
485
+ end
486
+
487
+ private
488
+
489
+ def add_value(value)
490
+ return if @stack.empty?
491
+ case @stack.last
492
+ when Hash
493
+ @stack.last[@current_key] = value
494
+ @current_key = nil
495
+ when Array
496
+ @stack.last << value
497
+ end
498
+ end
499
+ end
500
+
501
+ builder = JsonBuilder.new
502
+ result = Parsanol::Native.parse_with_builder(grammar, input, builder)
503
+ # result: Parsed JSON as Ruby objects
504
+ ----
505
+
506
+ === Parallel Parsing
507
+
508
+ Parse multiple inputs in parallel using all CPU cores:
509
+
510
+ [source,ruby]
511
+ ----
512
+ require 'parsanol/parallel'
513
+
514
+ grammar = MyParser.new.serialize_grammar
515
+ inputs = Dir.glob("*.json").map { |f| File.read(f) }
516
+
517
+ # Parse all files in parallel (up to 8x faster on 8 cores)
518
+ results = Parsanol::Parallel.parse_batch(grammar, inputs)
519
+
520
+ # With configuration
521
+ config = Parsanol::Parallel::Config.new
522
+ .with_num_threads(4)
523
+ .with_min_chunk_size(50)
524
+
525
+ results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
526
+ ----
527
+
528
+ === Infix Expression Parsing
529
+
530
+ Built-in support for parsing infix expressions with operator precedence:
531
+
532
+ [source,ruby]
533
+ ----
534
+ class CalculatorParser < Parsanol::Parser
535
+ rule(:number) { match('[0-9]').repeat(1).as(:int) }
536
+ rule(:primary) { number | str('(') >> expr >> str(')') }
537
+
538
+ # Define operators with precedence and associativity
539
+ rule(:expr) {
540
+ infix_expression(primary,
541
+ [str('*'), 2, :left],
542
+ [str('/'), 2, :left],
543
+ [str('+'), 1, :left],
544
+ [str('-'), 1, :left],
545
+ [str('^'), 3, :right] # Right-associative
546
+ )
547
+ }
548
+ root(:expr)
549
+ end
550
+ ----
551
+
552
+ === Security Features
553
+
554
+ For parsing untrusted input, use built-in limits:
555
+
556
+ [source,ruby]
557
+ ----
558
+ # Configure limits for untrusted input
559
+ result = Parsanol::Native.parse_with_limits(
560
+ grammar_json,
561
+ untrusted_input,
562
+ max_input_size: 10 * 1024 * 1024, # 10 MB max
563
+ max_recursion_depth: 100 # Limit recursion
564
+ )
565
+ ----
566
+
567
+ === Debug Tools
568
+
569
+ Enable tracing for debugging grammars:
570
+
571
+ [source,ruby]
572
+ ----
573
+ # Parse with trace
574
+ result, trace = Parsanol::Native.parse_with_trace(grammar_json, input)
575
+ puts trace
576
+
577
+ # Generate grammar visualization
578
+ mermaid = Parsanol::Native.grammar_to_mermaid(grammar_json)
579
+ dot = Parsanol::Native.grammar_to_dot(grammar_json)
580
+ ----
581
+
582
+ == Development
583
+
584
+ === Setup
585
+
586
+ [source,shell]
587
+ ----
588
+ bundle install
589
+ ----
590
+
591
+ === Testing
592
+
593
+ [source,shell]
594
+ ----
595
+ # Run all tests
596
+ bundle exec rake spec
597
+
598
+ # Run unit tests only
599
+ bundle exec rake spec:unit
600
+
601
+ # Run specific test file
602
+ bundle exec rspec spec/parsanol/atoms/str_spec.rb
603
+ ----
604
+
605
+ === Compiling Native Extension
606
+
607
+ The native extension requires Rust 1.75+:
608
+
609
+ [source,shell]
610
+ ----
611
+ # Install Rust (if not already installed)
612
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
613
+
614
+ # Compile the native extension
615
+ bundle exec rake compile
616
+
617
+ # Verify native extension is working
618
+ ruby -I lib -e "require 'parsanol'; puts Parsanol::Native.available?"
619
+ # => true
620
+ ----
621
+
622
+ === Running Benchmarks
623
+
624
+ [source,shell]
625
+ ----
626
+ # Quick benchmarks
627
+ bundle exec rake benchmark
628
+
629
+ # Comprehensive benchmark suite
630
+ bundle exec rake benchmark:all
631
+
632
+ # Run the benchmark script directly (with the --quick flag)
633
+ bundle exec ruby benchmark/run_all.rb --quick
634
+ ----
635
+
636
+ == License
637
+
638
+ MIT License - see LICENSE file for details.
639
+
640
+ == Resources
641
+
642
+ * https://github.com/parsanol/parsanol-ruby[GitHub Repository]
643
+ * https://github.com/parsanol/parsanol-rs[Rust Crate]