parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,216 @@
1
+ # YAML Parser - Ruby Implementation
2
+ #
3
+ # Parse a subset of YAML: key-value pairs, lists, nested maps, scalars.
4
+ #
5
+ # Run with: ruby example/yaml/basic.rb
6
+
7
# Make the gem's own lib/ directory loadable when the example is run from a
# source checkout. This file lives in example/yaml/, so lib/ is two levels up;
# expanding against __dir__ keeps the entry absolute and independent of the
# current working directory.
$LOAD_PATH.unshift File.expand_path('../../lib', __dir__)
8
+
9
+ require 'parsanol/parslet'
10
+
11
+ # YAML parser (subset)
12
# Grammar for a small YAML subset: comments, blank lines, `key: value`
# mappings, `- value` list items, and indented blocks nested under a key or a
# list item.
#
# Known limitation: indentation depth is not tracked. A nested block is simply
# a run of consecutive indented lines, so blocks nested inside blocks are
# absorbed greedily by the innermost open block.
class YamlParser < Parsanol::Parser
  root :document

  # A document is a run of top-level entries, one per line.
  rule(:document) { (mapping | list_item | comment | blank_line).repeat(1).as(:document) }

  # Comment: '#' to end of line. The capture includes leading whitespace and
  # the '#' itself.
  rule(:comment) {
    (space? >> str('#') >> (newline.absent? >> any).repeat).as(:comment) >> line_end
  }

  # Blank line. It must consume a newline so the document repetition always
  # makes progress.
  rule(:blank_line) { space? >> newline }

  # Mapping (key-value). `key` already tags its capture, so it is not tagged
  # again here.
  rule(:mapping) {
    (key >> colon >> (inline_value | indented_value)).as(:mapping)
  }

  rule(:key) {
    (match('[a-zA-Z_]') >> match('[a-zA-Z0-9_]').repeat).as(:key)
  }

  # Only horizontal whitespace may follow the colon; the newline has to stay
  # available for `indented_value`.
  rule(:colon) { str(':') >> space? }

  # Inline value: a scalar on the same line as the key.
  rule(:inline_value) {
    space? >> scalar.as(:value) >> space? >> line_end
  }

  # Indented value: the rest of the line is empty and a block follows.
  rule(:indented_value) {
    newline >> indented_block
  }

  # Every entry consumes its own line ending, so the block is just a
  # repetition of "indent, entry".
  rule(:indented_block) {
    (indent >> (mapping | list_item)).repeat(1).as(:block)
  }

  # List item: "- value", or "-" followed by an indented block.
  rule(:list_item) {
    (str('-') >> (space >> inline_list_value | space? >> indented_value)).as(:list_item)
  }

  rule(:inline_list_value) { inline_value }

  # Scalar types. Quoted strings come first; typed scalars are only accepted
  # when they span the whole value, so "1.0.0" or "true story" fall through to
  # `plain_string` instead of failing after a partial match.
  rule(:scalar) { quoted_string | typed_scalar | plain_string }

  # Float is tried before integer so the decimal point is not left behind.
  rule(:typed_scalar) { (float | integer | boolean | null) >> value_end.present? }

  rule(:string) {
    quoted_string | plain_string
  }

  rule(:quoted_string) {
    (str('"') >>
      (str('\\').ignore >> any | str('"').absent? >> any).repeat.as(:string) >>
      str('"')) |
      (str("'") >>
        (str("'").absent? >> any).repeat.as(:string) >>
        str("'"))
  }

  rule(:plain_string) {
    (newline.absent? >> str(':').absent? >> any).repeat(1).as(:string)
  }

  rule(:integer) {
    ((str('+') | str('-')).maybe >> match('[0-9]').repeat(1)).as(:integer)
  }

  rule(:float) {
    ((str('+') | str('-')).maybe >>
      match('[0-9]').repeat(1) >>
      str('.') >>
      match('[0-9]').repeat(1)).as(:float)
  }

  rule(:boolean) {
    (str('true') | str('false')).as(:boolean)
  }

  rule(:null) {
    (str('null') | str('~')).as(:null)
  }

  # Helpers. Whitespace helpers are horizontal only; line breaks are always
  # matched explicitly through `newline`.
  rule(:space)     { match('[ \t]').repeat(1) }
  rule(:space?)    { match('[ \t]').repeat }
  rule(:newline)   { str("\r\n") | str("\n") }
  rule(:line_end)  { newline | any.absent? }
  rule(:value_end) { space? >> line_end }
  rule(:indent)    { match('[ \t]').repeat(1) }
end

# YAML result classes
YamlDocument = Struct.new(:entries) do
  # Folds the entries of one block into plain Ruby data: a block made only of
  # list items becomes an Array of their values, anything else becomes a Hash
  # of its mappings (other entries are skipped).
  def self.collapse(entries)
    if !entries.empty? && entries.all? { |entry| entry.is_a?(YamlListItem) }
      entries.map(&:value)
    else
      entries.each_with_object({}) do |entry, result|
        result[entry.key] = entry.value if entry.is_a?(YamlMapping)
      end
    end
  end

  # Hash of the top-level mappings; comments and top-level list items are
  # not included.
  def to_h
    entries.each_with_object({}) do |entry, result|
      result[entry.key] = entry.value if entry.is_a?(YamlMapping)
    end
  end
end

YamlMapping = Struct.new(:key, :value)
YamlListItem = Struct.new(:value)
YamlComment = Struct.new(:text)

# Transform parse tree to AST
class YamlTransform < Parsanol::Transform
  # Scalar conversions
  rule(string: simple(:s)) { s.to_s.strip }
  # An empty quoted string is captured as an empty list rather than a slice.
  rule(string: sequence(:chars)) { chars.join }
  rule(integer: simple(:i)) { i.to_s.to_i }
  rule(float: simple(:f)) { f.to_s.to_f }
  rule(boolean: simple(:b)) { b.to_s == 'true' }
  rule(null: simple(:n)) { nil }

  rule(comment: simple(:c)) { YamlComment.new(c.to_s) }

  rule(mapping: { key: simple(:k), value: simple(:v) }) {
    YamlMapping.new(k.to_s, v)
  }

  # Block entries have already been turned into Yaml* structs by the time the
  # enclosing mapping is visited, so they bind as a sequence.
  rule(mapping: { key: simple(:k), block: sequence(:entries) }) {
    YamlMapping.new(k.to_s, YamlDocument.collapse(entries))
  }

  rule(list_item: { value: simple(:v) }) {
    YamlListItem.new(v)
  }

  rule(list_item: { block: sequence(:entries) }) {
    YamlListItem.new(YamlDocument.collapse(entries))
  }

  rule(document: sequence(:entries)) { YamlDocument.new(entries) }
  # A document made only of blank lines has no named captures at all.
  rule(document: simple(:text)) { YamlDocument.new([]) }
end
164
+
165
# Parses +str+ with YamlParser and converts the resulting tree with
# YamlTransform. On a Parsanol::ParseError the message is printed and nil is
# returned instead.
def parse_yaml(str)
  grammar = YamlParser.new
  ast_builder = YamlTransform.new

  ast_builder.apply(grammar.parse(str))
rescue Parsanol::ParseError => e
  puts "Parse error: #{e.message}"
  nil
end
176
+
177
# Demo: runs only when this file is executed directly, not when it is required.
if $PROGRAM_NAME == __FILE__
  heavy_rule = "=" * 50
  light_rule = "-" * 50

  puts "YAML Parser"
  puts heavy_rule
  puts

  yaml = <<~YAML
    # Configuration file
    name: Example Application
    version: 1.0.0
    debug: true
    timeout: 30.5

    database:
      host: localhost
      port: 5432
      name: myapp

    servers:
      - alpha
      - beta
      - gamma
  YAML

  puts "Input:"
  puts light_rule
  puts yaml
  puts light_rule
  puts

  result = parse_yaml(yaml)

  # parse_yaml returns nil after reporting a parse error.
  if result
    puts "Parsed AST:"
    pp result
    puts
    puts "Hash Output:"
    pp result.to_h
  end
end
@@ -0,0 +1,148 @@
1
+ # YAML Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/yaml
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Document Structure
13
+
14
+ A YAML document contains mappings, list items, comments, and blank lines:
15
+
16
+ ```ruby
17
+ rule(:document) { (mapping | list_item | comment | blank_line).repeat(1).as(:document) }
18
+ ```
19
+
20
+ YAML is line-oriented with significant indentation.
21
+
22
+ ### Mapping Rule
23
+
24
+ Key-value pairs with colon separator:
25
+
26
+ ```ruby
27
+ rule(:mapping) {
28
+ (key.as(:key) >>
29
+ colon >>
30
+ (inline_value | indented_value)).as(:mapping)
31
+ }
32
+
33
+ rule(:key) {
34
+ (match('[a-zA-Z_]') >> match('[a-zA-Z0-9_]').repeat).as(:key)
35
+ }
36
+ ```
37
+
38
+ Values can be inline (same line) or indented (nested block).
39
+
40
+ ### Inline Value
41
+
42
+ Scalar on same line as key:
43
+
44
+ ```ruby
45
+ rule(:inline_value) {
46
+ (space? >> (string | integer | float | boolean | null).as(:value) >> newline)
47
+ }
48
+ ```
49
+
50
+ Detects scalar type automatically.
51
+
52
+ ### Indented Value
53
+
54
+ Nested block with increased indentation:
55
+
56
+ ```ruby
57
+ rule(:indented_value) {
58
+ (newline >> indented_block.as(:block))
59
+ }
60
+
61
+ rule(:indented_block) {
62
+ (indent >> (mapping | list_item) >>
63
+ (newline >> indent >> (mapping | list_item)).repeat).as(:block)
64
+ }
65
+ ```
66
+
67
+ All items at the same indent level belong to the same block.
68
+
69
+ ### List Item Rule
70
+
71
+ Hyphen-prefixed values:
72
+
73
+ ```ruby
74
+ rule(:list_item) {
75
+ (str('-') >> space >>
76
+ (inline_list_value | indented_value)).as(:list_item)
77
+ }
78
+ ```
79
+
80
+ List items can contain scalars or nested structures.
81
+
82
+ ### String Rules
83
+
84
+ Quoted and plain strings:
85
+
86
+ ```ruby
87
+ rule(:quoted_string) {
88
+ (str('"') >>
89
+ (str('\\').ignore >> any | str('"').absent? >> any).repeat.as(:string) >>
90
+ str('"')) |
91
+ (str("'") >>
92
+ (str("'").absent? >> any).repeat.as(:string) >>
93
+ str("'"))
94
+ }
95
+
96
+ rule(:plain_string) {
97
+ (newline.absent? >> str(':').absent? >> any).repeat(1).as(:string)
98
+ }
99
+ ```
100
+
101
+ Quoted strings handle escapes; plain strings don't contain colons.
102
+
103
+ ### Scalar Types
104
+
105
+ Type detection by pattern:
106
+
107
+ ```ruby
108
+ rule(:integer) { (str('+') | str('-')).maybe >> match('[0-9]').repeat(1) }
109
+ rule(:float) { ... match('[0-9]').repeat(1) >> str('.') >> match('[0-9]').repeat(1) }
110
+ rule(:boolean) { str('true') | str('false') }
111
+ rule(:null) { str('null') | str('~') }
112
+ ```
113
+
114
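+ Order matters: float must be tried before integer to capture the decimal point, and typed scalars must be tried before plain strings, which match almost any text. Note that the `inline_value` rule shown above lists `string` and `integer` first and therefore does not follow this order.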
115
+
116
+ ## Output Types
117
+
118
+ ```ruby
119
+ # Document with mappings
120
+ YamlDocument.new([
121
+ YamlMapping.new("name", "Example"),
122
+ YamlMapping.new("database", { "host" => "localhost" })
123
+ ])
124
+
125
+ # to_h produces:
126
+ {
127
+ "name" => "Example",
128
+ "database" => { "host" => "localhost" }
129
+ }
130
+ ```
131
+
132
+ ## Design Decisions
133
+
134
+ ### Why Inline vs Indented Values?
135
+
136
+ YAML allows values on same line or nested. Different rules handle the distinct parsing requirements.
137
+
138
+ ### Why Separate String Types?
139
+
140
+ Quoted strings process escape sequences; plain strings are literal. Different semantics require different rules.
141
+
142
+ ### Why Indentation Tracking?
143
+
144
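+ YAML uses indentation for structure. This example only requires an indent marker (a space or a tab) in front of each nested entry; it does not track indentation depth, so it cannot tell differently indented levels apart.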
145
+
146
+ ### Why This Subset?
147
+
148
+ Full YAML is complex (anchors, tags, multiline strings). This subset covers common configuration use cases.
@@ -0,0 +1,4 @@
1
# Build configuration for the native extension: loads mkmf plus the rb_sys
# mkmf integration and generates the makefile for the Rust-backed
# "parsanol/parsanol_native" extension.
require 'mkmf'
require 'rb_sys/mkmf'

create_rust_makefile('parsanol/parsanol_native')
@@ -0,0 +1,62 @@
1
+
2
+ # @api private
3
+ module Parsanol::Accelerator
4
# Applies a list of accelerator rules to a parser atom tree. The tree is
# rebuilt bottom-up through the visitor callbacks below, and every rebuilt
# node is offered to the rules via #transform.
class Application
  # @param atom  root atom to rewrite; it must respond to #accept
  # @param rules [Array<Array>] [expression, action] pairs
  def initialize(atom, rules)
    @atom = atom
    @rules = rules
  end

  # Starts the rewrite at the root atom and returns the rewritten atom.
  def call
    @atom.accept(self)
  end

  def visit_parser(root)
    transform root.accept(self)
  end

  # The entity body stays lazy: the wrapped block is only rewritten when the
  # new entity evaluates it.
  def visit_entity(name, block)
    transform Parsanol::Atoms::Entity.new(name) { block.call.accept(self) }
  end

  def visit_named(name, atom)
    transform Parsanol::Atoms::Named.new(atom.accept(self), name)
  end

  def visit_repetition(tag, min, max, atom)
    transform Parsanol::Atoms::Repetition.new(atom.accept(self), min, max, tag)
  end

  def visit_alternative(alternatives)
    transform Parsanol::Atoms::Alternative.new(
      *alternatives.map { |atom| atom.accept(self) })
  end

  def visit_sequence(sequence)
    transform Parsanol::Atoms::Sequence.new(
      *sequence.map { |atom| atom.accept(self) })
  end

  # The child is rewritten first, like in every other composite visitor, so
  # rules also reach atoms nested below `present?` / `absent?`.
  def visit_lookahead(positive, atom)
    transform Parsanol::Atoms::Lookahead.new(atom.accept(self), positive)
  end

  def visit_re(regexp)
    transform Parsanol::Atoms::Re.new(regexp)
  end

  def visit_str(str)
    transform Parsanol::Atoms::Str.new(str)
  end

  # Offers +atom+ to each rule in order. The action of the first rule whose
  # expression matches is evaluated in a Parsanol::Context built from the
  # match bindings, and its result replaces the atom. When no rule matches,
  # the atom itself is returned.
  def transform(atom)
    @rules.each do |expr, action|
      bindings = Parsanol::Accelerator.match(atom, expr)
      next unless bindings

      return Parsanol::Context.new(bindings).instance_eval(&action)
    end

    atom
  end
end
60
+ end
61
+
62
+ require 'parsanol/context'
@@ -0,0 +1,112 @@
1
+
2
+ require 'parsanol/atoms/visitor'
3
+
4
+ module Parsanol::Accelerator
5
# @api private
#
# Visitor that checks a single parser atom against a single accelerator
# expression. Each visit_* callback yields a truthy value when the visited
# atom matches the expression, a falsy one otherwise.
class Apply
  def initialize(engine, expr)
    @expr = expr
    @engine = engine
  end

  # Parser roots never match an expression.
  def visit_parser(root)
    false
  end

  # Entities never match an expression.
  def visit_entity(name, block)
    false
  end

  # Matches an :as expression by binding its first argument to the name.
  def visit_named(name, atom)
    match(:as) { |key| @engine.try_bind(key, name) }
  end

  # Matches a :rep expression with identical bounds whose inner expression
  # matches the repeated atom.
  def visit_repetition(tag, min, max, atom)
    match(:rep) do |wanted_min, wanted_max, inner|
      wanted_min == min && wanted_max == max && @engine.match(atom, inner)
    end
  end

  def visit_alternative(alternatives)
    match(:alt) { |*expected| pairwise_match(alternatives, expected) }
  end

  def visit_sequence(sequence)
    match(:seq) { |*expected| pairwise_match(sequence, expected) }
  end

  # :absent expressions only match negative lookaheads, :present expressions
  # only positive ones; any other expression type yields nil.
  def visit_lookahead(positive, atom)
    case @expr.type
    when :absent
      positive == false && @engine.match(atom, @expr.args.first)
    when :present
      positive == true && @engine.match(atom, @expr.args.first)
    end
  end

  def visit_re(regexp)
    match(:re) do |*conditions|
      conditions.all? { |condition| @engine.try_bind(condition, regexp) }
    end
  end

  def visit_str(str)
    match(:str) do |*conditions|
      conditions.all? { |condition| @engine.try_bind(condition, str) }
    end
  end

  # Yields the expression's arguments when the expression has the given type
  # tag and returns the block's value; returns nil otherwise.
  def match(type_tag)
    return unless @expr.type == type_tag

    yield(*@expr.args)
  end

  private

  # True when both lists have the same length and every atom matches the
  # expression at the same position.
  def pairwise_match(atoms, expressions)
    return false unless atoms.size == expressions.size

    atoms.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
  end
end
74
+
75
# @api private
#
# Holds the variable bindings collected while one expression is matched
# against one atom tree.
class Engine
  # @return [Hash] variables bound so far, keyed by variable
  attr_reader :bindings

  def initialize
    @bindings = {}
  end

  # Matches +atom+ against +expr+ by letting the atom visit an Apply
  # instance; returns whatever the visited callback returns.
  def match(atom, expr)
    atom.accept(
      Apply.new(self, expr))
  end

  # Tries to reconcile +variable+ with +value+:
  #
  # * an already bound variable matches only an equal value,
  # * an unbound Symbol is bound to the value and always succeeds,
  # * anything else is treated as a constraint and tested with `===`.
  #
  # @return [Boolean] result of the comparison, or true after a new binding
  def try_bind(variable, value)
    return value == lookup(variable) if bound?(variable)
    return variable === value unless variable.is_a?(Symbol)

    bind(variable, value)
    # Report success explicitly: the bound value itself may be nil or false,
    # which callers would otherwise read as a failed match.
    true
  end

  def bound?(var)
    @bindings.key?(var)
  end

  def lookup(var)
    @bindings[var]
  end

  def bind(var, val)
    @bindings[var] = val
  end
end
112
+ end
@@ -0,0 +1,162 @@
1
+
2
+
3
+ # Optimizes the parsers by pattern matching on the parser atoms and replacing
4
+ # matches with better versions. See the file qed/accelerators.md for a more
5
+ # in-depth description.
6
+ #
7
+ # Example:
8
+ # quote = str('"')
9
+ # parser = quote >> (quote.absent? >> any).repeat >> quote
10
+ #
11
+ # A = Accelerator # for making what follows a bit shorter
12
+ # optimized_parser = A.apply(parser,
13
+ # A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
14
+ #
15
+ # optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
16
+ #
17
+ module Parsanol::Accelerator
18
+
19
+ # An expression to match against a tree of parser atoms. Normally, an
20
+ # expression is produced by Parsanol::Accelerator.any,
21
+ # Parsanol::Accelerator.str or Parsanol::Accelerator.re.
22
+ #
23
+ # Expressions can be chained much like parslet atoms can be:
24
+ #
25
+ # expr.repeat(1) # matching repetition
26
+ # expr.absent? # matching absent?
27
+ # expr.present? # matching present?
28
+ # expr1 >> expr2 # matching a sequence
29
+ # expr1 | expr2 # matching an alternation
30
+ #
31
+ # @see Parsanol::Accelerator.str
32
+ # @see Parsanol::Accelerator.re
33
+ # @see Parsanol::Accelerator.any
34
+ #
35
+ # @see Parsanol::Accelerator
36
+ #
37
class Expression
  # @return [Symbol] tag naming the kind of atom this expression matches
  attr_reader :type
  # @return [Array] arguments of the expression (sub-expressions, variables
  #   or constraints, depending on the type)
  attr_reader :args

  def initialize(type, *args)
    @type = type
    @args = args
  end

  # Matches a sequence of this expression followed by +other_expr+.
  #
  # @return [Expression]
  def >>(other_expr)
    join_or_new :seq, other_expr
  end

  # Matches an alternation of this expression and +other_expr+.
  #
  # @return [Expression]
  def |(other_expr)
    join_or_new :alt, other_expr
  end

  # Matches a negative lookahead wrapping this expression.
  #
  # @return [Expression]
  def absent?
    Expression.new(:absent, self)
  end

  # Matches a positive lookahead wrapping this expression.
  #
  # @return [Expression]
  def present?
    Expression.new(:present, self)
  end

  # Matches a repetition of this expression with exactly these bounds.
  #
  # @return [Expression]
  def repeat(min = 0, max = nil)
    Expression.new(:rep, min, max, self)
  end

  # Matches a named atom. Only +name+ is recorded; the receiver is not part
  # of the resulting expression.
  #
  # @return [Expression]
  def as(name)
    Expression.new(:as, name)
  end

  # Extends an expression of type +tag+ by +other_expr+, or wraps both
  # operands in a new expression of that type. The receiver is never
  # modified, so an expression can be reused as a prefix of several longer
  # ones without those extensions leaking back into it.
  #
  # @api private
  # @return [Expression]
  def join_or_new(tag, other_expr)
    if type == tag
      Expression.new(tag, *@args, other_expr)
    else
      Expression.new(tag, self, other_expr)
    end
  end
end
86
+
87
+ module_function
88
# Builds a match expression for `str` parslet atoms. The variable and any
# further constraints become the arguments of the expression.
#
# @return [Parsanol::Accelerator::Expression]
#
def str(variable, *constraints)
  conditions = [variable, *constraints]
  Expression.new(:str, *conditions)
end
95
+
96
# Builds a match expression for `match` parslet atoms. The variable and any
# further constraints become the arguments of the expression.
#
# @return [Parsanol::Accelerator::Expression]
#
def re(variable, *constraints)
  conditions = [variable, *constraints]
  Expression.new(:re, *conditions)
end
103
+
104
# Builds a match expression for `any` parslet atoms, expressed as an :re
# expression whose only argument is the literal ".".
#
# @return [Parsanol::Accelerator::Expression]
#
def any
  Expression.new(:re, ".")
end
111
+
112
# Checks whether an expression matches a parslet atom. A successful match
# returns the bindings made into the pattern, which is the empty hash when
# the match needed no bindings. A failed match returns nil.
#
# @param atom [Parsanol::Atoms::Base] parslet atom to match against
# @param expr [Parsanol::Accelerator::Expression] expression to match
# @return [nil, Hash] bindings for the match, nil on failure
#
def match(atom, expr)
  engine = Engine.new

  engine.match(atom, expr) ? engine.bindings : nil
end
126
+
127
# Builds an accelerator rule: a pair of the matching expression and the
# block to run once that expression could be bound to a parser.
#
# Example:
#   Accelerator.rule(Accelerator.any) { Parslet.match('.') }
#
def rule(expression, &action)
  pair = [expression]
  pair << action
  pair
end
137
+
138
# Runs a set of rules recursively over a parslet atom. Wherever a rule
# matches, its action block is called.
#
# Example:
#   quote = str('"')
#   parser = quote >> (quote.absent? >> any).repeat >> quote
#
#   A = Accelerator # for making what follows a bit shorter
#   optimized_parser = A.apply(parser,
#     A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
#
#   optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
#
# @param atom [Parsanol::Atoms::Base] a parser to optimize
# @param *rules [Parsanol::Accelerator::Rule] rules produced by .rule
# @return [Parsanol::Atoms::Base] optimized parser
#
def apply(atom, *rules)
  application = Application.new(atom, rules)
  application.call
end
159
+ end
160
+
161
+ require 'parsanol/accelerator/engine'
162
+ require 'parsanol/accelerator/application'