parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
data/lib/parsanol.rb ADDED
@@ -0,0 +1,408 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A simple parser generator library. Typical usage would look like this:
4
+ #
5
+ # require 'parsanol'
6
+ #
7
+ # class MyParser < Parsanol::Parser
8
+ # rule(:a) { str('a').repeat }
9
+ # root(:a)
10
+ # end
11
+ #
12
+ # pp MyParser.new.parse('aaaa') # => 'aaaa'@0
13
+ # pp MyParser.new.parse('bbbb') # => Parsanol::ParseFailed:
14
+ # # Don't know what to do with bbbb at line 1 char 1.
15
+ #
16
+ # The simple DSL allows you to define grammars in PEG-style. This kind of
17
+ # grammar construction does away with the ambiguities that usually come with
18
+ # parsers; instead, it allows you to construct grammars that are easier to
19
+ # debug, since less magic is involved.
20
+ #
21
+ # Parsanol is typically used in stages:
22
+ #
23
+ #
24
+ # * Parsing the input string; this yields an intermediary tree, see
25
+ # Parsanol.any, Parsanol.match, Parsanol.str, Parsanol::ClassMethods#rule and
26
+ # Parsanol::ClassMethods#root.
27
+ # * Transformation of the tree into something useful to you, see
28
+ # Parsanol::Transform, Parsanol.simple, Parsanol.sequence and Parsanol.subtree.
29
+ #
30
+ # The first stage is traditionally intermingled with the second stage; output
31
+ # from the second stage is usually called the 'Abstract Syntax Tree' or AST.
32
+ #
33
+ # The stages are completely decoupled; you can change your grammar around and
34
+ # use the second stage to isolate the rest of your code from the changes
35
+ # you've effected.
36
+ #
37
+ # == Further reading
38
+ #
39
+ # All parslet atoms are subclasses of {Parsanol::Atoms::Base}. You might want to
40
+ # look at all of those: {Parsanol::Atoms::Re}, {Parsanol::Atoms::Str},
41
+ # {Parsanol::Atoms::Repetition}, {Parsanol::Atoms::Sequence},
42
+ # {Parsanol::Atoms::Alternative}.
43
+ #
44
+ # == When things go wrong
45
+ #
46
+ # A parse that fails will raise {Parsanol::ParseFailed}. This exception contains
47
+ # all the details of what went wrong, including a detailed error trace that
48
+ # can be printed out as an ascii tree. ({Parsanol::Cause})
49
+ #
50
+ module Parsanol
51
+ # Extends classes that include Parsanol with the module
52
+ # {Parsanol::ClassMethods}.
53
+ #
54
+ def self.included(base)
55
+ base.extend(ClassMethods)
56
+ end
57
+
58
+ # Raised when the parse failed to match. It contains the message that should
59
+ # be presented to the user. More details can be extracted from the
60
+ # exception's #parse_failure_cause member: It contains an instance of {Parsanol::Cause} that
61
+ # stores all the details of your failed parse in a tree structure.
62
+ #
63
+ # begin
64
+ # parslet.parse(str)
65
+ # rescue Parsanol::ParseFailed => failure
66
+ # puts failure.parse_failure_cause.ascii_tree
67
+ # end
68
+ #
69
+ # Alternatively, you can just require 'parsanol/convenience' and call the
70
+ # method #parse_with_debug instead of #parse. This method will never raise
71
+ # and print error trees to stdout.
72
+ #
73
+ # require 'parsanol/convenience'
74
+ # parslet.parse_with_debug(str)
75
+ #
76
+ class ParseFailed < StandardError
77
+ def initialize(message, parse_failure_cause=nil)
78
+ super(message)
79
+ @parse_failure_cause = parse_failure_cause
80
+ end
81
+
82
+ # Why the parse failed.
83
+ #
84
+ # @return [Parsanol::Cause]
85
+ attr_reader :parse_failure_cause
86
+ end
87
+
88
+ module ClassMethods
89
+ # Enable automatic rule optimization for all rules in this parser.
90
+ # This includes quantifier simplification, sequence optimization,
91
+ # choice optimization, and lookahead optimization.
92
+ #
93
+ # NOTE: As of v3.1.0, optimizations are DISABLED BY DEFAULT to avoid
94
+ # overhead on tiny/small inputs. Use optimize_rules! to opt-in for
95
+ # complex parsers (JSON, ERB, large grammars) where benefits outweigh costs.
96
+ #
97
+ # class SimpleParser < Parsanol::Parser
98
+ # # Optimizations disabled by default - good for tiny inputs
99
+ # rule(:fast) { str('a').repeat(1, 1) } # remains as repeat
100
+ # end
101
+ #
102
+ # class ComplexParser < Parsanol::Parser
103
+ # optimize_rules! # Opt-in for complex grammars
104
+ # rule(:optimized) { str('a').repeat(1, 1) } # becomes str('a')
105
+ # end
106
+ #
107
+ def optimize_rules!(enable = true)
108
+ @optimize_rules = enable
109
+ end
110
+
111
+ # Disable automatic rule optimization.
112
+ # Use this for compatibility with parsers that rely on specific
113
+ # unoptimized behavior or for debugging purposes.
114
+ #
115
+ # @example Disable optimization
116
+ # class MyParser < Parsanol::Parser
117
+ # disable_optimization!
118
+ # # rules will not be optimized
119
+ # end
120
+ #
121
+ def disable_optimization!
122
+ @optimize_rules = false
123
+ end
124
+
125
+ # Check if rule optimization is enabled.
126
+ # As of v3.1.0, defaults to FALSE for compatibility.
127
+ # Use optimize_rules! to opt-in for complex parsers that benefit.
128
+ #
129
+ # @return [Boolean] true if optimization enabled
130
+ def optimize_rules?
131
+ # Default to false (disabled) for compatibility
132
+ # Use optimize_rules! to opt-in for performance
133
+ @optimize_rules = false if @optimize_rules.nil?
134
+ @optimize_rules
135
+ end
136
+
137
+ # Define an entity for the parser. This generates a method of the same
138
+ # name that can be used as part of other patterns. Those methods can be
139
+ # freely mixed in your parser class with real ruby methods.
140
+ #
141
+ # class MyParser
142
+ # include Parsanol
143
+ #
144
+ # rule(:bar) { str('bar') }
145
+ # rule(:twobar) do
146
+ # bar >> bar
147
+ # end
148
+ #
149
+ # root :twobar
150
+ # end
151
+ #
152
+ # To enable automatic quantifier simplification:
153
+ #
154
+ # class OptimizedParser
155
+ # include Parsanol
156
+ # optimize_rules!
157
+ #
158
+ # rule(:bar) { str('a').repeat(1, 1) } # becomes str('a')
159
+ # end
160
+ #
161
+ def rule(name, opts={}, &definition)
162
+ undef_method name if method_defined? name
163
+ define_method(name) do
164
+ @rules ||= {} # <name, rule> memoization
165
+ return @rules[name] if @rules.has_key?(name)
166
+
167
+ # Capture the self of the parser class along with the definition.
168
+ definition_closure = proc {
169
+ result = self.instance_eval(&definition)
170
+
171
+ # Apply optimizations if enabled (only for classes that support it)
172
+ if self.class.respond_to?(:optimize_rules?) && self.class.optimize_rules?
173
+ # Apply all optimizers: quantifiers, sequences, choices, and lookaheads
174
+ result = Parsanol::Optimizer.simplify_quantifiers(result)
175
+ result = Parsanol::Optimizer.simplify_sequences(result)
176
+ result = Parsanol::Optimizer.simplify_choices(result)
177
+ result = Parsanol::Optimizer.simplify_lookaheads(result)
178
+ end
179
+
180
+ result
181
+ }
182
+
183
+ @rules[name] = Atoms::Entity.new(name, opts[:label], &definition_closure)
184
+ end
185
+ end
186
+ end
187
+
188
+ # Allows for delayed construction of #match. See also Parsanol.match.
189
+ #
190
+ # @api private
191
+ class DelayedMatchConstructor
192
+ def [](str)
193
+ Atoms::Re.new("[" + str + "]")
194
+ end
195
+ end
196
+
197
+ # Returns an atom matching a character class. All regular expressions can be
198
+ # used, as long as they match only a single character at a time.
199
+ #
200
+ # match('[ab]') # will match either 'a' or 'b'
201
+ # match('[\n\s]') # will match newlines and spaces
202
+ #
203
+ # There is also another (convenience) form of this method:
204
+ #
205
+ # match['a-z'] # synonymous to match('[a-z]')
206
+ # match['\n'] # synonymous to match('[\n]')
207
+ #
208
+ # @overload match(str)
209
+ # @param str [String] character class to match (regexp syntax)
210
+ # @return [Parsanol::Atoms::Re] a parslet atom
211
+ #
212
+ def match(str=nil)
213
+ return DelayedMatchConstructor.new unless str
214
+
215
+ return Atoms::Re.new(str)
216
+ end
217
+ module_function :match
218
+
219
+ # Returns an atom matching the +str+ given:
220
+ #
221
+ # str('class') # will match 'class'
222
+ #
223
+ # @param str [String] string to match verbatim
224
+ # @return [Parsanol::Atoms::Str] a parslet atom
225
+ #
226
+ def str(str)
227
+ Atoms::Str.new(str)
228
+ end
229
+ module_function :str
230
+
231
+ # Returns an atom matching any character. It acts like the '.' (dot)
232
+ # character in regular expressions.
233
+ #
234
+ # any.parse('a') # => 'a'
235
+ #
236
+ # @return [Parsanol::Atoms::Re] a parslet atom
237
+ #
238
+ def any
239
+ Atoms::Re.new('.')
240
+ end
241
+ module_function :any
242
+
243
+ # Introduces a new capture scope. This means that all old captures stay
244
+ # accessible, but new values stored will only be available during the block
245
+ # given and the old values will be restored after the block.
246
+ #
247
+ # Example:
248
+ # # :a will be available until the end of the block. Afterwards,
249
+ # # :a from the outer scope will be available again, if such a thing
250
+ # # exists.
251
+ # scope { str('a').capture(:a) }
252
+ #
253
+ def scope(&block)
254
+ Parsanol::Atoms::Scope.new(block)
255
+ end
256
+ module_function :scope
257
+
258
+ # Designates a piece of the parser as being dynamic. Dynamic parsers can
259
+ # either return a parser at runtime, which will be applied on the input, or
260
+ # return a result from a parse.
261
+ #
262
+ # Dynamic parse pieces are never cached and can introduce performance
263
+ # abnormalities - use sparingly where other constructs fail.
264
+ #
265
+ # Example:
266
+ # # Parses either 'a' or 'b', depending on the weather
267
+ # dynamic { rand() < 0.5 ? str('a') : str('b') }
268
+ #
269
+ def dynamic(&block)
270
+ Parsanol::Atoms::Dynamic.new(block)
271
+ end
272
+ module_function :dynamic
273
+
274
+ # Returns a parslet atom that parses infix expressions. Operations are
275
+ # specified as a list of <atom, precedence, associativity> tuples, where
276
+ # atom is simply the parslet atom that matches an operator, precedence is
277
+ # a number and associativity is either :left or :right.
278
+ #
279
+ # Higher precedence indicates that the operation should bind tighter than
280
+ # other operations with lower precedence. In common algebra, '+' has
281
+ # lower precedence than '*'. So you would have a precedence of 1 for '+' and
282
+ # a precedence of 2 for '*'. Only the order relation between these two
283
+ # counts, so any number would work.
284
+ #
285
+ # Associativity is what decides what interpretation to take for strings that
286
+ # are ambiguous like '1 + 2 + 3'. If '+' is specified as left associative,
287
+ # the expression would be interpreted as '(1 + 2) + 3'. If right
288
+ # associativity is chosen, it would be interpreted as '1 + (2 + 3)'. Note
289
+ # that the hash trees output reflect that choice as well.
290
+ #
291
+ # An optional block can be provided in order to manipulate the generated tree.
292
+ # The block will be called on each operator and passed 3 arguments: the left
293
+ # operand, the operator, and the right operand.
294
+ #
295
+ # Examples:
296
+ # infix_expression(integer, [add_op, 1, :left])
297
+ # # would parse things like '1 + 2'
298
+ #
299
+ # infix_expression(integer, [add_op, 1, :left]) { |l,o,r| { :plus => [l, r] } }
300
+ # # would parse '1 + 2 + 3' as:
301
+ # # { :plus => [{ :plus => [1, 2] }, 3] }
302
+ #
303
+ # @param element [Parsanol::Atoms::Base] elements that take the NUMBER position
304
+ # in the expression
305
+ # @param operations [Array<(Parsanol::Atoms::Base, Integer, {:left, :right})>]
306
+ #
307
+ # @see Parsanol::Atoms::Infix
308
+ #
309
+ def infix_expression(element, *operations, &reducer)
310
+ Parsanol::Atoms::Infix.new(element, operations, &reducer)
311
+ end
312
+ module_function :infix_expression
313
+
314
+ # A special kind of atom that allows embedding whole treetop expressions
315
+ # into parslet construction.
316
+ #
317
+ # # the same as str('a') >> str('b').maybe
318
+ # exp(%Q("a" "b"?))
319
+ #
320
+ # @param str [String] a treetop expression
321
+ # @return [Parsanol::Atoms::Base] the corresponding parslet parser
322
+ #
323
+ def exp(str)
324
+ Parsanol::Expression.new(str).to_parslet
325
+ end
326
+ module_function :exp
327
+
328
+ # Returns a placeholder for a tree transformation that will only match a
329
+ # sequence of elements. The +symbol+ you specify will be the key for the
330
+ # matched sequence in the returned dictionary.
331
+ #
332
+ # # This would match a body element that contains several declarations.
333
+ # { :body => sequence(:declarations) }
334
+ #
335
+ # The above example would match <code>:body => ['a', 'b']</code>, but not
336
+ # <code>:body => 'a'</code>.
337
+ #
338
+ # see {Parsanol::Transform}
339
+ #
340
+ def sequence(symbol)
341
+ Pattern::SequenceBind.new(symbol)
342
+ end
343
+ module_function :sequence
344
+
345
+ # Returns a placeholder for a tree transformation that will only match
346
+ # simple elements. This matches everything that <code>#sequence</code>
347
+ # doesn't match.
348
+ #
349
+ # # Matches a single header.
350
+ # { :header => simple(:header) }
351
+ #
352
+ # see {Parsanol::Transform}
353
+ #
354
+ def simple(symbol)
355
+ Pattern::SimpleBind.new(symbol)
356
+ end
357
+ module_function :simple
358
+
359
+ # Returns a placeholder for tree transformation patterns that will match
360
+ # any kind of subtree.
361
+ #
362
+ # { :expression => subtree(:exp) }
363
+ #
364
+ def subtree(symbol)
365
+ Pattern::SubtreeBind.new(symbol)
366
+ end
367
+ module_function :subtree
368
+
369
+ autoload :Expression, 'parsanol/expression'
370
+ end
371
+
372
+ require 'parsanol/version'
373
+ require 'parsanol/result'
374
+ require 'parsanol/slice'
375
+ require 'parsanol/string_view'
376
+ require 'parsanol/rope'
377
+ require 'parsanol/pool'
378
+ require 'parsanol/pools/slice_pool'
379
+ require 'parsanol/pools/array_pool'
380
+ require 'parsanol/pools/position_pool'
381
+ require 'parsanol/buffer'
382
+ require 'parsanol/pools/buffer_pool'
383
+ require 'parsanol/lazy_result'
384
+ require 'parsanol/result_builder'
385
+ require 'parsanol/first_set'
386
+ require 'parsanol/cause'
387
+ require 'parsanol/source'
388
+ require 'parsanol/atoms'
389
+ require 'parsanol/pattern'
390
+ require 'parsanol/pattern/binding'
391
+ require 'parsanol/transform'
392
+ require 'parsanol/parser'
393
+ require 'parsanol/error_reporter'
394
+ require 'parsanol/scope'
395
+ require 'parsanol/optimizer'
396
+ require 'parsanol/options'
397
+ require 'parsanol/native'
398
+
399
+ # New features (require native extension for full functionality)
400
+ require 'parsanol/source_location'
401
+ require 'parsanol/grammar_builder'
402
+ require 'parsanol/streaming_parser'
403
+ require 'parsanol/incremental_parser'
404
+ require 'parsanol/builder_callbacks'
405
+ require 'parsanol/parallel'
406
+
407
+ # Add GrammarBuilder DSL to Parsanol module
408
+ Parsanol.extend(Parsanol::GrammarBuilderDSL)
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/parsanol/version'
4
+
5
# Gem specification for parsanol. The version comes from
# lib/parsanol/version.rb, which is loaded by the require_relative above.
Gem::Specification.new do |gem|
  repo_url = 'https://github.com/parsanol/parsanol-ruby'

  # Identity
  gem.name     = 'parsanol'
  gem.version  = Parsanol::VERSION
  gem.platform = Gem::Platform::RUBY
  gem.license  = 'MIT'
  gem.homepage = repo_url

  gem.authors = ['Ribose Inc.']
  gem.email   = ['open.source@ribose.com']

  # Human-readable descriptions
  gem.summary = 'Parser construction library with great error reporting in Ruby.'
  gem.description = [
    'A small Ruby library for constructing parsers in the PEG (Parsing Expression Grammar) fashion. ',
    'Parsanol provides Parslet-compatible API with additional features including ',
    'static frozen parsers and dynamic parsers, with optional Rust native extension for improved performance.',
  ].join

  gem.metadata = {
    'bug_tracker_uri' => "#{repo_url}/issues",
    'changelog_uri' => "#{repo_url}/blob/main/HISTORY.txt",
    'documentation_uri' => 'https://parsanol.github.io/parsanol-ruby/',
    'homepage_uri' => repo_url,
    'source_code_uri' => repo_url,
    'rubygems_mfa_required' => 'true',
  }

  # Packaged files: everything under lib/, spec/ and example/ plus the
  # top-level project files listed explicitly.
  top_level_files = %w[
    HISTORY.txt
    LICENSE
    Rakefile
    README.adoc
    parsanol-ruby.gemspec
  ]
  gem.files = Dir.glob('{lib,spec,example}/**/*') + top_level_files
  gem.require_paths = ['lib']

  gem.required_ruby_version = '>= 2.7.0'

  # Rust extension, and the runtime dependency it requires.
  gem.extensions = ['ext/parsanol_native/extconf.rb']
  gem.add_dependency 'rb_sys', '~> 0.9.39'

  # Development-only dependencies, registered in the listed order.
  {
    'rake' => '~> 13.0',
    'rake-compiler' => '~> 1.2.0',
    'rdoc' => '~> 6.0',
    'rspec' => '~> 3.0',
    'parslet' => '~> 2.0.0', # for Parslet compatibility verification
    'benchmark-ips' => '~> 2.0', # for benchmarking
  }.each do |dep_name, requirement|
    gem.add_development_dependency dep_name, requirement
  end
end
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'open3'
3
+
4
+ # Strip positions from the output, so that we can compare it with the expected output.
5
+ # (Some other specs utilize the inspect of inner objects outside example/*.rb expectations)
6
+ # The comparison of #inspect output as done in parslet behaves differently
7
+ # in Opal and MRI.
8
+
9
# Regression suite: runs every example script and compares what it prints
# with the recorded expectation stored under example/output/ (an .out file
# for stdout or an .err file for stderr).
describe 'Regression on' do
  # Examples that are skipped on every engine because their dependencies
  # are missing.
  missing_deps = %w[
    example/optimized_erb.rb
  ].freeze

  # Examples that are skipped when running under Opal.
  opal_unsupported = %w[
    example/calc.rb
    example/empty.rb
    example/erb.rb
    example/ip_address.rb
    example/mathn.rb
    example/nested_errors.rb
    example/optimized_erb.rb
    example/prec_calc.rb
  ].freeze

  # Generates a product path for a given example file, e.g.
  # "example/foo.rb" with ext :out becomes "example/output/foo.out".
  # Both substitutions are anchored so that only the leading directory and
  # the trailing extension are rewritten.
  def product_path(str, ext)
    str
      .sub(/\.rb\z/, ".#{ext}")
      .sub(%r{\Aexample/}, 'example/output/')
  end

  # NOTE(review): the package file listing shows examples nested in
  # subdirectories (example/calculator/basic.rb, ...); confirm that this
  # single-level glob still matches the examples it is meant to cover.
  Dir['example/*.rb'].each do |example|
    context example do
      it 'runs successfully' do
        # Skip if output files don't exist (new examples may not have expected outputs yet)
        unless Dir["example/output/#{File.basename(example, '.rb')}.*"].any?
          skip "No output file found for #{example}"
        end

        # Skip examples that have missing dependencies
        skip 'Missing dependencies' if missing_deps.include?(example)

        if RUBY_ENGINE == 'opal'
          skip "Opal does not support #{example} yet" if opal_unsupported.include?(example)

          begin
            system("opal -srubygems -ropal-parser -rnodejs -Ilib -I. #{example} >_stdout 2>_stderr")

            handle_map = {
              '_stdout' => :out,
              '_stderr' => :err,
            }
            expectation_found = handle_map.any? do |io, ext|
              name = product_path(example, ext)
              next false unless File.exist?(name)

              actual_output = File.read(io).strip
              # Rewrite the recorded output (symbol notation, float format,
              # "@<digits>" markers) before comparing it with Opal's output.
              expected_output = File.read(name).strip.gsub(/:(\w+)(=>|,|\]|\})/, '"\1"\2').gsub('1.0e+23', '1e+23').gsub(/@\d+/, '').strip
              expect(strip_positions(actual_output)).to eq(strip_positions(expected_output))
              true
            end
          ensure
            # Only remove capture files that exist, so that cleanup cannot
            # raise and mask the original failure.
            %w[_stdout _stderr].each do |capture_file|
              File.unlink(capture_file) if File.exist?(capture_file)
            end
          end
        else
          # capture3 drains both pipes, closes them and waits for the child;
          # popen3 without a block leaked the handles and the process.
          stdout, stderr, _status = Open3.capture3('ruby', example)

          outputs = {
            out: stdout,
            err: stderr,
          }
          expectation_found = outputs.any? do |ext, captured|
            name = product_path(example, ext)
            next false unless File.exist?(name)

            actual_output = captured.strip
            expected_output = File.read(name).strip
            expect(actual_output).to eq(expected_output)
            true
          end
        end

        unless expectation_found
          raise "Example doesn't have either an .err or an .out file. " \
                'Please create in example/output!'
        end
      end
    end
  end
end
@@ -0,0 +1,145 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Infix expression parsing' do
4
# Grammar under test: integers joined by the operators * / (declared with
# 2 and :left) and + - (declared with 1 and :right).
class InfixExpressionParser < Parsanol::Parser
  rule(:space) do
    match['\s']
  end

  # Returns +atom+ followed by any amount of trailing whitespace.
  def cts(atom)
    atom >> space.repeat
  end

  # Forwards all arguments to Infix.new.
  def infix(*args)
    Infix.new(*args)
  end

  rule(:mul_op) do
    match['*/'] >> str(' ').maybe
  end
  rule(:add_op) do
    match['+-'] >> str(' ').maybe
  end
  rule(:digit) do
    match['0-9']
  end
  rule(:integer) do
    cts(digit.repeat(1))
  end

  rule(:expression) do
    multiplicative = [mul_op, 2, :left]
    additive = [add_op, 1, :right]
    infix_expression(integer, multiplicative, additive)
  end
end
26
+
27
+ let(:p) { InfixExpressionParser.new }
28
+
29
# Checks the integer rule on its own, using expect-style matchers like the
# regression spec does.
describe '#integer' do
  let(:i) { p.integer }

  it 'parses integers' do
    expect(i).to parse('1')
    expect(i).to parse('123')
  end

  it 'consumes trailing white space' do
    expect(i).to parse('1 ')
    expect(i).to parse('134 ')
  end

  it "doesn't parse floats" do
    expect(i).not_to parse('1.3')
  end
end
46
+
47
# Multiplicative operators, including the shape of the resulting tree
# (l = left operand, o = operator, r = right operand).
describe '#multiplication' do
  let(:m) { p.expression }

  it 'parses simple multiplication' do
    expect(m).to parse('1*2').as(l: '1', o: '*', r: '2')
  end

  it 'parses simple multiplication with spaces' do
    expect(m).to parse('1 * 2').as(l: '1 ', o: '* ', r: '2')
  end

  it 'parses division' do
    expect(m).to parse('1/2')
  end
end
62
+
63
# Additive operators and the degenerate single-operand case.
describe '#addition' do
  let(:a) { p.expression }

  it 'parses simple addition' do
    expect(a).to parse('1+2')
  end

  it 'parses complex addition' do
    expect(a).to parse('1+2+3-4')
  end

  it 'parses a single element' do
    expect(a).to parse('1')
  end
end
78
+
79
+ describe 'mixed operations' do
80
+ let(:mo) { p.expression }
81
+
82
# The inspect string of the expression rule's underlying parslet.
describe 'inspection' do
  it 'produces useful expressions' do
    expect(p.expression.parslet.inspect).to eq(
      'infix_expression(INTEGER, [MUL_OP, ADD_OP])',
    )
  end
end
88
+
89
# '+' is declared :right in the grammar, so the nesting is on the right.
describe 'right associativity' do
  it 'produces trees that lean right' do
    expect(mo).to parse('1+2+3').as(
      l: '1', o: '+', r: { l: '2', o: '+', r: '3' },
    )
  end
end
96
+
97
# '*' is declared :left in the grammar, so the nesting is on the left.
describe 'left associativity' do
  it 'produces trees that lean left' do
    expect(mo).to parse('1*2*3').as(
      l: { l: '1', o: '*', r: '2' }, o: '*', r: '3',
    )
  end
end
104
+
105
+ # Verifies the ascii_tree rendering of the failure cause for two invalid
+ # inputs: an expression that ends after an operator, and an unknown operator.
+ # NOTE(review): the expected heredoc text is compared verbatim, so its
+ # leading whitespace matters. This view shows no indentation on any line;
+ # confirm the tree indentation against the real file before editing.
+ describe 'error handling' do
105
+ describe 'incomplete expression' do
106
+ it 'produces the right error' do
107
+ cause = catch_failed_parse do
108
+ mo.parse('1+')
109
+ end
110
+
111
+ cause.ascii_tree.to_s.should == <<~ERROR
112
+ INTEGER was expected at line 1 char 3.
113
+ `- Failed to match sequence (DIGIT{1, } SPACE{0, }) at line 1 char 3.
114
+ `- Expected at least 1 of DIGIT at line 1 char 3.
115
+ `- Premature end of input at line 1 char 3.
116
+ ERROR
117
+ end
118
+ end
119
+
120
+ describe 'invalid operator' do
121
+ it 'produces the right error' do
122
+ cause = catch_failed_parse do
123
+ mo.parse('1%')
124
+ end
125
+
126
+ cause.ascii_tree.to_s.should == <<~ERROR
127
+ Don't know what to do with "%" at line 1 char 2.
128
+ ERROR
129
+ end
130
+ end
131
+ end
133
+ end
134
+
135
# Checks that a block passed to infix_expression is used to build each node
# of the result instead of the default l/o/r hash.
describe 'providing a reducer block' do
  class InfixExpressionReducerParser < Parsanol::Parser
    rule(:top) { infix_expression(str('a'), [str('-'), 1, :right]) { |l, _o, r| { and: [l, r] } } }
  end

  it 'applies the reducer' do
    result = InfixExpressionReducerParser.new.top.parse('a-a-a')
    expect(strip_positions(result)).to eq({ and: ['a', { and: %w[a a] }] })
  end
end
145
+ end