parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ module ErrorReporter
5
+
6
+ # A reporter that tries to improve on the deepest error reporter by
7
+ # using heuristics to find the most relevant error and provide more
8
+ # context.
9
+ # The heuristic chooses the deepest error when parsing a sequence for which
10
+ # no alternative parsed successfully.
11
+ #
12
+ # Given the following parser:
13
+ #
14
+ # root(:call)
15
+ #
16
+ # rule(:call, label: 'call') {
17
+ # identifier >> str('.') >> method
18
+ # }
19
+ #
20
+ # rule(:method, label: 'method call') {
21
+ # identifier >> str('(') >> arguments.maybe >> str(')')
22
+ # }
23
+ #
24
+ # rule(:identifier, label: 'identifier') {
25
+ # match['[:alnum:]'].repeat(1)
26
+ # }
27
+ #
28
+ # rule(:arguments, label: 'method call arguments') {
29
+ # argument >> str(',') >> arguments | argument
30
+ # }
31
+ #
32
+ # rule(:argument) {
33
+ # call | identifier
34
+ # }
35
+ #
36
+ # and the following source:
37
+ #
38
+ # foo.bar(a,goo.baz(),c,)
39
+ #
40
+ # The contextual reporter returns the following causes:
41
+ #
42
+ # 0: Failed to match sequence (identifier '.' method call) at line 1 char 5
43
+ # when parsing method call arguments.
44
+ # 1: Failed to match sequence (identifier '(' method call arguments? ')') at
45
+ # line 1 char 22 when parsing method call arguments.
46
+ # 2: Failed to match [[:alnum:]] at line 1 char 23 when parsing method call
47
+ # arguments.
48
+ #
49
+ # (where 2 is a child cause of 1 and 1 a child cause of 0)
50
+ #
51
+ # The last piece used by the reporter is the (newly introduced) ability
52
+ # to attach a label to rules that describe a sequence in the grammar. The
53
+ # labels are used in two places:
54
+ # - In the "to_s" of Atoms::Base so that any error message uses labels to
55
+ # refer to atoms
56
+ # - In the cause error messages to give information about which expression
57
+ # failed to parse
58
+ #
59
+ class Contextual < Deepest
60
+ # Starts with no recorded error and the last reset position at 0.
61
+ def initialize
62
+ @last_reset_pos = 0
63
+ reset
64
+ end
65
+
66
+ # Called when a sequence expression parsed successfully (an alternative
67
+ # matched): discards the errors recorded for earlier expressions.
68
+ # Errors are only discarded when the matched position is not lower than
69
+ # the position of the last reset (so we keep errors that are the
70
+ # "deepest" but for which no alternative succeeded).
71
+ # @param source [Source] source whose current position marks the success
72
+ def succ(source)
73
+ source_pos = source.pos.bytepos
74
+ return if source_pos < @last_reset_pos
75
+ @last_reset_pos = source_pos
76
+ reset
77
+ end
78
+
79
+ # Clears the stored deepest cause and resets the label position to -1.
80
+ # The stored label text itself is left untouched.
81
+ def reset
82
+ @deepest_cause = nil
83
+ @label_pos = -1
84
+ end
85
+
86
+ # Produces an error cause that combines the message at the current level
87
+ # with the errors that happened at a level below (children).
88
+ # If the atom has a label, the most relevant label is set on the cause.
89
+ # That label is the stored one, not necessarily this atom's own label.
90
+ # @param atom [Parsanol::Atoms::Base] parslet that failed
91
+ # @param source [Source] Source that we're using for this parse. (line
92
+ # number information...)
93
+ # @param message [String, Array] Error message at this level.
94
+ # @param children [Array] A list of errors from a deeper level (or nil).
95
+ # @return [Cause] The cause returned by Deepest#err, labelled if the
96
+ # atom responds to #label and the label is truthy.
97
+ def err(atom, source, message, children=nil)
98
+ cause = super(atom, source, message, children)
99
+ if (label = atom.respond_to?(:label) && atom.label)
100
+ update_label(label, source.pos.bytepos)
101
+ cause.set_label(@label)
102
+ end
103
+ cause
104
+ end
105
+
106
+ # Update error message label if given label is more relevant.
107
+ # A label is more relevant if its position is greater than or equal to
108
+ # the position recorded for the currently stored label.
109
+ #
110
+ # @param label [String] label to apply if more relevant
111
+ # @param bytepos [Integer] position in source code of matched source
112
+ #
113
+ def update_label(label, bytepos)
114
+ if bytepos >= @label_pos
115
+ @label_pos = bytepos
116
+ @label = label
117
+ end
118
+ end
119
+
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ module ErrorReporter
5
+ # Instead of reporting the latest error that happens like {Tree} does,
6
+ # this class reports the deepest error. Depth is defined here as how
7
+ # advanced into the input an error happens. The errors close to the
8
+ # greatest depth tend to be more relevant to the end user, since they
9
+ # specify what could be done to make them go away.
10
+ #
11
+ # More specifically, errors produced by this reporter won't be related to
12
+ # the structure of the grammar at all. The positions of the errors will
13
+ # be advanced and convey at every grammar level what the deepest rule
14
+ # was to fail.
15
+ #
16
+ # @example Using Deepest reporter
17
+ # parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
18
+ #
19
+ class Deepest < Base
20
+ def initialize
21
+ @deepest_cause = nil
22
+ end
23
+
24
+ # Produces an error cause that combines the message at the current level
25
+ # with the errors that happened at a level below (children).
26
+ # The error is located at the current position of the source.
27
+ # @param atom [Parsanol::Atoms::Base] parslet that failed
28
+ # @param source [Source] Source that we're using for this parse. (line
29
+ # number information...)
30
+ # @param message [String, Array] Error message at this level.
31
+ # @param children [Array] A list of errors from a deeper level (or nil).
32
+ # @return [Cause] The new error tree, or the stored deepest cause when
33
+ # the new tree does not reach at least as deep (see #deepest).
34
+ def err(atom, source, message, children = nil)
35
+ position = source.pos
36
+ cause = Cause.format(source, position, message, children)
37
+ return deepest(cause)
38
+ end
39
+
40
+ # Produces an error cause that combines the message at the current level
41
+ # with the errors that happened at a level below (children).
42
+ # The error is located at the given position, not the source's current one.
43
+ # @param atom [Parsanol::Atoms::Base] parslet that failed
44
+ # @param source [Source] Source that we're using for this parse. (line
45
+ # number information...)
46
+ # @param message [String, Array] Error message at this level.
47
+ # @param pos [Integer] The real position of the error.
48
+ # @param children [Array] A list of errors from a deeper level (or nil).
49
+ # @return [Cause] The new error tree, or the stored deepest cause when
50
+ # the new tree does not reach at least as deep (see #deepest).
51
+ def err_at(atom, source, message, pos, children = nil)
52
+ position = pos
53
+ cause = Cause.format(source, position, message, children)
54
+ return deepest(cause)
55
+ end
56
+
57
+ # Success notifications do not influence depth tracking in this reporter.
58
+
59
+ # Notification that an expression parsed successfully.
60
+ # Ignored here; overridden by ErrorReporter::Contextual.
61
+ def succ(source)
62
+ # No-op for Deepest reporter
63
+ end
64
+ # Returns the cause that is currently deepest. Mainly for specs.
65
+ attr_reader :deepest_cause
66
+
67
+ # Finds the most deeply nested leaf of the given cause. If no deepest
68
+ # cause is stored yet, or the leaf's position is greater than or equal to
69
+ # the stored one's, the leaf is stored and the given cause is passed
70
+ # through to the caller. Otherwise the stored deepest cause is returned.
71
+ #
72
+ def deepest(cause)
73
+ _, leaf = deepest_child(cause)
74
+
75
+ if !deepest_cause || leaf.pos >= deepest_cause.pos
76
+ # This error reaches at least as deep into the input, save it as reference.
77
+ @deepest_cause = leaf
78
+ return cause
79
+ end
80
+
81
+ return deepest_cause
82
+ end
83
+
84
+ private
85
+ # Returns [rank, cause] for the most deeply nested cause in the given
86
+ # error tree; rank is the nesting level. On ties the first one found wins.
87
+ def deepest_child(cause, rank = 0)
88
+ max_child = cause
89
+ max_rank = rank
90
+
91
+ if cause.children && !cause.children.empty?
92
+ cause.children.each do |child|
93
+ c_rank, c_cause = deepest_child(child, rank + 1)
94
+
95
+ if c_rank > max_rank
96
+ max_rank = c_rank
97
+ max_child = c_cause
98
+ end
99
+ end
100
+ end
101
+
102
+ return max_rank, max_child
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ module ErrorReporter
5
+ # An error reporter has two central methods, one for reporting errors at
6
+ # the current parse position (#err) and one for reporting errors at a
7
+ # given parse position (#err_at). The reporter can return an object (a
8
+ # 'cause') that will be returned to the caller along with the information
9
+ # that the parse failed.
10
+ #
11
+ # When reporting errors on the outer levels of your parser, these methods
12
+ # get passed a list of error objects ('causes') from the inner levels. In
13
+ # this default implementation, the inner levels are considered error
14
+ # subtrees and are appended to the generated tree node at each level,
15
+ # thereby constructing an error tree.
16
+ #
17
+ # This error tree will report in parallel with the grammar structure that
18
+ # failed. A one-to-one correspondence exists between each error in the
19
+ # tree and the parslet atom that produced that error.
20
+ #
21
+ # The implementor is free to use these return values as they see
22
+ # fit. One example would be to return an error state object from these
23
+ # methods that is then updated as errors cascade up the parse derivation
24
+ # tree.
25
+ #
26
+ # @example Using Tree reporter
27
+ # parser.parse(input, reporter: Parsanol::ErrorReporter::Tree.new)
28
+ #
29
+ class Tree < Base
30
+ # Produces an error cause that combines the message at the current level
31
+ # with the errors that happened at a level below (children).
32
+ # The error is located at the current position of the source.
33
+ # @param atom [Parsanol::Atoms::Base] parslet that failed
34
+ # @param source [Source] Source that we're using for this parse. (line
35
+ # number information...)
36
+ # @param message [String, Array] Error message at this level.
37
+ # @param children [Array] A list of errors from a deeper level (or nil).
38
+ # @return [Cause] An error tree combining children with message.
39
+ #
40
+ def err(atom, source, message, children = nil)
41
+ position = source.pos
42
+ Cause.format(source, position, message, children)
43
+ end
44
+
45
+ # Produces an error cause that combines the message at the current level
46
+ # with the errors that happened at a level below (children).
47
+ # The error is located at the given position, not the source's current one.
48
+ # @param atom [Parsanol::Atoms::Base] parslet that failed
49
+ # @param source [Source] Source that we're using for this parse. (line
50
+ # number information...)
51
+ # @param message [String, Array] Error message at this level.
52
+ # @param pos [Integer] The real position of the error.
53
+ # @param children [Array] A list of errors from a deeper level (or nil).
54
+ # @return [Cause] An error tree combining children with message.
55
+ #
56
+ def err_at(atom, source, message, pos, children = nil)
57
+ position = pos
58
+ Cause.format(source, position, message, children)
59
+ end
60
+
61
+ # Notification that an expression parsed successfully.
62
+ # Ignored by this reporter; see ErrorReporter::Contextual for a reporter that uses it.
63
+ def succ(source)
64
+ # No-op for Tree reporter
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A namespace for all error reporters.
4
+ #
5
+ # Error reporters collect and format parse errors. The parsing engine
6
+ # calls reporter methods as it attempts to match atoms, building up
7
+ # an error structure that can be presented to the user.
8
+ #
9
+ # @example Using a specific error reporter
10
+ # parser = MyParser.new
11
+ # parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
12
+ #
13
+ # @example Creating a custom error reporter
14
+ # class MyReporter < Parsanol::ErrorReporter::Base
15
+ # def initialize
16
+ # @errors = []
17
+ # end
18
+ #
19
+ # def err(atom, source, message, children = nil)
20
+ # @errors << { position: source.pos, message: message }
21
+ # @errors.last
22
+ # end
23
+ #
24
+ # def err_at(atom, source, message, pos, children = nil)
25
+ # @errors << { position: pos, message: message }
26
+ # @errors.last
27
+ # end
28
+ # end
29
+ #
30
+ module Parsanol::ErrorReporter
31
+ # Base class for error reporters.
32
+ #
33
+ # Error reporters collect and format parse errors. The parsing engine
34
+ # calls reporter methods as it attempts to match atoms, building up
35
+ # an error structure that can be presented to the user.
36
+ #
37
+ # Subclasses must implement {#err} and {#err_at} methods.
38
+ #
39
+ class Base
40
+ # Report an error at the current parse position.
41
+ #
42
+ # @param atom [Parsanol::Atoms::Base] The atom that failed to match
43
+ # @param source [Parsanol::Source] The input source
44
+ # @param message [String, Array<String>] Error message(s)
45
+ # @param children [Array<Cause>, nil] Child errors from deeper levels
46
+ # @return [Object] An error cause object (implementation-specific)
47
+ #
48
+ # @abstract Subclasses must implement this method
49
+ # @raise [NotImplementedError] always, in this base implementation
50
+ def err(atom, source, message, children = nil)
51
+ raise NotImplementedError,
52
+ "Error reporters must implement #err(atom, source, message, children)"
53
+ end
54
+
55
+ # Report an error at a specific position.
56
+ #
57
+ # @param atom [Parsanol::Atoms::Base] The atom that failed to match
58
+ # @param source [Parsanol::Source] The input source
59
+ # @param message [String, Array<String>] Error message(s)
60
+ # @param pos [Integer] The byte position of the error
61
+ # @param children [Array<Cause>, nil] Child errors from deeper levels
62
+ # @return [Object] An error cause object (implementation-specific)
63
+ #
64
+ # @abstract Subclasses must implement this method
65
+ # @raise [NotImplementedError] always, in this base implementation
66
+ def err_at(atom, source, message, pos, children = nil)
67
+ raise NotImplementedError,
68
+ "Error reporters must implement #err_at(atom, source, message, pos, children)"
69
+ end
70
+
71
+ # Called when an expression successfully parses.
72
+ #
73
+ # This method allows reporters to track successful parses for
74
+ # better error context. The default implementation does nothing.
75
+ #
76
+ # @param source [Parsanol::Source] The input source at success position
77
+ # @return [void]
78
+ #
79
+ def succ(source)
80
+ # Default: no-op
81
+ end
82
+
83
+ # Called after parse completes for finalization.
84
+ #
85
+ # Override this method to perform cleanup or generate final reports.
86
+ # The default implementation does nothing.
87
+ #
88
+ # @return [void]
89
+ #
90
+ def finalize
91
+ # Default: no-op
92
+ end
93
+ end
94
+ end
95
+
96
+ require 'parsanol/error_reporter/tree'
97
+ require 'parsanol/error_reporter/deepest'
98
+ require 'parsanol/error_reporter/contextual'
@@ -0,0 +1,163 @@
1
+ # Allows exporting parslet grammars to other lingos.
2
+
3
+ require 'set'
4
+ require 'parsanol/atoms/visitor'
5
+
6
+ class Parsanol::Parser
7
module Visitors
  # Visitor that renders each kind of atom as a fragment of Citrus grammar
  # text. Every visit_* method returns a String.
  class Citrus
    attr_reader :context, :output

    # context must respond to #deferred(name, block) and #mangle_name(name);
    # both are used when an entity (rule reference) is visited.
    def initialize(context)
      @context = context
    end

    # Renders a literal string, reusing Ruby's own escaping from #inspect
    # (minus the surrounding quotes it adds).
    def visit_str(str)
      escaped = str.inspect[1..-2]
      "\"#{escaped}\""
    end

    # Renders a regular-expression atom via its string form.
    def visit_re(match)
      match.to_s
    end

    # Registers the entity with the context for later printing and emits a
    # parenthesised reference to its mangled name.
    def visit_entity(name, block)
      context.deferred(name, block)

      reference = context.mangle_name(name)
      "(#{reference})"
    end

    # Names are dropped; only the wrapped parslet is rendered.
    def visit_named(name, parslet)
      parslet.accept(self)
    end

    # Renders the elements separated by spaces inside parentheses.
    def visit_sequence(parslets)
      group(parslets, ' ')
    end

    # Renders the repeated parslet followed by "min*max".
    def visit_repetition(tag, min, max, parslet)
      rendered = parslet.accept(self)
      rendered + "#{min}*#{max}"
    end

    # Renders the alternatives separated by ' | ' inside parentheses.
    def visit_alternative(alternatives)
      group(alternatives, ' | ')
    end

    # Renders a lookahead: '&' prefix when positive, '!' when negative.
    def visit_lookahead(positive, bound_parslet)
      prefix = positive ? '&' : '!'
      prefix + bound_parslet.accept(self)
    end

    private

    # Renders every node with this visitor, joins the pieces with
    # +separator+ and wraps the result in parentheses.
    def group(nodes, separator)
      pieces = nodes.map { |node| node.accept(self) }
      '(' + pieces.join(separator) + ')'
    end
  end

  # Same as the Citrus visitor except for the repetition and alternative
  # notation.
  class Treetop < Citrus
    # Renders the repeated parslet followed by "min..max".
    def visit_repetition(tag, min, max, parslet)
      rendered = parslet.accept(self)
      rendered + "#{min}..#{max}"
    end

    # Renders the alternatives separated by ' / ' inside parentheses.
    def visit_alternative(alternatives)
      group(alternatives, ' / ')
    end
  end
end
68
+
69
# Formats a grammar as text using one of the export visitors. The printer
# acts as the visitor's context: the visitor calls #deferred whenever it
# meets a rule reference and #mangle_name to obtain a printable rule name.
#
class PrettyPrinter
  attr_reader :visitor

  # visitor_klass is instantiated with this printer as its context.
  def initialize(visitor_klass)
    # Create the work queue up front so that printing a parslet which never
    # references a rule does not hit a nil queue.
    @todo = []
    @visitor = visitor_klass.new(self)
  end

  # Pretty prints the given parslet using the visitor that has been
  # configured in initialize. Returns the grammar as a single string:
  # a "grammar <name>" header, the root rule, every rule reached from it
  # (each printed once), and a closing "end".
  #
  def pretty_print(name, parslet)
    output = ["grammar #{name}\n"]

    output << rule('root', parslet)

    seen = Set.new
    # The queue is refilled by the visitor (see #deferred) while rules are
    # being rendered, so keep draining until nothing is left.
    until todo.empty?
      rule_name, block = todo.shift

      # Set#add? returns nil for a name that was already printed; skipping
      # those is what terminates recursive grammars.
      next unless seen.add?(rule_name)

      output << rule(rule_name, block.call)
    end

    output << "end\n"
    output.join
  end

  # Formats a single rule (header, body rendered by the visitor, footer).
  #
  def rule(name, parslet)
    " rule #{mangle_name(name)}\n" +
      " " + parslet.accept(visitor) + "\n" +
      " end\n"
  end

  # Whenever the visitor encounters a rule in a parslet, it defers the
  # pretty printing of that rule by calling this method. content is called
  # later (with no arguments) to obtain the rule's parslet.
  #
  def deferred(name, content)
    todo << [name, content]
  end

  # Mangles names so that Citrus and Treetop can live with them: a trailing
  # '?' is replaced by '_p'. Collisions caused by this are not detected.
  #
  def mangle_name(str)
    str.to_s.sub(/\?$/, '_p')
  end

  private

  # Queue of [name, block] pairs still waiting to be printed. Lazily
  # created as well, in case a subclass bypasses #initialize.
  def todo
    @todo ||= []
  end
end
128
+
129
# Renders this parser's grammar in the Citrus dialect and returns it as a
# string. The grammar is named after the parser's class and printing starts
# at the parser's root.
#
# Example:
#
#   require 'parsanol/export'
#   class MyParser < Parsanol::Parser
#     root(:expression)
#     rule(:expression) { str('foo') }
#   end
#
#   MyParser.new.to_citrus # => a citrus grammar as a string
#
def to_citrus
  printer = PrettyPrinter.new(Visitors::Citrus)
  printer.pretty_print(self.class.name, root)
end
145
+
146
# Renders this parser's grammar in the Treetop dialect and returns it as a
# string. The grammar is named after the parser's class and printing starts
# at the parser's root.
#
# Example:
#
#   require 'parsanol/export'
#   class MyParser < Parsanol::Parser
#     root(:expression)
#     rule(:expression) { str('foo') }
#   end
#
#   MyParser.new.to_treetop # => a treetop grammar as a string
#
def to_treetop
  printer = PrettyPrinter.new(Visitors::Treetop)
  printer.pretty_print(self.class.name, root)
end
162
+ end
163
+
@@ -0,0 +1,94 @@
1
+ require 'parsanol'
2
+
3
+ class Parsanol::Expression::Treetop
4
# Grammar for treetop-style expressions such as "'a' / 'b'+ [0-9]{1,2}".
# The resulting tree uses the keys :alt, :seq, :repetition, :sign, :min,
# :max, :maybe, :unwrap, :string, :match and :any.
class Parser < Parsanol::Parser
  root(:expression)

  rule(:expression) { alternatives }

  # alternative 'a' / 'b'
  rule(:alternatives) {
    (simple >> (spaced('/') >> simple).repeat).as(:alt)
  }

  # sequence by simple concatenation 'a' 'b'
  rule(:simple) { occurrence.repeat(1).as(:seq) }

  # occurrence modifiers: '*', '+', '{min, max}' and '?'; a bare atom is
  # tried last
  rule(:occurrence) {
    atom.as(:repetition) >> spaced('*').as(:sign) |
      atom.as(:repetition) >> spaced('+').as(:sign) |
      atom.as(:repetition) >> repetition_spec |

      atom.as(:maybe) >> spaced('?') |
      atom
  }

  # a parenthesised expression, '.', a quoted string or a character class
  rule(:atom) {
    spaced('(') >> expression.as(:unwrap) >> spaced(')') |
      dot |
      string |
      char_class
  }

  # a character class; a backslash escapes the following character so that
  # an escaped ']' does not close the class
  rule(:char_class) {
    (str('[') >>
      (str('\\') >> any |
      str(']').absent? >> any).repeat(1) >>
    str(']')).as(:match) >> space?
  }

  # anything at all
  rule(:dot) { spaced('.').as(:any) }

  # recognizing strings; a backslash escapes the following character so
  # that an escaped quote does not end the string
  rule(:string) {
    str('\'') >>
    (
      (str('\\') >> any) |
      (str("'").absent? >> any)
    ).repeat.as(:string) >>
    str('\'') >> space?
  }

  # repetition specification like {1, 2}; both bounds are optional
  rule(:repetition_spec) {
    spaced('{') >>
      integer.maybe.as(:min) >> spaced(',') >>
      integer.maybe.as(:max) >> spaced('}')
  }
  rule(:integer) {
    match['0-9'].repeat(1)
  }

  # whitespace handling. The pattern must be single-quoted: in a
  # double-quoted Ruby string "\s" is just a space character, which would
  # make tabs and newlines between tokens a parse error.
  rule(:space) { match('\s').repeat(1) }
  rule(:space?) { space.maybe }

  # Matches the literal +str+ followed by optional whitespace.
  def spaced(str)
    str(str) >> space?
  end
end
73
+
74
# Turns the tree produced by the expression parser into atoms.
class Transform < Parsanol::Transform
  # 'x*' and 'x+': an unbounded repetition with a minimum of 0 or 1.
  rule(repetition: simple(:rep), sign: simple(:sign)) do
    lower_bound = sign == '+' ? 1 : 0
    Parsanol::Atoms::Repetition.new(rep, lower_bound, nil)
  end

  # 'x{min, max}': a missing minimum becomes 0, a missing maximum stays nil.
  rule(repetition: simple(:rep), min: simple(:min), max: simple(:max)) do
    lower_bound = Integer(min || 0)
    upper_bound = max ? Integer(max) : nil
    Parsanol::Atoms::Repetition.new(rep, lower_bound, upper_bound)
  end

  rule(alt: subtree(:alt)) { Parsanol::Atoms::Alternative.new(*alt) }
  rule(seq: sequence(:s)) { Parsanol::Atoms::Sequence.new(*s) }
  rule(unwrap: simple(:u)) { u }
  # Takes the bindings hash explicitly instead of using the bare name.
  rule(maybe: simple(:m)) { |bindings| bindings[:m].maybe }
  rule(string: simple(:s)) { Parsanol::Atoms::Str.new(s) }
  rule(match: simple(:m)) { Parsanol::Atoms::Re.new(m) }
  # The matched text is irrelevant; '.' always becomes the any-character atom.
  rule(any: simple(:a)) { Parsanol::Atoms::Re.new('.') }
end
92
+
93
+ end
94
+
@@ -0,0 +1,51 @@
1
+
2
# Allows specifying rules as strings using the same grammar that treetop
# uses, minus the actions. This doubles as an example of a complete parser
# plus transform built with the library itself.
#
# The string is parsed and transformed once, at construction time; the
# result is available through #to_parslet.
#
class Parsanol::Expression
  include Parsanol

  autoload :Treetop, 'parsanol/expression/treetop'

  # Creates a parslet from a foreign language expression.
  #
  # opts[:type] is stored (defaulting to :treetop) but the treetop parser
  # and transform are what this class uses. context is accepted and not
  # used here.
  #
  # Example:
  #
  #   Parsanol::Expression.new("'a' 'b'")
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str

    tree = parse(str)
    @parslet = transform(tree)
  end

  # Transforms the parse tree into a parslet expression. On failure the
  # offending tree is written as a warning and the error is re-raised.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue
    warn "Could not transform: " + tree.inspect
    raise
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Returns the parslet built from the expression string.
  #
  def to_parslet
    @parslet
  end
end