parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Base class for AST visitors following the Visitor pattern
4
+ # This separates tree traversal logic from transformation logic
5
+ # making the code more maintainable and extensible.
6
+ module Parsanol
7
+ # Base visitor class that traverses the Parslet AST
8
+ # Subclasses override visit_* methods to perform transformations
9
+ class ASTVisitor
10
+ # Visit a parslet and its children
11
+ # Subclasses should override specific visit_* methods
12
+ # @param parslet [Parsanol::Atoms::Base] parslet to visit
13
+ # @return [Parsanol::Atoms::Base] transformed parslet
14
def visit(parslet)
  # Dispatch on the concrete atom class. Anything without a dedicated
  # handler (leaf nodes, unknown types) is handed back untouched.
  case parslet
  when Parsanol::Atoms::Sequence    then visit_sequence(parslet)
  when Parsanol::Atoms::Alternative then visit_alternative(parslet)
  when Parsanol::Atoms::Repetition  then visit_repetition(parslet)
  when Parsanol::Atoms::Lookahead   then visit_lookahead(parslet)
  when Parsanol::Atoms::Named       then visit_named(parslet)
  when Parsanol::Atoms::Str         then visit_str(parslet)
  when Parsanol::Atoms::Re          then visit_re(parslet)
  else parslet
  end
end
35
+
36
+ # Visit a sequence node
37
+ # Default implementation visits children and reconstructs if changed
38
+ # @param parslet [Parsanol::Atoms::Sequence] sequence to visit
39
+ # @return [Parsanol::Atoms::Base] transformed sequence
40
def visit_sequence(parslet)
  children = parslet.parslets
  visited = children.map { |child| visit(child) }

  # Nothing changed below this node: keep the existing object.
  return parslet if visited == children

  Parsanol::Atoms::Sequence.new(*visited)
end
48
+
49
+ # Visit an alternative node
50
+ # Default implementation visits children and reconstructs if changed
51
+ # @param parslet [Parsanol::Atoms::Alternative] alternative to visit
52
+ # @return [Parsanol::Atoms::Base] transformed alternative
53
def visit_alternative(parslet)
  branches = parslet.alternatives
  visited = branches.map { |branch| visit(branch) }

  # Nothing changed below this node: keep the existing object.
  return parslet if visited == branches

  Parsanol::Atoms::Alternative.new(*visited)
end
61
+
62
+ # Visit a repetition node
63
+ # Default implementation visits child and reconstructs if changed
64
+ # @param parslet [Parsanol::Atoms::Repetition] repetition to visit
65
+ # @return [Parsanol::Atoms::Base] transformed repetition
66
def visit_repetition(parslet)
  inner = parslet.parslet
  visited = visit(inner)
  return parslet if visited.equal?(inner)

  # The tag is read straight from the instance variable, as the original
  # code does, and handed to the rebuilt repetition.
  tag = parslet.instance_variable_get(:@tag)
  Parsanol::Atoms::Repetition.new(visited, parslet.min, parslet.max, tag)
end
79
+
80
+ # Visit a lookahead node
81
+ # Default implementation visits child and reconstructs if changed
82
+ # @param parslet [Parsanol::Atoms::Lookahead] lookahead to visit
83
+ # @return [Parsanol::Atoms::Base] transformed lookahead
84
def visit_lookahead(parslet)
  bound = parslet.bound_parslet
  visited = visit(bound)
  return parslet if visited.equal?(bound)

  # Rebuild around the new child, keeping the positive/negative flag.
  Parsanol::Atoms::Lookahead.new(visited, parslet.positive)
end
92
+
93
+ # Visit a named node
94
+ # Default implementation visits child and reconstructs if changed
95
+ # @param parslet [Parsanol::Atoms::Named] named to visit
96
+ # @return [Parsanol::Atoms::Base] transformed named
97
def visit_named(parslet)
  inner = parslet.parslet
  visited = visit(inner)
  return parslet if visited.equal?(inner)

  # Rebuild around the new child under the same name.
  Parsanol::Atoms::Named.new(visited, parslet.name)
end
105
+
106
+ # Visit a string literal node
107
+ # Default implementation returns as-is (leaf node)
108
+ # @param parslet [Parsanol::Atoms::Str] string to visit
109
+ # @return [Parsanol::Atoms::Base] transformed string
110
def visit_str(parslet)
  # Leaf node: nothing to descend into, so return the same object.
  parslet
end
113
+
114
+ # Visit a regex node
115
+ # Default implementation returns as-is (leaf node)
116
+ # @param parslet [Parsanol::Atoms::Re] regex to visit
117
+ # @return [Parsanol::Atoms::Base] transformed regex
118
def visit_re(parslet)
  # Leaf node: nothing to descend into, so return the same object.
  parslet
end
121
+ end
122
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Alternative during matching. Contains a list of parslets that are tried
4
+ # one after another. Only fails if all alternatives fail.
5
+ #
6
+ # Example:
7
+ #
8
+ # str('a') | str('b') # matches either 'a' or 'b'
9
+ #
10
+ class Parsanol::Atoms::Alternative < Parsanol::Atoms::Base
11
+ attr_reader :alternatives
12
+
13
+ # Constructs an Alternative instance using all given parslets in the order
14
+ # given. This is what happens if you call '|' on existing parslets, like
15
+ # this:
16
+ #
17
+ # str('a') | str('b')
18
+ #
19
def initialize(*alternatives)
  super()

  @alternatives = alternatives

  # Built once here so the failure path in #try can reuse the same
  # frozen string instead of formatting it on every failed match.
  message = 'Expected one of ' + alternatives.inspect
  @error_msg = message.freeze
end
27
+
28
+ #---
29
+ # Don't construct a hanging tree of Alternative parslets, instead store them
30
+ # all here. This reduces the number of objects created.
31
+ #+++
32
def |(parslet)
  # Keep the alternative list flat: (A | B) | C and A | (B | C) both
  # become a single Alternative(A, B, C) instead of a nested tree.
  appended =
    if parslet.is_a?(Parsanol::Atoms::Alternative)
      parslet.alternatives
    else
      [parslet]
    end

  self.class.new(*(@alternatives + appended))
end
43
+
44
+
45
def try(source, context, consume_all)
  # Local copy of the ivar; it is read several times below.
  alts = @alternatives
  arity = alts.size

  # Hand-unrolled paths for two and three alternatives, avoiding the
  # block iteration of the general case.
  if arity == 2
    ok_a, val_a = alts[0].apply(source, context, consume_all)
    return [ok_a, val_a] if ok_a

    ok_b, val_b = alts[1].apply(source, context, consume_all)
    return [ok_b, val_b] if ok_b

    return context.err(self, source, @error_msg, [val_a, val_b])
  elsif arity == 3
    ok_a, val_a = alts[0].apply(source, context, consume_all)
    return [ok_a, val_a] if ok_a

    ok_b, val_b = alts[1].apply(source, context, consume_all)
    return [ok_b, val_b] if ok_b

    ok_c, val_c = alts[2].apply(source, context, consume_all)
    return [ok_c, val_c] if ok_c

    return context.err(self, source, @error_msg, [val_a, val_b, val_c])
  end

  # General case. The list of failures is only allocated once the first
  # alternative has actually failed.
  failures = nil
  alts.each do |alt|
    outcome = alt.apply(source, context, consume_all)
    matched, cause = outcome
    return outcome if matched

    (failures ||= []) << cause
  end

  # Every alternative failed; report them together.
  context.err(self, source, @error_msg, failures)
end
83
+
84
+ precedence ALTERNATE
85
def to_s_inner(prec)
  # Render each alternative at the given precedence, separated by ' / '.
  rendered = alternatives.map do |alt|
    alt.to_s(prec)
  end
  rendered.join(' / ')
end
88
+
89
+ # FIRST set of alternative is union of all alternatives' FIRST sets
90
+ # This is the key computation for cut operator insertion:
91
+ # If FIRST(alt1) ∩ FIRST(alt2) = ∅, we can insert a cut after alt1
92
def compute_first_set
  branches = alternatives
  return Set.new if branches.empty?

  # FIRST(A | B | ...) is the union of the FIRST sets of every branch.
  branches.map(&:first_set).inject { |combined, set| combined.union(set) }
end
97
+ end
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Base class for all parslets, handles orchestration of calls and implements
4
+ # a lot of the operator and chaining methods.
5
+ #
6
+ # Also see Parsanol::Atoms::DSL for chaining parslet atoms together.
7
+ #
8
+ class Parsanol::Atoms::Base
9
+ include Parsanol::Atoms::Precedence
10
+ include Parsanol::Atoms::DSL
11
+ include Parsanol::Atoms::CanFlatten
12
+ include Parsanol::FirstSet
13
+
14
+ # Parslet label as provided in grammar
15
+ attr_accessor :label
16
+
17
+ # Phase 61: Frozen error message for unknown input
18
+ ERROR_UNKNOWN_INPUT = "Don't know what to do with ".freeze
19
+
20
+ # Given a string or an IO object, this will attempt a parse of its contents
21
+ # and return a result. If the parse fails, a Parsanol::ParseFailed exception
22
+ # will be thrown.
23
+ #
24
+ # @param io [String, Source] input for the parse process
25
+ # @option options [Parsanol::ErrorReporter] :reporter error reporter to use,
26
+ # defaults to Parsanol::ErrorReporter::Tree
27
+ # @option options [Boolean] :prefix Should a prefix match be accepted?
28
+ # (default: false)
29
+ # @return [Hash, Array, Parsanol::Slice] PORO (Plain old Ruby object) result
30
+ # tree
31
+ #
32
def parse(io, options={})
  # Anything that already behaves like a source is used as-is; everything
  # else is wrapped in a Parsanol::Source.
  source = io.respond_to?(:line_and_column) ? io : Parsanol::Source.new(io)
  consume_all = !options[:prefix]

  # Remember where this parse starts. A caller-supplied source may already
  # be positioned past the beginning, and the error-reporting pass below
  # must re-read exactly the same input as the first pass.
  start_pos = source.bytepos

  # Try to cheat: assume the input parses and skip error bookkeeping.
  success, value = setup_and_apply(source, nil, consume_all)

  unless success
    # Cheating has not paid off. Rerun the parse from the same starting
    # position, this time gathering error information.
    reporter = options[:reporter] || Parsanol::ErrorReporter::Tree.new
    source.bytepos = start_pos
    success, value = setup_and_apply(source, reporter, consume_all)

    fail "Assertion failed: success was true when parsing with reporter" \
      if success

    # Value is a Parsanol::Cause, which can be turned into an exception.
    # The stack trace will be off, but the error tree explains the failure.
    value.raise

    fail "NEVER REACHED"
  end

  # Leftover input is handled inline during parsing. Getting here with
  # prefix: false and input still unread is an internal error.
  if consume_all && source.chars_left > 0
    fail "BUG: New error strategy should not reach this point."
  end

  flatten(value)
end
71
+
72
+ # Creates a context for parsing and applies the current atom to the input.
73
+ # Returns the parse result.
74
+ #
75
+ # @return [<Boolean, Object>] Result of the parse. If the first member is
76
+ # true, the parse has succeeded.
77
def setup_and_apply(source, error_reporter, consume_all)
  # Only Parser instances pass their class on to the context; for any
  # other atom the value is nil.
  parser_class = self.class if is_a?(Parsanol::Parser)

  context = Parsanol::Atoms::Context.new(error_reporter, parser_class: parser_class)
  apply(source, context, consume_all)
end
84
+
85
+ # Calls the #try method of this parslet. Success consumes input, error will
86
+ # rewind the input.
87
+ #
88
+ # @param source [Parsanol::Source] source to read input from
89
+ # @param context [Parsanol::Atoms::Context] context to use for the parsing
90
+ # @param consume_all [Boolean] true if the current parse must consume
91
+ # all input by itself.
92
def apply(source, context, consume_all=false)
  start_pos = source.bytepos

  outcome = context.try_with_cache(self, source, consume_all)
  matched, = outcome

  unless matched
    # Failed parse: rewind the input to where this attempt began.
    source.bytepos = start_pos
    return outcome
  end

  # Notify the context of the success.
  context.succ(source)

  # A successful parse keeps its consumed input, unless it was required to
  # consume everything and something is still left over.
  return outcome unless consume_all && source.chars_left > 0

  # Grab up to ten characters of the leftover input for the message.
  leftover_pos = source.bytepos
  leftover = source.consume(10)

  # Rewind, as in every other error case.
  source.bytepos = start_pos

  context.err_at(
    self,
    source,
    ERROR_UNKNOWN_INPUT + leftover.to_s.inspect,
    leftover_pos
  )
end
126
+
127
+ # Override this in your Atoms::Base subclasses to implement parsing
128
+ # behaviour.
129
+ #
130
def try(source, context, consume_all)
  # The base class has no matching behaviour of its own; calling this
  # implementation always raises.
  message = "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, message
end
134
+
135
+ # Returns true if this atom can be cached in the packrat cache. Most parslet
136
+ # atoms are cached, so this always returns true, unless overridden.
137
+ #
138
def cached?
  # Default for every atom; subclasses that must bypass the packrat cache
  # override this.
  true
end
141
+
142
+ # Returns true if this atom produces flat results by construction
143
+ # (no nested arrays/hashes that need flattening). This allows flatten
144
+ # to skip processing for atoms that are known to produce simple values.
145
+ #
146
+ # Session 13: Flatten optimization to reduce 5.27% overhead
147
+ # Atoms like Str and Re always produce strings (Parsanol::Slice),
148
+ # which don't need flattening.
149
+ #
150
+ # @return [Boolean] true if results are flat by construction
151
def flat?
  # Conservative default: assume the result still needs flattening.
  false
end
154
+
155
+ # Debug printing - in Treetop syntax.
156
+ #
157
def self.precedence(prec)
  # Class-level macro: defines an instance method #precedence that returns
  # the value given here.
  define_method(:precedence) do
    prec
  end
end
160
+ precedence BASE
161
def to_s(outer_prec=OUTER)
  # A label, when present, replaces the structural rendering.
  rendered = label || to_s_inner(precedence)

  # Parenthesize when the surrounding precedence value is lower than ours.
  outer_prec < precedence ? "(#{rendered})" : rendered
end
169
def inspect
  # Same rendering as #to_s at the outermost precedence.
  to_s(OUTER)
end
172
+ private
173
+
174
+ # Produces an instance of Success and returns it.
175
+ #
176
+ # Phase 57a-57b: Frozen constants for common result patterns to reduce allocations.
177
+ # These constants are used extensively in hot paths to avoid creating new arrays.
178
+ #
179
+ SUCCESS_NIL = [true, nil].freeze
180
+
181
+ # Common patterns for repetition results with empty values
182
+ # Format: [true, [tag]] where tag is the repetition marker
183
+ # Pre-allocated for common tags to avoid array creation
184
+ EMPTY_ARRAY = [].freeze
185
+
186
+ # Phase 57b: Additional frozen constants for tagged empty arrays
187
+ # These are common in repetitions that match 0 times (.maybe, .repeat(0,n))
188
+ EMPTY_REPETITION_ARRAY = [:repetition].freeze
189
+ SUCCESS_EMPTY_REPETITION = [true, EMPTY_REPETITION_ARRAY].freeze
190
+
191
+ EMPTY_SEQUENCE_ARRAY = [:sequence].freeze
192
+ SUCCESS_EMPTY_SEQUENCE = [true, EMPTY_SEQUENCE_ARRAY].freeze
193
+
194
+ # Phase 57c: Additional frozen constants for common patterns
195
+ EMPTY_HASH = {}.freeze
196
+ SUCCESS_EMPTY_HASH = [true, EMPTY_HASH].freeze
197
+
198
+ # Common single-element arrays for captures and tags
199
+ EMPTY_CAPTURE_ARRAY = [:capture].freeze
200
+ SUCCESS_EMPTY_CAPTURE = [true, EMPTY_CAPTURE_ARRAY].freeze
201
+
202
def succ(result)
  # Wraps +result+ in a success tuple [true, result]. For nil and for the
  # shared frozen sentinel objects a pre-built frozen tuple is returned
  # instead of allocating a new array.
  #
  # All sentinel checks compare by identity (equal?), so an ordinary empty
  # hash or a fresh [:repetition] array takes the generic path.
  #
  # EMPTY_ARRAY has no pre-built tuple, so it takes the generic path too;
  # the result is the same [true, EMPTY_ARRAY] pair as before.
  return SUCCESS_NIL if result.nil?
  return SUCCESS_EMPTY_HASH if result.equal?(EMPTY_HASH)
  return SUCCESS_EMPTY_REPETITION if result.equal?(EMPTY_REPETITION_ARRAY)
  return SUCCESS_EMPTY_SEQUENCE if result.equal?(EMPTY_SEQUENCE_ARRAY)
  return SUCCESS_EMPTY_CAPTURE if result.equal?(EMPTY_CAPTURE_ARRAY)

  [true, result]
end
214
+ end
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
+
4
+ module Parsanol::Atoms
5
+ # A series of helper functions that have the common topic of flattening
6
+ # result values into the intermediary tree that consists of Ruby Hashes and
7
+ # Arrays.
8
+ #
9
+ # This module has one main function, #flatten, that takes an annotated
10
+ # structure as input and returns the reduced form that users expect from
11
+ # Atom#parse.
12
+ #
13
+ # NOTE: Since all of these functions are just that, functions without
14
+ # side effects, they are in a module and not in a class. It's hard to draw
15
+ # the line sometimes, but this is beyond.
16
+ #
17
module CanFlatten
  # Converts a mixed value coming out of a parslet into the return value
  # for the user by dropping things and merging hashes.
  #
  # Anything that is not an Array is passed through unchanged. An Array is
  # read as an s-expression [tag, *children]; the children are flattened
  # recursively and then combined according to the tag (:sequence, :maybe
  # or :repetition).
  #
  # @param value [Object] annotated result structure
  # @param named [Boolean] true if the result will be embedded in a Hash
  #   produced by <code>.as(...)</code>; changes how :maybe and
  #   :repetition fold empty results
  # @return [Object] the reduced value
  # @raise [RuntimeError] if the array carries an unknown tag (this
  #   includes the empty array, whose tag is nil)
  def flatten(value, named=false)
    return value unless value.is_a?(Array)

    tag = value[0]

    # Fast path: exactly one child (common case).
    if value.size == 2
      flattened = flatten(value[1])
      case tag
      when :sequence
        # Must agree with flatten_sequence([flattened]): that path compacts
        # nil away and folds the resulting empty list to ''.
        return flattened.nil? ? '' : flattened
      when :maybe
        return named ? flattened : (flattened || '')
      when :repetition
        return flatten_repetition([flattened], named)
      end
    end

    # General path. drop(1) yields [] for an empty array, so that input
    # reaches the "Unknown tag" failure below instead of a negative
    # Array.new size.
    result = value.drop(1).map { |child| flatten(child) }

    case tag
    when :sequence
      return flatten_sequence(result)
    when :maybe
      return named ? result.first : (result.first || '')
    when :repetition
      return flatten_repetition(result, named)
    end

    fail "BUG: Unknown tag #{tag.inspect}."
  end

  # Lisp style fold left: the first element is the seed and the block is
  # called with (accumulator, element) for every following element.
  #
  # @param list [Array] elements to fold
  # @return [Object] '' for an empty list, the sole element for a
  #   one-element list, otherwise the folded value
  def foldl(list, &block)
    return '' if list.empty?

    list.drop(1).reduce(list.first) { |acc, item| block.call(acc, item) }
  end

  # Flatten results from a sequence of parslets: nil entries are dropped
  # and the rest is merged pairwise, left to right, with #merge_fold.
  #
  # @api private
  #
  def flatten_sequence(list)
    foldl(list.compact) { |memo, element| merge_fold(memo, element) }
  end

  # Merges two already flattened sequence members into one value.
  #
  # @api private
  #
  # @param l [Object] left (accumulated) value
  # @param r [Object] right (next) value
  # @return [Object] merged value
  # @raise [RuntimeError] if no merge rule applies to the pair
  def merge_fold(l, r)
    # equal pairs: merge. --------------------------------------------------
    if l.class == r.class
      return l + r unless l.instance_of?(Hash)

      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    # unequal pairs: hoist to same level. ----------------------------------
    left_slice = l.instance_of?(Parsanol::Slice)
    right_slice = r.instance_of?(Parsanol::Slice)
    left_stringlike = l.instance_of?(String) || left_slice
    right_stringlike = r.instance_of?(String) || right_slice

    # Classes are not equal, but both are stringlike?
    if left_stringlike && right_stringlike
      # when merging a String with a Slice, the slice wins.
      return r if right_slice
      return l if left_slice

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # If one of them is a string/slice, the other is more important.
    return l if right_stringlike
    return r if left_stringlike

    # otherwise just create an array for one of them to live in
    return l + [r] if r.instance_of?(Hash)
    return [l] + r if l.instance_of?(Hash)

    fail "Unhandled case when foldr'ing sequence."
  end

  # Flatten results from a repetition of a single parslet. named indicates
  # whether the user has named the result or not. If the user has named
  # the results, an empty list is left alone - otherwise it is turned into
  # an empty string.
  #
  # @api private
  #
  # @param list [Array] flattened children of the repetition
  # @param named [Boolean] whether the result is named
  # @return [Array, Object] the hashes only, the arrays merged one level,
  #   [] for a named empty list, or the children joined with +
  def flatten_repetition(list, named)
    # If keyed subtrees are in the array, discard everything inbetween.
    # To keep the rest, name it.
    hashes = list.select { |e| e.instance_of?(Hash) }
    return hashes unless hashes.empty?

    # If any arrays are nested in this array, flatten all arrays to this
    # level.
    arrays = list.select { |e| e.instance_of?(Array) }
    return arrays.flatten(1) unless arrays.empty?

    # Consistent handling of empty lists, when we act on a named result
    return [] if named && list.empty?

    # Nothing structured is left: concatenate the remaining elements.
    foldl(list.compact) { |acc, piece| acc + piece }
  end

  # That annoying warning 'Duplicate subtrees while merging result' comes
  # from here. You should add more '.as(...)' names to your intermediary tree.
  #
  # @param h1 [Hash] left hash of the merge
  # @param h2 [Hash] right hash of the merge (its values are kept)
  # @return [nil]
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    return if d.empty?

    warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values" \
         " of the latter will be kept. (keys: #{d.inspect})"
  end
end
192
+ end
@@ -0,0 +1,41 @@
1
+
2
+ # Stores the result of matching an atom against input in the #captures in
3
+ # parse context. Doing so will allow you to pull parts of the ongoing parse
4
+ # out later and use them to match other pieces of input.
5
+ #
6
+ # Example:
7
+ # # After this, context.captures[:an_a] returns 'a'
8
+ # str('a').capture(:an_a)
9
+ #
10
+ # # Capture and use of the capture: (matches either 'aa' or 'bb')
11
+ # match['ab'].capture(:first) >>
12
+ # dynamic { |src, ctx| str(ctx.captures[:first]) }
13
+ #
14
class Parsanol::Atoms::Capture < Parsanol::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet [Object] the wrapped atom whose match is recorded
  # @param name [#to_sym] key under which the match is stored in the
  #   context's captures
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. On success, the flattened value is stored
  # in context.captures under the capture name (converted to a Symbol).
  # The wrapped parslet's result is returned untouched in either case.
  #
  # @return [Object] whatever the wrapped parslet's #apply returned
  def apply(source, context, consume_all)
    outcome = @parslet.apply(source, context, consume_all)
    matched, payload = outcome

    context.captures[@name.to_sym] = flatten(payload) if matched

    outcome
  end

  # Renders this atom as "(name = parslet)" for use in #to_s output.
  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end