parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336):
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,33 @@
# Evaluates a block at parse time. The block must return a parser (something
# that implements #apply); that parser is then applied to the input to
# produce the result.
#
# Dynamic parses are never cached.
#
# Example:
#   dynamic { rand < 0.5 ? str('a') : str('b') }
#
class Parsanol::Atoms::Dynamic < Parsanol::Atoms::Base
  attr_reader :block

  # @param block [#call] callable invoked as block.call(source, context);
  #   it must return an object that responds to #apply.
  def initialize(block)
    # Run the base-class initializer, exactly as every sibling atom does.
    super()

    @block = block
  end

  # Dynamic parses are never cached (see the class comment).
  def cached?
    false
  end

  # Calls the block with the current source and context, then applies the
  # atom it returned and hands back that atom's result unchanged.
  def try(source, context, consume_all)
    atom = @block.call(source, context)
    atom.apply(source, context, consume_all)
  end

  # The block is opaque, so a fixed placeholder is printed.
  def to_s_inner(prec)
    "dynamic { ... }"
  end
end
@@ -0,0 +1,55 @@
# frozen_string_literal: true

# Gives a name to a piece of parslet definition. The wrapped definition is
# built lazily, on first use, and then kept. This serves two purposes:
#
# * It avoids infinite recursion while the definition is being evaluated.
# * It lets things be printed by their name instead of by their sometimes
#   complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet.rule.
#
class Parsanol::Atoms::Entity < Parsanol::Atoms::Base
  attr_reader :name, :block

  # @param name  the rule name, used when printing
  # @param label assigned to the wrapped parslet once it has been built
  # @param block produces the wrapped parslet when called
  def initialize(name, label=nil, &block)
    super()

    @name = name
    @label = label
    @block = block
    @parslet = nil
  end

  # Delegates straight to the (lazily built) wrapped parslet.
  def try(source, context, consume_all)
    parslet.apply(source, context, consume_all)
  end

  # Entity only forwards to the wrapped parslet, so the wrapper itself is
  # not cached.
  def cached?
    false
  end

  # Returns the wrapped parslet, building it on the first call.
  # Raises NotImplementedError when the block returns nil.
  def parslet
    if @parslet.nil?
      @parslet = @block.call
      raise_not_implemented if @parslet.nil?
      @parslet.label = @label
    end

    @parslet
  end

  def to_s_inner(prec)
    name.to_s.upcase
  end

  private

  # Raises NotImplementedError with a backtrace from which the frames of
  # this file have been removed (technique borrowed from dependencies.rb in
  # activesupport).
  def raise_not_implemented
    own_file = %r{#{Regexp.escape(__FILE__)}}
    foreign_frames = caller.reject { |frame| frame =~ own_file }

    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    error.set_backtrace(foreign_frames)

    raise error
  end
end
@@ -0,0 +1,28 @@
# Matches like the wrapped parslet but throws the matched value away.
#
# Example:
#
#   str('foo')        # will return 'foo',
#   str('foo').ignore # will return nil
#
class Parsanol::Atoms::Ignored < Parsanol::Atoms::Base
  attr_reader :parslet

  def initialize(parslet)
    super()

    @parslet = parslet
  end

  # Applies the wrapped parslet. A failure is passed through untouched; a
  # success is replaced by a success carrying nil.
  def apply(source, context, consume_all)
    outcome = @parslet.apply(source, context, consume_all)
    matched, _discarded = outcome

    matched ? succ(nil) : outcome
  end

  def to_s_inner(prec)
    "ignored(#{parslet.to_s(prec)})"
  end
end
@@ -0,0 +1,121 @@
# frozen_string_literal: true

# Parses infix expressions: elements separated by operators, honouring
# operator precedence and associativity.
#
# +operations+ is a list of [operator_atom, precedence, associativity]
# triples. An associativity of :left makes the operator left-associative;
# any other value leaves the precedence unchanged for the right-hand side.
#
# The optional reducer block receives (left, op, right) and builds the
# result node; by default a hash {l:, o:, r:} is produced.
#
class Parsanol::Atoms::Infix < Parsanol::Atoms::Base
  attr_reader :element, :operations, :reducer

  def initialize(element, operations, &reducer)
    super()

    @element = element
    @operations = operations
    @reducer = reducer || lambda { |left, op, right| {l: left, o: op, r: right} }
  end

  # Parses an expression and reduces it to a tree. When precedence_climb
  # throws :error, the thrown error result becomes the return value.
  def try(source, context, consume_all)
    catch(:error) do
      succ(produce_tree(precedence_climb(source, context, consume_all)))
    end
  end

  # Turns an array of the form ['1', '+', ['2', '*', '3']] into a structure
  # built by the reducer that reflects the same nesting.
  #
  # Anything that is not an Array is returned unchanged. Note that +ary+ is
  # consumed (shifted) in the process.
  #
  def produce_tree(ary)
    return ary unless ary.kind_of? Array

    left = ary.shift

    until ary.empty?
      op, right = ary.shift(2)

      # Sub-expressions arrive as nested arrays; produce_tree reduces those
      # and passes plain values through untouched.
      left = @reducer.call(left, op, produce_tree(right))
    end

    left
  end

  # A precedence climbing algorithm married to parslet, as described here
  # http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
  #
  # Returns either a single element or a flat array
  # [element, op, rhs, op, rhs, ...] in which each rhs may itself be such
  # an array.
  #
  # @note Error handling in this routine is done by throwing :error and
  #   as a value the error to return to parslet. This avoids cluttering
  #   the recursion logic here with parslet error handling.
  # @note +needs_element+ is accepted but not read by this method.
  #
  def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
    element = @element
    result = []

    # To even begin parsing an arithmetic expression, there needs to be
    # at least one @element.
    success, value = element.apply(source, context, false)

    unless success
      throw :error, context.err(self, source, "#{element.inspect} was expected", [value])
    end

    result << flatten(value, true)

    # Loop until we fail on operator matching or until input runs out.
    loop do
      op_pos = source.bytepos
      op_match, prec, assoc = match_operation(source, context, false)

      # If no operator could be matched here, one of several cases
      # applies:
      #
      # - end of file
      # - end of expression
      # - syntax error
      #
      # We abort matching the expression here.
      break unless op_match

      if prec >= current_prec
        result << op_match
        # A left-associative operator binds its right-hand side one level
        # tighter; otherwise the same precedence level is reused.
        result << precedence_climb(
          source, context, consume_all,
          (assoc == :left) ? prec+1 : prec, true)
      else
        # The operator belongs to an enclosing level: un-read it and stop.
        source.bytepos = op_pos
        break
      end
    end

    # Unwrap a lone element so callers don't see a one-item array.
    result.size == 1 ? result.first : result
  end

  # Tries each operator atom in declaration order. Returns
  # [matched_operator, precedence, associativity] for the first one that
  # matches, or nil when none does.
  def match_operation(source, context, consume_all)
    @operations.each do |op_atom, prec, assoc|
      success, value = op_atom.apply(source, context, consume_all)
      return flatten(value, true), prec, assoc if success
    end

    nil
  end

  def to_s_inner(prec)
    ops = @operations.map { |o, _, _| o.inspect }.join(', ')
    "infix_expression(#{@element.inspect}, [#{ops}])"
  end
end
@@ -0,0 +1,64 @@
# frozen_string_literal: true

# Either positive or negative lookahead, doesn't consume its input.
#
# Example:
#
#   str('foo').present? # matches when the input contains 'foo', but leaves it
#
class Parsanol::Atoms::Lookahead < Parsanol::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # @param bound_parslet the atom to test against the upcoming input
  # @param positive true for positive lookahead, false for negative
  def initialize(bound_parslet, positive=true)
    super()

    # Model positive and negative lookahead by testing this flag.
    @positive = positive
    @bound_parslet = bound_parslet

    # Error messages are built once here and frozen.
    @error_msgs = {
      :positive => ["Input should start with ".freeze, bound_parslet].freeze,
      :negative => ["Input should not start with ".freeze, bound_parslet].freeze
    }.freeze
  end

  # Applies the bound parslet, then always restores the source position so
  # that no input is consumed. Positive lookahead succeeds when the bound
  # parslet matched; negative lookahead succeeds when it did not. Success
  # always carries nil.
  def try(source, context, consume_all)
    rewind_pos = source.bytepos

    success, _ = @bound_parslet.apply(source, context, consume_all)

    # Lookahead never consumes input.
    source.bytepos = rewind_pos

    if @positive
      success ? succ(nil) : context.err_at(self, source, @error_msgs[:positive], source.bytepos)
    else
      success ? context.err_at(self, source, @error_msgs[:negative], source.bytepos) : succ(nil)
    end
  end

  precedence LOOKAHEAD
  # Prints '&' (positive) or '!' (negative) followed by the bound parslet.
  # Uses a local for the prefix so that printing does not write to the atom.
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  # FIRST set of lookahead:
  # - Positive lookahead (&e): FIRST is EPSILON (doesn't consume)
  # - Negative lookahead (!e): FIRST is EPSILON (doesn't consume)
  # Neither form consumes input, so both are treated as able to match empty.
  def compute_first_set
    Set.new([Parsanol::FirstSet::EPSILON])
  end
end
@@ -0,0 +1,50 @@
# frozen_string_literal: true

# Attaches a name to a match so that it shows up under that key in the
# resulting tree.
#
# Example:
#
#   str('foo')          # will return 'foo',
#   str('foo').as(:foo) # will return :foo => 'foo'
#
class Parsanol::Atoms::Named < Parsanol::Atoms::Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. A failure is handed back unchanged; a
  # successful value is wrapped as { name => value }.
  def apply(source, context, consume_all)
    outcome = @parslet.apply(source, context, consume_all)
    matched, value = outcome

    return outcome unless matched

    succ(produce_return_value(value))
  end

  # Named only forwards to the wrapped parslet, so the wrapper itself is
  # not cached.
  def cached?
    false
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  # Naming does not change what is matched, so the FIRST set is the wrapped
  # parslet's FIRST set.
  def compute_first_set
    parslet.first_set
  end

  private

  # Builds the single-entry hash returned for a successful match.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
@@ -0,0 +1,61 @@
# frozen_string_literal: true

# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: <code>character
# ranges, \\w, \\d, \\r, \\n, ...</code>
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parsanol::Atoms::Re < Parsanol::Atoms::Base
  attr_reader :match, :re

  # @param match pattern source; it is converted with #to_s and compiled
  #   with Regexp::MULTILINE
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(@match, Regexp::MULTILINE)

    # Error messages are built once here and frozen. The :failed text is
    # derived from the normalised @match (not the raw argument, which
    # shadows the reader in this method) so that it always agrees with
    # #to_s_inner, whatever type the caller passed in.
    @error_msgs = {
      premature: 'Premature end of input'.freeze,
      failed: "Failed to match #{@match.inspect[1..-2]}".freeze
    }.freeze
  end

  # Consumes exactly one character when the source matches the regexp at
  # the current position. Otherwise reports either premature end of input
  # (no characters left) or a plain match failure.
  def try(source, context, consume_all)
    return succ(source.consume(1)) if source.matches?(@re)

    reason = source.chars_left < 1 ? :premature : :failed
    context.err(self, source, @error_msgs[reason])
  end

  # Prints the pattern source without the surrounding quotes.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end

  # Regex matching is already very fast (single character match).
  # Caching adds overhead without benefit for such simple operations.
  def cached?
    false
  end

  # Re always produces flat results (Parsanol::Slice), with no nested
  # structures, so flatten can skip processing.
  def flat?
    true
  end

  # FIRST set for Re is the atom itself.
  # Conservative: could theoretically analyze regex to extract literal prefixes
  def compute_first_set
    Set.new([self])
  end
end
@@ -0,0 +1,241 @@
1
+ # frozen_string_literal: true
2
+
3
+
4
+ # Matches a parslet repeatedly.
5
+ #
6
+ # Example:
7
+ #
8
+ # str('a').repeat(1,3) # matches 'a' at least once, but at most three times
9
+ # str('a').maybe # matches 'a' if it is present in the input (repeat(0,1))
10
+ #
11
class Parsanol::Atoms::Repetition < Parsanol::Atoms::Base
  # min     - minimum number of matches required
  # max     - maximum number of matches (nil means unbounded)
  # parslet - the atom being repeated
  attr_reader :min, :max, :parslet

  # Frozen hash of pre-built error messages, keyed by :minrep and :unconsumed.
  attr_reader :error_msgs

  # parslet - atom to repeat
  # min     - minimum number of matches
  # max     - maximum number of matches, or nil for no upper bound
  # tag     - symbol placed first in the result array (defaults to :repetition)
  #
  # Raises ArgumentError when max is 0, since that asks for zero repetitions.
  def initialize(parslet, min, max, tag=:repetition)
    super()

    raise ArgumentError,
      "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})" \
      if max == 0

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag

    # Built once and frozen so that failing matches do not allocate strings
    @error_msgs = {
      minrep: "Expected at least #{min} of #{parslet.inspect}".freeze,
      unconsumed: 'Extra input after last repetition'.freeze
    }.freeze
  end

  # Matches the parslet between min and max times.
  #
  # Dispatches to tree memoization when the context enables it, then to
  # unrolled fast paths for `.maybe` and small exact counts, and finally to
  # the general loop.
  def try(source, context, consume_all)
    # Instance variables are copied to locals because this method is hot
    parslet = @parslet
    min = @min
    max = @max
    tag = @tag

    # Use tree memoization if the context supports and enables it
    if context.respond_to?(:use_tree_memoization?) && context.use_tree_memoization?
      return try_with_tree_memoization(source, context, consume_all)
    end

    # Fast path for .maybe (min=0, max=1) - very common case
    if min == 0 && max == 1
      success, value = parslet.apply(source, context, false)
      return succ([tag, value]) if success
      # Shared frozen constant avoids allocating an empty result each time
      return succ(tag == :repetition ? Parsanol::Atoms::Base::EMPTY_REPETITION_ARRAY : [tag])
    end

    # Fast path for small exact counts (min == max <= 3), unrolled by hand.
    # Only the last application receives consume_all.
    if min == max && max && max <= 3
      case max
      when 1
        success, value = parslet.apply(source, context, consume_all)
        return success ? succ([tag, value]) : context.err_at(self, source, error_msgs[:minrep], source.bytepos, [value])
      when 2
        success, v1 = parslet.apply(source, context, false)
        return context.err_at(self, source, error_msgs[:minrep], source.bytepos, [v1]) unless success
        success, v2 = parslet.apply(source, context, consume_all)
        return success ? succ([tag, v1, v2]) : context.err_at(self, source, error_msgs[:minrep], source.bytepos, [v2])
      when 3
        success, v1 = parslet.apply(source, context, false)
        return context.err_at(self, source, error_msgs[:minrep], source.bytepos, [v1]) unless success
        success, v2 = parslet.apply(source, context, false)
        return context.err_at(self, source, error_msgs[:minrep], source.bytepos, [v2]) unless success
        success, v3 = parslet.apply(source, context, consume_all)
        return success ? succ([tag, v1, v2, v3]) : context.err_at(self, source, error_msgs[:minrep], source.bytepos, [v3])
      end
    end

    # General case for variable or large repetitions
    try_repetition_general(source, context, consume_all)
  end

  # Tree memoization for repetitions: caches the array of successful matches
  # found at a start position so a later attempt at the same position can
  # reuse it instead of re-parsing.
  #
  # Only results that satisfy the minimum bound are cached, and a cache hit
  # is still subject to the consume_all check, so a hit behaves like the
  # parse that produced it.
  def try_with_tree_memoization(source, context, consume_all)
    start_pos = source.bytepos
    cache_key = object_id

    # Check if we have a cached tree result at this position
    cached = context.query_tree_memo(cache_key, start_pos)
    if cached
      values, end_pos = cached
      source.bytepos = end_pos

      # The cached prefix must still honour consume_all, as a fresh parse would
      if consume_all && source.chars_left > 0
        return context.err(self, source, error_msgs[:unconsumed])
      end

      return succ([@tag] + values)
    end

    # Parse repetition and collect all successful matches
    occ = 0
    estimated_size = [max || 10, 10].min
    buffer = context.acquire_buffer(size: estimated_size + 1) # +1 for tag
    buffer.push(@tag) # Add tag first to maintain [:repetition, ...] structure
    end_pos = start_pos # Position right after the last successful match
    break_on = nil

    loop do
      success, value = parslet.apply(source, context, false)

      break_on = value
      break unless success

      occ += 1
      buffer.push(value)
      end_pos = source.bytepos

      # Check max bound
      break if max && occ >= max
    end

    # Check min bound before anything is cached, so that a too-short run
    # can never be replayed as a success.
    if occ < min
      source.bytepos = start_pos
      # Release buffer on failure
      context.release_buffer(buffer)
      return context.err_at(
        self,
        source,
        error_msgs[:minrep],
        start_pos,
        [break_on])
    end

    # Store tree memo: cache the array of successful matches (without the
    # tag). The buffer must be materialized for storage.
    if occ > 0
      context.store_tree_memo(cache_key, start_pos, buffer.to_a[1..-1], end_pos)
    end

    # Check consume_all requirement
    if consume_all && source.chars_left > 0
      # Release buffer on failure
      context.release_buffer(buffer)
      return context.err(
        self,
        source,
        error_msgs[:unconsumed],
        [break_on])
    end

    # Return lazy result instead of array
    lazy_result = Parsanol::LazyResult.new(buffer, context)
    return succ(lazy_result)
  end

  # General repetition parsing: applies the parslet greedily until it fails
  # or max is reached, then validates the min bound and consume_all.
  def try_repetition_general(source, context, consume_all)
    occ = 0
    start_pos = source.bytepos

    # Estimate buffer size based on max, capped at 10
    estimated_size = [max || 10, 10].min
    buffer = context.acquire_buffer(size: estimated_size + 1) # +1 for tag
    buffer.push(@tag) # Add tag first to maintain [:repetition, ...] structure

    break_on = nil
    loop do
      success, value = parslet.apply(source, context, false)

      break_on = value
      break unless success

      occ += 1
      buffer.push(value)

      # If max is defined and reached, return lazy result
      if max && occ >= max
        lazy_result = Parsanol::LazyResult.new(buffer, context)
        return succ(lazy_result)
      end
    end

    # Last attempt to match parslet was a failure, failure reason in break_on.

    # Greedy matcher has produced a failure. Check if occ (which will
    # contain the number of successes) is >= min.
    if occ < min
      # Release buffer on failure
      context.release_buffer(buffer)
      return context.err_at(
        self,
        source,
        error_msgs[:minrep],
        start_pos,
        [break_on])
    end

    # consume_all is true, that means that we're inside the part of the parser
    # that should consume the input completely. Repetition failing here means
    # probably that we didn't.
    #
    # We have a special clause to create an error here because otherwise
    # break_on would get thrown away. It turns out, that contains very
    # interesting information in a lot of cases.
    #
    if consume_all && source.chars_left > 0
      # Release buffer on failure
      context.release_buffer(buffer)
      return context.err(
        self,
        source,
        error_msgs[:unconsumed],
        [break_on])
    end

    # Return lazy result instead of array
    lazy_result = Parsanol::LazyResult.new(buffer, context)
    return succ(lazy_result)
  end

  precedence REPETITION

  # Printable form: the parslet followed by "{min, max}", or "?" for maybe.
  def to_s_inner(prec)
    minmax = "{#{min}, #{max}}"
    minmax = '?' if min == 0 && max == 1

    parslet.to_s(prec) + minmax
  end

  # FIRST set of repetition:
  # - Always includes FIRST of the repeated parslet
  # - If min == 0 (can match empty), also includes EPSILON
  def compute_first_set
    result = parslet.first_set.dup
    # If repetition can match zero times, add EPSILON
    result.add(Parsanol::FirstSet::EPSILON) if min == 0
    result
  end
end
@@ -0,0 +1,28 @@
1
+ # Starts a new scope in the parsing process. Please also see the #captures
2
+ # method.
3
+ #
4
class Parsanol::Atoms::Scope < Parsanol::Atoms::Base
  # The callable that yields the parslet to run inside the new scope.
  attr_reader :block

  # block - callable invoked (with no arguments) each time the atom is
  #         applied or printed; it must return a parslet.
  def initialize(block)
    super()

    @block = block
  end

  # Scope atoms opt out of result caching.
  def cached?
    false
  end

  # Opens a new scope on the context, builds the inner parslet by calling
  # the stored block, and returns the result of applying that parslet.
  def apply(source, context, consume_all)
    # Read the ivar once into a local before entering the block
    builder = @block
    context.scope do
      inner = builder.call
      # Explicit return: the result leaves #apply directly from inside the
      # scope block.
      return inner.apply(source, context, consume_all)
    end
  end

  # Printable form: the inner parslet wrapped in "scope { ... }".
  def to_s_inner(prec)
    inner_text = block.call.to_s(prec)
    "scope { #{inner_text} }"
  end
end