parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast_visitor'
4
+
5
+ module Parsanol
6
+ module Optimizers
7
+ # Optimizes repetition/quantifier patterns in the AST
8
+ # Follows visitor pattern for clean separation of concerns
9
+ #
10
+ # Transformations:
11
+ # - repeat(1,1) => unwrap (identity transformation)
12
+ # - repeat(0,1).repeat(0,1) => repeat(0,1) (idempotent)
13
+ # - repeat(n,n).repeat(m,m) => repeat(n*m,n*m) (multiply exact counts)
14
+ class QuantifierOptimizer < ASTVisitor
15
+ # Visit a repetition node and apply quantifier optimizations.
16
+ # @param parslet [Parsanol::Atoms::Repetition] repetition to optimize
17
+ # @return [Parsanol::Atoms::Base] the optimized child, a new Repetition, or the original node when nothing changed
18
+ def visit_repetition(parslet)
19
+ # Optimize the child first so the checks below see its simplified form
20
+ inner = visit(parslet.parslet)
21
+
22
+ # Optimization 1: repeat(1,1) is identity - unwrap it (the optimized child itself is returned)
23
+ if parslet.min == 1 && parslet.max == 1
24
+ return inner
25
+ end
26
+
27
+ # Optimization 2: Nested repetitions (the optimized child is itself a Repetition)
28
+ if inner.is_a?(Parsanol::Atoms::Repetition)
29
+ # repeat(0,1).repeat(0,1) => repeat(0,1) (idempotent); the inner node is returned, so the outer tag is not kept
30
+ if parslet.min == 0 && parslet.max == 1 &&
31
+ inner.min == 0 && inner.max == 1
32
+ return inner
33
+ end
34
+
35
+ # repeat(n,n).repeat(m,m) => repeat(n*m,n*m) for exact counts; rebuilt around the innermost parslet with the outer tag
36
+ if parslet.min == parslet.max && inner.min == inner.max &&
37
+ parslet.max && inner.max
38
+ new_count = parslet.min * inner.min
39
+ return Parsanol::Atoms::Repetition.new(
40
+ inner.parslet,
41
+ new_count,
42
+ new_count,
43
+ parslet.instance_variable_get(:@tag)
44
+ )
45
+ end
46
+ end
47
+
48
+ # No rewrite matched: reuse the original node if the child is the same object, otherwise rebuild around the optimized child
49
+ if inner.equal?(parslet.parslet)
50
+ parslet
51
+ else
52
+ Parsanol::Atoms::Repetition.new(
53
+ inner,
54
+ parslet.min,
55
+ parslet.max,
56
+ parslet.instance_variable_get(:@tag)
57
+ )
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast_visitor'
4
+
5
+ module Parsanol
6
+ module Optimizers
7
+ # Optimizes sequence patterns in the AST
8
+ # Follows visitor pattern for clean separation of concerns
9
+ #
10
+ # Transformations:
11
+ # - str('a') >> str('b') => str('ab') (merge adjacent strings)
12
+ # - (A >> B) >> C => A >> B >> C (flatten nested sequences)
13
+ # - Sequence(A) => A (unwrap single-element sequences)
14
+ class SequenceOptimizer < ASTVisitor
15
+ # Visit a sequence node and apply sequence optimizations (flatten, merge strings, unwrap).
16
+ # @param parslet [Parsanol::Atoms::Sequence] sequence to optimize
17
+ # @return [Parsanol::Atoms::Base] the lone child, a new Sequence, or the original node when nothing changed
18
+ def visit_sequence(parslet)
19
+ # Optimize every child first (each one is passed through visit)
20
+ new_parslets = parslet.parslets.map { |p| visit(p) }
21
+
22
+ # Optimization 1: Flatten nested sequences
23
+ flattened = flatten_sequences(new_parslets)
24
+
25
+ # Optimization 2: Merge adjacent string literals
26
+ merged = merge_adjacent_strings(flattened)
27
+
28
+ # Optimization 3: Unwrap single-element sequences (returns the element itself, not a Sequence)
29
+ return merged[0] if merged.size == 1
30
+
31
+ # Rebuild only when the child list differs from the original (Array#!=); otherwise keep the original node
32
+ if merged != parslet.parslets
33
+ Parsanol::Atoms::Sequence.new(*merged)
34
+ else
35
+ parslet
36
+ end
37
+ end
38
+
39
+ private
40
+
41
+ # Splice the children of directly nested Sequence atoms into the parent list (one level per call; not recursive)
42
+ # @param parslets [Array<Parsanol::Atoms::Base>] array of parslets
43
+ # @return [Array<Parsanol::Atoms::Base>] new flattened array (the input array is not modified)
44
+ def flatten_sequences(parslets)
45
+ result = []
46
+ parslets.each do |p|
47
+ if p.is_a?(Parsanol::Atoms::Sequence)
48
+ result.concat(p.parslets)
49
+ else
50
+ result << p
51
+ end
52
+ end
53
+ result
54
+ end
55
+
56
+ # Merge each run of adjacent Str atoms into one Str atom; a lone Str keeps its original object
57
+ # @param parslets [Array<Parsanol::Atoms::Base>] array of parslets
58
+ # @return [Array<Parsanol::Atoms::Base>] new array with merged strings, or the input array itself when it has fewer than two elements
59
+ def merge_adjacent_strings(parslets)
60
+ return parslets if parslets.size < 2
61
+
62
+ result = []
63
+ i = 0
64
+
65
+ while i < parslets.size
66
+ current = parslets[i]
67
+
68
+ if current.is_a?(Parsanol::Atoms::Str)
69
+ # Collect the text of this and any following Str atoms in a Rope (relies on Rope#append returning the rope, as it is chained here; presumably cheap appends - confirm in Parsanol::Rope)
70
+ rope = Parsanol::Rope.new.append(current.str)
71
+ j = i + 1
72
+
73
+ while j < parslets.size && parslets[j].is_a?(Parsanol::Atoms::Str)
74
+ rope.append(parslets[j].str)
75
+ j += 1
76
+ end
77
+
78
+ # Create a merged Str only for a run of two or more (j > i + 1); a run of one is pushed unchanged
79
+ # The rope is turned into a String once, at the end of the run, instead of concatenating per atom
80
+ if j > i + 1
81
+ result << Parsanol::Atoms::Str.new(rope.to_s)
82
+ i = j
83
+ else
84
+ result << current
85
+ i += 1
86
+ end
87
+ else
88
+ result << current
89
+ i += 1
90
+ end
91
+ end
92
+
93
+ result
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::RubyTransform - Ruby Transform Mode (Parslet-Compatible)
4
+ #
5
+ # This is the default parsing mode that provides maximum flexibility.
6
+ # - Parsing can use Rust (if available) or pure Ruby
7
+ # - Transformation happens in Ruby using Parslet-style Transform class
8
+ #
9
+ # Usage:
10
+ # class MyParser < Parsanol::Parser
11
+ # include Parsanol::RubyTransform
12
+ # rule(:number) { match('[0-9]').repeat(1).as(:int) }
13
+ # root(:number)
14
+ # end
15
+ #
16
+ # parser = MyParser.new
17
+ # tree = parser.parse("42") # Returns generic tree
18
+ # ast = transform.apply(tree) # Transform in Ruby
19
+ #
20
+ # To use Rust backend for parsing:
21
+ # class MyParser < Parsanol::Parser
22
+ # include Parsanol::RubyTransform
23
+ # parse_backend :rust # Falls back to pure Ruby if the native extension is unavailable; use_rust_backend! raises instead
24
+ # ...
25
+ # end
26
+
27
+ module Parsanol
28
+ module RubyTransform
29
+ def self.included(base) # Hook run on include: extends the including class with ClassMethods
30
+ base.extend(ClassMethods)
31
+ end
32
+
33
+ module ClassMethods
34
+ # Get or set the parsing backend (kept in an instance variable on the receiving class; no availability check here)
35
+ # @param backend [Symbol, nil] :ruby or :rust; nil (default) only reads the current value
36
+ # @return [Symbol] Current backend setting (:ruby until one is set)
37
+ def parse_backend(backend = nil)
38
+ if backend
39
+ @parse_backend = backend
40
+ end
41
+ @parse_backend ||= :ruby
42
+ end
43
+
44
+ # Setter for parsing backend (stores the value as given, without validation)
45
+ # @param backend [Symbol] :ruby or :rust
46
+ def parse_backend=(backend)
47
+ @parse_backend = backend
48
+ end
49
+
50
+ # Configure parsing to use Rust backend
51
+ # @raise [LoadError] if Parsanol::Native.available? is false (the backend setting is then left unchanged)
52
+ def use_rust_backend!
53
+ unless Parsanol::Native.available?
54
+ raise LoadError,
55
+ "Rust backend requested but native extension not available. " \
56
+ "Run `rake compile` to build the extension."
57
+ end
58
+ @parse_backend = :rust
59
+ end
60
+
61
+ # Configure parsing to use pure Ruby (default)
62
+ def use_ruby_backend!
63
+ @parse_backend = :ruby
64
+ end
65
+ end
66
+
67
+ # Parse input and return generic tree
68
+ # Takes the Rust path only when the class backend is :rust AND the native extension is available; otherwise pure Ruby.
69
+ # @param input [String] The input string to parse
70
+ # @param options [Hash] Parse options
71
+ # @option options [Boolean] :consume_all (true) Consume entire input (NOTE(review): not forwarded on the Rust path)
72
+ # @return [Hash, Array, String, Parsanol::Slice] Parse tree
73
+ # @raise [Parsanol::ParseFailed] If parsing fails
74
+ def parse(input, options = {})
75
+ if self.class.parse_backend == :rust && Parsanol::Native.available?
76
+ parse_with_rust(input, options)
77
+ else
78
+ parse_with_ruby(input, options)
79
+ end
80
+ end
81
+
82
+ # Parse and apply transform in one step
83
+ #
84
+ # @param input [String] The input string to parse
85
+ # @param transform [Parsanol::Transform] Transform to apply (only #apply is called on it)
86
+ # @param options [Hash] Parse options, forwarded to #parse
87
+ # @return [Object] Transformed result (whatever transform.apply returns)
88
+ def parse_with_transform(input, transform, options = {})
89
+ tree = parse(input, options)
90
+ transform.apply(tree)
91
+ end
92
+
93
+ private
94
+
95
+ # Parse using Rust native extension
96
+ def parse_with_rust(input, options = {})
97
+ consume_all = options.fetch(:consume_all, true)
98
+
99
+ # Use native parser with Parslet-compatible output. NOTE(review): consume_all is read above but never passed to this call
100
+ Parsanol::Native.parse_parslet_compatible(root, input)
101
+ end
102
+
103
+ # Parse using pure Ruby, forwarding input and options unchanged
104
+ def parse_with_ruby(input, options = {})
105
+ # Call the root parslet's parse method directly
106
+ root.parse(input, options)
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ # Parsanol::Serialized - Serialized Transform Mode (JSON Output)
6
+ #
7
+ # This mode provides cross-language compatibility through JSON serialization.
8
+ # - Parsing AND transformation happen in Rust for maximum performance
9
+ # - Output is a JSON string that can be deserialized to any format
10
+ # - REQUIRES native extension (will raise LoadError if not available)
11
+ #
12
+ # Usage:
13
+ # class MyParser < Parsanol::Parser
14
+ # include Parsanol::Serialized
15
+ # rule(:number) { match('[0-9]').repeat(1).as(:int) }
16
+ # root(:number)
17
+ # end
18
+ #
19
+ # parser = MyParser.new
20
+ # json = parser.parse_to_json("42") # Returns JSON string
21
+ # # => '{"int": "42"}'
22
+ #
23
+ # # With a deserializer class
24
+ # result = parser.parse_to_struct("42", MyDeserializer)
25
+ #
26
+ # Performance: Faster than RubyTransform because transform happens in Rust.
27
+ # Memory: Higher than ZeroCopy due to JSON serialization overhead.
28
+
29
+ module Parsanol
30
+ module Serialized
31
+ def self.included(base) # Hook run on include: extends the including class with ClassMethods
32
+ base.extend(ClassMethods)
33
+ end
34
+
35
+ module ClassMethods
36
+ # Get or set the output schema for transformation; returns the current schema ({} until one is set)
37
+ # Optional. NOTE(review): nothing in this module reads the schema when parsing - confirm where it is consumed
38
+ #
39
+ # @param schema [Hash, nil] Schema definition; nil (default) only reads the current value
40
+ # @example
41
+ # output_schema(
42
+ # number: { type: :integer },
43
+ # binop: { type: :object, properties: [:left, :op, :right] }
44
+ # )
45
+ def output_schema(schema = nil)
46
+ @output_schema = schema if schema
47
+ @output_schema ||= {}
48
+ end
49
+ end
50
+
51
+ # Parse input and return JSON string (calls Parsanol::Native.serialize_grammar(root) on each invocation)
52
+ #
53
+ # @param input [String] The input string to parse
54
+ # @return [String] JSON string representing the parse result
55
+ # @raise [LoadError] If native extension not available
56
+ # @raise [Parsanol::ParseFailed] If parsing fails (presumably raised by the native call - confirm)
57
+ def parse_to_json(input)
58
+ unless Parsanol::Native.available?
59
+ raise LoadError,
60
+ "Serialized mode requires native extension for JSON serialization. " \
61
+ "Run `rake compile` or use Parsanol::RubyTransform for Ruby-only parsing."
62
+ end
63
+
64
+ grammar_json = Parsanol::Native.serialize_grammar(root)
65
+ Parsanol::Native.parse_to_json(grammar_json, input)
66
+ end
67
+
68
+ # Parse input and deserialize to a Ruby object
69
+ #
70
+ # @param input [String] The input string to parse
71
+ # @param deserializer_class [Class] Class with .from_json method; it receives the raw JSON string, not a parsed Hash
72
+ # @return [Object] Deserialized object (whatever deserializer_class.from_json returns)
73
+ # @raise [LoadError] If native extension not available
74
+ # @raise [Parsanol::ParseFailed] If parsing fails
75
+ def parse_to_struct(input, deserializer_class)
76
+ json = parse_to_json(input)
77
+ deserializer_class.from_json(json)
78
+ end
79
+
80
+ # Parse input and return Ruby Hash (parsed JSON)
81
+ # @param options [Hash] Accepted but not used by this method
82
+ # @param input [String] The input string to parse
83
+ # @return [Hash, Array] Ruby object from JSON (Hash keys are Strings: JSON.parse is called without symbolize_names)
84
+ # @raise [LoadError] If native extension not available
85
+ # @raise [Parsanol::ParseFailed] If parsing fails
86
+ def parse(input, options = {})
87
+ json = parse_to_json(input)
88
+ JSON.parse(json)
89
+ end
90
+
91
+ # Alias for consistency with other modes
92
+ alias parse_to_hash parse
93
+ end
94
+ end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::ZeroCopy - Zero-Copy Transform Mode (Direct FFI Object Construction)
4
+ #
5
+ # This mode provides MAXIMUM PERFORMANCE through zero-copy FFI.
6
+ # - Rust directly constructs Ruby objects via rb_class_new, rb_ivar_set
7
+ # - No serialization of the parse result (the grammar itself is still serialized for the native call)
8
+ # - REQUIRES native extension AND type mapping definitions
9
+ #
10
+ # Usage:
11
+ # # Define Ruby classes that mirror Rust types
12
+ # module Calculator
13
+ # class Number < Expr
14
+ # attr_reader :value
15
+ # def initialize(value); @value = value; end
16
+ # def eval = @value
17
+ # end
18
+ #
19
+ # class BinOp < Expr
20
+ # attr_reader :left, :op, :right
21
+ # def eval; ...; end
22
+ # end
23
+ # end
24
+ #
25
+ # class CalculatorParser < Parsanol::Parser
26
+ # include Parsanol::ZeroCopy
27
+ #
28
+ # rule(:number) { ... }
29
+ # root(:expression)
30
+ #
31
+ # # Type mapping (tells Rust which Ruby classes to construct)
32
+ # output_types(
33
+ # number: Calculator::Number,
34
+ # binop: Calculator::BinOp
35
+ # )
36
+ # end
37
+ #
38
+ # parser = CalculatorParser.new
39
+ # expr = parser.parse("42+8") # Returns Calculator::Number or BinOp DIRECTLY
40
+ # puts expr.eval # No transform needed!
41
+ #
42
+ # Performance: FASTEST mode (18-44x faster than pure Ruby)
43
+ # Memory: Lowest overhead (zero-copy, no serialization)
44
+
45
+ module Parsanol
46
+ module ZeroCopy
47
+ def self.included(base) # Hook run on include: extends the including class with ClassMethods
48
+ base.extend(ClassMethods)
49
+ end
50
+
51
+ module ClassMethods
52
+ # Get or set the output type mapping for zero-copy construction
53
+ #
54
+ # This tells the Rust parser which Ruby classes to instantiate
55
+ # for each named capture in the grammar.
56
+ #
57
+ # @param types [Hash, nil] Mapping of rule names to Ruby classes; replaces the stored mapping wholesale, nil (default) only reads it
58
+ # @example
59
+ # output_types(
60
+ # number: Calculator::Number,
61
+ # binop: Calculator::BinOp,
62
+ # expr: Calculator::Expr
63
+ # )
64
+ def output_types(types = nil)
65
+ @output_types = types if types
66
+ @output_types ||= {}
67
+ end
68
+
69
+ # Define a single output type mapping (writes into the stored mapping Hash in place)
70
+ #
71
+ # @param rule_name [Symbol, String] Name of the rule (stored as a Symbol)
72
+ # @param ruby_class [Class] Ruby class to instantiate
73
+ # @example
74
+ # output_type :number, Calculator::Number
75
+ def output_type(rule_name, ruby_class)
76
+ output_types[rule_name.to_sym] = ruby_class
77
+ end
78
+
79
+ # Get output types as a hash suitable for FFI (a new Hash; the stored mapping is not modified)
80
+ #
81
+ # @return [Hash] String keys; values are Class#name for classes and #to_s for anything else
82
+ def output_types_for_ffi
83
+ output_types.transform_keys(&:to_s).transform_values do |klass|
84
+ klass.is_a?(Class) ? klass.name : klass.to_s
85
+ end
86
+ end
87
+ end
88
+
89
+ # Parse input and return direct Ruby objects (no serialization of the result)
90
+ # Raises ArgumentError when the parser class has no output_types (checked after the grammar has been serialized).
91
+ # @param input [String] The input string to parse
92
+ # @param options [Hash] Parse options (ignored for zero-copy)
93
+ # @return [Object] Direct Ruby object (type depends on grammar)
94
+ # @raise [LoadError] If native extension not available
95
+ # @raise [Parsanol::ParseFailed] If parsing fails
96
+ def parse(input, options = {})
97
+ unless Parsanol::Native.available?
98
+ raise LoadError,
99
+ "ZeroCopy mode requires native extension for direct FFI object construction. " \
100
+ "Run `rake compile` to build the extension, or use " \
101
+ "Parsanol::RubyTransform for Ruby-only parsing."
102
+ end
103
+
104
+ grammar_json = Parsanol::Native.serialize_grammar(root)
105
+ type_map = self.class.output_types_for_ffi
106
+
107
+ if type_map.empty?
108
+ raise ArgumentError,
109
+ "ZeroCopy mode requires output_types to be defined. " \
110
+ "Add `output_types(number: MyNumberClass)` to your parser class."
111
+ end
112
+
113
+ Parsanol::Native.parse_to_objects(grammar_json, input, type_map)
114
+ end
115
+
116
+ # Parse with explicit type map override; raises LoadError if the native extension is unavailable
117
+ #
118
+ # @param input [String] The input string to parse
119
+ # @param type_map [Hash] Override type mapping, passed to the native call as-is (unlike #parse: no output_types_for_ffi conversion, no empty check)
120
+ # @return [Object] Direct Ruby object
121
+ def parse_with_types(input, type_map)
122
+ unless Parsanol::Native.available?
123
+ raise LoadError, "ZeroCopy mode requires native extension."
124
+ end
125
+
126
+ grammar_json = Parsanol::Native.serialize_grammar(root)
127
+ Parsanol::Native.parse_to_objects(grammar_json, input, type_map)
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol Transform Mode Options
4
+ #
5
+ # This module provides three transformation modes for parsing:
6
+ #
7
+ # 1. RubyTransform - Parse in Rust/Ruby, Transform in Ruby (default, most flexible)
8
+ # 2. Serialized - Parse + Transform in Rust, JSON output (requires native extension)
9
+ # 3. ZeroCopy - Direct FFI object construction (requires native extension, fastest)
10
+ #
11
+ # Usage:
12
+ # class MyParser < Parsanol::Parser
13
+ # include Parsanol::RubyTransform # or Serialized, or ZeroCopy
14
+ # rule(:number) { match('[0-9]').repeat(1).as(:int) }
15
+ # root(:number)
16
+ # end
17
+
18
+ require 'parsanol/options/ruby_transform'
19
+ require 'parsanol/options/serialized'
20
+ require 'parsanol/options/zero_copy'
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ # Parallel parsing support for batch processing multiple inputs.
5
+ # Uses rayon through the native extension when it is available; otherwise inputs are parsed sequentially.
6
+ #
7
+ # @example Parse multiple files in parallel
8
+ # grammar = MyParser.new.serialize_grammar
9
+ # inputs = Dir.glob("*.json").map { |f| File.read(f) }
10
+ #
11
+ # results = Parsanol::Parallel.parse_batch(grammar, inputs)
12
+ # results.each_with_index do |result, i|
13
+ # case result
14
+ # when Hash then puts "File #{i} parsed: #{result}"
15
+ # when Parsanol::ParseFailed then puts "File #{i} failed: #{result.message}"
16
+ # end
17
+ # end
18
+ #
19
+ module Parallel
20
# Settings object for parallel parsing.
#
# Both attributes are plain read/write accessors; the +with_*+ methods set
# one attribute and return the receiver so calls can be chained.
#
# @example Configure with 8 threads
#   config = Parsanol::Parallel::Config.new
#     .with_num_threads(8)
#     .with_min_chunk_size(50)
#
#   results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
#
class Config
  # @!attribute num_threads
  #   @return [Integer, nil] Number of threads (nil = auto-detect)
  # @!attribute min_chunk_size
  #   @return [Integer] Minimum inputs per thread (default: 10)
  attr_accessor :num_threads, :min_chunk_size

  # Start with no explicit thread count and a minimum chunk size of 10.
  def initialize
    @num_threads, @min_chunk_size = nil, 10
  end

  # Store the thread count.
  #
  # @param n [Integer] Number of threads
  # @return [Config] self for chaining
  def with_num_threads(n)
    tap { @num_threads = n }
  end

  # Store the minimum chunk size per thread.
  #
  # @param size [Integer] Minimum inputs per thread
  # @return [Config] self for chaining
  def with_min_chunk_size(size)
    tap { @min_chunk_size = size }
  end
end
59
+
60
+ class << self
61
# Parse every string in +inputs+ against one serialized grammar.
#
# When Parsanol::Native exposes +parse_batch_parallel+, the whole batch is
# handed to it. Otherwise each input is parsed in turn with
# Parsanol::Native.parse, so results come back in input order.
#
# @param grammar_json [String] JSON-serialized grammar
# @param inputs [Array<String>] Array of input strings to parse
# @param config [Config] Parallel configuration (optional); only
#   +num_threads+ is forwarded, +min_chunk_size+ is not read here
# @return [Array<Object>] Array of parse results
# @raise [LoadError] If Parsanol::Native.available? is false
#
# @example Basic usage
#   results = Parsanol::Parallel.parse_batch(grammar, inputs)
#
# @example With configuration
#   config = Parsanol::Parallel::Config.new.with_num_threads(4)
#   results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
#
def parse_batch(grammar_json, inputs, config: Config.new)
  unless Parsanol::Native.available?
    raise LoadError,
          "Parallel parsing requires native extension. " \
          "Run `rake compile` to build the extension."
  end

  # Probe the receiver and method that are actually invoked below. Checking
  # a differently named method on this module instead would either never
  # select the parallel path or select it when the call cannot succeed.
  if Parsanol::Native.respond_to?(:parse_batch_parallel)
    Parsanol::Native.parse_batch_parallel(
      grammar_json,
      inputs,
      num_threads: config.num_threads
    )
  else
    # Fallback to sequential if parallel not available
    inputs.map { |input| Parsanol::Native.parse(grammar_json, input) }
  end
end
98
+
99
# Run +parse_batch+ and pass each result through +transform.apply+.
#
# @param grammar_json [String] JSON-serialized grammar
# @param inputs [Array<String>] Array of input strings to parse
# @param transform [Parsanol::Transform] Object whose +apply+ is called once per result
# @param config [Config] Parallel configuration (optional), forwarded to +parse_batch+
# @return [Array<Object>] Array of transformed results
#
def parse_batch_with_transform(grammar_json, inputs, transform, config: Config.new)
  parse_batch(grammar_json, inputs, config: config).map do |parsed|
    transform.apply(parsed)
  end
end
111
+
112
# Report how many processors Etc.nprocessors sees, falling back to 1 when
# that number cannot be obtained.
#
# LoadError (from the lazy +require+) and NotImplementedError (raised where
# a method is unsupported on the platform) both descend from ScriptError
# rather than StandardError, so they are listed explicitly to make the
# fallback cover them too.
#
# @return [Integer] Number of available cores, or 1 if detection fails
def available_cores
  require 'etc' # loaded lazily; only needed by this helper
  Etc.nprocessors
rescue StandardError, LoadError, NotImplementedError
  1
end
121
+
122
# Estimate a thread count for a given number of inputs.
#
# The result never exceeds +available_cores+ or +input_count+, and is never
# below 1, so an empty or negative +input_count+ still yields a usable value.
#
# @param input_count [Integer] Number of inputs to process
# @return [Integer] Recommended number of threads (always >= 1)
def optimal_threads(input_count)
  # Don't use more threads than inputs
  capped = [available_cores, input_count].min
  # A recommendation of zero or fewer threads is meaningless; floor at one.
  [capped, 1].max
end
131
+ end
132
+ end
133
+ end
Binary file