parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,245 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  # Grammar serializer for the native parser.
  #
  # Walks a tree of Parslet-style atoms and produces the JSON document the
  # native (Rust) parser consumes:
  #
  #   {"atoms":[<atom>, ...],"root":<index>}
  #
  # Every entry of "atoms" is a single-key hash naming the atom type
  # ("Str", "Re", "Sequence", ...). Child atoms are referenced by their
  # integer index into that array, and each distinct atom object is emitted
  # only once.
  class GrammarSerializer
    # Matches the "(?-mix:...)" wrapper that Regexp#to_s puts around a
    # pattern. Anchored with \A/\z so the WHOLE string must be a wrapper
    # (^/$ would accept a single wrapped line inside a longer pattern);
    # /m lets the wrapped pattern itself contain newlines.
    REGEXP_WRAPPER = /\A\(\?[-mix]*:(.+)\)\z/m.freeze

    # Serialize a grammar (root atom) to a JSON string.
    #
    # A fresh serializer is used for every call because the atom table and
    # the deduplication cache are specific to one grammar.
    #
    # @param root [Parsanol::Atoms::Base] the root atom of the grammar
    # @return [String] JSON representation of the grammar
    def self.serialize(root)
      new.serialize(root)
    end

    def initialize
      @atoms = []      # atom table; an atom's id is its index in this array
      @atom_cache = {} # object_id => atom id, used for deduplication
    end

    # Serialize +root+ and everything reachable from it.
    #
    # @param root [Parsanol::Atoms::Base] the root atom of the grammar
    # @return [String] JSON representation of the grammar
    def serialize(root)
      root_id = serialize_atom(root)

      # Only the atom table goes through to_json; the small envelope is
      # written by hand so no intermediate Hash is built.
      %({"atoms":#{@atoms.to_json},"root":#{root_id}})
    end

    private

    # Serialize a single atom (children first) and return its atom id.
    #
    # @param atom [Object] atom to serialize
    # @return [Integer] index of the atom in the atom table
    def serialize_atom(atom)
      cache_key = atom.object_id
      return @atom_cache[cache_key] if @atom_cache.key?(cache_key)

      # Entities reserve their slot before resolving (see serialize_entity).
      return serialize_entity(atom) if atom.is_a?(Parsanol::Atoms::Entity)

      node = build_node(atom)

      # Capture and Scope are transparent: they resolve to the id of an atom
      # that is already in the table. Alias that id instead of appending the
      # Integer to the table as if it were an atom.
      return @atom_cache[cache_key] = node if node.is_a?(Integer)

      atom_id = @atoms.size
      @atom_cache[cache_key] = atom_id
      @atoms << node
      atom_id
    end

    # Dispatch on the atom type.
    #
    # @param atom [Object] atom to convert
    # @return [Hash, Integer] the serialized node, or - for atoms that only
    #   forward to another atom (Entity, Capture, Scope) - the id of that atom
    def build_node(atom)
      case atom
      when Parsanol::Atoms::Entity      then serialize_atom(atom)
      when Parsanol::Atoms::Str         then serialize_str(atom)
      when Parsanol::Atoms::Re          then serialize_re(atom)
      when Parsanol::Atoms::Sequence    then serialize_sequence(atom)
      when Parsanol::Atoms::Alternative then serialize_alternative(atom)
      when Parsanol::Atoms::Repetition  then serialize_repetition(atom)
      when Parsanol::Atoms::Named       then serialize_named(atom)
      when Parsanol::Atoms::Lookahead   then serialize_lookahead(atom)
      when Parsanol::Atoms::Capture     then serialize_capture(atom)
      when Parsanol::Atoms::Scope       then serialize_scope(atom)
      when Parsanol::Atoms::Dynamic     then serialize_dynamic(atom)
      else
        # Fallback for unknown atom types
        serialize_unknown(atom)
      end
    end

    def serialize_str(atom)
      { 'Str' => { 'pattern' => atom.str } }
    end

    # Regexp#to_s produces "(?-mix:pattern)"; the native parser only wants
    # the bare pattern, so the wrapper is stripped when the whole string is
    # one. Anything else is passed through untouched.
    def serialize_re(atom)
      pattern = atom.match
      wrapped = REGEXP_WRAPPER.match(pattern)
      pattern = wrapped[1] if wrapped

      { 'Re' => { 'pattern' => pattern } }
    end

    def serialize_sequence(atom)
      { 'Sequence' => { 'atoms' => atom.parslets.map { |child| serialize_atom(child) } } }
    end

    def serialize_alternative(atom)
      { 'Alternative' => { 'atoms' => atom.alternatives.map { |child| serialize_atom(child) } } }
    end

    def serialize_repetition(atom)
      {
        'Repetition' => {
          'atom' => serialize_atom(atom.parslet),
          'min' => atom.min,
          'max' => atom.max
        }
      }
    end

    def serialize_named(atom)
      {
        'Named' => {
          'name' => atom.name.to_s,
          'atom' => serialize_atom(atom.parslet)
        }
      }
    end

    # An Entity is a lazy reference to the atom its rule block builds.
    #
    # The slot is reserved and cached BEFORE the entity is resolved so that
    # a rule referring to itself finds the cached id instead of recursing
    # forever; the placeholder is overwritten once the body is known.
    #
    # @param atom [Parsanol::Atoms::Entity] entity to resolve
    # @return [Integer] id of the slot reserved for the entity
    def serialize_entity(atom)
      atom_id = @atoms.size
      @atom_cache[atom.object_id] = atom_id
      @atoms << nil

      parslet = resolve_entity(atom)

      @atoms[atom_id] =
        if parslet
          # The body is serialized into the entity's own slot. When it only
          # forwards to another atom (nested Entity, Capture, Scope) the slot
          # holds an 'Entity' reference to that atom's id.
          node = build_node(parslet)
          node.is_a?(Integer) ? { 'Entity' => { 'atom' => node } } : node
        else
          # No usable rule body: emit a literal that real input cannot contain
          { 'Str' => { 'pattern' => "\x00__UNIMPLEMENTED_ENTITY_#{atom.name}__" } }
        end

      atom_id
    end

    # Resolve an entity to its parslet, or nil when that is not possible.
    #
    # NotImplementedError is a ScriptError, so a bare `rescue` would let it
    # through. Parslet-style entities raise it when the rule block returns
    # nil - presumably Parsanol's Entity does the same; verify against
    # Parsanol::Atoms::Entity.
    def resolve_entity(atom)
      atom.parslet
    rescue StandardError, NotImplementedError
      nil
    end

    def serialize_lookahead(atom)
      {
        'Lookahead' => {
          'atom' => serialize_atom(atom.bound_parslet),
          'positive' => atom.positive
        }
      }
    end

    # Capture stores matched text for later use by Dynamic. The native
    # parser doesn't support cross-atom captures, so the capture is a no-op
    # and only the inner atom is serialized. Grammars using capture+dynamic
    # will need Ruby fallback.
    #
    # @return [Integer] id of the inner atom
    def serialize_capture(atom)
      serialize_atom(atom.parslet)
    end

    # Scope creates a new capture scope. The native parser has no scoped
    # captures, so only the atom built by the scope's block is serialized.
    #
    # @return [Integer, Hash] id of the inner atom, or an "unknown"
    #   placeholder node when the block fails or yields nothing
    def serialize_scope(atom)
      inner =
        begin
          atom.block.call
        rescue StandardError
          nil
        end

      inner ? serialize_atom(inner) : serialize_unknown(atom)
    end

    # Dynamic evaluates a Ruby block at parse time, which cannot be
    # expressed in JSON. A marker literal starting with a NUL byte is
    # emitted instead so this portion of the grammar fails to match.
    def serialize_dynamic(_atom)
      { 'Str' => { 'pattern' => "\x00__DYNAMIC_NOT_SUPPORTED__" } }
    end

    # Placeholder for atom types this serializer does not know.
    #
    # NOTE(review): the placeholder is an EMPTY literal. An empty literal
    # would normally match trivially rather than fail, so unsupported atoms
    # may be skipped silently instead of producing a parse error - confirm
    # against the native parser before relying on either behaviour.
    def serialize_unknown(_atom)
      { 'Str' => { 'pattern' => '' } }
    end
  end
end
@@ -0,0 +1,438 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ module Native
5
+ # Transforms native AST format to Parslet-compatible format
6
+ #
7
+ # Native format from Rust parser:
8
+ # - Strings: "text"
9
+ # - Sequences: [":sequence", item1, item2, ...]
10
+ # - Repetitions: [":repetition", item1, item2, ...]
11
+ # - Named captures: {"name" => value}
12
+ #
13
+ # Parslet format:
14
+ # - Strings: "text" (with Parsanol::Slice for position info)
15
+ # - Sequences: merged hash {:key1 => val1, :key2 => val2, ...}
16
+ # - Repetitions: array of items (or "" if empty string-like)
17
+ # - Named wrapping Repetition: {:name => [{:name => item1}, {:name => item2}, ...]}
18
+ #
19
+ class AstTransformer
20
+ # Frozen string constants for tag comparisons (avoid allocations)
21
+ SEQUENCE_TAG = ':sequence'.freeze
22
+ REPETITION_TAG = ':repetition'.freeze
23
+ EMPTY_STRING = ''.freeze
24
+ EMPTY_ARRAY = [].freeze
25
+ EMPTY_HASH = {}.freeze
26
+
27
+ # Symbol cache to avoid repeated string-to-symbol conversions
28
+ # This is a class variable to share across all transformations
29
+ @@symbol_cache = {}
30
+
31
# Convert one node of the native AST into its Parslet-compatible form.
#
# Arrays go through transform_array and hashes through transform_hash;
# any other value is handed back untouched.
#
# @param ast [Object] node produced by the native parser
# @return [Object] the transformed node
def self.transform(ast)
  if ast.is_a?(Array)
    transform_array(ast)
  elsif ast.is_a?(Hash)
    transform_hash(ast)
  else
    ast
  end
end
41
+
42
# Transform several native ASTs in one call.
#
# @param asts [Enumerable] native AST nodes
# @return [Array] the result of transform for each node, in order
def self.transform_batch(asts)
  asts.map(&method(:transform))
end
47
+
48
# Turn a hash key into a Symbol, remembering earlier conversions.
#
# Symbols are returned as they are; any other key is converted with
# to_sym once and then served from the class-wide symbol cache.
#
# @param key [Symbol, String] key coming from the native parser
# @return [Symbol]
def self.cached_symbol(key)
  if key.is_a?(Symbol)
    key
  else
    @@symbol_cache[key] ||= key.to_sym
  end
end
53
+
54
# Transform an array node coming from the native parser.
#
# * empty array                     -> EMPTY_ARRAY
# * [":sequence", *items]           -> flatten_sequence of the transformed items
# * [":repetition", *items]         -> flatten_repetition of the transformed items
# * [":<other tag>", ...]           -> every element (tag included) transformed
# * untagged array                  -> treated as a sequence
#
# A tagged sequence/repetition without items yields EMPTY_ARRAY.
#
# @param arr [Array] native array node
# @return [Object] Parslet-compatible value
def self.transform_array(arr)
  return EMPTY_ARRAY if arr.empty? # Match Parsanol Ruby mode behavior

  tag = arr.first

  # Untagged arrays from the native parser are sequences
  unless tag.is_a?(String) && tag.start_with?(':')
    return flatten_sequence(arr.map { |item| transform(item) })
  end

  # Unrecognised tags: transform element by element, nothing is stripped
  known_tag = tag == SEQUENCE_TAG || tag == REPETITION_TAG
  return arr.map { |item| transform(item) } unless known_tag

  payload_size = arr.length - 1
  return EMPTY_ARRAY if payload_size.zero?

  # Read the items by index (skipping the tag) so no arr[1..] slice is built
  items = Array.new(payload_size) { |idx| transform(arr[idx + 1]) }

  tag == SEQUENCE_TAG ? flatten_sequence(items) : flatten_repetition(items)
end
95
+
96
# Transform a hash node coming from the native parser.
#
# Hashes with exactly one entry take the dedicated single-key path; every
# other size (including the empty hash) goes through the multi-key path.
#
# @param hash [Hash] native hash node
# @return [Hash] hash with symbol keys and transformed values
def self.transform_hash(hash)
  hash.length == 1 ? transform_single_key_hash(hash) : transform_multi_key_hash(hash)
end
106
+
107
# Transform a hash that holds exactly one key/value pair.
#
# The value counts as a repetition when any of these holds:
# * the raw value is an empty array,
# * the raw value is an array tagged with REPETITION_TAG,
# * the raw value is a non-empty array whose items are all single-key
#   hashes keyed by the parent key (e.g. [{x: 1}, {x: 2}] under :x),
# * the transformed value is such an array (keyed by the symbolised key).
#
# Repetitions are finished by transform_repetition_value, other arrays by
# transform_array_value; everything else is stored under the symbol key.
#
# @param hash [Hash] single-entry hash from the native parser
# @return [Hash] single-entry hash keyed by a Symbol
def self.transform_single_key_hash(hash)
  key, value = hash.first
  sym_key = cached_symbol(key)

  # These checks look at the RAW value, so they run before transform
  raw_repetition =
    if value.is_a?(Array)
      value.empty? ||
        (value.first.is_a?(String) && value.first == REPETITION_TAG) ||
        value.all? { |item| item.is_a?(Hash) && item.size == 1 && item.key?(key) }
    else
      false
    end

  transformed = transform(value)

  # An array made only of one-character strings is a character repetition:
  # join it back into a single string
  if transformed.is_a?(Array) && !transformed.empty? &&
     transformed.all? { |item| item.is_a?(String) && item.length == 1 }
    transformed = transformed.join
  end

  # Untagged repetition: every item already carries the parent key
  transformed_repetition =
    transformed.is_a?(Array) && !transformed.empty? &&
    transformed.all? { |item| item.is_a?(Hash) && item.size == 1 && item.key?(sym_key) }

  if raw_repetition || transformed_repetition
    return transform_repetition_value(sym_key, transformed)
  end
  return transform_array_value(sym_key, transformed) if transformed.is_a?(Array)

  # Nested hash or simple value (string, nil, etc.)
  { sym_key => transformed }
end
157
+
158
# Build the result for a named atom wrapping a repetition.
#
# * empty array or empty string -> { key => EMPTY_ARRAY }
# * array whose items are all hashes containing the key -> kept as is
#   (avoids wrapping twice)
# * any other array -> every item wrapped as { key => item }
# * anything else -> stored unchanged
#
# @param sym_key [Symbol] name of the capture
# @param transformed [Object] already transformed value
# @return [Hash] single-entry hash keyed by sym_key
def self.transform_repetition_value(sym_key, transformed)
  unless transformed.is_a?(Array)
    # An empty repetition is reported as [], not ""
    return { sym_key => (transformed == EMPTY_STRING ? EMPTY_ARRAY : transformed) }
  end

  return { sym_key => EMPTY_ARRAY } if transformed.empty?

  already_named = transformed.all? { |item| item.is_a?(Hash) && item.key?(sym_key) }
  wrapped = already_named ? transformed : transformed.map { |item| { sym_key => item } }

  { sym_key => wrapped }
end
177
+
178
# Build the result for a named atom whose value is an array that was not
# recognised as a repetition.
#
# An empty array is stored as EMPTY_STRING (sequence semantics); empty
# repetitions are expected to have been routed to
# transform_repetition_value by the caller. Every non-empty array is
# stored unchanged: items that already carry the parent key, items that
# are hashes with other keys (e.g. [{name: "b"}, {name: "c"}] from
# (str(',') >> item).repeat.as(:rest)) and mixed content are all kept
# without wrapping.
#
# @param sym_key [Symbol] name of the capture
# @param transformed [Array] already transformed array value
# @return [Hash] single-entry hash keyed by sym_key
def self.transform_array_value(sym_key, transformed)
  { sym_key => (transformed.empty? ? EMPTY_STRING : transformed) }
end
199
+
200
# Transform a hash that does not have exactly one entry.
#
# Each value is transformed and stored under its symbolised key:
# * value tagged with REPETITION_TAG:
#   - array result whose items are all hashes containing the key -> kept
#   - other array result -> every item wrapped as { key => item }
#   - empty string -> EMPTY_STRING; anything else -> unchanged
# * untagged value:
#   - empty array -> EMPTY_ARRAY
#   - array made only of hashes -> every item wrapped as { key => item }
#   - anything else -> unchanged
#
# @param hash [Hash] native hash node
# @return [Hash] new hash with symbol keys
def self.transform_multi_key_hash(hash)
  hash.each_with_object({}) do |(key, value), result|
    sym_key = cached_symbol(key)

    # The tag is only visible on the raw value, so check before transforming
    tagged = value.is_a?(Array) && !value.empty? &&
             value.first.is_a?(String) && value.first == REPETITION_TAG

    transformed = transform(value)

    result[sym_key] =
      if !transformed.is_a?(Array)
        (tagged && transformed == EMPTY_STRING) ? EMPTY_STRING : transformed
      elsif tagged
        already_named = transformed.all? { |item| item.is_a?(Hash) && item.key?(sym_key) }
        already_named ? transformed : transformed.map { |item| { sym_key => item } }
      elsif transformed.empty?
        EMPTY_ARRAY
      elsif transformed.all? { |item| item.is_a?(Hash) }
        transformed.map { |item| { sym_key => item } }
      else
        transformed
      end
  end
end
241
+
242
+ # Flatten sequence items according to Parslet semantics:
243
+ # 1. If ALL items are hashes: keep the array only when they share one single key with non-hash values (a repetition result); otherwise merge them
244
+ # 2. If there are named captures (hashes) among strings, return ONLY the merged hash (discard strings)
245
+ # 3. If only strings, join them (or return single string)
246
+ # 4. A one-item sequence is returned unchanged (still an array); unwrapping is left to the caller
247
+ #
248
+ # This matches Parslet's behavior where:
249
+ # str('SCHEMA') >> str(' ') >> match('[a-z]').repeat(1).as(:name) >> str(';')
250
+ # returns: {:name => "test"} (not ["SCHEMA ", {:name=>"test"}, ";"])
251
+ #
252
+ # But for repetitions with named captures:
253
+ # match('[a-z]').as(:x).repeat(2)
254
+ # returns: [{:x => "a"}, {:x => "b"}] (array of hashes, NOT merged!)
255
+ #
256
+ # Optimized: Single-pass with direct result building
257
# Collapse the items of a sequence into a single result.
#
# Outcomes, in the order they are tried:
#   * no items                         -> EMPTY_ARRAY
#   * exactly one item                 -> +items+ itself (not unwrapped)
#   * any non-empty Array among items  -> flat array of the top-level hashes
#                                         plus the arrays' elements (unwrapped
#                                         when only one element remains)
#   * two or more hashes and nothing else (nil entries are ignored):
#       - same single key, all values hashes -> { key => merged inner hashes }
#       - same single key, other values      -> +items+ (repetition result)
#       - anything else                      -> all hashes merged into one
#   * hashes mixed with other items    -> merged hash, other items discarded
#   * strings only                     -> the single string, or all joined
#   * only nil items                   -> EMPTY_ARRAY
#   * otherwise                        -> +items+
#
# @param items [Array] sequence items (hashes, strings, arrays, nil, others)
# @return [Array, Hash, String] see the list above
def self.flatten_sequence(items)
  return EMPTY_ARRAY if items.empty? # Match Parsanol Ruby mode

  # DON'T unwrap single items - let the caller handle this.
  # This preserves repetition results like [{:x => 1}].
  return items if items.length == 1

  # Single pass: categorize items.
  merged_hash = {}
  string_parts = []
  hash_count = 0
  total_items = 0 # every item except nil
  has_non_empty_array = false

  items.each do |item|
    case item
    when Hash
      merged_hash.merge!(item)
      hash_count += 1
      total_items += 1
    when String
      string_parts << item
      total_items += 1
    when Array
      # A non-empty array (repetition result with content) forces the whole
      # sequence to stay an array; an empty one is simply ignored.
      has_non_empty_array = true unless item.empty?
      total_items += 1
    when nil
      # Skip nil values (from lookahead or optional that didn't match).
    else
      total_items += 1
    end
  end

  # SEQUENCE CONTAINING A NON-EMPTY REPETITION: return an array, don't merge.
  # Example: factor.as(:left) >> (op >> factor).as(:rhs).repeat
  #   "a+b" produces [{left: {...}}, {rhs: {...}}]
  #   "a"   produces {left: {...}} (empty repetition, handled further below)
  if has_non_empty_array
    flattened = []
    items.each do |item|
      case item
      when Hash then flattened << item
      when Array then flattened.concat(item)
      end
      # Strings and any other top-level items are dropped here.
    end
    return flattened.length == 1 ? flattened.first : flattened
  end

  # ONLY HASHES (at least two).
  if hash_count == total_items && hash_count > 1
    first_item = items.first
    if first_item.is_a?(Hash) && first_item.keys.length == 1
      wrapper_key = first_item.keys.first

      # NOTE: a nil entry in items makes this check fail, so such input
      # falls through to the plain merge below.
      same_wrapper = items.all? do |item|
        item.is_a?(Hash) && item.keys.length == 1 && item.keys.first == wrapper_key
      end

      if same_wrapper
        inner_values = items.map { |item| item[wrapper_key] }

        # Repetition pattern: simple values under one key - keep the array.
        #   [{:letter => "a"}, {:letter => "b"}] stays as it is.
        return items unless inner_values.all? { |inner| inner.is_a?(Hash) }

        # Wrapper pattern: merge the inner hashes under the shared key.
        #   [{:syntax => {:spaces => 1}}, {:syntax => {:decl => 2}}]
        #   => {:syntax => {:spaces => 1, :decl => 2}}
        merged_inner = inner_values.each_with_object({}) { |inner, acc| acc.merge!(inner) }
        return { wrapper_key => merged_inner }
      end
    end

    # Different keys (or multi-key hashes): sequence semantics merge them
    # into a single hash, e.g. [{:a => 1}, {:b => 2}] => {:a => 1, :b => 2}.
    return merged_hash
  end

  # Named captures mixed with other things: return ONLY the merged hash
  # (unnamed strings are discarded).
  return merged_hash unless merged_hash.empty?

  # No named captures - a lone string is returned as-is, several are joined.
  unless string_parts.empty?
    return string_parts.length == 1 ? string_parts.first : string_parts.join
  end

  # Nothing but nil entries.
  return EMPTY_ARRAY if total_items.zero?

  # Only other items (empty arrays, non-string scalars, ...). The single-item
  # case was returned at the top, so there are always at least two here.
  items
end
401
+
402
+ # Parslet/Parsanol repetition semantics:
403
+ # 1. Return [] for empty repetitions
404
+ # 2. If all items are strings, join them
405
+ # 3. Otherwise return array
406
# Collapse the items of a repetition into a single result.
#
# Array items are expanded one level; everything else is kept as-is.
#
# @param items [Array] repetition items
# @return [Array, String] EMPTY_ARRAY when nothing remains after expansion,
#   the joined string when every remaining item is a String, otherwise the
#   flat array
def self.flatten_repetition(items)
  return EMPTY_ARRAY if items.empty?

  # Wrap non-arrays so flat_map expands exactly one level of real Arrays.
  flat_items = items.flat_map { |item| item.is_a?(Array) ? item : [item] }

  return EMPTY_ARRAY if flat_items.empty?

  # If all strings, join them (string-like repetition).
  flat_items.all? { |item| item.is_a?(String) } ? flat_items.join : flat_items
end
434
+ end
435
+
436
+ private_constant :AstTransformer
437
+ end
438
+ end