parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,630 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
+ module Parsanol
6
+ module Native
7
+ # Core parsing functionality using Rust native extension
8
+ #
9
+ # Provides three parsing modes:
10
+ # - :ruby - Parse and transform to Parslet-compatible format
11
+ # - :json - Parse and return JSON-serialized AST
12
+ # - :slice - Parse and return raw native format (fastest)
13
+ #
14
+ module Parser
15
+ # Two-level grammar cache (module-level for proper initialization)
16
+ GRAMMAR_HASH_CACHE = {} # object_id => hash_key
17
+ GRAMMAR_CACHE = {} # hash_key => grammar_json
18
+
19
+ class << self
20
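+ # Cached availability check. NOTE: the assignment below runs with self == Parser.singleton_class, so it sets an ivar on the singleton class, not the @cached_available that available? reads (self == Parser there); that one simply starts out unset (nil).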
21
+ @cached_available = nil
22
+
23
+ # Check if native extension is available
24
def available?
  # nil means "not probed yet"; any other value is the memoized answer.
  if @cached_available.nil?
    @cached_available =
      begin
        require 'parsanol/parsanol_native'
        Parsanol::Native.is_available
      rescue LoadError
        # The extension could not be required; remember that as false.
        false
      end
  end
  @cached_available
end
33
+
34
+ # Parse using native engine
35
+ # @param grammar_json [String] JSON-serialized grammar
36
+ # @param input [String] Input string to parse
37
+ # @return Ruby AST from parsing
38
def parse(grammar_json, input)
  raise LoadError, 'Native parser not available. Run `rake compile` to build.' unless available?

  # The native parse_batch result is passed, together with the original
  # input, to decode_flat (defined elsewhere) which produces the Ruby AST.
  decode_flat(Parsanol::Native.parse_batch(grammar_json, input), input)
end
48
+
49
+ # Parse a grammar with automatic serialization and caching
50
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
51
+ # @param input [String] Input string to parse
52
+ # @return Ruby AST from parsing
53
def parse_with_grammar(root_atom, input)
  # A Parsanol::Parser may be passed in place of an atom; use its root then.
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse(serialize_grammar(atom), input)
end
59
+
60
+ # Parse and transform to Parslet-compatible format
61
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
62
+ # @param input [String] Input string to parse
63
+ # @return Ruby AST in Parslet-compatible format
64
def parse_parslet_compatible(root_atom, input)
  # A Parsanol::Parser may be passed in place of an atom; use its root then.
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  # Parse to the raw AST, then run it through AstTransformer.transform.
  AstTransformer.transform(parse_with_grammar(atom, input))
end
70
+
71
+ # Parse multiple inputs with the same grammar (more efficient)
72
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
73
+ # @param inputs [Array<String>] Array of input strings to parse
74
+ # @return [Array] Array of raw Ruby ASTs from parsing
75
def parse_batch_inputs(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  # Serialize the grammar a single time and reuse the JSON for each input.
  serialized = serialize_grammar(atom)
  inputs.map { |text| parse(serialized, text) }
end
81
+
82
+ # Parse multiple inputs with transformation
83
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
84
+ # @param inputs [Array<String>] Array of input strings to parse
85
+ # @return [Array] Array of transformed Ruby ASTs
86
def parse_batch_with_transform(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  serialized = serialize_grammar(atom)
  # Parse every input first, then hand the whole list to the batch transformer.
  AstTransformer.transform_batch(inputs.map { |text| parse(serialized, text) })
end
95
+
96
+ # Parse without transformation (faster for raw AST access)
97
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
98
+ # @param input [String] Input string to parse
99
+ # @return Raw Ruby AST from parsing (native format)
100
def parse_raw(root_atom, input)
  # A Parsanol::Parser may be passed in place of an atom; use its root then.
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse_with_grammar(atom, input)
end
105
+
106
+ # Serialize a grammar to JSON, with two-level caching
107
+ # Level 1: object_id => hash_key (avoids grammar traversal)
108
+ # Level 2: hash_key => grammar_json (avoids serialization)
109
+ # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
110
+ # @return [String] JSON string
111
def serialize_grammar(root_atom)
  identity = root_atom.object_id

  # Level 1: reuse the structural hash recorded for this object, or compute
  # and record it now.
  # NOTE(review): entries are keyed by object_id and only removed by
  # clear_cache — confirm that is acceptable for long-lived processes.
  structure_key = GRAMMAR_HASH_CACHE[identity]
  unless structure_key
    structure_key = grammar_structure_hash(root_atom)
    GRAMMAR_HASH_CACHE[identity] = structure_key
  end

  # Level 2: serialize only when no JSON is stored under that key yet.
  GRAMMAR_CACHE[structure_key] ||= GrammarSerializer.serialize(root_atom)
end
126
+
127
+ # Clear grammar caches (call if grammar changes)
128
def clear_cache
  # Empty both levels; `.last` keeps the return value the emptied GRAMMAR_CACHE.
  [GRAMMAR_HASH_CACHE, GRAMMAR_CACHE].each(&:clear).last
end
132
+
133
+ # Get cache statistics
134
def cache_stats
  # Snapshot of both cache levels: entry counts plus the level-2 keys.
  stats = {}
  stats[:hash_cache_size] = GRAMMAR_HASH_CACHE.size
  stats[:grammar_cache_size] = GRAMMAR_CACHE.size
  stats[:grammar_keys] = GRAMMAR_CACHE.keys
  stats
end
141
+
142
+ # ===== Serialized Mode (JSON Output) =====
143
+
144
+ # Parse input and return JSON string
145
+ # Uses native parsing and serializes the result to JSON
146
+ #
147
+ # @param grammar_json [String] JSON-serialized grammar
148
+ # @param input [String] Input string to parse
149
+ # @return [String] JSON string representing the result
150
def parse_to_json(grammar_json, input)
  # NOTE(review): relies on to_json being available on the parse result;
  # presumably the json library is loaded elsewhere — confirm.
  return parse(grammar_json, input).to_json if available?

  raise LoadError,
        "Serialized mode requires native extension. " \
        "Run `rake compile` to build the extension."
end
161
+
162
+ # Parse and return direct Ruby objects via FFI
163
+ # Uses ZeroCopy mode - Rust constructs Ruby objects directly via magnus FFI
164
+ # This bypasses the u64 serialization step for maximum performance.
165
+ #
166
+ # Slice information is preserved: InputRef nodes from Rust are returned
167
+ # directly as Parsanol::Slice objects (no intermediate hash conversion needed).
168
+ #
169
+ # @param grammar_json [String] JSON-serialized grammar
170
+ # @param input [String] Input string to parse
171
+ # @param type_map [Hash] Mapping of rule names to Ruby classes (not used in this mode)
172
+ # @return [Object] Direct Ruby object (type depends on grammar)
173
def parse_to_objects(grammar_json, input, type_map = nil)
  # type_map is not used by this method; the native result is returned as-is.
  return Parsanol::Native.parse_to_ruby_objects(grammar_json, input) if available?

  raise LoadError,
        "ZeroCopy mode requires native extension. " \
        "Run `rake compile` to build the extension."
end
184
+
185
+ # Recursively convert slice hashes to Parsanol::Slice objects
186
+ # Rust returns { "_slice" => true, "str" => "...", "offset" => N, "length" => N }
187
+ # for InputRef nodes, which we convert to Slice objects preserving position info.
188
+ #
189
+ # @param obj [Object] The object to convert (may be Hash, Array, or leaf value)
190
+ # @param input [String] The original input string (for Slice source reference)
191
+ # @return [Object] The converted object with Slice objects in place of slice hashes
192
def convert_slices(obj, input)
  case obj
  when Array
    # Convert each element in turn.
    obj.map { |element| convert_slices(element, input) }
  when Hash
    # Ordinary hashes keep their keys; only the values are converted.
    return obj.transform_values { |value| convert_slices(value, input) } unless obj["_slice"] == true

    # A hash whose "_slice" entry is exactly true is a slice marker.
    Parsanol::Slice.new(obj["offset"], obj["str"])
  else
    # Anything else is handed back untouched.
    obj
  end
end
210
+
211
+ # ===== Source Location Tracking =====
212
+
213
+ # Parse with source location tracking
214
+ # Returns both the AST and a hash of spans
215
+ #
216
+ # @param grammar_json [String] JSON-serialized grammar
217
+ # @param input [String] Input string to parse
218
+ # @return [Array<(Object, Hash)>] Tuple of [parsed_result, spans_hash]
219
def parse_with_spans(grammar_json, input)
  # _parse_with_spans is defined elsewhere (presumably by the native
  # extension — confirm); it is only reached when available? is truthy.
  return _parse_with_spans(grammar_json, input) if available?

  raise LoadError,
        "Source location tracking requires native extension. " \
        "Run `rake compile` to build the extension."
end
228
+
229
+ # Get span for a specific node
230
+ #
231
+ # @param result [Object] Parse result from parse_with_spans
232
+ # @param node_id [Integer] Node identifier
233
+ # @return [Hash] Span information {start: {offset, line, column}, end: {...}}
234
def get_span(result, node_id)
  raise LoadError, "Source location tracking requires native extension." unless available?

  # _get_span is defined elsewhere (presumably by the native extension — confirm).
  _get_span(result, node_id)
end
241
+
242
+ # ===== Grammar Composition =====
243
+
244
+ # Import another grammar with optional prefix
245
+ #
246
+ # @param builder_json [String] GrammarBuilder JSON
247
+ # @param grammar_json [String] Grammar to import
248
+ # @param prefix [String, nil] Optional prefix for imported rules
249
+ # @return [String] Updated GrammarBuilder JSON
250
def grammar_import(builder_json, grammar_json, prefix = nil)
  raise LoadError, "Grammar composition requires native extension." unless available?

  # _grammar_import is defined elsewhere (presumably by the native extension —
  # confirm); prefix is forwarded unchanged, including the nil default.
  _grammar_import(builder_json, grammar_json, prefix)
end
257
+
258
+ # Get mutable reference to a rule
259
+ #
260
+ # @param builder_json [String] GrammarBuilder JSON
261
+ # @param rule_name [String] Name of the rule to modify
262
+ # @return [String] Updated GrammarBuilder JSON
263
def grammar_rule_mut(builder_json, rule_name)
  raise LoadError, "Grammar composition requires native extension." unless available?

  # _grammar_rule_mut is defined elsewhere (presumably by the native extension — confirm).
  _grammar_rule_mut(builder_json, rule_name)
end
270
+
271
+ # ===== Streaming Parser =====
272
+
273
+ # Create a new streaming parser
274
+ #
275
+ # @param grammar_json [String] JSON-serialized grammar
276
+ # @return [Object] Streaming parser instance
277
def streaming_parser_new(grammar_json)
  raise LoadError, "Streaming parser requires native extension." unless available?

  # _streaming_parser_new is defined elsewhere (presumably by the native extension — confirm).
  _streaming_parser_new(grammar_json)
end
284
+
285
+ # Add a chunk to the streaming parser
286
+ #
287
+ # @param parser [Object] Streaming parser instance
288
+ # @param chunk [String] Input chunk to add
289
+ # @return [Boolean] True if more chunks needed, false if ready
290
def streaming_parser_add_chunk(parser, chunk)
  raise LoadError, "Streaming parser requires native extension." unless available?

  # _streaming_parser_add_chunk is defined elsewhere (presumably by the native
  # extension — confirm); its result is returned unchanged.
  _streaming_parser_add_chunk(parser, chunk)
end
297
+
298
+ # Parse what we have so far
299
+ #
300
+ # @param parser [Object] Streaming parser instance
301
+ # @return [Object, nil] Parsed result or nil if need more data
302
def streaming_parser_parse_chunk(parser)
  raise LoadError, "Streaming parser requires native extension." unless available?

  # _streaming_parser_parse_chunk is defined elsewhere (presumably by the
  # native extension — confirm); its result is returned unchanged.
  _streaming_parser_parse_chunk(parser)
end
309
+
310
+ # ===== Incremental Parser =====
311
+
312
+ # Create a new incremental parser
313
+ #
314
+ # @param grammar_json [String] JSON-serialized grammar
315
+ # @param initial_input [String] Initial input string
316
+ # @return [Object] Incremental parser instance
317
def incremental_parser_new(grammar_json, initial_input)
  raise LoadError, "Incremental parser requires native extension." unless available?

  # _incremental_parser_new is defined elsewhere (presumably by the native extension — confirm).
  _incremental_parser_new(grammar_json, initial_input)
end
324
+
325
+ # Apply an edit to the incremental parser
326
+ #
327
+ # @param parser [Object] Incremental parser instance
328
+ # @param start [Integer] Start position of edit
329
+ # @param deleted [Integer] Number of characters deleted
330
+ # @param inserted [String] Text to insert
331
+ # @return [Object] Updated parser state
332
def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
  raise LoadError, "Incremental parser requires native extension." unless available?

  # _incremental_parser_apply_edit is defined elsewhere (presumably by the
  # native extension — confirm); all four arguments are forwarded in order.
  _incremental_parser_apply_edit(parser, start, deleted, inserted)
end
339
+
340
+ # Reparse with changes
341
+ #
342
+ # @param parser [Object] Incremental parser instance
343
+ # @param new_input [String, nil] Optional new input (if not using apply_edit)
344
+ # @return [Object] Parse result
345
def incremental_parser_reparse(parser, new_input = nil)
  raise LoadError, "Incremental parser requires native extension." unless available?

  # _incremental_parser_reparse is defined elsewhere (presumably by the native
  # extension — confirm); new_input is forwarded even when it is nil.
  _incremental_parser_reparse(parser, new_input)
end
352
+
353
+ # ===== Streaming Builder =====
354
+
355
+ # Parse with a streaming builder for maximum performance.
356
+ # The builder receives callbacks as parsing progresses, eliminating
357
+ # intermediate AST construction.
358
+ #
359
+ # @param grammar_json [String] JSON-serialized grammar
360
+ # @param input [String] Input string to parse
361
+ # @param builder [Object] Object including BuilderCallbacks module
362
+ # @return [Object] Result of builder.finish
363
def parse_with_builder(grammar_json, input, builder)
  # _parse_with_builder is defined elsewhere (presumably by the native
  # extension — confirm); it is only reached when available? is truthy.
  return _parse_with_builder(grammar_json, input, builder) if available?

  raise LoadError,
        "Streaming builder requires native extension. " \
        "Run `rake compile` to build the extension."
end
372
+
373
+ # ===== Parallel Parsing =====
374
+
375
+ # Parse multiple inputs in parallel using rayon.
376
+ # Provides linear speedup on multi-core systems.
377
+ #
378
+ # @param grammar_json [String] JSON-serialized grammar
379
+ # @param inputs [Array<String>] Array of input strings to parse
380
+ # @param num_threads [Integer, nil] Number of threads (nil = auto-detect)
381
+ # @return [Array<Object>] Array of parse results in same order as inputs
382
def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
  # The keyword num_threads is passed on positionally. _parse_batch_parallel
  # is defined elsewhere (presumably by the native extension — confirm).
  return _parse_batch_parallel(grammar_json, inputs, num_threads) if available?

  raise LoadError,
        "Parallel parsing requires native extension. " \
        "Run `rake compile` to build the extension."
end
391
+
392
+ # ===== Security / Limits =====
393
+
394
# Parse with explicit resource limits, intended for untrusted input.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @param max_input_size [Integer] Maximum input size in bytes
#   (default: 100 * 1024 * 1024, i.e. 100MB)
# @param max_recursion_depth [Integer] Maximum recursion depth (default: 1000)
# @return [Object] Parse result from the underlying implementation
# @raise [LoadError] when `available?` is falsy
def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
  # Both limits are forwarded positionally; enforcement happens downstream.
  return _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth) if available?

  raise LoadError, "Security limits require native extension. " \
                   "Run `rake compile` to build the extension."
end
410
+
411
+ # ===== Debug Tools =====
412
+
413
# Parse with tracing switched on, for debugging.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [Array<(Object, Array)>] Documented upstream as the pair
#   [parse_result, trace_events]; this wrapper returns the backend value as-is
# @raise [LoadError] when `available?` is falsy
def parse_with_trace(grammar_json, input)
  return _parse_with_trace(grammar_json, input) if available?

  raise LoadError, "Debug tracing requires native extension. " \
                   "Run `rake compile` to build the extension."
end
427
+
428
# Render a grammar as Mermaid diagram source.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] Mermaid diagram source produced by the backend
# @raise [LoadError] when `available?` is falsy
def grammar_to_mermaid(grammar_json)
  return _grammar_to_mermaid(grammar_json) if available?

  raise LoadError, "Grammar visualization requires native extension. " \
                   "Run `rake compile` to build the extension."
end
441
+
442
# Render a grammar as GraphViz DOT source.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] GraphViz DOT source produced by the backend
# @raise [LoadError] when `available?` is falsy
def grammar_to_dot(grammar_json)
  return _grammar_to_dot(grammar_json) if available?

  raise LoadError, "Grammar visualization requires native extension. " \
                   "Run `rake compile` to build the extension."
end
455
+
456
+ private
457
+
458
# Ruby-side placeholder for the incremental reparse backend; as written it
# always raises. NOTE(review): presumably replaced by the native extension
# when that is loaded - confirm.
#
# @raise [NotImplementedError] always
def _incremental_parser_reparse(parser, new_input)
  raise(NotImplementedError, "Native extension method not available")
end
461
+
462
# Forward a streaming-builder parse to Parsanol::Native.parse_with_builder,
# which the original comment describes as a function exposed by the native
# Rust extension.
#
# NOTE(review): if this method is itself defined on Parsanol::Native and the
# Ruby-level parse_with_builder wrapper shadows the native one, this call
# would re-enter that wrapper and recurse - confirm the native definition is
# the one that gets invoked here.
#
# @return [Object] Whatever Parsanol::Native.parse_with_builder returns
def _parse_with_builder(grammar_json, input, builder)
  backend = Parsanol::Native
  backend.parse_with_builder(grammar_json, input, builder)
end
467
+
468
# Ruby-side placeholder for the parallel batch backend; as written it always
# raises. NOTE(review): presumably replaced by the native extension when that
# is loaded - confirm.
#
# @raise [NotImplementedError] always
def _parse_batch_parallel(grammar_json, inputs, num_threads)
  raise(NotImplementedError, "Native extension method not available")
end
471
+
472
# Ruby-side placeholder for the limit-enforcing parse backend; as written it
# always raises. NOTE(review): presumably replaced by the native extension
# when that is loaded - confirm.
#
# @raise [NotImplementedError] always
def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
  raise(NotImplementedError, "Native extension method not available")
end
475
+
476
# Ruby-side placeholder for the tracing parse backend; as written it always
# raises. NOTE(review): presumably replaced by the native extension when that
# is loaded - confirm.
#
# @raise [NotImplementedError] always
def _parse_with_trace(grammar_json, input)
  raise(NotImplementedError, "Native extension method not available")
end
479
+
480
# Ruby-side placeholder for the Mermaid rendering backend; as written it
# always raises. NOTE(review): presumably replaced by the native extension
# when that is loaded - confirm.
#
# @raise [NotImplementedError] always
def _grammar_to_mermaid(grammar_json)
  raise(NotImplementedError, "Native extension method not available")
end
483
+
484
# Ruby-side placeholder for the GraphViz DOT rendering backend; as written it
# always raises. NOTE(review): presumably replaced by the native extension
# when that is loaded - confirm.
#
# @raise [NotImplementedError] always
def _grammar_to_dot(grammar_json)
  raise(NotImplementedError, "Native extension method not available")
end
487
+
488
# Decode a flat array of tagged integer words into a Ruby value tree.
#
# Tags:
#   0x00 = nil
#   0x01 = bool          (tag, value; any non-zero value is true)
#   0x02 = int           (tag, value)
#   0x03 = float         (tag, 64-bit pattern reinterpreted as a double)
#   0x04 = string_ref    (tag, byte offset, byte length into +input+)
#   0x05 = array_start
#   0x06 = array_end
#   0x07 = hash_start
#   0x08 = hash_end
#   0x09 = hash_key      (tag, len, key_chunks...; the value follows)
#   0x0A = inline_string (tag, len, string_chunks...)
#
# Malformed streams raise instead of hanging: an end tag without a matching
# open marker, a hash whose last key has no value, and a packed string whose
# length is negative or runs past the end of +flat+ are all rejected.
#
# @param flat [Array<Integer>] Tagged words as listed above
# @param input [String] Source text that string_ref entries slice into
# @return [Object, nil] The first decoded value; nil when +flat+ is empty
# @raise [RuntimeError] on an unknown tag or a malformed stream
def decode_flat(flat, input)
  stack = []
  i = 0

  while i < flat.length
    tag = flat[i]

    case tag
    when 0x00 # nil
      stack << nil
      i += 1
    when 0x01 # bool
      stack << (flat[i + 1] != 0)
      i += 2
    when 0x02 # int
      # NOTE(review): the word is passed through untouched; if the encoder
      # writes negative ints as two's-complement u64 they would surface as
      # large positive values here - confirm against the encoder.
      stack << flat[i + 1]
      i += 2
    when 0x03 # float
      # Reinterpret the 64-bit pattern as a native-endian double.
      stack << [flat[i + 1]].pack('Q').unpack1('D')
      i += 2
    when 0x04 # string_ref (from input)
      stack << input.byteslice(flat[i + 1], flat[i + 2])
      i += 3
    when 0x05 # array_start
      stack << :array_marker
      i += 1
    when 0x06 # array_end
      stack << decode_flat_close(stack, :array_marker, i)
      i += 1
    when 0x07 # hash_start
      stack << :hash_marker
      i += 1
    when 0x08 # hash_end
      entries = decode_flat_close(stack, :hash_marker, i)
      raise "Hash key without value before index #{i}" if entries.length.odd?

      # Entries alternate key, value; later duplicates win, as with to_h.
      stack << entries.each_slice(2).to_h
      i += 1
    when 0x09, 0x0A # hash_key / inline_string share one packed layout
      text, i = decode_flat_string(flat, i)
      stack << text
    else
      raise "Unknown tag: #{tag} at index #{i}"
    end
  end

  stack.first
end

# Remove and return everything above the innermost open marker, dropping the
# marker itself. Raises when that marker is missing or of the other kind.
def decode_flat_close(stack, marker, index)
  open_at = stack.rindex { |entry| entry.equal?(:array_marker) || entry.equal?(:hash_marker) }
  unless open_at && stack[open_at].equal?(marker)
    raise "Unbalanced end tag at index #{index}: no open #{marker}"
  end

  items = stack.slice!(open_at + 1, stack.length)
  stack.pop # Remove marker
  items
end

# Read a length-prefixed string packed little-endian into 64-bit words.
# Returns [string, index of the word following the string].
def decode_flat_string(flat, index)
  len = flat[index + 1]
  raise "Truncated string at index #{index}" unless len.is_a?(Integer) && len >= 0

  first = index + 2
  word_count = (len + 7) / 8
  words = flat[first, word_count]
  raise "Truncated string at index #{index}" if words.nil? || words.length < word_count

  # 'Q<' emits each word low byte first; padding beyond len is cut off.
  text = words.pack('Q<*').byteslice(0, len).force_encoding('UTF-8')
  [text, first + word_count]
end
594
+
595
# Fingerprint a grammar atom by structure rather than object identity.
#
# The atom is reduced to nested arrays by atom_structure, rendered with to_s,
# and MD5-hexdigested, so two atoms with equal structure yield the same hash.
# Assumes Digest::MD5 is already loaded (require 'digest') - TODO confirm at
# the top of the file.
#
# @param atom [Object] Grammar atom to fingerprint
# @return [String] Hex MD5 digest of the structural representation
def grammar_structure_hash(atom)
  Digest::MD5.hexdigest(atom_structure(atom).to_s)
end
602
+
603
# Reduce a grammar atom to nested arrays suitable for structural hashing.
#
# Each recognised atom class maps to a tagged array; composite atoms recurse
# into their children. Two caveats follow directly from the branches below:
# an Entity is represented by its name only (it is a lazy reference, so its
# target is not expanded), and any unrecognised atom collapses to its class
# name, so distinct atoms of the same unlisted class look identical.
#
# @param atom [Object] Grammar atom
# @return [Array] Tagged structural representation
def atom_structure(atom)
  case atom
  when ::Parsanol::Atoms::Str then [:str, atom.str]
  when ::Parsanol::Atoms::Re then [:re, atom.match]
  when ::Parsanol::Atoms::Sequence
    [:seq, atom.parslets.map { |child| atom_structure(child) }]
  when ::Parsanol::Atoms::Alternative
    [:alt, atom.alternatives.map { |branch| atom_structure(branch) }]
  when ::Parsanol::Atoms::Repetition
    inner = atom_structure(atom.parslet)
    [:rep, atom.min, atom.max, inner]
  when ::Parsanol::Atoms::Named
    [:named, atom.name.to_s, atom_structure(atom.parslet)]
  when ::Parsanol::Atoms::Lookahead
    [:lookahead, atom.positive, atom_structure(atom.bound_parslet)]
  when ::Parsanol::Atoms::Entity then [:entity, atom.name.to_s]
  else [:unknown, atom.class.name]
  end
end
627
+ end
628
+ end
629
+ end
630
+ end