parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  module Native
    # Tag values and shared frozen objects used when reading the serialized
    # AST. The numeric tags have to stay in sync with what the Rust parser
    # emits.
    module Types
      # Node type tags found in the serialized AST (mirrors the Rust parser)
      TAG_NIL           = 0x00
      TAG_BOOL          = 0x01
      TAG_INT           = 0x02
      TAG_FLOAT         = 0x03
      TAG_STRING_REF    = 0x04
      TAG_ARRAY_START   = 0x05
      TAG_ARRAY_END     = 0x06
      TAG_HASH_START    = 0x07
      TAG_HASH_END      = 0x08
      TAG_HASH_KEY      = 0x09
      TAG_INLINE_STRING = 0x0A

      # Pre-frozen values the transformer can reuse instead of allocating
      SEQUENCE_TAG   = ':sequence'.freeze
      REPETITION_TAG = ':repetition'.freeze
      EMPTY_STRING   = ''.freeze
      EMPTY_ARRAY    = [].freeze
      EMPTY_HASH     = {}.freeze
    end

    # Memo table of key => Symbol conversions. Kept in a class variable so a
    # single table is shared by every transformation.
    @@symbol_cache = {}

    # Return +key+ as a Symbol, remembering conversions already performed.
    # Symbols are handed back untouched and never enter the cache.
    #
    # @param key [String, Symbol] the key to convert
    # @return [Symbol] the symbol form of +key+
    def self.cached_symbol(key)
      key.is_a?(Symbol) ? key : (@@symbol_cache[key] ||= key.to_sym)
    end
  end
end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'digest'
5
+
6
+ # Entry point for native parsing functionality
7
+ # Requires the individual components
8
+ require 'parsanol/native/types'
9
+ require 'parsanol/native/parser'
10
+ require 'parsanol/native/transformer'
11
+ require 'parsanol/native/serializer'
12
+
13
module Parsanol
  # Facade over the native parsing backend. Almost every method here simply
  # forwards its arguments to the Parser module.
  module Native
    VERSION = '0.1.0'

    # --- Core parsing (forwarded to Parser) -------------------------------

    def self.available?
      Parser.available?
    end

    def self.parse(grammar_json, input)
      Parser.parse(grammar_json, input)
    end

    def self.parse_with_grammar(root_atom, input)
      Parser.parse_with_grammar(root_atom, input)
    end

    def self.parse_parslet_compatible(root_atom, input)
      Parser.parse_parslet_compatible(root_atom, input)
    end

    def self.parse_batch_inputs(root_atom, inputs)
      Parser.parse_batch_inputs(root_atom, inputs)
    end

    def self.parse_batch_with_transform(root_atom, inputs)
      Parser.parse_batch_with_transform(root_atom, inputs)
    end

    def self.parse_raw(root_atom, input)
      Parser.parse_raw(root_atom, input)
    end

    def self.serialize_grammar(root_atom)
      Parser.serialize_grammar(root_atom)
    end

    def self.clear_cache
      Parser.clear_cache
    end

    def self.cache_stats
      Parser.cache_stats
    end

    # --- Serialized mode (JSON output) ------------------------------------

    def self.parse_to_json(grammar_json, input)
      Parser.parse_to_json(grammar_json, input)
    end

    # --- ZeroCopy mode (direct Ruby objects) ------------------------------

    def self.parse_to_objects(grammar_json, input, type_map = nil)
      Parser.parse_to_objects(grammar_json, input, type_map)
    end

    def self.convert_slices(obj, input)
      Parser.convert_slices(obj, input)
    end

    # --- Source location tracking -----------------------------------------

    def self.parse_with_spans(grammar_json, input)
      Parser.parse_with_spans(grammar_json, input)
    end

    def self.get_span(result, node_id)
      Parser.get_span(result, node_id)
    end

    # --- Grammar composition ----------------------------------------------

    def self.grammar_import(builder_json, grammar_json, prefix = nil)
      Parser.grammar_import(builder_json, grammar_json, prefix)
    end

    def self.grammar_rule_mut(builder_json, rule_name)
      Parser.grammar_rule_mut(builder_json, rule_name)
    end

    # --- Streaming parser -------------------------------------------------

    def self.streaming_parser_new(grammar_json)
      Parser.streaming_parser_new(grammar_json)
    end

    def self.streaming_parser_add_chunk(parser, chunk)
      Parser.streaming_parser_add_chunk(parser, chunk)
    end

    def self.streaming_parser_parse_chunk(parser)
      Parser.streaming_parser_parse_chunk(parser)
    end

    # --- Incremental parser -----------------------------------------------

    def self.incremental_parser_new(grammar_json, initial_input)
      Parser.incremental_parser_new(grammar_json, initial_input)
    end

    def self.incremental_parser_apply_edit(parser, start, deleted, inserted = '')
      Parser.incremental_parser_apply_edit(parser, start, deleted, inserted)
    end

    def self.incremental_parser_reparse(parser, new_input = nil)
      Parser.incremental_parser_reparse(parser, new_input)
    end

    # --- Streaming builder ------------------------------------------------
    # parse_with_builder is not defined in this file; per the original notes
    # it is exposed from Rust directly on Parsanol::Native.

    # Same functionality as parse_with_builder, under a different name.
    def self.parse_with_callback(grammar_json, input, callback)
      parse_with_builder(grammar_json, input, callback)
    end

    # --- Parallel parsing -------------------------------------------------

    # Goes through _parse_batch_parallel; a nil thread count is passed as 0.
    def self.parse_batch_parallel(grammar_json, inputs, num_threads: nil)
      _parse_batch_parallel(grammar_json, inputs, num_threads || 0)
    end

    # --- Security / limits ------------------------------------------------

    # Goes through _parse_with_limits; the defaults are 100 * 1024 * 1024 for
    # the input size and 1000 for the recursion depth.
    def self.parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
      _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
    end

    # --- Debug tools ------------------------------------------------------

    def self.parse_with_trace(grammar_json, input)
      Parser.parse_with_trace(grammar_json, input)
    end

    def self.grammar_to_mermaid(grammar_json)
      Parser.grammar_to_mermaid(grammar_json)
    end

    def self.grammar_to_dot(grammar_json)
      Parser.grammar_to_dot(grammar_json)
    end

    # --- Legacy internal methods (kept for backward compatibility) --------
    # Each one reaches the same-named Parser method through `send`, which
    # also works when that method is not public.

    def self._parse_with_spans(grammar_json, input)
      Parser.send(:_parse_with_spans, grammar_json, input)
    end

    def self._get_span(result, node_id)
      Parser.send(:_get_span, result, node_id)
    end

    def self._grammar_import(builder_json, grammar_json, prefix)
      Parser.send(:_grammar_import, builder_json, grammar_json, prefix)
    end

    def self._grammar_rule_mut(builder_json, rule_name)
      Parser.send(:_grammar_rule_mut, builder_json, rule_name)
    end

    def self._streaming_parser_new(grammar_json)
      Parser.send(:_streaming_parser_new, grammar_json)
    end

    def self._streaming_parser_add_chunk(parser, chunk)
      Parser.send(:_streaming_parser_add_chunk, parser, chunk)
    end

    def self._streaming_parser_parse_chunk(parser)
      Parser.send(:_streaming_parser_parse_chunk, parser)
    end

    def self._incremental_parser_new(grammar_json, initial_input)
      Parser.send(:_incremental_parser_new, grammar_json, initial_input)
    end

    def self._incremental_parser_apply_edit(parser, start, deleted, inserted)
      Parser.send(:_incremental_parser_apply_edit, parser, start, deleted, inserted)
    end

    def self._incremental_parser_reparse(parser, new_input)
      Parser.send(:_incremental_parser_reparse, parser, new_input)
    end

    def self._parse_batch_parallel(grammar_json, inputs, num_threads)
      Parser.send(:_parse_batch_parallel, grammar_json, inputs, num_threads)
    end

    def self._parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
      Parser.send(:_parse_with_limits, grammar_json, input, max_input_size, max_recursion_depth)
    end

    def self._parse_with_trace(grammar_json, input)
      Parser.send(:_parse_with_trace, grammar_json, input)
    end

    def self._grammar_to_mermaid(grammar_json)
      Parser.send(:_grammar_to_mermaid, grammar_json)
    end

    def self._grammar_to_dot(grammar_json)
      Parser.send(:_grammar_to_dot, grammar_json)
    end
  end
end
211
+
212
+ # Attempt to load native extension
213
+ begin
214
+ require 'parsanol/parsanol_native'
215
+ rescue LoadError
216
+ # Native extension not built yet
217
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ast_visitor'
4
+ require_relative 'optimizers/quantifier_optimizer'
5
+ require_relative 'optimizers/sequence_optimizer'
6
+ require_relative 'optimizers/choice_optimizer'
7
+ require_relative 'optimizers/lookahead_optimizer'
8
+ require_relative 'optimizers/cut_inserter'
9
+
10
+ # Grammar-level optimizations for Parslet parsers
11
+ # These optimizations transform the parser AST to reduce runtime overhead
12
+ # without changing semantics.
13
+ #
14
+ # Architecture:
15
+ # - Uses Visitor pattern for clean separation of traversal and transformation
16
+ # - Each optimizer is a separate class inheriting from ASTVisitor
17
+ # - Optimizer module provides facade methods for easy access
18
module Parsanol
  module Optimizer
    # Collapse redundant quantifiers in a parslet tree.
    # Example: str('a').repeat(1, 1) => str('a')
    #          str('a').repeat(0, 1).repeat(0, 1) => str('a').repeat(0, 1)
    #
    # @param parslet [Parsanol::Atoms::Base] tree to simplify
    # @return [Parsanol::Atoms::Base] simplified tree
    def self.simplify_quantifiers(parslet)
      quantifier_pass = Optimizers::QuantifierOptimizer.new
      quantifier_pass.visit(parslet)
    end

    # Flatten sequences and merge adjacent string atoms.
    # Example: str('a') >> str('b') => str('ab')
    #          (str('a') >> str('b')) >> str('c') => str('abc')
    #
    # @param parslet [Parsanol::Atoms::Base] tree to simplify
    # @return [Parsanol::Atoms::Base] simplified tree
    def self.simplify_sequences(parslet)
      sequence_pass = Optimizers::SequenceOptimizer.new
      sequence_pass.visit(parslet)
    end

    # Simplify choice/alternative patterns.
    # Example: (A | B) | C => A | B | C
    #          A | B | A => A | B
    #
    # @param parslet [Parsanol::Atoms::Base] tree to simplify
    # @return [Parsanol::Atoms::Base] simplified tree
    def self.simplify_choices(parslet)
      choice_pass = Optimizers::ChoiceOptimizer.new
      choice_pass.visit(parslet)
    end

    # Simplify lookahead patterns.
    # Example: !(!x) => &x (double negation elimination)
    #
    # @param parslet [Parsanol::Atoms::Base] tree to simplify
    # @return [Parsanol::Atoms::Base] simplified tree
    def self.simplify_lookaheads(parslet)
      lookahead_pass = Optimizers::LookaheadOptimizer.new
      lookahead_pass.visit(parslet)
    end

    # Insert cut operators automatically where that is safe (AC-FIRST
    # algorithm): cuts go after deterministic prefixes when the alternatives
    # have disjoint FIRST sets.
    #
    # Example: str('if') >> x | str('while') >> y
    #       => str('if').cut >> x | str('while').cut >> y
    #
    # @param parslet [Parsanol::Atoms::Base] tree to optimize
    # @return [Parsanol::Atoms::Base] tree with cuts inserted
    def self.insert_cuts(parslet)
      cut_pass = Optimizers::CutInserter.new
      cut_pass.optimize(parslet)
    end

    # Convenience entry point: run every optimizer pass in the recommended
    # order, feeding the output of each pass into the next.
    #
    # @param parslet [Parsanol::Atoms::Base] tree to optimize
    # @return [Parsanol::Atoms::Base] fully optimized tree
    def self.optimize_all(parslet)
      passes = %i[
        simplify_quantifiers
        simplify_sequences
        simplify_choices
        simplify_lookaheads
        insert_cuts
      ]
      passes.reduce(parslet) { |tree, pass| send(pass, tree) }
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast_visitor'
4
+
5
module Parsanol
  module Optimizers
    # Visitor that tidies up alternative (choice) nodes.
    #
    # Rewrites performed:
    # - (A | B) | C => A | B | C    (nested alternatives are flattened)
    # - A | B | A => A | B          (repeated alternatives are dropped)
    # - Alternative(A) => A         (a lone alternative is unwrapped)
    class ChoiceOptimizer < ASTVisitor
      # Optimize one alternative node; children are visited first.
      #
      # @param parslet [Parsanol::Atoms::Alternative] alternative to optimize
      # @return [Parsanol::Atoms::Base] the only remaining child, a new
      #   Alternative, or the original node when nothing changed
      def visit_alternative(parslet)
        visited = parslet.alternatives.map { |child| visit(child) }
        candidates = deduplicate_alternatives(flatten_alternatives(visited))

        # A choice with a single branch is just that branch
        return candidates[0] if candidates.size == 1
        # Nothing changed, so keep the original node rather than rebuilding it
        return parslet if candidates == parslet.alternatives

        Parsanol::Atoms::Alternative.new(*candidates)
      end

      private

      # Splice the children of any nested Alternative into the parent list
      # (one level deep), preserving order.
      #
      # @param alternatives [Array<Parsanol::Atoms::Base>] branches to flatten
      # @return [Array<Parsanol::Atoms::Base>] a new, flattened array
      def flatten_alternatives(alternatives)
        alternatives.flat_map do |alt|
          alt.is_a?(Parsanol::Atoms::Alternative) ? alt.alternatives : [alt]
        end
      end

      # Drop repeated branches, keeping the first occurrence of each.
      # Two branches count as equal when their to_s output matches; to_s is
      # used as a stand-in for structural equality.
      # NOTE(review): atoms whose to_s does not reflect all of their state
      # would be merged here - confirm to_s is distinctive for every atom type.
      #
      # @param alternatives [Array<Parsanol::Atoms::Base>] branches to filter
      # @return [Array<Parsanol::Atoms::Base>] branches without duplicates
      def deduplicate_alternatives(alternatives)
        return alternatives if alternatives.size < 2

        alternatives.uniq(&:to_s)
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
# frozen_string_literal: true

module Parsanol
  module Optimizers
    # Automatic Cut Insertion (AC-FIRST Algorithm)
    #
    # This optimizer implements the AC-FIRST algorithm from Mizushima et al. (2010)
    # to automatically insert cut operators when alternatives have disjoint FIRST sets.
    #
    # When all alternatives in a choice have non-overlapping FIRST sets, we can safely
    # insert a cut after the deterministic prefix, since backtracking will never be
    # needed.
    #
    # Example:
    #   str('if') >> condition >> then_clause |
    #   str('while') >> condition >> body |
    #   str('print') >> expression
    #
    # Becomes:
    #   str('if').cut >> condition >> then_clause |
    #   str('while').cut >> condition >> body |
    #   str('print').cut >> expression
    #
    # Note: the cut is placed after the longest leading run of sequence elements
    # whose FIRST set does not contain EPSILON, so it can land later than shown
    # above when the elements following the keyword cannot match empty either.
    #
    # Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
    # Grammars in Mostly Constant Space"
    class CutInserter
      # Optimize a parslet by inserting cuts where safe.
      # Recursively traverses alternatives, sequences, repetitions and named
      # atoms; every other atom is returned unchanged.
      #
      # @param parslet [Parsanol::Atoms::Base] The parslet to optimize
      # @return [Parsanol::Atoms::Base] Optimized parslet with cuts inserted
      def optimize(parslet)
        case parslet
        when Parsanol::Atoms::Alternative then optimize_alternative(parslet)
        when Parsanol::Atoms::Sequence    then optimize_sequence(parslet)
        when Parsanol::Atoms::Repetition  then optimize_repetition(parslet)
        when Parsanol::Atoms::Named       then optimize_named(parslet)
        else parslet # leaf or unsupported atom (Str, Re, Lookahead, etc.)
        end
      end

      private

      # Rebuild an Alternative, inserting cuts only when every pair of
      # branches has disjoint FIRST sets; otherwise just recurse.
      #
      # @param alt [Parsanol::Atoms::Alternative]
      # @return [Parsanol::Atoms::Alternative]
      def optimize_alternative(alt)
        alternatives = alt.alternatives
        first_sets = alternatives.map(&:first_set)

        optimized =
          if Parsanol::FirstSet.all_disjoint?(first_sets)
            alternatives.map { |alternative| insert_cut_if_safe(alternative) }
          else
            # Overlapping FIRST sets: backtracking may be needed, so no cuts here.
            alternatives.map { |alternative| optimize(alternative) }
          end

        Parsanol::Atoms::Alternative.new(*optimized)
      end

      # Rebuild a Sequence from its recursively optimized elements.
      #
      # @param seq [Parsanol::Atoms::Sequence]
      # @return [Parsanol::Atoms::Sequence]
      def optimize_sequence(seq)
        Parsanol::Atoms::Sequence.new(*seq.parslets.map { |p| optimize(p) })
      end

      # Rebuild a Repetition around its optimized child, keeping min/max and,
      # when it can be recovered, the original tag. Rebuilding with the default
      # tag would turn e.g. a +maybe+ into a plain repetition.
      #
      # @param rep [Parsanol::Atoms::Repetition]
      # @return [Parsanol::Atoms::Repetition]
      def optimize_repetition(rep)
        args = [optimize(rep.parslet), rep.min, rep.max]
        tag = repetition_tag(rep)
        args << tag unless tag.nil?
        Parsanol::Atoms::Repetition.new(*args)
      end

      # Recover the tag of a repetition. Uses a public reader when one exists,
      # otherwise reads @tag directly.
      # NOTE(review): assumes the tag is stored in @tag as in upstream Parslet;
      # if it is not, nil is returned and the constructor default is used.
      #
      # @param rep [Parsanol::Atoms::Repetition]
      # @return [Symbol, nil]
      def repetition_tag(rep)
        return rep.tag if rep.respond_to?(:tag)

        rep.instance_variable_get(:@tag) if rep.instance_variable_defined?(:@tag)
      end

      # Rebuild a Named atom around its optimized child.
      #
      # @param named [Parsanol::Atoms::Named]
      # @return [Parsanol::Atoms::Base]
      def optimize_named(named)
        optimize(named.parslet).as(named.name)
      end

      # Insert a cut after the deterministic prefix if safe.
      # For sequences: cut after the longest prefix without EPSILON.
      # For other atoms: cut the whole (recursively optimized) atom if its
      # FIRST set shows that it always consumes input.
      #
      # @param parslet [Parsanol::Atoms::Base] one branch of a disjoint choice
      # @return [Parsanol::Atoms::Base]
      def insert_cut_if_safe(parslet)
        if parslet.is_a?(Parsanol::Atoms::Sequence)
          prefix_parslets = find_deterministic_prefix(parslet)
          if prefix_parslets && !prefix_parslets.empty?
            return build_cut_sequence(parslet, prefix_parslets)
          end
        end

        # Always recurse so nested choices are optimized whether or not this
        # atom itself receives a cut.
        optimized = optimize(parslet)
        safe_to_cut?(parslet) ? optimized.cut : optimized
      end

      # Find the longest deterministic prefix of a sequence.
      # A deterministic prefix doesn't include EPSILON in its FIRST set.
      #
      # @param sequence [Parsanol::Atoms::Sequence] The sequence to analyze
      # @return [Array<Parsanol::Atoms::Base>, nil] Prefix parslets, or nil if none
      def find_deterministic_prefix(sequence)
        prefix = sequence.parslets.take_while do |p|
          !p.first_set.include?(Parsanol::FirstSet::EPSILON)
        end

        prefix.empty? ? nil : prefix
      end

      # Check if it's safe to cut after this parslet.
      # Not safe when EPSILON is in the FIRST set (might not consume input)
      # or when the FIRST set holds nothing but nil entries (unknown).
      #
      # @param parslet [Parsanol::Atoms::Base]
      # @return [Boolean]
      def safe_to_cut?(parslet)
        first = parslet.first_set
        return false if first.include?(Parsanol::FirstSet::EPSILON)
        return false if first.all?(&:nil?)

        true
      end

      # Build a new sequence with a cut after the prefix.
      #
      # @param sequence [Parsanol::Atoms::Sequence] Original sequence
      # @param prefix_parslets [Array] Parslets forming the deterministic prefix
      # @return [Parsanol::Atoms::Base] New sequence with cut inserted
      def build_cut_sequence(sequence, prefix_parslets)
        optimized_prefix = prefix_parslets.map { |p| optimize(p) }

        # A one-element prefix is cut directly; longer ones are grouped first.
        prefix = if optimized_prefix.length == 1
                   optimized_prefix.first
                 else
                   Parsanol::Atoms::Sequence.new(*optimized_prefix)
                 end

        remaining = sequence.parslets.drop(prefix_parslets.length)
        optimized_remaining = remaining.map { |p| optimize(p) }

        # When the prefix is the entire sequence there is nothing to append.
        return prefix.cut if optimized_remaining.empty?

        Parsanol::Atoms::Sequence.new(prefix.cut, *optimized_remaining)
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast_visitor'
4
+
5
+ module Parsanol
6
+ module Optimizers
7
+ # Optimizes lookahead patterns in the AST
8
+ # Follows visitor pattern for clean separation of concerns
9
+ #
10
+ # Transformations:
11
+ # - !(!x) => &x (double negation elimination)
12
+ # - &(&x) => &x (positive lookahead is idempotent)
13
+ # - !(&x) => !x (negative of positive)
14
+ # - &(!x) => !x (positive of negative)
15
class LookaheadOptimizer < ASTVisitor
  # Simplify a lookahead node after optimizing the atom it guards.
  #
  # A lookahead directly wrapping another lookahead collapses to a single one:
  #   &(&x) => &x      &(!x) => !x
  #   !(&x) => !x      !(!x) => &x
  #
  # @param parslet [Parsanol::Atoms::Lookahead] lookahead to optimize
  # @return [Parsanol::Atoms::Base] optimized parslet
  def visit_lookahead(parslet)
    child = visit(parslet.bound_parslet)

    unless child.is_a?(Parsanol::Atoms::Lookahead)
      # Nothing to collapse: reuse the node if its child is untouched,
      # otherwise rewrap the optimized child with the same polarity.
      return parslet if child.equal?(parslet.bound_parslet)

      return Parsanol::Atoms::Lookahead.new(child, parslet.positive)
    end

    outer_is_positive = parslet.positive
    inner_is_positive = child.positive

    # A positive outer lookahead adds nothing: &(&x) and &(!x) are just
    # the inner lookahead.
    return child if outer_is_positive

    # A negative outer lookahead flips the inner polarity:
    # !(!x) => &x and !(&x) => !x.
    Parsanol::Atoms::Lookahead.new(child.bound_parslet, !inner_is_positive)
  end
end
57
+ end
58
+ end