parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Fast mode patch for Parslet - matches vanilla parslet 2.0 behavior.
4
+ #
5
+ # For grammars with many small allocations (like EXPRESS), this is faster
6
+ # because the overhead of pool management exceeds the benefit.
7
+ #
8
+ # Usage:
9
+ # require 'parslet'
10
+ # require 'parsanol/fast_mode'
11
+ # # Now all parsing uses fast mode methods
12
+ #
13
+
14
+ module Parsanol
15
+ FAST_MODE = true
16
+
17
+ module Atoms
18
+ # Fast mode Context - reopens Context and replaces try_with_cache with a minimal memoizing variant
19
+ class Context
20
+ # Apply obj at the current byte position, memoizing per (start position, obj.object_id); this version does no eviction and no pooling
21
+ def try_with_cache(obj, source, consume_all)
22
+ beg = source.bytepos
23
+
24
+ # Cache miss: run the atom, store [result, bytes consumed] when obj.cached? is true, then return the fresh result
25
+ unless (entry = @cache[beg]&.[](obj.object_id))
26
+ result = obj.try(source, self, consume_all)
27
+
28
+ if obj.cached?
29
+ (@cache[beg] ||= {})[obj.object_id] = [result, source.bytepos - beg]
30
+ end
31
+
32
+ return result
33
+ end
34
+
35
+ # Cache hit: move the source forward by the recorded byte count and hand back the stored result
36
+ result, advance = entry
37
+ source.bytepos = beg + advance
38
+ result
39
+ end
40
+ end
41
+
42
+ # Fast mode Sequence - applies each element in order, builds the [:sequence, ...] array directly and stops at the first failing element
43
+ class Sequence
44
+ def try(source, context, consume_all)
45
+ parslets = @parslets
46
+
47
+ case parslets.size # sizes 1-3 are unrolled by hand; every other size uses the loop in the else branch
48
+ when 1
49
+ success, value = parslets[0].apply(source, context, consume_all)
50
+ return success ? succ([:sequence, value]) : context.err(self, source, @error_msg, [value])
51
+ when 2
52
+ success, v1 = parslets[0].apply(source, context, false) # non-final elements are always applied with consume_all = false
53
+ return context.err(self, source, @error_msg, [v1]) unless success
54
+ success, v2 = parslets[1].apply(source, context, consume_all)
55
+ return success ? succ([:sequence, v1, v2]) : context.err(self, source, @error_msg, [v2])
56
+ when 3
57
+ success, v1 = parslets[0].apply(source, context, false)
58
+ return context.err(self, source, @error_msg, [v1]) unless success
59
+ success, v2 = parslets[1].apply(source, context, false)
60
+ return context.err(self, source, @error_msg, [v2]) unless success
61
+ success, v3 = parslets[2].apply(source, context, consume_all)
62
+ return success ? succ([:sequence, v1, v2, v3]) : context.err(self, source, @error_msg, [v3])
63
+ else
64
+ result = [:sequence]
65
+ last_idx = parslets.size - 1
66
+ i = 0
67
+ while i <= last_idx
68
+ success, value = parslets[i].apply(source, context, consume_all && i == last_idx) # only the final element inherits consume_all
69
+ return context.err(self, source, @error_msg, [value]) unless success # the failing element's value is passed along as the child of the error
70
+ result << value
71
+ i += 1
72
+ end
73
+ succ(result)
74
+ end
75
+ end
76
+ end
77
+
78
+ # Fast mode Repetition - builds the [tag, ...] result array directly, with unrolled paths for .maybe and small exact counts
79
+ class Repetition
80
+ EMPTY_REPETITION_ARRAY = [:repetition].freeze # shared frozen result returned when a :repetition-tagged maybe matches nothing
81
+
82
+ def try(source, context, consume_all)
83
+ parslet = @parslet
84
+ min = @min
85
+ max = @max
86
+ tag = @tag
87
+
88
+ # Fast path for .maybe (min 0, max 1): a single attempt whose failure still yields success. NOTE(review): consume_all is not consulted on this path, unlike the general case below - confirm this is intended
89
+ if min == 0 && max == 1
90
+ success, value = parslet.apply(source, context, false)
91
+ return succ([tag, value]) if success
92
+ return succ(tag == :repetition ? EMPTY_REPETITION_ARRAY : [tag])
93
+ end
94
+
95
+ # Fast path for exact count (min == max, at most 3): unrolled applications where only the last one receives consume_all; a count with no matching `when` falls through to the general case
96
+ if min == max && max && max <= 3
97
+ case max
98
+ when 1
99
+ success, value = parslet.apply(source, context, consume_all)
100
+ return success ? succ([tag, value]) : context.err_at(self, source, @error_msg, source.bytepos, [value])
101
+ when 2
102
+ success, v1 = parslet.apply(source, context, false)
103
+ return context.err_at(self, source, @error_msg, source.bytepos, [v1]) unless success
104
+ success, v2 = parslet.apply(source, context, consume_all)
105
+ return success ? succ([tag, v1, v2]) : context.err_at(self, source, @error_msg, source.bytepos, [v2])
106
+ when 3
107
+ success, v1 = parslet.apply(source, context, false)
108
+ return context.err_at(self, source, @error_msg, source.bytepos, [v1]) unless success
109
+ success, v2 = parslet.apply(source, context, false)
110
+ return context.err_at(self, source, @error_msg, source.bytepos, [v2]) unless success
111
+ success, v3 = parslet.apply(source, context, consume_all)
112
+ return success ? succ([tag, v1, v2, v3]) : context.err_at(self, source, @error_msg, source.bytepos, [v3])
113
+ end
114
+ end
115
+
116
+ # General case: apply repeatedly until a failure or until max occurrences, then enforce min and consume_all
117
+ start_pos = source.bytepos
118
+ occ = 0
119
+ result = [tag]
120
+ break_on = nil
121
+
122
+ loop do
123
+ success, value = parslet.apply(source, context, false)
124
+ break_on = value # value of the most recent attempt; passed as the child in the error calls below
125
+ break unless success
126
+
127
+ occ += 1
128
+ result << value
129
+ break if max && occ >= max # a nil max means there is no upper bound
130
+ end
131
+
132
+ if occ < min
133
+ source.bytepos = start_pos # rewind to where the repetition started before reporting the failure
134
+ return context.err_at(self, source, @error_msg, start_pos, [break_on])
135
+ end
136
+
137
+ if consume_all && source.chars_left > 0
138
+ return context.err(self, source, @unconsumed_msg, [break_on])
139
+ end
140
+
141
+ succ(result)
142
+ end
143
+ end
144
+ end
145
+ end
@@ -0,0 +1,75 @@
1
+ # FIRST Set Analysis for PEG Grammars
2
+ #
3
+ # FIRST sets help identify which terminals can appear at the beginning of
4
+ # a parse. This is essential for:
5
+ # 1. Automatic cut operator insertion (AC-FIRST algorithm)
6
+ # 2. Grammar analysis and optimization
7
+ # 3. Detecting ambiguous choices
8
+ #
9
+ # Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
10
+ # Grammars in Mostly Constant Space"
11
+ #
12
require 'set' # Set is only autoloaded on Ruby >= 3.2; load it explicitly for older runtimes

# Declared in nested form so that loading this file does not depend on
# Parsanol having been defined beforehand.
module Parsanol
  # Mixin providing memoized FIRST-set computation for atoms, plus
  # module-level helpers for comparing FIRST sets.
  module FirstSet
    # Sentinel value representing the empty string (ε)
    EPSILON = :epsilon

    # FIRST set of this atom, computed once and then cached.
    #
    # The set may contain terminal atoms, EPSILON (the atom can match the
    # empty string) and nil (an unknown terminal).
    #
    # @return [Set] FIRST set containing terminal atoms, EPSILON or nil
    def first_set
      @first_set_cache ||= compute_first_set
    end

    # Drop the cached FIRST set so the next call to #first_set recomputes it.
    #
    # @return [nil]
    def clear_first_set_cache
      @first_set_cache = nil
    end

    protected

    # Hook for including classes to override with a precise computation.
    # The default is the conservative answer: a set holding only nil,
    # meaning "unknown terminal".
    #
    # @return [Set]
    def compute_first_set
      Set.new([nil])
    end

    class << self
      # Check whether two FIRST sets have no concrete terminal in common.
      #
      # EPSILON and nil entries are removed from both sides before the
      # comparison; if either side has nothing left, the pair is reported
      # as disjoint.
      #
      # NOTE(review): because nil (unknown terminal) is discarded, a set
      # that is entirely unknown is reported as disjoint from everything.
      # Confirm that callers rely on this before tightening it.
      #
      # @param set1 [Set] First FIRST set
      # @param set2 [Set] Second FIRST set
      # @return [Boolean] true if the sets share no concrete terminal
      def disjoint?(set1, set2)
        placeholder = ->(item) { item == EPSILON || item.nil? }

        # to_a keeps the intersection on plain arrays (Opal compatibility)
        terminals1 = set1.reject(&placeholder).to_a
        terminals2 = set2.reject(&placeholder).to_a

        return true if terminals1.empty? || terminals2.empty?

        (terminals1 & terminals2).empty?
      end

      # Check whether every pair of FIRST sets in a collection is disjoint.
      # Fewer than two sets are trivially disjoint.
      #
      # @param sets [Array<Set>] Collection of FIRST sets
      # @return [Boolean] true if all pairs are disjoint
      def all_disjoint?(sets)
        return true if sets.length < 2

        sets.combination(2).all? { |left, right| disjoint?(left, right) }
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::GrammarBuilder - Grammar Composition
4
+ #
5
+ # Build complex grammars by importing and composing smaller grammars.
6
+ # This enables reusable grammar modules.
7
+ #
8
+ # Usage:
9
+ # # Define reusable grammars
10
+ # expression_grammar = GrammarBuilder.new
11
+ # .rule("expr", str("a") | str("b"))
12
+ # .build
13
+ #
14
+ # type_grammar = GrammarBuilder.new
15
+ # .rule("type", str("int") | str("str"))
16
+ # .build
17
+ #
18
+ # # Compose into a new grammar
19
+ # combined = GrammarBuilder.new
20
+ # .import(expression_grammar, prefix: "expr")
21
+ # .import(type_grammar, prefix: "type")
22
+ # .rule("typed", seq([ref("expr:root"), str(":"), ref("type:root")]))
23
+ # .build
24
+ #
25
+ # Requires native extension for full functionality.
26
+
27
+ module Parsanol
28
require 'json' # to_json and from_json depend on the JSON standard library

# Fluent builder that collects named rules, an optional root rule name and
# imported grammars, and exposes them as a plain Hash (or its JSON form).
class GrammarBuilder
  # Create a new, empty grammar builder
  def initialize
    @rules = {}
    @imports = []
    @root = nil
  end

  # Define (or overwrite) a rule. Names are stored as Strings.
  #
  # @param name [String, Symbol] Rule name
  # @param parslet [Parsanol::Atoms::Base] Parslet atom
  # @return [self] For chaining
  def rule(name, parslet)
    @rules[name.to_s] = parslet
    self
  end

  # Look up a previously defined rule.
  #
  # @param name [String, Symbol] Rule name
  # @return [Parsanol::Atoms::Base, nil] The rule atom, or nil if undefined
  def [](name)
    @rules[name.to_s]
  end

  # Set the root rule
  #
  # @param name [String, Symbol] Root rule name
  # @return [self] For chaining
  def root(name)
    @root = name.to_s
    self
  end

  # Import another grammar with optional prefix
  #
  # @param grammar [GrammarBuilder, Hash] Grammar to import
  # @param prefix [String, nil] Optional prefix for imported rules
  # @return [self] For chaining
  # @raise [ArgumentError] if grammar is neither a GrammarBuilder nor a Hash
  def import(grammar, prefix: nil)
    @imports << { grammar: grammar_data_for(grammar), prefix: prefix }
    self
  end

  # Import with explicit rule mapping
  #
  # @param grammar [GrammarBuilder, Hash] Grammar to import
  # @param prefix [String, nil] Optional prefix
  # @param rules [Hash] Rule mapping {from_rule: to_rule}
  # @return [self] For chaining
  # @raise [ArgumentError] if grammar is neither a GrammarBuilder nor a Hash
  def import_with_rules(grammar, prefix: nil, rules: {})
    @imports << { grammar: grammar_data_for(grammar), prefix: prefix, rules: rules }
    self
  end

  # Build the grammar
  #
  # @return [Hash] Grammar representation with :rules, :root and :imports
  def build
    {
      rules: @rules,
      root: @root,
      imports: @imports
    }
  end

  # Convert to JSON for native parser.
  #
  # Accepts and forwards the optional generator state so that a builder
  # nested inside another structure (e.g. JSON.generate([builder])) can be
  # serialized; the JSON generator calls to_json with one argument.
  #
  # @return [String] JSON representation
  def to_json(*args)
    build.to_json(*args)
  end

  # Get as a Hash
  #
  # @return [Hash] Grammar representation
  def to_h
    build
  end

  # Reference another rule in this grammar
  #
  # @param name [String, Symbol] Rule name
  # @return [Parsanol::Atoms::Entity] Entity referencing the rule
  def ref(name)
    Parsanol::Atoms::Entity.new(name)
  end

  # Reference the root of another grammar
  #
  # @param grammar_name [String] Name of the grammar (for prefixed imports)
  # @return [Parsanol::Atoms::Entity] Entity referencing the root
  def ref_root(grammar_name = nil)
    ref(grammar_name ? "#{grammar_name}:root" : 'root')
  end

  class << self
    # Create a grammar from a block. The block is evaluated with the new
    # builder as self; without a block an empty grammar is returned.
    #
    # @return [Hash] Built grammar
    def build(&block)
      builder = new
      builder.instance_eval(&block) if block
      builder.build
    end

    # Import a grammar from JSON string
    #
    # @param json [String] JSON representation
    # @return [Hash] Grammar representation (keys are Strings, as parsed)
    def from_json(json)
      JSON.parse(json)
    end
  end

  private

  # Normalize an importable grammar to its Hash representation.
  def grammar_data_for(grammar)
    case grammar
    when GrammarBuilder then grammar.to_h
    when Hash then grammar
    else
      raise ArgumentError, "Expected GrammarBuilder or Hash, got #{grammar.class}"
    end
  end
end
167
+
168
+ # Mixin exposing a `grammar` helper for block-style grammar definitions
169
+ module GrammarBuilderDSL
170
+ # Build a grammar from a block by delegating to GrammarBuilder.build
171
+ #
172
+ # @return [Hash] Built grammar representation (the result of GrammarBuilder.build, not a builder instance)
173
+ def grammar(&block)
174
+ GrammarBuilder.build(&block)
175
+ end
176
+ end
177
+ end
@@ -0,0 +1,97 @@
1
+
2
+ # Paints a graphviz graph of your parser.
3
+
4
+ begin
5
+ require 'ruby-graphviz'
6
+ rescue LoadError
7
+ puts "Please install the 'ruby-graphviz' gem first."
8
+ fail
9
+ end
10
+
11
+ require 'set'
12
+ require 'parsanol/atoms/visitor'
13
+
14
+ module Parsanol
15
# Visitor that walks a parser's atom tree and records every entity as a
# node in the given graph, linking it to the entity (or the synthetic
# 'parser' node) it was reached from. Containers such as sequences and
# alternatives are transparent: their children attach to the enclosing node.
class GraphvizVisitor
  # Node that newly discovered entities are linked from.
  attr_reader :parent

  # @param g [#add_nodes, #add_edges] graph object that receives nodes and edges
  def initialize(g)
    @graph = g
    @known_links = Set.new
    @visited = Set.new
    @parent = nil
  end

  # Entry point: creates the 'parser' node and descends into the root atom.
  def visit_parser(root)
    recurse(root, node('parser'))
  end

  # Adds a node for the entity and links it from the current parent. The
  # entity body is expanded only the first time its name is seen, which
  # keeps recursive grammars from looping forever.
  def visit_entity(name, block)
    entity_node = node(name)

    downwards(entity_node)

    return unless @visited.add?(name)

    recurse(block.call, entity_node)
  end

  def visit_named(name, atom)
    recurse(atom, parent)
  end

  def visit_repetition(tag, min, max, atom)
    recurse(atom, parent)
  end

  def visit_alternative(alternatives)
    # Capture the parent up front: visiting a child may move @parent.
    origin = parent
    alternatives.each { |atom| recurse(atom, origin) }
  end

  def visit_sequence(sequence)
    # Capture the parent up front: visiting a child may move @parent.
    origin = parent
    sequence.each { |atom| recurse(atom, origin) }
  end

  def visit_lookahead(positive, atom)
    recurse(atom, parent)
  end

  # Terminals are intentionally not drawn.
  def visit_re(regexp)
    # downwards node(regexp.object_id, label: escape("re(#{regexp.inspect})"))
  end

  # Terminals are intentionally not drawn.
  def visit_str(str)
    # downwards node(str.object_id, label: escape("#{str.inspect}"))
  end

  # Replace double quotes with single quotes so the text is safe in a label.
  def escape(str)
    str.tr('"', "'")
  end

  # Add a node named after name.to_s and return whatever the graph returns.
  def node(name, opts = {})
    @graph.add_nodes(name.to_s, opts)
  end

  # Link the current parent to child, at most once per (parent, child) pair.
  def downwards(child)
    return unless @parent

    link = [@parent, child]
    return if @known_links.include?(link)

    @graph.add_edges(@parent, child)
    @known_links << link
  end

  # Make current the parent for whatever node.accept(self) discovers.
  def recurse(node, current)
    @parent = current
    node.accept(self)
  end
end
82
+
83
+ module Graphable # adds a graph method; Parser extends this module, so graph is called on the class
84
+ def graph opts
85
+ g = GraphViz.new(:G, type: :digraph) # fresh directed graph named :G
86
+ visitor = GraphvizVisitor.new(g)
87
+
88
+ new.accept(visitor) # instantiate the extending class and let the visitor walk it
89
+
90
+ g.output opts # opts are handed to the graph's output call unchanged
91
+ end
92
+ end
93
+
94
+ class Parser # reopened so that graph (defined in Graphable) becomes available as a class method
95
+ extend Graphable
96
+ end
97
+ end
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::IncrementalParser - Incremental Parser for Editor Integration
4
+ #
5
+ # Parse with support for incremental edits. This is useful for editor integration
6
+ # where the input changes frequently (e.g., as the user types).
7
+ #
8
+ # Usage:
9
+ # parser = Parsanol::IncrementalParser.new(grammar, initial_text)
10
+ #
11
+ # # When text changes
12
+ # parser.apply_edit(start: 5, deleted: 3, inserted: "new")
13
+ # result = parser.reparse
14
+ #
15
+ # Requires native extension for full functionality.
16
+
17
+ module Parsanol
18
# Represents an edit to apply to the input: +deleted+ characters starting at
# +start+ are replaced by the +inserted+ string.
class Edit
  attr_reader :start, :deleted, :inserted

  # @param start [Integer] offset at which the edit begins
  # @param deleted [Integer] number of characters removed at +start+
  # @param inserted [String] text placed at +start+ (defaults to nothing)
  def initialize(start:, deleted:, inserted: '')
    @start = start
    @deleted = deleted
    @inserted = inserted
  end

  # Get the old range that was replaced
  #
  # @return [Range] half-open range start...(start + deleted)
  def old_range
    @start...(@start + @deleted)
  end

  # Check if this edit affects a specific position
  #
  # NOTE(review): the window spans deleted + inserted characters from
  # +start+; confirm that this combined width is what callers expect.
  #
  # @param position [Integer] offset to test
  # @return [Boolean]
  def affects_position?(position)
    position >= @start && position < @start + @deleted + @inserted.length
  end

  # Get the new position after this edit
  #
  # @return [Integer] offset just past the inserted text
  def new_position
    @start + @inserted.length
  end

  # Apply this edit to a string
  #
  # @param input [String] text to edit; it is not modified
  # @return [String] a new string with the edit applied
  def apply(input)
    head = input[0...@start]
    # String#[] returns nil once the start index lies beyond the end of the
    # string; a deletion that overruns the input simply leaves no tail.
    tail = input[(@start + @deleted)..] || ''
    head + @inserted + tail
  end

  def to_s
    "Edit(#{@start}, +#{@inserted.length}, -#{@deleted})"
  end

  # Two edits are equal when start, deleted and inserted all match.
  def ==(other)
    return false unless other.is_a?(Edit)

    @start == other.start && @deleted == other.deleted && @inserted == other.inserted
  end

  # Keep Hash/Set membership consistent with #==.
  alias eql? ==

  def hash
    [@start, @deleted, @inserted].hash
  end
end
57
+
58
# Keeps an input string together with the edits applied to it and re-parses
# on demand. Uses Parsanol::Native when it reports itself available and
# otherwise parses the whole input again in Ruby.
class IncrementalParser
  # Get the current input
  #
  # @return [String] Current input
  attr_reader :input

  # Create a new incremental parser
  #
  # @param grammar [Parsanol::Parser, Parsanol::Atoms::Base] Grammar to use
  # @param initial_input [String] Initial input string
  def initialize(grammar, initial_input = '')
    @grammar = grammar
    @input = initial_input
    @native_parser = Parsanol::Native.available? ? build_native_parser(initial_input) : nil
    @edits = []
    @cached_result = nil
  end

  # Apply an edit to the parser
  #
  # @param start [Integer] Start position of edit
  # @param deleted [Integer] Number of characters deleted
  # @param inserted [String] Text to insert
  def apply_edit(start:, deleted:, inserted: '')
    edit = Edit.new(start: start, deleted: deleted, inserted: inserted)

    # Compute the new text before touching any state, so an edit that fails
    # to apply does not leave @edits out of step with @input.
    updated_input = edit.apply(@input)

    @edits << edit
    @input = updated_input

    # Invalidate cached result
    @cached_result = nil

    return unless @native_parser

    Parsanol::Native.incremental_parser_apply_edit(@native_parser, start, deleted, inserted)
  end

  # Convenience method to apply multiple edits
  #
  # @param edits [Array<Hash>] Array of {start:, deleted:, inserted:} hashes
  def apply_edits(edits)
    edits.each { |edit_hash| apply_edit(**edit_hash) }
  end

  # Reparse with current input (or optional new input)
  #
  # @param new_input [String, nil] Optional new input (replaces current)
  # @return [Object] Parse result
  def reparse(new_input = nil)
    if new_input
      @input = new_input
      @edits.clear
      @cached_result = nil
    end

    return @cached_result if @cached_result

    @cached_result =
      if @native_parser
        Parsanol::Native.incremental_parser_reparse(@native_parser, @input)
      else
        # Pure Ruby fallback - reparse from scratch
        grammar_root.parse(@input)
      end
  end

  # Invalidate a range (for external changes)
  #
  # The range is currently not inspected: the whole cached result is dropped
  # and nothing is forwarded to the native parser.
  #
  # @param start [Integer] Start position
  # @param end_pos [Integer] End position
  def invalidate_range(start, end_pos)
    @cached_result = nil
  end

  # Get all applied edits
  #
  # @return [Array<Edit>] Copy of the edit list; mutating it does not affect the parser
  def edits
    @edits.dup
  end

  # Check whether edits have been recorded and no parse result is cached
  #
  # @return [Boolean] True if there are pending edits
  def dirty?
    @cached_result.nil? && !@edits.empty?
  end

  # Reset to initial state
  #
  # @param new_input [String, nil] Optional new initial input (defaults to '')
  def reset(new_input = nil)
    @input = new_input || ''
    @edits.clear
    @cached_result = nil

    # Rebuild the native parser on every reset, including a reset to the
    # empty string; otherwise it would keep the previous text and edits.
    @native_parser = build_native_parser(@input) if @native_parser
  end

  private

  # The atom to parse with: a grammar's #root when it has one, else the
  # grammar object itself (a bare atom).
  def grammar_root
    @grammar.respond_to?(:root) ? @grammar.root : @grammar
  end

  # Serializes the grammar and creates a native parser seeded with +text+.
  def build_native_parser(text)
    grammar_json = Parsanol::Native.serialize_grammar(grammar_root)
    Parsanol::Native.incremental_parser_new(grammar_json, text)
  end
end
179
+ end