parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336)
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,399 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parslet Compatibility Specs
4
+ # These specs are adapted from Parslet's test suite to verify 100% API compatibility.
5
+ #
6
+ # Original source: https://github.com/kschiess/parslet
7
+
8
+ require 'spec_helper'
9
+ require 'parsanol/parslet'
10
+
11
+ RSpec.describe 'Parslet API Compatibility' do
12
+ include Parsanol::Parslet
13
+
14
+ # =============================================================================
15
+ # ATOM SPECS - Parser Combinators
16
+ # =============================================================================
17
+
18
+ describe 'str() atom' do
19
+ it 'matches literal strings' do
20
+ parser = str('hello')
21
+ expect(parser.parse('hello')).to eq('hello')
22
+ end
23
+
24
+ it 'fails on non-matching input' do
25
+ parser = str('hello')
26
+ expect { parser.parse('world') }.to raise_error(Parsanol::ParseFailed)
27
+ end
28
+
29
+ it 'fails on partial match' do
30
+ parser = str('hello')
31
+ expect { parser.parse('hell') }.to raise_error(Parsanol::ParseFailed)
32
+ end
33
+ end
34
+
35
+ describe 'match() atom' do
36
+ it 'matches character classes' do
37
+ parser = match('[a-z]')
38
+ expect(parser.parse('x')).to eq('x')
39
+ end
40
+
41
+ it 'matches digits' do
42
+ parser = match('[0-9]')
43
+ expect(parser.parse('5')).to eq('5')
44
+ end
45
+
46
+ it 'fails on non-matching character' do
47
+ parser = match('[a-z]')
48
+ expect { parser.parse('5') }.to raise_error(Parsanol::ParseFailed)
49
+ end
50
+ end
51
+
52
+ describe 'any atom' do
53
+ it 'matches any single character' do
54
+ parser = any
55
+ expect(parser.parse('x')).to eq('x')
56
+ expect(parser.parse('5')).to eq('5')
57
+ expect(parser.parse(' ')).to eq(' ')
58
+ end
59
+
60
+ it 'fails on empty input' do
61
+ parser = any
62
+ expect { parser.parse('') }.to raise_error(Parsanol::ParseFailed)
63
+ end
64
+ end
65
+
66
+ # =============================================================================
67
+ # COMBINATOR SPECS
68
+ # =============================================================================
69
+
70
+ describe 'sequence (>>) combinator' do
71
+ it 'matches sequences in order' do
72
+ parser = str('a') >> str('b')
73
+ expect(parser.parse('ab')).to eq('ab')
74
+ end
75
+
76
+ it 'returns merged hash for named captures' do
77
+ parser = str('a').as(:first) >> str('b').as(:second)
78
+ result = parser.parse('ab')
79
+ expect(result).to eq({ first: 'a', second: 'b' })
80
+ end
81
+
82
+ it 'discards unnamed matches when named captures present' do
83
+ # This is the KEY Parslet semantic!
84
+ parser = str('SCHEMA ') >> match('[a-z]').repeat(1).as(:name) >> str(';')
85
+ result = parser.parse('SCHEMA test;')
86
+ expect(result).to eq({ name: 'test' })
87
+ end
88
+
89
+ it 'joins consecutive unnamed strings' do
90
+ parser = str('a') >> str('b') >> str('c')
91
+ expect(parser.parse('abc')).to eq('abc')
92
+ end
93
+ end
94
+
95
+ describe 'alternative (|) combinator' do
96
+ it 'tries alternatives in order' do
97
+ parser = str('a') | str('b')
98
+ expect(parser.parse('a')).to eq('a')
99
+ expect(parser.parse('b')).to eq('b')
100
+ end
101
+
102
+ it 'fails if no alternative matches' do
103
+ parser = str('a') | str('b')
104
+ expect { parser.parse('c') }.to raise_error(Parsanol::ParseFailed)
105
+ end
106
+ end
107
+
108
+ describe 'repetition (.repeat)' do
109
+ it 'matches zero or more times' do
110
+ parser = match('[a-z]').repeat(0)
111
+ expect(parser.parse('')).to eq('')
112
+ expect(parser.parse('abc')).to eq('abc')
113
+ end
114
+
115
+ it 'matches one or more times' do
116
+ parser = match('[a-z]').repeat(1)
117
+ expect(parser.parse('abc')).to eq('abc')
118
+ expect { parser.parse('') }.to raise_error(Parsanol::ParseFailed)
119
+ end
120
+
121
+ it 'respects max boundary' do
122
+ # repeat(0, 2) matches at most 2 characters
123
+ # The input below is therefore exactly 2 characters long; a 3-character input is not exercised by this spec
124
+ parser = match('[a-z]').repeat(0, 2)
125
+ expect(parser.parse('ab')).to eq('ab') # Parse 'ab', not 'abc'
126
+ end
127
+
128
+ it 'produces array of named captures when name comes before repeat' do
129
+ # key difference: .as(:x).repeat(1) vs .repeat(1).as(:x)
130
+ # .as(:x).repeat(1) produces [{x: 'a'}, {x: 'b'}, {x: 'c'}]
131
+ parser = match('[a-z]').as(:letter).repeat(1)
132
+ result = parser.parse('abc')
133
+ expect(result).to be_an(Array)
134
+ expect(result.length).to eq(3)
135
+ expect(result.first).to eq({ letter: 'a' })
136
+ end
137
+
138
+ it 'produces single hash when repeat comes before name' do
139
+ # .repeat(1).as(:x) produces {x: 'abc'}
140
+ parser = match('[a-z]').repeat(1).as(:letters)
141
+ result = parser.parse('abc')
142
+ expect(result).to eq({ letters: 'abc' })
143
+ end
144
+ end
145
+
146
+ describe '.maybe (optional)' do
147
+ it 'matches zero or one time' do
148
+ parser = str('a').maybe
149
+ expect(parser.parse('')).to eq('')
150
+ expect(parser.parse('a')).to eq('a')
151
+ end
152
+ end
153
+
154
+ describe '.as (named capture)' do
155
+ it 'captures match with name' do
156
+ # match('[a-z]') only matches ONE character
157
+ # Use .repeat(1) to match multiple characters
158
+ parser = match('[a-z]').repeat(1).as(:word)
159
+ expect(parser.parse('hello')).to eq({ word: 'hello' })
160
+ end
161
+
162
+ it 'captures sequences' do
163
+ parser = (str('a') >> str('b')).as(:pair)
164
+ expect(parser.parse('ab')).to eq({ pair: 'ab' })
165
+ end
166
+ end
167
+
168
+ # =============================================================================
169
+ # PARSER CLASS SPECS
170
+ # =============================================================================
171
+
172
+ describe 'Parser class' do
173
+ let(:parser_class) do
174
+ Class.new(Parsanol::Parslet::Parser) do
175
+ include Parsanol::Parslet
176
+
177
+ rule(:digit) { match('[0-9]') }
178
+ rule(:number) { digit.repeat(1).as(:num) }
179
+ rule(:letter) { match('[a-z]') }
180
+ rule(:word) { letter.repeat(1).as(:word) }
181
+ rule(:expression) { number >> str(' ') >> word }
182
+ root(:expression)
183
+ end
184
+ end
185
+
186
+ it 'parses using root rule' do
187
+ parser = parser_class.new
188
+ result = parser.parse('123 hello')
189
+ expect(result).to eq({ num: '123', word: 'hello' })
190
+ end
191
+
192
+ it 'raises ParseFailed on invalid input' do
193
+ parser = parser_class.new
194
+ expect { parser.parse('abc') }.to raise_error(Parsanol::ParseFailed)
195
+ end
196
+ end
197
+
198
+ # =============================================================================
199
+ # TRANSFORM SPECS
200
+ # =============================================================================
201
+
202
+ describe 'Transform class' do
203
+ let(:transform_class) do
204
+ Class.new(Parsanol::Parslet::Transform) do
205
+ rule(num: simple(:n)) { Integer(n) }
206
+ rule(word: simple(:w)) { w.to_s.upcase }
207
+ rule(num: simple(:n), word: simple(:w)) { { number: Integer(n), word: w.to_s.upcase } }
208
+ end
209
+ end
210
+
211
+ it 'transforms simple patterns' do
212
+ transform = transform_class.new
213
+ expect(transform.apply({ num: '42' })).to eq(42)
214
+ expect(transform.apply({ word: 'hello' })).to eq('HELLO')
215
+ end
216
+
217
+ it 'transforms complex patterns' do
218
+ transform = transform_class.new
219
+ result = transform.apply({ num: '123', word: 'world' })
220
+ expect(result).to eq({ number: 123, word: 'WORLD' })
221
+ end
222
+ end
223
+
224
+ # =============================================================================
225
+ # NATIVE PARSER COMPATIBILITY SPECS
226
+ # =============================================================================
227
+
228
+ describe 'Native parser compatibility', if: defined?(Parsanol::Native) && Parsanol::Native.available? do
229
+ describe 'sequence flattening' do
230
+ it 'produces Parslet-compatible AST for SCHEMA example' do
231
+ # This is the exact test case from TODO.parslet-compat-fix.md (a development note that is not shipped with the gem)
232
+ parser = str('SCHEMA ') >> match('[a-z]').repeat(1).as(:name) >> str(';')
233
+
234
+ input = 'SCHEMA test;'
235
+ ruby_ast = parser.parse(input)
236
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
237
+
238
+ # Both should produce same structure (ignoring Slice position info)
239
+ expect(ruby_ast.keys).to eq(native_ast.keys)
240
+ expect(ruby_ast[:name]).to eq(native_ast[:name])
241
+ end
242
+
243
+ it 'handles nested sequences correctly' do
244
+ parser = str('(') >>
245
+ match('[a-z]').as(:first) >>
246
+ str(',') >>
247
+ match('[a-z]').as(:second) >>
248
+ str(')')
249
+
250
+ input = '(a,b)'
251
+ ruby_ast = parser.parse(input)
252
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
253
+
254
+ expect(native_ast).to eq({ first: 'a', second: 'b' })
255
+ expect(native_ast.keys).to eq(ruby_ast.keys)
256
+ end
257
+
258
+ it 'returns single value for single named capture' do
259
+ # Use .repeat(1).as() pattern for matching multiple characters
260
+ parser = match('[a-z]').repeat(1).as(:word)
261
+ input = 'hello'
262
+
263
+ ruby_ast = parser.parse(input)
264
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
265
+
266
+ expect(native_ast).to eq({ word: 'hello' })
267
+ expect(native_ast.keys).to eq(ruby_ast.keys)
268
+ end
269
+
270
+ it 'handles repetitions with named captures (name before repeat)' do
271
+ # .as(:x).repeat(1) produces array of hashes
272
+ parser = match('[a-z]').as(:letter).repeat(1)
273
+ input = 'abc'
274
+
275
+ ruby_ast = parser.parse(input)
276
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
277
+
278
+ # Ruby produces an array of hashes with Slice values
279
+ expect(ruby_ast).to be_an(Array)
280
+ expect(ruby_ast.length).to eq(3)
281
+ expect(ruby_ast.first.keys).to eq([:letter])
282
+
283
+ # Native should produce same structure (array of hashes)
284
+ expect(native_ast).to be_an(Array)
285
+ expect(native_ast.length).to eq(3)
286
+ expect(native_ast.first.keys).to eq([:letter])
287
+ expect(native_ast.first[:letter]).to eq('a')
288
+ end
289
+
290
+ it 'handles repetitions with named captures (repeat before name)' do
291
+ # .repeat(1).as(:x) produces single hash with joined string
292
+ parser = match('[a-z]').repeat(1).as(:letters)
293
+ input = 'abc'
294
+
295
+ ruby_ast = parser.parse(input)
296
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
297
+
298
+ expect(native_ast).to eq({ letters: 'abc' })
299
+ expect(native_ast).to eq(ruby_ast)
300
+ end
301
+
302
+ it 'handles nested wrapper pattern (EXPRESS-like syntax)' do
303
+ # This tests the wrapper pattern detection for sequences where items
304
+ # have different inner keys under the same wrapper key.
305
+ #
306
+ # NOTE: When there are duplicate keys in a sequence, Parslet KEEPS THE LAST ONE
307
+ # (with a warning). So the result is {:wrapper => {:second => "..."}}, not merged.
308
+ #
309
+ # The wrapper pattern detection is important for Expressir-style grammars where
310
+ # the native parser might produce:
311
+ # [{:syntax => {:spaces => {...}}}, {:syntax => {:schemaDecl => [...]}}]
312
+ # But this should NOT be merged because the values are HASHES with DIFFERENT keys.
313
+ #
314
+ # For now, we test that the native parser produces the same result as Ruby.
315
+
316
+ parser = (
317
+ match('[a-z]').repeat(1).as(:first).as(:wrapper) >>
318
+ str(' ') >>
319
+ match('[0-9]').repeat(1).as(:second).as(:wrapper)
320
+ )
321
+
322
+ input = 'abc 123'
323
+ ruby_ast = parser.parse(input)
324
+ native_ast = Parsanol::Native.parse_parslet_compatible(parser, input)
325
+
326
+ # Both should produce the same result (Ruby overwrites duplicates)
327
+ expect(native_ast).to be_a(Hash)
328
+ expect(native_ast.keys).to eq([:wrapper])
329
+ expect(native_ast[:wrapper]).to be_a(Hash)
330
+ expect(native_ast[:wrapper].keys).to eq([:second])
331
+
332
+ # Native should match Ruby (only the top-level keys are compared here)
333
+ expect(native_ast.keys).to eq(ruby_ast.keys)
334
+ end
335
+
336
+ it 'distinguishes wrapper pattern from repetition pattern' do
337
+ # Repetition pattern: .as(:x).repeat(2) should produce array
338
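+ # Wrapper pattern: sequence items sharing one outer key are expected to merge into a single hash (NOTE(review): the EXPRESS-like spec earlier in this file expects last-key-wins for a similar grammar; confirm both expectations hold)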
339
+ #
340
+ # This test ensures we don't incorrectly merge repetition results
341
+
342
+ # Repetition: should produce array of hashes
343
+ repetition_parser = match('[a-z]').as(:letter).repeat(2)
344
+ repetition_result = Parsanol::Native.parse_parslet_compatible(repetition_parser, 'ab')
345
+ expect(repetition_result).to be_an(Array)
346
+ expect(repetition_result.length).to eq(2)
347
+ expect(repetition_result).to eq([{ letter: 'a' }, { letter: 'b' }])
348
+
349
+ # Wrapper: should merge into single hash
350
+ wrapper_parser = (
351
+ match('[a-z]').as(:char).as(:group) >>
352
+ match('[0-9]').as(:digit).as(:group)
353
+ )
354
+ wrapper_result = Parsanol::Native.parse_parslet_compatible(wrapper_parser, 'a5')
355
+ expect(wrapper_result).to be_a(Hash)
356
+ expect(wrapper_result.keys).to eq([:group])
357
+ expect(wrapper_result[:group]).to eq({ char: 'a', digit: '5' })
358
+ end
359
+
360
+ it 'handles repetition with separator pattern correctly' do
361
+ # This is the common pattern: X (separator X)*
362
+ # Example: item (',' item)* used in parameter lists, argument lists, etc.
363
+ #
364
+ # The key test is that items in the repetition should NOT be wrapped
365
+ # with the parent key. They should keep their own keys.
366
+ #
367
+ # Bug history: Previously, (separator >> item).repeat.as(:rest) would
368
+ # incorrectly produce: {rest: [{rest: {name: "b"}}, {rest: {name: "c"}}]}
369
+ # instead of the correct: {rest: [{name: "b"}, {name: "c"}]}
370
+
371
+ # Grammar under test: item.as(:first) >> (separator >> item).repeat.as(:rest)
372
+ parser = Class.new(Parsanol::Parser) do
373
+ include Parsanol
374
+
375
+ rule(:item) { match('[a-z]').as(:name) }
376
+ rule(:separator) { str(',') }
377
+ rule(:list) { item.as(:first) >> (separator >> item).repeat.as(:rest) }
378
+
379
+ root(:list)
380
+ end.new
381
+
382
+ result = Parsanol::Native.parse_parslet_compatible(parser.root, 'a,b,c')
383
+
384
+ expect(result).to be_a(Hash)
385
+ expect(result[:first]).to eq({ name: 'a' })
386
+ expect(result[:rest]).to be_an(Array)
387
+ expect(result[:rest].length).to eq(2)
388
+
389
+ # CRITICAL: Items should have :name key, NOT :rest key
390
+ expect(result[:rest][0]).to eq({ name: 'b' })
391
+ expect(result[:rest][1]).to eq({ name: 'c' })
392
+
393
+ # Verify same result as Ruby parser
394
+ ruby_result = parser.parse('a,b,c')
395
+ expect(result).to eq(ruby_result)
396
+ end
397
+ end
398
+ end
399
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Imported from Parslet test suite
4
+ # Original: https://github.com/kschiess/parslet/blob/main/spec/atom_spec.rb
5
+ #
6
+ # These tests verify that Parsanol::Parslet behaves identically to Parslet
7
+ # for the core atom operations.
8
+
9
+ require_relative 'spec_helper'
10
+
11
RSpec.describe 'Parslet Atoms' do
  # Module under test: Parslet itself when PARSANOL_BACKEND=parslet,
  # otherwise Parsanol::Parslet.
  let(:parslet) { ENV['PARSANOL_BACKEND'] == 'parslet' ? Parslet : Parsanol::Parslet }

  # Error class of whichever backend was selected above.
  let(:parse_failed) { parslet::ParseFailed }

  describe 'str() atom' do
    # Literal matcher shared by most examples in this group.
    let(:hello) { parslet.str('hello') }

    it 'matches a literal string' do
      expect(hello.parse('hello')).to eq('hello')
    end

    it 'fails on non-matching input' do
      expect { hello.parse('world') }.to raise_error(parse_failed)
    end

    it 'fails on partial match' do
      expect { hello.parse('hell') }.to raise_error(parse_failed)
    end

    it 'fails on extra input' do
      expect { hello.parse('hello world') }.to raise_error(parse_failed)
    end

    it 'matches empty string' do
      expect(parslet.str('').parse('')).to eq('')
    end
  end

  describe 'match() atom' do
    # Single lowercase-letter matcher shared by several examples.
    let(:lowercase) { parslet.match('[a-z]') }

    it 'matches character classes' do
      expect(lowercase.parse('x')).to eq('x')
    end

    it 'matches digits' do
      expect(parslet.match('[0-9]').parse('5')).to eq('5')
    end

    it 'fails on non-matching character' do
      expect { lowercase.parse('5') }.to raise_error(parse_failed)
    end

    it 'matches only one character' do
      expect { lowercase.parse('abc') }.to raise_error(parse_failed)
    end

    it 'matches multiple character classes' do
      expect(parslet.match('[a-zA-Z]').parse('X')).to eq('X')
    end
  end

  describe 'any atom' do
    let(:any_char) { parslet.any }

    it 'matches any single character' do
      # A letter, a digit and whitespace are each returned unchanged.
      ['x', '5', ' '].each do |char|
        expect(any_char.parse(char)).to eq(char)
      end
    end

    it 'fails on empty input' do
      expect { any_char.parse('') }.to raise_error(parse_failed)
    end

    it 'matches only one character' do
      expect { any_char.parse('ab') }.to raise_error(parse_failed)
    end
  end
end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Imported from Parslet test suite
4
+ # Original: https://github.com/kschiess/parslet/blob/main/spec/combinator_spec.rb
5
+ #
6
+ # These tests verify that Parsanol::Parslet behaves identically to Parslet
7
+ # for the core combinator operations.
8
+
9
+ require_relative 'spec_helper'
10
+
11
RSpec.describe 'Parslet Combinators' do
  # Module under test: Parslet itself when PARSANOL_BACKEND=parslet,
  # otherwise Parsanol::Parslet.
  let(:parslet) do
    ENV['PARSANOL_BACKEND'] == 'parslet' ? Parslet : Parsanol::Parslet
  end

  describe 'sequence (>>) combinator' do
    it 'matches sequences in order' do
      parser = parslet.str('a') >> parslet.str('b')
      expect(parser.parse('ab')).to eq('ab')
    end

    it 'fails on first part failure' do
      parser = parslet.str('a') >> parslet.str('b')
      expect { parser.parse('xb') }.to raise_error(parslet::ParseFailed)
    end

    it 'fails on second part failure' do
      parser = parslet.str('a') >> parslet.str('b')
      expect { parser.parse('ax') }.to raise_error(parslet::ParseFailed)
    end

    it 'returns merged hash for named captures' do
      parser = parslet.str('a').as(:first) >> parslet.str('b').as(:second)
      result = parser.parse('ab')
      expect(result).to eq({ first: 'a', second: 'b' })
    end

    it 'discards unnamed matches when named captures present' do
      # This is the KEY Parslet semantic: the unnamed 'SCHEMA ' and ';'
      # parts do not appear in the result once one part carries a name.
      parser = parslet.str('SCHEMA ') >> parslet.match('[a-z]').repeat(1).as(:name) >> parslet.str(';')
      result = parser.parse('SCHEMA test;')
      expect(result).to eq({ name: 'test' })
    end

    it 'joins consecutive unnamed strings' do
      parser = parslet.str('a') >> parslet.str('b') >> parslet.str('c')
      expect(parser.parse('abc')).to eq('abc')
    end

    it 'handles three-part sequences' do
      parser = parslet.str('a') >> parslet.str('b') >> parslet.str('c')
      expect(parser.parse('abc')).to eq('abc')
      # The two-part failure examples above never reach a third element, so
      # also check that a mismatch in the last position rejects the input.
      expect { parser.parse('abx') }.to raise_error(parslet::ParseFailed)
    end
  end

  describe 'alternative (|) combinator' do
    it 'tries alternatives in order' do
      parser = parslet.str('a') | parslet.str('b')
      expect(parser.parse('a')).to eq('a')
      expect(parser.parse('b')).to eq('b')
    end

    it 'succeeds on first match' do
      parser = parslet.str('a') | parslet.str('ab')
      expect(parser.parse('a')).to eq('a')
    end

    it 'tries second if first fails' do
      parser = parslet.str('x') | parslet.str('a')
      expect(parser.parse('a')).to eq('a')
    end

    it 'fails if no alternative matches' do
      parser = parslet.str('a') | parslet.str('b')
      expect { parser.parse('c') }.to raise_error(parslet::ParseFailed)
    end

    it 'handles multiple alternatives' do
      parser = parslet.str('a') | parslet.str('b') | parslet.str('c')
      expect(parser.parse('b')).to eq('b')
    end
  end

  describe 'repetition (.repeat)' do
    it 'matches zero or more times' do
      parser = parslet.match('[a-z]').repeat(0)
      expect(parser.parse('')).to eq('')
      expect(parser.parse('abc')).to eq('abc')
    end

    it 'matches one or more times' do
      parser = parslet.match('[a-z]').repeat(1)
      expect(parser.parse('abc')).to eq('abc')
      expect { parser.parse('') }.to raise_error(parslet::ParseFailed)
    end

    it 'respects min boundary' do
      parser = parslet.match('[a-z]').repeat(2)
      expect(parser.parse('ab')).to eq('ab')
      expect { parser.parse('a') }.to raise_error(parslet::ParseFailed)
    end

    it 'respects max boundary' do
      parser = parslet.match('[a-z]').repeat(0, 2)
      # Exactly max characters: parses, does not fail.
      expect(parser.parse('ab')).to eq('ab')
      # One character beyond max: the repetition stops at two, leaving input
      # unconsumed, so the whole parse is rejected. Without this check an
      # implementation that ignored the upper bound would still pass.
      expect { parser.parse('abc') }.to raise_error(parslet::ParseFailed)
    end

    it 'produces array of named captures when name comes before repeat' do
      # .as(:x).repeat(1) produces [{x: 'a'}, {x: 'b'}, {x: 'c'}]
      parser = parslet.match('[a-z]').as(:letter).repeat(1)
      result = parser.parse('abc')
      expect(result).to be_an(Array)
      expect(result.length).to eq(3)
      expect(result.first).to eq({ letter: 'a' })
    end

    it 'produces single hash when repeat comes before name' do
      # .repeat(1).as(:x) produces {x: 'abc'}
      parser = parslet.match('[a-z]').repeat(1).as(:letters)
      result = parser.parse('abc')
      expect(result).to eq({ letters: 'abc' })
    end
  end

  describe '.maybe (optional)' do
    it 'matches zero or one time' do
      parser = parslet.str('a').maybe
      expect(parser.parse('')).to eq('')
      expect(parser.parse('a')).to eq('a')
    end

    it 'does not consume more than one' do
      parser = parslet.str('a').maybe >> parslet.str('b')
      expect(parser.parse('ab')).to eq('ab')
      expect(parser.parse('b')).to eq('b')
    end

    it 'returns empty string for no match' do
      parser = parslet.str('x').maybe
      expect(parser.parse('')).to eq('')
    end
  end

  describe '.as (named capture)' do
    it 'captures match with name' do
      parser = parslet.match('[a-z]').repeat(1).as(:word)
      expect(parser.parse('hello')).to eq({ word: 'hello' })
    end

    it 'captures sequences' do
      parser = (parslet.str('a') >> parslet.str('b')).as(:pair)
      expect(parser.parse('ab')).to eq({ pair: 'ab' })
    end

    it 'captures single character' do
      parser = parslet.match('[a-z]').as(:char)
      expect(parser.parse('x')).to eq({ char: 'x' })
    end
  end
end