parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "parsanol/lexer"
5
+
6
# Specs for Parsanol::Lexer: the class-level DSL (`token`, `keyword`, `ignore`)
# and the token hashes returned by `#tokenize`.
#
# Each example group builds its lexer as an anonymous subclass inside `let`
# instead of declaring a named class in a `before` hook. A named class
# declared in a hook becomes a global constant that outlives the group and is
# reopened before every example, re-running its DSL calls each time.
RSpec.describe Parsanol::Lexer do
  describe "basic tokenization" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :word, /[a-z]+/
        token :number, /[0-9]+/
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "tokenizes words" do
      tokens = lexer.tokenize("hello world")
      expect(tokens.size).to eq(3) # 2 words + eof
      expect(tokens[0]["type"]).to eq("word")
      expect(tokens[0]["value"]).to eq("hello")
      expect(tokens[1]["type"]).to eq("word")
      expect(tokens[1]["value"]).to eq("world")
    end

    it "tokenizes numbers" do
      tokens = lexer.tokenize("123 456")
      expect(tokens.size).to eq(3) # 2 numbers + eof
      expect(tokens[0]["type"]).to eq("number")
      expect(tokens[0]["value"]).to eq("123")
      expect(tokens[1]["type"]).to eq("number")
      expect(tokens[1]["value"]).to eq("456")
    end

    it "ignores whitespace" do
      tokens = lexer.tokenize("hello world")
      expect(tokens.size).to eq(3)
    end

    it "includes location information" do
      tokens = lexer.tokenize("hello")
      # The first token is expected at line 1, column 1, offset 0.
      expect(tokens[0]["location"]["line"]).to eq(1)
      expect(tokens[0]["location"]["column"]).to eq(1)
      expect(tokens[0]["location"]["offset"]).to eq(0)
    end

    it "tracks line and column correctly" do
      tokens = lexer.tokenize("hello\nworld")
      expect(tokens[0]["location"]["line"]).to eq(1)
      expect(tokens[0]["location"]["column"]).to eq(1)
      expect(tokens[1]["location"]["line"]).to eq(2)
      expect(tokens[1]["location"]["column"]).to eq(1)
    end

    it "adds eof token at the end" do
      tokens = lexer.tokenize("hello")
      expect(tokens.last["type"]).to eq("eof")
    end
  end

  describe "priority handling" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :keyword, /if|else|while/, priority: 100
        token :identifier, /[a-z]+/, priority: 1
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "matches higher priority patterns first" do
      tokens = lexer.tokenize("if else while")
      expect(tokens[0]["type"]).to eq("keyword")
      expect(tokens[0]["value"]).to eq("if")
      expect(tokens[1]["type"]).to eq("keyword")
      expect(tokens[1]["value"]).to eq("else")
      expect(tokens[2]["type"]).to eq("keyword")
      expect(tokens[2]["value"]).to eq("while")
    end

    it "falls back to lower priority when no match" do
      tokens = lexer.tokenize("if variable else")
      expect(tokens[0]["type"]).to eq("keyword")
      expect(tokens[1]["type"]).to eq("identifier")
      expect(tokens[1]["value"]).to eq("variable")
      expect(tokens[2]["type"]).to eq("keyword")
    end
  end

  describe "keyword helper" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        keyword :if, :then, :else, priority: 50
        token :identifier, /[a-z]+/, priority: 1
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "creates keyword tokens with high priority" do
      tokens = lexer.tokenize("if x then y")
      # Keyword token types are expected in upper case.
      expect(tokens[0]["type"]).to eq("IF")
      expect(tokens[1]["type"]).to eq("identifier")
      expect(tokens[2]["type"]).to eq("THEN")
    end
  end

  describe "longest match rule" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :string, /"[^"]*"/
        token :quote, /"/
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "prefers longer matches" do
      tokens = lexer.tokenize('"hello"')
      expect(tokens.size).to eq(2) # string + eof
      expect(tokens[0]["type"]).to eq("string")
      expect(tokens[0]["value"]).to eq('"hello"')
    end
  end

  describe "inheritance" do
    let(:base_lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :word, /[a-z]+/
        ignore(/\s+/)
      end
    end

    # Eager (`let!`) so the subclass has been defined in every example,
    # including the one that checks the parent is left untouched by it.
    let!(:extended_lexer_class) do
      Class.new(base_lexer_class) do
        token :number, /[0-9]+/
      end
    end

    let(:base_lexer) { base_lexer_class.new }
    let(:extended_lexer) { extended_lexer_class.new }

    it "inherits tokens from parent class" do
      tokens = extended_lexer.tokenize("hello 123")
      # word + number + eof = 3 tokens
      expect(tokens.size).to eq(3)
      expect(tokens[0]["type"]).to eq("word")
      expect(tokens[0]["value"]).to eq("hello")
      expect(tokens[1]["type"]).to eq("number")
      expect(tokens[1]["value"]).to eq("123")
    end

    it "does not modify parent class" do
      tokens = base_lexer.tokenize("hello world")
      # Base lexer should only match words
      expect(tokens.size).to eq(3) # word + word + eof
      expect(tokens[0]["type"]).to eq("word")
      expect(tokens[0]["value"]).to eq("hello")
      expect(tokens[1]["type"]).to eq("word")
      expect(tokens[1]["value"]).to eq("world")
    end
  end

  describe "error handling" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :word, /[a-z]+/
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "produces error token for unrecognized input" do
      tokens = lexer.tokenize("hello 123 world")
      # No rule of this lexer matches digits, so "123" must surface as one or
      # more error tokens; the exact count is deliberately not pinned.
      error_tokens = tokens.select { |t| t["type"] == "error" }
      expect(error_tokens.size).to be >= 1
    end
  end

  describe "JSON example" do
    let(:lexer_class) do
      Class.new(Parsanol::Lexer) do
        token :string, /"[^"]*"/
        token :number, /-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?/
        token :true, /true/
        token :false, /false/
        token :null, /null/
        token :lbrace, /\{/
        token :rbrace, /\}/
        token :lbracket, /\[/
        token :rbracket, /\]/
        token :colon, /:/
        token :comma, /,/
        ignore(/\s+/)
      end
    end

    let(:lexer) { lexer_class.new }

    it "tokenizes simple JSON object" do
      tokens = lexer.tokenize('{"name": "test"}')
      types = tokens.map { |t| t["type"] }
      expect(types).to eq(%w[lbrace string colon string rbrace eof])
    end

    it "tokenizes JSON with numbers" do
      tokens = lexer.tokenize('{"count": 42}')
      types = tokens.map { |t| t["type"] }
      expect(types).to eq(%w[lbrace string colon number rbrace eof])
    end

    it "tokenizes JSON with boolean" do
      tokens = lexer.tokenize('{"active": true}')
      types = tokens.map { |t| t["type"] }
      expect(types).to eq(%w[lbrace string colon true rbrace eof])
    end

    it "tokenizes complex JSON" do
      tokens = lexer.tokenize('{"items": [1, 2, 3], "nested": {"x": true}}')
      types = tokens.map { |t| t["type"] }
      expect(types).to eq(%w[
        lbrace string colon lbracket number comma number comma number rbracket
        comma string colon lbrace string colon true rbrace rbrace eof
      ])
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
# Regression table for the shape of the value returned by `#parse` when
# `maybe`, `repeat` and `as` are combined.
describe 'Result of a Parsanol#parse' do
  include Parsanol
  extend Parsanol

  describe 'regression' do
    # Each row is: [parser, input, expected result once positions are stripped].
    regression_cases = [
      # Behaviour with maybe-nil
      [str('foo').maybe >> str('bar'), 'bar', 'bar'],
      [str('bar') >> str('foo').maybe, 'bar', 'bar'],

      # These might be hard to understand; look at the result of
      #   str.maybe >> str
      # and
      #   str >> str.maybe
      # first.
      [(str('f').maybe >> str('b')).repeat, 'bb', 'bb'],
      [(str('b') >> str('f').maybe).repeat, 'bb', 'bb'],

      [str('a').as(:a) >> (str('b') >> str('c').as(:a)).repeat, 'abc',
       [{ a: 'a' }, { a: 'c' }]],

      [str('a').as(:a).repeat >> str('b').as(:b).repeat, 'ab', [{ a: 'a' }, { b: 'b' }]],

      # Repetition behaviour / named vs. unnamed
      [str('f').repeat, '', ''],
      [str('f').repeat.as(:f), '', { f: [] }],

      # Maybe behaviour / named vs. unnamed
      [str('f').maybe, '', ''],
      [str('f').maybe.as(:f), '', { f: nil }]
    ]

    regression_cases.each do |atom, text, expected|
      context atom.inspect.to_s do
        it "parses \"#{text}\" into \"#{expected}\"" do
          parsed = atom.parse(text)
          expect(strip_positions(parsed)).to eq(expected)
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
# Checks that building alternatives with `|` never mutates the atom it is
# called on.
describe Parsanol::Atoms::Alternative do
  include Parsanol

  describe '| shortcut' do
    let(:alternative) { str('a') | str('b') }

    context 'when chained with different atoms' do
      # Chain something else onto the alternative first. If `|` modified the
      # receiver in place, the 'd' branch would leak into `chained` below.
      before do
        alternative | str('d')
      end

      let!(:chained) { alternative | str('c') }

      it 'is side-effect free' do
        expect(chained).to parse('c')
        expect(chained).to parse('a')
        expect(chained).not_to parse('d')
      end
    end
  end
end
@@ -0,0 +1,127 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the behaviour shared by all atoms through Parsanol::Atoms::Base:
# result flattening, `#parse` entry-point options and error reporting.
describe Parsanol::Atoms::Base do
  let(:parslet) { Parsanol::Atoms::Base.new }
  let(:context) { Parsanol::Atoms::Context.new }

  describe '<- #try(io)' do
    it 'raises NotImplementedError' do
      expect { parslet.try(double(:io), context, false) }
        .to raise_error(NotImplementedError)
    end
  end

  describe '<- #flatten_sequence' do
    # Flat list of (input sequence, expected result) pairs, consumed two at a
    # time by each_slice(2) below.
    [
      # 9 possibilities for making a word of 2 letters from the alphabet of
      # A(rray), H(ash) and S(tring). Make sure that all results are valid.
      #
      %w[a b], 'ab',                                  # S S
      [['a'], ['b']], %w[a b],                        # A A
      [{ a: 'a' }, { b: 'b' }], { a: 'a', b: 'b' },   # H H

      [{ a: 'a' }, ['a']], [{ a: 'a' }, 'a'],         # H A
      [{ a: 'a' }, 's'], { a: 'a' },                  # H S

      [['a'], { a: 'a' }], ['a', { a: 'a' }],         # A H (symmetric to H A)
      [['a'], 'b'], ['a'],                            # A S

      ['a', { b: 'b' }], { b: 'b' },                  # S H (symmetric to H S)
      ['a', ['b']], ['b'],                            # S A (symmetric to A S)

      [nil, ['a']], ['a'],                            # handling of lhs nil
      [nil, { a: 'a' }], { a: 'a' },
      [['a'], nil], ['a'],                            # handling of rhs nil
      [{ a: 'a' }, nil], { a: 'a' }
    ].each_slice(2) do |sequence, result|
      context "for #{sequence.inspect}" do
        it "equals #{result.inspect}" do
          expect(parslet.flatten_sequence(sequence)).to eq(result)
        end
      end
    end
  end

  describe '<- #flatten_repetition' do
    # Flattens +obj+ as the result of an unnamed repetition.
    def unnamed(obj)
      parslet.flatten_repetition(obj, false)
    end

    it 'gives subtrees precedence' do
      expect(unnamed([[{ a: 'a' }, { m: 'm' }], { a: 'a' }])).to eq([{ a: 'a' }])
    end
  end

  describe '#parse(source)' do
    context 'when given something that looks like a source' do
      let(:source) do
        double('source lookalike',
               line_and_column: [1, 2],
               bytepos: 1,
               chars_left: 0)
      end

      it 'does not rewrap in a source' do
        expect(Parsanol::Source).not_to receive(:new)

        begin
          parslet.parse(source)
        rescue NotImplementedError
          # Expected: the bare Base atom cannot match anything (see the #try
          # example above). Only the absence of Source.new matters here.
        end
      end
    end
  end

  context 'when the parse fails, the exception' do
    it 'contains a string' do
      # Assert that the failure is actually raised. A bare `rescue` around the
      # parse would let this example pass without checking anything if the
      # parse unexpectedly succeeded.
      expect { Parsanol.str('foo').parse('bar') }
        .to raise_error(Parsanol::ParseFailed) { |error|
          expect(error.message).to be_kind_of(String)
        }
    end
  end

  context 'when not all input is consumed' do
    let(:parslet) { Parsanol.str('foo') }

    it 'raises with a proper error message' do
      error = catch_failed_parse do
        parslet.parse('foobar')
      end

      expect(error.to_s).to eq("Don't know what to do with \"bar\" at line 1 char 4.")
    end
  end

  context 'when only parsing string prefix' do
    let(:parslet) { Parsanol.str('foo') >> Parsanol.str('bar') }

    it 'returns the first half on a prefix parse' do
      expect(parslet.parse('foobarbaz', prefix: true)).to eq('foobar')
    end
  end

  describe ':reporter option' do
    let(:parslet) { Parsanol.str('test') >> Parsanol.str('ing') }
    let(:reporter) { double(:reporter) }

    it 'replaces the default reporter' do
      cause = double(:cause)

      # Two levels of the parse, calling two different error reporting
      # methods.
      expect(reporter).to receive(:err_at).once
      expect(reporter).to receive(:err).and_return(cause).once
      expect(reporter).to receive(:succ).once

      # The final cause will be sent the #raise method.
      expect(cause).to receive(:raise).once.and_throw(:raise)

      catch(:raise) do
        parslet.parse('testung', reporter: reporter)

        raise 'NEVER REACHED'
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
# Checks that `capture(name)` stores the matched result in the context's
# captures under the given name.
describe Parsanol::Atoms::Capture do
  include Parsanol

  let(:context) { Parsanol::Atoms::Context.new(nil) }

  # Applies +parser+ to +string+ using the shared context, so that captures
  # can be inspected on the context afterwards.
  def inject(string, parser)
    parser.apply(Parsanol::Source.new(string), context, true)
  end

  it 'should capture simple results' do
    inject('a', str('a').capture(:a))

    captured = strip_positions(context.captures[:a])
    expect(captured).to eq('a')
  end

  it 'should capture complex results' do
    inject('a', str('a').as(:b).capture(:a))

    captured = strip_positions(context.captures[:a])
    expect(captured).to eq({ b: 'a' })
  end
end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+
3
# Placeholder example group: it mixes in Parsanol but defines no examples.
describe 'Parsanol combinations' do
  include Parsanol
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Minimal custom atom used by the specs below: it matches one fixed string.
class TestCustomAtom < Parsanol::Atoms::Custom
  # @param match_string [String] the exact text this atom accepts
  def initialize(match_string)
    @match_string = match_string
    super()
  end

  # Attempts to match the fixed string at the current source position.
  #
  # Returns [true, consumed] on a match. On a mismatch the source is rewound
  # to where it started and [false, nil] is returned.
  def try_match(source, context, consume_all)
    start = source.bytepos

    # Consume as many characters as the expected string has; the value that
    # comes back is returned as-is on success.
    consumed = source.consume(@match_string.length)
    return [true, consumed] if consumed == @match_string

    # Restore position on failure
    source.bytepos = start
    [false, nil]
  end

  # Human-readable form of this atom, embedding the expected string.
  def to_s_inner(prec = nil)
    "test_custom(#{@match_string.inspect})"
  end
end
30
+
31
# Exercises a user-defined atom (TestCustomAtom) on its own and combined with
# the standard combinators.
RSpec.describe Parsanol::Atoms::Custom do
  let(:atom) { TestCustomAtom.new('hello') }

  it 'can be created and used for parsing' do
    expect(atom.parse('hello').to_s).to eq('hello')
  end

  it 'fails on non-matching input' do
    expect { atom.parse('world') }.to raise_error(Parsanol::ParseFailed)
  end

  it 'raises NotImplementedError when try_match is not implemented' do
    # A bare subclass that does not override try_match.
    bare_class = Class.new(Parsanol::Atoms::Custom)
    custom = bare_class.new

    expect { custom.parse('test') }.to raise_error(NotImplementedError)
  end

  it 'can be combined with other atoms using sequence' do
    # Use as() to label results for structured output
    second = Parsanol.str(' world').as(:second)
    combined = atom.as(:first) >> second

    expect(combined.parse('hello world')).to eq({ first: 'hello', second: ' world' })
  end

  it 'supports repetition' do
    # Basic repetition - returns concatenated result by default
    twice = atom.repeat(2, 2)

    expect(twice.parse('hellohello').to_s).to eq('hellohello')
  end

  it 'can be used in alternative' do
    alt = atom | Parsanol.str('world')

    %w[hello world].each do |word|
      expect(alt.parse(word).to_s).to eq(word)
    end
  end

  it 'supports maybe' do
    optional = atom.maybe

    expect(optional.parse('hello').to_s).to eq('hello')
    # Maybe returns empty string when it doesn't match (standard Parslet behavior)
    expect(optional.parse('')).to eq('')
  end

  it 'provides custom to_s_inner for debugging' do
    description = atom.to_s

    expect(description).to include('test_custom')
    expect(description).to include('hello')
  end
end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+
3
# Placeholder spec for Parsanol::Atoms::DSL: the "deprecated methods" group
# only declares a fixture and currently contains no examples.
RSpec.describe Parsanol::Atoms::DSL do
  describe "deprecated methods" do
    let(:parslet) do
      Parsanol.str('foo')
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Parsanol::Atoms::Entity: parsing through the wrapped block,
# #inspect output with and without a label, failure on a nil-returning block,
# and error reporting for a recursive grammar.
RSpec.describe Parsanol::Atoms::Entity do
  context "when constructed with str('bar') inside" do
    let(:named) { described_class.new('name') { Parsanol.str('bar') } }

    it "should parse 'bar' without raising exceptions" do
      expect { named.parse('bar') }.not_to raise_error
    end

    it "should raise when applied to 'foo'" do
      expect { named.parse('foo') }.to raise_error(Parsanol::ParseFailed)
    end

    describe "#inspect" do
      it "should return the name of the entity" do
        expect(named.inspect).to eq('NAME')
      end
    end
  end

  context "when constructed with empty block" do
    # The block deliberately evaluates to nil, exactly like an empty block.
    let(:entity) { described_class.new('name') { nil } }

    it "should raise NotImplementedError" do
      expect { entity.parse('some_string') }.to raise_error(NotImplementedError)
    end
  end

  context "recursive definition parser" do
    # NOTE: `class` inside a block still defines a top-level constant; it is
    # kept under its original name in case other specs refer to it.
    class RecDefParser
      include Parsanol
      rule :recdef do
        str('(') >> atom >> str(')')
      end
      rule :atom do
        str('a') | str('b') | recdef
      end
    end
    let(:parser) { RecDefParser.new }

    it "should parse balanced parens" do
      expect { parser.recdef.parse("(((a)))") }.not_to raise_error
    end

    it "should not throw 'stack level too deep' when printing errors" do
      # Input is missing one closing paren, so the parse fails and we get a cause.
      cause = catch_failed_parse { parser.recdef.parse('(((a))') }
      expect { cause.ascii_tree }.not_to raise_error
    end
  end

  context "when constructed with a label" do
    let(:named) { described_class.new('name', 'label') { Parsanol.str('bar') } }

    it "should parse 'bar' without raising exceptions" do
      expect { named.parse('bar') }.not_to raise_error
    end

    it "should raise when applied to 'foo'" do
      expect { named.parse('foo') }.to raise_error(Parsanol::ParseFailed)
    end

    describe "#inspect" do
      it "should return the label of the entity" do
        expect(named.inspect).to eq('label')
      end
    end

    describe "#parslet" do
      it "should set the label on the cached parslet" do
        expect(named.parslet.label).to eq('label')
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Parsanol::Atoms::Ignored: input matched by an ignored atom is
# consumed but left out of the parse result.
RSpec.describe Parsanol::Atoms::Ignored do
  # Mixes the Parsanol helpers (str, ...) into the example group.
  include Parsanol

  describe "ignore" do
    it "ignores parts of the input" do
      # A lone ignored atom produces nil.
      expect(str('a').ignore.parse('a')).to eq(nil)

      # Inside a sequence only the ignored part is dropped, whether it is a
      # plain, named, or optional atom.
      expect((str('a') >> str('b').ignore >> str('c')).parse('abc')).to eq('ac')
      expect((str('a') >> str('b').as(:name).ignore >> str('c')).parse('abc')).to eq('ac')
      expect((str('a') >> str('b').maybe.ignore >> str('c')).parse('abc')).to eq('ac')
      expect((str('a') >> str('b').maybe.ignore >> str('c')).parse('ac')).to eq('ac')
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require 'spec_helper'
2
+
3
# Placeholder spec for Parsanol::Atoms::Infix; no examples are defined yet.
RSpec.describe Parsanol::Atoms::Infix do
end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Parsanol::Atoms::Lookahead: checks the error tree reported when a
# negative (absent?) or positive (present?) lookahead makes a sequence fail.
RSpec.describe Parsanol::Atoms::Lookahead do
  # Mixes the Parsanol helpers (str, ...) into the example group.
  include Parsanol

  describe 'negative lookahead' do
    it "influences the error tree" do
      parser = str('f').absent? >> str('b')
      cause = catch_failed_parse { parser.parse('f') }

      expect(cause.ascii_tree).to eq(
        "Failed to match sequence (!'f' 'b') at line 1 char 1.\n`- Input should not start with 'f' at line 1 char 1.\n"
      )
    end
  end

  describe 'positive lookahead' do
    it "influences the error tree" do
      parser = str('f').present? >> str('b')
      cause = catch_failed_parse { parser.parse('b') }

      expect(cause.ascii_tree).to eq(
        "Failed to match sequence (&'f' 'b') at line 1 char 1.\n`- Input should start with 'f' at line 1 char 1.\n"
      )
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'spec_helper'
2
+
3
# Placeholder spec for Parsanol::Atoms::Named; no examples are defined yet.
RSpec.describe Parsanol::Atoms::Named do
end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Parsanol::Atoms::Re: both spellings of the `match` helper must
# build a Re atom.
RSpec.describe Parsanol::Atoms::Re do
  describe "construction" do
    # Mixes the Parsanol helpers into this group; `match` below is called
    # with Parsanol included, as in the original spec.
    include Parsanol

    it "should allow match(str) form" do
      expect(match('[a]')).to be_a(Parsanol::Atoms::Re)
    end

    it "should allow match[str] form" do
      expect(match['a']).to be_a(Parsanol::Atoms::Re)
    end
  end
end