parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,82 @@
1
+ require 'spec_helper'
2
+
3
+ describe Parsanol::ErrorReporter::Deepest do
4
+ let(:reporter) { described_class.new }
5
+ let(:fake_source) { double('source') }
6
+
7
+ describe '#err' do
8
+ before do
9
+ allow(fake_source).to receive(:pos).and_return(13)
10
+ allow(fake_source).to receive(:line_and_column).and_return([1, 1])
11
+ end
12
+
13
+ it 'returns the deepest cause' do
14
+ expect(reporter).to receive(:deepest).and_return(:deepest)
15
+ expect(reporter.err('parslet', fake_source, 'message')).to eq(:deepest)
16
+ end
17
+ end
18
+
19
+ describe '#err_at' do
20
+ before do
21
+ allow(fake_source).to receive(:pos).and_return(13)
22
+ allow(fake_source).to receive(:line_and_column).and_return([1, 1])
23
+ end
24
+
25
+ it 'returns the deepest cause' do
26
+ expect(reporter).to receive(:deepest).and_return(:deepest)
27
+ expect(reporter.err('parslet', fake_source, 'message', 13)).to eq(:deepest)
28
+ end
29
+ end
30
+
31
+ describe '#deepest(cause)' do
32
+ def fake_cause(pos = 13, children = nil)
33
+ double('cause' + pos.to_s, pos: pos, children: children)
34
+ end
35
+
36
+ context 'when there is no deepest cause yet' do
37
+ let(:cause) { fake_cause }
38
+
39
+ it 'returns the given cause' do
40
+ reporter.deepest(cause).should == cause
41
+ end
42
+ end
43
+
44
+ context 'when the previous cause is deeper (no relationship)' do
45
+ let(:previous) { fake_cause }
46
+
47
+ before do
48
+ reporter.deepest(previous)
49
+ end
50
+
51
+ it 'returns the previous cause' do
52
+ reporter.deepest(fake_cause(12))
53
+ .should == previous
54
+ end
55
+ end
56
+
57
+ context 'when the previous cause is deeper (child)' do
58
+ let(:previous) { fake_cause }
59
+
60
+ before do
61
+ reporter.deepest(previous)
62
+ end
63
+
64
+ it 'returns the given cause' do
65
+ given = fake_cause(12, [previous])
66
+ reporter.deepest(given).should == given
67
+ end
68
+ end
69
+
70
+ context 'when the previous cause is shallower' do
71
+ before do
72
+ reporter.deepest(fake_cause)
73
+ end
74
+
75
+ it 'stores the cause as deepest' do
76
+ deeper = fake_cause(14)
77
+ reporter.deepest(deeper)
78
+ reporter.deepest_cause.should == deeper
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+
3
+ require 'parsanol/error_reporter'
4
+
5
+ describe Parsanol::ErrorReporter::Tree do
6
+ # TODO: placeholder - this describe block contains no examples yet.
7
+ end
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ describe Parsanol::Parser, "exporting to other lingos" do
4
+ class MiniLisp < Parsanol::Parser
5
+ root :expression
6
+ rule(:expression) {
7
+ space? >> str('(') >> space? >> body >> str(')')
8
+ }
9
+
10
+ rule(:body) {
11
+ (expression | identifier | float | integer | string).repeat.as(:exp)
12
+ }
13
+
14
+ rule(:space) {
15
+ match('\s').repeat(1)
16
+ }
17
+ rule(:space?) {
18
+ space.maybe
19
+ }
20
+
21
+ rule(:identifier) {
22
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
23
+ }
24
+
25
+ rule(:float) {
26
+ (
27
+ integer >> (
28
+ str('.') >> match('[0-9]').repeat(1) |
29
+ str('e') >> match('[0-9]').repeat(1)
30
+ ).as(:e)
31
+ ).as(:float) >> space?
32
+ }
33
+
34
+ rule(:integer) {
35
+ ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
36
+ }
37
+
38
+ rule(:string) {
39
+ str('"') >> (
40
+ str('\\') >> any |
41
+ str('"').absent? >> any
42
+ ).repeat.as(:string) >> str('"') >> space?
43
+ }
44
+ end
45
+
46
+ # I only update the files once I've verified the new syntax to work with
47
+ # the respective tools. This is more an acceptance test than a real spec.
48
+
49
+ describe "<- #to_citrus" do
50
+ let(:citrus) { File.read(
51
+ File.join(File.dirname(__FILE__), 'minilisp.citrus'))
52
+ }
53
+ it "should be valid citrus syntax" do
54
+ # puts MiniLisp.new.to_citrus
55
+ MiniLisp.new.to_citrus.should == citrus
56
+ end
57
+ end
58
+ describe "<- #to_treetop" do
59
+ let(:treetop) { File.read(
60
+ File.join(File.dirname(__FILE__), 'minilisp.tt'))
61
+ }
62
+ it "should be valid treetop syntax" do
63
+ # puts MiniLisp.new.to_treetop
64
+ MiniLisp.new.to_treetop.should == treetop
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+
3
+ require 'parsanol/parslet'
4
+ require 'parsanol/expression/treetop'
5
+
6
+ describe Parsanol::Expression::Treetop do
7
+ include Parsanol
8
+
9
+ describe "positive samples" do
10
+ [ # pattern # input
11
+ "'abc'", 'abc',
12
+ "...", 'abc',
13
+ "[1-4]", '3',
14
+
15
+ "'abc'?", 'abc',
16
+ "'abc'?", '',
17
+
18
+ "('abc')", 'abc',
19
+
20
+ "'a' 'b'", 'ab',
21
+ "'a' ('b')", 'ab',
22
+
23
+ "'a' / 'b'", 'a',
24
+ "'a' / 'b'", 'b',
25
+
26
+ "'a'*", 'aaa',
27
+ "'a'*", '',
28
+
29
+ "'a'+", 'aa',
30
+ "'a'+", 'a',
31
+
32
+ "'a'{1,2}", 'a',
33
+ "'a'{1,2}", 'aa',
34
+
35
+ "'a'{1,}", 'a',
36
+ "'a'{1,}", 'aa',
37
+
38
+ "'a'{,2}", '',
39
+ "'a'{,2}", 'a',
40
+ "'a'{,2}", 'aa',
41
+ ].each_slice(2) do |pattern, input|
42
+ context "exp(#{pattern.inspect})" do
43
+ let(:parslet) { exp(pattern) }
44
+ subject { parslet }
45
+ it { should parse(input) }
46
+ context "string representation" do
47
+ subject { exp(parslet.to_s) }
48
+ it { should parse(input, :trace => true) }
49
+ end
50
+ end
51
+ end
52
+ end
53
+ describe "negative samples" do
54
+ [ # pattern # input
55
+ "'abc'", 'cba',
56
+ "[1-4]", '5',
57
+
58
+ "'a' / 'b'", 'c',
59
+
60
+ "'a'+", '',
61
+
62
+ "'a'{1,2}", '',
63
+ "'a'{1,2}", 'aaa',
64
+
65
+ "'a'{1,}", '',
66
+
67
+ "'a'{,2}", 'aaa',
68
+ ].each_slice(2) do |pattern, input|
69
+ context "exp(#{pattern.inspect})" do
70
+ subject { exp(pattern) }
71
+ it { should_not parse(input) }
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,298 @@
1
+ require 'spec_helper'
2
+
3
+ describe "FIRST set computation" do
4
+ include Parsanol
5
+
6
+ describe "Str atom" do
7
+ it "returns itself as FIRST set" do
8
+ atom = str('foo')
9
+ first = atom.first_set
10
+ expect(first.size).to eq(1)
11
+ expect(first.first).to be_a(Parsanol::Atoms::Str)
12
+ expect(first.first.str).to eq('foo')
13
+ end
14
+
15
+ it "different strings have different FIRST sets" do
16
+ atom1 = str('foo')
17
+ atom2 = str('bar')
18
+ first1 = atom1.first_set
19
+ first2 = atom2.first_set
20
+ expect(first1.to_a & first2.to_a).to be_empty
21
+ end
22
+ end
23
+
24
+ describe "Re atom" do
25
+ it "returns itself as FIRST set" do
26
+ atom = match('[a-z]')
27
+ first = atom.first_set
28
+ expect(first.size).to eq(1)
29
+ expect(first.first).to be_a(Parsanol::Atoms::Re)
30
+ end
31
+ end
32
+
33
+ describe "Sequence atom" do
34
+ it "returns FIRST of first element" do
35
+ atom = str('a') >> str('b')
36
+ first = atom.first_set
37
+ expect(first.size).to eq(1)
38
+ expect(first.first).to be_a(Parsanol::Atoms::Str)
39
+ expect(first.first.str).to eq('a')
40
+ end
41
+
42
+ it "handles sequences of more than 2 elements" do
43
+ # Note: Due to Phase 24 string concatenation optimization,
44
+ # str('x') >> str('y') >> str('z') becomes str('xyz')
45
+ atom = str('x') >> str('y') >> str('z')
46
+ first = atom.first_set
47
+ expect(first.size).to eq(1)
48
+ expect(first.first.str).to eq('xyz') # Optimized to single string
49
+ end
50
+
51
+ it "propagates through EPSILON when first element can match empty" do
52
+ # .maybe makes the first element optional, so FIRST must look past it
53
+ atom = str('a').maybe >> str('b')
54
+ first = atom.first_set
55
+ # Should include both 'a' and 'b' since 'a'.maybe can match empty
56
+ expect(first.size).to eq(2)
57
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }.map(&:str)
58
+ expect(strs).to include('a', 'b')
59
+ end
60
+ end
61
+
62
+ describe "Alternative atom" do
63
+ it "returns union of all alternatives" do
64
+ atom = str('a') | str('b')
65
+ first = atom.first_set
66
+ expect(first.size).to eq(2)
67
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }.map(&:str)
68
+ expect(strs).to contain_exactly('a', 'b')
69
+ end
70
+
71
+ it "handles three alternatives" do
72
+ atom = str('x') | str('y') | str('z')
73
+ first = atom.first_set
74
+ expect(first.size).to eq(3)
75
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }.map(&:str)
76
+ expect(strs).to contain_exactly('x', 'y', 'z')
77
+ end
78
+
79
+ it "detects disjoint FIRST sets" do
80
+ atom = str('if') | str('while') | str('for')
81
+ first = atom.first_set
82
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }.map(&:str)
83
+ # All three keywords are disjoint
84
+ expect(strs.size).to eq(3)
85
+ end
86
+ end
87
+
88
+ describe "Repetition atom" do
89
+ it "includes EPSILON for min=0 (maybe)" do
90
+ atom = str('a').maybe
91
+ first = atom.first_set
92
+ expect(first).to include(Parsanol::FirstSet::EPSILON)
93
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }
94
+ expect(strs.size).to eq(1)
95
+ expect(strs.first.str).to eq('a')
96
+ end
97
+
98
+ it "includes EPSILON for min=0 (repeat)" do
99
+ atom = str('a').repeat(0, 3)
100
+ first = atom.first_set
101
+ expect(first).to include(Parsanol::FirstSet::EPSILON)
102
+ end
103
+
104
+ it "does not include EPSILON for min=1" do
105
+ atom = str('a').repeat(1, 3)
106
+ first = atom.first_set
107
+ expect(first).not_to include(Parsanol::FirstSet::EPSILON)
108
+ end
109
+
110
+ it "includes parslet's FIRST set" do
111
+ atom = str('x').repeat(0, 5)
112
+ first = atom.first_set
113
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }
114
+ expect(strs.first.str).to eq('x')
115
+ end
116
+ end
117
+
118
+ describe "Lookahead atom" do
119
+ it "returns EPSILON for positive lookahead" do
120
+ atom = str('foo').present?
121
+ first = atom.first_set
122
+ expect(first).to eq(Set.new([Parsanol::FirstSet::EPSILON]))
123
+ end
124
+
125
+ it "returns EPSILON for negative lookahead" do
126
+ atom = str('foo').absent?
127
+ first = atom.first_set
128
+ expect(first).to eq(Set.new([Parsanol::FirstSet::EPSILON]))
129
+ end
130
+ end
131
+
132
+ describe "Named atom" do
133
+ it "delegates to wrapped parslet" do
134
+ atom = str('hello').as(:greeting)
135
+ first = atom.first_set
136
+ expect(first.size).to eq(1)
137
+ expect(first.first).to be_a(Parsanol::Atoms::Str)
138
+ expect(first.first.str).to eq('hello')
139
+ end
140
+ end
141
+
142
+ describe "Complex grammars" do
143
+ it "computes FIRST for statement-like pattern" do
144
+ # Simulates: if_stmt | while_stmt | print_stmt
145
+ atom = str('if') | str('while') | str('print')
146
+ first = atom.first_set
147
+ strs = first.select { |x| x.is_a?(Parsanol::Atoms::Str) }.map(&:str)
148
+ expect(strs).to contain_exactly('if', 'while', 'print')
149
+ end
150
+
151
+ it "computes FIRST for expression-like pattern" do
152
+ # Simulates: '(' expr ')' | number
153
+ # Note: a match() sits between the two str() atoms here, so the sequence's
154
+ # FIRST is expected to be str('(') rather than one merged string
155
+ atom = (str('(') >> match('[a-z]') >> str(')')) | match('[0-9]')
156
+ first = atom.first_set
157
+ # FIRST should include '(' and [0-9]
158
+ expect(first.size).to eq(2)
159
+ has_paren = first.any? { |x| x.is_a?(Parsanol::Atoms::Str) && x.str == '(' }
160
+ has_digit = first.any? { |x| x.is_a?(Parsanol::Atoms::Re) }
161
+ expect(has_paren).to be true
162
+ expect(has_digit).to be true
163
+ end
164
+ end
165
+
166
+ describe "FIRST set caching" do
167
+ it "caches computed FIRST sets" do
168
+ atom = str('test')
169
+ first1 = atom.first_set
170
+ first2 = atom.first_set
171
+ # Should return same object (cached)
172
+ expect(first1.object_id).to eq(first2.object_id)
173
+ end
174
+
175
+ it "can clear cache" do
176
+ atom = str('test')
177
+ first1 = atom.first_set
178
+ atom.clear_first_set_cache
179
+ first2 = atom.first_set
180
+ # After clearing, should compute fresh (different object)
181
+ expect(first1.object_id).not_to eq(first2.object_id)
182
+ # But content should be same
183
+ expect(first1).to eq(first2)
184
+ end
185
+ end
186
+
187
+ describe "Disjoint detection (for cut operator insertion)" do
188
+ it "detects disjoint alternatives" do
189
+ alt1 = str('if')
190
+ alt2 = str('while')
191
+ first1 = alt1.first_set
192
+ first2 = alt2.first_set
193
+ # Disjoint: intersection is empty
194
+ expect(first1.to_a & first2.to_a).to be_empty
195
+ end
196
+
197
+ it "detects overlapping alternatives" do
198
+ # Both start with 'a'
199
+ alt1 = str('apple')
200
+ alt2 = str('apricot')
201
+ first1 = alt1.first_set
202
+ first2 = alt2.first_set
203
+ # The strings share a leading 'a', yet the FIRST sets hold whole Str atoms,
204
+ # not leading characters, so the intersection asserted below is empty.
205
+ # NOTE(review): presumably atoms compare by identity; confirm this is safe for cut insertion
206
+ expect(first1.to_a & first2.to_a).to be_empty
207
+ end
208
+
209
+ it "handles regex overlaps conservatively" do
210
+ alt1 = match('[a-z]')
211
+ alt2 = match('[A-Z]')
212
+ first1 = alt1.first_set
213
+ first2 = alt2.first_set
214
+ # Two separate Re atoms share no FIRST-set member, so the raw intersection
215
+ # is empty. NOTE(review): the title says "conservatively" - confirm intent
216
+ expect(first1.to_a & first2.to_a).to be_empty
217
+ end
218
+ end
219
+
220
+ describe "Parsanol::FirstSet class methods" do
221
+ describe ".disjoint?" do
222
+ it "returns true for disjoint sets" do
223
+ set1 = Set.new([str('if')])
224
+ set2 = Set.new([str('while')])
225
+ expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
226
+ end
227
+
228
+ it "returns false for overlapping sets" do
229
+ atom = str('same')
230
+ set1 = Set.new([atom])
231
+ set2 = Set.new([atom])
232
+ expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be false
233
+ end
234
+
235
+ it "ignores EPSILON when checking disjointness" do
236
+ set1 = Set.new([str('a'), Parsanol::FirstSet::EPSILON])
237
+ set2 = Set.new([str('b'), Parsanol::FirstSet::EPSILON])
238
+ # Should be disjoint despite both having EPSILON
239
+ expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
240
+ end
241
+
242
+ it "ignores nil when checking disjointness" do
243
+ set1 = Set.new([str('a'), nil])
244
+ set2 = Set.new([str('b'), nil])
245
+ # Should be disjoint despite both having nil
246
+ expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
247
+ end
248
+
249
+ it "returns true for empty sets" do
250
+ set1 = Set.new([Parsanol::FirstSet::EPSILON])
251
+ set2 = Set.new([str('a')])
252
+ # set1 is empty after removing EPSILON
253
+ expect(Parsanol::FirstSet.disjoint?(set1, set2)).to be true
254
+ end
255
+ end
256
+
257
+ describe ".all_disjoint?" do
258
+ it "returns true for mutually disjoint sets" do
259
+ sets = [
260
+ Set.new([str('if')]),
261
+ Set.new([str('while')]),
262
+ Set.new([str('print')])
263
+ ]
264
+ expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
265
+ end
266
+
267
+ it "returns false when any two sets overlap" do
268
+ atom = str('same')
269
+ sets = [
270
+ Set.new([str('if')]),
271
+ Set.new([atom]),
272
+ Set.new([atom])
273
+ ]
274
+ expect(Parsanol::FirstSet.all_disjoint?(sets)).to be false
275
+ end
276
+
277
+ it "returns true for less than 2 sets" do
278
+ sets = [Set.new([str('a')])]
279
+ expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
280
+ end
281
+
282
+ it "returns true for empty array" do
283
+ sets = []
284
+ expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
285
+ end
286
+
287
+ it "handles sets with EPSILON correctly" do
288
+ sets = [
289
+ Set.new([str('a'), Parsanol::FirstSet::EPSILON]),
290
+ Set.new([str('b'), Parsanol::FirstSet::EPSILON]),
291
+ Set.new([str('c'), Parsanol::FirstSet::EPSILON])
292
+ ]
293
+ # All disjoint despite all having EPSILON
294
+ expect(Parsanol::FirstSet.all_disjoint?(sets)).to be true
295
+ end
296
+ end
297
+ end
298
+ end