dendroid 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +19 -0
- data/lib/dendroid/recognizer/chart.rb +55 -0
- data/lib/dendroid/recognizer/e_item.rb +47 -0
- data/lib/dendroid/recognizer/item_set.rb +38 -0
- data/lib/dendroid/recognizer/recognizer.rb +286 -0
- data/lib/dendroid/syntax/grammar.rb +1 -1
- data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +1 -72
- data/spec/dendroid/recognizer/chart_spec.rb +1 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +59 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +63 -0
- data/spec/dendroid/recognizer/recognizer_spec.rb +761 -0
- data/spec/dendroid/support/sample_grammars.rb +319 -0
- data/spec/dendroid/syntax/grammar_spec.rb +145 -0
- data/version.txt +1 -1
- metadata +11 -2
@@ -0,0 +1,761 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
require_relative '../support/sample_grammars'
|
5
|
+
require_relative '../../../lib/dendroid/recognizer/recognizer'
|
6
|
+
|
7
|
+
describe Dendroid::Recognizer::Recognizer do
|
8
|
+
include SampleGrammars
|
9
|
+
let(:grammar1) { grammar_l1 }
|
10
|
+
|
11
|
+
# Checks a chart's item sets against their expected textual form.
# `expectations` is an array whose entries are arrays of strings; the entry
# at position `rank` is joined with newlines and must equal `chart[rank].to_s`.
# Only the ranks listed in `expectations` are compared.
def comp_expected_actuals(chart, expectations)
|
12
|
+
expectations.each_with_index do |set, rank|
|
13
|
+
expect(chart[rank].to_s).to eq(set.join("\n"))
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# Recognizer under test, built from grammar L1 and its matching tokenizer
|
18
|
+
subject { described_class.new(grammar1, tokenizer_l1) }
|
19
|
+
|
20
|
+
context 'Initialization:' do
|
21
|
+
it 'is initialized with a grammar' do
|
22
|
+
expect { described_class.new(grammar1, tokenizer_l1) }.not_to raise_error
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'knows its grammar analyzer' do
|
26
|
+
expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
|
27
|
+
expect(subject.grm_analysis.grammar).to eq(grammar1)
|
28
|
+
end
|
29
|
+
|
30
|
+
# NOTE(review): the assertions below are the same as in 'knows its grammar
# analyzer'; nothing here inspects the tokenizer given to the constructor.
# TODO: assert on the recognizer's tokenizer once its accessor is confirmed.
it 'knows its tokenizer' do
|
31
|
+
expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
|
32
|
+
expect(subject.grm_analysis.grammar).to eq(grammar1)
|
33
|
+
end
|
34
|
+
end # context
|
35
|
+
|
36
|
+
context 'Recognizer at work:' do
|
37
|
+
it 'can recognize example from Wikipedia' do
|
38
|
+
chart = subject.run('2 + 3 * 4')
|
39
|
+
expect(chart).to be_successful
|
40
|
+
|
41
|
+
set0 = [ # . 2 + 3 * 4
|
42
|
+
'p => . s @ 0',
|
43
|
+
's => . s PLUS m @ 0',
|
44
|
+
's => . m @ 0',
|
45
|
+
'm => . m STAR t @ 0',
|
46
|
+
'm => . t @ 0',
|
47
|
+
't => . INTEGER @ 0'
|
48
|
+
]
|
49
|
+
set1 = [ # 2 . + 3 * 4
|
50
|
+
't => INTEGER . @ 0',
|
51
|
+
'm => t . @ 0',
|
52
|
+
's => m . @ 0',
|
53
|
+
# 'm => m . STAR t @ 0',
|
54
|
+
'p => s . @ 0',
|
55
|
+
's => s . PLUS m @ 0'
|
56
|
+
]
|
57
|
+
set2 = [ # 2 + . 3 * 4
|
58
|
+
's => s PLUS . m @ 0',
|
59
|
+
'm => . m STAR t @ 2',
|
60
|
+
'm => . t @ 2',
|
61
|
+
't => . INTEGER @ 2'
|
62
|
+
]
|
63
|
+
set3 = [ # 2 + 3 . * 4
|
64
|
+
't => INTEGER . @ 2',
|
65
|
+
'm => t . @ 2',
|
66
|
+
's => s PLUS m . @ 0',
|
67
|
+
'm => m . STAR t @ 2',
|
68
|
+
'p => s . @ 0'
|
69
|
+
# 's => s . PLUS m @ 0'
|
70
|
+
]
|
71
|
+
set4 = [ # 2 + 3 * . 4
|
72
|
+
'm => m STAR . t @ 2',
|
73
|
+
't => . INTEGER @ 4'
|
74
|
+
]
|
75
|
+
set5 = [ # 2 + 3 * 4 .
|
76
|
+
't => INTEGER . @ 4',
|
77
|
+
'm => m STAR t . @ 2',
|
78
|
+
's => s PLUS m . @ 0',
|
79
|
+
# 'm => m . STAR t @ 2',
|
80
|
+
'p => s . @ 0'
|
81
|
+
# 's => s . PLUS m @ 0'
|
82
|
+
]
|
83
|
+
[set0, set1, set2, set3, set4, set5].each_with_index do |set, rank|
|
84
|
+
expect(chart[rank].to_s).to eq(set.join("\n"))
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'can recognize example for L2 language' do
|
89
|
+
recognizer = described_class.new(grammar_l2, tokenizer_l2)
|
90
|
+
chart = recognizer.run('1 + (2 * 3 - 4)')
|
91
|
+
expect(chart).to be_successful
|
92
|
+
|
93
|
+
set0 = [ # . 1 + (2 * 3 - 4)
|
94
|
+
'p => . sum @ 0',
|
95
|
+
'sum => . sum PLUS product @ 0',
|
96
|
+
'sum => . sum MINUS product @ 0',
|
97
|
+
'sum => . product @ 0',
|
98
|
+
'product => . product STAR factor @ 0',
|
99
|
+
'product => . product SLASH factor @ 0',
|
100
|
+
'product => . factor @ 0',
|
101
|
+
# 'factor => . LPAREN sum RPAREN @ 0',
|
102
|
+
'factor => . NUMBER @ 0'
|
103
|
+
]
|
104
|
+
set1 = [ # 1 . + (2 * 3 - 4)
|
105
|
+
'factor => NUMBER . @ 0',
|
106
|
+
'product => factor . @ 0',
|
107
|
+
'sum => product . @ 0',
|
108
|
+
# 'product => product . STAR factor @ 0',
|
109
|
+
# 'product => product . SLASH factor @ 0',
|
110
|
+
'p => sum . @ 0',
|
111
|
+
'sum => sum . PLUS product @ 0'
|
112
|
+
# 'sum => sum . MINUS product @ 0'
|
113
|
+
]
|
114
|
+
set2 = [ # 1 + . (2 * 3 - 4)
|
115
|
+
'sum => sum PLUS . product @ 0',
|
116
|
+
'product => . product STAR factor @ 2',
|
117
|
+
'product => . product SLASH factor @ 2',
|
118
|
+
'product => . factor @ 2',
|
119
|
+
'factor => . LPAREN sum RPAREN @ 2'
|
120
|
+
# 'factor => . NUMBER @ 2'
|
121
|
+
]
|
122
|
+
set3 = [ # 1 + (. 2 * 3 - 4)
|
123
|
+
'factor => LPAREN . sum RPAREN @ 2',
|
124
|
+
'sum => . sum PLUS product @ 3',
|
125
|
+
'sum => . sum MINUS product @ 3',
|
126
|
+
'sum => . product @ 3',
|
127
|
+
'product => . product STAR factor @ 3',
|
128
|
+
'product => . product SLASH factor @ 3',
|
129
|
+
'product => . factor @ 3',
|
130
|
+
# 'factor => . LPAREN sum RPAREN @ 3',
|
131
|
+
'factor => . NUMBER @ 3'
|
132
|
+
]
|
133
|
+
set4 = [ # 1 + (2 . * 3 - 4)
|
134
|
+
'factor => NUMBER . @ 3',
|
135
|
+
'product => factor . @ 3',
|
136
|
+
'sum => product . @ 3',
|
137
|
+
'product => product . STAR factor @ 3'
|
138
|
+
# 'product => product . SLASH factor @ 3',
|
139
|
+
# 'factor => LPAREN sum . RPAREN @ 2',
|
140
|
+
# 'sum => sum . PLUS product @ 3',
|
141
|
+
# 'sum => sum . MINUS product @ 3'
|
142
|
+
]
|
143
|
+
set5 = [ # 1 + (2 * . 3 - 4)
|
144
|
+
'product => product STAR . factor @ 3',
|
145
|
+
# 'factor => . LPAREN sum RPAREN @ 5',
|
146
|
+
'factor => . NUMBER @ 5'
|
147
|
+
]
|
148
|
+
set6 = [ # 1 + (2 * 3 . - 4)
|
149
|
+
'factor => NUMBER . @ 5',
|
150
|
+
'product => product STAR factor . @ 3',
|
151
|
+
'sum => product . @ 3',
|
152
|
+
# 'product => product . STAR factor @ 3',
|
153
|
+
# 'product => product . SLASH factor @ 3',
|
154
|
+
# 'factor => LPAREN sum . RPAREN @ 2',
|
155
|
+
# 'sum => sum . PLUS product @ 3',
|
156
|
+
'sum => sum . MINUS product @ 3'
|
157
|
+
]
|
158
|
+
set7 = [ # 1 + (2 * 3 - . 4)
|
159
|
+
'sum => sum MINUS . product @ 3',
|
160
|
+
'product => . product STAR factor @ 7',
|
161
|
+
'product => . product SLASH factor @ 7',
|
162
|
+
'product => . factor @ 7',
|
163
|
+
# 'factor => . LPAREN sum RPAREN @ 7',
|
164
|
+
'factor => . NUMBER @ 7'
|
165
|
+
]
|
166
|
+
set8 = [ # 1 + (2 * 3 - 4 .)
|
167
|
+
'factor => NUMBER . @ 7',
|
168
|
+
'product => factor . @ 7',
|
169
|
+
'sum => sum MINUS product . @ 3',
|
170
|
+
# 'product => product . STAR factor @ 7',
|
171
|
+
# 'product => product . SLASH factor @ 7',
|
172
|
+
'factor => LPAREN sum . RPAREN @ 2'
|
173
|
+
# 'sum => sum . PLUS product @ 3',
|
174
|
+
# 'sum => sum . MINUS product @ 3'
|
175
|
+
]
|
176
|
+
set9 = [ # 1 + (2 * 3 - 4 ).
|
177
|
+
'factor => LPAREN sum RPAREN . @ 2',
|
178
|
+
'product => factor . @ 2',
|
179
|
+
'sum => sum PLUS product . @ 0',
|
180
|
+
# 'product => product . STAR factor @ 2',
|
181
|
+
# 'product => product . SLASH factor @ 2',
|
182
|
+
'p => sum . @ 0'
|
183
|
+
# 'sum => sum . PLUS product @ 0',
|
184
|
+
# 'sum => sum . MINUS product @ 0'
|
185
|
+
]
|
186
|
+
expectations = [set0, set1, set2, set3, set4, set5, set6, set7, set8, set9]
|
187
|
+
expectations.each_with_index do |set, rank|
|
188
|
+
expect(chart[rank].to_s).to eq(set.join("\n"))
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end # context
|
192
|
+
|
193
|
+
context 'Handle empty rules' do
|
194
|
+
it 'can cope with an empty rule' do
|
195
|
+
recognizer = described_class.new(grammar_l7, tokenizer_l7)
|
196
|
+
chart = recognizer.run('a a')
|
197
|
+
expect(chart).to be_successful
|
198
|
+
|
199
|
+
set0 = [ # . a a
|
200
|
+
'S => . S T @ 0',
|
201
|
+
'S => . a @ 0'
|
202
|
+
]
|
203
|
+
set1 = [ # a . a
|
204
|
+
'S => a . @ 0',
|
205
|
+
'S => S . T @ 0',
|
206
|
+
'T => . a B @ 1',
|
207
|
+
'T => . a @ 1'
|
208
|
+
]
|
209
|
+
set2 = [ # a a .
|
210
|
+
'T => a . B @ 1',
|
211
|
+
'T => a . @ 1',
|
212
|
+
'B => . @ 2',
|
213
|
+
'T => a B . @ 1',
|
214
|
+
'S => S T . @ 0',
|
215
|
+
'S => S . T @ 0'
|
216
|
+
# 'T => . a B @ 2',
|
217
|
+
# 'T => . a @ 2'
|
218
|
+
]
|
219
|
+
|
220
|
+
expectations = [set0, set1, set2]
|
221
|
+
comp_expected_actuals(chart, expectations)
|
222
|
+
end
|
223
|
+
|
224
|
+
it 'can cope with a nullable symbol' do
|
225
|
+
recognizer = described_class.new(grammar_l14, tokenizer_l14)
|
226
|
+
chart = recognizer.run('a a / a')
|
227
|
+
expect(chart).to be_successful
|
228
|
+
|
229
|
+
set0 = [ # . a a / a
|
230
|
+
'S => . E @ 0',
|
231
|
+
'E => . E Q F @ 0',
|
232
|
+
'E => . F @ 0',
|
233
|
+
'F => . a @ 0'
|
234
|
+
]
|
235
|
+
set1 = [ # a . a / a
|
236
|
+
'F => a . @ 0',
|
237
|
+
'E => F . @ 0',
|
238
|
+
'S => E . @ 0',
|
239
|
+
'E => E . Q F @ 0',
|
240
|
+
# 'Q => . star @ 1',
|
241
|
+
# 'Q => . slash @ 1',
|
242
|
+
'Q => . @ 1',
|
243
|
+
'E => E Q . F @ 0',
|
244
|
+
'F => . a @ 1'
|
245
|
+
]
|
246
|
+
set2 = [ # a a . / a
|
247
|
+
'F => a . @ 1',
|
248
|
+
'E => E Q F . @ 0',
|
249
|
+
'S => E . @ 0',
|
250
|
+
'E => E . Q F @ 0',
|
251
|
+
# 'Q => . star @ 2',
|
252
|
+
'Q => . slash @ 2',
|
253
|
+
'Q => . @ 2',
|
254
|
+
'E => E Q . F @ 0'
|
255
|
+
# 'F => . a @ 2'
|
256
|
+
]
|
257
|
+
set3 = [ # a a / . a
|
258
|
+
'Q => slash . @ 2',
|
259
|
+
'E => E Q . F @ 0',
|
260
|
+
'F => . a @ 3'
|
261
|
+
]
|
262
|
+
set4 = [ # a a / a .
|
263
|
+
'F => a . @ 3',
|
264
|
+
'E => E Q F . @ 0',
|
265
|
+
'S => E . @ 0',
|
266
|
+
'E => E . Q F @ 0',
|
267
|
+
# 'Q => . star @ 4',
|
268
|
+
# 'Q => . slash @ 4',
|
269
|
+
'Q => . @ 4',
|
270
|
+
'E => E Q . F @ 0'
|
271
|
+
# 'F => . a @ 4'
|
272
|
+
]
|
273
|
+
expectations = [set0, set1, set2, set3, set4]
|
274
|
+
comp_expected_actuals(chart, expectations)
|
275
|
+
end
|
276
|
+
end # context
|
277
|
+
|
278
|
+
context 'Recognizer and ambiguous grammars:' do
|
279
|
+
it 'can handle ambiguous input (I)' do
|
280
|
+
recognizer = described_class.new(grammar_l31, tokenizer_l1)
|
281
|
+
chart = recognizer.run('2 + 3 * 4')
|
282
|
+
expect(chart).to be_successful
|
283
|
+
|
284
|
+
set0 = [ # . 2 + 3 * 4
|
285
|
+
'p => . s @ 0',
|
286
|
+
's => . s PLUS s @ 0',
|
287
|
+
's => . s STAR s @ 0',
|
288
|
+
's => . INTEGER @ 0'
|
289
|
+
]
|
290
|
+
set1 = [ # 2 . + 3 * 4
|
291
|
+
's => INTEGER . @ 0',
|
292
|
+
'p => s . @ 0',
|
293
|
+
's => s . PLUS s @ 0'
|
294
|
+
# 's => s . STAR s @ 0',
|
295
|
+
]
|
296
|
+
set2 = [ # 2 + . 3 * 4
|
297
|
+
's => s PLUS . s @ 0',
|
298
|
+
's => . s PLUS s @ 2',
|
299
|
+
's => . s STAR s @ 2',
|
300
|
+
's => . INTEGER @ 2'
|
301
|
+
]
|
302
|
+
set3 = [ # 2 + 3 . * 4
|
303
|
+
's => INTEGER . @ 2',
|
304
|
+
's => s PLUS s . @ 0',
|
305
|
+
# 's => s . PLUS s @ 2',
|
306
|
+
's => s . STAR s @ 2',
|
307
|
+
'p => s . @ 0',
|
308
|
+
# 's => s . PLUS s @ 0',
|
309
|
+
's => s . STAR s @ 0'
|
310
|
+
]
|
311
|
+
set4 = [ # 2 + 3 * . 4
|
312
|
+
's => s STAR . s @ 2',
|
313
|
+
's => s STAR . s @ 0',
|
314
|
+
's => . s PLUS s @ 4',
|
315
|
+
's => . s STAR s @ 4',
|
316
|
+
's => . INTEGER @ 4'
|
317
|
+
]
|
318
|
+
set5 = [ # 2 + 3 * 4 .
|
319
|
+
's => INTEGER . @ 4',
|
320
|
+
's => s STAR s . @ 2',
|
321
|
+
's => s STAR s . @ 0',
|
322
|
+
# 's => s . PLUS s @ 4',
|
323
|
+
# 's => s . STAR s @ 4',
|
324
|
+
's => s PLUS s . @ 0',
|
325
|
+
# 's => s . PLUS s @ 2',
|
326
|
+
# 's => s . STAR s @ 2',
|
327
|
+
'p => s . @ 0'
|
328
|
+
# 's => s . PLUS s @ 0',
|
329
|
+
# 's => s . STAR s @ 0'
|
330
|
+
]
|
331
|
+
expectations = [set0, set1, set2, set3, set4, set5]
|
332
|
+
comp_expected_actuals(chart, expectations)
|
333
|
+
end
|
334
|
+
|
335
|
+
it 'can handle ambiguous input (II)' do
|
336
|
+
recognizer = described_class.new(grammar_l4, tokenizer_l4)
|
337
|
+
chart = recognizer.run('abc + def + ghi')
|
338
|
+
expect(chart).to be_successful
|
339
|
+
|
340
|
+
set0 = [ # . abc + def + ghi
|
341
|
+
'S => . E @ 0',
|
342
|
+
'E => . E plus E @ 0',
|
343
|
+
'E => . id @ 0'
|
344
|
+
]
|
345
|
+
set1 = [ # abc . + def + ghi
|
346
|
+
'E => id . @ 0',
|
347
|
+
'S => E . @ 0',
|
348
|
+
'E => E . plus E @ 0'
|
349
|
+
]
|
350
|
+
set2 = [ # abc + . def + ghi
|
351
|
+
'E => E plus . E @ 0',
|
352
|
+
'E => . E plus E @ 2',
|
353
|
+
'E => . id @ 2'
|
354
|
+
]
|
355
|
+
set3 = [ # abc + def . + ghi
|
356
|
+
'E => id . @ 2',
|
357
|
+
'E => E plus E . @ 0',
|
358
|
+
'E => E . plus E @ 2',
|
359
|
+
'S => E . @ 0',
|
360
|
+
'E => E . plus E @ 0'
|
361
|
+
|
362
|
+
]
|
363
|
+
set4 = [ # abc + def + . ghi
|
364
|
+
'E => E plus . E @ 2',
|
365
|
+
'E => E plus . E @ 0',
|
366
|
+
'E => . E plus E @ 4',
|
367
|
+
'E => . id @ 4'
|
368
|
+
]
|
369
|
+
set5 = [ # abc + def + ghi .
|
370
|
+
'E => id . @ 4',
|
371
|
+
'E => E plus E . @ 2',
|
372
|
+
'E => E plus E . @ 0',
|
373
|
+
# 'E => E . plus E @ 4',
|
374
|
+
# 'E => E . plus E @ 2',
|
375
|
+
'S => E . @ 0'
|
376
|
+
# 'E => E . plus E @ 0'
|
377
|
+
]
|
378
|
+
expectations = [set0, set1, set2, set3, set4, set5]
|
379
|
+
comp_expected_actuals(chart, expectations)
|
380
|
+
end
|
381
|
+
|
382
|
+
it 'copes with the dangling else ambiguity' do
|
383
|
+
recognizer = described_class.new(grammar_l6, tokenizer_l6)
|
384
|
+
chart = recognizer.run('if E then if E then other else other')
|
385
|
+
expect(chart).to be_successful
|
386
|
+
end
|
387
|
+
|
388
|
+
it 'swallows an input that failed with the Earley parsing approach' do
|
389
|
+
recognizer = described_class.new(grammar_l8, tokenizer_l8)
|
390
|
+
chart = recognizer.run('x x x')
|
391
|
+
expect(chart).to be_successful
|
392
|
+
|
393
|
+
set0 = [ # . x x x
|
394
|
+
'S => . S S @ 0',
|
395
|
+
'S => . x @ 0'
|
396
|
+
]
|
397
|
+
set1 = [ # x . x x
|
398
|
+
'S => x . @ 0',
|
399
|
+
'S => S . S @ 0',
|
400
|
+
'S => . S S @ 1',
|
401
|
+
'S => . x @ 1'
|
402
|
+
]
|
403
|
+
set2 = [ # x x . x
|
404
|
+
'S => x . @ 1',
|
405
|
+
'S => S S . @ 0',
|
406
|
+
'S => S . S @ 1',
|
407
|
+
'S => S . S @ 0',
|
408
|
+
'S => . S S @ 2',
|
409
|
+
'S => . x @ 2'
|
410
|
+
]
|
411
|
+
set3 = [ # x x x .
|
412
|
+
'S => x . @ 2',
|
413
|
+
'S => S S . @ 1',
|
414
|
+
'S => S S . @ 0',
|
415
|
+
'S => S . S @ 2',
|
416
|
+
'S => S . S @ 1',
|
417
|
+
'S => S . S @ 0',
|
418
|
+
'S => . S S @ 3'
|
419
|
+
# 'S => . x @ 3'
|
420
|
+
]
|
421
|
+
expectations = [set0, set1, set2, set3]
|
422
|
+
comp_expected_actuals(chart, expectations)
|
423
|
+
end
|
424
|
+
|
425
|
+
it 'swallows the input from an infinite ambiguity grammar' do
|
426
|
+
recognizer = described_class.new(grammar_l9, tokenizer_l9)
|
427
|
+
chart = recognizer.run('x x x')
|
428
|
+
expect(chart).to be_successful
|
429
|
+
|
430
|
+
set0 = [ # . x x x
|
431
|
+
'S => . S S @ 0',
|
432
|
+
'S => . @ 0',
|
433
|
+
'S => . x @ 0',
|
434
|
+
'S => S . S @ 0',
|
435
|
+
'S => S S . @ 0'
|
436
|
+
]
|
437
|
+
set1 = [ # x . x x
|
438
|
+
'S => x . @ 0',
|
439
|
+
'S => S . S @ 0',
|
440
|
+
'S => S S . @ 0',
|
441
|
+
'S => . S S @ 1',
|
442
|
+
'S => . @ 1',
|
443
|
+
'S => . x @ 1',
|
444
|
+
'S => S . S @ 1'
|
445
|
+
]
|
446
|
+
set2 = [ # x x . x
|
447
|
+
'S => x . @ 1',
|
448
|
+
'S => S S . @ 0',
|
449
|
+
'S => S . S @ 1',
|
450
|
+
'S => S S . @ 1',
|
451
|
+
'S => S . S @ 0',
|
452
|
+
'S => . S S @ 2',
|
453
|
+
'S => . @ 2',
|
454
|
+
'S => . x @ 2',
|
455
|
+
'S => S . S @ 2'
|
456
|
+
]
|
457
|
+
set3 = [ # x x x .
|
458
|
+
'S => x . @ 2',
|
459
|
+
'S => S S . @ 1',
|
460
|
+
'S => S S . @ 0',
|
461
|
+
'S => S . S @ 2',
|
462
|
+
'S => S S . @ 2',
|
463
|
+
'S => S . S @ 1',
|
464
|
+
'S => S . S @ 0',
|
465
|
+
'S => . S S @ 3',
|
466
|
+
'S => . @ 3',
|
467
|
+
# 'S => . x @ 3',
|
468
|
+
'S => S . S @ 3'
|
469
|
+
]
|
470
|
+
expectations = [set0, set1, set2, set3]
|
471
|
+
comp_expected_actuals(chart, expectations)
|
472
|
+
end
|
473
|
+
end # context
|
474
|
+
|
475
|
+
context 'Recognizer and recursive rules:' do
|
476
|
+
it 'can handle left-recursion' do
|
477
|
+
recognizer = described_class.new(grammar_l10, tokenizer_l10)
|
478
|
+
chart = recognizer.run('a a a a a')
|
479
|
+
expect(chart).to be_successful
|
480
|
+
|
481
|
+
set0 = [ # . a a a a a
|
482
|
+
'A => . A a @ 0',
|
483
|
+
'A => . @ 0',
|
484
|
+
'A => A . a @ 0'
|
485
|
+
]
|
486
|
+
set1 = [ # a . a a a a
|
487
|
+
'A => A a . @ 0',
|
488
|
+
'A => A . a @ 0'
|
489
|
+
]
|
490
|
+
set2 = [ # a a . a a a
|
491
|
+
'A => A a . @ 0',
|
492
|
+
'A => A . a @ 0'
|
493
|
+
]
|
494
|
+
set3 = [ # a a a . a a
|
495
|
+
'A => A a . @ 0',
|
496
|
+
'A => A . a @ 0'
|
497
|
+
]
|
498
|
+
set4 = [ # a a a a . a
|
499
|
+
'A => A a . @ 0',
|
500
|
+
'A => A . a @ 0'
|
501
|
+
]
|
502
|
+
set5 = [ # a a a a a .
|
503
|
+
'A => A a . @ 0'
|
504
|
+
# 'A => A . a @ 0'
|
505
|
+
]
|
506
|
+
expectations = [set0, set1, set2, set3, set4, set5]
|
507
|
+
comp_expected_actuals(chart, expectations)
|
508
|
+
end
|
509
|
+
|
510
|
+
it 'supports right-recursive rules' do
|
511
|
+
recognizer = described_class.new(grammar_l11, tokenizer_l11)
|
512
|
+
chart = recognizer.run('a a a a a')
|
513
|
+
expect(chart).to be_successful
|
514
|
+
set0 = [ # . a a a a a
|
515
|
+
'A => . a A @ 0',
|
516
|
+
'A => . @ 0'
|
517
|
+
]
|
518
|
+
set1 = [ # a . a a a a
|
519
|
+
'A => a . A @ 0',
|
520
|
+
'A => . a A @ 1',
|
521
|
+
'A => . @ 1',
|
522
|
+
'A => a A . @ 0'
|
523
|
+
]
|
524
|
+
set2 = [ # a a . a a a
|
525
|
+
'A => a . A @ 1',
|
526
|
+
'A => . a A @ 2',
|
527
|
+
'A => . @ 2',
|
528
|
+
'A => a A . @ 1',
|
529
|
+
'A => a A . @ 0'
|
530
|
+
]
|
531
|
+
set3 = [ # a a a . a a
|
532
|
+
'A => a . A @ 2',
|
533
|
+
'A => . a A @ 3',
|
534
|
+
'A => . @ 3',
|
535
|
+
'A => a A . @ 2',
|
536
|
+
'A => a A . @ 1',
|
537
|
+
'A => a A . @ 0'
|
538
|
+
]
|
539
|
+
set4 = [ # a a a a . a
|
540
|
+
'A => a . A @ 3',
|
541
|
+
'A => . a A @ 4',
|
542
|
+
'A => . @ 4',
|
543
|
+
'A => a A . @ 3',
|
544
|
+
'A => a A . @ 2',
|
545
|
+
'A => a A . @ 1',
|
546
|
+
'A => a A . @ 0'
|
547
|
+
]
|
548
|
+
set5 = [ # a a a a a .
|
549
|
+
'A => a . A @ 4',
|
550
|
+
# 'A => . a A @ 5',
|
551
|
+
'A => . @ 5',
|
552
|
+
'A => a A . @ 4',
|
553
|
+
'A => a A . @ 3',
|
554
|
+
'A => a A . @ 2',
|
555
|
+
'A => a A . @ 1',
|
556
|
+
'A => a A . @ 0'
|
557
|
+
]
|
558
|
+
expectations = [set0, set1, set2, set3, set4, set5]
|
559
|
+
comp_expected_actuals(chart, expectations)
|
560
|
+
end
|
561
|
+
|
562
|
+
it 'supports mid-recursive rules' do
|
563
|
+
recognizer = described_class.new(grammar_l5, tokenizer_l5)
|
564
|
+
chart = recognizer.run('a a b c c')
|
565
|
+
expect(chart).to be_successful
|
566
|
+
set0 = [ # . a a b c c
|
567
|
+
'S => . A @ 0',
|
568
|
+
'A => . a A c @ 0'
|
569
|
+
# 'A => . b @ 0'
|
570
|
+
]
|
571
|
+
set1 = [ # a . a b c c
|
572
|
+
'A => a . A c @ 0',
|
573
|
+
'A => . a A c @ 1'
|
574
|
+
# 'A => . b @ 1'
|
575
|
+
]
|
576
|
+
set2 = [ # a a . b c c
|
577
|
+
'A => a . A c @ 1',
|
578
|
+
# 'A => . a A c @ 2',
|
579
|
+
'A => . b @ 2'
|
580
|
+
]
|
581
|
+
set3 = [ # a a b . c c
|
582
|
+
'A => b . @ 2',
|
583
|
+
'A => a A . c @ 1'
|
584
|
+
]
|
585
|
+
set4 = [ # a a b c . c
|
586
|
+
'A => a A c . @ 1',
|
587
|
+
'A => a A . c @ 0'
|
588
|
+
]
|
589
|
+
set5 = [ # a a b c c .
|
590
|
+
'A => a A c . @ 0',
|
591
|
+
'S => A . @ 0'
|
592
|
+
]
|
593
|
+
expectations = [set0, set1, set2, set3, set4, set5]
|
594
|
+
comp_expected_actuals(chart, expectations)
|
595
|
+
end
|
596
|
+
|
597
|
+
it 'can handle hidden left-recursion' do
|
598
|
+
recognizer = described_class.new(grammar_l12, tokenizer_l12)
|
599
|
+
chart = recognizer.run('a b b b')
|
600
|
+
expect(chart).to be_successful
|
601
|
+
|
602
|
+
set0 = [ # . a b b b
|
603
|
+
'S => . A T @ 0',
|
604
|
+
'S => . a T @ 0',
|
605
|
+
'A => . a @ 0',
|
606
|
+
'A => . B A @ 0',
|
607
|
+
'B => . @ 0',
|
608
|
+
'A => B . A @ 0'
|
609
|
+
]
|
610
|
+
set1 = [ # a . b b b
|
611
|
+
'S => a . T @ 0',
|
612
|
+
'A => a . @ 0',
|
613
|
+
'T => . b b b @ 1',
|
614
|
+
'S => A . T @ 0',
|
615
|
+
'A => B A . @ 0'
|
616
|
+
]
|
617
|
+
set2 = [ # a b . b b
|
618
|
+
'T => b . b b @ 1'
|
619
|
+
]
|
620
|
+
set3 = [ # a b b . b
|
621
|
+
'T => b b . b @ 1'
|
622
|
+
]
|
623
|
+
set4 = [ # a b b b .
|
624
|
+
'T => b b b . @ 1',
|
625
|
+
'S => a T . @ 0',
|
626
|
+
'S => A T . @ 0'
|
627
|
+
]
|
628
|
+
expectations = [set0, set1, set2, set3, set4]
|
629
|
+
comp_expected_actuals(chart, expectations)
|
630
|
+
end
|
631
|
+
|
632
|
+
it 'can handle right-recursion (II)' do
|
633
|
+
recognizer = described_class.new(grammar_l13, tokenizer_l13)
|
634
|
+
chart = recognizer.run('x x x')
|
635
|
+
expect(chart).to be_successful
|
636
|
+
set0 = [ # . x x x
|
637
|
+
'A => . x A @ 0',
|
638
|
+
'A => . x @ 0'
|
639
|
+
]
|
640
|
+
set1 = [ # x . x x
|
641
|
+
'A => x . A @ 0',
|
642
|
+
'A => x . @ 0',
|
643
|
+
'A => . x A @ 1',
|
644
|
+
'A => . x @ 1'
|
645
|
+
]
|
646
|
+
set2 = [ # x x . x
|
647
|
+
'A => x . A @ 1',
|
648
|
+
'A => x . @ 1',
|
649
|
+
'A => . x A @ 2',
|
650
|
+
'A => . x @ 2',
|
651
|
+
'A => x A . @ 0'
|
652
|
+
]
|
653
|
+
set3 = [ # x x x .
|
654
|
+
'A => x . A @ 2',
|
655
|
+
'A => x . @ 2',
|
656
|
+
# 'A => . x A @ 3',
|
657
|
+
# 'A => . x @ 3',
|
658
|
+
'A => x A . @ 1',
|
659
|
+
'A => x A . @ 0'
|
660
|
+
]
|
661
|
+
expectations = [set0, set1, set2, set3]
|
662
|
+
comp_expected_actuals(chart, expectations)
|
663
|
+
end
|
664
|
+
|
665
|
+
# TODO: Use grammars from "The Structure of Shared Forests in Ambiguous Parsing"
|
666
|
+
# Grammar UBDA == grammar_l8
|
667
|
+
# Grammar RR == grammar_l13
|
668
|
+
end # context
|
669
|
+
|
670
|
+
context 'Error reporting:' do
|
671
|
+
it 'should parse an invalid simple input' do
|
672
|
+
recognizer = described_class.new(grammar_l5, tokenizer_l5)
|
673
|
+
# Parse an erroneous input (b is missing)
|
674
|
+
chart = recognizer.run('a a c c')
|
675
|
+
expect(chart).not_to be_successful
|
676
|
+
|
677
|
+
# TODO
|
678
|
+
# err_msg = <<-MSG
|
679
|
+
# Syntax error at or near token line 1, column 5 >>>c<<<
|
680
|
+
# Expected one of: ['a', 'b'], found a 'c' instead.
|
681
|
+
# MSG
|
682
|
+
# expect(parse_result.failure_reason.message).to eq(err_msg.chomp)
|
683
|
+
end
|
684
|
+
end # context
|
685
|
+
|
686
|
+
context 'Error at start of input' do
|
687
|
+
it 'raises an error if input is empty and grammar disallows this' do
|
688
|
+
err_msg = 'Error: Input may not be empty nor blank.'
|
689
|
+
recognizer = described_class.new(grammar_l5, tokenizer_l5)
|
690
|
+
|
691
|
+
['', " \t \n"].each do |input|
|
692
|
+
chart = recognizer.run(input)
|
693
|
+
expect(chart).not_to be_successful
|
694
|
+
expect(chart.failure_class).to eq(StandardError)
|
695
|
+
expect(chart.failure_reason).to eq(err_msg)
|
696
|
+
end
|
697
|
+
end
|
698
|
+
|
699
|
+
it 'raises an error if encounters an unexpected token' do
|
700
|
+
recognizer = described_class.new(grammar_l5, tokenizer_l5)
|
701
|
+
chart = recognizer.run('a a c c')
|
702
|
+
expect(chart).not_to be_successful
|
703
|
+
set0 = [ # . a a c c
|
704
|
+
'S => . A @ 0',
|
705
|
+
'A => . a A c @ 0'
|
706
|
+
# 'A => . b @ 0'
|
707
|
+
]
|
708
|
+
set1 = [ # a . a c c
|
709
|
+
'A => a . A c @ 0',
|
710
|
+
'A => . a A c @ 1'
|
711
|
+
# 'A => . b @ 1'
|
712
|
+
]
|
713
|
+
set2 = [ # a a . c c
|
714
|
+
'A => a . A c @ 1',
|
715
|
+
'A => . a A c @ 2', # State is not pruned (in error state)
|
716
|
+
'A => . b @ 2' # State is not pruned (in error state)
|
717
|
+
]
|
718
|
+
[set0, set1, set2].each_with_index do |set, rank|
|
719
|
+
expect(chart[rank].to_s).to eq(set.join("\n"))
|
720
|
+
end
|
721
|
+
expect(chart.failure_class).to eq(StandardError)
|
722
|
+
err_msg = 'Syntax error at or near token line 1, column 5 >>>c<<< Expected one of: [a, b], found a c instead.'
|
723
|
+
expect(chart.failure_reason).to eq(err_msg)
|
724
|
+
end
|
725
|
+
|
726
|
+
it "reports an error when last token isn't final state" do
|
727
|
+
recognizer = described_class.new(grammar_l5, tokenizer_l5)
|
728
|
+
chart = recognizer.run('aabc')
|
729
|
+
expect(chart).not_to be_successful
|
730
|
+
set0 = [ # . a a b c
|
731
|
+
'S => . A @ 0',
|
732
|
+
'A => . a A c @ 0'
|
733
|
+
# 'A => . b @ 0'
|
734
|
+
]
|
735
|
+
set1 = [ # a . a b c
|
736
|
+
'A => a . A c @ 0',
|
737
|
+
'A => . a A c @ 1'
|
738
|
+
# 'A => . b @ 1'
|
739
|
+
]
|
740
|
+
set2 = [ # a a . b c
|
741
|
+
'A => a . A c @ 1',
|
742
|
+
# 'A => . a A c @ 2',
|
743
|
+
'A => . b @ 2'
|
744
|
+
]
|
745
|
+
set3 = [ # a a b . c
|
746
|
+
'A => b . @ 2',
|
747
|
+
'A => a A . c @ 1'
|
748
|
+
]
|
749
|
+
set4 = [ # a a b c .
|
750
|
+
'A => a A c . @ 1',
|
751
|
+
'A => a A . c @ 0'
|
752
|
+
]
|
753
|
+
[set0, set1, set2, set3, set4].each_with_index do |set, rank|
|
754
|
+
expect(chart[rank].to_s).to eq(set.join("\n"))
|
755
|
+
end
|
756
|
+
expect(chart.failure_class).to eq(StandardError)
|
757
|
+
err_msg = "Line 1, column 4: Premature end of input after 'c', expected: c."
|
758
|
+
expect(chart.failure_reason).to eq(err_msg)
|
759
|
+
end
|
760
|
+
end # context
|
761
|
+
end # describe
|