dendroid 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +7 -0
- data/lib/dendroid/recognizer/chart.rb +6 -4
- data/lib/dendroid/recognizer/e_item.rb +0 -1
- data/lib/dendroid/recognizer/item_set.rb +1 -0
- data/lib/dendroid/recognizer/recognizer.rb +32 -28
- data/lib/dendroid/syntax/grammar.rb +1 -1
- data/spec/dendroid/recognizer/chart_spec.rb +0 -1
- data/spec/dendroid/recognizer/e_item_spec.rb +4 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +1 -1
- data/spec/dendroid/recognizer/recognizer_spec.rb +594 -19
- data/spec/dendroid/support/sample_grammars.rb +249 -6
- data/spec/dendroid/syntax/grammar_spec.rb +145 -0
- data/version.txt +1 -1
- metadata +1 -1
@@ -10,9 +10,7 @@ module SampleGrammars
|
|
10
10
|
declare_terminals('PLUS', 'STAR', 'INTEGER')
|
11
11
|
rule('p' => 's')
|
12
12
|
rule('s' => ['s PLUS m', 'm'])
|
13
|
-
# rule('s' => 'm')
|
14
13
|
rule('m' => ['m STAR t', 't'])
|
15
|
-
# rule('m' => 't')
|
16
14
|
rule('t' => 'INTEGER')
|
17
15
|
end
|
18
16
|
|
@@ -28,7 +26,6 @@ module SampleGrammars
|
|
28
26
|
end
|
29
27
|
end
|
30
28
|
|
31
|
-
|
32
29
|
def grammar_l2
|
33
30
|
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
34
31
|
# Grammar inspired from Loup Vaillant's example
|
@@ -47,8 +44,7 @@ module SampleGrammars
|
|
47
44
|
|
48
45
|
def tokenizer_l2
|
49
46
|
Dendroid::Utils::BaseTokenizer.new do
|
50
|
-
map_verbatim2terminal({
|
51
|
-
'+' => :PLUS,
|
47
|
+
map_verbatim2terminal({ '+' => :PLUS,
|
52
48
|
'-' => :MINUS,
|
53
49
|
'*' => :STAR,
|
54
50
|
'/' => :SLASH,
|
@@ -68,9 +64,256 @@ module SampleGrammars
|
|
68
64
|
|
69
65
|
rule('Z' => ['d', 'X Y Z'])
|
70
66
|
rule('Y' => ['', 'c'])
|
71
|
-
rule('X' => [
|
67
|
+
rule('X' => %w[Y a])
|
68
|
+
end
|
69
|
+
|
70
|
+
builder.grammar
|
71
|
+
end
|
72
|
+
|
73
|
+
def grammar_l31
|
74
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
75
|
+
# Ambiguous arithmetical expression language
|
76
|
+
# This language is compatible with tokenizer L1
|
77
|
+
declare_terminals('PLUS', 'STAR', 'INTEGER')
|
78
|
+
rule('p' => 's')
|
79
|
+
rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
|
80
|
+
end
|
81
|
+
|
82
|
+
builder.grammar
|
83
|
+
end
|
84
|
+
|
85
|
+
def grammar_l4
|
86
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
87
|
+
# (based on an example from Fischer and LeBlanc: "Crafting a Compiler")
|
88
|
+
declare_terminals('plus', 'id')
|
89
|
+
|
90
|
+
rule 'S' => 'E'
|
91
|
+
rule 'E' => ['E plus E', 'id']
|
92
|
+
end
|
93
|
+
|
94
|
+
builder.grammar
|
95
|
+
end
|
96
|
+
|
97
|
+
def tokenizer_l4
|
98
|
+
Dendroid::Utils::BaseTokenizer.new do
|
99
|
+
map_verbatim2terminal({ '+' => :plus })
|
100
|
+
|
101
|
+
scan_verbatim(['+'])
|
102
|
+
scan_value(/[_A-Za-z][_A-Za-z0-9]*/, :id, ->(txt) { txt })
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def grammar_l5
|
107
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
108
|
+
# (based on example in N. Wirth "Compiler Construction" book, p. 6)
|
109
|
+
declare_terminals('a', 'b', 'c')
|
110
|
+
|
111
|
+
rule 'S' => 'A'
|
112
|
+
rule 'A' => ['a A c', 'b']
|
72
113
|
end
|
73
114
|
|
74
115
|
builder.grammar
|
75
116
|
end
|
117
|
+
|
118
|
+
def tokenizer_l5
|
119
|
+
Dendroid::Utils::BaseTokenizer.new do
|
120
|
+
map_verbatim2terminal({ 'a' => :a, 'b' => :b, 'c' => :c })
|
121
|
+
|
122
|
+
scan_verbatim(%w[a b c])
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def grammar_l6
|
127
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
128
|
+
# Grammar to illustrate the dangling else ambiguity
|
129
|
+
# (based on grammar G5 from Douglas Thain "Introduction to Compilers and Language Design" book, p. 6)
|
130
|
+
declare_terminals('if', 'then', 'else', 'E', 'other')
|
131
|
+
|
132
|
+
rule 'P' => 'S'
|
133
|
+
rule 'S' => ['if E then S', 'if E then S else S', 'other']
|
134
|
+
end
|
135
|
+
|
136
|
+
builder.grammar
|
137
|
+
end
|
138
|
+
|
139
|
+
def tokenizer_l6
|
140
|
+
Dendroid::Utils::BaseTokenizer.new do
|
141
|
+
map_verbatim2terminal({ 'if' => :if,
|
142
|
+
'then' => :then,
|
143
|
+
'else' => :else,
|
144
|
+
'E' => :E,
|
145
|
+
'other' => :other })
|
146
|
+
|
147
|
+
scan_verbatim(%w[if then else E other])
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def grammar_l7
|
152
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
153
|
+
# (based on grammar G1 from paper Elizabeth Scott, Adrian Johnstone "Recognition
|
154
|
+
# is not parsing SPPF-style parsing from cubic recognisers")
|
155
|
+
declare_terminals('a')
|
156
|
+
|
157
|
+
rule 'S' => ['S T', 'a']
|
158
|
+
rule 'B' => ''
|
159
|
+
rule 'T' => ['a B', 'a']
|
160
|
+
end
|
161
|
+
|
162
|
+
builder.grammar
|
163
|
+
end
|
164
|
+
|
165
|
+
def tokenizer_l7
|
166
|
+
Dendroid::Utils::BaseTokenizer.new do
|
167
|
+
map_verbatim2terminal({ 'a' => :a })
|
168
|
+
|
169
|
+
scan_verbatim(['a'])
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def grammar_l8
|
174
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
175
|
+
# (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
|
176
|
+
# for Natural Languages")
|
177
|
+
declare_terminals('x')
|
178
|
+
|
179
|
+
rule 'S' => ['S S', 'x']
|
180
|
+
end
|
181
|
+
|
182
|
+
builder.grammar
|
183
|
+
end
|
184
|
+
|
185
|
+
def tokenizer_l8
|
186
|
+
Dendroid::Utils::BaseTokenizer.new do
|
187
|
+
map_verbatim2terminal({ 'x' => :x })
|
188
|
+
|
189
|
+
scan_verbatim(['x'])
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def grammar_l9
|
194
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
195
|
+
# (based on "infinite ambiguity" grammar from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
|
196
|
+
# for Natural Languages")
|
197
|
+
declare_terminals('x')
|
198
|
+
|
199
|
+
rule 'S' => ['S S', '', 'x']
|
200
|
+
end
|
201
|
+
|
202
|
+
builder.grammar
|
203
|
+
end
|
204
|
+
|
205
|
+
def tokenizer_l9
|
206
|
+
Dendroid::Utils::BaseTokenizer.new do
|
207
|
+
map_verbatim2terminal({ 'x' => :x })
|
208
|
+
|
209
|
+
scan_verbatim(['x'])
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def grammar_l10
|
214
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
215
|
+
declare_terminals('a')
|
216
|
+
|
217
|
+
rule 'A' => ['A a', '']
|
218
|
+
end
|
219
|
+
|
220
|
+
builder.grammar
|
221
|
+
end
|
222
|
+
|
223
|
+
def tokenizer_l10
|
224
|
+
Dendroid::Utils::BaseTokenizer.new do
|
225
|
+
map_verbatim2terminal({ 'a' => :a })
|
226
|
+
|
227
|
+
scan_verbatim(['a'])
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
def grammar_l11
|
232
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
233
|
+
declare_terminals('a')
|
234
|
+
|
235
|
+
rule 'A' => ['a A', '']
|
236
|
+
end
|
237
|
+
|
238
|
+
builder.grammar
|
239
|
+
end
|
240
|
+
|
241
|
+
def tokenizer_l11
|
242
|
+
Dendroid::Utils::BaseTokenizer.new do
|
243
|
+
map_verbatim2terminal({ 'a' => :a })
|
244
|
+
|
245
|
+
scan_verbatim(['a'])
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
def grammar_l12
|
250
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
251
|
+
# (based on grammar Example 3 from paper Elizabeth Scott "SPPF-Style Parsing
|
252
|
+
# from Earley Recognisers")
|
253
|
+
# Grammar with hidden left recursion and a cycle
|
254
|
+
declare_terminals('a', 'b')
|
255
|
+
|
256
|
+
rule 'S' => ['A T', 'a T']
|
257
|
+
# rule 'S' => 'a T'
|
258
|
+
rule 'A' => ['a', 'B A']
|
259
|
+
# rule 'A' => 'B A'
|
260
|
+
rule 'B' => ''
|
261
|
+
rule 'T' => 'b b b'
|
262
|
+
end
|
263
|
+
|
264
|
+
builder.grammar
|
265
|
+
end
|
266
|
+
|
267
|
+
def tokenizer_l12
|
268
|
+
Dendroid::Utils::BaseTokenizer.new do
|
269
|
+
map_verbatim2terminal({ 'a' => :a, 'b' => :b })
|
270
|
+
|
271
|
+
scan_verbatim(%w[a b])
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
275
|
+
def grammar_l13
|
276
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
277
|
+
# Grammar based on example RR from Sylvie Billot, Bernard Lang "The Structure of Shared Forests
|
278
|
+
# in Ambiguous Parsing"
|
279
|
+
declare_terminals('x')
|
280
|
+
|
281
|
+
rule 'A' => ['x A', 'x']
|
282
|
+
# rule 'A' => 'x'
|
283
|
+
end
|
284
|
+
|
285
|
+
builder.grammar
|
286
|
+
end
|
287
|
+
|
288
|
+
def tokenizer_l13
|
289
|
+
Dendroid::Utils::BaseTokenizer.new do
|
290
|
+
map_verbatim2terminal({ 'x' => :x })
|
291
|
+
|
292
|
+
scan_verbatim(['x'])
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
def grammar_l14
|
297
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
298
|
+
# Grammar 4: A grammar with nullable nonterminal
|
299
|
+
# based on example from "Parsing Techniques" book, p. 216
|
300
|
+
# (D. Grune, C. Jacobs)
|
301
|
+
declare_terminals('a', 'star', 'slash')
|
302
|
+
|
303
|
+
rule 'S' => 'E'
|
304
|
+
rule 'E' => ['E Q F', 'F']
|
305
|
+
rule 'F' => 'a'
|
306
|
+
rule 'Q' => ['star', 'slash', '']
|
307
|
+
end
|
308
|
+
|
309
|
+
builder.grammar
|
310
|
+
end
|
311
|
+
|
312
|
+
def tokenizer_l14
|
313
|
+
Dendroid::Utils::BaseTokenizer.new do
|
314
|
+
map_verbatim2terminal({ 'a' => :a, '*' => :star, '/' => :slash })
|
315
|
+
|
316
|
+
scan_verbatim(['a', '*', '/'])
|
317
|
+
end
|
318
|
+
end
|
76
319
|
end # module
|
@@ -7,6 +7,7 @@ require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
|
|
7
7
|
require_relative '..\..\..\lib\dendroid\syntax\production'
|
8
8
|
require_relative '..\..\..\lib\dendroid\syntax\choice'
|
9
9
|
require_relative '..\..\..\lib\dendroid\syntax\grammar'
|
10
|
+
require_relative '..\..\..\lib\dendroid\grm_dsl\base_grm_builder'
|
10
11
|
|
11
12
|
describe Dendroid::Syntax::Grammar do
|
12
13
|
let(:int_symb) { build_terminal('INTEGER') }
|
@@ -200,4 +201,148 @@ describe Dendroid::Syntax::Grammar do
|
|
200
201
|
expect(nonproductive).to eq([nterm_D, nterm_F])
|
201
202
|
end
|
202
203
|
end # context
|
204
|
+
|
205
|
+
context 'Errors with terminal symbols' do
|
206
|
+
def grm_terminal_in_lhs
|
207
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
208
|
+
declare_terminals('a', 'b', 'c')
|
209
|
+
|
210
|
+
rule 'S' => 'A'
|
211
|
+
rule 'a' => 'a A c' # Wrong: terminal 'a' in lhs
|
212
|
+
rule 'A' => 'b'
|
213
|
+
end
|
214
|
+
|
215
|
+
builder.grammar
|
216
|
+
end
|
217
|
+
|
218
|
+
def grm_cyclic
|
219
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
220
|
+
declare_terminals('x')
|
221
|
+
|
222
|
+
rule 'S' => %w[S x] # Wrong: cyclic production (lhs and rhs are the same)
|
223
|
+
end
|
224
|
+
|
225
|
+
builder.grammar
|
226
|
+
end
|
227
|
+
|
228
|
+
def grm_no_terminal
|
229
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
230
|
+
# No terminal symbol explicitly declared => all symbols are non-terminals
|
231
|
+
|
232
|
+
rule 'S' => 'A'
|
233
|
+
rule 'A' => 'a A c'
|
234
|
+
rule 'A' => 'b'
|
235
|
+
end
|
236
|
+
|
237
|
+
builder.grammar
|
238
|
+
end
|
239
|
+
|
240
|
+
def unused_terminals
|
241
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
242
|
+
declare_terminals('a', 'b', 'c', 'd', 'e')
|
243
|
+
|
244
|
+
# # Wrong: terminals 'd' and 'e' never appear in rules
|
245
|
+
rule 'S' => 'A'
|
246
|
+
rule 'A' => 'a A c'
|
247
|
+
rule 'A' => 'b'
|
248
|
+
end
|
249
|
+
|
250
|
+
builder.grammar
|
251
|
+
end
|
252
|
+
|
253
|
+
it 'raises an error if there is no terminal symbol' do
|
254
|
+
err_msg = "Grammar doesn't contain any terminal symbol."
|
255
|
+
expect { grm_no_terminal }.to raise_error(StandardError, err_msg)
|
256
|
+
end
|
257
|
+
|
258
|
+
it 'raises an error if a terminal is in lhs of production' do
|
259
|
+
err_msg = "Terminal symbol 'a' may not be on left-side of a rule."
|
260
|
+
expect { grm_terminal_in_lhs }.to raise_error(StandardError, err_msg)
|
261
|
+
end
|
262
|
+
|
263
|
+
it 'raises an error if a terminal never appear in rules' do
|
264
|
+
err_msg = "Terminal symbols 'd', 'e' never appear in production rules."
|
265
|
+
expect { unused_terminals }.to raise_error(StandardError, err_msg)
|
266
|
+
end
|
267
|
+
|
268
|
+
it 'raises an error if a production is cyclic' do
|
269
|
+
err_msg = 'Cyclic rules of the kind S => S are not allowed.'
|
270
|
+
expect { grm_cyclic }.to raise_error(StandardError, err_msg)
|
271
|
+
end
|
272
|
+
end # context
|
273
|
+
|
274
|
+
context 'Errors with non-terminal symbols' do
|
275
|
+
def grm_undefined_nterm
|
276
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
277
|
+
declare_terminals('x')
|
278
|
+
|
279
|
+
rule 'S' => %w[x A] # Wrong: A is never defined
|
280
|
+
end
|
281
|
+
|
282
|
+
builder.grammar
|
283
|
+
end
|
284
|
+
|
285
|
+
def duplicate_production
|
286
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
287
|
+
declare_terminals('a', 'b', 'c')
|
288
|
+
|
289
|
+
rule 'S' => 'A'
|
290
|
+
rule 'A' => ['a A c', 'b']
|
291
|
+
rule 'S' => 'A' # Duplicate rule
|
292
|
+
end
|
293
|
+
|
294
|
+
builder.grammar
|
295
|
+
end
|
296
|
+
|
297
|
+
def grm_unreachable_symbols
|
298
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
299
|
+
declare_terminals('a', 'b', 'c')
|
300
|
+
|
301
|
+
rule 'S' => 'A'
|
302
|
+
rule 'A' => ['a A c', 'b']
|
303
|
+
rule 'Z' => 'a Z X'
|
304
|
+
rule 'X' => 'b b'
|
305
|
+
end
|
306
|
+
|
307
|
+
builder.grammar
|
308
|
+
end
|
309
|
+
|
310
|
+
def nonproductive_symbols
|
311
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
312
|
+
declare_terminals('a', 'b', 'c', 'd', 'e', 'f')
|
313
|
+
|
314
|
+
# # Wrong: non-terminals 'D' and 'F' are non-productive (they never reduce to a string of terminals)
|
315
|
+
rule 'S' => ['A B', 'D E']
|
316
|
+
rule 'A' => 'a'
|
317
|
+
rule 'B' => 'b C'
|
318
|
+
rule 'C' => 'c'
|
319
|
+
rule 'D' => 'd F'
|
320
|
+
rule 'E' => 'e'
|
321
|
+
rule 'F' => 'f D'
|
322
|
+
end
|
323
|
+
|
324
|
+
builder.grammar
|
325
|
+
end
|
326
|
+
|
327
|
+
it 'raises an error when a non-terminal is never defined' do
|
328
|
+
err_msg = "Non-terminal symbols 'A' never appear in head of any production rule."
|
329
|
+
expect { grm_undefined_nterm }.to raise_error(StandardError, err_msg)
|
330
|
+
end
|
331
|
+
|
332
|
+
it 'raises an error when a production is duplicated' do
|
333
|
+
err_msg = "Production rule 'S => A' appears more than once in the grammar."
|
334
|
+
expect { duplicate_production }.to raise_error(StandardError, err_msg)
|
335
|
+
end
|
336
|
+
|
337
|
+
it 'raises an error when a non-terminal is unreachable' do
|
338
|
+
# err_msg = "Symbols 'Z', 'X' are unreachable from start symbol."
|
339
|
+
err_msg = "Symbols 'Z' are non-productive."
|
340
|
+
expect { grm_unreachable_symbols }.to raise_error(StandardError, err_msg)
|
341
|
+
end
|
342
|
+
|
343
|
+
it 'raises an error when a non-terminal is non-productive' do
|
344
|
+
err_msg = "Symbols 'D', 'F' are non-productive."
|
345
|
+
expect { nonproductive_symbols }.to raise_error(StandardError, err_msg)
|
346
|
+
end
|
347
|
+
end # context
|
203
348
|
end # describe
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.11
|
1
|
+
0.0.12
|