dendroid 0.0.11 → 0.1.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,9 +10,7 @@ module SampleGrammars
10
10
  declare_terminals('PLUS', 'STAR', 'INTEGER')
11
11
  rule('p' => 's')
12
12
  rule('s' => ['s PLUS m', 'm'])
13
- # rule('s' => 'm')
14
13
  rule('m' => ['m STAR t', 't'])
15
- # rule('m' => 't')
16
14
  rule('t' => 'INTEGER')
17
15
  end
18
16
 
@@ -28,7 +26,6 @@ module SampleGrammars
28
26
  end
29
27
  end
30
28
 
31
-
32
29
  def grammar_l2
33
30
  builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
34
31
  # Grammar inspired from Loup Vaillant's example
@@ -47,8 +44,7 @@ module SampleGrammars
47
44
 
48
45
  def tokenizer_l2
49
46
  Dendroid::Utils::BaseTokenizer.new do
50
- map_verbatim2terminal({
51
- '+' => :PLUS,
47
+ map_verbatim2terminal({ '+' => :PLUS,
52
48
  '-' => :MINUS,
53
49
  '*' => :STAR,
54
50
  '/' => :SLASH,
@@ -68,9 +64,256 @@ module SampleGrammars
68
64
 
69
65
  rule('Z' => ['d', 'X Y Z'])
70
66
  rule('Y' => ['', 'c'])
71
- rule('X' => ['Y', 'a'])
67
+ rule('X' => %w[Y a])
68
+ end
69
+
70
+ builder.grammar
71
+ end
72
+
73
+ def grammar_l31
74
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
75
+ # Ambiguous arithmetical expression language
76
+ # This language is compatible with tokenizer L1
77
+ declare_terminals('PLUS', 'STAR', 'INTEGER')
78
+ rule('p' => 's')
79
+ rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
80
+ end
81
+
82
+ builder.grammar
83
+ end
84
+
85
+ def grammar_l4
86
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
87
+ # (based on an example from Fischer and LeBlanc: "Crafting a Compiler")
88
+ declare_terminals('plus', 'id')
89
+
90
+ rule 'S' => 'E'
91
+ rule 'E' => ['E plus E', 'id']
92
+ end
93
+
94
+ builder.grammar
95
+ end
96
+
97
+ def tokenizer_l4
98
+ Dendroid::Utils::BaseTokenizer.new do
99
+ map_verbatim2terminal({ '+' => :plus })
100
+
101
+ scan_verbatim(['+'])
102
+ scan_value(/[_A-Za-z][_A-Za-z0-9]*/, :id, ->(txt) { txt })
103
+ end
104
+ end
105
+
106
+ def grammar_l5
107
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
108
+ # (based on example in N. Wirth "Compiler Construction" book, p. 6)
109
+ declare_terminals('a', 'b', 'c')
110
+
111
+ rule 'S' => 'A'
112
+ rule 'A' => ['a A c', 'b']
72
113
  end
73
114
 
74
115
  builder.grammar
75
116
  end
117
+
118
+ def tokenizer_l5
119
+ Dendroid::Utils::BaseTokenizer.new do
120
+ map_verbatim2terminal({ 'a' => :a, 'b' => :b, 'c' => :c })
121
+
122
+ scan_verbatim(%w[a b c])
123
+ end
124
+ end
125
+
126
+ def grammar_l6
127
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
128
+ # Grammar to illustrate the dangling else ambiguity
129
+ # (based on grammar G5 from Douglas Thain "Introduction to Compilers and Language Design" book, p. 6)
130
+ declare_terminals('if', 'then', 'else', 'E', 'other')
131
+
132
+ rule 'P' => 'S'
133
+ rule 'S' => ['if E then S', 'if E then S else S', 'other']
134
+ end
135
+
136
+ builder.grammar
137
+ end
138
+
139
+ def tokenizer_l6
140
+ Dendroid::Utils::BaseTokenizer.new do
141
+ map_verbatim2terminal({ 'if' => :if,
142
+ 'then' => :then,
143
+ 'else' => :else,
144
+ 'E' => :E,
145
+ 'other' => :other })
146
+
147
+ scan_verbatim(%w[if then else E other])
148
+ end
149
+ end
150
+
151
+ def grammar_l7
152
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
153
+ # (based on grammar G1 from paper Elizabeth Scott, Adrian Johnstone "Recognition
154
+ # is not parsing - SPPF-style parsing from cubic recognisers")
155
+ declare_terminals('a')
156
+
157
+ rule 'S' => ['S T', 'a']
158
+ rule 'B' => ''
159
+ rule 'T' => ['a B', 'a']
160
+ end
161
+
162
+ builder.grammar
163
+ end
164
+
165
+ def tokenizer_l7
166
+ Dendroid::Utils::BaseTokenizer.new do
167
+ map_verbatim2terminal({ 'a' => :a })
168
+
169
+ scan_verbatim(['a'])
170
+ end
171
+ end
172
+
173
+ def grammar_l8
174
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
175
+ # (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
176
+ # for Natural Languages")
177
+ declare_terminals('x')
178
+
179
+ rule 'S' => ['S S', 'x']
180
+ end
181
+
182
+ builder.grammar
183
+ end
184
+
185
+ def tokenizer_l8
186
+ Dendroid::Utils::BaseTokenizer.new do
187
+ map_verbatim2terminal({ 'x' => :x })
188
+
189
+ scan_verbatim(['x'])
190
+ end
191
+ end
192
+
193
+ def grammar_l9
194
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
195
+ # (based on "infinite ambiguity" grammar from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
196
+ # for Natural Languages")
197
+ declare_terminals('x')
198
+
199
+ rule 'S' => ['S S', '', 'x']
200
+ end
201
+
202
+ builder.grammar
203
+ end
204
+
205
+ def tokenizer_l9
206
+ Dendroid::Utils::BaseTokenizer.new do
207
+ map_verbatim2terminal({ 'x' => :x })
208
+
209
+ scan_verbatim(['x'])
210
+ end
211
+ end
212
+
213
+ def grammar_l10
214
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
215
+ declare_terminals('a')
216
+
217
+ rule 'A' => ['A a', '']
218
+ end
219
+
220
+ builder.grammar
221
+ end
222
+
223
+ def tokenizer_l10
224
+ Dendroid::Utils::BaseTokenizer.new do
225
+ map_verbatim2terminal({ 'a' => :a })
226
+
227
+ scan_verbatim(['a'])
228
+ end
229
+ end
230
+
231
+ def grammar_l11
232
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
233
+ declare_terminals('a')
234
+
235
+ rule 'A' => ['a A', '']
236
+ end
237
+
238
+ builder.grammar
239
+ end
240
+
241
+ def tokenizer_l11
242
+ Dendroid::Utils::BaseTokenizer.new do
243
+ map_verbatim2terminal({ 'a' => :a })
244
+
245
+ scan_verbatim(['a'])
246
+ end
247
+ end
248
+
249
+ def grammar_l12
250
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
251
+ # (based on grammar Example 3 from paper Elizabeth Scott "SPPF-Style Parsing
252
+ # from Earley Recognisers")
253
+ # Grammar with hidden left recursion and a cycle
254
+ declare_terminals('a', 'b')
255
+
256
+ rule 'S' => ['A T', 'a T']
257
+ # rule 'S' => 'a T'
258
+ rule 'A' => ['a', 'B A']
259
+ # rule 'A' => 'B A'
260
+ rule 'B' => ''
261
+ rule 'T' => 'b b b'
262
+ end
263
+
264
+ builder.grammar
265
+ end
266
+
267
+ def tokenizer_l12
268
+ Dendroid::Utils::BaseTokenizer.new do
269
+ map_verbatim2terminal({ 'a' => :a, 'b' => :b })
270
+
271
+ scan_verbatim(%w[a b])
272
+ end
273
+ end
274
+
275
+ def grammar_l13
276
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
277
+ # Grammar based on example RR from Sylvie Billot, Bernard Lang "The Structure of Shared Forests
278
+ # in Ambiguous Parsing"
279
+ declare_terminals('x')
280
+
281
+ rule 'A' => ['x A', 'x']
282
+ # rule 'A' => 'x'
283
+ end
284
+
285
+ builder.grammar
286
+ end
287
+
288
+ def tokenizer_l13
289
+ Dendroid::Utils::BaseTokenizer.new do
290
+ map_verbatim2terminal({ 'x' => :x })
291
+
292
+ scan_verbatim(['x'])
293
+ end
294
+ end
295
+
296
+ def grammar_l14
297
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
298
+ # Grammar 4: A grammar with nullable nonterminal
299
+ # based on example from "Parsing Techniques" book, p. 216
300
+ # (D. Grune, C. Jacobs)
301
+ declare_terminals('a', 'star', 'slash')
302
+
303
+ rule 'S' => 'E'
304
+ rule 'E' => ['E Q F', 'F']
305
+ rule 'F' => 'a'
306
+ rule 'Q' => ['star', 'slash', '']
307
+ end
308
+
309
+ builder.grammar
310
+ end
311
+
312
+ def tokenizer_l14
313
+ Dendroid::Utils::BaseTokenizer.new do
314
+ map_verbatim2terminal({ 'a' => :a, '*' => :star, '/' => :slash })
315
+
316
+ scan_verbatim(['a', '*', '/'])
317
+ end
318
+ end
76
319
  end # module
@@ -7,6 +7,7 @@ require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
7
7
  require_relative '..\..\..\lib\dendroid\syntax\production'
8
8
  require_relative '..\..\..\lib\dendroid\syntax\choice'
9
9
  require_relative '..\..\..\lib\dendroid\syntax\grammar'
10
+ require_relative '..\..\..\lib\dendroid\grm_dsl\base_grm_builder'
10
11
 
11
12
  describe Dendroid::Syntax::Grammar do
12
13
  let(:int_symb) { build_terminal('INTEGER') }
@@ -57,8 +58,8 @@ describe Dendroid::Syntax::Grammar do
57
58
  expect(subject.symbols).to eq(all_terminals)
58
59
  end
59
60
 
60
- it 'ignores about productions after initialization' do
61
- expect(subject.rules).to be_nil
61
+ it 'does not have rules after initialization' do
62
+ expect(subject.rules).to be_empty
62
63
  end
63
64
 
64
65
  it 'maps a terminal name to one GrmSymbol object' do
@@ -108,14 +109,15 @@ describe Dendroid::Syntax::Grammar do
108
109
  end
109
110
  end
110
111
 
111
- it 'maps every non-terminal to its defining productions' do
112
+ it 'maps every non-terminal to its defining production' do
112
113
  rules = build_all_rules
113
114
  rules.each { |rl| subject.add_rule(rl) }
114
115
  %i[p s m t].each do |symb_name|
115
116
  symb = subject.name2symbol[symb_name]
116
117
  expected_prods = subject.rules.select { |prd| prd.head == symb }
117
- related_prods = subject.nonterm2productions[symb]
118
- expect(related_prods).to eq(expected_prods)
118
+ expect(expected_prods.size).to eq(1)
119
+ related_prod = subject.nonterm2production[symb]
120
+ expect(related_prod).to eq(expected_prods[0])
119
121
  end
120
122
  end
121
123
  end # context
@@ -200,4 +202,162 @@ describe Dendroid::Syntax::Grammar do
200
202
  expect(nonproductive).to eq([nterm_D, nterm_F])
201
203
  end
202
204
  end # context
205
+
206
+ context 'Errors with terminal symbols' do
207
+ def grm_terminal_in_lhs
208
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
209
+ declare_terminals('a', 'b', 'c')
210
+
211
+ rule 'S' => 'A'
212
+ rule 'a' => 'a A c' # Wrong: terminal 'a' in lhs
213
+ rule 'A' => 'b'
214
+ end
215
+
216
+ builder.grammar
217
+ end
218
+
219
+ def grm_cyclic
220
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
221
+ declare_terminals('x')
222
+
223
+ rule 'S' => %w[S x] # Wrong: cyclic production (lhs and rhs are the same)
224
+ end
225
+
226
+ builder.grammar
227
+ end
228
+
229
+ def grm_no_terminal
230
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
231
+ # No terminal symbol explicitly declared => all symbols are non-terminals
232
+
233
+ rule 'S' => 'A'
234
+ rule 'A' => ['a A c', 'b']
235
+ end
236
+
237
+ builder.grammar
238
+ end
239
+
240
+ def unused_terminals
241
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
242
+ declare_terminals('a', 'b', 'c', 'd', 'e')
243
+
244
+ # Wrong: terminals 'd' and 'e' never appear in rules
245
+ rule 'S' => 'A'
246
+ rule 'A' => ['a A c', 'b']
247
+ end
248
+
249
+ builder.grammar
250
+ end
251
+
252
+ it 'raises an error if there is no terminal symbol' do
253
+ err_msg = "Grammar doesn't contain any terminal symbol."
254
+ expect { grm_no_terminal }.to raise_error(StandardError, err_msg)
255
+ end
256
+
257
+ it 'raises an error if a terminal is in lhs of production' do
258
+ err_msg = "Terminal symbol 'a' may not be on left-side of a rule."
259
+ expect { grm_terminal_in_lhs }.to raise_error(StandardError, err_msg)
260
+ end
261
+
262
+ it 'raises an error if a terminal never appear in rules' do
263
+ err_msg = "Terminal symbols 'd', 'e' never appear in production rules."
264
+ expect { unused_terminals }.to raise_error(StandardError, err_msg)
265
+ end
266
+
267
+ it 'raises an error if a production is cyclic' do
268
+ err_msg = 'Cyclic rules of the kind S => S are not allowed.'
269
+ expect { grm_cyclic }.to raise_error(StandardError, err_msg)
270
+ end
271
+ end # context
272
+
273
+ context 'Errors with non-terminal symbols' do
274
+ def grm_undefined_nterm
275
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
276
+ declare_terminals('x')
277
+
278
+ rule 'S' => %w[x A] # Wrong: A is never defined
279
+ end
280
+
281
+ builder.grammar
282
+ end
283
+
284
+ def grm_multiple_defs
285
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
286
+ declare_terminals('a b c')
287
+
288
+ rule 'A' => %w[a B]
289
+ rule 'B' => ['b', '']
290
+ rule 'A' => 'c'
291
+ end
292
+
293
+ builder.grammar
294
+ end
295
+
296
+ def duplicate_production
297
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
298
+ declare_terminals('a')
299
+
300
+ rule 'S' => 'A'
301
+ rule 'A' => %w[a a] # Duplicate alternatives
302
+ end
303
+
304
+ builder.grammar
305
+ end
306
+
307
+ def grm_unreachable_symbols
308
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
309
+ declare_terminals('a', 'b', 'c')
310
+
311
+ rule 'S' => 'A'
312
+ rule 'A' => ['a A c', 'b']
313
+ rule 'Z' => 'a Z X'
314
+ rule 'X' => 'b b'
315
+ end
316
+
317
+ builder.grammar
318
+ end
319
+
320
+ def nonproductive_symbols
321
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
322
+ declare_terminals('a', 'b', 'c', 'd', 'e', 'f')
323
+
324
+ # Wrong: non-terminals 'D' and 'F' are non-productive (they never derive a string of terminals)
325
+ rule 'S' => ['A B', 'D E']
326
+ rule 'A' => 'a'
327
+ rule 'B' => 'b C'
328
+ rule 'C' => 'c'
329
+ rule 'D' => 'd F'
330
+ rule 'E' => 'e'
331
+ rule 'F' => 'f D'
332
+ end
333
+
334
+ builder.grammar
335
+ end
336
+
337
+ it 'raises an error when a non-terminal is never defined' do
338
+ err_msg = "Non-terminal symbols 'A' never appear in head of any production rule."
339
+ expect { grm_undefined_nterm }.to raise_error(StandardError, err_msg)
340
+ end
341
+
342
+ it 'raises an error when a non-terminal is defined multiple times' do
343
+ err_msg = "Non-terminal 'A' is on left-hand side of more than one rule."
344
+ expect { grm_multiple_defs }.to raise_error(StandardError, err_msg)
345
+ end
346
+
347
+ it 'raises an error when a production is duplicated' do
348
+ err_msg = 'Duplicate alternatives: A => a'
349
+ expect { duplicate_production }.to raise_error(StandardError, err_msg)
350
+ end
351
+
352
+ it 'raises an error when a non-terminal is unreachable' do
353
+ # err_msg = "Symbols 'Z', 'X' are unreachable from start symbol."
354
+ err_msg = "Symbols 'Z' are non-productive."
355
+ expect { grm_unreachable_symbols }.to raise_error(StandardError, err_msg)
356
+ end
357
+
358
+ it 'raises an error when a non-terminal is non-productive' do
359
+ err_msg = "Symbols 'D', 'F' are non-productive."
360
+ expect { nonproductive_symbols }.to raise_error(StandardError, err_msg)
361
+ end
362
+ end # context
203
363
  end # describe
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.0.11
1
+ 0.1.00
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dendroid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.1.00
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-02 00:00:00.000000000 Z
11
+ date: 2023-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: WIP. A Ruby implementation of an Earley parser
14
14
  email: famished.tiger@yahoo.com