sequitur 0.0.04

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,271 @@
1
+ require_relative '../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../lib/sequitur/production'
5
+
6
+ module Sequitur # Re-open the module to get rid of qualified names
7
+
8
+ describe Production do
9
+ # Helper method: convert a list of digrams into an array of
10
+ # symbol couples, obtained by calling `symbols` on each digram.
11
+ def to_symbols(theDigrams)
12
+ return theDigrams.map(&:symbols)
13
+ end
14
+
15
+ let(:p_a) do
16
+ instance = Production.new
17
+ instance.append_symbol(:a)
18
+ instance
19
+ end
20
+
21
+ let(:p_bc) do
22
+ instance = Production.new
23
+ instance.append_symbol('b')
24
+ instance.append_symbol('c')
25
+ instance
26
+ end
27
+
28
+ context 'Creation & initialization:' do
29
+ it 'should be created without argument' do
30
+ expect { Production.new }.not_to raise_error
31
+ end
32
+
33
+ it 'should not referenced yet' do
34
+ expect(subject.refcount).to eq(0)
35
+ end
36
+
37
+ it 'should be empty at creation' do
38
+ expect(subject).to be_empty
39
+ end
40
+
41
+ it 'should not have digram' do
42
+ expect(subject.digrams).to be_empty
43
+ expect(subject.last_digram).to be_nil
44
+ end
45
+ end # context
46
+
47
+ context 'Knowing its rhs:' do
48
+
49
+ it 'should know the productions in its rhs' do
50
+ # Case 1: empty production
51
+ expect(subject.references).to be_empty
52
+
53
+ # Case 2: production without references
54
+ symbols = [:a, :b, :c]
55
+ symbols.each { |symb| subject.append_symbol(symb) }
56
+ expect(subject.references).to be_empty
57
+
58
+ # Case 3: production with one reference
59
+ subject.append_symbol(p_a)
60
+ expect(subject.references).to eq([p_a])
61
+
62
+ # Case 4: production with repeated references
63
+ subject.append_symbol(p_a) # second time
64
+ expect(subject.references).to eq([p_a, p_a])
65
+
66
+ # Case 5: production with multiple distinct references
67
+ subject.append_symbol(p_bc)
68
+ expect(subject.references).to eq([p_a, p_a, p_bc])
69
+ end
70
+
71
+ end # context
72
+
73
+ context 'Appending a symbol:' do
74
+
75
+ it 'should append a symbol when empty' do
76
+ expect { subject.append_symbol(:a) }.not_to raise_error
77
+ expect(subject.rhs).to eq([:a])
78
+ expect(subject.last_digram).to be_nil
79
+ end
80
+
81
+ it 'should append a symbol when has one symbol' do
82
+ subject.append_symbol(:a)
83
+ subject.append_symbol(:b)
84
+ expect(subject.rhs).to eq([:a, :b])
85
+ expect(subject.last_digram.symbols).to eq([:a, :b])
86
+ end
87
+
88
+ it 'should append a symbol when rhs has several symbols' do
89
+ symbols = [:a, :b, :c, :d, :e, :f]
90
+ symbols.each { |symb| subject.append_symbol(symb) }
91
+ expect(subject.rhs).to eq(symbols)
92
+ expect(subject.last_digram.symbols).to eq([:e, :f])
93
+ end
94
+
95
+ it 'should increment the refcount for each production in the rhs' do
96
+ expect(p_a.refcount).to be(0)
97
+
98
+ input = [p_a, :b, :c, :d, p_a, :e, :f] # p_a appears twice
99
+ input.each { |symb| subject.append_symbol(symb) }
100
+ expect(p_a.refcount).to be(2)
101
+ end
102
+
103
+ it 'should calculate the digrams before appending:' do
104
+ # Case: empty production
105
+ expect(subject.calc_append_symbol(:a)).to be_empty
106
+
107
+ # Case: single-symbol rhs
108
+ subject.append_symbol(:a)
109
+ expect(to_symbols(subject.calc_append_symbol(:b))).to eq([[:a, :b]])
110
+
111
+ # Case: two-symbols rhs
112
+ subject.append_symbol(:b)
113
+ expectation = [[:a, :b], [:b, :c]]
114
+ expect(to_symbols(subject.calc_append_symbol(:c))).to eq(expectation)
115
+ end
116
+
117
+ end # context
118
+
119
+
120
+ context 'Text representation of a production rule:' do
121
+
122
+ it 'should emit minimal text when empty' do
123
+ expectation = "#{subject.object_id} : ."
124
+ expect(subject.to_string).to eq(expectation)
125
+ end
126
+
127
+ it 'should emit its text representation' do
128
+ symbols = [:a, :b, 'c', :d, :e, :f]
129
+ symbols.each { |symb| subject.append_symbol(symb) }
130
+ expectation = "#{subject.object_id} : a b 'c' d e f."
131
+ expect(subject.to_string).to eq(expectation)
132
+ end
133
+
134
+ end # context
135
+
136
+ context 'Detecting digram repetition:' do
137
+ it 'should report no repetition when empty' do
138
+ expect(subject.repeated_digram?).to be_falsey
139
+ end
140
+
141
+ it 'should report no repetition when rhs has less than 3 symbols' do
142
+ subject.append_symbol(:a)
143
+ expect(subject.repeated_digram?).to be_falsey
144
+
145
+ subject.append_symbol(:a)
146
+ expect(subject.repeated_digram?).to be_falsey
147
+ end
148
+
149
+ it 'should detect shortest repetition' do
150
+ 'aaa'.each_char { |symb| subject.append_symbol(symb) }
151
+ expect(subject.repeated_digram?).to be_truthy
152
+ end
153
+
154
+ it 'should detect any repetition pattern' do
155
+ # Positive cases
156
+ cases = %w(abab abcdab abcdcd abcdefcd )
157
+ cases.each do |word|
158
+ instance = Production.new
159
+ word.each_char { |symb| instance.append_symbol(symb) }
160
+ expect(instance.repeated_digram?).to be_truthy
161
+ end
162
+
163
+ # Negative cases
164
+ cases = %w(abc abb abba abcdef)
165
+ cases.each do |word|
166
+ instance = Production.new
167
+ word.each_char { |symb| instance.append_symbol(symb) }
168
+ expect(instance.repeated_digram?).to be_falsey
169
+ end
170
+ end
171
+ end # context
172
+
173
+ context 'Replacing a digram by a production:' do
174
+
175
+ it 'should have not effect on empty production' do
176
+ subject.replace_digram(p_bc)
177
+ expect(subject.rhs).to be_empty
178
+ expect(p_bc.refcount).to eq(0)
179
+ end
180
+
181
+
182
+ it 'should replace two-symbol sequence' do
183
+ %w(a b c d e b c e).each { |symb| subject.append_symbol(symb) }
184
+ subject.replace_digram(p_bc)
185
+
186
+ expect(subject.rhs.size).to eq(6)
187
+ expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc, 'e'])
188
+ expect(p_bc.refcount).to eq(2)
189
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
190
+ end
191
+
192
+
193
+ it 'should replace a starting two-symbol sequence' do
194
+ %w(b c d e b c e).each { |symb| subject.append_symbol(symb) }
195
+ subject.replace_digram(p_bc)
196
+
197
+ expect(subject.rhs.size).to eq(5)
198
+ expect(subject.rhs).to eq([p_bc, 'd', 'e', p_bc, 'e'])
199
+ expect(p_bc.refcount).to eq(2)
200
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
201
+ end
202
+
203
+
204
+ it 'should replace an ending two-symbol sequence' do
205
+ %w(a b c d e b c).each { |symb| subject.append_symbol(symb) }
206
+ subject.replace_digram(p_bc)
207
+
208
+ expect(subject.rhs.size).to eq(5)
209
+ expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc])
210
+ expect(p_bc.refcount).to eq(2)
211
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
212
+ end
213
+
214
+ it 'should replace two consecutive two-symbol sequences' do
215
+ %w(a b c b c d).each { |symb| subject.append_symbol(symb) }
216
+ subject.replace_digram(p_bc)
217
+
218
+ expect(subject.rhs.size).to eq(4)
219
+ expect(subject.rhs).to eq(['a', p_bc, p_bc, 'd'])
220
+ expect(p_bc.refcount).to eq(2)
221
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
222
+ end
223
+
224
+ end # context
225
+
226
+ context 'Replacing a production occurrence by its rhs:' do
227
+
228
+ it 'should have not effect on empty production' do
229
+ subject.replace_production(p_bc)
230
+ expect(subject.rhs).to be_empty
231
+ end
232
+
233
+ it 'should replace a production at the start' do
234
+ [p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
235
+ subject.replace_production(p_bc)
236
+ expect(subject.rhs.size).to eq(3)
237
+ expect(subject.rhs).to eq(%w(b c d))
238
+ end
239
+
240
+
241
+ it 'should replace a production at the end' do
242
+ ['d', p_bc].each { |symb| subject.append_symbol(symb) }
243
+ subject.replace_production(p_bc)
244
+
245
+ expect(subject.rhs.size).to eq(3)
246
+ expect(subject.rhs).to eq(%w(d b c))
247
+ end
248
+
249
+ it 'should replace a production as sole symbol' do
250
+ subject.append_symbol(p_bc)
251
+ subject.replace_production(p_bc)
252
+
253
+ expect(subject.rhs.size).to eq(2)
254
+ expect(subject.rhs).to eq(%w(b c))
255
+ end
256
+
257
+ it 'should replace a production in the middle' do
258
+ ['a', p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
259
+ subject.replace_production(p_bc)
260
+
261
+ expect(subject.rhs.size).to eq(4)
262
+ expect(subject.rhs).to eq(%w(a b c d))
263
+ end
264
+
265
+ end # context
266
+
267
+ end # describe
268
+
269
+ end # module
270
+
271
+ # End of file
@@ -0,0 +1,213 @@
1
+ require_relative '../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../lib/sequitur/sequitur-grammar'
5
+
6
+ module Sequitur # Re-open the module to get rid of qualified names
7
+
8
+ describe SequiturGrammar do
9
+ # Factory method. Returns an empty enumerator
10
+ # (i.e. one without any elements to iterate).
11
+ def empty_enum()
12
+ return [].to_enum
13
+ end
14
+
15
+ context 'Creation from an enumeration of tokens:' do
16
+
17
+ it 'could be created with an empty enumerator' do
18
+ expect { SequiturGrammar.new(empty_enum) }.not_to raise_error
19
+
20
+ # Creation
21
+ instance = SequiturGrammar.new(empty_enum)
22
+
23
+ # Initialization
24
+ expect(instance.productions.size).to eq(1)
25
+ expect(instance.root).to eq(instance.productions.first)
26
+ expect(instance.root).to be_empty
27
+ end
28
+
29
+ it 'could be created with single token' do
30
+ # Creation
31
+ instance = SequiturGrammar.new([:a].to_enum)
32
+
33
+ # Initialization
34
+ expect(instance.productions.size).to eq(1)
35
+ expect(instance.root).to eq(instance.productions.first)
36
+ expect(instance.root.rhs).to eq([:a])
37
+ end
38
+
39
+ it 'could be created with multiple unique tokens' do
40
+ # Creation
41
+ instance = SequiturGrammar.new([:a, :b, :c, :d].to_enum)
42
+
43
+ # Initialization
44
+ expect(instance.productions.size).to eq(1)
45
+ expect(instance.root).to eq(instance.productions.first)
46
+ expect(instance.root.rhs).to eq([:a, :b, :c, :d])
47
+ end
48
+
49
+ it 'could be created with a repeating digram' do
50
+ instance = SequiturGrammar.new([:a, :b, :a, :b].to_enum)
51
+
52
+ # Expectations:
53
+ # S : A A.
54
+ # A : a b.
55
+ expect(instance.productions.size).to eq(2)
56
+ p_a = instance.productions[1]
57
+ expect(p_a.rhs).to eq([:a, :b])
58
+ expect(instance.root.rhs).to eq([p_a, p_a])
59
+ end
60
+
61
+ it 'should enforce the utility rule' do
62
+ instance = SequiturGrammar.new([:a, :b, :c, :a, :b, :c].to_enum)
63
+
64
+ # Expectations without utility rule:
65
+ # S : B B.
66
+ # A : a b.
67
+ # B : A c.
68
+
69
+ # Expectations with utility rule:
70
+ # S : A A.
71
+ # A : a b c.
72
+ expect(instance.productions.size).to eq(2)
73
+ p_a = instance.productions.last
74
+ expect(p_a.rhs).to eq([:a, :b, :c])
75
+ expect(instance.root.rhs).to eq([p_a, p_a])
76
+ end
77
+
78
+
79
+ it 'should cope with the example from presentation' do
80
+ input = 'bbebeebebebbebee'
81
+
82
+ # Creation
83
+ instance = SequiturGrammar.new(input.chars)
84
+
85
+ # Expectations:
86
+ # S: P3 P2 P3
87
+ # P1: b e
88
+ # P2: P1 P1
89
+ # P3: b P2 e
90
+ expect(instance.productions.size).to eq(4)
91
+ (p1, p2, p3) = instance.productions[1..3]
92
+ expect(instance.root.rhs).to eq([p3, p2, p3])
93
+ expect(p1.rhs).to eq(%w(b e))
94
+ expect(p2.rhs).to eq([p1, p1])
95
+ expect(p3.rhs).to eq(['b', p2, 'e'])
96
+ end
97
+
98
+ it 'should cope with the example from sequitur.info website' do
99
+ input = 'abcabdabcabd'
100
+ instance = SequiturGrammar.new(input.chars)
101
+
102
+ # Expectations:
103
+ # 0 → 2 2
104
+ # 1 → a b
105
+ # 2 → 1 c 1 d
106
+
107
+ expect(instance.productions.size).to eq(3)
108
+ (p1, p2) = instance.productions[1..2]
109
+ expect(instance.root.rhs).to eq([p2, p2])
110
+ expect(p1.rhs).to eq(%w(a b))
111
+ expect(p2.rhs).to eq([p1, 'c', p1, 'd'])
112
+ end
113
+
114
+ it "should cope with the example from Salomon's book" do
115
+ input = 'abcdbcabcdbc'
116
+ instance = SequiturGrammar.new(input.chars)
117
+
118
+ # Expectations:
119
+ # S → CC
120
+ # A → bc
121
+ # C → aAdA
122
+
123
+ expect(instance.productions.size).to eq(3)
124
+ (p_a, p_c) = instance.productions[1..2]
125
+ expect(instance.root.rhs).to eq([p_c, p_c])
126
+ expect(p_a.rhs).to eq(%w(b c))
127
+ expect(p_c.rhs).to eq(['a', p_a, 'd', p_a])
128
+ end
129
+
130
+ it 'should cope with the "porridge" example from sequitur.info' do
131
+ # Another example from sequitur.info website
132
+ input = <<-SNIPPET
133
+ pease porridge hot,
134
+ pease porridge cold,
135
+ pease porridge in the pot,
136
+ nine days old.
137
+
138
+ some like it hot,
139
+ some like it cold,
140
+ some like it in the pot,
141
+ nine days old.
142
+ SNIPPET
143
+ # Expectations (sequitur.info)
144
+ # 0 → 1 2 3 4 3 5 ↵ 6 2 7 4 7 5
145
+ # 1 → p e a s 8 r r i d g 9 pease_porridge_
146
+ # 2 → h o t hot
147
+ # 3 → 10 1 ,↵pease_porridge_
148
+ # 4 → c 11 cold
149
+ # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
150
+ # in_the_pot,↵nine_days_old.↵
151
+ # 6 → s o m 9 l i k 9 i t _ some_like_it_
152
+ # 7 → 10 6 ,↵some_like_it_
153
+ # 8 → 9 p o e_po
154
+ # 9 → e _ e_
155
+ # 10 → , ↵ ,↵
156
+ # 11 → o l d old
157
+ # 12 → i n in
158
+
159
+ instance = SequiturGrammar.new(input.chars)
160
+ expect(instance.productions.size).to eq(13)
161
+ p0 = instance.root
162
+ expect(p0.rhs.size).to eq(13)
163
+
164
+ (p1, p2, p3, p4, p5, p6, p7, p8, p9) = instance.productions[1..9]
165
+ (p10, p11, p12) = instance.productions[10..12]
166
+
167
+ # Note: the productions aren't sorted the same way as
168
+ # the sequitur.info implementation.
169
+ p0_expectation = [
170
+ p2, p8, p3, p10, p3, p12, "\n",
171
+ p9, p8, p11, p10, p11, p12
172
+ ]
173
+ expect(p0.rhs).to eq(p0_expectation) # Rule 0 above
174
+ expect(p1.rhs).to eq(['e', ' ']) # Rule 9 above
175
+ expect(p2.rhs).to eq([%w(p e a s), p4, %w(r r i d g), p1].flatten) # R1
176
+ expect(p3.rhs).to eq([p5, p2]) # Rule 3 above
177
+ expect(p4.rhs).to eq([p1, 'p', 'o']) # Rule 8 above
178
+ expect(p5.rhs).to eq([',', "\n"]) # Rule 10 above
179
+ expect(p6.rhs).to eq(%w(i n)) # Rule 12 above
180
+ expect(p7.rhs).to eq(%w(o l d)) # Rule 11 above
181
+ expect(p8.rhs).to eq(%w(h o t)) # Rule 2 above
182
+ p9_expectation = [%w(s o m), p1, %w(l i k), p1, 'i', 't', ' '].flatten
183
+ expect(p9.rhs).to eq(p9_expectation) # Rule 6 above
184
+ expect(p10.rhs).to eq(['c', p7]) # Rule 4 above
185
+ expect(p11.rhs).to eq([p5, p9]) # Rule 7 above
186
+ p12_expectation = [
187
+ p6, ' ', 't', 'h', p4, 't', p5, 'n', p6, p1,
188
+ %w(d a y s), ' ', p7, '.', "\n"
189
+ ].flatten
190
+ expect(p12.rhs).to eq(p12_expectation) # Rule 5 above
191
+ end
192
+ end # context
193
+
194
+ context 'Generating a text representation of itself:' do
195
+
196
+ it 'should generate a text representation when empty' do
197
+ instance = SequiturGrammar.new(empty_enum)
198
+ expectation = "#{instance.root.object_id} : ."
199
+
200
+ expect(instance.to_string).to eq(expectation)
201
+ end
202
+
203
+ it 'should generate a text representation of a simple production' do
204
+ instance = SequiturGrammar.new([:a].to_enum)
205
+ expectation = "#{instance.root.object_id} : a."
206
+ expect(instance.to_string).to eq(expectation)
207
+ end
208
+ end # context
209
+
210
+ end # describe
211
+ end # module
212
+
213
+ # End of file