sequitur 0.0.04

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ require_relative '../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../lib/sequitur/production'
5
+
6
+ module Sequitur # Re-open the module to get rid of qualified names
7
+
8
+ describe Production do
9
+ # Helper method: convert a list of digrams into an array
10
+ # of symbol couples, i.e. the result of calling #symbols on each digram.
11
+ def to_symbols(theDigrams)
12
+ return theDigrams.map(&:symbols)
13
+ end
14
+
15
+ let(:p_a) do # Fixture: a new Production to which the single symbol :a was appended
16
+ instance = Production.new
17
+ instance.append_symbol(:a)
18
+ instance
19
+ end
20
+
21
+ let(:p_bc) do # Fixture: a new Production to which the strings 'b' then 'c' were appended
22
+ instance = Production.new
23
+ instance.append_symbol('b')
24
+ instance.append_symbol('c')
25
+ instance
26
+ end
27
+
28
+ context 'Creation & initialization:' do
29
+ it 'should be created without argument' do
30
+ expect { Production.new }.not_to raise_error
31
+ end
32
+
33
+ it 'should not referenced yet' do
34
+ expect(subject.refcount).to eq(0)
35
+ end
36
+
37
+ it 'should be empty at creation' do
38
+ expect(subject).to be_empty
39
+ end
40
+
41
+ it 'should not have digram' do
42
+ expect(subject.digrams).to be_empty
43
+ expect(subject.last_digram).to be_nil
44
+ end
45
+ end # context
46
+
47
+ context 'Knowing its rhs:' do
48
+
49
+ it 'should know the productions in its rhs' do # references: productions found in rhs, in order, repeats kept
50
+ # Case 1: empty production
51
+ expect(subject.references).to be_empty
52
+
53
+ # Case 2: production without references (plain symbols only)
54
+ symbols = [:a, :b, :c]
55
+ symbols.each { |symb| subject.append_symbol(symb) }
56
+ expect(subject.references).to be_empty
57
+
58
+ # Case 3: production with one reference
59
+ subject.append_symbol(p_a)
60
+ expect(subject.references).to eq([p_a])
61
+
62
+ # Case 4: production with repeated references
63
+ subject.append_symbol(p_a) # second time
64
+ expect(subject.references).to eq([p_a, p_a])
65
+
66
+ # Case 5: production with multiple distinct references
67
+ subject.append_symbol(p_bc)
68
+ expect(subject.references).to eq([p_a, p_a, p_bc])
69
+ end
70
+
71
+ end # context
72
+
73
+ context 'Appending a symbol:' do
74
+
75
+ it 'should append a symbol when empty' do
76
+ expect { subject.append_symbol(:a) }.not_to raise_error
77
+ expect(subject.rhs).to eq([:a])
78
+ expect(subject.last_digram).to be_nil
79
+ end
80
+
81
+ it 'should append a symbol when has one symbol' do
82
+ subject.append_symbol(:a)
83
+ subject.append_symbol(:b)
84
+ expect(subject.rhs).to eq([:a, :b])
85
+ expect(subject.last_digram.symbols).to eq([:a, :b])
86
+ end
87
+
88
+ it 'should append a symbol when rhs has several symbols' do
89
+ symbols = [:a, :b, :c, :d, :e, :f]
90
+ symbols.each { |symb| subject.append_symbol(symb) }
91
+ expect(subject.rhs).to eq(symbols)
92
+ expect(subject.last_digram.symbols).to eq([:e, :f])
93
+ end
94
+
95
+ it 'should increment the refcount for each production in the rhs' do
96
+ expect(p_a.refcount).to be(0)
97
+
98
+ input = [p_a, :b, :c, :d, p_a, :e, :f] # p_a appears twice
99
+ input.each { |symb| subject.append_symbol(symb) }
100
+ expect(p_a.refcount).to be(2)
101
+ end
102
+
103
+ it 'should calculate the digrams before appending:' do
104
+ # Case: empty production
105
+ expect(subject.calc_append_symbol(:a)).to be_empty
106
+
107
+ # Case: single-symbol rhs
108
+ subject.append_symbol(:a)
109
+ expect(to_symbols(subject.calc_append_symbol(:b))).to eq([[:a, :b]])
110
+
111
+ # Case: two-symbols rhs
112
+ subject.append_symbol(:b)
113
+ expectation = [[:a, :b], [:b, :c]]
114
+ expect(to_symbols(subject.calc_append_symbol(:c))).to eq(expectation)
115
+ end
116
+
117
+ end # context
118
+
119
+
120
+ context 'Text representation of a production rule:' do
121
+
122
+ it 'should emit minimal text when empty' do
123
+ expectation = "#{subject.object_id} : ."
124
+ expect(subject.to_string).to eq(expectation)
125
+ end
126
+
127
+ it 'should emit its text representation' do
128
+ symbols = [:a, :b, 'c', :d, :e, :f]
129
+ symbols.each { |symb| subject.append_symbol(symb) }
130
+ expectation = "#{subject.object_id} : a b 'c' d e f."
131
+ expect(subject.to_string).to eq(expectation)
132
+ end
133
+
134
+ end # context
135
+
136
+ context 'Detecting digram repetition:' do
137
+ it 'should report no repetition when empty' do
138
+ expect(subject.repeated_digram?).to be_falsey
139
+ end
140
+
141
+ it 'should report no repetition when rhs has less than 3 symbols' do
142
+ subject.append_symbol(:a)
143
+ expect(subject.repeated_digram?).to be_falsey
144
+
145
+ subject.append_symbol(:a)
146
+ expect(subject.repeated_digram?).to be_falsey
147
+ end
148
+
149
+ it 'should detect shortest repetition' do
150
+ 'aaa'.each_char { |symb| subject.append_symbol(symb) }
151
+ expect(subject.repeated_digram?).to be_truthy
152
+ end
153
+
154
+ it 'should detect any repetition pattern' do
155
+ # Positive cases
156
+ cases = %w(abab abcdab abcdcd abcdefcd )
157
+ cases.each do |word|
158
+ instance = Production.new
159
+ word.each_char { |symb| instance.append_symbol(symb) }
160
+ expect(instance.repeated_digram?).to be_truthy
161
+ end
162
+
163
+ # Negative cases
164
+ cases = %w(abc abb abba abcdef)
165
+ cases.each do |word|
166
+ instance = Production.new
167
+ word.each_char { |symb| instance.append_symbol(symb) }
168
+ expect(instance.repeated_digram?).to be_falsey
169
+ end
170
+ end
171
+ end # context
172
+
173
+ context 'Replacing a digram by a production:' do
174
+
175
+ it 'should have not effect on empty production' do
176
+ subject.replace_digram(p_bc)
177
+ expect(subject.rhs).to be_empty
178
+ expect(p_bc.refcount).to eq(0)
179
+ end
180
+
181
+
182
+ it 'should replace two-symbol sequence' do
183
+ %w(a b c d e b c e).each { |symb| subject.append_symbol(symb) }
184
+ subject.replace_digram(p_bc)
185
+
186
+ expect(subject.rhs.size).to eq(6)
187
+ expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc, 'e'])
188
+ expect(p_bc.refcount).to eq(2)
189
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
190
+ end
191
+
192
+
193
+ it 'should replace a starting two-symbol sequence' do
194
+ %w(b c d e b c e).each { |symb| subject.append_symbol(symb) }
195
+ subject.replace_digram(p_bc)
196
+
197
+ expect(subject.rhs.size).to eq(5)
198
+ expect(subject.rhs).to eq([p_bc, 'd', 'e', p_bc, 'e'])
199
+ expect(p_bc.refcount).to eq(2)
200
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
201
+ end
202
+
203
+
204
+ it 'should replace an ending two-symbol sequence' do
205
+ %w(a b c d e b c).each { |symb| subject.append_symbol(symb) }
206
+ subject.replace_digram(p_bc)
207
+
208
+ expect(subject.rhs.size).to eq(5)
209
+ expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc])
210
+ expect(p_bc.refcount).to eq(2)
211
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
212
+ end
213
+
214
+ it 'should replace two consecutive two-symbol sequences' do
215
+ %w(a b c b c d).each { |symb| subject.append_symbol(symb) }
216
+ subject.replace_digram(p_bc)
217
+
218
+ expect(subject.rhs.size).to eq(4)
219
+ expect(subject.rhs).to eq(['a', p_bc, p_bc, 'd'])
220
+ expect(p_bc.refcount).to eq(2)
221
+ expect(p_bc.backrefs[subject.object_id]).to eq(2)
222
+ end
223
+
224
+ end # context
225
+
226
+ context 'Replacing a production occurrence by its rhs:' do
227
+
228
+ it 'should have not effect on empty production' do
229
+ subject.replace_production(p_bc)
230
+ expect(subject.rhs).to be_empty
231
+ end
232
+
233
+ it 'should replace a production at the start' do
234
+ [p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
235
+ subject.replace_production(p_bc)
236
+ expect(subject.rhs.size).to eq(3)
237
+ expect(subject.rhs).to eq(%w(b c d))
238
+ end
239
+
240
+
241
+ it 'should replace a production at the end' do
242
+ ['d', p_bc].each { |symb| subject.append_symbol(symb) }
243
+ subject.replace_production(p_bc)
244
+
245
+ expect(subject.rhs.size).to eq(3)
246
+ expect(subject.rhs).to eq(%w(d b c))
247
+ end
248
+
249
+ it 'should replace a production as sole symbol' do
250
+ subject.append_symbol(p_bc)
251
+ subject.replace_production(p_bc)
252
+
253
+ expect(subject.rhs.size).to eq(2)
254
+ expect(subject.rhs).to eq(%w(b c))
255
+ end
256
+
257
+ it 'should replace a production in the middle' do
258
+ ['a', p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
259
+ subject.replace_production(p_bc)
260
+
261
+ expect(subject.rhs.size).to eq(4)
262
+ expect(subject.rhs).to eq(%w(a b c d))
263
+ end
264
+
265
+ end # context
266
+
267
+ end # describe
268
+
269
+ end # module
270
+
271
+ # End of file
@@ -0,0 +1,213 @@
1
+ require_relative '../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../lib/sequitur/sequitur-grammar'
5
+
6
+ module Sequitur # Re-open the module to get rid of qualified names
7
+
8
+ describe SequiturGrammar do
9
+ # Factory method. Returns an empty enumerator,
10
+ # i.e. an enumerator over an empty array (no elements to iterate).
11
+ def empty_enum()
12
+ return [].to_enum
13
+ end
14
+
15
+ context 'Creation from an enumeration of tokens:' do
16
+
17
+ it 'could be created with an empty enumerator' do
18
+ expect { SequiturGrammar.new(empty_enum) }.not_to raise_error
19
+
20
+ # Creation
21
+ instance = SequiturGrammar.new(empty_enum)
22
+
23
+ # Initialization
24
+ expect(instance.productions.size).to eq(1)
25
+ expect(instance.root).to eq(instance.productions.first)
26
+ expect(instance.root).to be_empty
27
+ end
28
+
29
+ it 'could be created with single token' do
30
+ # Creation
31
+ instance = SequiturGrammar.new([:a].to_enum)
32
+
33
+ # Initialization
34
+ expect(instance.productions.size).to eq(1)
35
+ expect(instance.root).to eq(instance.productions.first)
36
+ expect(instance.root.rhs).to eq([:a])
37
+ end
38
+
39
+ it 'could be created with multiple unique tokens' do
40
+ # Creation
41
+ instance = SequiturGrammar.new([:a, :b, :c, :d].to_enum)
42
+
43
+ # Initialization
44
+ expect(instance.productions.size).to eq(1)
45
+ expect(instance.root).to eq(instance.productions.first)
46
+ expect(instance.root.rhs).to eq([:a, :b, :c, :d])
47
+ end
48
+
49
+ it 'could be created with a repeating digram' do
50
+ instance = SequiturGrammar.new([:a, :b, :a, :b].to_enum)
51
+
52
+ # Expectations:
53
+ # S : A A.
54
+ # A : a b.
55
+ expect(instance.productions.size).to eq(2)
56
+ p_a = instance.productions[1]
57
+ expect(p_a.rhs).to eq([:a, :b])
58
+ expect(instance.root.rhs).to eq([p_a, p_a])
59
+ end
60
+
61
+ it 'should enforce the utility rule' do
62
+ instance = SequiturGrammar.new([:a, :b, :c, :a, :b, :c].to_enum)
63
+
64
+ # Expectations without utility rule:
65
+ # S : B B.
66
+ # A : a b.
67
+ # B : A c.
68
+
69
+ # Expectations with utility rule:
70
+ # S : A A.
71
+ # A : a b c.
72
+ expect(instance.productions.size).to eq(2)
73
+ p_a = instance.productions.last
74
+ expect(p_a.rhs).to eq([:a, :b, :c])
75
+ expect(instance.root.rhs).to eq([p_a, p_a])
76
+ end
77
+
78
+
79
+ it 'should cope with the example from presentation' do
80
+ input = 'bbebeebebebbebee'
81
+
82
+ # Creation
83
+ instance = SequiturGrammar.new(input.chars)
84
+
85
+ # Expectations:
86
+ # S: P3 P2 P3
87
+ # P1: b e
88
+ # P2: P1 P1
89
+ # P3: b P2 e
90
+ expect(instance.productions.size).to eq(4)
91
+ (p1, p2, p3) = instance.productions[1..3]
92
+ expect(instance.root.rhs).to eq([p3, p2, p3])
93
+ expect(p1.rhs).to eq(%w(b e))
94
+ expect(p2.rhs).to eq([p1, p1])
95
+ expect(p3.rhs).to eq(['b', p2, 'e'])
96
+ end
97
+
98
+ it 'should cope with the example from sequitur.info website' do
99
+ input = 'abcabdabcabd'
100
+ instance = SequiturGrammar.new(input.chars)
101
+
102
+ # Expectations:
103
+ # 0 → 2 2
104
+ # 1 → a b
105
+ # 2 → 1 c 1 d
106
+
107
+ expect(instance.productions.size).to eq(3)
108
+ (p1, p2) = instance.productions[1..2]
109
+ expect(instance.root.rhs).to eq([p2, p2])
110
+ expect(p1.rhs).to eq(%w(a b))
111
+ expect(p2.rhs).to eq([p1, 'c', p1, 'd'])
112
+ end
113
+
114
+ it "should cope with the example from Salomon's book" do
115
+ input = 'abcdbcabcdbc'
116
+ instance = SequiturGrammar.new(input.chars)
117
+
118
+ # Expectations:
119
+ # S → CC
120
+ # A → bc
121
+ # C → aAdA
122
+
123
+ expect(instance.productions.size).to eq(3)
124
+ (p_a, p_c) = instance.productions[1..2]
125
+ expect(instance.root.rhs).to eq([p_c, p_c])
126
+ expect(p_a.rhs).to eq(%w(b c))
127
+ expect(p_c.rhs).to eq(['a', p_a, 'd', p_a])
128
+ end
129
+
130
+ it 'should cope with the "porridge" example from sequitur.info' do
131
+ # Another example from sequitur.info website
132
+ input = <<-SNIPPET
133
+ pease porridge hot,
134
+ pease porridge cold,
135
+ pease porridge in the pot,
136
+ nine days old.
137
+
138
+ some like it hot,
139
+ some like it cold,
140
+ some like it in the pot,
141
+ nine days old.
142
+ SNIPPET
143
+ # Expectations (sequitur.info)
144
+ # 0 → 1 2 3 4 3 5 ↵ 6 2 7 4 7 5
145
+ # 1 → p e a s 8 r r i d g 9 pease_porridge_
146
+ # 2 → h o t hot
147
+ # 3 → 10 1 ,↵pease_porridge_
148
+ # 4 → c 11 cold
149
+ # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
150
+ # in_the_pot,↵nine_days_old.↵
151
+ # 6 → s o m 9 l i k 9 i t _ some_like_it_
152
+ # 7 → 10 6 ,↵some_like_it_
153
+ # 8 → 9 p o e_po
154
+ # 9 → e _ e_
155
+ # 10 → , ↵ ,↵
156
+ # 11 → o l d old
157
+ # 12 → i n in
158
+
159
+ instance = SequiturGrammar.new(input.chars)
160
+ expect(instance.productions.size).to eq(13)
161
+ p0 = instance.root
162
+ expect(p0.rhs.size).to eq(13)
163
+
164
+ (p1, p2, p3, p4, p5, p6, p7, p8, p9) = instance.productions[1..9]
165
+ (p10, p11, p12) = instance.productions[10..12]
166
+
167
+ # Note: the productions aren't sorted the same way as
168
+ # the sequitur.info implementation.
169
+ p0_expectation = [
170
+ p2, p8, p3, p10, p3, p12, "\n",
171
+ p9, p8, p11, p10, p11, p12
172
+ ]
173
+ expect(p0.rhs).to eq(p0_expectation) # Rule 0 above
174
+ expect(p1.rhs).to eq(['e', ' ']) # Rule 9 above
175
+ expect(p2.rhs).to eq([%w(p e a s), p4, %w(r r i d g), p1].flatten) # R1
176
+ expect(p3.rhs).to eq([p5, p2]) # Rule 3 above
177
+ expect(p4.rhs).to eq([p1, 'p', 'o']) # Rule 8 above
178
+ expect(p5.rhs).to eq([',', "\n"]) # Rule 10 above
179
+ expect(p6.rhs).to eq(%w(i n)) # Rule 12 above
180
+ expect(p7.rhs).to eq(%w(o l d)) # Rule 11 above
181
+ expect(p8.rhs).to eq(%w(h o t)) # Rule 2 above
182
+ p9_expectation = [%w(s o m), p1, %w(l i k), p1, 'i', 't', ' '].flatten
183
+ expect(p9.rhs).to eq(p9_expectation) # Rule 6 above
184
+ expect(p10.rhs).to eq(['c', p7]) # Rule 4 above
185
+ expect(p11.rhs).to eq([p5, p9]) # Rule 7 above
186
+ p12_expectation = [
187
+ p6, ' ', 't', 'h', p4, 't', p5, 'n', p6, p1,
188
+ %w(d a y s), ' ', p7, '.', "\n"
189
+ ].flatten
190
+ expect(p12.rhs).to eq(p12_expectation) # Rule 5 above
191
+ end
192
+ end # context
193
+
194
+ context 'Generating a text representation of itself:' do
195
+
196
+ it 'should generate a text representation when empty' do
197
+ instance = SequiturGrammar.new(empty_enum)
198
+ expectation = "#{instance.root.object_id} : ."
199
+
200
+ expect(instance.to_string).to eq(expectation)
201
+ end
202
+
203
+ it 'should generate a text representation of a simple production' do
204
+ instance = SequiturGrammar.new([:a].to_enum)
205
+ expectation = "#{instance.root.object_id} : a."
206
+ expect(instance.to_string).to eq(expectation)
207
+ end
208
+ end # context
209
+
210
+ end # describe
211
+ end # module
212
+
213
+ # End of file