sequitur 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.travis.yml +5 -0
- data/CHANGELOG.md +5 -0
- data/README.md +3 -3
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +10 -8
- data/lib/sequitur/dynamic_grammar.rb +6 -45
- data/lib/sequitur/production.rb +113 -85
- data/lib/sequitur/production_ref.rb +89 -0
- data/lib/sequitur/sequitur_grammar.rb +43 -54
- data/spec/sequitur/digram_spec.rb +19 -4
- data/spec/sequitur/production_ref_spec.rb +95 -0
- data/spec/sequitur/production_spec.rb +77 -37
- data/spec/sequitur/sequitur_grammar_spec.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjQ5ZDcxYTMzZGJlOWUzOTg1YmYxMThiMTAxYjk5YjZmZjkyY2FjMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZGIyODgxMjNhMmRiNGUyMjZlMTMzYjQwOGRjMjc3YzYyMTYzYjNmZQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OWFmNTRlN2NkZjRhNjVlOTU0MTlmZjZjNjllMDZjY2M4NWNiNWQ3NzQ0ZGMz
|
10
|
+
MjBkOWQzNjJjN2JiODExNDc2OTFmNjIyMGEyY2VkMDdiNDQyZjdiZTFjNTgw
|
11
|
+
NGE5NmVlZTEwMTkzNjU4ZGI2MjA5MGY3YTVhMjM2ZDcyZjhlMzk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjUzNWVlNTQ1ODI0NDkyMGUxOWY4NDIwYWIzNmJjNTEzOTgzZDE3YmRmMTE3
|
14
|
+
NjM1Mzc0Mzk5YmQ1MDdhMzFlZDc2YzVkYjc2MmY4ZWEwZWY0YjY1ZTdlYmFi
|
15
|
+
ZGJjODhmYzBhNGU2Y2IxZGZlODZlODNhNTg2NzU1YTgwNmQ5OTk=
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.0.12 / 2014-08-24
|
2
|
+
* [CHANGE] Significant internal refactoring.
|
3
|
+
* [CHANGE] Method `ObjectSpace::_id2ref` is no longer used => one obstacle to JRuby porting is removed.
|
4
|
+
* [NEW] Added new class `ProductionRef`
|
5
|
+
|
1
6
|
### 0.0.11 / 2014-08-24
|
2
7
|
* [FIX] `SequiturGrammar#check_unicity`: an exception was raised when it shouldn't. Added example in spec file.
|
3
8
|
* [CHANGE] `sequitur.rb` : Added the convenience Sequitur::build_from method.
|
data/README.md
CHANGED
@@ -20,8 +20,8 @@ It detects repeated token patterns and can represent them in a compact way.
|
|
20
20
|
```ruby
|
21
21
|
|
22
22
|
require 'sequitur' # Load the Sequitur library
|
23
|
-
|
24
|
-
input_sequence = 'abcabdab'
|
23
|
+
|
24
|
+
input_sequence = 'abcabdab' # Let's analyze this string
|
25
25
|
|
26
26
|
# The SEQUITUR algorithm will detect the repeated 'ab' pattern
|
27
27
|
# and will generate a context-free grammar that represents the input string
|
@@ -29,7 +29,7 @@ It detects repeated token patterns and can represent them in a compact way.
|
|
29
29
|
|
30
30
|
# Display the grammar rules
|
31
31
|
# Each rule is displayed with the format:
|
32
|
-
# rule_id :
|
32
|
+
# rule_id : a_sequence_of_grammar_symbols
|
33
33
|
# Where:
|
34
34
|
# - rule_id is the object id of a rule (in decimal)
|
35
35
|
# - a grammar symbol is either a terminal symbol
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -10,12 +10,12 @@ class Digram
|
|
10
10
|
# The sequence of two consecutive grammar symbols.
|
11
11
|
attr_reader(:symbols)
|
12
12
|
|
13
|
-
# The object id of the production that contains this digram in its rhs.
|
14
|
-
attr_reader(:production_id)
|
15
|
-
|
16
13
|
# A unique Hash key of the digram
|
17
14
|
attr_reader(:key)
|
18
15
|
|
16
|
+
# The production in which the digram occurs
|
17
|
+
attr_reader(:production)
|
18
|
+
|
19
19
|
# Constructor.
|
20
20
|
# @param symbol1 [StringOrSymbol] First element of the digram
|
21
21
|
# @param symbol2 [StringOrSymbol] Second element of the digram
|
@@ -24,13 +24,15 @@ class Digram
|
|
24
24
|
def initialize(symbol1, symbol2, aProduction)
|
25
25
|
@symbols = [symbol1, symbol2]
|
26
26
|
@key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
|
27
|
-
@
|
27
|
+
@production = aProduction
|
28
28
|
end
|
29
|
-
|
30
|
-
#
|
31
|
-
|
32
|
-
|
29
|
+
|
30
|
+
# Equality testing.
|
31
|
+
# Returns true when keys of both digrams are equal
|
32
|
+
def ==(other)
|
33
|
+
return key == other.key
|
33
34
|
end
|
35
|
+
|
34
36
|
end # class
|
35
37
|
|
36
38
|
end # module
|
@@ -47,11 +47,9 @@ class DynamicGrammar
|
|
47
47
|
puts to_string if trace
|
48
48
|
prod = productions.delete_at(anIndex)
|
49
49
|
# TODO: remove output
|
50
|
-
puts prod.to_string if trace
|
50
|
+
puts('Removed: ' + prod.to_string) if trace
|
51
51
|
prod.clear_rhs
|
52
52
|
|
53
|
-
check_backrefs # TODO: configurable check
|
54
|
-
|
55
53
|
return prod
|
56
54
|
end
|
57
55
|
|
@@ -68,57 +66,20 @@ class DynamicGrammar
|
|
68
66
|
end
|
69
67
|
|
70
68
|
|
71
|
-
# Check that
|
69
|
+
# Check that every production reference in rhs is
|
72
70
|
# pointing to a production of the grammar
|
73
71
|
def check_rhs_of(aProduction)
|
74
72
|
aProduction.references.each do |symb|
|
75
|
-
|
73
|
+
referenced_prod = symb.production
|
74
|
+
next if productions.include?(referenced_prod)
|
76
75
|
|
77
|
-
msg = "Production #{aProduction.object_id} refers to
|
78
|
-
msg << "production #{
|
76
|
+
msg = "Production #{aProduction.object_id} refers to"
|
77
|
+
msg << " production #{referenced_prod.object_id}"
|
79
78
|
msg << ' that is not part of the grammar.'
|
80
79
|
fail StandardError, msg
|
81
80
|
end
|
82
81
|
end
|
83
82
|
|
84
|
-
# Check the invariants:
|
85
|
-
# Every back reference must must point to a production of the grammar
|
86
|
-
# Every back reference count must be equal to the number
|
87
|
-
# of occurrences in the referencing production.
|
88
|
-
def check_backrefs()
|
89
|
-
return if productions.size < 2
|
90
|
-
|
91
|
-
all_but_root = productions[1...productions.size]
|
92
|
-
all_but_root.each do |a_prod|
|
93
|
-
a_prod.backrefs.each do |other_prod_id, count|
|
94
|
-
begin
|
95
|
-
other_prod = ObjectSpace._id2ref(other_prod_id)
|
96
|
-
rescue RangeError => exc
|
97
|
-
msg = "Production #{a_prod.object_id} has a backref to "
|
98
|
-
msg << "recycled production #{other_prod_id}."
|
99
|
-
msg << "\n#{to_string}"
|
100
|
-
$stderr.puts msg
|
101
|
-
raise exc
|
102
|
-
end
|
103
|
-
found = productions.find { |elem| elem == other_prod }
|
104
|
-
unless found
|
105
|
-
msg = "Production #{a_prod.object_id} is referenced by the "
|
106
|
-
msg << "unknown production (#{other_prod_id})."
|
107
|
-
msg << "\n#{to_string}"
|
108
|
-
fail StandardError, msg
|
109
|
-
end
|
110
|
-
|
111
|
-
unless count == found.rhs.count { |symb| symb == a_prod }
|
112
|
-
msg = "Production #{a_prod.object_id} has a count mismatch"
|
113
|
-
msg << "\nIt expects #{count} references in rhs of #{other_prod_id} "
|
114
|
-
msg << "but actual count is #{other_prod.rhs.count}."
|
115
|
-
msg << "\n#{to_string}"
|
116
|
-
fail StandardError, msg
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
83
|
end # class
|
123
84
|
|
124
85
|
end # module
|
data/lib/sequitur/production.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'digram'
|
2
|
+
require_relative 'production_ref'
|
2
3
|
|
3
4
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
5
|
|
@@ -14,33 +15,76 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
14
15
|
class Production
|
15
16
|
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
16
17
|
attr_reader(:rhs)
|
18
|
+
|
19
|
+
# The reference count (= how many times other productions reference this one)
|
20
|
+
attr_reader(:refcount)
|
17
21
|
|
18
|
-
#
|
19
|
-
|
20
|
-
# Where the reference count is the number of times this production
|
21
|
-
# appears in the rhs of the production with given id.
|
22
|
-
attr_reader(:backrefs)
|
22
|
+
# The sequence of digrams appearing in the RHS
|
23
|
+
attr_reader(:digrams)
|
23
24
|
|
24
25
|
# Constructor. Build a production with an empty RHS.
|
25
26
|
def initialize()
|
26
27
|
clear_rhs
|
27
|
-
@
|
28
|
+
@refcount = 0
|
29
|
+
@digrams = []
|
28
30
|
end
|
29
31
|
|
30
32
|
public
|
31
33
|
|
34
|
+
def ==(other)
|
35
|
+
return true if object_id == other.object_id
|
36
|
+
|
37
|
+
if other.is_a?(ProductionRef)
|
38
|
+
result = (other == self)
|
39
|
+
else
|
40
|
+
result = false
|
41
|
+
end
|
42
|
+
|
43
|
+
return result
|
44
|
+
end
|
45
|
+
|
46
|
+
|
32
47
|
# Is the rhs empty?
|
33
48
|
def empty?
|
34
49
|
return rhs.empty?
|
35
50
|
end
|
36
51
|
|
52
|
+
def incr_refcount()
|
53
|
+
@refcount += 1
|
54
|
+
end
|
55
|
+
|
56
|
+
def decr_refcount()
|
57
|
+
fail StandardError if @refcount == 0
|
58
|
+
@refcount -= 1
|
59
|
+
end
|
60
|
+
|
37
61
|
|
38
62
|
# Return the set of productions appearing in the rhs.
|
39
63
|
def references()
|
40
|
-
return rhs.select { |symb| symb.
|
64
|
+
return rhs.select { |symb| symb.is_a?(ProductionRef) }
|
65
|
+
end
|
66
|
+
|
67
|
+
# Return the set of references to a given production
|
68
|
+
def references_of(aProduction)
|
69
|
+
refs = references
|
70
|
+
return refs.select { |a_ref| a_ref == aProduction }
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
# Return the list of digrams found in the rhs of this production.
|
77
|
+
def recalc_digrams()
|
78
|
+
return [] if rhs.size < 2
|
79
|
+
|
80
|
+
result = []
|
81
|
+
rhs.each_cons(2) { |couple| result << Digram.new(*couple, self) }
|
82
|
+
|
83
|
+
@digrams = result
|
41
84
|
end
|
42
85
|
|
43
86
|
|
87
|
+
|
44
88
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
45
89
|
def single_digram?
|
46
90
|
return rhs.size == 2
|
@@ -59,53 +103,14 @@ class Production
|
|
59
103
|
same_key_found = all_keys.index(last_key)
|
60
104
|
return !same_key_found.nil?
|
61
105
|
end
|
62
|
-
|
106
|
+
|
63
107
|
# Return the last digram appearing in the RHS.
|
64
108
|
def last_digram()
|
65
|
-
|
66
|
-
|
67
|
-
return Digram.new(rhs[-2], rhs[-1], self)
|
68
|
-
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# The back reference count is the number of times this production
|
73
|
-
# appears in the rhs of all the productions of the grammar
|
74
|
-
def refcount()
|
75
|
-
total = backrefs.values.reduce(0) do |sub_result, count|
|
76
|
-
sub_result += count
|
77
|
-
end
|
78
|
-
|
79
|
-
return total
|
80
|
-
end
|
81
|
-
|
82
|
-
# Add a back reference to the given production.
|
83
|
-
# @param aProduction [Production] Assume that production P appears in the
|
84
|
-
# RHS of production Q, then a reference count of P is incremented in Q.
|
85
|
-
def add_backref(aProduction)
|
86
|
-
prod_id = aProduction.object_id
|
87
|
-
|
88
|
-
count = backrefs.fetch(prod_id, 0)
|
89
|
-
backrefs[prod_id] = count + 1
|
90
|
-
return count
|
109
|
+
result = digrams.empty? ? nil : digrams.last
|
110
|
+
return result
|
91
111
|
end
|
92
112
|
|
93
|
-
# Decrement the reference count for the given production.
|
94
|
-
# If result is zero, then the entry is removed from the Hash.
|
95
|
-
def remove_backref(aProduction)
|
96
|
-
prod_id = aProduction.object_id
|
97
|
-
|
98
|
-
count = backrefs.fetch(prod_id)
|
99
|
-
fail StandardError if count < 1
|
100
|
-
|
101
|
-
if count > 1
|
102
|
-
backrefs[prod_id] = count - 1
|
103
|
-
else
|
104
|
-
backrefs.delete(prod_id)
|
105
|
-
end
|
106
113
|
|
107
|
-
return count
|
108
|
-
end
|
109
114
|
|
110
115
|
# Emit a text representation of the production rule.
|
111
116
|
# Text is of the form:
|
@@ -122,17 +127,24 @@ class Production
|
|
122
127
|
return "#{object_id} : #{rhs_text.join(' ')}."
|
123
128
|
end
|
124
129
|
|
125
|
-
#
|
126
|
-
# the given symbol is appended at the end of the rhs
|
127
|
-
def calc_append_symbol(aSymbol)
|
128
|
-
return [] if empty?
|
129
|
-
|
130
|
-
return digrams + [ Digram.new(rhs.last, aSymbol, self) ]
|
131
|
-
end
|
132
|
-
|
130
|
+
# Add a (grammar) symbol at the end of the RHS.
|
133
131
|
def append_symbol(aSymbol)
|
134
|
-
|
135
|
-
|
132
|
+
case aSymbol
|
133
|
+
when Production
|
134
|
+
new_symb = ProductionRef.new(aSymbol)
|
135
|
+
when ProductionRef
|
136
|
+
if aSymbol.unbound?
|
137
|
+
msg = 'Fail to append reference to nil production in '
|
138
|
+
msg << to_string
|
139
|
+
fail StandardError, msg
|
140
|
+
end
|
141
|
+
new_symb = aSymbol.dup
|
142
|
+
else
|
143
|
+
new_symb = aSymbol
|
144
|
+
end
|
145
|
+
|
146
|
+
rhs << new_symb
|
147
|
+
digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
|
136
148
|
end
|
137
149
|
|
138
150
|
# Clear the right-hand side.
|
@@ -140,61 +152,77 @@ class Production
|
|
140
152
|
def clear_rhs()
|
141
153
|
if rhs
|
142
154
|
refs = references
|
143
|
-
refs.each { |a_ref| a_ref.
|
155
|
+
refs.each { |a_ref| a_ref.unbind }
|
144
156
|
end
|
145
157
|
@rhs = []
|
146
158
|
end
|
147
159
|
|
148
|
-
#
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
160
|
+
# Find all the positions where the digram occurs in the rhs
|
161
|
+
# Synopsis:
|
162
|
+
# Given the production p -> a b c a b a b d
|
163
|
+
# Then p.positions_of(a, b) should return [0, 3, 5]
|
164
|
+
# Caution: "overlapping" digrams shouldn't be counted
|
165
|
+
# Given the production p -> a a b a a a c d
|
166
|
+
# Then p.positions_of(a, a) should return [0, 3]
|
167
|
+
def positions_of(symb1, symb2)
|
157
168
|
|
158
|
-
# Substitute in self all occurence of the digram that
|
159
|
-
# appears in the rhs of the other production
|
160
|
-
# Pre-condition:
|
161
|
-
# another has a rhs with exactly one digram (= a two-symbol sequence).
|
162
|
-
def replace_digram(another)
|
163
169
|
# Find the positions where the digram occur in rhs
|
164
|
-
(symb1, symb2) = another.rhs
|
165
170
|
indices = [ -2 ] # Dummy index!
|
166
|
-
|
167
171
|
(0...rhs.size).each do |i|
|
168
172
|
next if i == indices.last + 1
|
169
173
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
170
174
|
end
|
175
|
+
|
171
176
|
indices.shift
|
172
177
|
|
173
|
-
|
178
|
+
return indices
|
179
|
+
end
|
180
|
+
|
181
|
+
|
182
|
+
# Substitute in self all occurrences of the digram that
|
183
|
+
# appears in the rhs of the other production
|
184
|
+
# Pre-condition:
|
185
|
+
# another has a rhs with exactly one digram (= a two-symbol sequence).
|
186
|
+
def replace_digram(another)
|
187
|
+
(symb1, symb2) = another.rhs
|
188
|
+
pos = positions_of(symb1, symb2).reverse
|
174
189
|
|
175
190
|
# Replace the two symbol sequence by the production
|
176
191
|
pos.each do |index|
|
177
|
-
|
178
|
-
|
192
|
+
if rhs[index].is_a?(ProductionRef)
|
193
|
+
rhs[index].bind_to(another)
|
194
|
+
else
|
195
|
+
rhs[index] = ProductionRef.new(another)
|
196
|
+
end
|
179
197
|
index1 = index + 1
|
180
|
-
rhs[index1].
|
198
|
+
rhs[index1].unbind if rhs[index1].is_a?(ProductionRef)
|
181
199
|
rhs.delete_at(index1)
|
182
|
-
another.add_backref(self)
|
183
200
|
end
|
201
|
+
|
202
|
+
recalc_digrams
|
184
203
|
end
|
185
204
|
|
186
205
|
# Replace every occurrence of 'another' production in rhs by
|
187
206
|
# the rhs of 'another'.
|
207
|
+
# Given the production p_A -> a p_B b p_B c
|
208
|
+
# And the production p_B -> x y
|
209
|
+
# Then the call p_A.replace_production(p_B)
|
210
|
+
# Modifies p_A into:
|
211
|
+
# p_A -> a x y b x y c
|
188
212
|
def replace_production(another)
|
189
213
|
(0...rhs.size).to_a.reverse.each do |index|
|
190
214
|
next unless rhs[index] == another
|
191
|
-
|
192
|
-
|
193
|
-
|
215
|
+
|
216
|
+
# Avoid the aliasing of production reference
|
217
|
+
other_rhs = another.rhs.map do |symb|
|
218
|
+
symb.is_a?(ProductionRef) ? symb.dup : symb
|
194
219
|
end
|
195
|
-
|
220
|
+
rhs.insert(index + 1, *other_rhs)
|
221
|
+
another.decr_refcount
|
196
222
|
rhs.delete_at(index)
|
197
223
|
end
|
224
|
+
|
225
|
+
recalc_digrams
|
198
226
|
end
|
199
227
|
|
200
228
|
end # class
|
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
+
|
5
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
6
|
+
# side of a production P1 and that refers to a production P2.
|
7
|
+
# Every time a production P2 appears in the right-hand side of
|
8
|
+
# production P1, this is implemented by inserting a production reference to P2
|
9
|
+
# in the appropriate position in the RHS of P1.
|
10
|
+
# In the literature, production references are also called non terminal
|
11
|
+
# symbols
|
12
|
+
class ProductionRef
|
13
|
+
|
14
|
+
# Link to the production to reference
|
15
|
+
attr_reader(:production)
|
16
|
+
|
17
|
+
# Constructor
|
18
|
+
# [target] The production that is being referenced.
|
19
|
+
def initialize(target)
|
20
|
+
bind_to(target)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Copy constructor invoked by dup or clone methods
|
24
|
+
def initialize_copy(orig)
|
25
|
+
@production = nil
|
26
|
+
bind_to(orig.production)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Return the text representation of a production reference.
|
30
|
+
def to_s()
|
31
|
+
return "#{production.object_id}"
|
32
|
+
end
|
33
|
+
|
34
|
+
alias_method :to_string, :to_s
|
35
|
+
|
36
|
+
|
37
|
+
# Equality testing.
|
38
|
+
# A production ref is equal to another one when it
|
39
|
+
# refers to the same production or when it is compared to
|
40
|
+
# the production it refers to.
|
41
|
+
def ==(other)
|
42
|
+
return true if object_id == other.object_id
|
43
|
+
|
44
|
+
if other.is_a?(ProductionRef)
|
45
|
+
result = (production == other.production)
|
46
|
+
else
|
47
|
+
result = (production == other)
|
48
|
+
end
|
49
|
+
|
50
|
+
return result
|
51
|
+
end
|
52
|
+
|
53
|
+
# Generates a Fixnum value as hash value.
|
54
|
+
# As a reference has no identity on its own,
|
55
|
+
# the method returns the hash value of the
|
56
|
+
# referenced production
|
57
|
+
def hash()
|
58
|
+
fail StandardError, 'Nil production' if production.nil?
|
59
|
+
return production.hash
|
60
|
+
end
|
61
|
+
|
62
|
+
# Make this reference point to the given production
|
63
|
+
def bind_to(aProduction)
|
64
|
+
return if aProduction == @production
|
65
|
+
|
66
|
+
production.decr_refcount if production
|
67
|
+
unless aProduction.kind_of?(Production)
|
68
|
+
fail StandardError, "Illegal production type #{aProduction.class}"
|
69
|
+
end
|
70
|
+
@production = aProduction
|
71
|
+
production.incr_refcount
|
72
|
+
end
|
73
|
+
|
74
|
+
# Clear the reference to the target production
|
75
|
+
def unbind()
|
76
|
+
production.decr_refcount
|
77
|
+
@production = nil
|
78
|
+
end
|
79
|
+
|
80
|
+
# Return true when this object doesn't refer to any production.
|
81
|
+
def unbound?()
|
82
|
+
return production.nil?
|
83
|
+
end
|
84
|
+
|
85
|
+
end # class
|
86
|
+
|
87
|
+
end # module
|
88
|
+
|
89
|
+
# End of file
|
@@ -13,7 +13,7 @@ class SequiturGrammar < DynamicGrammar
|
|
13
13
|
def initialize(anEnum)
|
14
14
|
super()
|
15
15
|
# Make start production compliant with utility rule
|
16
|
-
2.times { root.
|
16
|
+
2.times { root.incr_refcount }
|
17
17
|
|
18
18
|
@digrams = {}
|
19
19
|
@parsed = []
|
@@ -34,14 +34,15 @@ class SequiturGrammar < DynamicGrammar
|
|
34
34
|
all_digrams = {}
|
35
35
|
productions.each do |a_prod|
|
36
36
|
prod_digrams = a_prod.digrams
|
37
|
-
prod_digrams.
|
37
|
+
prod_digrams.each_with_index do |a_digram, index|
|
38
|
+
next if index && a_digram == a_prod.digrams[index - 1]
|
38
39
|
if all_digrams.include? a_digram.key
|
39
40
|
msg = "Digram #{a_digram.symbols} occurs twice!"
|
40
41
|
colliding = all_digrams[a_digram.key]
|
41
|
-
msg << "\nOnce in production #{colliding.
|
42
|
+
msg << "\nOnce in production #{colliding.production.object_id}"
|
42
43
|
msg << "\nSecond in production #{a_prod.object_id}"
|
43
44
|
msg << "\n#{to_string}"
|
44
|
-
fail StandardError, msg unless colliding
|
45
|
+
fail StandardError, msg unless colliding == a_prod
|
45
46
|
else
|
46
47
|
all_digrams[a_digram.key] = a_digram
|
47
48
|
end
|
@@ -55,8 +56,8 @@ class SequiturGrammar < DynamicGrammar
|
|
55
56
|
# Assumption: last digram of production isn't yet registered.
|
56
57
|
def add_production(aProduction)
|
57
58
|
super # Call original method from superclass...
|
58
|
-
|
59
|
-
# ... then add this behaviour
|
59
|
+
|
60
|
+
# ... then add this behaviour
|
60
61
|
last_digram = aProduction.last_digram
|
61
62
|
digrams[last_digram.key] = last_digram
|
62
63
|
end
|
@@ -67,7 +68,7 @@ class SequiturGrammar < DynamicGrammar
|
|
67
68
|
|
68
69
|
# Retrieve in the Hash all registered digrams from the removed production
|
69
70
|
digrams_subset = digrams.select do |_, digr|
|
70
|
-
digr.
|
71
|
+
digr.production == prod
|
71
72
|
end
|
72
73
|
|
73
74
|
# Remove them...
|
@@ -76,10 +77,10 @@ class SequiturGrammar < DynamicGrammar
|
|
76
77
|
end
|
77
78
|
|
78
79
|
def append_symbol_to(aProduction, aSymbol)
|
79
|
-
prod_digrams = aProduction.calc_append_symbol(aSymbol)
|
80
80
|
check_digrams # TODO: remove this
|
81
|
-
check_backrefs # TODO: remove this
|
82
81
|
super
|
82
|
+
|
83
|
+
prod_digrams = aProduction.digrams
|
83
84
|
unless prod_digrams.empty?
|
84
85
|
last_digram = prod_digrams.last
|
85
86
|
matching_digram = digrams[last_digram.key]
|
@@ -105,32 +106,19 @@ class SequiturGrammar < DynamicGrammar
|
|
105
106
|
def preserve_unicity(aProduction)
|
106
107
|
last_digram = aProduction.last_digram
|
107
108
|
matching_digram = digrams[last_digram.key]
|
108
|
-
if
|
109
|
+
if aProduction == matching_digram.production
|
109
110
|
# Rule: no other production distinct from aProduction should have
|
110
111
|
# the matching digram
|
111
112
|
productions.each do |prod|
|
112
113
|
its_digrams = prod.digrams
|
113
114
|
its_keys = its_digrams.map(&:key)
|
114
|
-
if prod
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
# msg << "\n#{self.to_string}"
|
122
|
-
# fail StandardError, msg
|
123
|
-
# end
|
124
|
-
|
125
|
-
else
|
126
|
-
if its_keys.include? last_digram.key
|
127
|
-
msg = "Digram #{last_digram.symbols} occurs three times!"
|
128
|
-
msg << "\nTwice in production #{aProduction.object_id}"
|
129
|
-
msg << "\nThird in production #{prod.object_id}"
|
130
|
-
msg << "\n#{to_string}"
|
131
|
-
fail StandardError, msg
|
132
|
-
end
|
133
|
-
end
|
115
|
+
next if prod == last_digram.production
|
116
|
+
next unless its_keys.include? last_digram.key
|
117
|
+
msg = "Digram #{last_digram.symbols} occurs three times!"
|
118
|
+
msg << "\nTwice in production #{aProduction.object_id}"
|
119
|
+
msg << "\nThird in production #{prod.object_id}"
|
120
|
+
msg << "\n#{to_string}"
|
121
|
+
fail StandardError, msg
|
134
122
|
end
|
135
123
|
|
136
124
|
# Digram appears twice in given production...
|
@@ -148,17 +136,18 @@ class SequiturGrammar < DynamicGrammar
|
|
148
136
|
else
|
149
137
|
# Duplicate digram used in distinct production
|
150
138
|
# Two cases: other production is a single digram one or a multi-digram
|
151
|
-
other_prod =
|
139
|
+
other_prod = matching_digram.production
|
152
140
|
if other_prod.single_digram?
|
153
141
|
# ... replace duplicate digram by reference to other production
|
154
142
|
aProduction.replace_digram(other_prod)
|
155
143
|
update_digrams_from(aProduction)
|
156
144
|
|
157
|
-
# Special case a: replacement causes another digram duplication
|
145
|
+
# Special case a: replacement causes another digram duplication
|
158
146
|
# in the given production
|
159
|
-
# Special case b: replacement causes another digram duplication
|
147
|
+
# Special case b: replacement causes another digram duplication
|
160
148
|
# with other production
|
161
|
-
if aProduction.repeated_digram? ||
|
149
|
+
if aProduction.repeated_digram? ||
|
150
|
+
(digrams[aProduction.last_digram.key].production != aProduction)
|
162
151
|
preserve_unicity(aProduction)
|
163
152
|
end
|
164
153
|
|
@@ -178,7 +167,6 @@ class SequiturGrammar < DynamicGrammar
|
|
178
167
|
|
179
168
|
# TODO: Check when aProduction and other_prod have same preceding symbol
|
180
169
|
update_digrams_from(other_prod)
|
181
|
-
check_backrefs # TODO: remove this
|
182
170
|
end
|
183
171
|
check_unicity
|
184
172
|
end
|
@@ -205,16 +193,17 @@ class SequiturGrammar < DynamicGrammar
|
|
205
193
|
loop do
|
206
194
|
all_refcount_ok = true
|
207
195
|
(1...productions.size).to_a.reverse.each do |index|
|
208
|
-
|
196
|
+
curr_production = productions[index]
|
197
|
+
next unless curr_production.refcount == 1
|
209
198
|
|
210
199
|
all_refcount_ok = false
|
211
|
-
|
212
|
-
|
200
|
+
dependent = productions.find do |a_prod|
|
201
|
+
!a_prod.references_of(curr_production).empty?
|
202
|
+
end
|
213
203
|
dependent.replace_production(productions[index])
|
214
204
|
delete_production(index)
|
215
205
|
update_digrams_from(dependent)
|
216
206
|
check_references
|
217
|
-
check_backrefs
|
218
207
|
end
|
219
208
|
|
220
209
|
break if all_refcount_ok
|
@@ -226,14 +215,14 @@ class SequiturGrammar < DynamicGrammar
|
|
226
215
|
def update_digrams_from(aProduction)
|
227
216
|
current_digrams = aProduction.digrams
|
228
217
|
|
229
|
-
# Add new digrams
|
218
|
+
# Add new digrams only if they don't collide
|
230
219
|
current_digrams.each do |digr|
|
231
220
|
digrams[digr.key] = digr unless digrams.include? digr.key
|
232
221
|
end
|
233
222
|
|
234
223
|
# Retrieve all registered digrams from the production
|
235
224
|
digrams_subset = digrams.select do |_, digr|
|
236
|
-
digr.
|
225
|
+
digr.production == aProduction
|
237
226
|
end
|
238
227
|
|
239
228
|
# Remove obsolete digrams
|
@@ -244,16 +233,18 @@ class SequiturGrammar < DynamicGrammar
|
|
244
233
|
end
|
245
234
|
|
246
235
|
# Check the invariant:
|
247
|
-
# Every
|
236
|
+
# Every reference in a rhs that is bound must point
|
248
237
|
# to a production of the grammar.
|
249
238
|
def check_references()
|
250
239
|
productions.each do |a_prod|
|
251
240
|
rhs_prods = a_prod.references
|
252
|
-
rhs_prods.each do |
|
241
|
+
rhs_prods.each do |a_reference|
|
242
|
+
next if a_reference.unbound?
|
243
|
+
referenced_prod = a_reference.production
|
253
244
|
next if productions.include? referenced_prod
|
254
245
|
|
255
|
-
msg = "Production #{a_prod.object_id}
|
256
|
-
msg << "unknown production #{referenced_prod.object_id}"
|
246
|
+
msg = "Production #{a_prod.object_id} #{a_prod.to_string}"
|
247
|
+
msg << " references the unknown production #{referenced_prod.object_id}"
|
257
248
|
msg << "\nOrphan production: #{referenced_prod.to_string}"
|
258
249
|
msg << "\n#{to_string}"
|
259
250
|
fail StandardError, msg
|
@@ -265,13 +256,11 @@ class SequiturGrammar < DynamicGrammar
|
|
265
256
|
# Every registered digram must reference a production from the grammar
|
266
257
|
def check_registered()
|
267
258
|
digrams.each do |_key, digr|
|
268
|
-
found = productions.find
|
269
|
-
digr.production_id == a_prod.object_id
|
270
|
-
end
|
259
|
+
found = productions.find { |a_prod| digr.production == a_prod }
|
271
260
|
next if found
|
272
261
|
|
273
262
|
msg = "Digram #{digr.symbols} references the unknown "
|
274
|
-
msg << "production (#{digr.
|
263
|
+
msg << "production (#{digr.production.object_id})."
|
275
264
|
msg << "\n#{to_string}"
|
276
265
|
fail StandardError, msg
|
277
266
|
end
|
@@ -283,7 +272,7 @@ class SequiturGrammar < DynamicGrammar
|
|
283
272
|
# Control that every registered digram refers
|
284
273
|
# to a production that really has that digram
|
285
274
|
digrams.each do |key, digr|
|
286
|
-
its_prod =
|
275
|
+
its_prod = digr.production
|
287
276
|
prod_digrams = its_prod.digrams
|
288
277
|
prod_keys = prod_digrams.map(&:key)
|
289
278
|
next if prod_keys.include? key
|
@@ -307,15 +296,15 @@ class SequiturGrammar < DynamicGrammar
|
|
307
296
|
all_digrams.each do |key, digr|
|
308
297
|
registered = digrams[key]
|
309
298
|
if registered
|
310
|
-
if registered
|
311
|
-
msg = "Production #{digr.
|
299
|
+
if registered != digr
|
300
|
+
msg = "Production #{digr.production.object_id} has "
|
312
301
|
msg << "the digram #{digr.symbols} that collides"
|
313
|
-
msg << "\n with same digram from #{registered.
|
302
|
+
msg << "\n with same digram from #{registered.production.object_id}"
|
314
303
|
msg << "\n#{to_string}"
|
315
304
|
fail StandardError, msg
|
316
305
|
end
|
317
306
|
else
|
318
|
-
its_prod =
|
307
|
+
its_prod = digr.production
|
319
308
|
msg = "Production #{its_prod.object_id} (#{its_prod.rhs}) "
|
320
309
|
msg << "has the digram #{digr.symbols} that isn't registered."
|
321
310
|
msg << "\n#{to_string}"
|
@@ -7,24 +7,39 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
7
7
|
|
8
8
|
describe Digram do
|
9
9
|
let(:two_symbols) { [:b, :c] }
|
10
|
+
let(:production) { double('sample-production') }
|
10
11
|
|
11
12
|
context 'Standard creation & initialization:' do
|
12
13
|
|
13
14
|
it 'should be created with 3 arguments' do
|
14
|
-
production = double('sample-production')
|
15
15
|
instance = Digram.new(:b, :c, production)
|
16
16
|
|
17
17
|
expect(instance.symbols).to eq(two_symbols)
|
18
|
-
expect(instance.
|
18
|
+
expect(instance.production).to eq(production)
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should return the production that it refers to' do
|
22
|
-
production = double('sample-production')
|
23
22
|
instance = Digram.new(:b, :c, production)
|
24
23
|
expect(instance.production).to eq(production)
|
25
24
|
end
|
26
25
|
|
27
|
-
end # context
|
26
|
+
end # context
|
27
|
+
|
28
|
+
context 'Standard creation & initialization:' do
|
29
|
+
|
30
|
+
it 'should compare itself to another digram' do
|
31
|
+
instance1 = Digram.new(:a, :b, production)
|
32
|
+
same = Digram.new(:a, :b, production)
|
33
|
+
different = Digram.new(:b, :c, production)
|
34
|
+
|
35
|
+
expect(instance1).to eq(instance1)
|
36
|
+
expect(instance1).to eq(same)
|
37
|
+
expect(instance1).not_to eq(different)
|
38
|
+
expect(same).not_to eq(different)
|
39
|
+
end
|
40
|
+
|
41
|
+
end # context
|
42
|
+
|
28
43
|
|
29
44
|
end # describe
|
30
45
|
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
|
3
|
+
# Load the class under test
|
4
|
+
require_relative '../../lib/sequitur/production'
|
5
|
+
require_relative '../../lib/sequitur/production_ref'
|
6
|
+
|
7
|
+
module Sequitur # Re-open the module to get rid of qualified names
|
8
|
+
|
9
|
+
describe ProductionRef do
|
10
|
+
|
11
|
+
let(:target) { Production.new }
|
12
|
+
let(:another_target) { Production.new }
|
13
|
+
|
14
|
+
subject { ProductionRef.new(target) }
|
15
|
+
|
16
|
+
context 'Creation & initialization:' do
|
17
|
+
|
18
|
+
it 'should be created with a production argument' do
|
19
|
+
expect { ProductionRef.new(target) }.not_to raise_error
|
20
|
+
expect(target.refcount).to eq(1)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should clone with reference count incrementing' do
|
24
|
+
expect(target.refcount).to eq(0)
|
25
|
+
expect(subject.production.refcount).to eq(1)
|
26
|
+
klone = subject.clone
|
27
|
+
expect(klone.production.refcount).to eq(2)
|
28
|
+
duplicate = subject.dup
|
29
|
+
expect(duplicate.production.refcount).to eq(3)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should know its referenced production' do
|
33
|
+
instance = ProductionRef.new(target)
|
34
|
+
expect(instance.production).to eq(target)
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
end # context
|
39
|
+
|
40
|
+
context 'Provided services:' do
|
41
|
+
|
42
|
+
it 'should render its referenced production' do
|
43
|
+
expect(subject.to_s).to eq(target.object_id.to_s)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should unbind itself from its production' do
|
47
|
+
expect(target.refcount).to eq(0)
|
48
|
+
expect(subject).not_to be_unbound
|
49
|
+
expect(target.refcount).to eq(1)
|
50
|
+
subject.unbind
|
51
|
+
expect(target.refcount).to eq(0)
|
52
|
+
expect(subject.production).to be_nil
|
53
|
+
expect(subject).to be_unbound
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should bind to a production' do
|
57
|
+
expect(target.refcount).to eq(0)
|
58
|
+
|
59
|
+
expect(subject).not_to be_unbound
|
60
|
+
expect(target.refcount).to eq(1)
|
61
|
+
|
62
|
+
# Case: bind again to same production
|
63
|
+
expect { subject.bind_to(target) }.not_to raise_error
|
64
|
+
expect(target.refcount).to eq(1)
|
65
|
+
|
66
|
+
# Case: bind to another production
|
67
|
+
expect(another_target.refcount).to eq(0)
|
68
|
+
subject.bind_to(another_target)
|
69
|
+
expect(target.refcount).to eq(0)
|
70
|
+
expect(another_target.refcount).to eq(1)
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should compare to other production (reference)' do
|
74
|
+
same = ProductionRef.new(target)
|
75
|
+
expect(subject).to eq(subject) # Strict identity
|
76
|
+
expect(subject).to eq(same) # 2 references pointing to same production
|
77
|
+
expect(subject).to eq(target)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'should return the hash value of its production' do
|
81
|
+
expectation = target.hash
|
82
|
+
expect(subject.hash).to eq(expectation)
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should complain when requested for a hash and unbound' do
|
86
|
+
subject.unbind
|
87
|
+
expect { subject.hash }.to raise_error(StandardError)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end # describe
|
92
|
+
|
93
|
+
end # module
|
94
|
+
|
95
|
+
# End of file
|
@@ -43,33 +43,52 @@ describe Production do
|
|
43
43
|
expect(subject.last_digram).to be_nil
|
44
44
|
end
|
45
45
|
end # context
|
46
|
-
|
46
|
+
|
47
47
|
context 'Knowing its rhs:' do
|
48
|
-
|
48
|
+
|
49
49
|
it 'should know the productions in its rhs' do
|
50
50
|
# Case 1: empty production
|
51
51
|
expect(subject.references).to be_empty
|
52
|
-
|
52
|
+
|
53
53
|
# Case 2: production without references
|
54
54
|
symbols = [:a, :b, :c]
|
55
55
|
symbols.each { |symb| subject.append_symbol(symb) }
|
56
56
|
expect(subject.references).to be_empty
|
57
|
+
expect(subject.references_of(p_a)).to be_empty
|
57
58
|
|
58
59
|
# Case 2: production with one reference
|
59
60
|
subject.append_symbol(p_a)
|
60
61
|
expect(subject.references).to eq([p_a])
|
62
|
+
expect(subject.references_of(p_a)).to eq([p_a])
|
61
63
|
|
62
64
|
# Case 3: production with repeated references
|
63
65
|
subject.append_symbol(p_a) # second time
|
64
|
-
expect(subject.references).to eq([p_a, p_a])
|
66
|
+
expect(subject.references).to eq([p_a, p_a])
|
67
|
+
expect(subject.references_of(p_a)).to eq([p_a, p_a])
|
68
|
+
|
65
69
|
|
66
70
|
# Case 4: production with multiple distinct references
|
67
71
|
subject.append_symbol(p_bc)
|
68
|
-
expect(subject.references).to eq([p_a, p_a, p_bc])
|
72
|
+
expect(subject.references).to eq([p_a, p_a, p_bc])
|
73
|
+
expect(subject.references_of(p_bc)).to eq([p_bc])
|
69
74
|
end
|
70
|
-
|
75
|
+
|
76
|
+
it 'should know the position(s) of a given digram' do
|
77
|
+
sequence1 = [:a, :b, :c, :a, :b, :a, :b, :d]
|
78
|
+
sequence1.each { |symb| subject.append_symbol(symb) }
|
79
|
+
positions = [0, 3, 5]
|
80
|
+
expect(subject.positions_of(:a, :b)).to eq(positions)
|
81
|
+
|
82
|
+
subject.clear_rhs
|
83
|
+
# Case of overlapping digrams
|
84
|
+
sequence2 = [:a, :a, :b, :a, :a, :a, :c, :d]
|
85
|
+
sequence2.each { |symb| subject.append_symbol(symb) }
|
86
|
+
positions = [0, 3]
|
87
|
+
expect(subject.positions_of(:a, :a)).to eq(positions)
|
88
|
+
end
|
89
|
+
|
71
90
|
end # context
|
72
|
-
|
91
|
+
|
73
92
|
context 'Appending a symbol:' do
|
74
93
|
|
75
94
|
it 'should append a symbol when empty' do
|
@@ -92,31 +111,48 @@ describe Production do
|
|
92
111
|
expect(subject.last_digram.symbols).to eq([:e, :f])
|
93
112
|
end
|
94
113
|
|
95
|
-
it 'should
|
114
|
+
it 'should append a production in its rhs' do
|
115
|
+
# Side-effect: refcount of production to append is incremented
|
96
116
|
expect(p_a.refcount).to be(0)
|
97
117
|
|
98
118
|
input = [p_a, :b, :c, :d, p_a, :e, :f] # p_a appears twice
|
99
119
|
input.each { |symb| subject.append_symbol(symb) }
|
100
120
|
expect(p_a.refcount).to be(2)
|
101
121
|
end
|
102
|
-
|
103
|
-
it 'should
|
104
|
-
#
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
#
|
112
|
-
|
113
|
-
|
114
|
-
expect(
|
122
|
+
|
123
|
+
it 'should append a production ref in its rhs' do
|
124
|
+
# Side-effect: refcount of production to append is incremented
|
125
|
+
ref_a = ProductionRef.new(p_a)
|
126
|
+
expect(p_a.refcount).to be(1)
|
127
|
+
|
128
|
+
input = [ref_a, :b, :c, :d, ref_a] # ref_a appears twice
|
129
|
+
input.each { |symb| subject.append_symbol(symb) }
|
130
|
+
|
131
|
+
# References in rhs should point to p_a...
|
132
|
+
# ...but should be distinct reference objects
|
133
|
+
expect(subject.rhs[0]).to eq(p_a)
|
134
|
+
expect(subject.rhs[0].object_id).not_to eq(ref_a.object_id)
|
135
|
+
expect(subject.rhs[-1]).to eq(p_a)
|
136
|
+
expect(subject.rhs[-1].object_id).not_to eq(ref_a.object_id)
|
137
|
+
|
138
|
+
# Reference count should be updated
|
139
|
+
expect(p_a.refcount).to be(3)
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'should complain when appending ref to nil production' do
|
143
|
+
# Side-effect: refcount of production to append is incremented
|
144
|
+
ref_a = ProductionRef.new(p_a)
|
145
|
+
expect(p_a.refcount).to be(1)
|
146
|
+
|
147
|
+
# Unbind the reference
|
148
|
+
ref_a.unbind
|
149
|
+
|
150
|
+
expect { subject.append_symbol(ref_a) }.to raise_error(StandardError)
|
115
151
|
end
|
116
152
|
|
117
153
|
end # context
|
118
|
-
|
119
|
-
|
154
|
+
|
155
|
+
|
120
156
|
context 'Text representation of a production rule:' do
|
121
157
|
|
122
158
|
it 'should emit minimal text when empty' do
|
@@ -128,7 +164,8 @@ describe Production do
|
|
128
164
|
instance = Production.new
|
129
165
|
symbols = [:a, :b, 'c', :d, :e, 1000, instance]
|
130
166
|
symbols.each { |symb| subject.append_symbol(symb) }
|
131
|
-
expectation = "#{subject.object_id} :
|
167
|
+
expectation = "#{subject.object_id} : "
|
168
|
+
expectation << "a b 'c' d e 1000 #{instance.object_id}."
|
132
169
|
expect(subject.to_string).to eq(expectation)
|
133
170
|
end
|
134
171
|
|
@@ -138,19 +175,19 @@ describe Production do
|
|
138
175
|
it 'should report no repetition when empty' do
|
139
176
|
expect(subject.repeated_digram?).to be_falsey
|
140
177
|
end
|
141
|
-
|
178
|
+
|
142
179
|
it 'should report no repetition when rhs has less than 3 symbols' do
|
143
180
|
subject.append_symbol(:a)
|
144
181
|
expect(subject.repeated_digram?).to be_falsey
|
145
|
-
|
182
|
+
|
146
183
|
subject.append_symbol(:a)
|
147
|
-
expect(subject.repeated_digram?).to be_falsey
|
184
|
+
expect(subject.repeated_digram?).to be_falsey
|
148
185
|
end
|
149
|
-
|
186
|
+
|
150
187
|
it 'should detect shortest repetition' do
|
151
188
|
'aaa'.each_char { |symb| subject.append_symbol(symb) }
|
152
|
-
expect(subject.repeated_digram?).to be_truthy
|
153
|
-
end
|
189
|
+
expect(subject.repeated_digram?).to be_truthy
|
190
|
+
end
|
154
191
|
|
155
192
|
it 'should detect any repetition pattern' do
|
156
193
|
# Positive cases
|
@@ -160,15 +197,15 @@ describe Production do
|
|
160
197
|
word.each_char { |symb| instance.append_symbol(symb) }
|
161
198
|
expect(instance.repeated_digram?).to be_truthy
|
162
199
|
end
|
163
|
-
|
200
|
+
|
164
201
|
# Negative cases
|
165
202
|
cases = %w(abc abb abba abcdef)
|
166
203
|
cases.each do |word|
|
167
204
|
instance = Production.new
|
168
205
|
word.each_char { |symb| instance.append_symbol(symb) }
|
169
206
|
expect(instance.repeated_digram?).to be_falsey
|
170
|
-
end
|
171
|
-
end
|
207
|
+
end
|
208
|
+
end
|
172
209
|
end # context
|
173
210
|
|
174
211
|
context 'Replacing a digram by a production:' do
|
@@ -182,12 +219,13 @@ describe Production do
|
|
182
219
|
|
183
220
|
it 'should replace two-symbol sequence' do
|
184
221
|
%w(a b c d e b c e).each { |symb| subject.append_symbol(symb) }
|
222
|
+
p_bc_before = p_bc.to_string
|
185
223
|
subject.replace_digram(p_bc)
|
186
224
|
|
187
225
|
expect(subject.rhs.size).to eq(6)
|
188
226
|
expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc, 'e'])
|
189
227
|
expect(p_bc.refcount).to eq(2)
|
190
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
228
|
+
expect(p_bc.to_string).to eq(p_bc_before)
|
191
229
|
end
|
192
230
|
|
193
231
|
|
@@ -198,7 +236,6 @@ describe Production do
|
|
198
236
|
expect(subject.rhs.size).to eq(5)
|
199
237
|
expect(subject.rhs).to eq([p_bc, 'd', 'e', p_bc, 'e'])
|
200
238
|
expect(p_bc.refcount).to eq(2)
|
201
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
202
239
|
end
|
203
240
|
|
204
241
|
|
@@ -209,7 +246,6 @@ describe Production do
|
|
209
246
|
expect(subject.rhs.size).to eq(5)
|
210
247
|
expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc])
|
211
248
|
expect(p_bc.refcount).to eq(2)
|
212
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
213
249
|
end
|
214
250
|
|
215
251
|
it 'should replace two consecutive two-symbol sequences' do
|
@@ -219,7 +255,6 @@ describe Production do
|
|
219
255
|
expect(subject.rhs.size).to eq(4)
|
220
256
|
expect(subject.rhs).to eq(['a', p_bc, p_bc, 'd'])
|
221
257
|
expect(p_bc.refcount).to eq(2)
|
222
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
223
258
|
end
|
224
259
|
|
225
260
|
end # context
|
@@ -233,18 +268,23 @@ describe Production do
|
|
233
268
|
|
234
269
|
it 'should replace a production at the start' do
|
235
270
|
[p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
|
271
|
+
expect(p_bc.refcount).to eq(1)
|
272
|
+
|
236
273
|
subject.replace_production(p_bc)
|
237
274
|
expect(subject.rhs.size).to eq(3)
|
238
275
|
expect(subject.rhs).to eq(%w(b c d))
|
276
|
+
expect(p_bc.refcount).to eq(0)
|
239
277
|
end
|
240
278
|
|
241
279
|
|
242
280
|
it 'should replace a production at the end' do
|
243
281
|
['d', p_bc].each { |symb| subject.append_symbol(symb) }
|
282
|
+
expect(p_bc.refcount).to eq(1)
|
244
283
|
subject.replace_production(p_bc)
|
245
284
|
|
246
285
|
expect(subject.rhs.size).to eq(3)
|
247
286
|
expect(subject.rhs).to eq(%w(d b c))
|
287
|
+
expect(p_bc.refcount).to eq(0)
|
248
288
|
end
|
249
289
|
|
250
290
|
it 'should replace a production as sole symbol' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.11
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -90,9 +90,11 @@ files:
|
|
90
90
|
- lib/sequitur/digram.rb
|
91
91
|
- lib/sequitur/dynamic_grammar.rb
|
92
92
|
- lib/sequitur/production.rb
|
93
|
+
- lib/sequitur/production_ref.rb
|
93
94
|
- lib/sequitur/sequitur_grammar.rb
|
94
95
|
- spec/sequitur/digram_spec.rb
|
95
96
|
- spec/sequitur/dynamic_grammar_spec.rb
|
97
|
+
- spec/sequitur/production_ref_spec.rb
|
96
98
|
- spec/sequitur/production_spec.rb
|
97
99
|
- spec/sequitur/sequitur_grammar_spec.rb
|
98
100
|
- spec/spec_helper.rb
|
@@ -130,5 +132,6 @@ summary: Ruby implementation of the Sequitur algorithm
|
|
130
132
|
test_files:
|
131
133
|
- spec/sequitur/digram_spec.rb
|
132
134
|
- spec/sequitur/dynamic_grammar_spec.rb
|
135
|
+
- spec/sequitur/production_ref_spec.rb
|
133
136
|
- spec/sequitur/production_spec.rb
|
134
137
|
- spec/sequitur/sequitur_grammar_spec.rb
|