sequitur 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.travis.yml +5 -0
- data/CHANGELOG.md +5 -0
- data/README.md +3 -3
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +10 -8
- data/lib/sequitur/dynamic_grammar.rb +6 -45
- data/lib/sequitur/production.rb +113 -85
- data/lib/sequitur/production_ref.rb +89 -0
- data/lib/sequitur/sequitur_grammar.rb +43 -54
- data/spec/sequitur/digram_spec.rb +19 -4
- data/spec/sequitur/production_ref_spec.rb +95 -0
- data/spec/sequitur/production_spec.rb +77 -37
- data/spec/sequitur/sequitur_grammar_spec.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjQ5ZDcxYTMzZGJlOWUzOTg1YmYxMThiMTAxYjk5YjZmZjkyY2FjMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZGIyODgxMjNhMmRiNGUyMjZlMTMzYjQwOGRjMjc3YzYyMTYzYjNmZQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OWFmNTRlN2NkZjRhNjVlOTU0MTlmZjZjNjllMDZjY2M4NWNiNWQ3NzQ0ZGMz
|
10
|
+
MjBkOWQzNjJjN2JiODExNDc2OTFmNjIyMGEyY2VkMDdiNDQyZjdiZTFjNTgw
|
11
|
+
NGE5NmVlZTEwMTkzNjU4ZGI2MjA5MGY3YTVhMjM2ZDcyZjhlMzk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjUzNWVlNTQ1ODI0NDkyMGUxOWY4NDIwYWIzNmJjNTEzOTgzZDE3YmRmMTE3
|
14
|
+
NjM1Mzc0Mzk5YmQ1MDdhMzFlZDc2YzVkYjc2MmY4ZWEwZWY0YjY1ZTdlYmFi
|
15
|
+
ZGJjODhmYzBhNGU2Y2IxZGZlODZlODNhNTg2NzU1YTgwNmQ5OTk=
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.0.12 / 2014-08-24
|
2
|
+
* [CHANGE] Significant internal refactoring.
|
3
|
+
* [CHANGE] Method `ObjectSpace::_id2ref` is no longer used => one obstacle to JRuby porting is removed.
|
4
|
+
* [NEW] Added new class `ProductionReference`
|
5
|
+
|
1
6
|
### 0.0.11 / 2014-08-24
|
2
7
|
* [FIX] `SequiturGrammar#check_unicity`: an exception was raised when it shouldn't. Added example in spec file.
|
3
8
|
* [CHANGE] `sequitur.rb` : Added the convenience Sequitur::build_from method.
|
data/README.md
CHANGED
@@ -20,8 +20,8 @@ It detects repeated token patterns and can represent them in a compact way.
|
|
20
20
|
```ruby
|
21
21
|
|
22
22
|
require 'sequitur' # Load the Sequitur library
|
23
|
-
|
24
|
-
input_sequence = 'abcabdab'
|
23
|
+
|
24
|
+
input_sequence = 'abcabdab' # Let's analyze this string
|
25
25
|
|
26
26
|
# The SEQUITUR algorithm will detect the repeated 'ab' pattern
|
27
27
|
# and will generate a context-free grammar that represents the input string
|
@@ -29,7 +29,7 @@ It detects repeated token patterns and can represent them in a compact way.
|
|
29
29
|
|
30
30
|
# Display the grammar rules
|
31
31
|
# Each rule is displayed with the format:
|
32
|
-
# rule_id :
|
32
|
+
# rule_id : a_sequence_of_grammar_symbols
|
33
33
|
# Where:
|
34
34
|
# - rule_id is the object id of a rule (in decimal)
|
35
35
|
# - a grammar symbol is either a terminal symbol
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -10,12 +10,12 @@ class Digram
|
|
10
10
|
# The sequence of two consecutive grammar symbols.
|
11
11
|
attr_reader(:symbols)
|
12
12
|
|
13
|
-
# The object id of the production that contains this digram in its rhs.
|
14
|
-
attr_reader(:production_id)
|
15
|
-
|
16
13
|
# A unique Hash key of the digram
|
17
14
|
attr_reader(:key)
|
18
15
|
|
16
|
+
# The production in which the digram occurs
|
17
|
+
attr_reader(:production)
|
18
|
+
|
19
19
|
# Constructor.
|
20
20
|
# @param symbol1 [StringOrSymbol] First element of the digram
|
21
21
|
# @param symbol2 [StringOrSymbol] Second element of the digram
|
@@ -24,13 +24,15 @@ class Digram
|
|
24
24
|
def initialize(symbol1, symbol2, aProduction)
|
25
25
|
@symbols = [symbol1, symbol2]
|
26
26
|
@key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
|
27
|
-
@
|
27
|
+
@production = aProduction
|
28
28
|
end
|
29
|
-
|
30
|
-
#
|
31
|
-
|
32
|
-
|
29
|
+
|
30
|
+
# Equality testing.
|
31
|
+
# Returns true when keys of both digrams are equal
|
32
|
+
def ==(other)
|
33
|
+
return key == other.key
|
33
34
|
end
|
35
|
+
|
34
36
|
end # class
|
35
37
|
|
36
38
|
end # module
|
@@ -47,11 +47,9 @@ class DynamicGrammar
|
|
47
47
|
puts to_string if trace
|
48
48
|
prod = productions.delete_at(anIndex)
|
49
49
|
# TODO: remove output
|
50
|
-
puts prod.to_string if trace
|
50
|
+
puts('Removed: ' + prod.to_string) if trace
|
51
51
|
prod.clear_rhs
|
52
52
|
|
53
|
-
check_backrefs # TODO: configurable check
|
54
|
-
|
55
53
|
return prod
|
56
54
|
end
|
57
55
|
|
@@ -68,57 +66,20 @@ class DynamicGrammar
|
|
68
66
|
end
|
69
67
|
|
70
68
|
|
71
|
-
# Check that
|
69
|
+
# Check that every production reference in rhs is
|
72
70
|
# pointing to a production of the grammar
|
73
71
|
def check_rhs_of(aProduction)
|
74
72
|
aProduction.references.each do |symb|
|
75
|
-
|
73
|
+
referenced_prod = symb.production
|
74
|
+
next if productions.include?(referenced_prod)
|
76
75
|
|
77
|
-
msg = "Production #{aProduction.object_id} refers to
|
78
|
-
msg << "production #{
|
76
|
+
msg = "Production #{aProduction.object_id} refers to"
|
77
|
+
msg << " production #{referenced_prod.object_id}"
|
79
78
|
msg << ' that is not part of the grammar.'
|
80
79
|
fail StandardError, msg
|
81
80
|
end
|
82
81
|
end
|
83
82
|
|
84
|
-
# Check the invariants:
|
85
|
-
# Every back reference must must point to a production of the grammar
|
86
|
-
# Every back reference count must be equal to the number
|
87
|
-
# of occurrences in the referencing production.
|
88
|
-
def check_backrefs()
|
89
|
-
return if productions.size < 2
|
90
|
-
|
91
|
-
all_but_root = productions[1...productions.size]
|
92
|
-
all_but_root.each do |a_prod|
|
93
|
-
a_prod.backrefs.each do |other_prod_id, count|
|
94
|
-
begin
|
95
|
-
other_prod = ObjectSpace._id2ref(other_prod_id)
|
96
|
-
rescue RangeError => exc
|
97
|
-
msg = "Production #{a_prod.object_id} has a backref to "
|
98
|
-
msg << "recycled production #{other_prod_id}."
|
99
|
-
msg << "\n#{to_string}"
|
100
|
-
$stderr.puts msg
|
101
|
-
raise exc
|
102
|
-
end
|
103
|
-
found = productions.find { |elem| elem == other_prod }
|
104
|
-
unless found
|
105
|
-
msg = "Production #{a_prod.object_id} is referenced by the "
|
106
|
-
msg << "unknown production (#{other_prod_id})."
|
107
|
-
msg << "\n#{to_string}"
|
108
|
-
fail StandardError, msg
|
109
|
-
end
|
110
|
-
|
111
|
-
unless count == found.rhs.count { |symb| symb == a_prod }
|
112
|
-
msg = "Production #{a_prod.object_id} has a count mismatch"
|
113
|
-
msg << "\nIt expects #{count} references in rhs of #{other_prod_id} "
|
114
|
-
msg << "but actual count is #{other_prod.rhs.count}."
|
115
|
-
msg << "\n#{to_string}"
|
116
|
-
fail StandardError, msg
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
83
|
end # class
|
123
84
|
|
124
85
|
end # module
|
data/lib/sequitur/production.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'digram'
|
2
|
+
require_relative 'production_ref'
|
2
3
|
|
3
4
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
5
|
|
@@ -14,33 +15,76 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
14
15
|
class Production
|
15
16
|
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
16
17
|
attr_reader(:rhs)
|
18
|
+
|
19
|
+
# The reference count (= how many times other productions reference this one)
|
20
|
+
attr_reader(:refcount)
|
17
21
|
|
18
|
-
#
|
19
|
-
|
20
|
-
# Where the reference count is the number of times this production
|
21
|
-
# appears in the rhs of the production with given id.
|
22
|
-
attr_reader(:backrefs)
|
22
|
+
# The sequence of digrams appearing in the RHS
|
23
|
+
attr_reader(:digrams)
|
23
24
|
|
24
25
|
# Constructor. Build a production with an empty RHS.
|
25
26
|
def initialize()
|
26
27
|
clear_rhs
|
27
|
-
@
|
28
|
+
@refcount = 0
|
29
|
+
@digrams = []
|
28
30
|
end
|
29
31
|
|
30
32
|
public
|
31
33
|
|
34
|
+
def ==(other)
|
35
|
+
return true if object_id == other.object_id
|
36
|
+
|
37
|
+
if other.is_a?(ProductionRef)
|
38
|
+
result = (other == self)
|
39
|
+
else
|
40
|
+
result = false
|
41
|
+
end
|
42
|
+
|
43
|
+
return result
|
44
|
+
end
|
45
|
+
|
46
|
+
|
32
47
|
# Is the rhs empty?
|
33
48
|
def empty?
|
34
49
|
return rhs.empty?
|
35
50
|
end
|
36
51
|
|
52
|
+
def incr_refcount()
|
53
|
+
@refcount += 1
|
54
|
+
end
|
55
|
+
|
56
|
+
def decr_refcount()
|
57
|
+
fail StandardError if @refcount == 0
|
58
|
+
@refcount -= 1
|
59
|
+
end
|
60
|
+
|
37
61
|
|
38
62
|
# Return the set of productions appearing in the rhs.
|
39
63
|
def references()
|
40
|
-
return rhs.select { |symb| symb.
|
64
|
+
return rhs.select { |symb| symb.is_a?(ProductionRef) }
|
65
|
+
end
|
66
|
+
|
67
|
+
# Return the set of references to a given production
|
68
|
+
def references_of(aProduction)
|
69
|
+
refs = references
|
70
|
+
return refs.select { |a_ref| a_ref == aProduction }
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
# Return the list of digrams found in the rhs of this production.
|
77
|
+
def recalc_digrams()
|
78
|
+
return [] if rhs.size < 2
|
79
|
+
|
80
|
+
result = []
|
81
|
+
rhs.each_cons(2) { |couple| result << Digram.new(*couple, self) }
|
82
|
+
|
83
|
+
@digrams = result
|
41
84
|
end
|
42
85
|
|
43
86
|
|
87
|
+
|
44
88
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
45
89
|
def single_digram?
|
46
90
|
return rhs.size == 2
|
@@ -59,53 +103,14 @@ class Production
|
|
59
103
|
same_key_found = all_keys.index(last_key)
|
60
104
|
return !same_key_found.nil?
|
61
105
|
end
|
62
|
-
|
106
|
+
|
63
107
|
# Return the last digram appearing in the RHS.
|
64
108
|
def last_digram()
|
65
|
-
|
66
|
-
|
67
|
-
return Digram.new(rhs[-2], rhs[-1], self)
|
68
|
-
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# The back reference count is the number of times this production
|
73
|
-
# appears in the rhs of all the productions of the grammar
|
74
|
-
def refcount()
|
75
|
-
total = backrefs.values.reduce(0) do |sub_result, count|
|
76
|
-
sub_result += count
|
77
|
-
end
|
78
|
-
|
79
|
-
return total
|
80
|
-
end
|
81
|
-
|
82
|
-
# Add a back reference to the given production.
|
83
|
-
# @param aProduction [Production] Assume that production P appears in the
|
84
|
-
# RHS of production Q, then a reference count of P is incremented in Q.
|
85
|
-
def add_backref(aProduction)
|
86
|
-
prod_id = aProduction.object_id
|
87
|
-
|
88
|
-
count = backrefs.fetch(prod_id, 0)
|
89
|
-
backrefs[prod_id] = count + 1
|
90
|
-
return count
|
109
|
+
result = digrams.empty? ? nil : digrams.last
|
110
|
+
return result
|
91
111
|
end
|
92
112
|
|
93
|
-
# Decrement the reference count for the given production.
|
94
|
-
# If result is zero, then the entry is removed from the Hash.
|
95
|
-
def remove_backref(aProduction)
|
96
|
-
prod_id = aProduction.object_id
|
97
|
-
|
98
|
-
count = backrefs.fetch(prod_id)
|
99
|
-
fail StandardError if count < 1
|
100
|
-
|
101
|
-
if count > 1
|
102
|
-
backrefs[prod_id] = count - 1
|
103
|
-
else
|
104
|
-
backrefs.delete(prod_id)
|
105
|
-
end
|
106
113
|
|
107
|
-
return count
|
108
|
-
end
|
109
114
|
|
110
115
|
# Emit a text representation of the production rule.
|
111
116
|
# Text is of the form:
|
@@ -122,17 +127,24 @@ class Production
|
|
122
127
|
return "#{object_id} : #{rhs_text.join(' ')}."
|
123
128
|
end
|
124
129
|
|
125
|
-
#
|
126
|
-
# the given symbol is appended at the end of the rhs
|
127
|
-
def calc_append_symbol(aSymbol)
|
128
|
-
return [] if empty?
|
129
|
-
|
130
|
-
return digrams + [ Digram.new(rhs.last, aSymbol, self) ]
|
131
|
-
end
|
132
|
-
|
130
|
+
# Add a (grammar) symbol at the end of the RHS.
|
133
131
|
def append_symbol(aSymbol)
|
134
|
-
|
135
|
-
|
132
|
+
case aSymbol
|
133
|
+
when Production
|
134
|
+
new_symb = ProductionRef.new(aSymbol)
|
135
|
+
when ProductionRef
|
136
|
+
if aSymbol.unbound?
|
137
|
+
msg = 'Fail to append reference to nil production in '
|
138
|
+
msg << to_string
|
139
|
+
fail StandardError, msg
|
140
|
+
end
|
141
|
+
new_symb = aSymbol.dup
|
142
|
+
else
|
143
|
+
new_symb = aSymbol
|
144
|
+
end
|
145
|
+
|
146
|
+
rhs << new_symb
|
147
|
+
digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
|
136
148
|
end
|
137
149
|
|
138
150
|
# Clear the right-hand side.
|
@@ -140,61 +152,77 @@ class Production
|
|
140
152
|
def clear_rhs()
|
141
153
|
if rhs
|
142
154
|
refs = references
|
143
|
-
refs.each { |a_ref| a_ref.
|
155
|
+
refs.each { |a_ref| a_ref.unbind }
|
144
156
|
end
|
145
157
|
@rhs = []
|
146
158
|
end
|
147
159
|
|
148
|
-
#
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
160
|
+
# Find all the positions where the digram occurs in the rhs
|
161
|
+
# Synopsis:
|
162
|
+
# Given the production p -> a b c a b a b d
|
163
|
+
# Then p.positions_of(a, b) should return [0, 3, 5]
|
164
|
+
# Caution: "overlapping" digrams shouldn't be counted
|
165
|
+
# Given the production p -> a a b a a a c d
|
166
|
+
# Then p.positions_of(a, a) should return [0, 3]
|
167
|
+
def positions_of(symb1, symb2)
|
157
168
|
|
158
|
-
# Substitute in self all occurence of the digram that
|
159
|
-
# appears in the rhs of the other production
|
160
|
-
# Pre-condition:
|
161
|
-
# another has a rhs with exactly one digram (= a two-symbol sequence).
|
162
|
-
def replace_digram(another)
|
163
169
|
# Find the positions where the digram occur in rhs
|
164
|
-
(symb1, symb2) = another.rhs
|
165
170
|
indices = [ -2 ] # Dummy index!
|
166
|
-
|
167
171
|
(0...rhs.size).each do |i|
|
168
172
|
next if i == indices.last + 1
|
169
173
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
170
174
|
end
|
175
|
+
|
171
176
|
indices.shift
|
172
177
|
|
173
|
-
|
178
|
+
return indices
|
179
|
+
end
|
180
|
+
|
181
|
+
|
182
|
+
# Substitute in self all occurrences of the digram that
|
183
|
+
# appears in the rhs of the other production
|
184
|
+
# Pre-condition:
|
185
|
+
# another has a rhs with exactly one digram (= a two-symbol sequence).
|
186
|
+
def replace_digram(another)
|
187
|
+
(symb1, symb2) = another.rhs
|
188
|
+
pos = positions_of(symb1, symb2).reverse
|
174
189
|
|
175
190
|
# Replace the two symbol sequence by the production
|
176
191
|
pos.each do |index|
|
177
|
-
|
178
|
-
|
192
|
+
if rhs[index].is_a?(ProductionRef)
|
193
|
+
rhs[index].bind_to(another)
|
194
|
+
else
|
195
|
+
rhs[index] = ProductionRef.new(another)
|
196
|
+
end
|
179
197
|
index1 = index + 1
|
180
|
-
rhs[index1].
|
198
|
+
rhs[index1].unbind if rhs[index1].is_a?(ProductionRef)
|
181
199
|
rhs.delete_at(index1)
|
182
|
-
another.add_backref(self)
|
183
200
|
end
|
201
|
+
|
202
|
+
recalc_digrams
|
184
203
|
end
|
185
204
|
|
186
205
|
# Replace every occurrence of 'another' production in rhs by
|
187
206
|
# the rhs of 'another'.
|
207
|
+
# Given the production p_A -> a p_B b p_B c
|
208
|
+
# And the production p_B -> x y
|
209
|
+
# Then the call p_A.replace_production(p_B)
|
210
|
+
# Modifies p_A into:
|
211
|
+
# p_A -> a x y b x y c
|
188
212
|
def replace_production(another)
|
189
213
|
(0...rhs.size).to_a.reverse.each do |index|
|
190
214
|
next unless rhs[index] == another
|
191
|
-
|
192
|
-
|
193
|
-
|
215
|
+
|
216
|
+
# Avoid the aliasing of production reference
|
217
|
+
other_rhs = another.rhs.map do |symb|
|
218
|
+
symb.is_a?(ProductionRef) ? symb.dup : symb
|
194
219
|
end
|
195
|
-
|
220
|
+
rhs.insert(index + 1, *other_rhs)
|
221
|
+
another.decr_refcount
|
196
222
|
rhs.delete_at(index)
|
197
223
|
end
|
224
|
+
|
225
|
+
recalc_digrams
|
198
226
|
end
|
199
227
|
|
200
228
|
end # class
|
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
+
|
5
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
6
|
+
# side of a production P1 and that refers to a production P2.
|
7
|
+
# Every time a production P2 appears in the right-hand side of
|
8
|
+
# production P1, this is implemented by inserting a production reference to P2
|
9
|
+
# in the appropriate position in the RHS of P1.
|
10
|
+
# In the literature, production references are also called non terminal
|
11
|
+
# symbols
|
12
|
+
class ProductionRef
|
13
|
+
|
14
|
+
# Link to the production to reference
|
15
|
+
attr_reader(:production)
|
16
|
+
|
17
|
+
# Constructor
|
18
|
+
# [target] The production that is being referenced.
|
19
|
+
def initialize(target)
|
20
|
+
bind_to(target)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Copy constructor invoked by dup or clone methods
|
24
|
+
def initialize_copy(orig)
|
25
|
+
@production = nil
|
26
|
+
bind_to(orig.production)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Return the text representation of a production reference.
|
30
|
+
def to_s()
|
31
|
+
return "#{production.object_id}"
|
32
|
+
end
|
33
|
+
|
34
|
+
alias_method :to_string, :to_s
|
35
|
+
|
36
|
+
|
37
|
+
# Equality testing.
|
38
|
+
# A production ref is equal to another one when it
|
39
|
+
# refers to the same production or when it is compared to
|
40
|
+
# the production it refers to.
|
41
|
+
def ==(other)
|
42
|
+
return true if object_id == other.object_id
|
43
|
+
|
44
|
+
if other.is_a?(ProductionRef)
|
45
|
+
result = (production == other.production)
|
46
|
+
else
|
47
|
+
result = (production == other)
|
48
|
+
end
|
49
|
+
|
50
|
+
return result
|
51
|
+
end
|
52
|
+
|
53
|
+
# Generates a Fixnum value as hash value.
|
54
|
+
# As a reference has no identity on its own,
|
55
|
+
# the method returns the hash value of the
|
56
|
+
# referenced production
|
57
|
+
def hash()
|
58
|
+
fail StandardError, 'Nil production' if production.nil?
|
59
|
+
return production.hash
|
60
|
+
end
|
61
|
+
|
62
|
+
# Make this reference point to the given production
|
63
|
+
def bind_to(aProduction)
|
64
|
+
return if aProduction == @production
|
65
|
+
|
66
|
+
production.decr_refcount if production
|
67
|
+
unless aProduction.kind_of?(Production)
|
68
|
+
fail StandardError, "Illegal production type #{aProduction.class}"
|
69
|
+
end
|
70
|
+
@production = aProduction
|
71
|
+
production.incr_refcount
|
72
|
+
end
|
73
|
+
|
74
|
+
# Clear the reference to the target production
|
75
|
+
def unbind()
|
76
|
+
production.decr_refcount
|
77
|
+
@production = nil
|
78
|
+
end
|
79
|
+
|
80
|
+
# Check that this object doesn't refer to any production.
|
81
|
+
def unbound?()
|
82
|
+
return production.nil?
|
83
|
+
end
|
84
|
+
|
85
|
+
end # class
|
86
|
+
|
87
|
+
end # module
|
88
|
+
|
89
|
+
# End of file
|
@@ -13,7 +13,7 @@ class SequiturGrammar < DynamicGrammar
|
|
13
13
|
def initialize(anEnum)
|
14
14
|
super()
|
15
15
|
# Make start production compliant with utility rule
|
16
|
-
2.times { root.
|
16
|
+
2.times { root.incr_refcount }
|
17
17
|
|
18
18
|
@digrams = {}
|
19
19
|
@parsed = []
|
@@ -34,14 +34,15 @@ class SequiturGrammar < DynamicGrammar
|
|
34
34
|
all_digrams = {}
|
35
35
|
productions.each do |a_prod|
|
36
36
|
prod_digrams = a_prod.digrams
|
37
|
-
prod_digrams.
|
37
|
+
prod_digrams.each_with_index do |a_digram, index|
|
38
|
+
next if index && a_digram == a_prod.digrams[index - 1]
|
38
39
|
if all_digrams.include? a_digram.key
|
39
40
|
msg = "Digram #{a_digram.symbols} occurs twice!"
|
40
41
|
colliding = all_digrams[a_digram.key]
|
41
|
-
msg << "\nOnce in production #{colliding.
|
42
|
+
msg << "\nOnce in production #{colliding.production.object_id}"
|
42
43
|
msg << "\nSecond in production #{a_prod.object_id}"
|
43
44
|
msg << "\n#{to_string}"
|
44
|
-
fail StandardError, msg unless colliding
|
45
|
+
fail StandardError, msg unless colliding == a_prod
|
45
46
|
else
|
46
47
|
all_digrams[a_digram.key] = a_digram
|
47
48
|
end
|
@@ -55,8 +56,8 @@ class SequiturGrammar < DynamicGrammar
|
|
55
56
|
# Assumption: last digram of production isn't yet registered.
|
56
57
|
def add_production(aProduction)
|
57
58
|
super # Call original method from superclass...
|
58
|
-
|
59
|
-
# ... then add this behaviour
|
59
|
+
|
60
|
+
# ... then add this behaviour
|
60
61
|
last_digram = aProduction.last_digram
|
61
62
|
digrams[last_digram.key] = last_digram
|
62
63
|
end
|
@@ -67,7 +68,7 @@ class SequiturGrammar < DynamicGrammar
|
|
67
68
|
|
68
69
|
# Retrieve in the Hash all registered digrams from the removed production
|
69
70
|
digrams_subset = digrams.select do |_, digr|
|
70
|
-
digr.
|
71
|
+
digr.production == prod
|
71
72
|
end
|
72
73
|
|
73
74
|
# Remove them...
|
@@ -76,10 +77,10 @@ class SequiturGrammar < DynamicGrammar
|
|
76
77
|
end
|
77
78
|
|
78
79
|
def append_symbol_to(aProduction, aSymbol)
|
79
|
-
prod_digrams = aProduction.calc_append_symbol(aSymbol)
|
80
80
|
check_digrams # TODO: remove this
|
81
|
-
check_backrefs # TODO: remove this
|
82
81
|
super
|
82
|
+
|
83
|
+
prod_digrams = aProduction.digrams
|
83
84
|
unless prod_digrams.empty?
|
84
85
|
last_digram = prod_digrams.last
|
85
86
|
matching_digram = digrams[last_digram.key]
|
@@ -105,32 +106,19 @@ class SequiturGrammar < DynamicGrammar
|
|
105
106
|
def preserve_unicity(aProduction)
|
106
107
|
last_digram = aProduction.last_digram
|
107
108
|
matching_digram = digrams[last_digram.key]
|
108
|
-
if
|
109
|
+
if aProduction == matching_digram.production
|
109
110
|
# Rule: no other production distinct from aProduction should have
|
110
111
|
# the matching digram
|
111
112
|
productions.each do |prod|
|
112
113
|
its_digrams = prod.digrams
|
113
114
|
its_keys = its_digrams.map(&:key)
|
114
|
-
if prod
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
# msg << "\n#{self.to_string}"
|
122
|
-
# fail StandardError, msg
|
123
|
-
# end
|
124
|
-
|
125
|
-
else
|
126
|
-
if its_keys.include? last_digram.key
|
127
|
-
msg = "Digram #{last_digram.symbols} occurs three times!"
|
128
|
-
msg << "\nTwice in production #{aProduction.object_id}"
|
129
|
-
msg << "\nThird in production #{prod.object_id}"
|
130
|
-
msg << "\n#{to_string}"
|
131
|
-
fail StandardError, msg
|
132
|
-
end
|
133
|
-
end
|
115
|
+
next if prod == last_digram.production
|
116
|
+
next unless its_keys.include? last_digram.key
|
117
|
+
msg = "Digram #{last_digram.symbols} occurs three times!"
|
118
|
+
msg << "\nTwice in production #{aProduction.object_id}"
|
119
|
+
msg << "\nThird in production #{prod.object_id}"
|
120
|
+
msg << "\n#{to_string}"
|
121
|
+
fail StandardError, msg
|
134
122
|
end
|
135
123
|
|
136
124
|
# Digram appears twice in given production...
|
@@ -148,17 +136,18 @@ class SequiturGrammar < DynamicGrammar
|
|
148
136
|
else
|
149
137
|
# Duplicate digram used in distinct production
|
150
138
|
# Two cases: other production is a single digram one or a multi-digram
|
151
|
-
other_prod =
|
139
|
+
other_prod = matching_digram.production
|
152
140
|
if other_prod.single_digram?
|
153
141
|
# ... replace duplicate digram by reference to other production
|
154
142
|
aProduction.replace_digram(other_prod)
|
155
143
|
update_digrams_from(aProduction)
|
156
144
|
|
157
|
-
# Special case a: replacement causes another digram duplication
|
145
|
+
# Special case a: replacement causes another digram duplication
|
158
146
|
# in the given production
|
159
|
-
# Special case b: replacement causes another digram duplication
|
147
|
+
# Special case b: replacement causes another digram duplication
|
160
148
|
# with other production
|
161
|
-
if aProduction.repeated_digram? ||
|
149
|
+
if aProduction.repeated_digram? ||
|
150
|
+
(digrams[aProduction.last_digram.key].production != aProduction)
|
162
151
|
preserve_unicity(aProduction)
|
163
152
|
end
|
164
153
|
|
@@ -178,7 +167,6 @@ class SequiturGrammar < DynamicGrammar
|
|
178
167
|
|
179
168
|
# TODO: Check when aProduction and other_prod have same preceding symbol
|
180
169
|
update_digrams_from(other_prod)
|
181
|
-
check_backrefs # TODO: remove this
|
182
170
|
end
|
183
171
|
check_unicity
|
184
172
|
end
|
@@ -205,16 +193,17 @@ class SequiturGrammar < DynamicGrammar
|
|
205
193
|
loop do
|
206
194
|
all_refcount_ok = true
|
207
195
|
(1...productions.size).to_a.reverse.each do |index|
|
208
|
-
|
196
|
+
curr_production = productions[index]
|
197
|
+
next unless curr_production.refcount == 1
|
209
198
|
|
210
199
|
all_refcount_ok = false
|
211
|
-
|
212
|
-
|
200
|
+
dependent = productions.find do |a_prod|
|
201
|
+
!a_prod.references_of(curr_production).empty?
|
202
|
+
end
|
213
203
|
dependent.replace_production(productions[index])
|
214
204
|
delete_production(index)
|
215
205
|
update_digrams_from(dependent)
|
216
206
|
check_references
|
217
|
-
check_backrefs
|
218
207
|
end
|
219
208
|
|
220
209
|
break if all_refcount_ok
|
@@ -226,14 +215,14 @@ class SequiturGrammar < DynamicGrammar
|
|
226
215
|
def update_digrams_from(aProduction)
|
227
216
|
current_digrams = aProduction.digrams
|
228
217
|
|
229
|
-
# Add new digrams
|
218
|
+
# Add new digrams only if they don't collide
|
230
219
|
current_digrams.each do |digr|
|
231
220
|
digrams[digr.key] = digr unless digrams.include? digr.key
|
232
221
|
end
|
233
222
|
|
234
223
|
# Retrieve all registered digrams from the production
|
235
224
|
digrams_subset = digrams.select do |_, digr|
|
236
|
-
digr.
|
225
|
+
digr.production == aProduction
|
237
226
|
end
|
238
227
|
|
239
228
|
# Remove obsolete digrams
|
@@ -244,16 +233,18 @@ class SequiturGrammar < DynamicGrammar
|
|
244
233
|
end
|
245
234
|
|
246
235
|
# Check the invariant:
|
247
|
-
# Every
|
236
|
+
# Every reference in a rhs that is bound must point
|
248
237
|
# to a production of the grammar.
|
249
238
|
def check_references()
|
250
239
|
productions.each do |a_prod|
|
251
240
|
rhs_prods = a_prod.references
|
252
|
-
rhs_prods.each do |
|
241
|
+
rhs_prods.each do |a_reference|
|
242
|
+
next if a_reference.unbound?
|
243
|
+
referenced_prod = a_reference.production
|
253
244
|
next if productions.include? referenced_prod
|
254
245
|
|
255
|
-
msg = "Production #{a_prod.object_id}
|
256
|
-
msg << "unknown production #{referenced_prod.object_id}"
|
246
|
+
msg = "Production #{a_prod.object_id} #{a_prod.to_string}"
|
247
|
+
msg << " references the unknown production #{referenced_prod.object_id}"
|
257
248
|
msg << "\nOrphan production: #{referenced_prod.to_string}"
|
258
249
|
msg << "\n#{to_string}"
|
259
250
|
fail StandardError, msg
|
@@ -265,13 +256,11 @@ class SequiturGrammar < DynamicGrammar
|
|
265
256
|
# Every registered digram must reference a production from the grammar
|
266
257
|
def check_registered()
|
267
258
|
digrams.each do |_key, digr|
|
268
|
-
found = productions.find
|
269
|
-
digr.production_id == a_prod.object_id
|
270
|
-
end
|
259
|
+
found = productions.find { |a_prod| digr.production == a_prod }
|
271
260
|
next if found
|
272
261
|
|
273
262
|
msg = "Digram #{digr.symbols} references the unknown "
|
274
|
-
msg << "production (#{digr.
|
263
|
+
msg << "production (#{digr.production.object_id})."
|
275
264
|
msg << "\n#{to_string}"
|
276
265
|
fail StandardError, msg
|
277
266
|
end
|
@@ -283,7 +272,7 @@ class SequiturGrammar < DynamicGrammar
|
|
283
272
|
# Control that every registered digram refers
|
284
273
|
# to a production that really has that digram
|
285
274
|
digrams.each do |key, digr|
|
286
|
-
its_prod =
|
275
|
+
its_prod = digr.production
|
287
276
|
prod_digrams = its_prod.digrams
|
288
277
|
prod_keys = prod_digrams.map(&:key)
|
289
278
|
next if prod_keys.include? key
|
@@ -307,15 +296,15 @@ class SequiturGrammar < DynamicGrammar
|
|
307
296
|
all_digrams.each do |key, digr|
|
308
297
|
registered = digrams[key]
|
309
298
|
if registered
|
310
|
-
if registered
|
311
|
-
msg = "Production #{digr.
|
299
|
+
if registered != digr
|
300
|
+
msg = "Production #{digr.production.object_id} has "
|
312
301
|
msg << "the digram #{digr.symbols} that collides"
|
313
|
-
msg << "\n with same digram from #{registered.
|
302
|
+
msg << "\n with same digram from #{registered.production.object_id}"
|
314
303
|
msg << "\n#{to_string}"
|
315
304
|
fail StandardError, msg
|
316
305
|
end
|
317
306
|
else
|
318
|
-
its_prod =
|
307
|
+
its_prod = digr.production
|
319
308
|
msg = "Production #{its_prod.object_id} (#{its_prod.rhs}) "
|
320
309
|
msg << "has the digram #{digr.symbols} that isn't registered."
|
321
310
|
msg << "\n#{to_string}"
|
@@ -7,24 +7,39 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
7
7
|
|
8
8
|
describe Digram do
|
9
9
|
let(:two_symbols) { [:b, :c] }
|
10
|
+
let(:production) { double('sample-production') }
|
10
11
|
|
11
12
|
context 'Standard creation & initialization:' do
|
12
13
|
|
13
14
|
it 'should be created with 3 arguments' do
|
14
|
-
production = double('sample-production')
|
15
15
|
instance = Digram.new(:b, :c, production)
|
16
16
|
|
17
17
|
expect(instance.symbols).to eq(two_symbols)
|
18
|
-
expect(instance.
|
18
|
+
expect(instance.production).to eq(production)
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should return the production that it refers to' do
|
22
|
-
production = double('sample-production')
|
23
22
|
instance = Digram.new(:b, :c, production)
|
24
23
|
expect(instance.production).to eq(production)
|
25
24
|
end
|
26
25
|
|
27
|
-
end # context
|
26
|
+
end # context
|
27
|
+
|
28
|
+
context 'Standard creation & initialization:' do
|
29
|
+
|
30
|
+
it 'should compare itself to another digram' do
|
31
|
+
instance1 = Digram.new(:a, :b, production)
|
32
|
+
same = Digram.new(:a, :b, production)
|
33
|
+
different = Digram.new(:b, :c, production)
|
34
|
+
|
35
|
+
expect(instance1).to eq(instance1)
|
36
|
+
expect(instance1).to eq(same)
|
37
|
+
expect(instance1).not_to eq(different)
|
38
|
+
expect(same).not_to eq(different)
|
39
|
+
end
|
40
|
+
|
41
|
+
end # context
|
42
|
+
|
28
43
|
|
29
44
|
end # describe
|
30
45
|
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
|
3
|
+
# Load the class under test
|
4
|
+
require_relative '../../lib/sequitur/production'
|
5
|
+
require_relative '../../lib/sequitur/production_ref'
|
6
|
+
|
7
|
+
module Sequitur # Re-open the module to get rid of qualified names
|
8
|
+
|
9
|
+
describe ProductionRef do
|
10
|
+
|
11
|
+
let(:target) { Production.new }
|
12
|
+
let(:another_target) { Production.new }
|
13
|
+
|
14
|
+
subject { ProductionRef.new(target) }
|
15
|
+
|
16
|
+
context 'Creation & initialization:' do
|
17
|
+
|
18
|
+
it 'should be created with a production argument' do
|
19
|
+
expect { ProductionRef.new(target) }.not_to raise_error
|
20
|
+
expect(target.refcount).to eq(1)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should clone with reference count incrementing' do
|
24
|
+
expect(target.refcount).to eq(0)
|
25
|
+
expect(subject.production.refcount).to eq(1)
|
26
|
+
klone = subject.clone
|
27
|
+
expect(klone.production.refcount).to eq(2)
|
28
|
+
duplicate = subject.dup
|
29
|
+
expect(duplicate.production.refcount).to eq(3)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should know its referenced production' do
|
33
|
+
instance = ProductionRef.new(target)
|
34
|
+
expect(instance.production).to eq(target)
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
end # context
|
39
|
+
|
40
|
+
context 'Provided services:' do
|
41
|
+
|
42
|
+
it 'should render its referenced production' do
|
43
|
+
expect(subject.to_s).to eq(target.object_id.to_s)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should unbind itself from its production' do
|
47
|
+
expect(target.refcount).to eq(0)
|
48
|
+
expect(subject).not_to be_unbound
|
49
|
+
expect(target.refcount).to eq(1)
|
50
|
+
subject.unbind
|
51
|
+
expect(target.refcount).to eq(0)
|
52
|
+
expect(subject.production).to be_nil
|
53
|
+
expect(subject).to be_unbound
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should bind to a production' do
|
57
|
+
expect(target.refcount).to eq(0)
|
58
|
+
|
59
|
+
expect(subject).not_to be_unbound
|
60
|
+
expect(target.refcount).to eq(1)
|
61
|
+
|
62
|
+
# Case: bind again to same production
|
63
|
+
expect { subject.bind_to(target) }.not_to raise_error
|
64
|
+
expect(target.refcount).to eq(1)
|
65
|
+
|
66
|
+
# Case: bind to another production
|
67
|
+
expect(another_target.refcount).to eq(0)
|
68
|
+
subject.bind_to(another_target)
|
69
|
+
expect(target.refcount).to eq(0)
|
70
|
+
expect(another_target.refcount).to eq(1)
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should compare to other production (reference)' do
|
74
|
+
same = ProductionRef.new(target)
|
75
|
+
expect(subject).to eq(subject) # Strict identity
|
76
|
+
expect(subject).to eq(same) # 2 references pointing to same production
|
77
|
+
expect(subject).to eq(target)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'should return the hash value of its production' do
|
81
|
+
expectation = target.hash
|
82
|
+
expect(subject.hash).to eq(expectation)
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should complain when requested for a hash and unbound' do
|
86
|
+
subject.unbind
|
87
|
+
expect { subject.hash }.to raise_error(StandardError)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end # describe
|
92
|
+
|
93
|
+
end # module
|
94
|
+
|
95
|
+
# End of file
|
@@ -43,33 +43,52 @@ describe Production do
|
|
43
43
|
expect(subject.last_digram).to be_nil
|
44
44
|
end
|
45
45
|
end # context
|
46
|
-
|
46
|
+
|
47
47
|
context 'Knowing its rhs:' do
|
48
|
-
|
48
|
+
|
49
49
|
it 'should know the productions in its rhs' do
|
50
50
|
# Case 1: empty production
|
51
51
|
expect(subject.references).to be_empty
|
52
|
-
|
52
|
+
|
53
53
|
# Case 2: production without references
|
54
54
|
symbols = [:a, :b, :c]
|
55
55
|
symbols.each { |symb| subject.append_symbol(symb) }
|
56
56
|
expect(subject.references).to be_empty
|
57
|
+
expect(subject.references_of(p_a)).to be_empty
|
57
58
|
|
58
59
|
# Case 2: production with one reference
|
59
60
|
subject.append_symbol(p_a)
|
60
61
|
expect(subject.references).to eq([p_a])
|
62
|
+
expect(subject.references_of(p_a)).to eq([p_a])
|
61
63
|
|
62
64
|
# Case 3: production with repeated references
|
63
65
|
subject.append_symbol(p_a) # second time
|
64
|
-
expect(subject.references).to eq([p_a, p_a])
|
66
|
+
expect(subject.references).to eq([p_a, p_a])
|
67
|
+
expect(subject.references_of(p_a)).to eq([p_a, p_a])
|
68
|
+
|
65
69
|
|
66
70
|
# Case 4: production with multiple distinct references
|
67
71
|
subject.append_symbol(p_bc)
|
68
|
-
expect(subject.references).to eq([p_a, p_a, p_bc])
|
72
|
+
expect(subject.references).to eq([p_a, p_a, p_bc])
|
73
|
+
expect(subject.references_of(p_bc)).to eq([p_bc])
|
69
74
|
end
|
70
|
-
|
75
|
+
|
76
|
+
it 'should know the position(s) of a given digram' do
|
77
|
+
sequence1 = [:a, :b, :c, :a, :b, :a, :b, :d]
|
78
|
+
sequence1.each { |symb| subject.append_symbol(symb) }
|
79
|
+
positions = [0, 3, 5]
|
80
|
+
expect(subject.positions_of(:a, :b)).to eq(positions)
|
81
|
+
|
82
|
+
subject.clear_rhs
|
83
|
+
# Case of overlapping digrams
|
84
|
+
sequence2 = [:a, :a, :b, :a, :a, :a, :c, :d]
|
85
|
+
sequence2.each { |symb| subject.append_symbol(symb) }
|
86
|
+
positions = [0, 3]
|
87
|
+
expect(subject.positions_of(:a, :a)).to eq(positions)
|
88
|
+
end
|
89
|
+
|
71
90
|
end # context
|
72
|
-
|
91
|
+
|
73
92
|
context 'Appending a symbol:' do
|
74
93
|
|
75
94
|
it 'should append a symbol when empty' do
|
@@ -92,31 +111,48 @@ describe Production do
|
|
92
111
|
expect(subject.last_digram.symbols).to eq([:e, :f])
|
93
112
|
end
|
94
113
|
|
95
|
-
it 'should
|
114
|
+
it 'should append a production in its rhs' do
|
115
|
+
# Side-effect: refcount of production to append is incremented
|
96
116
|
expect(p_a.refcount).to be(0)
|
97
117
|
|
98
118
|
input = [p_a, :b, :c, :d, p_a, :e, :f] # p_a appears twice
|
99
119
|
input.each { |symb| subject.append_symbol(symb) }
|
100
120
|
expect(p_a.refcount).to be(2)
|
101
121
|
end
|
102
|
-
|
103
|
-
it 'should
|
104
|
-
#
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
#
|
112
|
-
|
113
|
-
|
114
|
-
expect(
|
122
|
+
|
123
|
+
it 'should append a production ref in its rhs' do
|
124
|
+
# Side-effect: refcount of production to append is incremented
|
125
|
+
ref_a = ProductionRef.new(p_a)
|
126
|
+
expect(p_a.refcount).to be(1)
|
127
|
+
|
128
|
+
input = [ref_a, :b, :c, :d, ref_a] # ref_a appears twice
|
129
|
+
input.each { |symb| subject.append_symbol(symb) }
|
130
|
+
|
131
|
+
# References in rhs should point to p_a...
|
132
|
+
# ...but should be distinct reference objects
|
133
|
+
expect(subject.rhs[0]).to eq(p_a)
|
134
|
+
expect(subject.rhs[0].object_id).not_to eq(ref_a.object_id)
|
135
|
+
expect(subject.rhs[-1]).to eq(p_a)
|
136
|
+
expect(subject.rhs[-1].object_id).not_to eq(ref_a.object_id)
|
137
|
+
|
138
|
+
# Reference count should be updated
|
139
|
+
expect(p_a.refcount).to be(3)
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'should complain when appending ref to nil production' do
|
143
|
+
# Side-effect: refcount of production to append is incremented
|
144
|
+
ref_a = ProductionRef.new(p_a)
|
145
|
+
expect(p_a.refcount).to be(1)
|
146
|
+
|
147
|
+
# Unbind the reference
|
148
|
+
ref_a.unbind
|
149
|
+
|
150
|
+
expect { subject.append_symbol(ref_a) }.to raise_error(StandardError)
|
115
151
|
end
|
116
152
|
|
117
153
|
end # context
|
118
|
-
|
119
|
-
|
154
|
+
|
155
|
+
|
120
156
|
context 'Text representation of a production rule:' do
|
121
157
|
|
122
158
|
it 'should emit minimal text when empty' do
|
@@ -128,7 +164,8 @@ describe Production do
|
|
128
164
|
instance = Production.new
|
129
165
|
symbols = [:a, :b, 'c', :d, :e, 1000, instance]
|
130
166
|
symbols.each { |symb| subject.append_symbol(symb) }
|
131
|
-
expectation = "#{subject.object_id} :
|
167
|
+
expectation = "#{subject.object_id} : "
|
168
|
+
expectation << "a b 'c' d e 1000 #{instance.object_id}."
|
132
169
|
expect(subject.to_string).to eq(expectation)
|
133
170
|
end
|
134
171
|
|
@@ -138,19 +175,19 @@ describe Production do
|
|
138
175
|
it 'should report no repetition when empty' do
|
139
176
|
expect(subject.repeated_digram?).to be_falsey
|
140
177
|
end
|
141
|
-
|
178
|
+
|
142
179
|
it 'should report no repetition when rhs has less than 3 symbols' do
|
143
180
|
subject.append_symbol(:a)
|
144
181
|
expect(subject.repeated_digram?).to be_falsey
|
145
|
-
|
182
|
+
|
146
183
|
subject.append_symbol(:a)
|
147
|
-
expect(subject.repeated_digram?).to be_falsey
|
184
|
+
expect(subject.repeated_digram?).to be_falsey
|
148
185
|
end
|
149
|
-
|
186
|
+
|
150
187
|
it 'should detect shortest repetition' do
|
151
188
|
'aaa'.each_char { |symb| subject.append_symbol(symb) }
|
152
|
-
expect(subject.repeated_digram?).to be_truthy
|
153
|
-
end
|
189
|
+
expect(subject.repeated_digram?).to be_truthy
|
190
|
+
end
|
154
191
|
|
155
192
|
it 'should detect any repetition pattern' do
|
156
193
|
# Positive cases
|
@@ -160,15 +197,15 @@ describe Production do
|
|
160
197
|
word.each_char { |symb| instance.append_symbol(symb) }
|
161
198
|
expect(instance.repeated_digram?).to be_truthy
|
162
199
|
end
|
163
|
-
|
200
|
+
|
164
201
|
# Negative cases
|
165
202
|
cases = %w(abc abb abba abcdef)
|
166
203
|
cases.each do |word|
|
167
204
|
instance = Production.new
|
168
205
|
word.each_char { |symb| instance.append_symbol(symb) }
|
169
206
|
expect(instance.repeated_digram?).to be_falsey
|
170
|
-
end
|
171
|
-
end
|
207
|
+
end
|
208
|
+
end
|
172
209
|
end # context
|
173
210
|
|
174
211
|
context 'Replacing a digram by a production:' do
|
@@ -182,12 +219,13 @@ describe Production do
|
|
182
219
|
|
183
220
|
it 'should replace two-symbol sequence' do
|
184
221
|
%w(a b c d e b c e).each { |symb| subject.append_symbol(symb) }
|
222
|
+
p_bc_before = p_bc.to_string
|
185
223
|
subject.replace_digram(p_bc)
|
186
224
|
|
187
225
|
expect(subject.rhs.size).to eq(6)
|
188
226
|
expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc, 'e'])
|
189
227
|
expect(p_bc.refcount).to eq(2)
|
190
|
-
expect(p_bc.
|
228
|
+
expect(p_bc.to_string).to eq(p_bc_before)
|
191
229
|
end
|
192
230
|
|
193
231
|
|
@@ -198,7 +236,6 @@ describe Production do
|
|
198
236
|
expect(subject.rhs.size).to eq(5)
|
199
237
|
expect(subject.rhs).to eq([p_bc, 'd', 'e', p_bc, 'e'])
|
200
238
|
expect(p_bc.refcount).to eq(2)
|
201
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
202
239
|
end
|
203
240
|
|
204
241
|
|
@@ -209,7 +246,6 @@ describe Production do
|
|
209
246
|
expect(subject.rhs.size).to eq(5)
|
210
247
|
expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc])
|
211
248
|
expect(p_bc.refcount).to eq(2)
|
212
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
213
249
|
end
|
214
250
|
|
215
251
|
it 'should replace two consecutive two-symbol sequences' do
|
@@ -219,7 +255,6 @@ describe Production do
|
|
219
255
|
expect(subject.rhs.size).to eq(4)
|
220
256
|
expect(subject.rhs).to eq(['a', p_bc, p_bc, 'd'])
|
221
257
|
expect(p_bc.refcount).to eq(2)
|
222
|
-
expect(p_bc.backrefs[subject.object_id]).to eq(2)
|
223
258
|
end
|
224
259
|
|
225
260
|
end # context
|
@@ -233,18 +268,23 @@ describe Production do
|
|
233
268
|
|
234
269
|
it 'should replace a production at the start' do
|
235
270
|
[p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
|
271
|
+
expect(p_bc.refcount).to eq(1)
|
272
|
+
|
236
273
|
subject.replace_production(p_bc)
|
237
274
|
expect(subject.rhs.size).to eq(3)
|
238
275
|
expect(subject.rhs).to eq(%w(b c d))
|
276
|
+
expect(p_bc.refcount).to eq(0)
|
239
277
|
end
|
240
278
|
|
241
279
|
|
242
280
|
it 'should replace a production at the end' do
|
243
281
|
['d', p_bc].each { |symb| subject.append_symbol(symb) }
|
282
|
+
expect(p_bc.refcount).to eq(1)
|
244
283
|
subject.replace_production(p_bc)
|
245
284
|
|
246
285
|
expect(subject.rhs.size).to eq(3)
|
247
286
|
expect(subject.rhs).to eq(%w(d b c))
|
287
|
+
expect(p_bc.refcount).to eq(0)
|
248
288
|
end
|
249
289
|
|
250
290
|
it 'should replace a production as sole symbol' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -90,9 +90,11 @@ files:
|
|
90
90
|
- lib/sequitur/digram.rb
|
91
91
|
- lib/sequitur/dynamic_grammar.rb
|
92
92
|
- lib/sequitur/production.rb
|
93
|
+
- lib/sequitur/production_ref.rb
|
93
94
|
- lib/sequitur/sequitur_grammar.rb
|
94
95
|
- spec/sequitur/digram_spec.rb
|
95
96
|
- spec/sequitur/dynamic_grammar_spec.rb
|
97
|
+
- spec/sequitur/production_ref_spec.rb
|
96
98
|
- spec/sequitur/production_spec.rb
|
97
99
|
- spec/sequitur/sequitur_grammar_spec.rb
|
98
100
|
- spec/spec_helper.rb
|
@@ -130,5 +132,6 @@ summary: Ruby implementation of the Sequitur algorithm
|
|
130
132
|
test_files:
|
131
133
|
- spec/sequitur/digram_spec.rb
|
132
134
|
- spec/sequitur/dynamic_grammar_spec.rb
|
135
|
+
- spec/sequitur/production_ref_spec.rb
|
133
136
|
- spec/sequitur/production_spec.rb
|
134
137
|
- spec/sequitur/sequitur_grammar_spec.rb
|