sequitur 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OGM4MWZiN2YzOTRmODhhNDlkNGIzMGRiOWYzZjU3MDY5ODc2YWUwMg==
4
+ ZjQ5ZDcxYTMzZGJlOWUzOTg1YmYxMThiMTAxYjk5YjZmZjkyY2FjMA==
5
5
  data.tar.gz: !binary |-
6
- ZGJmYzVjMzlkOTE0ZGRhZGI3ZWRjY2IyMDk3ZGY3ZDY1YjIyY2YyNg==
6
+ ZGIyODgxMjNhMmRiNGUyMjZlMTMzYjQwOGRjMjc3YzYyMTYzYjNmZQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- Nzk1ZDRmNjBkN2FmODQ4Yjk5ODNiYTk3NDA1YTkxYmZlYjMyOGI0MDhkZTM5
10
- NTg1ZWQyOGIxYzc2NjE4ODg0ODY1MDJiMzJhNDBmNGJlMjdiNzAwMTI3ZmZl
11
- MDkzMGFjYWNhNTU4ZWUwYTBjYjdlMjJjNjc2MmY5ZTk5MTUzNTQ=
9
+ OWFmNTRlN2NkZjRhNjVlOTU0MTlmZjZjNjllMDZjY2M4NWNiNWQ3NzQ0ZGMz
10
+ MjBkOWQzNjJjN2JiODExNDc2OTFmNjIyMGEyY2VkMDdiNDQyZjdiZTFjNTgw
11
+ NGE5NmVlZTEwMTkzNjU4ZGI2MjA5MGY3YTVhMjM2ZDcyZjhlMzk=
12
12
  data.tar.gz: !binary |-
13
- MWQ2OGU3OWIyMzQ5MWVkM2IxZGFhYzhmNDM5MGRkMzA5MjlhNzUxYTViYWMz
14
- MzIzY2MxYjk1OTBmMTNkNmM4NDM0NWI4YzJlZDk5YjU2ZjU4YmM2ZWYzMjkx
15
- MmNkYWE5MjU3NmI3MTE1OTIwYWZmOWI1ODRlMmQ4ZmZjNjkwY2E=
13
+ ZjUzNWVlNTQ1ODI0NDkyMGUxOWY4NDIwYWIzNmJjNTEzOTgzZDE3YmRmMTE3
14
+ NjM1Mzc0Mzk5YmQ1MDdhMzFlZDc2YzVkYjc2MmY4ZWEwZWY0YjY1ZTdlYmFi
15
+ ZGJjODhmYzBhNGU2Y2IxZGZlODZlODNhNTg2NzU1YTgwNmQ5OTk=
data/.travis.yml CHANGED
@@ -5,10 +5,15 @@ rvm:
5
5
  - 2.0.0
6
6
  - 1.9.3
7
7
  - 1.9.2
8
+ - jruby-head
8
9
 
9
10
  gemfile:
10
11
  - Gemfile
11
12
 
13
+ matrix:
14
+ allow_failures:
15
+ - rvm: jruby-head
16
+
12
17
  # whitelist
13
18
  branches:
14
19
  only:
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ### 0.0.12 / 2014-08-24
2
+ * [CHANGE] Significant internal refactoring.
3
+ * [CHANGE] Method `ObjectSpace::_id2ref` is no longer used => one obstacle to JRuby porting is removed.
4
+ * [NEW] Added new class `ProductionRef`
5
+
1
6
  ### 0.0.11 / 2014-08-24
2
7
  * [FIX] `SequiturGrammar#check_unicity`: an exception was raised when it shouldn't. Added example in spec file.
3
8
  * [CHANGE] `sequitur.rb` : Added the convenience Sequitur::build_from method.
data/README.md CHANGED
@@ -20,8 +20,8 @@ It detects repeated token patterns and can represent them in a compact way.
20
20
  ```ruby
21
21
 
22
22
  require 'sequitur' # Load the Sequitur library
23
-
24
- input_sequence = 'abcabdab'
23
+
24
+ input_sequence = 'abcabdab' # Let's analyze this string
25
25
 
26
26
  # The SEQUITUR algorithm will detect the repeated 'ab' pattern
27
27
  # and will generate a context-free grammar that represents the input string
@@ -29,7 +29,7 @@ It detects repeated token patterns and can represent them in a compact way.
29
29
 
30
30
  # Display the grammar rules
31
31
  # Each rule is displayed with the format:
32
- # rule_id : a_sequence_grammar_symbols
32
+ # rule_id : a_sequence_of_grammar_symbols
33
33
  # Where:
34
34
  # - rule_id is the object id of a rule (in decimal)
35
35
  # - a grammar symbol is either a terminal symbol
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Sequitur # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.0.11'
6
+ Version = '0.0.12'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = 'Ruby implementation of the Sequitur algorithm'
@@ -10,12 +10,12 @@ class Digram
10
10
  # The sequence of two consecutive grammar symbols.
11
11
  attr_reader(:symbols)
12
12
 
13
- # The object id of the production that contains this digram in its rhs.
14
- attr_reader(:production_id)
15
-
16
13
  # A unique Hash key of the digram
17
14
  attr_reader(:key)
18
15
 
16
+ # The production in which the digram occurs
17
+ attr_reader(:production)
18
+
19
19
  # Constructor.
20
20
  # @param symbol1 [StringOrSymbol] First element of the digram
21
21
  # @param symbol2 [StringOrSymbol] Second element of the digram
@@ -24,13 +24,15 @@ class Digram
24
24
  def initialize(symbol1, symbol2, aProduction)
25
25
  @symbols = [symbol1, symbol2]
26
26
  @key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
27
- @production_id = aProduction.object_id
27
+ @production = aProduction
28
28
  end
29
-
30
- # Return the production object of this digram
31
- def production()
32
- ObjectSpace._id2ref(production_id)
29
+
30
+ # Equality testing.
31
+ # Returns true when keys of both digrams are equal
32
+ def ==(other)
33
+ return key == other.key
33
34
  end
35
+
34
36
  end # class
35
37
 
36
38
  end # module
@@ -47,11 +47,9 @@ class DynamicGrammar
47
47
  puts to_string if trace
48
48
  prod = productions.delete_at(anIndex)
49
49
  # TODO: remove output
50
- puts prod.to_string if trace
50
+ puts('Removed: ' + prod.to_string) if trace
51
51
  prod.clear_rhs
52
52
 
53
- check_backrefs # TODO: configurable check
54
-
55
53
  return prod
56
54
  end
57
55
 
@@ -68,57 +66,20 @@ class DynamicGrammar
68
66
  end
69
67
 
70
68
 
71
- # Check that any production reference in rhs is
69
+ # Check that every production reference in rhs is
72
70
  # pointing to a production of the grammar
73
71
  def check_rhs_of(aProduction)
74
72
  aProduction.references.each do |symb|
75
- next if productions.include?(symb)
73
+ referenced_prod = symb.production
74
+ next if productions.include?(referenced_prod)
76
75
 
77
- msg = "Production #{aProduction.object_id} refers to "
78
- msg << "production #{symb.object_id}"
76
+ msg = "Production #{aProduction.object_id} refers to"
77
+ msg << " production #{referenced_prod.object_id}"
79
78
  msg << ' that is not part of the grammar.'
80
79
  fail StandardError, msg
81
80
  end
82
81
  end
83
82
 
84
- # Check the invariants:
85
- # Every back reference must must point to a production of the grammar
86
- # Every back reference count must be equal to the number
87
- # of occurrences in the referencing production.
88
- def check_backrefs()
89
- return if productions.size < 2
90
-
91
- all_but_root = productions[1...productions.size]
92
- all_but_root.each do |a_prod|
93
- a_prod.backrefs.each do |other_prod_id, count|
94
- begin
95
- other_prod = ObjectSpace._id2ref(other_prod_id)
96
- rescue RangeError => exc
97
- msg = "Production #{a_prod.object_id} has a backref to "
98
- msg << "recycled production #{other_prod_id}."
99
- msg << "\n#{to_string}"
100
- $stderr.puts msg
101
- raise exc
102
- end
103
- found = productions.find { |elem| elem == other_prod }
104
- unless found
105
- msg = "Production #{a_prod.object_id} is referenced by the "
106
- msg << "unknown production (#{other_prod_id})."
107
- msg << "\n#{to_string}"
108
- fail StandardError, msg
109
- end
110
-
111
- unless count == found.rhs.count { |symb| symb == a_prod }
112
- msg = "Production #{a_prod.object_id} has a count mismatch"
113
- msg << "\nIt expects #{count} references in rhs of #{other_prod_id} "
114
- msg << "but actual count is #{other_prod.rhs.count}."
115
- msg << "\n#{to_string}"
116
- fail StandardError, msg
117
- end
118
- end
119
- end
120
- end
121
-
122
83
  end # class
123
84
 
124
85
  end # module
@@ -1,4 +1,5 @@
1
1
  require_relative 'digram'
2
+ require_relative 'production_ref'
2
3
 
3
4
  module Sequitur # Module for classes implementing the Sequitur algorithm
4
5
 
@@ -14,33 +15,76 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
14
15
  class Production
15
16
  # The right-hand side (rhs) consists of a sequence of grammar symbols
16
17
  attr_reader(:rhs)
18
+
19
+ # The reference count (= how many times other productions reference this one)
20
+ attr_reader(:refcount)
17
21
 
18
- # A Hash with pairs of the form:
19
- # production id => reference count
20
- # Where the reference count is the number of times this production
21
- # appears in the rhs of the production with given id.
22
- attr_reader(:backrefs)
22
+ # The sequence of digrams appearing in the RHS
23
+ attr_reader(:digrams)
23
24
 
24
25
  # Constructor. Build a production with an empty RHS.
25
26
  def initialize()
26
27
  clear_rhs
27
- @backrefs = {}
28
+ @refcount = 0
29
+ @digrams = []
28
30
  end
29
31
 
30
32
  public
31
33
 
34
+ def ==(other)
35
+ return true if object_id == other.object_id
36
+
37
+ if other.is_a?(ProductionRef)
38
+ result = (other == self)
39
+ else
40
+ result = false
41
+ end
42
+
43
+ return result
44
+ end
45
+
46
+
32
47
  # Is the rhs empty?
33
48
  def empty?
34
49
  return rhs.empty?
35
50
  end
36
51
 
52
+ def incr_refcount()
53
+ @refcount += 1
54
+ end
55
+
56
+ def decr_refcount()
57
+ fail StandardError if @refcount == 0
58
+ @refcount -= 1
59
+ end
60
+
37
61
 
38
62
  # Return the set of productions appearing in the rhs.
39
63
  def references()
40
- return rhs.select { |symb| symb.kind_of?(Production) }
64
+ return rhs.select { |symb| symb.is_a?(ProductionRef) }
65
+ end
66
+
67
+ # Return the set of references to a given production
68
+ def references_of(aProduction)
69
+ refs = references
70
+ return refs.select { |a_ref| a_ref == aProduction }
71
+ end
72
+
73
+
74
+
75
+
76
+ # Return the list of digrams found in rhs of this production.
77
+ def recalc_digrams()
78
+ return [] if rhs.size < 2
79
+
80
+ result = []
81
+ rhs.each_cons(2) { |couple| result << Digram.new(*couple, self) }
82
+
83
+ @digrams = result
41
84
  end
42
85
 
43
86
 
87
+
44
88
  # Does the rhs have exactly one digram only (= 2 symbols)?
45
89
  def single_digram?
46
90
  return rhs.size == 2
@@ -59,53 +103,14 @@ class Production
59
103
  same_key_found = all_keys.index(last_key)
60
104
  return !same_key_found.nil?
61
105
  end
62
-
106
+
63
107
  # Return the last digram appearing in the RHS.
64
108
  def last_digram()
65
- return nil if rhs.size < 2
66
-
67
- return Digram.new(rhs[-2], rhs[-1], self)
68
- end
69
-
70
-
71
-
72
- # The back reference count is the number of times this production
73
- # appears in the rhs of all the productions of the grammar
74
- def refcount()
75
- total = backrefs.values.reduce(0) do |sub_result, count|
76
- sub_result += count
77
- end
78
-
79
- return total
80
- end
81
-
82
- # Add a back reference to the given production.
83
- # @param aProduction [Production] Assume that production P appears in the
84
- # RHS of production Q, then a reference count of P is incremented in Q.
85
- def add_backref(aProduction)
86
- prod_id = aProduction.object_id
87
-
88
- count = backrefs.fetch(prod_id, 0)
89
- backrefs[prod_id] = count + 1
90
- return count
109
+ result = digrams.empty? ? nil : digrams.last
110
+ return result
91
111
  end
92
112
 
93
- # Decrement the reference count for the given production.
94
- # If result is zero, then the entry is removed from the Hash.
95
- def remove_backref(aProduction)
96
- prod_id = aProduction.object_id
97
-
98
- count = backrefs.fetch(prod_id)
99
- fail StandardError if count < 1
100
-
101
- if count > 1
102
- backrefs[prod_id] = count - 1
103
- else
104
- backrefs.delete(prod_id)
105
- end
106
113
 
107
- return count
108
- end
109
114
 
110
115
  # Emit a text representation of the production rule.
111
116
  # Text is of the form:
@@ -122,17 +127,24 @@ class Production
122
127
  return "#{object_id} : #{rhs_text.join(' ')}."
123
128
  end
124
129
 
125
- # Return the digrams for this production as if
126
- # the given symbol is appended at the end of the rhs
127
- def calc_append_symbol(aSymbol)
128
- return [] if empty?
129
-
130
- return digrams + [ Digram.new(rhs.last, aSymbol, self) ]
131
- end
132
-
130
+ # Add a (grammar) symbol at the end of the RHS.
133
131
  def append_symbol(aSymbol)
134
- aSymbol.add_backref(self) if aSymbol.kind_of?(Production)
135
- rhs << aSymbol
132
+ case aSymbol
133
+ when Production
134
+ new_symb = ProductionRef.new(aSymbol)
135
+ when ProductionRef
136
+ if aSymbol.unbound?
137
+ msg = 'Fail to append reference to nil production in '
138
+ msg << to_string
139
+ fail StandardError, msg
140
+ end
141
+ new_symb = aSymbol.dup
142
+ else
143
+ new_symb = aSymbol
144
+ end
145
+
146
+ rhs << new_symb
147
+ digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
136
148
  end
137
149
 
138
150
  # Clear the right-hand side.
@@ -140,61 +152,77 @@ class Production
140
152
  def clear_rhs()
141
153
  if rhs
142
154
  refs = references
143
- refs.each { |a_ref| a_ref.remove_backref(self) }
155
+ refs.each { |a_ref| a_ref.unbind }
144
156
  end
145
157
  @rhs = []
146
158
  end
147
159
 
148
- # Return the list digrams found in rhs of this production.
149
- def digrams()
150
- return [] if rhs.size < 2
151
-
152
- result = []
153
- rhs.each_cons(2) { |couple| result << Digram.new(*couple, self) }
154
-
155
- return result
156
- end
160
+ # Find all the positions where the digram occurs in the rhs
161
+ # Synopsis:
162
+ # Given the production p -> a b c a b a b d
163
+ # Then p.positions_of(a, b) should return [0, 3, 5]
164
+ # Caution: "overlapping" digrams shouldn't be counted
165
+ # Given the production p -> a a b a a a c d
166
+ # Then p.positions_of(a, a) should return [0, 3]
167
+ def positions_of(symb1, symb2)
157
168
 
158
- # Substitute in self all occurence of the digram that
159
- # appears in the rhs of the other production
160
- # Pre-condition:
161
- # another has a rhs with exactly one digram (= a two-symbol sequence).
162
- def replace_digram(another)
163
169
  # Find the positions where the digram occur in rhs
164
- (symb1, symb2) = another.rhs
165
170
  indices = [ -2 ] # Dummy index!
166
-
167
171
  (0...rhs.size).each do |i|
168
172
  next if i == indices.last + 1
169
173
  indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
170
174
  end
175
+
171
176
  indices.shift
172
177
 
173
- pos = indices.reverse
178
+ return indices
179
+ end
180
+
181
+
182
+ # Substitute in self all occurrences of the digram that
183
+ # appears in the rhs of the other production
184
+ # Pre-condition:
185
+ # another has a rhs with exactly one digram (= a two-symbol sequence).
186
+ def replace_digram(another)
187
+ (symb1, symb2) = another.rhs
188
+ pos = positions_of(symb1, symb2).reverse
174
189
 
175
190
  # Replace the two symbol sequence by the production
176
191
  pos.each do |index|
177
- rhs[index].remove_backref(self) if rhs[index].kind_of?(Production)
178
- rhs[index] = another
192
+ if rhs[index].is_a?(ProductionRef)
193
+ rhs[index].bind_to(another)
194
+ else
195
+ rhs[index] = ProductionRef.new(another)
196
+ end
179
197
  index1 = index + 1
180
- rhs[index1].remove_backref(self) if rhs[index1].kind_of?(Production)
198
+ rhs[index1].unbind if rhs[index1].is_a?(ProductionRef)
181
199
  rhs.delete_at(index1)
182
- another.add_backref(self)
183
200
  end
201
+
202
+ recalc_digrams
184
203
  end
185
204
 
186
205
  # Replace every occurrence of 'another' production in rhs by
187
206
  # the rhs of 'another'.
207
+ # Given the production p_A -> a p_B b p_B c
208
+ # And the production p_B -> x y
209
+ # Then the call p_A.replace_production(p_B)
210
+ # Modifies p_A into:
211
+ # p_A -> a x y b x y c
188
212
  def replace_production(another)
189
213
  (0...rhs.size).to_a.reverse.each do |index|
190
214
  next unless rhs[index] == another
191
- rhs.insert(index + 1, *another.rhs)
192
- another.rhs.each do |new_symb|
193
- new_symb.add_backref(self) if new_symb.kind_of?(Production)
215
+
216
+ # Avoid the aliasing of production reference
217
+ other_rhs = another.rhs.map do |symb|
218
+ symb.is_a?(ProductionRef) ? symb.dup : symb
194
219
  end
195
- another.remove_backref(self)
220
+ rhs.insert(index + 1, *other_rhs)
221
+ another.decr_refcount
196
222
  rhs.delete_at(index)
197
223
  end
224
+
225
+ recalc_digrams
198
226
  end
199
227
 
200
228
  end # class
@@ -0,0 +1,89 @@
1
+
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
+ # A production reference is a grammar symbol that may appear in the right-hand
6
+ # side of a production P1 and that refers to a production P2.
7
+ # Every time a production P2 appears in the right-hand side of
8
+ # production P1, this is implemented by inserting a production reference to P2
9
+ # in the appropriate position in the RHS of P1.
10
+ # In the literature, production references are also called non terminal
11
+ # symbols
12
+ class ProductionRef
13
+
14
+ # Link to the production to reference
15
+ attr_reader(:production)
16
+
17
+ # Constructor
18
+ # [target] The production that is being referenced.
19
+ def initialize(target)
20
+ bind_to(target)
21
+ end
22
+
23
+ # Copy constructor invoked by dup or clone methods
24
+ def initialize_copy(orig)
25
+ @production = nil
26
+ bind_to(orig.production)
27
+ end
28
+
29
+ # Return the text representation of a production reference.
30
+ def to_s()
31
+ return "#{production.object_id}"
32
+ end
33
+
34
+ alias_method :to_string, :to_s
35
+
36
+
37
+ # Equality testing.
38
+ # A production ref is equal to another one when it
39
+ # refers to the same production or when it is compared to
40
+ # the production it refers to.
41
+ def ==(other)
42
+ return true if object_id == other.object_id
43
+
44
+ if other.is_a?(ProductionRef)
45
+ result = (production == other.production)
46
+ else
47
+ result = (production == other)
48
+ end
49
+
50
+ return result
51
+ end
52
+
53
+ # Generates a Fixnum value as hash value.
54
+ # As a reference has no identity on its own,
55
+ # the method returns the hash value of the
56
+ # referenced production
57
+ def hash()
58
+ fail StandardError, 'Nil production' if production.nil?
59
+ return production.hash
60
+ end
61
+
62
+ # Make this reference point to the given production
63
+ def bind_to(aProduction)
64
+ return if aProduction == @production
65
+
66
+ production.decr_refcount if production
67
+ unless aProduction.kind_of?(Production)
68
+ fail StandardError, "Illegal production type #{aProduction.class}"
69
+ end
70
+ @production = aProduction
71
+ production.incr_refcount
72
+ end
73
+
74
+ # Clear the reference to the target production
75
+ def unbind()
76
+ production.decr_refcount
77
+ @production = nil
78
+ end
79
+
80
+ # Check that this object doesn't refer to any production.
81
+ def unbound?()
82
+ return production.nil?
83
+ end
84
+
85
+ end # class
86
+
87
+ end # module
88
+
89
+ # End of file
@@ -13,7 +13,7 @@ class SequiturGrammar < DynamicGrammar
13
13
  def initialize(anEnum)
14
14
  super()
15
15
  # Make start production compliant with utility rule
16
- 2.times { root.add_backref(root) }
16
+ 2.times { root.incr_refcount }
17
17
 
18
18
  @digrams = {}
19
19
  @parsed = []
@@ -34,14 +34,15 @@ class SequiturGrammar < DynamicGrammar
34
34
  all_digrams = {}
35
35
  productions.each do |a_prod|
36
36
  prod_digrams = a_prod.digrams
37
- prod_digrams.each do |a_digram|
37
+ prod_digrams.each_with_index do |a_digram, index|
38
+ next if index && a_digram == a_prod.digrams[index - 1]
38
39
  if all_digrams.include? a_digram.key
39
40
  msg = "Digram #{a_digram.symbols} occurs twice!"
40
41
  colliding = all_digrams[a_digram.key]
41
- msg << "\nOnce in production #{colliding.production_id}"
42
+ msg << "\nOnce in production #{colliding.production.object_id}"
42
43
  msg << "\nSecond in production #{a_prod.object_id}"
43
44
  msg << "\n#{to_string}"
44
- fail StandardError, msg unless colliding.production_id == a_prod.object_id
45
+ fail StandardError, msg unless colliding == a_prod
45
46
  else
46
47
  all_digrams[a_digram.key] = a_digram
47
48
  end
@@ -55,8 +56,8 @@ class SequiturGrammar < DynamicGrammar
55
56
  # Assumption: last digram of production isn't yet registered.
56
57
  def add_production(aProduction)
57
58
  super # Call original method from superclass...
58
-
59
- # ... then add this behaviour
59
+
60
+ # ... then add this behaviour
60
61
  last_digram = aProduction.last_digram
61
62
  digrams[last_digram.key] = last_digram
62
63
  end
@@ -67,7 +68,7 @@ class SequiturGrammar < DynamicGrammar
67
68
 
68
69
  # Retrieve in the Hash all registered digrams from the removed production
69
70
  digrams_subset = digrams.select do |_, digr|
70
- digr.production_id == prod.object_id
71
+ digr.production == prod
71
72
  end
72
73
 
73
74
  # Remove them...
@@ -76,10 +77,10 @@ class SequiturGrammar < DynamicGrammar
76
77
  end
77
78
 
78
79
  def append_symbol_to(aProduction, aSymbol)
79
- prod_digrams = aProduction.calc_append_symbol(aSymbol)
80
80
  check_digrams # TODO: remove this
81
- check_backrefs # TODO: remove this
82
81
  super
82
+
83
+ prod_digrams = aProduction.digrams
83
84
  unless prod_digrams.empty?
84
85
  last_digram = prod_digrams.last
85
86
  matching_digram = digrams[last_digram.key]
@@ -105,32 +106,19 @@ class SequiturGrammar < DynamicGrammar
105
106
  def preserve_unicity(aProduction)
106
107
  last_digram = aProduction.last_digram
107
108
  matching_digram = digrams[last_digram.key]
108
- if last_digram.production_id == matching_digram.production_id
109
+ if aProduction == matching_digram.production
109
110
  # Rule: no other production distinct from aProduction should have
110
111
  # the matching digram
111
112
  productions.each do |prod|
112
113
  its_digrams = prod.digrams
113
114
  its_keys = its_digrams.map(&:key)
114
- if prod.object_id == last_digram.production_id
115
- # TODO: check that digram really occurs twice in the production.
116
- # occurrences = its_keys.select { |a_key| a_key == last_digram.key }
117
- # if occurrences.size != 2
118
- # msg = "Digram #{last_digram.symbols} should occur twice"
119
- # msg << "\nin production #{aProduction.object_id}"
120
- # msg << "\nBut occurs #{occurrences.size}"
121
- # msg << "\n#{self.to_string}"
122
- # fail StandardError, msg
123
- # end
124
-
125
- else
126
- if its_keys.include? last_digram.key
127
- msg = "Digram #{last_digram.symbols} occurs three times!"
128
- msg << "\nTwice in production #{aProduction.object_id}"
129
- msg << "\nThird in production #{prod.object_id}"
130
- msg << "\n#{to_string}"
131
- fail StandardError, msg
132
- end
133
- end
115
+ next if prod == last_digram.production
116
+ next unless its_keys.include? last_digram.key
117
+ msg = "Digram #{last_digram.symbols} occurs three times!"
118
+ msg << "\nTwice in production #{aProduction.object_id}"
119
+ msg << "\nThird in production #{prod.object_id}"
120
+ msg << "\n#{to_string}"
121
+ fail StandardError, msg
134
122
  end
135
123
 
136
124
  # Digram appears twice in given production...
@@ -148,17 +136,18 @@ class SequiturGrammar < DynamicGrammar
148
136
  else
149
137
  # Duplicate digram used in distinct production
150
138
  # Two cases: other production is a single digram one or a multi-digram
151
- other_prod = ObjectSpace._id2ref(matching_digram.production_id)
139
+ other_prod = matching_digram.production
152
140
  if other_prod.single_digram?
153
141
  # ... replace duplicate digram by reference to other production
154
142
  aProduction.replace_digram(other_prod)
155
143
  update_digrams_from(aProduction)
156
144
 
157
- # Special case a: replacement causes another digram duplication
145
+ # Special case a: replacement causes another digram duplication
158
146
  # in the given production
159
- # Special case b: replacement causes another digram duplication
147
+ # Special case b: replacement causes another digram duplication
160
148
  # with other production
161
- if aProduction.repeated_digram? || digrams[aProduction.last_digram.key]
149
+ if aProduction.repeated_digram? ||
150
+ (digrams[aProduction.last_digram.key].production != aProduction)
162
151
  preserve_unicity(aProduction)
163
152
  end
164
153
 
@@ -178,7 +167,6 @@ class SequiturGrammar < DynamicGrammar
178
167
 
179
168
  # TODO: Check when aProduction and other_prod have same preceding symbol
180
169
  update_digrams_from(other_prod)
181
- check_backrefs # TODO: remove this
182
170
  end
183
171
  check_unicity
184
172
  end
@@ -205,16 +193,17 @@ class SequiturGrammar < DynamicGrammar
205
193
  loop do
206
194
  all_refcount_ok = true
207
195
  (1...productions.size).to_a.reverse.each do |index|
208
- next unless productions[index].refcount == 1
196
+ curr_production = productions[index]
197
+ next unless curr_production.refcount == 1
209
198
 
210
199
  all_refcount_ok = false
211
- other_id = productions[index].backrefs.keys.first
212
- dependent = ObjectSpace._id2ref(other_id)
200
+ dependent = productions.find do |a_prod|
201
+ !a_prod.references_of(curr_production).empty?
202
+ end
213
203
  dependent.replace_production(productions[index])
214
204
  delete_production(index)
215
205
  update_digrams_from(dependent)
216
206
  check_references
217
- check_backrefs
218
207
  end
219
208
 
220
209
  break if all_refcount_ok
@@ -226,14 +215,14 @@ class SequiturGrammar < DynamicGrammar
226
215
  def update_digrams_from(aProduction)
227
216
  current_digrams = aProduction.digrams
228
217
 
229
- # Add new digrams
218
+ # Add new digrams only if they don't collide
230
219
  current_digrams.each do |digr|
231
220
  digrams[digr.key] = digr unless digrams.include? digr.key
232
221
  end
233
222
 
234
223
  # Retrieve all registered digrams from the production
235
224
  digrams_subset = digrams.select do |_, digr|
236
- digr.production_id == aProduction.object_id
225
+ digr.production == aProduction
237
226
  end
238
227
 
239
228
  # Remove obsolete digrams
@@ -244,16 +233,18 @@ class SequiturGrammar < DynamicGrammar
244
233
  end
245
234
 
246
235
  # Check the invariant:
247
- # Every production reference in a rhs must point
236
+ # Every reference in a rhs that is bound must point
248
237
  # to a production of the grammar.
249
238
  def check_references()
250
239
  productions.each do |a_prod|
251
240
  rhs_prods = a_prod.references
252
- rhs_prods.each do |referenced_prod|
241
+ rhs_prods.each do |a_reference|
242
+ next if a_reference.unbound?
243
+ referenced_prod = a_reference.production
253
244
  next if productions.include? referenced_prod
254
245
 
255
- msg = "Production #{a_prod.object_id} references the "
256
- msg << "unknown production #{referenced_prod.object_id}"
246
+ msg = "Production #{a_prod.object_id} #{a_prod.to_string}"
247
+ msg << " references the unknown production #{referenced_prod.object_id}"
257
248
  msg << "\nOrphan production: #{referenced_prod.to_string}"
258
249
  msg << "\n#{to_string}"
259
250
  fail StandardError, msg
@@ -265,13 +256,11 @@ class SequiturGrammar < DynamicGrammar
265
256
  # Every registered digram must reference a production from the grammar
266
257
  def check_registered()
267
258
  digrams.each do |_key, digr|
268
- found = productions.find do |a_prod|
269
- digr.production_id == a_prod.object_id
270
- end
259
+ found = productions.find { |a_prod| digr.production == a_prod }
271
260
  next if found
272
261
 
273
262
  msg = "Digram #{digr.symbols} references the unknown "
274
- msg << "production (#{digr.production_id})."
263
+ msg << "production (#{digr.production.object_id})."
275
264
  msg << "\n#{to_string}"
276
265
  fail StandardError, msg
277
266
  end
@@ -283,7 +272,7 @@ class SequiturGrammar < DynamicGrammar
283
272
  # Control that every registered digram refers
284
273
  # to a production that really has that digram
285
274
  digrams.each do |key, digr|
286
- its_prod = ObjectSpace._id2ref(digr.production_id)
275
+ its_prod = digr.production
287
276
  prod_digrams = its_prod.digrams
288
277
  prod_keys = prod_digrams.map(&:key)
289
278
  next if prod_keys.include? key
@@ -307,15 +296,15 @@ class SequiturGrammar < DynamicGrammar
307
296
  all_digrams.each do |key, digr|
308
297
  registered = digrams[key]
309
298
  if registered
310
- if registered.production_id != digr.production_id
311
- msg = "Production #{digr.production_id} has "
299
+ if registered != digr
300
+ msg = "Production #{digr.production.object_id} has "
312
301
  msg << "the digram #{digr.symbols} that collides"
313
- msg << "\n with same digram from #{registered.production_id}"
302
+ msg << "\n with same digram from #{registered.production.object_id}"
314
303
  msg << "\n#{to_string}"
315
304
  fail StandardError, msg
316
305
  end
317
306
  else
318
- its_prod = ObjectSpace._id2ref(digr.production_id)
307
+ its_prod = digr.production
319
308
  msg = "Production #{its_prod.object_id} (#{its_prod.rhs}) "
320
309
  msg << "has the digram #{digr.symbols} that isn't registered."
321
310
  msg << "\n#{to_string}"
@@ -7,24 +7,39 @@ module Sequitur # Re-open the module to get rid of qualified names
7
7
 
8
8
  describe Digram do
9
9
  let(:two_symbols) { [:b, :c] }
10
+ let(:production) { double('sample-production') }
10
11
 
11
12
  context 'Standard creation & initialization:' do
12
13
 
13
14
  it 'should be created with 3 arguments' do
14
- production = double('sample-production')
15
15
  instance = Digram.new(:b, :c, production)
16
16
 
17
17
  expect(instance.symbols).to eq(two_symbols)
18
- expect(instance.production_id).to eq(production.object_id)
18
+ expect(instance.production).to eq(production)
19
19
  end
20
20
 
21
21
  it 'should return the production that it refers to' do
22
- production = double('sample-production')
23
22
  instance = Digram.new(:b, :c, production)
24
23
  expect(instance.production).to eq(production)
25
24
  end
26
25
 
27
- end # context
26
+ end # context
27
+
28
+ context 'Standard creation & initialization:' do
29
+
30
+ it 'should compare itself to another digram' do
31
+ instance1 = Digram.new(:a, :b, production)
32
+ same = Digram.new(:a, :b, production)
33
+ different = Digram.new(:b, :c, production)
34
+
35
+ expect(instance1).to eq(instance1)
36
+ expect(instance1).to eq(same)
37
+ expect(instance1).not_to eq(different)
38
+ expect(same).not_to eq(different)
39
+ end
40
+
41
+ end # context
42
+
28
43
 
29
44
  end # describe
30
45
 
@@ -0,0 +1,95 @@
1
+ require_relative '../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../lib/sequitur/production'
5
+ require_relative '../../lib/sequitur/production_ref'
6
+
7
+ module Sequitur # Re-open the module to get rid of qualified names
8
+
9
+ describe ProductionRef do
10
+
11
+ let(:target) { Production.new }
12
+ let(:another_target) { Production.new }
13
+
14
+ subject { ProductionRef.new(target) }
15
+
16
+ context 'Creation & initialization:' do
17
+
18
+ it 'should be created with a production argument' do
19
+ expect { ProductionRef.new(target) }.not_to raise_error
20
+ expect(target.refcount).to eq(1)
21
+ end
22
+
23
+ it 'should clone with reference count incrementing' do
24
+ expect(target.refcount).to eq(0)
25
+ expect(subject.production.refcount).to eq(1)
26
+ klone = subject.clone
27
+ expect(klone.production.refcount).to eq(2)
28
+ duplicate = subject.dup
29
+ expect(duplicate.production.refcount).to eq(3)
30
+ end
31
+
32
+ it 'should know its referenced production' do
33
+ instance = ProductionRef.new(target)
34
+ expect(instance.production).to eq(target)
35
+ end
36
+
37
+
38
+ end # context
39
+
40
+ context 'Provided services:' do
41
+
42
+ it 'should render its referenced production' do
43
+ expect(subject.to_s).to eq(target.object_id.to_s)
44
+ end
45
+
46
+ it 'should unbind itself from its production' do
47
+ expect(target.refcount).to eq(0)
48
+ expect(subject).not_to be_unbound
49
+ expect(target.refcount).to eq(1)
50
+ subject.unbind
51
+ expect(target.refcount).to eq(0)
52
+ expect(subject.production).to be_nil
53
+ expect(subject).to be_unbound
54
+ end
55
+
56
+ it 'should bind to a production' do
57
+ expect(target.refcount).to eq(0)
58
+
59
+ expect(subject).not_to be_unbound
60
+ expect(target.refcount).to eq(1)
61
+
62
+ # Case: bind again to same production
63
+ expect { subject.bind_to(target) }.not_to raise_error
64
+ expect(target.refcount).to eq(1)
65
+
66
+ # Case: bind to another production
67
+ expect(another_target.refcount).to eq(0)
68
+ subject.bind_to(another_target)
69
+ expect(target.refcount).to eq(0)
70
+ expect(another_target.refcount).to eq(1)
71
+ end
72
+
73
+ it 'should compare to other production (reference)' do
74
+ same = ProductionRef.new(target)
75
+ expect(subject).to eq(subject) # Strict identity
76
+ expect(subject).to eq(same) # 2 references pointing to same production
77
+ expect(subject).to eq(target)
78
+ end
79
+
80
+ it 'should return the hash value of its production' do
81
+ expectation = target.hash
82
+ expect(subject.hash).to eq(expectation)
83
+ end
84
+
85
+ it 'should complain when requested for a hash and unbound' do
86
+ subject.unbind
87
+ expect { subject.hash }.to raise_error(StandardError)
88
+ end
89
+ end
90
+
91
+ end # describe
92
+
93
+ end # module
94
+
95
+ # End of file
@@ -43,33 +43,52 @@ describe Production do
43
43
  expect(subject.last_digram).to be_nil
44
44
  end
45
45
  end # context
46
-
46
+
47
47
  context 'Knowing its rhs:' do
48
-
48
+
49
49
  it 'should know the productions in its rhs' do
50
50
  # Case 1: empty production
51
51
  expect(subject.references).to be_empty
52
-
52
+
53
53
  # Case 2: production without references
54
54
  symbols = [:a, :b, :c]
55
55
  symbols.each { |symb| subject.append_symbol(symb) }
56
56
  expect(subject.references).to be_empty
57
+ expect(subject.references_of(p_a)).to be_empty
57
58
 
58
59
  # Case 2: production with one reference
59
60
  subject.append_symbol(p_a)
60
61
  expect(subject.references).to eq([p_a])
62
+ expect(subject.references_of(p_a)).to eq([p_a])
61
63
 
62
64
  # Case 3: production with repeated references
63
65
  subject.append_symbol(p_a) # second time
64
- expect(subject.references).to eq([p_a, p_a])
66
+ expect(subject.references).to eq([p_a, p_a])
67
+ expect(subject.references_of(p_a)).to eq([p_a, p_a])
68
+
65
69
 
66
70
  # Case 4: production with multiple distinct references
67
71
  subject.append_symbol(p_bc)
68
- expect(subject.references).to eq([p_a, p_a, p_bc])
72
+ expect(subject.references).to eq([p_a, p_a, p_bc])
73
+ expect(subject.references_of(p_bc)).to eq([p_bc])
69
74
  end
70
-
75
+
76
+ it 'should know the position(s) of a given digram' do
77
+ sequence1 = [:a, :b, :c, :a, :b, :a, :b, :d]
78
+ sequence1.each { |symb| subject.append_symbol(symb) }
79
+ positions = [0, 3, 5]
80
+ expect(subject.positions_of(:a, :b)).to eq(positions)
81
+
82
+ subject.clear_rhs
83
+ # Case of overlapping digrams
84
+ sequence2 = [:a, :a, :b, :a, :a, :a, :c, :d]
85
+ sequence2.each { |symb| subject.append_symbol(symb) }
86
+ positions = [0, 3]
87
+ expect(subject.positions_of(:a, :a)).to eq(positions)
88
+ end
89
+
71
90
  end # context
72
-
91
+
73
92
  context 'Appending a symbol:' do
74
93
 
75
94
  it 'should append a symbol when empty' do
@@ -92,31 +111,48 @@ describe Production do
92
111
  expect(subject.last_digram.symbols).to eq([:e, :f])
93
112
  end
94
113
 
95
- it 'should increment the refcount for each production in the rhs' do
114
+ it 'should append a production in its rhs' do
115
+ # Side-effect: refcount of production to append is incremented
96
116
  expect(p_a.refcount).to be(0)
97
117
 
98
118
  input = [p_a, :b, :c, :d, p_a, :e, :f] # p_a appears twice
99
119
  input.each { |symb| subject.append_symbol(symb) }
100
120
  expect(p_a.refcount).to be(2)
101
121
  end
102
-
103
- it 'should calculate the digrams before appending:' do
104
- # Case: empty production
105
- expect(subject.calc_append_symbol(:a)).to be_empty
106
-
107
- # Case: single-symbol rhs
108
- subject.append_symbol(:a)
109
- expect(to_symbols(subject.calc_append_symbol(:b))).to eq([[:a, :b]])
110
-
111
- # Case: two-symbols rhs
112
- subject.append_symbol(:b)
113
- expectation = [[:a, :b], [:b, :c]]
114
- expect(to_symbols(subject.calc_append_symbol(:c))).to eq(expectation)
122
+
123
+ it 'should append a production ref in its rhs' do
124
+ # Side-effect: refcount of production to append is incremented
125
+ ref_a = ProductionRef.new(p_a)
126
+ expect(p_a.refcount).to be(1)
127
+
128
+ input = [ref_a, :b, :c, :d, ref_a] # ref_a appears twice
129
+ input.each { |symb| subject.append_symbol(symb) }
130
+
131
+ # References in rhs should point to p_a...
132
+ # ...but should be distinct reference objects
133
+ expect(subject.rhs[0]).to eq(p_a)
134
+ expect(subject.rhs[0].object_id).not_to eq(ref_a.object_id)
135
+ expect(subject.rhs[-1]).to eq(p_a)
136
+ expect(subject.rhs[-1].object_id).not_to eq(ref_a.object_id)
137
+
138
+ # Reference count should be updated
139
+ expect(p_a.refcount).to be(3)
140
+ end
141
+
142
+ it 'should complain when appending ref to nil production' do
143
+ # Side-effect: refcount of production to append is incremented
144
+ ref_a = ProductionRef.new(p_a)
145
+ expect(p_a.refcount).to be(1)
146
+
147
+ # Unbind the reference
148
+ ref_a.unbind
149
+
150
+ expect { subject.append_symbol(ref_a) }.to raise_error(StandardError)
115
151
  end
116
152
 
117
153
  end # context
118
-
119
-
154
+
155
+
120
156
  context 'Text representation of a production rule:' do
121
157
 
122
158
  it 'should emit minimal text when empty' do
@@ -128,7 +164,8 @@ describe Production do
128
164
  instance = Production.new
129
165
  symbols = [:a, :b, 'c', :d, :e, 1000, instance]
130
166
  symbols.each { |symb| subject.append_symbol(symb) }
131
- expectation = "#{subject.object_id} : a b 'c' d e 1000 #{instance.object_id}."
167
+ expectation = "#{subject.object_id} : "
168
+ expectation << "a b 'c' d e 1000 #{instance.object_id}."
132
169
  expect(subject.to_string).to eq(expectation)
133
170
  end
134
171
 
@@ -138,19 +175,19 @@ describe Production do
138
175
  it 'should report no repetition when empty' do
139
176
  expect(subject.repeated_digram?).to be_falsey
140
177
  end
141
-
178
+
142
179
  it 'should report no repetition when rhs has less than 3 symbols' do
143
180
  subject.append_symbol(:a)
144
181
  expect(subject.repeated_digram?).to be_falsey
145
-
182
+
146
183
  subject.append_symbol(:a)
147
- expect(subject.repeated_digram?).to be_falsey
184
+ expect(subject.repeated_digram?).to be_falsey
148
185
  end
149
-
186
+
150
187
  it 'should detect shortest repetition' do
151
188
  'aaa'.each_char { |symb| subject.append_symbol(symb) }
152
- expect(subject.repeated_digram?).to be_truthy
153
- end
189
+ expect(subject.repeated_digram?).to be_truthy
190
+ end
154
191
 
155
192
  it 'should detect any repetition pattern' do
156
193
  # Positive cases
@@ -160,15 +197,15 @@ describe Production do
160
197
  word.each_char { |symb| instance.append_symbol(symb) }
161
198
  expect(instance.repeated_digram?).to be_truthy
162
199
  end
163
-
200
+
164
201
  # Negative cases
165
202
  cases = %w(abc abb abba abcdef)
166
203
  cases.each do |word|
167
204
  instance = Production.new
168
205
  word.each_char { |symb| instance.append_symbol(symb) }
169
206
  expect(instance.repeated_digram?).to be_falsey
170
- end
171
- end
207
+ end
208
+ end
172
209
  end # context
173
210
 
174
211
  context 'Replacing a digram by a production:' do
@@ -182,12 +219,13 @@ describe Production do
182
219
 
183
220
  it 'should replace two-symbol sequence' do
184
221
  %w(a b c d e b c e).each { |symb| subject.append_symbol(symb) }
222
+ p_bc_before = p_bc.to_string
185
223
  subject.replace_digram(p_bc)
186
224
 
187
225
  expect(subject.rhs.size).to eq(6)
188
226
  expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc, 'e'])
189
227
  expect(p_bc.refcount).to eq(2)
190
- expect(p_bc.backrefs[subject.object_id]).to eq(2)
228
+ expect(p_bc.to_string).to eq(p_bc_before)
191
229
  end
192
230
 
193
231
 
@@ -198,7 +236,6 @@ describe Production do
198
236
  expect(subject.rhs.size).to eq(5)
199
237
  expect(subject.rhs).to eq([p_bc, 'd', 'e', p_bc, 'e'])
200
238
  expect(p_bc.refcount).to eq(2)
201
- expect(p_bc.backrefs[subject.object_id]).to eq(2)
202
239
  end
203
240
 
204
241
 
@@ -209,7 +246,6 @@ describe Production do
209
246
  expect(subject.rhs.size).to eq(5)
210
247
  expect(subject.rhs).to eq(['a', p_bc, 'd', 'e', p_bc])
211
248
  expect(p_bc.refcount).to eq(2)
212
- expect(p_bc.backrefs[subject.object_id]).to eq(2)
213
249
  end
214
250
 
215
251
  it 'should replace two consecutive two-symbol sequences' do
@@ -219,7 +255,6 @@ describe Production do
219
255
  expect(subject.rhs.size).to eq(4)
220
256
  expect(subject.rhs).to eq(['a', p_bc, p_bc, 'd'])
221
257
  expect(p_bc.refcount).to eq(2)
222
- expect(p_bc.backrefs[subject.object_id]).to eq(2)
223
258
  end
224
259
 
225
260
  end # context
@@ -233,18 +268,23 @@ describe Production do
233
268
 
234
269
  it 'should replace a production at the start' do
235
270
  [p_bc, 'd'].each { |symb| subject.append_symbol(symb) }
271
+ expect(p_bc.refcount).to eq(1)
272
+
236
273
  subject.replace_production(p_bc)
237
274
  expect(subject.rhs.size).to eq(3)
238
275
  expect(subject.rhs).to eq(%w(b c d))
276
+ expect(p_bc.refcount).to eq(0)
239
277
  end
240
278
 
241
279
 
242
280
  it 'should replace a production at the end' do
243
281
  ['d', p_bc].each { |symb| subject.append_symbol(symb) }
282
+ expect(p_bc.refcount).to eq(1)
244
283
  subject.replace_production(p_bc)
245
284
 
246
285
  expect(subject.rhs.size).to eq(3)
247
286
  expect(subject.rhs).to eq(%w(d b c))
287
+ expect(p_bc.refcount).to eq(0)
248
288
  end
249
289
 
250
290
  it 'should replace a production as sole symbol' do
@@ -79,7 +79,7 @@ describe SequiturGrammar do
79
79
  input = 'aaac' # This sequence raised an exception
80
80
 
81
81
  # Creation
82
- expect {SequiturGrammar.new(input.chars)}.not_to raise_error
82
+ expect { SequiturGrammar.new(input.chars) }.not_to raise_error
83
83
  end
84
84
 
85
85
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequitur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-26 00:00:00.000000000 Z
11
+ date: 2014-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -90,9 +90,11 @@ files:
90
90
  - lib/sequitur/digram.rb
91
91
  - lib/sequitur/dynamic_grammar.rb
92
92
  - lib/sequitur/production.rb
93
+ - lib/sequitur/production_ref.rb
93
94
  - lib/sequitur/sequitur_grammar.rb
94
95
  - spec/sequitur/digram_spec.rb
95
96
  - spec/sequitur/dynamic_grammar_spec.rb
97
+ - spec/sequitur/production_ref_spec.rb
96
98
  - spec/sequitur/production_spec.rb
97
99
  - spec/sequitur/sequitur_grammar_spec.rb
98
100
  - spec/spec_helper.rb
@@ -130,5 +132,6 @@ summary: Ruby implementation of the Sequitur algorithm
130
132
  test_files:
131
133
  - spec/sequitur/digram_spec.rb
132
134
  - spec/sequitur/dynamic_grammar_spec.rb
135
+ - spec/sequitur/production_ref_spec.rb
133
136
  - spec/sequitur/production_spec.rb
134
137
  - spec/sequitur/sequitur_grammar_spec.rb