sequitur 0.1.18 → 0.1.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +163 -49
- data/.travis.yml +13 -10
- data/CHANGELOG.md +9 -0
- data/Gemfile +2 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +13 -10
- data/examples/integer_sample.rb +5 -6
- data/examples/porridge.rb +4 -6
- data/examples/simple_case.rb +5 -6
- data/examples/symbol_sample.rb +5 -8
- data/examples/word_sample.rb +1 -2
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +38 -38
- data/lib/sequitur/dynamic_grammar.rb +91 -95
- data/lib/sequitur/formatter/base_text.rb +1 -1
- data/lib/sequitur/formatter/debug.rb +2 -2
- data/lib/sequitur/grammar_visitor.rb +96 -98
- data/lib/sequitur/production.rb +10 -19
- data/lib/sequitur/production_ref.rb +104 -105
- data/lib/sequitur/sequitur_grammar.rb +3 -3
- data/lib/sequitur/symbol_sequence.rb +7 -11
- data/spec/sequitur/digram_spec.rb +8 -8
- data/spec/sequitur/production_spec.rb +7 -7
- data/spec/sequitur/sequitur_grammar_spec.rb +10 -10
- data/spec/sequitur/symbol_sequence_spec.rb +4 -4
- data/spec/spec_helper.rb +6 -4
- metadata +44 -29
data/lib/sequitur/production.rb
CHANGED
@@ -23,7 +23,7 @@ class Production
|
|
23
23
|
|
24
24
|
# Constructor.
|
25
25
|
# Build a production with an empty RHS.
|
26
|
-
def initialize
|
26
|
+
def initialize
|
27
27
|
@rhs = SymbolSequence.new
|
28
28
|
@refcount = 0
|
29
29
|
@digrams = []
|
@@ -44,7 +44,6 @@ class Production
|
|
44
44
|
return result
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
47
|
# Is the rhs empty?
|
49
48
|
# @ return true if the rhs has no members.
|
50
49
|
def empty?
|
@@ -52,20 +51,19 @@ class Production
|
|
52
51
|
end
|
53
52
|
|
54
53
|
# Increment the reference count by one.
|
55
|
-
def incr_refcount
|
54
|
+
def incr_refcount
|
56
55
|
@refcount += 1
|
57
56
|
end
|
58
57
|
|
59
58
|
# Decrement the reference count by one.
|
60
|
-
def decr_refcount
|
59
|
+
def decr_refcount
|
61
60
|
raise StandardError, 'Internal error' if @refcount.zero?
|
62
61
|
@refcount -= 1
|
63
62
|
end
|
64
63
|
|
65
|
-
|
66
64
|
# Select the references to production appearing in the rhs.
|
67
65
|
# @return [Array of ProductionRef]
|
68
|
-
def references
|
66
|
+
def references
|
69
67
|
return rhs.references
|
70
68
|
end
|
71
69
|
|
@@ -77,10 +75,9 @@ class Production
|
|
77
75
|
return rhs.references_of(real_prod)
|
78
76
|
end
|
79
77
|
|
80
|
-
|
81
78
|
# Enumerate the digrams appearing in the right-hand side (rhs)
|
82
79
|
# @return [Array] the list of digrams found in rhs of this production.
|
83
|
-
def recalc_digrams
|
80
|
+
def recalc_digrams
|
84
81
|
return [] if rhs.size < 2
|
85
82
|
|
86
83
|
result = []
|
@@ -88,20 +85,17 @@ class Production
|
|
88
85
|
@digrams = result
|
89
86
|
end
|
90
87
|
|
91
|
-
|
92
|
-
|
93
88
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
94
89
|
# @return [true/false] true when the rhs contains exactly two symbols.
|
95
90
|
def single_digram?
|
96
91
|
return rhs.size == 2
|
97
92
|
end
|
98
93
|
|
99
|
-
|
100
94
|
# Detect whether the last digram occurs twice
|
101
95
|
# Assumption: when a digram occurs twice in a production then it must occur
|
102
96
|
# at the end of the rhs
|
103
97
|
# @return [true/false] true when the digram occurs twice in rhs.
|
104
|
-
def repeated_digram?
|
98
|
+
def repeated_digram?
|
105
99
|
return false if rhs.size < 3
|
106
100
|
|
107
101
|
my_digrams = digrams
|
@@ -113,17 +107,16 @@ class Production
|
|
113
107
|
|
114
108
|
# Retrieve the last digram appearing in the RHS (if any).
|
115
109
|
# @return [Digram] last digram in the rhs otherwise nil.
|
116
|
-
def last_digram
|
110
|
+
def last_digram
|
117
111
|
result = digrams.empty? ? nil : digrams.last
|
118
112
|
return result
|
119
113
|
end
|
120
114
|
|
121
|
-
|
122
115
|
# Emit a text representation of the production rule.
|
123
116
|
# Text is of the form:
|
124
117
|
# object id of production : rhs as space-separated sequence of symbols.
|
125
118
|
# @return [String]
|
126
|
-
def to_string
|
119
|
+
def to_string
|
127
120
|
return "#{object_id} : #{rhs.to_string}."
|
128
121
|
end
|
129
122
|
|
@@ -150,7 +143,7 @@ class Production
|
|
150
143
|
|
151
144
|
# Clear the right-hand side.
|
152
145
|
# Any referenced production has its reference counter decremented.
|
153
|
-
def clear_rhs
|
146
|
+
def clear_rhs
|
154
147
|
rhs.clear
|
155
148
|
end
|
156
149
|
|
@@ -168,7 +161,7 @@ class Production
|
|
168
161
|
# p.positions_of(a, a) # => [0, 3]
|
169
162
|
def positions_of(symb1, symb2)
|
170
163
|
# Find the positions where the digram occur in rhs
|
171
|
-
indices = [
|
164
|
+
indices = [-2] # Dummy index!
|
172
165
|
(0...rhs.size).each do |i|
|
173
166
|
next if i == indices.last + 1
|
174
167
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
@@ -179,7 +172,6 @@ class Production
|
|
179
172
|
return indices
|
180
173
|
end
|
181
174
|
|
182
|
-
|
183
175
|
# Given that the production P passed as argument has exactly 2 symbols
|
184
176
|
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
185
177
|
# s1 s2 by a reference to P.
|
@@ -217,7 +209,6 @@ class Production
|
|
217
209
|
recalc_digrams
|
218
210
|
end
|
219
211
|
|
220
|
-
|
221
212
|
# Part of the 'visitee' role in Visitor design pattern.
|
222
213
|
# @param aVisitor[GrammarVisitor]
|
223
214
|
def accept(aVisitor)
|
@@ -1,115 +1,114 @@
|
|
1
1
|
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
-
# side of a production P1 and that refers to a production P2.
|
6
|
-
# Every time a production P2 appears in the left-hand side of
|
7
|
-
# production P1, this is implemented by inserting a production reference to P2
|
8
|
-
# in the appropriate position in the RHS of P1.
|
9
|
-
# In the literature, production references are also called non terminal
|
10
|
-
# symbols
|
11
|
-
# @example
|
12
|
-
# # Given a production rule...
|
13
|
-
# prod = Sequitur::Production.new
|
14
|
-
# puts prod.refcount # outputs 0
|
15
|
-
# # ... Build a reference to it
|
16
|
-
# ref = Sequitur::ProductionRef.new(prod)
|
17
|
-
# # ... Production reference count is updated...
|
18
|
-
# puts prod.refcount # outputs 1
|
19
|
-
class ProductionRef
|
20
|
-
# Link to the production to reference.
|
21
|
-
attr_reader(:production)
|
22
|
-
|
23
|
-
# Constructor
|
24
|
-
# @param target [Production or ProductionRef]
|
25
|
-
# The production that is being referenced.
|
26
|
-
def initialize(target)
|
27
|
-
bind_to(target)
|
28
|
-
end
|
29
|
-
|
30
|
-
# Copy constructor invoked by dup or clone methods.
|
31
|
-
# @param orig [ProductionRef]
|
4
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
+
# side of a production P1 and that refers to a production P2.
|
6
|
+
# Every time a production P2 appears in the left-hand side of
|
7
|
+
# production P1, this is implemented by inserting a production reference to P2
|
8
|
+
# in the appropriate position in the RHS of P1.
|
9
|
+
# In the literature, production references are also called non terminal
|
10
|
+
# symbols
|
32
11
|
# @example
|
12
|
+
# # Given a production rule...
|
33
13
|
# prod = Sequitur::Production.new
|
14
|
+
# puts prod.refcount # outputs 0
|
15
|
+
# # ... Build a reference to it
|
34
16
|
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
-
#
|
36
|
-
# puts prod.refcount # outputs
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
47
|
-
|
48
|
-
alias to_string to_s
|
49
|
-
|
50
|
-
|
51
|
-
# Equality testing.
|
52
|
-
# A production ref is equal to another one when its
|
53
|
-
# refers to the same production or when it is compared to
|
54
|
-
# the production it refers to.
|
55
|
-
# @param other [ProductionRef]
|
56
|
-
# @return [true / false]
|
57
|
-
def ==(other)
|
58
|
-
return true if object_id == other.object_id
|
59
|
-
|
60
|
-
result = if other.is_a?(ProductionRef)
|
61
|
-
(production == other.production)
|
62
|
-
else
|
63
|
-
(production == other)
|
64
|
-
end
|
65
|
-
|
66
|
-
return result
|
67
|
-
end
|
68
|
-
|
69
|
-
# Produce a hash value.
|
70
|
-
# A reference has no identity on its own,
|
71
|
-
# the method returns the hash value of the
|
72
|
-
# referenced production
|
73
|
-
# @return [Fixnum] the hash value
|
74
|
-
def hash()
|
75
|
-
raise StandardError, 'Nil production' if production.nil?
|
76
|
-
return production.hash
|
77
|
-
end
|
78
|
-
|
79
|
-
# Make this reference point to the given production.
|
80
|
-
# @param aProduction [Production or ProductionRef] the production
|
81
|
-
# to refer to
|
82
|
-
def bind_to(aProduction)
|
83
|
-
return if aProduction == @production
|
84
|
-
|
85
|
-
production.decr_refcount if production
|
86
|
-
unless aProduction.kind_of?(Production)
|
87
|
-
raise StandardError, "Illegal production type #{aProduction.class}"
|
17
|
+
# # ... Production reference count is updated...
|
18
|
+
# puts prod.refcount # outputs 1
|
19
|
+
class ProductionRef
|
20
|
+
# Link to the production to reference.
|
21
|
+
attr_reader(:production)
|
22
|
+
|
23
|
+
# Constructor
|
24
|
+
# @param target [Production or ProductionRef]
|
25
|
+
# The production that is being referenced.
|
26
|
+
def initialize(target)
|
27
|
+
bind_to(target)
|
88
28
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
29
|
+
|
30
|
+
# Copy constructor invoked by dup or clone methods.
|
31
|
+
# @param orig [ProductionRef]
|
32
|
+
# @example
|
33
|
+
# prod = Sequitur::Production.new
|
34
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
+
# copy_ref = ref.dup
|
36
|
+
# puts prod.refcount # outputs 2
|
37
|
+
def initialize_copy(orig)
|
38
|
+
@production = nil
|
39
|
+
bind_to(orig.production)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Emit the text representation of a production reference.
|
43
|
+
# @return [String]
|
44
|
+
def to_s
|
45
|
+
return production.object_id.to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
alias to_string to_s
|
49
|
+
|
50
|
+
|
51
|
+
# Equality testing.
|
52
|
+
# A production ref is equal to another one when its
|
53
|
+
# refers to the same production or when it is compared to
|
54
|
+
# the production it refers to.
|
55
|
+
# @param other [ProductionRef]
|
56
|
+
# @return [true / false]
|
57
|
+
def ==(other)
|
58
|
+
return true if object_id == other.object_id
|
59
|
+
|
60
|
+
result = if other.is_a?(ProductionRef)
|
61
|
+
(production == other.production)
|
62
|
+
else
|
63
|
+
(production == other)
|
64
|
+
end
|
65
|
+
|
66
|
+
return result
|
67
|
+
end
|
68
|
+
|
69
|
+
# Produce a hash value.
|
70
|
+
# A reference has no identity on its own,
|
71
|
+
# the method returns the hash value of the
|
72
|
+
# referenced production
|
73
|
+
# @return [Fixnum] the hash value
|
74
|
+
def hash
|
75
|
+
raise StandardError, 'Nil production' if production.nil?
|
76
|
+
return production.hash
|
77
|
+
end
|
78
|
+
|
79
|
+
# Make this reference point to the given production.
|
80
|
+
# @param aProduction [Production or ProductionRef] the production
|
81
|
+
# to refer to
|
82
|
+
def bind_to(aProduction)
|
83
|
+
return if aProduction == @production
|
84
|
+
|
85
|
+
production.decr_refcount if production
|
86
|
+
unless aProduction.kind_of?(Production)
|
87
|
+
raise StandardError, "Illegal production type #{aProduction.class}"
|
88
|
+
end
|
89
|
+
@production = aProduction
|
90
|
+
production.incr_refcount
|
91
|
+
end
|
92
|
+
|
93
|
+
# Clear the reference to the target production.
|
94
|
+
def unbind
|
95
|
+
production.decr_refcount
|
96
|
+
@production = nil
|
97
|
+
end
|
98
|
+
|
99
|
+
# Check that the this object doesn't refer to any production.
|
100
|
+
# @return [true / false] true when this object doesn't
|
101
|
+
# point to a production.
|
102
|
+
def unbound?
|
103
|
+
return production.nil?
|
104
|
+
end
|
105
|
+
|
106
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
107
|
+
# @param aVisitor [GrammarVisitor] the visitor
|
108
|
+
def accept(aVisitor)
|
109
|
+
aVisitor.visit_prod_ref(self)
|
110
|
+
end
|
111
|
+
end # class
|
113
112
|
end # module
|
114
113
|
|
115
114
|
# End of file
|
@@ -43,7 +43,7 @@ class SequiturGrammar < DynamicGrammar
|
|
43
43
|
# remove P from grammar
|
44
44
|
# end
|
45
45
|
# end until digram unicity and rule utility are met
|
46
|
-
def enforce_rules
|
46
|
+
def enforce_rules
|
47
47
|
loop do
|
48
48
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
49
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
@@ -61,7 +61,7 @@ class SequiturGrammar < DynamicGrammar
|
|
61
61
|
# Return an empty Hash if each digram appears once.
|
62
62
|
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
63
63
|
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
-
def detect_collision
|
64
|
+
def detect_collision
|
65
65
|
diagnosis = CollisionDiagnosis.new(false)
|
66
66
|
found_so_far = {}
|
67
67
|
productions.each do |a_prod|
|
@@ -109,7 +109,7 @@ class SequiturGrammar < DynamicGrammar
|
|
109
109
|
end
|
110
110
|
|
111
111
|
# Return a production that is used less than twice in the grammar.
|
112
|
-
def detect_useless_production
|
112
|
+
def detect_useless_production
|
113
113
|
useless = productions.index { |prod| prod.refcount < 2 }
|
114
114
|
useless = nil if useless && useless.zero?
|
115
115
|
|
@@ -6,7 +6,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
6
6
|
attr_reader(:symbols)
|
7
7
|
|
8
8
|
# Create an empty sequence
|
9
|
-
def initialize
|
9
|
+
def initialize
|
10
10
|
@symbols = []
|
11
11
|
end
|
12
12
|
|
@@ -21,7 +21,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
21
21
|
end
|
22
22
|
|
23
23
|
# Clear the symbol sequence.
|
24
|
-
def clear
|
24
|
+
def clear
|
25
25
|
refs = references
|
26
26
|
refs.each(&:unbind)
|
27
27
|
@symbols = []
|
@@ -30,13 +30,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
30
30
|
|
31
31
|
# Tell whether the sequence is empty.
|
32
32
|
# @return [true / false] true only if the sequence has no symbol in it.
|
33
|
-
def empty?
|
33
|
+
def empty?
|
34
34
|
return symbols.empty?
|
35
35
|
end
|
36
36
|
|
37
37
|
# Count the number of elements in the sequence.
|
38
38
|
# @return [Fixnum] the number of elements
|
39
|
-
def size
|
39
|
+
def size
|
40
40
|
return symbols.size
|
41
41
|
end
|
42
42
|
|
@@ -76,15 +76,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
76
|
return same
|
77
77
|
end
|
78
78
|
|
79
|
-
|
80
79
|
# Select the references to production appearing in the rhs.
|
81
80
|
# @return [Array of ProductionRef]
|
82
|
-
def references
|
81
|
+
def references
|
83
82
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
84
83
|
return @memo_references
|
85
84
|
end
|
86
85
|
|
87
|
-
|
88
86
|
# Select the references of the given production appearing in the rhs.
|
89
87
|
# @param aProduction [Production]
|
90
88
|
# @return [Array of ProductionRef]
|
@@ -94,11 +92,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
94
92
|
return result
|
95
93
|
end
|
96
94
|
|
97
|
-
|
98
95
|
# Emit a text representation of the symbol sequence.
|
99
96
|
# Text is of the form: space-separated sequence of symbols.
|
100
97
|
# @return [String]
|
101
|
-
def to_string
|
98
|
+
def to_string
|
102
99
|
rhs_text = symbols.map do |elem|
|
103
100
|
case elem
|
104
101
|
when String then "'#{elem}'"
|
@@ -150,7 +147,6 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
150
147
|
symbols.delete_at(position)
|
151
148
|
end
|
152
149
|
|
153
|
-
|
154
150
|
# Part of the 'visitee' role in Visitor design pattern.
|
155
151
|
# @param aVisitor[GrammarVisitor]
|
156
152
|
def accept(aVisitor)
|
@@ -170,7 +166,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
170
166
|
|
171
167
|
private
|
172
168
|
|
173
|
-
def invalidate_refs
|
169
|
+
def invalidate_refs
|
174
170
|
@memo_references = nil
|
175
171
|
@lookup_references = nil
|
176
172
|
end
|
@@ -5,28 +5,28 @@ require_relative '../../lib/sequitur/digram'
|
|
5
5
|
|
6
6
|
module Sequitur # Re-open the module to get rid of qualified names
|
7
7
|
describe Digram do
|
8
|
-
let(:two_symbols) { [
|
8
|
+
let(:two_symbols) { %i[b c] }
|
9
9
|
let(:production) { double('sample-production') }
|
10
10
|
|
11
11
|
context 'Standard creation & initialization:' do
|
12
12
|
it 'should be created with 3 arguments' do
|
13
13
|
instance = Digram.new(:b, :c, production)
|
14
|
-
|
14
|
+
|
15
15
|
expect(instance.symbols).to eq(two_symbols)
|
16
16
|
expect(instance.production).to eq(production)
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it 'should return the production that it refers to' do
|
20
20
|
instance = Digram.new(:b, :c, production)
|
21
21
|
expect(instance.production).to eq(production)
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
it 'should whether its symbols are the same' do
|
25
25
|
instance1 = Digram.new(:a, :a, production)
|
26
26
|
expect(instance1).to be_repeating
|
27
|
-
|
27
|
+
|
28
28
|
instance1 = Digram.new(:a, :b, production)
|
29
|
-
expect(instance1).not_to be_repeating
|
29
|
+
expect(instance1).not_to be_repeating
|
30
30
|
end
|
31
31
|
end # context
|
32
32
|
|
@@ -35,11 +35,11 @@ describe Digram do
|
|
35
35
|
instance1 = Digram.new(:a, :b, production)
|
36
36
|
same = Digram.new(:a, :b, production)
|
37
37
|
different = Digram.new(:b, :c, production)
|
38
|
-
|
38
|
+
|
39
39
|
expect(instance1).to eq(instance1)
|
40
40
|
expect(instance1).to eq(same)
|
41
41
|
expect(instance1).not_to eq(different)
|
42
|
-
expect(same).not_to eq(different)
|
42
|
+
expect(same).not_to eq(different)
|
43
43
|
end
|
44
44
|
end # context
|
45
45
|
end # describe
|