sequitur 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +163 -49
- data/.travis.yml +13 -10
- data/CHANGELOG.md +9 -0
- data/Gemfile +2 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +13 -10
- data/examples/integer_sample.rb +5 -6
- data/examples/porridge.rb +4 -6
- data/examples/simple_case.rb +5 -6
- data/examples/symbol_sample.rb +5 -8
- data/examples/word_sample.rb +1 -2
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +38 -38
- data/lib/sequitur/dynamic_grammar.rb +91 -95
- data/lib/sequitur/formatter/base_text.rb +1 -1
- data/lib/sequitur/formatter/debug.rb +2 -2
- data/lib/sequitur/grammar_visitor.rb +96 -98
- data/lib/sequitur/production.rb +10 -19
- data/lib/sequitur/production_ref.rb +104 -105
- data/lib/sequitur/sequitur_grammar.rb +3 -3
- data/lib/sequitur/symbol_sequence.rb +7 -11
- data/spec/sequitur/digram_spec.rb +8 -8
- data/spec/sequitur/production_spec.rb +7 -7
- data/spec/sequitur/sequitur_grammar_spec.rb +10 -10
- data/spec/sequitur/symbol_sequence_spec.rb +4 -4
- data/spec/spec_helper.rb +6 -4
- metadata +44 -29
data/lib/sequitur/production.rb
CHANGED
@@ -23,7 +23,7 @@ class Production
|
|
23
23
|
|
24
24
|
# Constructor.
|
25
25
|
# Build a production with an empty RHS.
|
26
|
-
def initialize
|
26
|
+
def initialize
|
27
27
|
@rhs = SymbolSequence.new
|
28
28
|
@refcount = 0
|
29
29
|
@digrams = []
|
@@ -44,7 +44,6 @@ class Production
|
|
44
44
|
return result
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
47
|
# Is the rhs empty?
|
49
48
|
# @return [true/false] true if the rhs has no members.
|
50
49
|
def empty?
|
@@ -52,20 +51,19 @@ class Production
|
|
52
51
|
end
|
53
52
|
|
54
53
|
# Increment the reference count by one.
|
55
|
-
def incr_refcount
|
54
|
+
def incr_refcount
|
56
55
|
@refcount += 1
|
57
56
|
end
|
58
57
|
|
59
58
|
# Decrement the reference count by one.
|
60
|
-
def decr_refcount
|
59
|
+
def decr_refcount
|
61
60
|
raise StandardError, 'Internal error' if @refcount.zero?
|
62
61
|
@refcount -= 1
|
63
62
|
end
|
64
63
|
|
65
|
-
|
66
64
|
# Select the references to production appearing in the rhs.
|
67
65
|
# @return [Array of ProductionRef]
|
68
|
-
def references
|
66
|
+
def references
|
69
67
|
return rhs.references
|
70
68
|
end
|
71
69
|
|
@@ -77,10 +75,9 @@ class Production
|
|
77
75
|
return rhs.references_of(real_prod)
|
78
76
|
end
|
79
77
|
|
80
|
-
|
81
78
|
# Enumerate the digrams appearing in the right-hand side (rhs)
|
82
79
|
# @return [Array] the list of digrams found in rhs of this production.
|
83
|
-
def recalc_digrams
|
80
|
+
def recalc_digrams
|
84
81
|
return [] if rhs.size < 2
|
85
82
|
|
86
83
|
result = []
|
@@ -88,20 +85,17 @@ class Production
|
|
88
85
|
@digrams = result
|
89
86
|
end
|
90
87
|
|
91
|
-
|
92
|
-
|
93
88
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
94
89
|
# @return [true/false] true when the rhs contains exactly two symbols.
|
95
90
|
def single_digram?
|
96
91
|
return rhs.size == 2
|
97
92
|
end
|
98
93
|
|
99
|
-
|
100
94
|
# Detect whether the last digram occurs twice
|
101
95
|
# Assumption: when a digram occurs twice in a production then it must occur
|
102
96
|
# at the end of the rhs
|
103
97
|
# @return [true/false] true when the digram occurs twice in rhs.
|
104
|
-
def repeated_digram?
|
98
|
+
def repeated_digram?
|
105
99
|
return false if rhs.size < 3
|
106
100
|
|
107
101
|
my_digrams = digrams
|
@@ -113,17 +107,16 @@ class Production
|
|
113
107
|
|
114
108
|
# Retrieve the last digram appearing in the RHS (if any).
|
115
109
|
# @return [Digram] last digram in the rhs otherwise nil.
|
116
|
-
def last_digram
|
110
|
+
def last_digram
|
117
111
|
result = digrams.empty? ? nil : digrams.last
|
118
112
|
return result
|
119
113
|
end
|
120
114
|
|
121
|
-
|
122
115
|
# Emit a text representation of the production rule.
|
123
116
|
# Text is of the form:
|
124
117
|
# object id of production : rhs as space-separated sequence of symbols.
|
125
118
|
# @return [String]
|
126
|
-
def to_string
|
119
|
+
def to_string
|
127
120
|
return "#{object_id} : #{rhs.to_string}."
|
128
121
|
end
|
129
122
|
|
@@ -150,7 +143,7 @@ class Production
|
|
150
143
|
|
151
144
|
# Clear the right-hand side.
|
152
145
|
# Any referenced production has its reference counter decremented.
|
153
|
-
def clear_rhs
|
146
|
+
def clear_rhs
|
154
147
|
rhs.clear
|
155
148
|
end
|
156
149
|
|
@@ -168,7 +161,7 @@ class Production
|
|
168
161
|
# p.positions_of(a, a) # => [0, 3]
|
169
162
|
def positions_of(symb1, symb2)
|
170
163
|
# Find the positions where the digram occurs in rhs
|
171
|
-
indices = [
|
164
|
+
indices = [-2] # Dummy index!
|
172
165
|
(0...rhs.size).each do |i|
|
173
166
|
next if i == indices.last + 1
|
174
167
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
@@ -179,7 +172,6 @@ class Production
|
|
179
172
|
return indices
|
180
173
|
end
|
181
174
|
|
182
|
-
|
183
175
|
# Given that the production P passed as argument has exactly 2 symbols
|
184
176
|
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
185
177
|
# s1 s2 by a reference to P.
|
@@ -217,7 +209,6 @@ class Production
|
|
217
209
|
recalc_digrams
|
218
210
|
end
|
219
211
|
|
220
|
-
|
221
212
|
# Part of the 'visitee' role in Visitor design pattern.
|
222
213
|
# @param aVisitor [GrammarVisitor]
|
223
214
|
def accept(aVisitor)
|
@@ -1,115 +1,114 @@
|
|
1
1
|
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
-
# side of a production P1 and that refers to a production P2.
|
6
|
-
# Every time a production P2 appears in the right-hand side of
|
7
|
-
# production P1, this is implemented by inserting a production reference to P2
|
8
|
-
# in the appropriate position in the RHS of P1.
|
9
|
-
# In the literature, production references are also called non terminal
|
10
|
-
# symbols
|
11
|
-
# @example
|
12
|
-
# # Given a production rule...
|
13
|
-
# prod = Sequitur::Production.new
|
14
|
-
# puts prod.refcount # outputs 0
|
15
|
-
# # ... Build a reference to it
|
16
|
-
# ref = Sequitur::ProductionRef.new(prod)
|
17
|
-
# # ... Production reference count is updated...
|
18
|
-
# puts prod.refcount # outputs 1
|
19
|
-
class ProductionRef
|
20
|
-
# Link to the production to reference.
|
21
|
-
attr_reader(:production)
|
22
|
-
|
23
|
-
# Constructor
|
24
|
-
# @param target [Production or ProductionRef]
|
25
|
-
# The production that is being referenced.
|
26
|
-
def initialize(target)
|
27
|
-
bind_to(target)
|
28
|
-
end
|
29
|
-
|
30
|
-
# Copy constructor invoked by dup or clone methods.
|
31
|
-
# @param orig [ProductionRef]
|
4
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
+
# side of a production P1 and that refers to a production P2.
|
6
|
+
# Every time a production P2 appears in the right-hand side of
|
7
|
+
# production P1, this is implemented by inserting a production reference to P2
|
8
|
+
# in the appropriate position in the RHS of P1.
|
9
|
+
# In the literature, production references are also called non terminal
|
10
|
+
# symbols
|
32
11
|
# @example
|
12
|
+
# # Given a production rule...
|
33
13
|
# prod = Sequitur::Production.new
|
14
|
+
# puts prod.refcount # outputs 0
|
15
|
+
# # ... Build a reference to it
|
34
16
|
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
-
#
|
36
|
-
# puts prod.refcount # outputs
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
47
|
-
|
48
|
-
alias to_string to_s
|
49
|
-
|
50
|
-
|
51
|
-
# Equality testing.
|
52
|
-
# A production ref is equal to another one when it
|
53
|
-
# refers to the same production or when it is compared to
|
54
|
-
# the production it refers to.
|
55
|
-
# @param other [ProductionRef]
|
56
|
-
# @return [true / false]
|
57
|
-
def ==(other)
|
58
|
-
return true if object_id == other.object_id
|
59
|
-
|
60
|
-
result = if other.is_a?(ProductionRef)
|
61
|
-
(production == other.production)
|
62
|
-
else
|
63
|
-
(production == other)
|
64
|
-
end
|
65
|
-
|
66
|
-
return result
|
67
|
-
end
|
68
|
-
|
69
|
-
# Produce a hash value.
|
70
|
-
# A reference has no identity on its own,
|
71
|
-
# the method returns the hash value of the
|
72
|
-
# referenced production
|
73
|
-
# @return [Fixnum] the hash value
|
74
|
-
def hash()
|
75
|
-
raise StandardError, 'Nil production' if production.nil?
|
76
|
-
return production.hash
|
77
|
-
end
|
78
|
-
|
79
|
-
# Make this reference point to the given production.
|
80
|
-
# @param aProduction [Production or ProductionRef] the production
|
81
|
-
# to refer to
|
82
|
-
def bind_to(aProduction)
|
83
|
-
return if aProduction == @production
|
84
|
-
|
85
|
-
production.decr_refcount if production
|
86
|
-
unless aProduction.kind_of?(Production)
|
87
|
-
raise StandardError, "Illegal production type #{aProduction.class}"
|
17
|
+
# # ... Production reference count is updated...
|
18
|
+
# puts prod.refcount # outputs 1
|
19
|
+
class ProductionRef
|
20
|
+
# Link to the production to reference.
|
21
|
+
attr_reader(:production)
|
22
|
+
|
23
|
+
# Constructor
|
24
|
+
# @param target [Production or ProductionRef]
|
25
|
+
# The production that is being referenced.
|
26
|
+
def initialize(target)
|
27
|
+
bind_to(target)
|
88
28
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
29
|
+
|
30
|
+
# Copy constructor invoked by dup or clone methods.
|
31
|
+
# @param orig [ProductionRef]
|
32
|
+
# @example
|
33
|
+
# prod = Sequitur::Production.new
|
34
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
+
# copy_ref = ref.dup
|
36
|
+
# puts prod.refcount # outputs 2
|
37
|
+
def initialize_copy(orig)
|
38
|
+
@production = nil
|
39
|
+
bind_to(orig.production)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Emit the text representation of a production reference.
|
43
|
+
# @return [String]
|
44
|
+
def to_s
|
45
|
+
return production.object_id.to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
alias to_string to_s
|
49
|
+
|
50
|
+
|
51
|
+
# Equality testing.
|
52
|
+
# A production ref is equal to another one when it
|
53
|
+
# refers to the same production or when it is compared to
|
54
|
+
# the production it refers to.
|
55
|
+
# @param other [ProductionRef]
|
56
|
+
# @return [true / false]
|
57
|
+
def ==(other)
|
58
|
+
return true if object_id == other.object_id
|
59
|
+
|
60
|
+
result = if other.is_a?(ProductionRef)
|
61
|
+
(production == other.production)
|
62
|
+
else
|
63
|
+
(production == other)
|
64
|
+
end
|
65
|
+
|
66
|
+
return result
|
67
|
+
end
|
68
|
+
|
69
|
+
# Produce a hash value.
|
70
|
+
# A reference has no identity on its own,
|
71
|
+
# the method returns the hash value of the
|
72
|
+
# referenced production
|
73
|
+
# @return [Fixnum] the hash value
|
74
|
+
def hash
|
75
|
+
raise StandardError, 'Nil production' if production.nil?
|
76
|
+
return production.hash
|
77
|
+
end
|
78
|
+
|
79
|
+
# Make this reference point to the given production.
|
80
|
+
# @param aProduction [Production or ProductionRef] the production
|
81
|
+
# to refer to
|
82
|
+
def bind_to(aProduction)
|
83
|
+
return if aProduction == @production
|
84
|
+
|
85
|
+
production.decr_refcount if production
|
86
|
+
unless aProduction.kind_of?(Production)
|
87
|
+
raise StandardError, "Illegal production type #{aProduction.class}"
|
88
|
+
end
|
89
|
+
@production = aProduction
|
90
|
+
production.incr_refcount
|
91
|
+
end
|
92
|
+
|
93
|
+
# Clear the reference to the target production.
|
94
|
+
def unbind
|
95
|
+
production.decr_refcount
|
96
|
+
@production = nil
|
97
|
+
end
|
98
|
+
|
99
|
+
# Check that this object doesn't refer to any production.
|
100
|
+
# @return [true / false] true when this object doesn't
|
101
|
+
# point to a production.
|
102
|
+
def unbound?
|
103
|
+
return production.nil?
|
104
|
+
end
|
105
|
+
|
106
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
107
|
+
# @param aVisitor [GrammarVisitor] the visitor
|
108
|
+
def accept(aVisitor)
|
109
|
+
aVisitor.visit_prod_ref(self)
|
110
|
+
end
|
111
|
+
end # class
|
113
112
|
end # module
|
114
113
|
|
115
114
|
# End of file
|
@@ -43,7 +43,7 @@ class SequiturGrammar < DynamicGrammar
|
|
43
43
|
# remove P from grammar
|
44
44
|
# end
|
45
45
|
# end until digram unicity and rule utility are met
|
46
|
-
def enforce_rules
|
46
|
+
def enforce_rules
|
47
47
|
loop do
|
48
48
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
49
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
@@ -61,7 +61,7 @@ class SequiturGrammar < DynamicGrammar
|
|
61
61
|
# Return an empty Hash if each digram appears once.
|
62
62
|
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
63
63
|
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
-
def detect_collision
|
64
|
+
def detect_collision
|
65
65
|
diagnosis = CollisionDiagnosis.new(false)
|
66
66
|
found_so_far = {}
|
67
67
|
productions.each do |a_prod|
|
@@ -109,7 +109,7 @@ class SequiturGrammar < DynamicGrammar
|
|
109
109
|
end
|
110
110
|
|
111
111
|
# Return a production that is used less than twice in the grammar.
|
112
|
-
def detect_useless_production
|
112
|
+
def detect_useless_production
|
113
113
|
useless = productions.index { |prod| prod.refcount < 2 }
|
114
114
|
useless = nil if useless && useless.zero?
|
115
115
|
|
@@ -6,7 +6,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
6
6
|
attr_reader(:symbols)
|
7
7
|
|
8
8
|
# Create an empty sequence
|
9
|
-
def initialize
|
9
|
+
def initialize
|
10
10
|
@symbols = []
|
11
11
|
end
|
12
12
|
|
@@ -21,7 +21,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
21
21
|
end
|
22
22
|
|
23
23
|
# Clear the symbol sequence.
|
24
|
-
def clear
|
24
|
+
def clear
|
25
25
|
refs = references
|
26
26
|
refs.each(&:unbind)
|
27
27
|
@symbols = []
|
@@ -30,13 +30,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
30
30
|
|
31
31
|
# Tell whether the sequence is empty.
|
32
32
|
# @return [true / false] true only if the sequence has no symbol in it.
|
33
|
-
def empty?
|
33
|
+
def empty?
|
34
34
|
return symbols.empty?
|
35
35
|
end
|
36
36
|
|
37
37
|
# Count the number of elements in the sequence.
|
38
38
|
# @return [Fixnum] the number of elements
|
39
|
-
def size
|
39
|
+
def size
|
40
40
|
return symbols.size
|
41
41
|
end
|
42
42
|
|
@@ -76,15 +76,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
76
|
return same
|
77
77
|
end
|
78
78
|
|
79
|
-
|
80
79
|
# Select the references to production appearing in the rhs.
|
81
80
|
# @return [Array of ProductionRef]
|
82
|
-
def references
|
81
|
+
def references
|
83
82
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
84
83
|
return @memo_references
|
85
84
|
end
|
86
85
|
|
87
|
-
|
88
86
|
# Select the references of the given production appearing in the rhs.
|
89
87
|
# @param aProduction [Production]
|
90
88
|
# @return [Array of ProductionRef]
|
@@ -94,11 +92,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
94
92
|
return result
|
95
93
|
end
|
96
94
|
|
97
|
-
|
98
95
|
# Emit a text representation of the symbol sequence.
|
99
96
|
# Text is of the form: space-separated sequence of symbols.
|
100
97
|
# @return [String]
|
101
|
-
def to_string
|
98
|
+
def to_string
|
102
99
|
rhs_text = symbols.map do |elem|
|
103
100
|
case elem
|
104
101
|
when String then "'#{elem}'"
|
@@ -150,7 +147,6 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
150
147
|
symbols.delete_at(position)
|
151
148
|
end
|
152
149
|
|
153
|
-
|
154
150
|
# Part of the 'visitee' role in Visitor design pattern.
|
155
151
|
# @param aVisitor [GrammarVisitor]
|
156
152
|
def accept(aVisitor)
|
@@ -170,7 +166,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
170
166
|
|
171
167
|
private
|
172
168
|
|
173
|
-
def invalidate_refs
|
169
|
+
def invalidate_refs
|
174
170
|
@memo_references = nil
|
175
171
|
@lookup_references = nil
|
176
172
|
end
|
@@ -5,28 +5,28 @@ require_relative '../../lib/sequitur/digram'
|
|
5
5
|
|
6
6
|
module Sequitur # Re-open the module to get rid of qualified names
|
7
7
|
describe Digram do
|
8
|
-
let(:two_symbols) { [
|
8
|
+
let(:two_symbols) { %i[b c] }
|
9
9
|
let(:production) { double('sample-production') }
|
10
10
|
|
11
11
|
context 'Standard creation & initialization:' do
|
12
12
|
it 'should be created with 3 arguments' do
|
13
13
|
instance = Digram.new(:b, :c, production)
|
14
|
-
|
14
|
+
|
15
15
|
expect(instance.symbols).to eq(two_symbols)
|
16
16
|
expect(instance.production).to eq(production)
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it 'should return the production that it refers to' do
|
20
20
|
instance = Digram.new(:b, :c, production)
|
21
21
|
expect(instance.production).to eq(production)
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
it 'should whether its symbols are the same' do
|
25
25
|
instance1 = Digram.new(:a, :a, production)
|
26
26
|
expect(instance1).to be_repeating
|
27
|
-
|
27
|
+
|
28
28
|
instance1 = Digram.new(:a, :b, production)
|
29
|
-
expect(instance1).not_to be_repeating
|
29
|
+
expect(instance1).not_to be_repeating
|
30
30
|
end
|
31
31
|
end # context
|
32
32
|
|
@@ -35,11 +35,11 @@ describe Digram do
|
|
35
35
|
instance1 = Digram.new(:a, :b, production)
|
36
36
|
same = Digram.new(:a, :b, production)
|
37
37
|
different = Digram.new(:b, :c, production)
|
38
|
-
|
38
|
+
|
39
39
|
expect(instance1).to eq(instance1)
|
40
40
|
expect(instance1).to eq(same)
|
41
41
|
expect(instance1).not_to eq(different)
|
42
|
-
expect(same).not_to eq(different)
|
42
|
+
expect(same).not_to eq(different)
|
43
43
|
end
|
44
44
|
end # context
|
45
45
|
end # describe
|