sequitur 0.1.23 → 0.1.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -437
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +1 -1
- data/Rakefile +0 -2
- data/examples/integer_sample.rb +0 -1
- data/examples/porridge.rb +9 -9
- data/examples/word_sample.rb +4 -5
- data/lib/sequitur/constants.rb +4 -1
- data/lib/sequitur/digram.rb +2 -2
- data/lib/sequitur/dynamic_grammar.rb +3 -4
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +3 -7
- data/lib/sequitur/formatter/debug.rb +0 -1
- data/lib/sequitur/grammar_visitor.rb +1 -1
- data/lib/sequitur/production.rb +200 -205
- data/lib/sequitur/production_ref.rb +9 -12
- data/lib/sequitur/sequitur_grammar.rb +135 -137
- data/lib/sequitur/symbol_sequence.rb +24 -27
- data/lib/sequitur.rb +4 -5
- data/spec/sequitur/digram_spec.rb +13 -12
- data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
- data/spec/sequitur/formatter/base_text_spec.rb +70 -72
- data/spec/sequitur/formatter/debug_spec.rb +90 -92
- data/spec/sequitur/grammar_visitor_spec.rb +70 -71
- data/spec/sequitur/production_ref_spec.rb +92 -92
- data/spec/sequitur/production_spec.rb +30 -34
- data/spec/sequitur/sequitur_grammar_spec.rb +47 -46
- data/spec/sequitur/symbol_sequence_spec.rb +102 -105
- data/spec/spec_helper.rb +0 -1
- metadata +4 -5
- data/.travis.yml +0 -29
data/lib/sequitur/production.rb
CHANGED
@@ -5,128 +5,125 @@ require_relative 'symbol_sequence'
|
|
5
5
|
require_relative 'production_ref'
|
6
6
|
|
7
7
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
8
|
-
# In a context-free grammar, a production is a rule in which
|
9
|
-
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
-
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
-
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
-
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
-
# in other words every occurrence of the LHS can be substituted to
|
14
|
-
# corresponding RHS.
|
15
|
-
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
-
class Production
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
# @param aSymbol [Object] A (grammar) symbol to add.
|
128
|
-
def append_symbol(aSymbol)
|
129
|
-
case aSymbol
|
8
|
+
# In a context-free grammar, a production is a rule in which
|
9
|
+
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
+
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
+
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
+
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
+
# in other words every occurrence of the LHS can be substituted to
|
14
|
+
# corresponding RHS.
|
15
|
+
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
+
class Production
|
17
|
+
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
18
|
+
attr_reader(:rhs)
|
19
|
+
|
20
|
+
# The reference count (= how many times other productions reference this one)
|
21
|
+
attr_reader(:refcount)
|
22
|
+
|
23
|
+
# The sequence of digrams appearing in the RHS
|
24
|
+
attr_reader(:digrams)
|
25
|
+
|
26
|
+
# Constructor.
|
27
|
+
# Build a production with an empty RHS.
|
28
|
+
def initialize
|
29
|
+
@rhs = SymbolSequence.new
|
30
|
+
@refcount = 0
|
31
|
+
@digrams = []
|
32
|
+
end
|
33
|
+
|
34
|
+
# Identity testing.
|
35
|
+
# @param other [] another production or production reference.
|
36
|
+
# @return true when the receiver and other are the same.
|
37
|
+
def ==(other)
|
38
|
+
return true if object_id == other.object_id
|
39
|
+
|
40
|
+
if other.is_a?(ProductionRef)
|
41
|
+
(other == self)
|
42
|
+
else
|
43
|
+
false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Is the rhs empty?
|
48
|
+
# @return true if the rhs has no members.
|
49
|
+
def empty?
|
50
|
+
rhs.empty?
|
51
|
+
end
|
52
|
+
|
53
|
+
# Increment the reference count by one.
|
54
|
+
def incr_refcount
|
55
|
+
@refcount += 1
|
56
|
+
end
|
57
|
+
|
58
|
+
# Decrement the reference count by one.
|
59
|
+
def decr_refcount
|
60
|
+
raise StandardError, 'Internal error' if @refcount.zero?
|
61
|
+
|
62
|
+
@refcount -= 1
|
63
|
+
end
|
64
|
+
|
65
|
+
# Select the references to production appearing in the rhs.
|
66
|
+
# @return [Array of ProductionRef]
|
67
|
+
def references
|
68
|
+
rhs.references
|
69
|
+
end
|
70
|
+
|
71
|
+
# Look in the rhs for all the references to the production passed as argument.
|
72
|
+
# @param a_prod [Production or ProductionRef] The production to search for.
|
73
|
+
# @return [Array] the array of ProductionRef to the passed production
|
74
|
+
def references_of(a_prod)
|
75
|
+
real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
|
76
|
+
rhs.references_of(real_prod)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Enumerate the digrams appearing in the right-hand side (rhs)
|
80
|
+
# @return [Array] the list of digrams found in rhs of this production.
|
81
|
+
def recalc_digrams
|
82
|
+
return [] if rhs.size < 2
|
83
|
+
|
84
|
+
result = []
|
85
|
+
rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
|
86
|
+
@digrams = result
|
87
|
+
end
|
88
|
+
|
89
|
+
# Does the rhs have exactly one digram only (= 2 symbols)?
|
90
|
+
# @return [true/false] true when the rhs contains exactly two symbols.
|
91
|
+
def single_digram?
|
92
|
+
rhs.size == 2
|
93
|
+
end
|
94
|
+
|
95
|
+
# Detect whether the last digram occurs twice
|
96
|
+
# Assumption: when a digram occurs twice in a production then it must occur
|
97
|
+
# at the end of the rhs
|
98
|
+
# @return [true/false] true when the digram occurs twice in rhs.
|
99
|
+
def repeated_digram?
|
100
|
+
return false if rhs.size < 3
|
101
|
+
|
102
|
+
my_digrams = digrams
|
103
|
+
all_keys = my_digrams.map(&:key)
|
104
|
+
last_key = all_keys.pop
|
105
|
+
same_key_found = all_keys.index(last_key)
|
106
|
+
!same_key_found.nil?
|
107
|
+
end
|
108
|
+
|
109
|
+
# Retrieve the last digram appearing in the RHS (if any).
|
110
|
+
# @return [Digram] last digram in the rhs otherwise nil.
|
111
|
+
def last_digram
|
112
|
+
digrams.empty? ? nil : digrams.last
|
113
|
+
end
|
114
|
+
|
115
|
+
# Emit a text representation of the production rule.
|
116
|
+
# Text is of the form:
|
117
|
+
# object id of production : rhs as space-separated sequence of symbols.
|
118
|
+
# @return [String]
|
119
|
+
def to_string
|
120
|
+
"#{object_id} : #{rhs.to_string}."
|
121
|
+
end
|
122
|
+
|
123
|
+
# Add a (grammar) symbol at the end of the RHS.
|
124
|
+
# @param aSymbol [Object] A (grammar) symbol to add.
|
125
|
+
def append_symbol(aSymbol)
|
126
|
+
case aSymbol
|
130
127
|
when Production
|
131
128
|
new_symb = ProductionRef.new(aSymbol)
|
132
129
|
when ProductionRef
|
@@ -138,91 +135,89 @@ class Production
|
|
138
135
|
new_symb = aSymbol.dup
|
139
136
|
else
|
140
137
|
new_symb = aSymbol
|
138
|
+
end
|
139
|
+
|
140
|
+
rhs << new_symb
|
141
|
+
digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
|
142
|
+
end
|
143
|
+
|
144
|
+
# Clear the right-hand side.
|
145
|
+
# Any referenced production has its reference counter decremented.
|
146
|
+
def clear_rhs
|
147
|
+
rhs.clear
|
148
|
+
end
|
149
|
+
|
150
|
+
# Find all the positions where the digram occurs in the rhs
|
151
|
+
# @param symb1 [Object] first symbol of the digram
|
152
|
+
# @param symb2 [Object] second symbol of the digram
|
153
|
+
# @return [Array] the list of indices where the digram occurs in rhs.
|
154
|
+
# @example
|
155
|
+
# # Given the production p : a b c a b a b d
|
156
|
+
# #Then ...
|
157
|
+
# p.positions_of(a, b) # => [0, 3, 5]
|
158
|
+
# # Caution: "overlapping" digrams shouldn't be counted
|
159
|
+
# # Given the production p : a a b a a a c d
|
160
|
+
# # Then ...
|
161
|
+
# p.positions_of(a, a) # => [0, 3]
|
162
|
+
def positions_of(symb1, symb2)
|
163
|
+
# Find the positions where the digram occurs in rhs
|
164
|
+
indices = [-2] # Dummy index!
|
165
|
+
(0...rhs.size).each do |i|
|
166
|
+
next if i == indices.last + 1
|
167
|
+
|
168
|
+
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
169
|
+
end
|
170
|
+
|
171
|
+
indices.shift
|
172
|
+
|
173
|
+
indices
|
174
|
+
end
|
175
|
+
|
176
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
177
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
178
|
+
# s1 s2 by a reference to P.
|
179
|
+
# @param another [Production or ProductionRef] a production that
|
180
|
+
# consists exactly of one digram (= 2 symbols).
|
181
|
+
def reduce_step(another)
|
182
|
+
(symb1, symb2) = another.rhs.symbols
|
183
|
+
pos = positions_of(symb1, symb2).reverse
|
184
|
+
|
185
|
+
# Replace the two symbol sequence by the production
|
186
|
+
pos.each { |index| rhs.reduce_step(index, another) }
|
187
|
+
|
188
|
+
recalc_digrams
|
141
189
|
end
|
142
190
|
|
143
|
-
rhs
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
#
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
indices.shift
|
175
|
-
|
176
|
-
return indices
|
177
|
-
end
|
178
|
-
|
179
|
-
# Given that the production P passed as argument has exactly 2 symbols
|
180
|
-
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
181
|
-
# s1 s2 by a reference to P.
|
182
|
-
# @param another [Production or ProductionRef] a production that
|
183
|
-
# consists exactly of one digram (= 2 symbols).
|
184
|
-
def reduce_step(another)
|
185
|
-
(symb1, symb2) = another.rhs.symbols
|
186
|
-
pos = positions_of(symb1, symb2).reverse
|
187
|
-
|
188
|
-
# Replace the two symbol sequence by the production
|
189
|
-
pos.each { |index| rhs.reduce_step(index, another) }
|
190
|
-
|
191
|
-
recalc_digrams
|
192
|
-
end
|
193
|
-
|
194
|
-
# Replace every occurrence of 'another' production in self.rhs by
|
195
|
-
# the symbols in the rhs of 'another'.
|
196
|
-
# @param another [Production or ProductionRef] a production that
|
197
|
-
# consists exactly of one digram (= 2 symbols).
|
198
|
-
# @example Synopsis
|
199
|
-
# # Given the production p_A : a p_B b p_B c
|
200
|
-
# # And the production p_B : x y
|
201
|
-
# # Then...
|
202
|
-
# p_A.derive_step(p_B)
|
203
|
-
# #Modifies p_A as into: p_A -> a x y b x y c
|
204
|
-
def derive_step(another)
|
205
|
-
(0...rhs.size).to_a.reverse_each do |index|
|
206
|
-
next unless rhs[index] == another
|
207
|
-
|
208
|
-
rhs.insert_at(index + 1, another.rhs)
|
209
|
-
another.decr_refcount
|
210
|
-
rhs.delete_at(index)
|
211
|
-
end
|
212
|
-
|
213
|
-
recalc_digrams
|
214
|
-
end
|
215
|
-
|
216
|
-
# Part of the 'visitee' role in Visitor design pattern.
|
217
|
-
# @param aVisitor[GrammarVisitor]
|
218
|
-
def accept(aVisitor)
|
219
|
-
aVisitor.start_visit_production(self)
|
220
|
-
|
221
|
-
rhs.accept(aVisitor)
|
222
|
-
|
223
|
-
aVisitor.end_visit_production(self)
|
224
|
-
end
|
225
|
-
end # class
|
191
|
+
# Replace every occurrence of 'another' production in self.rhs by
|
192
|
+
# the symbols in the rhs of 'another'.
|
193
|
+
# @param another [Production or ProductionRef] a production that
|
194
|
+
# consists exactly of one digram (= 2 symbols).
|
195
|
+
# @example Synopsis
|
196
|
+
# # Given the production p_A : a p_B b p_B c
|
197
|
+
# # And the production p_B : x y
|
198
|
+
# # Then...
|
199
|
+
# p_A.derive_step(p_B)
|
200
|
+
# #Modifies p_A as into: p_A -> a x y b x y c
|
201
|
+
def derive_step(another)
|
202
|
+
(0...rhs.size).to_a.reverse_each do |index|
|
203
|
+
next unless rhs[index] == another
|
204
|
+
|
205
|
+
rhs.insert_at(index + 1, another.rhs)
|
206
|
+
another.decr_refcount
|
207
|
+
rhs.delete_at(index)
|
208
|
+
end
|
209
|
+
|
210
|
+
recalc_digrams
|
211
|
+
end
|
212
|
+
|
213
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
214
|
+
# @param aVisitor[GrammarVisitor]
|
215
|
+
def accept(aVisitor)
|
216
|
+
aVisitor.start_visit_production(self)
|
217
|
+
rhs.accept(aVisitor)
|
218
|
+
aVisitor.end_visit_production(self)
|
219
|
+
end
|
220
|
+
end # class
|
226
221
|
end # module
|
227
222
|
|
228
223
|
# End of file
|
@@ -42,12 +42,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
42
42
|
# Emit the text representation of a production reference.
|
43
43
|
# @return [String]
|
44
44
|
def to_s
|
45
|
-
|
45
|
+
production.object_id.to_s
|
46
46
|
end
|
47
47
|
|
48
48
|
alias to_string to_s
|
49
49
|
|
50
|
-
|
51
50
|
# Equality testing.
|
52
51
|
# A production ref is equal to another one when it
|
53
52
|
# refers to the same production or when it is compared to
|
@@ -57,13 +56,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
57
56
|
def ==(other)
|
58
57
|
return true if object_id == other.object_id
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
return result
|
59
|
+
if other.is_a?(ProductionRef)
|
60
|
+
production == other.production
|
61
|
+
else
|
62
|
+
production == other
|
63
|
+
end
|
67
64
|
end
|
68
65
|
|
69
66
|
# Produce a hash value.
|
@@ -74,7 +71,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
74
71
|
def hash
|
75
72
|
raise StandardError, 'Nil production' if production.nil?
|
76
73
|
|
77
|
-
|
74
|
+
production.hash
|
78
75
|
end
|
79
76
|
|
80
77
|
# Make this reference point to the given production.
|
@@ -84,7 +81,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
84
81
|
return if aProduction == @production
|
85
82
|
|
86
83
|
production&.decr_refcount
|
87
|
-
unless aProduction.
|
84
|
+
unless aProduction.is_a?(Production)
|
88
85
|
raise StandardError, "Illegal production type #{aProduction.class}"
|
89
86
|
end
|
90
87
|
|
@@ -102,7 +99,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
102
99
|
# @return [true / false] true when this object doesn't
|
103
100
|
# point to a production.
|
104
101
|
def unbound?
|
105
|
-
|
102
|
+
production.nil?
|
106
103
|
end
|
107
104
|
|
108
105
|
# Part of the 'visitee' role in the Visitor design pattern.
|