sequitur 0.1.23 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -437
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +1 -1
- data/Rakefile +0 -2
- data/examples/integer_sample.rb +0 -1
- data/examples/porridge.rb +9 -9
- data/examples/word_sample.rb +4 -5
- data/lib/sequitur/constants.rb +4 -1
- data/lib/sequitur/digram.rb +2 -2
- data/lib/sequitur/dynamic_grammar.rb +3 -4
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +3 -7
- data/lib/sequitur/formatter/debug.rb +0 -1
- data/lib/sequitur/grammar_visitor.rb +1 -1
- data/lib/sequitur/production.rb +200 -205
- data/lib/sequitur/production_ref.rb +9 -12
- data/lib/sequitur/sequitur_grammar.rb +135 -137
- data/lib/sequitur/symbol_sequence.rb +24 -27
- data/lib/sequitur.rb +4 -5
- data/spec/sequitur/digram_spec.rb +13 -12
- data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
- data/spec/sequitur/formatter/base_text_spec.rb +70 -72
- data/spec/sequitur/formatter/debug_spec.rb +90 -92
- data/spec/sequitur/grammar_visitor_spec.rb +70 -71
- data/spec/sequitur/production_ref_spec.rb +92 -92
- data/spec/sequitur/production_spec.rb +30 -34
- data/spec/sequitur/sequitur_grammar_spec.rb +47 -46
- data/spec/sequitur/symbol_sequence_spec.rb +102 -105
- data/spec/spec_helper.rb +0 -1
- metadata +4 -5
- data/.travis.yml +0 -29
data/lib/sequitur/production.rb
CHANGED
@@ -5,128 +5,125 @@ require_relative 'symbol_sequence'
|
|
5
5
|
require_relative 'production_ref'
|
6
6
|
|
7
7
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
8
|
-
# In a context-free grammar, a production is a rule in which
|
9
|
-
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
-
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
-
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
-
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
-
# in other words every occurrence of the LHS can be substituted to
|
14
|
-
# corresponding RHS.
|
15
|
-
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
-
class Production
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
# @param aSymbol [Object] A (grammar) symbol to add.
|
128
|
-
def append_symbol(aSymbol)
|
129
|
-
case aSymbol
|
8
|
+
# In a context-free grammar, a production is a rule in which
|
9
|
+
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
+
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
+
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
+
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
+
# in other words every occurrence of the LHS can be substituted to
|
14
|
+
# corresponding RHS.
|
15
|
+
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
+
class Production
|
17
|
+
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
18
|
+
attr_reader(:rhs)
|
19
|
+
|
20
|
+
# The reference count (= how many times other productions reference this one)
|
21
|
+
attr_reader(:refcount)
|
22
|
+
|
23
|
+
# The sequence of digrams appearing in the RHS
|
24
|
+
attr_reader(:digrams)
|
25
|
+
|
26
|
+
# Constructor.
|
27
|
+
# Build a production with an empty RHS.
|
28
|
+
def initialize
|
29
|
+
@rhs = SymbolSequence.new
|
30
|
+
@refcount = 0
|
31
|
+
@digrams = []
|
32
|
+
end
|
33
|
+
|
34
|
+
# Identity testing.
|
35
|
+
# @param other [] another production or production reference.
|
36
|
+
# @return true when the receiver and other are the same.
|
37
|
+
def ==(other)
|
38
|
+
return true if object_id == other.object_id
|
39
|
+
|
40
|
+
if other.is_a?(ProductionRef)
|
41
|
+
(other == self)
|
42
|
+
else
|
43
|
+
false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Is the rhs empty?
|
48
|
+
# @return [true/false] true if the rhs has no members.
|
49
|
+
def empty?
|
50
|
+
rhs.empty?
|
51
|
+
end
|
52
|
+
|
53
|
+
# Increment the reference count by one.
|
54
|
+
def incr_refcount
|
55
|
+
@refcount += 1
|
56
|
+
end
|
57
|
+
|
58
|
+
# Decrement the reference count by one.
|
59
|
+
def decr_refcount
|
60
|
+
raise StandardError, 'Internal error' if @refcount.zero?
|
61
|
+
|
62
|
+
@refcount -= 1
|
63
|
+
end
|
64
|
+
|
65
|
+
# Select the references to production appearing in the rhs.
|
66
|
+
# @return [Array of ProductionRef]
|
67
|
+
def references
|
68
|
+
rhs.references
|
69
|
+
end
|
70
|
+
|
71
|
+
# Look in the rhs for all the references to the production passed as argument.
|
72
|
+
# @param a_prod [Production or ProductionRef] The production to search for.
|
73
|
+
# @return [Array] the array of ProductionRef to the passed production
|
74
|
+
def references_of(a_prod)
|
75
|
+
real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
|
76
|
+
rhs.references_of(real_prod)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Enumerate the digrams appearing in the right-hand side (rhs)
|
80
|
+
# @return [Array] the list of digrams found in rhs of this production.
|
81
|
+
def recalc_digrams
|
82
|
+
return [] if rhs.size < 2
|
83
|
+
|
84
|
+
result = []
|
85
|
+
rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
|
86
|
+
@digrams = result
|
87
|
+
end
|
88
|
+
|
89
|
+
# Does the rhs have exactly one digram only (= 2 symbols)?
|
90
|
+
# @return [true/false] true when the rhs contains exactly two symbols.
|
91
|
+
def single_digram?
|
92
|
+
rhs.size == 2
|
93
|
+
end
|
94
|
+
|
95
|
+
# Detect whether the last digram occurs twice
|
96
|
+
# Assumption: when a digram occurs twice in a production then it must occur
|
97
|
+
# at the end of the rhs
|
98
|
+
# @return [true/false] true when the digram occurs twice in rhs.
|
99
|
+
def repeated_digram?
|
100
|
+
return false if rhs.size < 3
|
101
|
+
|
102
|
+
my_digrams = digrams
|
103
|
+
all_keys = my_digrams.map(&:key)
|
104
|
+
last_key = all_keys.pop
|
105
|
+
same_key_found = all_keys.index(last_key)
|
106
|
+
!same_key_found.nil?
|
107
|
+
end
|
108
|
+
|
109
|
+
# Retrieve the last digram appearing in the RHS (if any).
|
110
|
+
# @return [Digram] last digram in the rhs otherwise nil.
|
111
|
+
def last_digram
|
112
|
+
digrams.empty? ? nil : digrams.last
|
113
|
+
end
|
114
|
+
|
115
|
+
# Emit a text representation of the production rule.
|
116
|
+
# Text is of the form:
|
117
|
+
# object id of production : rhs as space-separated sequence of symbols.
|
118
|
+
# @return [String]
|
119
|
+
def to_string
|
120
|
+
"#{object_id} : #{rhs.to_string}."
|
121
|
+
end
|
122
|
+
|
123
|
+
# Add a (grammar) symbol at the end of the RHS.
|
124
|
+
# @param aSymbol [Object] A (grammar) symbol to add.
|
125
|
+
def append_symbol(aSymbol)
|
126
|
+
case aSymbol
|
130
127
|
when Production
|
131
128
|
new_symb = ProductionRef.new(aSymbol)
|
132
129
|
when ProductionRef
|
@@ -138,91 +135,89 @@ class Production
|
|
138
135
|
new_symb = aSymbol.dup
|
139
136
|
else
|
140
137
|
new_symb = aSymbol
|
138
|
+
end
|
139
|
+
|
140
|
+
rhs << new_symb
|
141
|
+
digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
|
142
|
+
end
|
143
|
+
|
144
|
+
# Clear the right-hand side.
|
145
|
+
# Any referenced production has its reference counter decremented.
|
146
|
+
def clear_rhs
|
147
|
+
rhs.clear
|
148
|
+
end
|
149
|
+
|
150
|
+
# Find all the positions where the digram occurs in the rhs
|
151
|
+
# @param symb1 [Object] first symbol of the digram
|
152
|
+
# @param symb2 [Object] second symbol of the digram
|
153
|
+
# @return [Array] the list of indices where the digram occurs in rhs.
|
154
|
+
# @example
|
155
|
+
# # Given the production p : a b c a b a b d
|
156
|
+
# #Then ...
|
157
|
+
# p.positions_of(a, b) # => [0, 3, 5]
|
158
|
+
# # Caution: "overlapping" digrams shouldn't be counted
|
159
|
+
# # Given the production p : a a b a a a c d
|
160
|
+
# # Then ...
|
161
|
+
# p.positions_of(a, a) # => [0, 3]
|
162
|
+
def positions_of(symb1, symb2)
|
163
|
+
# Find the positions where the digram occurs in rhs
|
164
|
+
indices = [-2] # Dummy index!
|
165
|
+
(0...rhs.size).each do |i|
|
166
|
+
next if i == indices.last + 1
|
167
|
+
|
168
|
+
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
169
|
+
end
|
170
|
+
|
171
|
+
indices.shift
|
172
|
+
|
173
|
+
indices
|
174
|
+
end
|
175
|
+
|
176
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
177
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
178
|
+
# s1 s2 by a reference to P.
|
179
|
+
# @param another [Production or ProductionRef] a production that
|
180
|
+
# consists exactly of one digram (= 2 symbols).
|
181
|
+
def reduce_step(another)
|
182
|
+
(symb1, symb2) = another.rhs.symbols
|
183
|
+
pos = positions_of(symb1, symb2).reverse
|
184
|
+
|
185
|
+
# Replace the two symbol sequence by the production
|
186
|
+
pos.each { |index| rhs.reduce_step(index, another) }
|
187
|
+
|
188
|
+
recalc_digrams
|
141
189
|
end
|
142
190
|
|
143
|
-
rhs
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
#
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
indices.shift
|
175
|
-
|
176
|
-
return indices
|
177
|
-
end
|
178
|
-
|
179
|
-
# Given that the production P passed as argument has exactly 2 symbols
|
180
|
-
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
181
|
-
# s1 s2 by a reference to P.
|
182
|
-
# @param another [Production or ProductionRef] a production that
|
183
|
-
# consists exactly of one digram (= 2 symbols).
|
184
|
-
def reduce_step(another)
|
185
|
-
(symb1, symb2) = another.rhs.symbols
|
186
|
-
pos = positions_of(symb1, symb2).reverse
|
187
|
-
|
188
|
-
# Replace the two symbol sequence by the production
|
189
|
-
pos.each { |index| rhs.reduce_step(index, another) }
|
190
|
-
|
191
|
-
recalc_digrams
|
192
|
-
end
|
193
|
-
|
194
|
-
# Replace every occurrence of 'another' production in self.rhs by
|
195
|
-
# the symbols in the rhs of 'another'.
|
196
|
-
# @param another [Production or ProductionRef] a production that
|
197
|
-
# consists exactly of one digram (= 2 symbols).
|
198
|
-
# @example Synopsis
|
199
|
-
# # Given the production p_A : a p_B b p_B c
|
200
|
-
# # And the production p_B : x y
|
201
|
-
# # Then...
|
202
|
-
# p_A.derive_step(p_B)
|
203
|
-
# #Modifies p_A as into: p_A -> a x y b x y c
|
204
|
-
def derive_step(another)
|
205
|
-
(0...rhs.size).to_a.reverse_each do |index|
|
206
|
-
next unless rhs[index] == another
|
207
|
-
|
208
|
-
rhs.insert_at(index + 1, another.rhs)
|
209
|
-
another.decr_refcount
|
210
|
-
rhs.delete_at(index)
|
211
|
-
end
|
212
|
-
|
213
|
-
recalc_digrams
|
214
|
-
end
|
215
|
-
|
216
|
-
# Part of the 'visitee' role in Visitor design pattern.
|
217
|
-
# @param aVisitor[GrammarVisitor]
|
218
|
-
def accept(aVisitor)
|
219
|
-
aVisitor.start_visit_production(self)
|
220
|
-
|
221
|
-
rhs.accept(aVisitor)
|
222
|
-
|
223
|
-
aVisitor.end_visit_production(self)
|
224
|
-
end
|
225
|
-
end # class
|
191
|
+
# Replace every occurrence of 'another' production in self.rhs by
|
192
|
+
# the symbols in the rhs of 'another'.
|
193
|
+
# @param another [Production or ProductionRef] a production that
|
194
|
+
# consists exactly of one digram (= 2 symbols).
|
195
|
+
# @example Synopsis
|
196
|
+
# # Given the production p_A : a p_B b p_B c
|
197
|
+
# # And the production p_B : x y
|
198
|
+
# # Then...
|
199
|
+
# p_A.derive_step(p_B)
|
200
|
+
# # Modifies p_A into: p_A -> a x y b x y c
|
201
|
+
def derive_step(another)
|
202
|
+
(0...rhs.size).to_a.reverse_each do |index|
|
203
|
+
next unless rhs[index] == another
|
204
|
+
|
205
|
+
rhs.insert_at(index + 1, another.rhs)
|
206
|
+
another.decr_refcount
|
207
|
+
rhs.delete_at(index)
|
208
|
+
end
|
209
|
+
|
210
|
+
recalc_digrams
|
211
|
+
end
|
212
|
+
|
213
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
214
|
+
# @param aVisitor[GrammarVisitor]
|
215
|
+
def accept(aVisitor)
|
216
|
+
aVisitor.start_visit_production(self)
|
217
|
+
rhs.accept(aVisitor)
|
218
|
+
aVisitor.end_visit_production(self)
|
219
|
+
end
|
220
|
+
end # class
|
226
221
|
end # module
|
227
222
|
|
228
223
|
# End of file
|
@@ -42,12 +42,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
42
42
|
# Emit the text representation of a production reference.
|
43
43
|
# @return [String]
|
44
44
|
def to_s
|
45
|
-
|
45
|
+
production.object_id.to_s
|
46
46
|
end
|
47
47
|
|
48
48
|
alias to_string to_s
|
49
49
|
|
50
|
-
|
51
50
|
# Equality testing.
|
52
51
|
# A production ref is equal to another one when it
|
53
52
|
# refers to the same production or when it is compared to
|
@@ -57,13 +56,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
57
56
|
def ==(other)
|
58
57
|
return true if object_id == other.object_id
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
return result
|
59
|
+
if other.is_a?(ProductionRef)
|
60
|
+
production == other.production
|
61
|
+
else
|
62
|
+
production == other
|
63
|
+
end
|
67
64
|
end
|
68
65
|
|
69
66
|
# Produce a hash value.
|
@@ -74,7 +71,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
74
71
|
def hash
|
75
72
|
raise StandardError, 'Nil production' if production.nil?
|
76
73
|
|
77
|
-
|
74
|
+
production.hash
|
78
75
|
end
|
79
76
|
|
80
77
|
# Make this reference point to the given production.
|
@@ -84,7 +81,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
84
81
|
return if aProduction == @production
|
85
82
|
|
86
83
|
production&.decr_refcount
|
87
|
-
unless aProduction.
|
84
|
+
unless aProduction.is_a?(Production)
|
88
85
|
raise StandardError, "Illegal production type #{aProduction.class}"
|
89
86
|
end
|
90
87
|
|
@@ -102,7 +99,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
102
99
|
# @return [true / false] true when this object doesn't
|
103
100
|
# point to a production.
|
104
101
|
def unbound?
|
105
|
-
|
102
|
+
production.nil?
|
106
103
|
end
|
107
104
|
|
108
105
|
# Part of the 'visitee' role in the Visitor design pattern.
|