sequitur 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -437
- data/CHANGELOG.md +9 -0
- data/Gemfile +0 -2
- data/LICENSE.txt +1 -1
- data/README.md +2 -3
- data/Rakefile +0 -2
- data/appveyor.yml +10 -10
- data/examples/inductive_english.rb +35 -0
- data/examples/integer_sample.rb +0 -1
- data/examples/porridge.rb +9 -9
- data/examples/word_sample.rb +4 -5
- data/lib/sequitur/constants.rb +7 -4
- data/lib/sequitur/digram.rb +11 -11
- data/lib/sequitur/dynamic_grammar.rb +12 -12
- data/lib/sequitur/formatter/base_formatter.rb +2 -2
- data/lib/sequitur/formatter/base_text.rb +8 -9
- data/lib/sequitur/formatter/debug.rb +10 -4
- data/lib/sequitur/grammar_visitor.rb +7 -7
- data/lib/sequitur/production.rb +203 -205
- data/lib/sequitur/production_ref.rb +18 -20
- data/lib/sequitur/sequitur_grammar.rb +135 -137
- data/lib/sequitur/symbol_sequence.rb +29 -32
- data/lib/sequitur.rb +6 -6
- data/sig/lib/sequitur/constants.rbs +10 -0
- data/sig/lib/sequitur/digram.rbs +37 -0
- data/sig/lib/sequitur/dynamic_grammar.rbs +58 -0
- data/sig/lib/sequitur/formatter/base_formatter.rbs +20 -0
- data/sig/lib/sequitur/formatter/base_text.rbs +62 -0
- data/sig/lib/sequitur/formatter/debug.rbs +89 -0
- data/sig/lib/sequitur/production.rbs +120 -0
- data/sig/lib/sequitur/production_ref.rbs +73 -0
- data/sig/lib/sequitur/sequitur_grammar.rbs +55 -0
- data/sig/lib/sequitur/symbol_sequence.rbs +83 -0
- data/sig/lib/sequitur.rbs +9 -0
- data/spec/sequitur/digram_spec.rb +13 -12
- data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
- data/spec/sequitur/formatter/base_text_spec.rb +70 -72
- data/spec/sequitur/formatter/debug_spec.rb +90 -92
- data/spec/sequitur/grammar_visitor_spec.rb +70 -71
- data/spec/sequitur/production_ref_spec.rb +92 -92
- data/spec/sequitur/production_spec.rb +30 -34
- data/spec/sequitur/sequitur_grammar_spec.rb +47 -46
- data/spec/sequitur/symbol_sequence_spec.rb +102 -105
- data/spec/spec_helper.rb +0 -1
- metadata +28 -17
- data/.travis.yml +0 -29
data/lib/sequitur/production.rb
CHANGED
@@ -5,128 +5,128 @@ require_relative 'symbol_sequence'
|
|
5
5
|
require_relative 'production_ref'
|
6
6
|
|
7
7
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
8
|
-
# In a context-free grammar, a production is a rule in which
|
9
|
-
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
-
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
-
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
-
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
-
# in other words every occurrence of the LHS can be substituted to
|
14
|
-
# corresponding RHS.
|
15
|
-
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
-
class Production
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
8
|
+
# In a context-free grammar, a production is a rule in which
|
9
|
+
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
10
|
+
# and the right-hand side (RHS) consists of a sequence of symbols.
|
11
|
+
# The symbols in RHS can be either terminal or non-terminal symbols.
|
12
|
+
# The rule stipulates that the LHS is equivalent to the RHS,
|
13
|
+
# in other words every occurrence of the LHS can be substituted by the
|
14
|
+
# corresponding RHS.
|
15
|
+
# Implementation note: the object id of the production is taken as its LHS.
|
16
|
+
class Production
|
17
|
+
# @return [Sequitur::SymbolSequence] The right-hand side (rhs)
|
18
|
+
# consists of a sequence of grammar symbols
|
19
|
+
attr_reader(:rhs)
|
20
|
+
|
21
|
+
# @return [Integer] The reference count (= how many times other productions reference this one)
|
22
|
+
attr_reader(:refcount)
|
23
|
+
|
24
|
+
# @return [Array<Sequitur::Digram>] The sequence of digrams appearing in the RHS
|
25
|
+
attr_reader(:digrams)
|
26
|
+
|
27
|
+
# Constructor.
|
28
|
+
# Build a production with an empty RHS.
|
29
|
+
def initialize
|
30
|
+
@rhs = SymbolSequence.new
|
31
|
+
@refcount = 0
|
32
|
+
@digrams = []
|
33
|
+
end
|
34
|
+
|
35
|
+
# Identity testing.
|
36
|
+
# @param other [Production, ProductionRef] another production or production reference.
|
37
|
+
# @return [TrueClass, FalseClass] true when the receiver and other are the same.
|
38
|
+
def ==(other)
|
39
|
+
return true if object_id == other.object_id
|
40
|
+
|
41
|
+
if other.is_a?(ProductionRef)
|
42
|
+
(other == self)
|
43
|
+
else
|
44
|
+
false
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# Is the rhs empty?
|
49
|
+
# @return [TrueClass, FalseClass] true if the rhs has no members.
|
50
|
+
def empty?
|
51
|
+
rhs.empty?
|
52
|
+
end
|
53
|
+
|
54
|
+
# Increment the reference count by one.
|
55
|
+
# @return [Integer]
|
56
|
+
def incr_refcount
|
57
|
+
@refcount += 1
|
58
|
+
end
|
59
|
+
|
60
|
+
# Decrement the reference count by one.
|
61
|
+
# @return [Integer]
|
62
|
+
def decr_refcount
|
63
|
+
raise StandardError, 'Internal error' if @refcount.zero?
|
64
|
+
|
65
|
+
@refcount -= 1
|
66
|
+
end
|
67
|
+
|
68
|
+
# Select the references to production appearing in the rhs.
|
69
|
+
# @return [Array<ProductionRef>]
|
70
|
+
def references
|
71
|
+
rhs.references
|
72
|
+
end
|
73
|
+
|
74
|
+
# Look in the rhs for all the references to the production passed as argument.
|
75
|
+
# @param a_prod [Production, ProductionRef] The production to search for.
|
76
|
+
# @return [Array<ProductionRef>]
|
77
|
+
def references_of(a_prod)
|
78
|
+
real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
|
79
|
+
rhs.references_of(real_prod)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Enumerate the digrams appearing in the right-hand side (rhs)
|
83
|
+
# @return [Array<Sequitur::Digram>] the list of digrams found in rhs of this production.
|
84
|
+
def recalc_digrams
|
85
|
+
return [] if rhs.size < 2
|
86
|
+
|
87
|
+
result = []
|
88
|
+
rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
|
89
|
+
@digrams = result
|
90
|
+
end
|
91
|
+
|
92
|
+
# Does the rhs have exactly one digram only (= 2 symbols)?
|
93
|
+
# @return [TrueClass, FalseClass] true when the rhs contains exactly two symbols.
|
94
|
+
def single_digram?
|
95
|
+
rhs.size == 2
|
96
|
+
end
|
97
|
+
|
98
|
+
# Detect whether the last digram occurs twice
|
99
|
+
# Assumption: when a digram occurs twice in a production then it must occur
|
100
|
+
# at the end of the rhs
|
101
|
+
# @return [TrueClass, FalseClass] true when the digram occurs twice in rhs.
|
102
|
+
def repeated_digram?
|
103
|
+
return false if rhs.size < 3
|
104
|
+
|
105
|
+
my_digrams = digrams
|
106
|
+
all_keys = my_digrams.map(&:key)
|
107
|
+
last_key = all_keys.pop
|
108
|
+
same_key_found = all_keys.index(last_key)
|
109
|
+
!same_key_found.nil?
|
110
|
+
end
|
111
|
+
|
112
|
+
# Retrieve the last digram appearing in the RHS (if any).
|
113
|
+
# @return [Sequitur::Digram, NilClass] last digram in the rhs otherwise nil.
|
114
|
+
def last_digram
|
115
|
+
digrams.empty? ? nil : digrams.last
|
116
|
+
end
|
117
|
+
|
118
|
+
# Emit a text representation of the production rule.
|
119
|
+
# Text is of the form:
|
120
|
+
# object id of production : rhs as space-separated sequence of symbols.
|
121
|
+
# @return [String]
|
122
|
+
def to_string
|
123
|
+
"#{object_id} : #{rhs.to_string}."
|
124
|
+
end
|
125
|
+
|
126
|
+
# Add a (grammar) symbol at the end of the RHS.
|
127
|
+
# @param aSymbol [Object] A (grammar) symbol to add.
|
128
|
+
def append_symbol(aSymbol)
|
129
|
+
case aSymbol
|
130
130
|
when Production
|
131
131
|
new_symb = ProductionRef.new(aSymbol)
|
132
132
|
when ProductionRef
|
@@ -138,91 +138,89 @@ class Production
|
|
138
138
|
new_symb = aSymbol.dup
|
139
139
|
else
|
140
140
|
new_symb = aSymbol
|
141
|
+
end
|
142
|
+
|
143
|
+
rhs << new_symb
|
144
|
+
digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
|
145
|
+
end
|
146
|
+
|
147
|
+
# Clear the right-hand side.
|
148
|
+
# Any referenced production has its reference counter decremented.
|
149
|
+
def clear_rhs
|
150
|
+
rhs.clear
|
151
|
+
end
|
152
|
+
|
153
|
+
# Find all the positions where the digram occurs in the rhs
|
154
|
+
# @param symb1 [Object] first symbol of the digram
|
155
|
+
# @param symb2 [Object] second symbol of the digram
|
156
|
+
# @return [Array<Integer>] the list of indices where the digram occurs in rhs.
|
157
|
+
# @example
|
158
|
+
# # Given the production p : a b c a b a b d
|
159
|
+
# #Then ...
|
160
|
+
# p.positions_of(a, b) # => [0, 3, 5]
|
161
|
+
# # Caution: "overlapping" digrams shouldn't be counted
|
162
|
+
# # Given the production p : a a b a a a c d
|
163
|
+
# # Then ...
|
164
|
+
# p.positions_of(a, a) # => [0, 3]
|
165
|
+
def positions_of(symb1, symb2)
|
166
|
+
# Find the positions where the digram occur in rhs
|
167
|
+
indices = [-2] # Dummy index!
|
168
|
+
(0...rhs.size).each do |i|
|
169
|
+
next if i == indices.last + 1
|
170
|
+
|
171
|
+
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
172
|
+
end
|
173
|
+
|
174
|
+
indices.shift
|
175
|
+
|
176
|
+
indices
|
177
|
+
end
|
178
|
+
|
179
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
180
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
181
|
+
# s1 s2 by a reference to P.
|
182
|
+
# @param another [Production, ProductionRef] a production that
|
183
|
+
# consists exactly of one digram (= 2 symbols).
|
184
|
+
def reduce_step(another)
|
185
|
+
(symb1, symb2) = another.rhs.symbols
|
186
|
+
pos = positions_of(symb1, symb2).reverse
|
187
|
+
|
188
|
+
# Replace the two symbol sequence by the production
|
189
|
+
pos.each { |index| rhs.reduce_step(index, another) }
|
190
|
+
|
191
|
+
recalc_digrams
|
141
192
|
end
|
142
193
|
|
143
|
-
rhs
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
#
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
indices.shift
|
175
|
-
|
176
|
-
return indices
|
177
|
-
end
|
178
|
-
|
179
|
-
# Given that the production P passed as argument has exactly 2 symbols
|
180
|
-
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
181
|
-
# s1 s2 by a reference to P.
|
182
|
-
# @param another [Production or ProductionRef] a production that
|
183
|
-
# consists exactly of one digram (= 2 symbols).
|
184
|
-
def reduce_step(another)
|
185
|
-
(symb1, symb2) = another.rhs.symbols
|
186
|
-
pos = positions_of(symb1, symb2).reverse
|
187
|
-
|
188
|
-
# Replace the two symbol sequence by the production
|
189
|
-
pos.each { |index| rhs.reduce_step(index, another) }
|
190
|
-
|
191
|
-
recalc_digrams
|
192
|
-
end
|
193
|
-
|
194
|
-
# Replace every occurrence of 'another' production in self.rhs by
|
195
|
-
# the symbols in the rhs of 'another'.
|
196
|
-
# @param another [Production or ProductionRef] a production that
|
197
|
-
# consists exactly of one digram (= 2 symbols).
|
198
|
-
# @example Synopsis
|
199
|
-
# # Given the production p_A : a p_B b p_B c
|
200
|
-
# # And the production p_B : x y
|
201
|
-
# # Then...
|
202
|
-
# p_A.derive_step(p_B)
|
203
|
-
# #Modifies p_A as into: p_A -> a x y b x y c
|
204
|
-
def derive_step(another)
|
205
|
-
(0...rhs.size).to_a.reverse_each do |index|
|
206
|
-
next unless rhs[index] == another
|
207
|
-
|
208
|
-
rhs.insert_at(index + 1, another.rhs)
|
209
|
-
another.decr_refcount
|
210
|
-
rhs.delete_at(index)
|
211
|
-
end
|
212
|
-
|
213
|
-
recalc_digrams
|
214
|
-
end
|
215
|
-
|
216
|
-
# Part of the 'visitee' role in Visitor design pattern.
|
217
|
-
# @param aVisitor[GrammarVisitor]
|
218
|
-
def accept(aVisitor)
|
219
|
-
aVisitor.start_visit_production(self)
|
220
|
-
|
221
|
-
rhs.accept(aVisitor)
|
222
|
-
|
223
|
-
aVisitor.end_visit_production(self)
|
224
|
-
end
|
225
|
-
end # class
|
194
|
+
# Replace every occurrence of 'another' production in self.rhs by
|
195
|
+
# the symbols in the rhs of 'another'.
|
196
|
+
# @param another [Production, ProductionRef] a production that
|
197
|
+
# consists exactly of one digram (= 2 symbols).
|
198
|
+
# @example Synopsis
|
199
|
+
# # Given the production p_A : a p_B b p_B c
|
200
|
+
# # And the production p_B : x y
|
201
|
+
# # Then...
|
202
|
+
# p_A.derive_step(p_B)
|
203
|
+
# # Modifies p_A into: p_A -> a x y b x y c
|
204
|
+
def derive_step(another)
|
205
|
+
(0...rhs.size).to_a.reverse_each do |index|
|
206
|
+
next unless rhs[index] == another
|
207
|
+
|
208
|
+
rhs.insert_at(index + 1, another.rhs)
|
209
|
+
another.decr_refcount
|
210
|
+
rhs.delete_at(index)
|
211
|
+
end
|
212
|
+
|
213
|
+
recalc_digrams
|
214
|
+
end
|
215
|
+
|
216
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
217
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor
|
218
|
+
def accept(aVisitor)
|
219
|
+
aVisitor.start_visit_production(self)
|
220
|
+
rhs.accept(aVisitor)
|
221
|
+
aVisitor.end_visit_production(self)
|
222
|
+
end
|
223
|
+
end # class
|
226
224
|
end # module
|
227
225
|
|
228
226
|
# End of file
|
@@ -17,11 +17,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
17
17
|
# # ... Production reference count is updated...
|
18
18
|
# puts prod.refcount # outputs 1
|
19
19
|
class ProductionRef
|
20
|
-
# Link to the production to reference.
|
20
|
+
# @return [Sequitur::Production] Link to the production to reference.
|
21
21
|
attr_reader(:production)
|
22
22
|
|
23
23
|
# Constructor
|
24
|
-
# @param target [Production
|
24
|
+
# @param target [Production, ProductionRef]
|
25
25
|
# The production that is being referenced.
|
26
26
|
def initialize(target)
|
27
27
|
bind_to(target)
|
@@ -42,49 +42,46 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
42
42
|
# Emit the text representation of a production reference.
|
43
43
|
# @return [String]
|
44
44
|
def to_s
|
45
|
-
|
45
|
+
production.object_id.to_s
|
46
46
|
end
|
47
47
|
|
48
48
|
alias to_string to_s
|
49
49
|
|
50
|
-
|
51
50
|
# Equality testing.
|
52
51
|
# A production ref is equal to another one when it
|
53
52
|
# refers to the same production or when it is compared to
|
54
53
|
# the production it refers to.
|
55
|
-
# @param other [ProductionRef]
|
56
|
-
# @return [
|
54
|
+
# @param other [Production, ProductionRef]
|
55
|
+
# @return [TrueClass, FalseClass]
|
57
56
|
def ==(other)
|
58
57
|
return true if object_id == other.object_id
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
return result
|
59
|
+
if other.is_a?(ProductionRef)
|
60
|
+
production == other.production
|
61
|
+
else
|
62
|
+
production == other
|
63
|
+
end
|
67
64
|
end
|
68
65
|
|
69
66
|
# Produce a hash value.
|
70
67
|
# A reference has no identity on its own,
|
71
68
|
# the method returns the hash value of the
|
72
69
|
# referenced production
|
73
|
-
# @return [
|
70
|
+
# @return [Integer] the hash value
|
74
71
|
def hash
|
75
72
|
raise StandardError, 'Nil production' if production.nil?
|
76
73
|
|
77
|
-
|
74
|
+
production.hash
|
78
75
|
end
|
79
76
|
|
80
77
|
# Make this reference point to the given production.
|
81
|
-
# @param aProduction [Production
|
78
|
+
# @param aProduction [Production, ProductionRef] the production
|
82
79
|
# to refer to
|
83
80
|
def bind_to(aProduction)
|
84
81
|
return if aProduction == @production
|
85
82
|
|
86
83
|
production&.decr_refcount
|
87
|
-
unless aProduction.
|
84
|
+
unless aProduction.is_a?(Production)
|
88
85
|
raise StandardError, "Illegal production type #{aProduction.class}"
|
89
86
|
end
|
90
87
|
|
@@ -93,20 +90,21 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
93
90
|
end
|
94
91
|
|
95
92
|
# Clear the reference to the target production.
|
93
|
+
# @return [NilClass]
|
96
94
|
def unbind
|
97
95
|
production.decr_refcount
|
98
96
|
@production = nil
|
99
97
|
end
|
100
98
|
|
101
99
|
# Check that this object doesn't refer to any production.
|
102
|
-
# @return [
|
100
|
+
# @return [TrueClass, FalseClass] true when this object doesn't
|
103
101
|
# point to a production.
|
104
102
|
def unbound?
|
105
|
-
|
103
|
+
production.nil?
|
106
104
|
end
|
107
105
|
|
108
106
|
# Part of the 'visitee' role in the Visitor design pattern.
|
109
|
-
# @param aVisitor [GrammarVisitor] the visitor
|
107
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor
|
110
108
|
def accept(aVisitor)
|
111
109
|
aVisitor.visit_prod_ref(self)
|
112
110
|
end
|