sequitur 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,243 +1,243 @@
1
- require_relative 'digram'
2
- require_relative 'symbol_sequence'
3
- require_relative 'production_ref'
4
-
5
- module Sequitur # Module for classes implementing the Sequitur algorithm
6
-
7
-
8
- # In a context-free grammar, a production is a rule in which
9
- # its left-hand side (LHS) consists solely of a non-terminal symbol
10
- # and the right-hand side (RHS) consists of a sequence of symbols.
11
- # The symbols in RHS can be either terminal or non-terminal symbols.
12
- # The rule stipulates that the LHS is equivalent to the RHS,
13
- # in other words every occurrence of the LHS can be substituted to
14
- # corresponding RHS.
15
- # Implementation note: the object id of the production is taken as its LHS.
16
- class Production
17
- # The right-hand side (rhs) consists of a sequence of grammar symbols
18
- attr_reader(:rhs)
19
-
20
- # The reference count (= how times other productions reference this one)
21
- attr_reader(:refcount)
22
-
23
- # The sequence of digrams appearing in the RHS
24
- attr_reader(:digrams)
25
-
26
- # Constructor.
27
- # Build a production with an empty RHS.
28
- def initialize()
29
- @rhs = SymbolSequence.new
30
- @refcount = 0
31
- @digrams = []
32
- end
33
-
34
-
35
-
36
- public
37
-
38
- # Identity testing.
39
- # @param other [] another production or production reference.
40
- # @return true when the receiver and other are the same.
41
- def ==(other)
42
- return true if object_id == other.object_id
43
-
44
- if other.is_a?(ProductionRef)
45
- result = (other == self)
46
- else
47
- result = false
48
- end
49
-
50
- return result
51
- end
52
-
53
-
54
- # Is the rhs empty?
55
- # @ return true if the rhs has no members.
56
- def empty?
57
- return rhs.empty?
58
- end
59
-
60
- # Increment the reference count by one.
61
- def incr_refcount()
62
- @refcount += 1
63
- end
64
-
65
- # Decrement the reference count by one.
66
- def decr_refcount()
67
- fail StandardError, 'Internal error' if @refcount == 0
68
- @refcount -= 1
69
- end
70
-
71
-
72
- # Select the references to production appearing in the rhs.
73
- # @return [Array of ProductionRef]
74
- def references()
75
- return rhs.references
76
- end
77
-
78
- # Look in the rhs all the references to a production passed a argument.
79
- # aProduction [aProduction or ProductionRef] The production to search for.
80
- # @return [Array] the array of ProductionRef to the passed production
81
- def references_of(a_prod)
82
- real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
83
- return rhs.references_of(real_prod)
84
- end
85
-
86
-
87
- # Enumerate the digrams appearing in the right-hand side (rhs)
88
- # @return [Array] the list of digrams found in rhs of this production.
89
- def recalc_digrams()
90
- return [] if rhs.size < 2
91
-
92
- result = []
93
- rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
94
- @digrams = result
95
- end
96
-
97
-
98
-
99
- # Does the rhs have exactly one digram only (= 2 symbols)?
100
- # @return [true/false] true when the rhs contains exactly two symbols.
101
- def single_digram?
102
- return rhs.size == 2
103
- end
104
-
105
-
106
- # Detect whether the last digram occurs twice
107
- # Assumption: when a digram occurs twice in a production then it must occur
108
- # at the end of the rhs
109
- # @return [true/false] true when the digram occurs twice in rhs.
110
- def repeated_digram?()
111
- return false if rhs.size < 3
112
-
113
- my_digrams = digrams
114
- all_keys = my_digrams.map(&:key)
115
- last_key = all_keys.pop
116
- same_key_found = all_keys.index(last_key)
117
- return !same_key_found.nil?
118
- end
119
-
120
- # Retrieve the last digram appearing in the RHS (if any).
121
- # @return [Digram] last digram in the rhs otherwise nil.
122
- def last_digram()
123
- result = digrams.empty? ? nil : digrams.last
124
- return result
125
- end
126
-
127
-
128
-
129
- # Emit a text representation of the production rule.
130
- # Text is of the form:
131
- # object id of production : rhs as space-separated sequence of symbols.
132
- # @return [String]
133
- def to_string()
134
- return "#{object_id} : #{rhs.to_string}."
135
- end
136
-
137
- # Add a (grammar) symbol at the end of the RHS.
138
- # @param aSymbol [Object] A (grammar) symbol to add.
139
- def append_symbol(aSymbol)
140
- case aSymbol
141
- when Production
142
- new_symb = ProductionRef.new(aSymbol)
143
- when ProductionRef
144
- if aSymbol.unbound?
145
- msg = 'Fail to append reference to nil production in '
146
- msg << to_string
147
- fail StandardError, msg
148
- end
149
- new_symb = aSymbol.dup
150
- else
151
- new_symb = aSymbol
152
- end
153
-
154
- rhs << new_symb
155
- digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
156
- end
157
-
158
- # Clear the right-hand side.
159
- # Any referenced production has its reference counter decremented.
160
- def clear_rhs()
161
- rhs.clear
162
- end
163
-
164
- # Find all the positions where the digram occurs in the rhs
165
- # @param symb1 [Object] first symbol of the digram
166
- # @param symb2 [Object] second symbol of the digram
167
- # @return [Array] the list of indices where the digram occurs in rhs.
168
- # @example
169
- # # Given the production p : a b c a b a b d
170
- # #Then ...
171
- # p.positions_of(a, b) # => [0, 3, 5]
172
- # # Caution: "overlapping" digrams shouldn't be counted
173
- # # Given the production p : a a b a a a c d
174
- # # Then ...
175
- # p.positions_of(a, a) # => [0, 3]
176
- def positions_of(symb1, symb2)
177
-
178
- # Find the positions where the digram occur in rhs
179
- indices = [ -2 ] # Dummy index!
180
- (0...rhs.size).each do |i|
181
- next if i == indices.last + 1
182
- indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
183
- end
184
-
185
- indices.shift
186
-
187
- return indices
188
- end
189
-
190
-
191
- # Given that the production P passed as argument has exactly 2 symbols
192
- # in its rhs s1 s2, substitute in the rhs of self all occurrences of
193
- # s1 s2 by a reference to P.
194
- # @param another [Production or ProductionRef] a production that
195
- # consists exactly of one digram (= 2 symbols).
196
- def reduce_step(another)
197
- (symb1, symb2) = another.rhs.symbols
198
- pos = positions_of(symb1, symb2).reverse
199
-
200
- # Replace the two symbol sequence by the production
201
- pos.each { |index| rhs.reduce_step(index, another) }
202
-
203
- recalc_digrams
204
- end
205
-
206
- # Replace every occurrence of 'another' production in self.rhs by
207
- # the symbols in the rhs of 'another'.
208
- # @param another [Production or ProductionRef] a production that
209
- # consists exactly of one digram (= 2 symbols).
210
- # @example Synopsis
211
- # # Given the production p_A : a p_B b p_B c
212
- # # And the production p_B : x y
213
- # # Then...
214
- # p_A.derive_step(p_B)
215
- # #Modifies p_A as into: p_A -> a x y b x y c
216
- def derive_step(another)
217
- (0...rhs.size).to_a.reverse.each do |index|
218
- next unless rhs[index] == another
219
-
220
- rhs.insert_at(index + 1, another.rhs)
221
- another.decr_refcount
222
- rhs.delete_at(index)
223
- end
224
-
225
- recalc_digrams
226
- end
227
-
228
-
229
- # Part of the 'visitee' role in Visitor design pattern.
230
- # @param aVisitor[GrammarVisitor]
231
- def accept(aVisitor)
232
- aVisitor.start_visit_production(self)
233
-
234
- rhs.accept(aVisitor)
235
-
236
- aVisitor.end_visit_production(self)
237
- end
238
-
239
- end # class
240
-
241
- end # module
242
-
243
- # End of file
1
+ require_relative 'digram'
2
+ require_relative 'symbol_sequence'
3
+ require_relative 'production_ref'
4
+
5
+ module Sequitur # Module for classes implementing the Sequitur algorithm
6
+
7
+
8
+ # In a context-free grammar, a production is a rule in which
9
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
10
+ # and the right-hand side (RHS) consists of a sequence of symbols.
11
+ # The symbols in RHS can be either terminal or non-terminal symbols.
12
+ # The rule stipulates that the LHS is equivalent to the RHS,
13
+ # in other words every occurrence of the LHS can be substituted by
14
+ # the corresponding RHS.
15
+ # Implementation note: the object id of the production is taken as its LHS.
16
+ class Production
17
+ # The right-hand side (rhs) consists of a sequence of grammar symbols
18
+ attr_reader(:rhs)
19
+
20
+ # The reference count (= how many times other productions reference this one)
21
+ attr_reader(:refcount)
22
+
23
+ # The sequence of digrams appearing in the RHS
24
+ attr_reader(:digrams)
25
+
26
+ # Constructor.
27
+ # Build a production with an empty RHS.
28
+ def initialize()
29
+ @rhs = SymbolSequence.new
30
+ @refcount = 0
31
+ @digrams = []
32
+ end
33
+
34
+
35
+
36
+ public
37
+
38
+ # Identity testing.
39
+ # @param other [Production, ProductionRef] another production or production reference.
40
+ # @return true when the receiver and other are the same.
41
+ def ==(other)
42
+ return true if object_id == other.object_id
43
+
44
+ if other.is_a?(ProductionRef)
45
+ result = (other == self)
46
+ else
47
+ result = false
48
+ end
49
+
50
+ return result
51
+ end
52
+
53
+
54
+ # Is the rhs empty?
55
+ # @return true if the rhs has no members.
56
+ def empty?
57
+ return rhs.empty?
58
+ end
59
+
60
+ # Increment the reference count by one.
61
+ def incr_refcount()
62
+ @refcount += 1
63
+ end
64
+
65
+ # Decrement the reference count by one.
66
+ def decr_refcount()
67
+ fail StandardError, 'Internal error' if @refcount == 0
68
+ @refcount -= 1
69
+ end
70
+
71
+
72
+ # Select the references to production appearing in the rhs.
73
+ # @return [Array of ProductionRef]
74
+ def references()
75
+ return rhs.references
76
+ end
77
+
78
+ # Look in the rhs for all the references to the production passed as argument.
79
+ # @param a_prod [Production or ProductionRef] The production to search for.
80
+ # @return [Array] the array of ProductionRef to the passed production
81
+ def references_of(a_prod)
82
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
83
+ return rhs.references_of(real_prod)
84
+ end
85
+
86
+
87
+ # Enumerate the digrams appearing in the right-hand side (rhs)
88
+ # @return [Array] the list of digrams found in rhs of this production.
89
+ def recalc_digrams()
90
+ return [] if rhs.size < 2
91
+
92
+ result = []
93
+ rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
94
+ @digrams = result
95
+ end
96
+
97
+
98
+
99
+ # Does the rhs have exactly one digram only (= 2 symbols)?
100
+ # @return [true/false] true when the rhs contains exactly two symbols.
101
+ def single_digram?
102
+ return rhs.size == 2
103
+ end
104
+
105
+
106
+ # Detect whether the last digram occurs twice
107
+ # Assumption: when a digram occurs twice in a production then it must occur
108
+ # at the end of the rhs
109
+ # @return [true/false] true when the digram occurs twice in rhs.
110
+ def repeated_digram?()
111
+ return false if rhs.size < 3
112
+
113
+ my_digrams = digrams
114
+ all_keys = my_digrams.map(&:key)
115
+ last_key = all_keys.pop
116
+ same_key_found = all_keys.index(last_key)
117
+ return !same_key_found.nil?
118
+ end
119
+
120
+ # Retrieve the last digram appearing in the RHS (if any).
121
+ # @return [Digram] last digram in the rhs otherwise nil.
122
+ def last_digram()
123
+ result = digrams.empty? ? nil : digrams.last
124
+ return result
125
+ end
126
+
127
+
128
+
129
+ # Emit a text representation of the production rule.
130
+ # Text is of the form:
131
+ # object id of production : rhs as space-separated sequence of symbols.
132
+ # @return [String]
133
+ def to_string()
134
+ return "#{object_id} : #{rhs.to_string}."
135
+ end
136
+
137
+ # Add a (grammar) symbol at the end of the RHS.
138
+ # @param aSymbol [Object] A (grammar) symbol to add.
139
+ def append_symbol(aSymbol)
140
+ case aSymbol
141
+ when Production
142
+ new_symb = ProductionRef.new(aSymbol)
143
+ when ProductionRef
144
+ if aSymbol.unbound?
145
+ msg = 'Fail to append reference to nil production in '
146
+ msg << to_string
147
+ fail StandardError, msg
148
+ end
149
+ new_symb = aSymbol.dup
150
+ else
151
+ new_symb = aSymbol
152
+ end
153
+
154
+ rhs << new_symb
155
+ digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
156
+ end
157
+
158
+ # Clear the right-hand side.
159
+ # Any referenced production has its reference counter decremented.
160
+ def clear_rhs()
161
+ rhs.clear
162
+ end
163
+
164
+ # Find all the positions where the digram occurs in the rhs
165
+ # @param symb1 [Object] first symbol of the digram
166
+ # @param symb2 [Object] second symbol of the digram
167
+ # @return [Array] the list of indices where the digram occurs in rhs.
168
+ # @example
169
+ # # Given the production p : a b c a b a b d
170
+ # # Then ...
171
+ # p.positions_of(a, b) # => [0, 3, 5]
172
+ # # Caution: "overlapping" digrams shouldn't be counted
173
+ # # Given the production p : a a b a a a c d
174
+ # # Then ...
175
+ # p.positions_of(a, a) # => [0, 3]
176
+ def positions_of(symb1, symb2)
177
+
178
+ # Find the positions where the digram occurs in rhs
179
+ indices = [ -2 ] # Dummy index!
180
+ (0...rhs.size).each do |i|
181
+ next if i == indices.last + 1
182
+ indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
183
+ end
184
+
185
+ indices.shift
186
+
187
+ return indices
188
+ end
189
+
190
+
191
+ # Given that the production P passed as argument has exactly 2 symbols
192
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
193
+ # s1 s2 by a reference to P.
194
+ # @param another [Production or ProductionRef] a production that
195
+ # consists exactly of one digram (= 2 symbols).
196
+ def reduce_step(another)
197
+ (symb1, symb2) = another.rhs.symbols
198
+ pos = positions_of(symb1, symb2).reverse
199
+
200
+ # Replace the two symbol sequence by the production
201
+ pos.each { |index| rhs.reduce_step(index, another) }
202
+
203
+ recalc_digrams
204
+ end
205
+
206
+ # Replace every occurrence of 'another' production in self.rhs by
207
+ # the symbols in the rhs of 'another'.
208
+ # @param another [Production or ProductionRef] a production that
209
+ # consists exactly of one digram (= 2 symbols).
210
+ # @example Synopsis
211
+ # # Given the production p_A : a p_B b p_B c
212
+ # # And the production p_B : x y
213
+ # # Then...
214
+ # p_A.derive_step(p_B)
215
+ # # Modifies p_A into: p_A -> a x y b x y c
216
+ def derive_step(another)
217
+ (0...rhs.size).to_a.reverse.each do |index|
218
+ next unless rhs[index] == another
219
+
220
+ rhs.insert_at(index + 1, another.rhs)
221
+ another.decr_refcount
222
+ rhs.delete_at(index)
223
+ end
224
+
225
+ recalc_digrams
226
+ end
227
+
228
+
229
+ # Part of the 'visitee' role in Visitor design pattern.
230
+ # @param aVisitor [GrammarVisitor]
231
+ def accept(aVisitor)
232
+ aVisitor.start_visit_production(self)
233
+
234
+ rhs.accept(aVisitor)
235
+
236
+ aVisitor.end_visit_production(self)
237
+ end
238
+
239
+ end # class
240
+
241
+ end # module
242
+
243
+ # End of file