sequitur 0.1.10 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,243 +1,243 @@
1
- require_relative 'digram'
2
- require_relative 'symbol_sequence'
3
- require_relative 'production_ref'
4
-
5
- module Sequitur # Module for classes implementing the Sequitur algorithm
6
-
7
-
8
- # In a context-free grammar, a production is a rule in which
9
- # its left-hand side (LHS) consists solely of a non-terminal symbol
10
- # and the right-hand side (RHS) consists of a sequence of symbols.
11
- # The symbols in RHS can be either terminal or non-terminal symbols.
12
- # The rule stipulates that the LHS is equivalent to the RHS,
13
- # in other words, every occurrence of the LHS can be replaced by the
14
- # corresponding RHS.
15
- # Implementation note: the object id of the production is taken as its LHS.
16
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader(:rhs)

  # The reference count (= how many times other productions reference this one)
  attr_reader(:refcount)

  # The sequence of digrams appearing in the RHS
  attr_reader(:digrams)

  # Constructor.
  # Build a production with an empty RHS, a reference count of zero
  # and an empty list of digrams.
  def initialize
    @rhs = SymbolSequence.new
    @refcount = 0
    @digrams = []
  end

  # Identity testing.
  # @param other [Production, ProductionRef] another production
  #   or production reference.
  # @return [true/false] true when the receiver and other are the same.
  def ==(other)
    return true if equal?(other)

    # A reference is compared through its own == (with self as argument);
    # anything else that isn't the receiver itself is considered different.
    other.is_a?(ProductionRef) ? (other == self) : false
  end

  # Is the rhs empty?
  # @return [true/false] true if the rhs has no members.
  def empty?
    rhs.empty?
  end

  # Increment the reference count by one.
  # @return [Integer] the updated reference count.
  def incr_refcount
    @refcount += 1
  end

  # Decrement the reference count by one.
  # @return [Integer] the updated reference count.
  # @raise [StandardError] when the reference count is already zero.
  def decr_refcount
    raise StandardError, 'Internal error' if @refcount.zero?

    @refcount -= 1
  end

  # Select the references to production appearing in the rhs.
  # @return [Array of ProductionRef]
  def references
    rhs.references
  end

  # Look in the rhs for all the references to the production passed
  # as argument.
  # @param a_prod [Production or ProductionRef] The production to search for.
  # @return [Array] the array of ProductionRef to the passed production
  def references_of(a_prod)
    # Unwrap a reference so that the search is done on the production itself
    real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
    rhs.references_of(real_prod)
  end

  # Rebuild the list of digrams from the current content of the rhs.
  # The list is reset to empty when the rhs has fewer than two symbols,
  # so that no digram of a previous (longer) rhs survives.
  # @return [Array] the list of digrams found in rhs of this production.
  def recalc_digrams
    # each_cons(2) yields nothing for fewer than two symbols => empty list
    @digrams = rhs.symbols.each_cons(2).map do |couple|
      Digram.new(*couple, self)
    end
  end

  # Does the rhs have exactly one digram only (= 2 symbols)?
  # @return [true/false] true when the rhs contains exactly two symbols.
  def single_digram?
    rhs.size == 2
  end

  # Detect whether the last digram occurs twice
  # Assumption: when a digram occurs twice in a production then it must occur
  # at the end of the rhs
  # @return [true/false] true when the digram occurs twice in rhs.
  def repeated_digram?
    return false if rhs.size < 3

    # Compare the key of the last digram with the keys of all previous ones
    *earlier_keys, last_key = digrams.map(&:key)
    earlier_keys.include?(last_key)
  end

  # Retrieve the last digram appearing in the RHS (if any).
  # @return [Digram] last digram in the rhs otherwise nil.
  def last_digram
    digrams.last
  end

  # Emit a text representation of the production rule.
  # Text is of the form:
  # object id of production : rhs as space-separated sequence of symbols.
  # @return [String]
  def to_string
    "#{object_id} : #{rhs.to_string}."
  end

  # Add a (grammar) symbol at the end of the RHS.
  # A production is wrapped into a new reference, a reference is duplicated
  # and any other object is appended as is. When the rhs then holds at least
  # two symbols, the digram made of its last two symbols is recorded.
  # @param aSymbol [Object] A (grammar) symbol to add.
  # @raise [StandardError] when aSymbol is an unbound production reference.
  def append_symbol(aSymbol)
    new_symb =
      case aSymbol
      when Production
        ProductionRef.new(aSymbol)
      when ProductionRef
        if aSymbol.unbound?
          # Message built by interpolation: no string literal is mutated
          raise StandardError,
                "Fail to append reference to nil production in #{to_string}"
        end
        aSymbol.dup
      else
        aSymbol
      end

    rhs << new_symb
    digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
  end

  # Clear the right-hand side and forget its digrams.
  # NOTE(review): decrementing the reference counter of the referenced
  # productions is presumably done by the rhs object in its clear method;
  # confirm in SymbolSequence.
  # @return the value returned by the clear method of the rhs.
  def clear_rhs
    result = rhs.clear
    @digrams = [] # An empty rhs cannot contain any digram
    result
  end

  # Find all the positions where the digram occurs in the rhs
  # @param symb1 [Object] first symbol of the digram
  # @param symb2 [Object] second symbol of the digram
  # @return [Array] the list of indices where the digram occurs in rhs.
  # @example
  #   # Given the production p : a b c a b a b d
  #   # Then ...
  #   p.positions_of(a, b) # => [0, 3, 5]
  #   # Caution: "overlapping" digrams shouldn't be counted
  #   # Given the production p : a a b a a a c d
  #   # Then ...
  #   p.positions_of(a, a) # => [0, 3]
  def positions_of(symb1, symb2)
    indices = []

    # A digram needs two symbols: the last symbol cannot start one.
    # Stopping one position earlier also avoids reading past the end of rhs.
    (0...(rhs.size - 1)).each do |i|
      # Skip the position overlapping the digram found just before
      next if indices.last == i - 1

      indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
    end

    indices
  end

  # Given that the production P passed as argument has exactly 2 symbols
  # in its rhs s1 s2, substitute in the rhs of self all occurrences of
  # s1 s2 by a reference to P.
  # @param another [Production or ProductionRef] a production that
  #   consists exactly of one digram (= 2 symbols).
  # @return [Array] the recalculated list of digrams.
  def reduce_step(another)
    symb1, symb2 = another.rhs.symbols

    # Replace the two-symbol sequences by the production, starting from
    # the end so that the positions not yet processed remain valid.
    positions_of(symb1, symb2).reverse_each do |index|
      rhs.reduce_step(index, another)
    end

    recalc_digrams
  end

  # Replace every occurrence of 'another' production in self.rhs by
  # the symbols in the rhs of 'another'.
  # @param another [Production or ProductionRef] the production whose
  #   occurrences in the rhs must be replaced by its own rhs.
  # @return [Array] the recalculated list of digrams.
  # @example Synopsis
  #   # Given the production p_A : a p_B b p_B c
  #   # And the production p_B : x y
  #   # Then...
  #   p_A.derive_step(p_B)
  #   # Modifies p_A into: p_A -> a x y b x y c
  def derive_step(another)
    # Walk backwards: insertions then only shift already visited positions
    (rhs.size - 1).downto(0) do |index|
      next unless rhs[index] == another

      rhs.insert_at(index + 1, another.rhs)
      another.decr_refcount
      rhs.delete_at(index)
    end

    recalc_digrams
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # The visitor is notified at the start of the visit, then the rhs
  # is visited and finally the visitor is notified of the end of the visit.
  # @param aVisitor [GrammarVisitor]
  def accept(aVisitor)
    aVisitor.start_visit_production(self)

    rhs.accept(aVisitor)

    aVisitor.end_visit_production(self)
  end
end # class
240
-
241
- end # module
242
-
243
- # End of file
1
+ require_relative 'digram'
2
+ require_relative 'symbol_sequence'
3
+ require_relative 'production_ref'
4
+
5
+ module Sequitur # Module for classes implementing the Sequitur algorithm
6
+
7
+
8
+ # In a context-free grammar, a production is a rule in which
9
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
10
+ # and the right-hand side (RHS) consists of a sequence of symbols.
11
+ # The symbols in RHS can be either terminal or non-terminal symbols.
12
+ # The rule stipulates that the LHS is equivalent to the RHS,
13
+ # in other words, every occurrence of the LHS can be replaced by the
14
+ # corresponding RHS.
15
+ # Implementation note: the object id of the production is taken as its LHS.
16
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader(:rhs)

  # The reference count (= how many times other productions reference this one)
  attr_reader(:refcount)

  # The sequence of digrams appearing in the RHS
  attr_reader(:digrams)

  # Constructor.
  # Build a production with an empty RHS, a reference count of zero
  # and an empty list of digrams.
  def initialize
    @rhs = SymbolSequence.new
    @refcount = 0
    @digrams = []
  end

  # Identity testing.
  # @param other [Production, ProductionRef] another production
  #   or production reference.
  # @return [true/false] true when the receiver and other are the same.
  def ==(other)
    return true if equal?(other)

    # A reference is compared through its own == (with self as argument);
    # anything else that isn't the receiver itself is considered different.
    other.is_a?(ProductionRef) ? (other == self) : false
  end

  # Is the rhs empty?
  # @return [true/false] true if the rhs has no members.
  def empty?
    rhs.empty?
  end

  # Increment the reference count by one.
  # @return [Integer] the updated reference count.
  def incr_refcount
    @refcount += 1
  end

  # Decrement the reference count by one.
  # @return [Integer] the updated reference count.
  # @raise [StandardError] when the reference count is already zero.
  def decr_refcount
    raise StandardError, 'Internal error' if @refcount.zero?

    @refcount -= 1
  end

  # Select the references to production appearing in the rhs.
  # @return [Array of ProductionRef]
  def references
    rhs.references
  end

  # Look in the rhs for all the references to the production passed
  # as argument.
  # @param a_prod [Production or ProductionRef] The production to search for.
  # @return [Array] the array of ProductionRef to the passed production
  def references_of(a_prod)
    # Unwrap a reference so that the search is done on the production itself
    real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
    rhs.references_of(real_prod)
  end

  # Rebuild the list of digrams from the current content of the rhs.
  # The list is reset to empty when the rhs has fewer than two symbols,
  # so that no digram of a previous (longer) rhs survives.
  # @return [Array] the list of digrams found in rhs of this production.
  def recalc_digrams
    # each_cons(2) yields nothing for fewer than two symbols => empty list
    @digrams = rhs.symbols.each_cons(2).map do |couple|
      Digram.new(*couple, self)
    end
  end

  # Does the rhs have exactly one digram only (= 2 symbols)?
  # @return [true/false] true when the rhs contains exactly two symbols.
  def single_digram?
    rhs.size == 2
  end

  # Detect whether the last digram occurs twice
  # Assumption: when a digram occurs twice in a production then it must occur
  # at the end of the rhs
  # @return [true/false] true when the digram occurs twice in rhs.
  def repeated_digram?
    return false if rhs.size < 3

    # Compare the key of the last digram with the keys of all previous ones
    *earlier_keys, last_key = digrams.map(&:key)
    earlier_keys.include?(last_key)
  end

  # Retrieve the last digram appearing in the RHS (if any).
  # @return [Digram] last digram in the rhs otherwise nil.
  def last_digram
    digrams.last
  end

  # Emit a text representation of the production rule.
  # Text is of the form:
  # object id of production : rhs as space-separated sequence of symbols.
  # @return [String]
  def to_string
    "#{object_id} : #{rhs.to_string}."
  end

  # Add a (grammar) symbol at the end of the RHS.
  # A production is wrapped into a new reference, a reference is duplicated
  # and any other object is appended as is. When the rhs then holds at least
  # two symbols, the digram made of its last two symbols is recorded.
  # @param aSymbol [Object] A (grammar) symbol to add.
  # @raise [StandardError] when aSymbol is an unbound production reference.
  def append_symbol(aSymbol)
    new_symb =
      case aSymbol
      when Production
        ProductionRef.new(aSymbol)
      when ProductionRef
        if aSymbol.unbound?
          # Message built by interpolation: no string literal is mutated
          raise StandardError,
                "Fail to append reference to nil production in #{to_string}"
        end
        aSymbol.dup
      else
        aSymbol
      end

    rhs << new_symb
    digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
  end

  # Clear the right-hand side and forget its digrams.
  # NOTE(review): decrementing the reference counter of the referenced
  # productions is presumably done by the rhs object in its clear method;
  # confirm in SymbolSequence.
  # @return the value returned by the clear method of the rhs.
  def clear_rhs
    result = rhs.clear
    @digrams = [] # An empty rhs cannot contain any digram
    result
  end

  # Find all the positions where the digram occurs in the rhs
  # @param symb1 [Object] first symbol of the digram
  # @param symb2 [Object] second symbol of the digram
  # @return [Array] the list of indices where the digram occurs in rhs.
  # @example
  #   # Given the production p : a b c a b a b d
  #   # Then ...
  #   p.positions_of(a, b) # => [0, 3, 5]
  #   # Caution: "overlapping" digrams shouldn't be counted
  #   # Given the production p : a a b a a a c d
  #   # Then ...
  #   p.positions_of(a, a) # => [0, 3]
  def positions_of(symb1, symb2)
    indices = []

    # A digram needs two symbols: the last symbol cannot start one.
    # Stopping one position earlier also avoids reading past the end of rhs.
    (0...(rhs.size - 1)).each do |i|
      # Skip the position overlapping the digram found just before
      next if indices.last == i - 1

      indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
    end

    indices
  end

  # Given that the production P passed as argument has exactly 2 symbols
  # in its rhs s1 s2, substitute in the rhs of self all occurrences of
  # s1 s2 by a reference to P.
  # @param another [Production or ProductionRef] a production that
  #   consists exactly of one digram (= 2 symbols).
  # @return [Array] the recalculated list of digrams.
  def reduce_step(another)
    symb1, symb2 = another.rhs.symbols

    # Replace the two-symbol sequences by the production, starting from
    # the end so that the positions not yet processed remain valid.
    positions_of(symb1, symb2).reverse_each do |index|
      rhs.reduce_step(index, another)
    end

    recalc_digrams
  end

  # Replace every occurrence of 'another' production in self.rhs by
  # the symbols in the rhs of 'another'.
  # @param another [Production or ProductionRef] the production whose
  #   occurrences in the rhs must be replaced by its own rhs.
  # @return [Array] the recalculated list of digrams.
  # @example Synopsis
  #   # Given the production p_A : a p_B b p_B c
  #   # And the production p_B : x y
  #   # Then...
  #   p_A.derive_step(p_B)
  #   # Modifies p_A into: p_A -> a x y b x y c
  def derive_step(another)
    # Walk backwards: insertions then only shift already visited positions
    (rhs.size - 1).downto(0) do |index|
      next unless rhs[index] == another

      rhs.insert_at(index + 1, another.rhs)
      another.decr_refcount
      rhs.delete_at(index)
    end

    recalc_digrams
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # The visitor is notified at the start of the visit, then the rhs
  # is visited and finally the visitor is notified of the end of the visit.
  # @param aVisitor [GrammarVisitor]
  def accept(aVisitor)
    aVisitor.start_visit_production(self)

    rhs.accept(aVisitor)

    aVisitor.end_visit_production(self)
  end
end # class
240
+
241
+ end # module
242
+
243
+ # End of file