sequitur 0.1.23 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,128 +5,125 @@ require_relative 'symbol_sequence'
5
5
  require_relative 'production_ref'
6
6
 
7
7
  module Sequitur # Module for classes implementing the Sequitur algorithm
8
- # In a context-free grammar, a production is a rule in which
9
- # its left-hand side (LHS) consists solely of a non-terminal symbol
10
- # and the right-hand side (RHS) consists of a sequence of symbols.
11
- # The symbols in RHS can be either terminal or non-terminal symbols.
12
- # The rule stipulates that the LHS is equivalent to the RHS,
13
- # in other words every occurrence of the LHS can be substituted to
14
- # corresponding RHS.
15
- # Implementation note: the object id of the production is taken as its LHS.
16
- class Production
17
- # The right-hand side (rhs) consists of a sequence of grammar symbols
18
- attr_reader(:rhs)
19
-
20
- # The reference count (= how times other productions reference this one)
21
- attr_reader(:refcount)
22
-
23
- # The sequence of digrams appearing in the RHS
24
- attr_reader(:digrams)
25
-
26
- # Constructor.
27
- # Build a production with an empty RHS.
28
- def initialize
29
- @rhs = SymbolSequence.new
30
- @refcount = 0
31
- @digrams = []
32
- end
33
-
34
- # Identity testing.
35
- # @param other [] another production or production reference.
36
- # @return true when the receiver and other are the same.
37
- def ==(other)
38
- return true if object_id == other.object_id
39
-
40
- result = if other.is_a?(ProductionRef)
41
- (other == self)
42
- else
43
- false
44
- end
45
-
46
- return result
47
- end
48
-
49
- # Is the rhs empty?
50
- # @ return true if the rhs has no members.
51
- def empty?
52
- return rhs.empty?
53
- end
54
-
55
- # Increment the reference count by one.
56
- def incr_refcount
57
- @refcount += 1
58
- end
59
-
60
- # Decrement the reference count by one.
61
- def decr_refcount
62
- raise StandardError, 'Internal error' if @refcount.zero?
63
-
64
- @refcount -= 1
65
- end
66
-
67
- # Select the references to production appearing in the rhs.
68
- # @return [Array of ProductionRef]
69
- def references
70
- return rhs.references
71
- end
72
-
73
- # Look in the rhs all the references to a production passed a argument.
74
- # aProduction [aProduction or ProductionRef] The production to search for.
75
- # @return [Array] the array of ProductionRef to the passed production
76
- def references_of(a_prod)
77
- real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
78
- return rhs.references_of(real_prod)
79
- end
80
-
81
- # Enumerate the digrams appearing in the right-hand side (rhs)
82
- # @return [Array] the list of digrams found in rhs of this production.
83
- def recalc_digrams
84
- return [] if rhs.size < 2
85
-
86
- result = []
87
- rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
88
- @digrams = result
89
- end
90
-
91
- # Does the rhs have exactly one digram only (= 2 symbols)?
92
- # @return [true/false] true when the rhs contains exactly two symbols.
93
- def single_digram?
94
- return rhs.size == 2
95
- end
96
-
97
- # Detect whether the last digram occurs twice
98
- # Assumption: when a digram occurs twice in a production then it must occur
99
- # at the end of the rhs
100
- # @return [true/false] true when the digram occurs twice in rhs.
101
- def repeated_digram?
102
- return false if rhs.size < 3
103
-
104
- my_digrams = digrams
105
- all_keys = my_digrams.map(&:key)
106
- last_key = all_keys.pop
107
- same_key_found = all_keys.index(last_key)
108
- return !same_key_found.nil?
109
- end
110
-
111
- # Retrieve the last digram appearing in the RHS (if any).
112
- # @return [Digram] last digram in the rhs otherwise nil.
113
- def last_digram
114
- result = digrams.empty? ? nil : digrams.last
115
- return result
116
- end
117
-
118
- # Emit a text representation of the production rule.
119
- # Text is of the form:
120
- # object id of production : rhs as space-separated sequence of symbols.
121
- # @return [String]
122
- def to_string
123
- return "#{object_id} : #{rhs.to_string}."
124
- end
125
-
126
- # Add a (grammar) symbol at the end of the RHS.
127
- # @param aSymbol [Object] A (grammar) symbol to add.
128
- def append_symbol(aSymbol)
129
- case aSymbol
8
+ # In a context-free grammar, a production is a rule in which
9
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
10
+ # and the right-hand side (RHS) consists of a sequence of symbols.
11
+ # The symbols in RHS can be either terminal or non-terminal symbols.
12
+ # The rule stipulates that the LHS is equivalent to the RHS,
13
+ # in other words every occurrence of the LHS can be replaced by the
14
+ # corresponding RHS.
15
+ # Implementation note: the object id of the production is taken as its LHS.
16
+ class Production
17
+ # The right-hand side (rhs) consists of a sequence of grammar symbols
18
+ attr_reader(:rhs)
19
+
20
+ # The reference count (= how many times other productions reference this one)
21
+ attr_reader(:refcount)
22
+
23
+ # The sequence of digrams appearing in the RHS
24
+ attr_reader(:digrams)
25
+
26
+ # Constructor.
27
+ # Build a production with an empty RHS.
28
+ def initialize
29
+ @rhs = SymbolSequence.new
30
+ @refcount = 0
31
+ @digrams = []
32
+ end
33
+
34
+ # Identity testing.
35
+ # @param other [Production, ProductionRef] another production or production reference.
36
+ # @return true when the receiver and other are the same.
37
+ def ==(other)
38
+ return true if object_id == other.object_id
39
+
40
+ if other.is_a?(ProductionRef)
41
+ (other == self)
42
+ else
43
+ false
44
+ end
45
+ end
46
+
47
+ # Is the rhs empty?
48
+ # @return true if the rhs has no members.
49
+ def empty?
50
+ rhs.empty?
51
+ end
52
+
53
+ # Increment the reference count by one.
54
+ def incr_refcount
55
+ @refcount += 1
56
+ end
57
+
58
+ # Decrement the reference count by one.
59
+ def decr_refcount
60
+ raise StandardError, 'Internal error' if @refcount.zero?
61
+
62
+ @refcount -= 1
63
+ end
64
+
65
+ # Select the references to production appearing in the rhs.
66
+ # @return [Array of ProductionRef]
67
+ def references
68
+ rhs.references
69
+ end
70
+
71
+ # Look in the rhs for all the references to the production passed as argument.
72
+ # @param a_prod [Production, ProductionRef] The production to search for.
73
+ # @return [Array] the array of ProductionRef to the passed production
74
+ def references_of(a_prod)
75
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
76
+ rhs.references_of(real_prod)
77
+ end
78
+
79
+ # Enumerate the digrams appearing in the right-hand side (rhs)
80
+ # @return [Array] the list of digrams found in rhs of this production.
81
+ def recalc_digrams
82
+ return [] if rhs.size < 2
83
+
84
+ result = []
85
+ rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
86
+ @digrams = result
87
+ end
88
+
89
+ # Does the rhs have exactly one digram only (= 2 symbols)?
90
+ # @return [true/false] true when the rhs contains exactly two symbols.
91
+ def single_digram?
92
+ rhs.size == 2
93
+ end
94
+
95
+ # Detect whether the last digram occurs twice
96
+ # Assumption: when a digram occurs twice in a production then it must occur
97
+ # at the end of the rhs
98
+ # @return [true/false] true when the digram occurs twice in rhs.
99
+ def repeated_digram?
100
+ return false if rhs.size < 3
101
+
102
+ my_digrams = digrams
103
+ all_keys = my_digrams.map(&:key)
104
+ last_key = all_keys.pop
105
+ same_key_found = all_keys.index(last_key)
106
+ !same_key_found.nil?
107
+ end
108
+
109
+ # Retrieve the last digram appearing in the RHS (if any).
110
+ # @return [Digram] last digram in the rhs otherwise nil.
111
+ def last_digram
112
+ digrams.empty? ? nil : digrams.last
113
+ end
114
+
115
+ # Emit a text representation of the production rule.
116
+ # Text is of the form:
117
+ # object id of production : rhs as space-separated sequence of symbols.
118
+ # @return [String]
119
+ def to_string
120
+ "#{object_id} : #{rhs.to_string}."
121
+ end
122
+
123
+ # Add a (grammar) symbol at the end of the RHS.
124
+ # @param aSymbol [Object] A (grammar) symbol to add.
125
+ def append_symbol(aSymbol)
126
+ case aSymbol
130
127
  when Production
131
128
  new_symb = ProductionRef.new(aSymbol)
132
129
  when ProductionRef
@@ -138,91 +135,89 @@ class Production
138
135
  new_symb = aSymbol.dup
139
136
  else
140
137
  new_symb = aSymbol
138
+ end
139
+
140
+ rhs << new_symb
141
+ digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
142
+ end
143
+
144
+ # Clear the right-hand side.
145
+ # Any referenced production has its reference counter decremented.
146
+ def clear_rhs
147
+ rhs.clear
148
+ end
149
+
150
+ # Find all the positions where the digram occurs in the rhs
151
+ # @param symb1 [Object] first symbol of the digram
152
+ # @param symb2 [Object] second symbol of the digram
153
+ # @return [Array] the list of indices where the digram occurs in rhs.
154
+ # @example
155
+ # # Given the production p : a b c a b a b d
156
+ # # Then ...
157
+ # p.positions_of(a, b) # => [0, 3, 5]
158
+ # # Caution: "overlapping" digrams shouldn't be counted
159
+ # # Given the production p : a a b a a a c d
160
+ # # Then ...
161
+ # p.positions_of(a, a) # => [0, 3]
162
+ def positions_of(symb1, symb2)
163
+ # Find the positions where the digram occurs in rhs
164
+ indices = [-2] # Dummy index!
165
+ (0...rhs.size).each do |i|
166
+ next if i == indices.last + 1
167
+
168
+ indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
169
+ end
170
+
171
+ indices.shift
172
+
173
+ indices
174
+ end
175
+
176
+ # Given that the production P passed as argument has exactly 2 symbols
177
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
178
+ # s1 s2 by a reference to P.
179
+ # @param another [Production or ProductionRef] a production that
180
+ # consists exactly of one digram (= 2 symbols).
181
+ def reduce_step(another)
182
+ (symb1, symb2) = another.rhs.symbols
183
+ pos = positions_of(symb1, symb2).reverse
184
+
185
+ # Replace the two symbol sequence by the production
186
+ pos.each { |index| rhs.reduce_step(index, another) }
187
+
188
+ recalc_digrams
141
189
  end
142
190
 
143
- rhs << new_symb
144
- digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
145
- end
146
-
147
- # Clear the right-hand side.
148
- # Any referenced production has its reference counter decremented.
149
- def clear_rhs
150
- rhs.clear
151
- end
152
-
153
- # Find all the positions where the digram occurs in the rhs
154
- # @param symb1 [Object] first symbol of the digram
155
- # @param symb2 [Object] second symbol of the digram
156
- # @return [Array] the list of indices where the digram occurs in rhs.
157
- # @example
158
- # # Given the production p : a b c a b a b d
159
- # #Then ...
160
- # p.positions_of(a, b) # => [0, 3, 5]
161
- # # Caution: "overlapping" digrams shouldn't be counted
162
- # # Given the production p : a a b a a a c d
163
- # # Then ...
164
- # p.positions_of(a, a) # => [0, 3]
165
- def positions_of(symb1, symb2)
166
- # Find the positions where the digram occur in rhs
167
- indices = [-2] # Dummy index!
168
- (0...rhs.size).each do |i|
169
- next if i == indices.last + 1
170
-
171
- indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
172
- end
173
-
174
- indices.shift
175
-
176
- return indices
177
- end
178
-
179
- # Given that the production P passed as argument has exactly 2 symbols
180
- # in its rhs s1 s2, substitute in the rhs of self all occurrences of
181
- # s1 s2 by a reference to P.
182
- # @param another [Production or ProductionRef] a production that
183
- # consists exactly of one digram (= 2 symbols).
184
- def reduce_step(another)
185
- (symb1, symb2) = another.rhs.symbols
186
- pos = positions_of(symb1, symb2).reverse
187
-
188
- # Replace the two symbol sequence by the production
189
- pos.each { |index| rhs.reduce_step(index, another) }
190
-
191
- recalc_digrams
192
- end
193
-
194
- # Replace every occurrence of 'another' production in self.rhs by
195
- # the symbols in the rhs of 'another'.
196
- # @param another [Production or ProductionRef] a production that
197
- # consists exactly of one digram (= 2 symbols).
198
- # @example Synopsis
199
- # # Given the production p_A : a p_B b p_B c
200
- # # And the production p_B : x y
201
- # # Then...
202
- # p_A.derive_step(p_B)
203
- # #Modifies p_A as into: p_A -> a x y b x y c
204
- def derive_step(another)
205
- (0...rhs.size).to_a.reverse_each do |index|
206
- next unless rhs[index] == another
207
-
208
- rhs.insert_at(index + 1, another.rhs)
209
- another.decr_refcount
210
- rhs.delete_at(index)
211
- end
212
-
213
- recalc_digrams
214
- end
215
-
216
- # Part of the 'visitee' role in Visitor design pattern.
217
- # @param aVisitor[GrammarVisitor]
218
- def accept(aVisitor)
219
- aVisitor.start_visit_production(self)
220
-
221
- rhs.accept(aVisitor)
222
-
223
- aVisitor.end_visit_production(self)
224
- end
225
- end # class
191
+ # Replace every occurrence of 'another' production in self.rhs by
192
+ # the symbols in the rhs of 'another'.
193
+ # @param another [Production or ProductionRef] a production that
194
+ # consists exactly of one digram (= 2 symbols).
195
+ # @example Synopsis
196
+ # # Given the production p_A : a p_B b p_B c
197
+ # # And the production p_B : x y
198
+ # # Then...
199
+ # p_A.derive_step(p_B)
200
+ # # Modifies p_A into: p_A -> a x y b x y c
201
+ def derive_step(another)
202
+ (0...rhs.size).to_a.reverse_each do |index|
203
+ next unless rhs[index] == another
204
+
205
+ rhs.insert_at(index + 1, another.rhs)
206
+ another.decr_refcount
207
+ rhs.delete_at(index)
208
+ end
209
+
210
+ recalc_digrams
211
+ end
212
+
213
+ # Part of the 'visitee' role in Visitor design pattern.
214
+ # @param aVisitor [GrammarVisitor]
215
+ def accept(aVisitor)
216
+ aVisitor.start_visit_production(self)
217
+ rhs.accept(aVisitor)
218
+ aVisitor.end_visit_production(self)
219
+ end
220
+ end # class
226
221
  end # module
227
222
 
228
223
  # End of file
@@ -42,12 +42,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
42
42
  # Emit the text representation of a production reference.
43
43
  # @return [String]
44
44
  def to_s
45
- return production.object_id.to_s
45
+ production.object_id.to_s
46
46
  end
47
47
 
48
48
  alias to_string to_s
49
49
 
50
-
51
50
  # Equality testing.
52
51
  # A production ref is equal to another one when it
53
52
  # refers to the same production or when it is compared to
@@ -57,13 +56,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
57
56
  def ==(other)
58
57
  return true if object_id == other.object_id
59
58
 
60
- result = if other.is_a?(ProductionRef)
61
- (production == other.production)
62
- else
63
- (production == other)
64
- end
65
-
66
- return result
59
+ if other.is_a?(ProductionRef)
60
+ production == other.production
61
+ else
62
+ production == other
63
+ end
67
64
  end
68
65
 
69
66
  # Produce a hash value.
@@ -74,7 +71,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
74
71
  def hash
75
72
  raise StandardError, 'Nil production' if production.nil?
76
73
 
77
- return production.hash
74
+ production.hash
78
75
  end
79
76
 
80
77
  # Make this reference point to the given production.
@@ -84,7 +81,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
84
81
  return if aProduction == @production
85
82
 
86
83
  production&.decr_refcount
87
- unless aProduction.kind_of?(Production)
84
+ unless aProduction.is_a?(Production)
88
85
  raise StandardError, "Illegal production type #{aProduction.class}"
89
86
  end
90
87
 
@@ -102,7 +99,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
102
99
  # @return [true / false] true when this object doesn't
103
100
  # point to a production.
104
101
  def unbound?
105
- return production.nil?
102
+ production.nil?
106
103
  end
107
104
 
108
105
  # Part of the 'visitee' role in the Visitor design pattern.