sequitur 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,128 +5,125 @@ require_relative 'symbol_sequence'
5
5
  require_relative 'production_ref'
6
6
 
7
7
  module Sequitur # Module for classes implementing the Sequitur algorithm
8
- # In a context-free grammar, a production is a rule in which
9
- # its left-hand side (LHS) consists solely of a non-terminal symbol
10
- # and the right-hand side (RHS) consists of a sequence of symbols.
11
- # The symbols in RHS can be either terminal or non-terminal symbols.
12
- # The rule stipulates that the LHS is equivalent to the RHS,
13
- # in other words every occurrence of the LHS can be substituted to
14
- # corresponding RHS.
15
- # Implementation note: the object id of the production is taken as its LHS.
16
- class Production
17
- # The right-hand side (rhs) consists of a sequence of grammar symbols
18
- attr_reader(:rhs)
19
-
20
- # The reference count (= how times other productions reference this one)
21
- attr_reader(:refcount)
22
-
23
- # The sequence of digrams appearing in the RHS
24
- attr_reader(:digrams)
25
-
26
- # Constructor.
27
- # Build a production with an empty RHS.
28
- def initialize
29
- @rhs = SymbolSequence.new
30
- @refcount = 0
31
- @digrams = []
32
- end
33
-
34
- # Identity testing.
35
- # @param other [] another production or production reference.
36
- # @return true when the receiver and other are the same.
37
- def ==(other)
38
- return true if object_id == other.object_id
39
-
40
- result = if other.is_a?(ProductionRef)
41
- (other == self)
42
- else
43
- false
44
- end
45
-
46
- return result
47
- end
48
-
49
- # Is the rhs empty?
50
- # @ return true if the rhs has no members.
51
- def empty?
52
- return rhs.empty?
53
- end
54
-
55
- # Increment the reference count by one.
56
- def incr_refcount
57
- @refcount += 1
58
- end
59
-
60
- # Decrement the reference count by one.
61
- def decr_refcount
62
- raise StandardError, 'Internal error' if @refcount.zero?
63
-
64
- @refcount -= 1
65
- end
66
-
67
- # Select the references to production appearing in the rhs.
68
- # @return [Array of ProductionRef]
69
- def references
70
- return rhs.references
71
- end
72
-
73
- # Look in the rhs all the references to a production passed a argument.
74
- # aProduction [aProduction or ProductionRef] The production to search for.
75
- # @return [Array] the array of ProductionRef to the passed production
76
- def references_of(a_prod)
77
- real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
78
- return rhs.references_of(real_prod)
79
- end
80
-
81
- # Enumerate the digrams appearing in the right-hand side (rhs)
82
- # @return [Array] the list of digrams found in rhs of this production.
83
- def recalc_digrams
84
- return [] if rhs.size < 2
85
-
86
- result = []
87
- rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
88
- @digrams = result
89
- end
90
-
91
- # Does the rhs have exactly one digram only (= 2 symbols)?
92
- # @return [true/false] true when the rhs contains exactly two symbols.
93
- def single_digram?
94
- return rhs.size == 2
95
- end
96
-
97
- # Detect whether the last digram occurs twice
98
- # Assumption: when a digram occurs twice in a production then it must occur
99
- # at the end of the rhs
100
- # @return [true/false] true when the digram occurs twice in rhs.
101
- def repeated_digram?
102
- return false if rhs.size < 3
103
-
104
- my_digrams = digrams
105
- all_keys = my_digrams.map(&:key)
106
- last_key = all_keys.pop
107
- same_key_found = all_keys.index(last_key)
108
- return !same_key_found.nil?
109
- end
110
-
111
- # Retrieve the last digram appearing in the RHS (if any).
112
- # @return [Digram] last digram in the rhs otherwise nil.
113
- def last_digram
114
- result = digrams.empty? ? nil : digrams.last
115
- return result
116
- end
117
-
118
- # Emit a text representation of the production rule.
119
- # Text is of the form:
120
- # object id of production : rhs as space-separated sequence of symbols.
121
- # @return [String]
122
- def to_string
123
- return "#{object_id} : #{rhs.to_string}."
124
- end
125
-
126
- # Add a (grammar) symbol at the end of the RHS.
127
- # @param aSymbol [Object] A (grammar) symbol to add.
128
- def append_symbol(aSymbol)
129
- case aSymbol
8
+ # In a context-free grammar, a production is a rule in which
9
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
10
+ # and the right-hand side (RHS) consists of a sequence of symbols.
11
+ # The symbols in RHS can be either terminal or non-terminal symbols.
12
+ # The rule stipulates that the LHS is equivalent to the RHS,
13
+ # in other words every occurrence of the LHS can be substituted to
14
+ # corresponding RHS.
15
+ # Implementation note: the object id of the production is taken as its LHS.
16
+ class Production
17
+ # The right-hand side (rhs) consists of a sequence of grammar symbols
18
+ attr_reader(:rhs)
19
+
20
+ # The reference count (= how times other productions reference this one)
21
+ attr_reader(:refcount)
22
+
23
+ # The sequence of digrams appearing in the RHS
24
+ attr_reader(:digrams)
25
+
26
+ # Constructor.
27
+ # Build a production with an empty RHS.
28
+ def initialize
29
+ @rhs = SymbolSequence.new
30
+ @refcount = 0
31
+ @digrams = []
32
+ end
33
+
34
+ # Identity testing.
35
+ # @param other [] another production or production reference.
36
+ # @return true when the receiver and other are the same.
37
+ def ==(other)
38
+ return true if object_id == other.object_id
39
+
40
+ if other.is_a?(ProductionRef)
41
+ (other == self)
42
+ else
43
+ false
44
+ end
45
+ end
46
+
47
+ # Is the rhs empty?
48
+ # @ return true if the rhs has no members.
49
+ def empty?
50
+ rhs.empty?
51
+ end
52
+
53
+ # Increment the reference count by one.
54
+ def incr_refcount
55
+ @refcount += 1
56
+ end
57
+
58
+ # Decrement the reference count by one.
59
+ def decr_refcount
60
+ raise StandardError, 'Internal error' if @refcount.zero?
61
+
62
+ @refcount -= 1
63
+ end
64
+
65
+ # Select the references to production appearing in the rhs.
66
+ # @return [Array of ProductionRef]
67
+ def references
68
+ rhs.references
69
+ end
70
+
71
+ # Look in the rhs all the references to a production passed a argument.
72
+ # aProduction [aProduction or ProductionRef] The production to search for.
73
+ # @return [Array] the array of ProductionRef to the passed production
74
+ def references_of(a_prod)
75
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
76
+ rhs.references_of(real_prod)
77
+ end
78
+
79
+ # Enumerate the digrams appearing in the right-hand side (rhs)
80
+ # @return [Array] the list of digrams found in rhs of this production.
81
+ def recalc_digrams
82
+ return [] if rhs.size < 2
83
+
84
+ result = []
85
+ rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
86
+ @digrams = result
87
+ end
88
+
89
+ # Does the rhs have exactly one digram only (= 2 symbols)?
90
+ # @return [true/false] true when the rhs contains exactly two symbols.
91
+ def single_digram?
92
+ rhs.size == 2
93
+ end
94
+
95
+ # Detect whether the last digram occurs twice
96
+ # Assumption: when a digram occurs twice in a production then it must occur
97
+ # at the end of the rhs
98
+ # @return [true/false] true when the digram occurs twice in rhs.
99
+ def repeated_digram?
100
+ return false if rhs.size < 3
101
+
102
+ my_digrams = digrams
103
+ all_keys = my_digrams.map(&:key)
104
+ last_key = all_keys.pop
105
+ same_key_found = all_keys.index(last_key)
106
+ !same_key_found.nil?
107
+ end
108
+
109
+ # Retrieve the last digram appearing in the RHS (if any).
110
+ # @return [Digram] last digram in the rhs otherwise nil.
111
+ def last_digram
112
+ digrams.empty? ? nil : digrams.last
113
+ end
114
+
115
+ # Emit a text representation of the production rule.
116
+ # Text is of the form:
117
+ # object id of production : rhs as space-separated sequence of symbols.
118
+ # @return [String]
119
+ def to_string
120
+ "#{object_id} : #{rhs.to_string}."
121
+ end
122
+
123
+ # Add a (grammar) symbol at the end of the RHS.
124
+ # @param aSymbol [Object] A (grammar) symbol to add.
125
+ def append_symbol(aSymbol)
126
+ case aSymbol
130
127
  when Production
131
128
  new_symb = ProductionRef.new(aSymbol)
132
129
  when ProductionRef
@@ -138,91 +135,89 @@ class Production
138
135
  new_symb = aSymbol.dup
139
136
  else
140
137
  new_symb = aSymbol
138
+ end
139
+
140
+ rhs << new_symb
141
+ digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
142
+ end
143
+
144
+ # Clear the right-hand side.
145
+ # Any referenced production has its reference counter decremented.
146
+ def clear_rhs
147
+ rhs.clear
148
+ end
149
+
150
+ # Find all the positions where the digram occurs in the rhs
151
+ # @param symb1 [Object] first symbol of the digram
152
+ # @param symb2 [Object] second symbol of the digram
153
+ # @return [Array] the list of indices where the digram occurs in rhs.
154
+ # @example
155
+ # # Given the production p : a b c a b a b d
156
+ # #Then ...
157
+ # p.positions_of(a, b) # => [0, 3, 5]
158
+ # # Caution: "overlapping" digrams shouldn't be counted
159
+ # # Given the production p : a a b a a a c d
160
+ # # Then ...
161
+ # p.positions_of(a, a) # => [0, 3]
162
+ def positions_of(symb1, symb2)
163
+ # Find the positions where the digram occur in rhs
164
+ indices = [-2] # Dummy index!
165
+ (0...rhs.size).each do |i|
166
+ next if i == indices.last + 1
167
+
168
+ indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
169
+ end
170
+
171
+ indices.shift
172
+
173
+ indices
174
+ end
175
+
176
+ # Given that the production P passed as argument has exactly 2 symbols
177
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
178
+ # s1 s2 by a reference to P.
179
+ # @param another [Production or ProductionRef] a production that
180
+ # consists exactly of one digram (= 2 symbols).
181
+ def reduce_step(another)
182
+ (symb1, symb2) = another.rhs.symbols
183
+ pos = positions_of(symb1, symb2).reverse
184
+
185
+ # Replace the two symbol sequence by the production
186
+ pos.each { |index| rhs.reduce_step(index, another) }
187
+
188
+ recalc_digrams
141
189
  end
142
190
 
143
- rhs << new_symb
144
- digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
145
- end
146
-
147
- # Clear the right-hand side.
148
- # Any referenced production has its reference counter decremented.
149
- def clear_rhs
150
- rhs.clear
151
- end
152
-
153
- # Find all the positions where the digram occurs in the rhs
154
- # @param symb1 [Object] first symbol of the digram
155
- # @param symb2 [Object] second symbol of the digram
156
- # @return [Array] the list of indices where the digram occurs in rhs.
157
- # @example
158
- # # Given the production p : a b c a b a b d
159
- # #Then ...
160
- # p.positions_of(a, b) # => [0, 3, 5]
161
- # # Caution: "overlapping" digrams shouldn't be counted
162
- # # Given the production p : a a b a a a c d
163
- # # Then ...
164
- # p.positions_of(a, a) # => [0, 3]
165
- def positions_of(symb1, symb2)
166
- # Find the positions where the digram occur in rhs
167
- indices = [-2] # Dummy index!
168
- (0...rhs.size).each do |i|
169
- next if i == indices.last + 1
170
-
171
- indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
172
- end
173
-
174
- indices.shift
175
-
176
- return indices
177
- end
178
-
179
- # Given that the production P passed as argument has exactly 2 symbols
180
- # in its rhs s1 s2, substitute in the rhs of self all occurrences of
181
- # s1 s2 by a reference to P.
182
- # @param another [Production or ProductionRef] a production that
183
- # consists exactly of one digram (= 2 symbols).
184
- def reduce_step(another)
185
- (symb1, symb2) = another.rhs.symbols
186
- pos = positions_of(symb1, symb2).reverse
187
-
188
- # Replace the two symbol sequence by the production
189
- pos.each { |index| rhs.reduce_step(index, another) }
190
-
191
- recalc_digrams
192
- end
193
-
194
- # Replace every occurrence of 'another' production in self.rhs by
195
- # the symbols in the rhs of 'another'.
196
- # @param another [Production or ProductionRef] a production that
197
- # consists exactly of one digram (= 2 symbols).
198
- # @example Synopsis
199
- # # Given the production p_A : a p_B b p_B c
200
- # # And the production p_B : x y
201
- # # Then...
202
- # p_A.derive_step(p_B)
203
- # #Modifies p_A as into: p_A -> a x y b x y c
204
- def derive_step(another)
205
- (0...rhs.size).to_a.reverse_each do |index|
206
- next unless rhs[index] == another
207
-
208
- rhs.insert_at(index + 1, another.rhs)
209
- another.decr_refcount
210
- rhs.delete_at(index)
211
- end
212
-
213
- recalc_digrams
214
- end
215
-
216
- # Part of the 'visitee' role in Visitor design pattern.
217
- # @param aVisitor[GrammarVisitor]
218
- def accept(aVisitor)
219
- aVisitor.start_visit_production(self)
220
-
221
- rhs.accept(aVisitor)
222
-
223
- aVisitor.end_visit_production(self)
224
- end
225
- end # class
191
+ # Replace every occurrence of 'another' production in self.rhs by
192
+ # the symbols in the rhs of 'another'.
193
+ # @param another [Production or ProductionRef] a production that
194
+ # consists exactly of one digram (= 2 symbols).
195
+ # @example Synopsis
196
+ # # Given the production p_A : a p_B b p_B c
197
+ # # And the production p_B : x y
198
+ # # Then...
199
+ # p_A.derive_step(p_B)
200
+ # #Modifies p_A as into: p_A -> a x y b x y c
201
+ def derive_step(another)
202
+ (0...rhs.size).to_a.reverse_each do |index|
203
+ next unless rhs[index] == another
204
+
205
+ rhs.insert_at(index + 1, another.rhs)
206
+ another.decr_refcount
207
+ rhs.delete_at(index)
208
+ end
209
+
210
+ recalc_digrams
211
+ end
212
+
213
+ # Part of the 'visitee' role in Visitor design pattern.
214
+ # @param aVisitor[GrammarVisitor]
215
+ def accept(aVisitor)
216
+ aVisitor.start_visit_production(self)
217
+ rhs.accept(aVisitor)
218
+ aVisitor.end_visit_production(self)
219
+ end
220
+ end # class
226
221
  end # module
227
222
 
228
223
  # End of file
@@ -42,12 +42,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
42
42
  # Emit the text representation of a production reference.
43
43
  # @return [String]
44
44
  def to_s
45
- return production.object_id.to_s
45
+ production.object_id.to_s
46
46
  end
47
47
 
48
48
  alias to_string to_s
49
49
 
50
-
51
50
  # Equality testing.
52
51
  # A production ref is equal to another one when its
53
52
  # refers to the same production or when it is compared to
@@ -57,13 +56,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
57
56
  def ==(other)
58
57
  return true if object_id == other.object_id
59
58
 
60
- result = if other.is_a?(ProductionRef)
61
- (production == other.production)
62
- else
63
- (production == other)
64
- end
65
-
66
- return result
59
+ if other.is_a?(ProductionRef)
60
+ production == other.production
61
+ else
62
+ production == other
63
+ end
67
64
  end
68
65
 
69
66
  # Produce a hash value.
@@ -74,7 +71,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
74
71
  def hash
75
72
  raise StandardError, 'Nil production' if production.nil?
76
73
 
77
- return production.hash
74
+ production.hash
78
75
  end
79
76
 
80
77
  # Make this reference point to the given production.
@@ -84,7 +81,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
84
81
  return if aProduction == @production
85
82
 
86
83
  production&.decr_refcount
87
- unless aProduction.kind_of?(Production)
84
+ unless aProduction.is_a?(Production)
88
85
  raise StandardError, "Illegal production type #{aProduction.class}"
89
86
  end
90
87
 
@@ -102,7 +99,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
102
99
  # @return [true / false] true when this object doesn't
103
100
  # point to a production.
104
101
  def unbound?
105
- return production.nil?
102
+ production.nil?
106
103
  end
107
104
 
108
105
  # Part of the 'visitee' role in the Visitor design pattern.