sequitur 0.1.23 → 0.1.25

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +11 -437
  3. data/CHANGELOG.md +9 -0
  4. data/Gemfile +0 -2
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -3
  7. data/Rakefile +0 -2
  8. data/appveyor.yml +10 -10
  9. data/examples/inductive_english.rb +35 -0
  10. data/examples/integer_sample.rb +0 -1
  11. data/examples/porridge.rb +9 -9
  12. data/examples/word_sample.rb +4 -5
  13. data/lib/sequitur/constants.rb +7 -4
  14. data/lib/sequitur/digram.rb +11 -11
  15. data/lib/sequitur/dynamic_grammar.rb +12 -12
  16. data/lib/sequitur/formatter/base_formatter.rb +2 -2
  17. data/lib/sequitur/formatter/base_text.rb +8 -9
  18. data/lib/sequitur/formatter/debug.rb +10 -4
  19. data/lib/sequitur/grammar_visitor.rb +7 -7
  20. data/lib/sequitur/production.rb +203 -205
  21. data/lib/sequitur/production_ref.rb +18 -20
  22. data/lib/sequitur/sequitur_grammar.rb +135 -137
  23. data/lib/sequitur/symbol_sequence.rb +29 -32
  24. data/lib/sequitur.rb +6 -6
  25. data/sig/lib/sequitur/constants.rbs +10 -0
  26. data/sig/lib/sequitur/digram.rbs +37 -0
  27. data/sig/lib/sequitur/dynamic_grammar.rbs +58 -0
  28. data/sig/lib/sequitur/formatter/base_formatter.rbs +20 -0
  29. data/sig/lib/sequitur/formatter/base_text.rbs +62 -0
  30. data/sig/lib/sequitur/formatter/debug.rbs +89 -0
  31. data/sig/lib/sequitur/production.rbs +120 -0
  32. data/sig/lib/sequitur/production_ref.rbs +73 -0
  33. data/sig/lib/sequitur/sequitur_grammar.rbs +55 -0
  34. data/sig/lib/sequitur/symbol_sequence.rbs +83 -0
  35. data/sig/lib/sequitur.rbs +9 -0
  36. data/spec/sequitur/digram_spec.rb +13 -12
  37. data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
  38. data/spec/sequitur/formatter/base_text_spec.rb +70 -72
  39. data/spec/sequitur/formatter/debug_spec.rb +90 -92
  40. data/spec/sequitur/grammar_visitor_spec.rb +70 -71
  41. data/spec/sequitur/production_ref_spec.rb +92 -92
  42. data/spec/sequitur/production_spec.rb +30 -34
  43. data/spec/sequitur/sequitur_grammar_spec.rb +47 -46
  44. data/spec/sequitur/symbol_sequence_spec.rb +102 -105
  45. data/spec/spec_helper.rb +0 -1
  46. metadata +28 -17
  47. data/.travis.yml +0 -29
@@ -5,128 +5,128 @@ require_relative 'symbol_sequence'
5
5
  require_relative 'production_ref'
6
6
 
7
7
  module Sequitur # Module for classes implementing the Sequitur algorithm
8
- # In a context-free grammar, a production is a rule in which
9
- # its left-hand side (LHS) consists solely of a non-terminal symbol
10
- # and the right-hand side (RHS) consists of a sequence of symbols.
11
- # The symbols in RHS can be either terminal or non-terminal symbols.
12
- # The rule stipulates that the LHS is equivalent to the RHS,
13
- # in other words every occurrence of the LHS can be substituted to
14
- # corresponding RHS.
15
- # Implementation note: the object id of the production is taken as its LHS.
16
- class Production
17
- # The right-hand side (rhs) consists of a sequence of grammar symbols
18
- attr_reader(:rhs)
19
-
20
- # The reference count (= how times other productions reference this one)
21
- attr_reader(:refcount)
22
-
23
- # The sequence of digrams appearing in the RHS
24
- attr_reader(:digrams)
25
-
26
- # Constructor.
27
- # Build a production with an empty RHS.
28
- def initialize
29
- @rhs = SymbolSequence.new
30
- @refcount = 0
31
- @digrams = []
32
- end
33
-
34
- # Identity testing.
35
- # @param other [] another production or production reference.
36
- # @return true when the receiver and other are the same.
37
- def ==(other)
38
- return true if object_id == other.object_id
39
-
40
- result = if other.is_a?(ProductionRef)
41
- (other == self)
42
- else
43
- false
44
- end
45
-
46
- return result
47
- end
48
-
49
- # Is the rhs empty?
50
- # @ return true if the rhs has no members.
51
- def empty?
52
- return rhs.empty?
53
- end
54
-
55
- # Increment the reference count by one.
56
- def incr_refcount
57
- @refcount += 1
58
- end
59
-
60
- # Decrement the reference count by one.
61
- def decr_refcount
62
- raise StandardError, 'Internal error' if @refcount.zero?
63
-
64
- @refcount -= 1
65
- end
66
-
67
- # Select the references to production appearing in the rhs.
68
- # @return [Array of ProductionRef]
69
- def references
70
- return rhs.references
71
- end
72
-
73
- # Look in the rhs all the references to a production passed a argument.
74
- # aProduction [aProduction or ProductionRef] The production to search for.
75
- # @return [Array] the array of ProductionRef to the passed production
76
- def references_of(a_prod)
77
- real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
78
- return rhs.references_of(real_prod)
79
- end
80
-
81
- # Enumerate the digrams appearing in the right-hand side (rhs)
82
- # @return [Array] the list of digrams found in rhs of this production.
83
- def recalc_digrams
84
- return [] if rhs.size < 2
85
-
86
- result = []
87
- rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
88
- @digrams = result
89
- end
90
-
91
- # Does the rhs have exactly one digram only (= 2 symbols)?
92
- # @return [true/false] true when the rhs contains exactly two symbols.
93
- def single_digram?
94
- return rhs.size == 2
95
- end
96
-
97
- # Detect whether the last digram occurs twice
98
- # Assumption: when a digram occurs twice in a production then it must occur
99
- # at the end of the rhs
100
- # @return [true/false] true when the digram occurs twice in rhs.
101
- def repeated_digram?
102
- return false if rhs.size < 3
103
-
104
- my_digrams = digrams
105
- all_keys = my_digrams.map(&:key)
106
- last_key = all_keys.pop
107
- same_key_found = all_keys.index(last_key)
108
- return !same_key_found.nil?
109
- end
110
-
111
- # Retrieve the last digram appearing in the RHS (if any).
112
- # @return [Digram] last digram in the rhs otherwise nil.
113
- def last_digram
114
- result = digrams.empty? ? nil : digrams.last
115
- return result
116
- end
117
-
118
- # Emit a text representation of the production rule.
119
- # Text is of the form:
120
- # object id of production : rhs as space-separated sequence of symbols.
121
- # @return [String]
122
- def to_string
123
- return "#{object_id} : #{rhs.to_string}."
124
- end
125
-
126
- # Add a (grammar) symbol at the end of the RHS.
127
- # @param aSymbol [Object] A (grammar) symbol to add.
128
- def append_symbol(aSymbol)
129
- case aSymbol
8
+ # In a context-free grammar, a production is a rule in which
9
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
10
+ # and the right-hand side (RHS) consists of a sequence of symbols.
11
+ # The symbols in RHS can be either terminal or non-terminal symbols.
12
+ # The rule stipulates that the LHS is equivalent to the RHS,
13
+ # in other words every occurrence of the LHS can be substituted by the
14
+ # corresponding RHS.
15
+ # Implementation note: the object id of the production is taken as its LHS.
16
+ class Production
17
+ # @return [Sequitur::SymbolSequence] The right-hand side (rhs)
18
+ # consists of a sequence of grammar symbols
19
+ attr_reader(:rhs)
20
+
21
+ # @return [Integer] The reference count (= how many times other productions reference this one)
22
+ attr_reader(:refcount)
23
+
24
+ # @return [Array<Sequitur::Digram>] The sequence of digrams appearing in the RHS
25
+ attr_reader(:digrams)
26
+
27
+ # Constructor.
28
+ # Build a production with an empty RHS.
29
+ def initialize
30
+ @rhs = SymbolSequence.new
31
+ @refcount = 0
32
+ @digrams = []
33
+ end
34
+
35
+ # Identity testing.
36
+ # @param other [Production, ProductionRef] another production or production reference.
37
+ # @return [TrueClass, FalseClass] true when the receiver and other are the same.
38
+ def ==(other)
39
+ return true if object_id == other.object_id
40
+
41
+ if other.is_a?(ProductionRef)
42
+ (other == self)
43
+ else
44
+ false
45
+ end
46
+ end
47
+
48
+ # Is the rhs empty?
49
+ # @return [TrueClass, FalseClass] true if the rhs has no members.
50
+ def empty?
51
+ rhs.empty?
52
+ end
53
+
54
+ # Increment the reference count by one.
55
+ # @return [Integer]
56
+ def incr_refcount
57
+ @refcount += 1
58
+ end
59
+
60
+ # Decrement the reference count by one.
61
+ # @return [Integer]
62
+ def decr_refcount
63
+ raise StandardError, 'Internal error' if @refcount.zero?
64
+
65
+ @refcount -= 1
66
+ end
67
+
68
+ # Select the references to production appearing in the rhs.
69
+ # @return [Array<ProductionRef>]
70
+ def references
71
+ rhs.references
72
+ end
73
+
74
+ # Look in the rhs for all the references to the production passed as argument.
75
+ # @param a_prod [Production, ProductionRef] The production to search for.
76
+ # @return [Array<ProductionRef>]
77
+ def references_of(a_prod)
78
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
79
+ rhs.references_of(real_prod)
80
+ end
81
+
82
+ # Enumerate the digrams appearing in the right-hand side (rhs)
83
+ # @return [Array<Sequitur::Digram>] the list of digrams found in rhs of this production.
84
+ def recalc_digrams
85
+ return [] if rhs.size < 2
86
+
87
+ result = []
88
+ rhs.symbols.each_cons(2) { |couple| result << Digram.new(*couple, self) }
89
+ @digrams = result
90
+ end
91
+
92
+ # Does the rhs have exactly one digram only (= 2 symbols)?
93
+ # @return [TrueClass, FalseClass] true when the rhs contains exactly two symbols.
94
+ def single_digram?
95
+ rhs.size == 2
96
+ end
97
+
98
+ # Detect whether the last digram occurs twice
99
+ # Assumption: when a digram occurs twice in a production then it must occur
100
+ # at the end of the rhs
101
+ # @return [TrueClass, FalseClass] true when the digram occurs twice in rhs.
102
+ def repeated_digram?
103
+ return false if rhs.size < 3
104
+
105
+ my_digrams = digrams
106
+ all_keys = my_digrams.map(&:key)
107
+ last_key = all_keys.pop
108
+ same_key_found = all_keys.index(last_key)
109
+ !same_key_found.nil?
110
+ end
111
+
112
+ # Retrieve the last digram appearing in the RHS (if any).
113
+ # @return [Sequitur::Digram, NilClass] last digram in the rhs otherwise nil.
114
+ def last_digram
115
+ digrams.empty? ? nil : digrams.last
116
+ end
117
+
118
+ # Emit a text representation of the production rule.
119
+ # Text is of the form:
120
+ # object id of production : rhs as space-separated sequence of symbols.
121
+ # @return [String]
122
+ def to_string
123
+ "#{object_id} : #{rhs.to_string}."
124
+ end
125
+
126
+ # Add a (grammar) symbol at the end of the RHS.
127
+ # @param aSymbol [Object] A (grammar) symbol to add.
128
+ def append_symbol(aSymbol)
129
+ case aSymbol
130
130
  when Production
131
131
  new_symb = ProductionRef.new(aSymbol)
132
132
  when ProductionRef
@@ -138,91 +138,89 @@ class Production
138
138
  new_symb = aSymbol.dup
139
139
  else
140
140
  new_symb = aSymbol
141
+ end
142
+
143
+ rhs << new_symb
144
+ digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
145
+ end
146
+
147
+ # Clear the right-hand side.
148
+ # Any referenced production has its reference counter decremented.
149
+ def clear_rhs
150
+ rhs.clear
151
+ end
152
+
153
+ # Find all the positions where the digram occurs in the rhs
154
+ # @param symb1 [Object] first symbol of the digram
155
+ # @param symb2 [Object] second symbol of the digram
156
+ # @return [Array<Integer>] the list of indices where the digram occurs in rhs.
157
+ # @example
158
+ # # Given the production p : a b c a b a b d
159
+ # #Then ...
160
+ # p.positions_of(a, b) # => [0, 3, 5]
161
+ # # Caution: "overlapping" digrams shouldn't be counted
162
+ # # Given the production p : a a b a a a c d
163
+ # # Then ...
164
+ # p.positions_of(a, a) # => [0, 3]
165
+ def positions_of(symb1, symb2)
166
+ # Find the positions where the digram occurs in rhs
167
+ indices = [-2] # Dummy index!
168
+ (0...rhs.size).each do |i|
169
+ next if i == indices.last + 1
170
+
171
+ indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
172
+ end
173
+
174
+ indices.shift
175
+
176
+ indices
177
+ end
178
+
179
+ # Given that the production P passed as argument has exactly 2 symbols
180
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
181
+ # s1 s2 by a reference to P.
182
+ # @param another [Production, ProductionRef] a production that
183
+ # consists exactly of one digram (= 2 symbols).
184
+ def reduce_step(another)
185
+ (symb1, symb2) = another.rhs.symbols
186
+ pos = positions_of(symb1, symb2).reverse
187
+
188
+ # Replace the two symbol sequence by the production
189
+ pos.each { |index| rhs.reduce_step(index, another) }
190
+
191
+ recalc_digrams
141
192
  end
142
193
 
143
- rhs << new_symb
144
- digrams << Digram.new(rhs[-2], rhs[-1], self) if rhs.size >= 2
145
- end
146
-
147
- # Clear the right-hand side.
148
- # Any referenced production has its reference counter decremented.
149
- def clear_rhs
150
- rhs.clear
151
- end
152
-
153
- # Find all the positions where the digram occurs in the rhs
154
- # @param symb1 [Object] first symbol of the digram
155
- # @param symb2 [Object] second symbol of the digram
156
- # @return [Array] the list of indices where the digram occurs in rhs.
157
- # @example
158
- # # Given the production p : a b c a b a b d
159
- # #Then ...
160
- # p.positions_of(a, b) # => [0, 3, 5]
161
- # # Caution: "overlapping" digrams shouldn't be counted
162
- # # Given the production p : a a b a a a c d
163
- # # Then ...
164
- # p.positions_of(a, a) # => [0, 3]
165
- def positions_of(symb1, symb2)
166
- # Find the positions where the digram occur in rhs
167
- indices = [-2] # Dummy index!
168
- (0...rhs.size).each do |i|
169
- next if i == indices.last + 1
170
-
171
- indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
172
- end
173
-
174
- indices.shift
175
-
176
- return indices
177
- end
178
-
179
- # Given that the production P passed as argument has exactly 2 symbols
180
- # in its rhs s1 s2, substitute in the rhs of self all occurrences of
181
- # s1 s2 by a reference to P.
182
- # @param another [Production or ProductionRef] a production that
183
- # consists exactly of one digram (= 2 symbols).
184
- def reduce_step(another)
185
- (symb1, symb2) = another.rhs.symbols
186
- pos = positions_of(symb1, symb2).reverse
187
-
188
- # Replace the two symbol sequence by the production
189
- pos.each { |index| rhs.reduce_step(index, another) }
190
-
191
- recalc_digrams
192
- end
193
-
194
- # Replace every occurrence of 'another' production in self.rhs by
195
- # the symbols in the rhs of 'another'.
196
- # @param another [Production or ProductionRef] a production that
197
- # consists exactly of one digram (= 2 symbols).
198
- # @example Synopsis
199
- # # Given the production p_A : a p_B b p_B c
200
- # # And the production p_B : x y
201
- # # Then...
202
- # p_A.derive_step(p_B)
203
- # #Modifies p_A as into: p_A -> a x y b x y c
204
- def derive_step(another)
205
- (0...rhs.size).to_a.reverse_each do |index|
206
- next unless rhs[index] == another
207
-
208
- rhs.insert_at(index + 1, another.rhs)
209
- another.decr_refcount
210
- rhs.delete_at(index)
211
- end
212
-
213
- recalc_digrams
214
- end
215
-
216
- # Part of the 'visitee' role in Visitor design pattern.
217
- # @param aVisitor[GrammarVisitor]
218
- def accept(aVisitor)
219
- aVisitor.start_visit_production(self)
220
-
221
- rhs.accept(aVisitor)
222
-
223
- aVisitor.end_visit_production(self)
224
- end
225
- end # class
194
+ # Replace every occurrence of 'another' production in self.rhs by
195
+ # the symbols in the rhs of 'another'.
196
+ # @param another [Production, ProductionRef] a production that
197
+ # consists exactly of one digram (= 2 symbols).
198
+ # @example Synopsis
199
+ # # Given the production p_A : a p_B b p_B c
200
+ # # And the production p_B : x y
201
+ # # Then...
202
+ # p_A.derive_step(p_B)
203
+ # # Modifies p_A into: p_A -> a x y b x y c
204
+ def derive_step(another)
205
+ (0...rhs.size).to_a.reverse_each do |index|
206
+ next unless rhs[index] == another
207
+
208
+ rhs.insert_at(index + 1, another.rhs)
209
+ another.decr_refcount
210
+ rhs.delete_at(index)
211
+ end
212
+
213
+ recalc_digrams
214
+ end
215
+
216
+ # Part of the 'visitee' role in Visitor design pattern.
217
+ # @param aVisitor [Sequitur::GrammarVisitor]
218
+ def accept(aVisitor)
219
+ aVisitor.start_visit_production(self)
220
+ rhs.accept(aVisitor)
221
+ aVisitor.end_visit_production(self)
222
+ end
223
+ end # class
226
224
  end # module
227
225
 
228
226
  # End of file
@@ -17,11 +17,11 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
17
17
  # # ... Production reference count is updated...
18
18
  # puts prod.refcount # outputs 1
19
19
  class ProductionRef
20
- # Link to the production to reference.
20
+ # @return [Sequitur::Production] Link to the production to reference.
21
21
  attr_reader(:production)
22
22
 
23
23
  # Constructor
24
- # @param target [Production or ProductionRef]
24
+ # @param target [Production, ProductionRef]
25
25
  # The production that is being referenced.
26
26
  def initialize(target)
27
27
  bind_to(target)
@@ -42,49 +42,46 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
42
42
  # Emit the text representation of a production reference.
43
43
  # @return [String]
44
44
  def to_s
45
- return production.object_id.to_s
45
+ production.object_id.to_s
46
46
  end
47
47
 
48
48
  alias to_string to_s
49
49
 
50
-
51
50
  # Equality testing.
52
51
  # A production ref is equal to another one when it
53
52
  # refers to the same production or when it is compared to
54
53
  # the production it refers to.
55
- # @param other [ProductionRef]
56
- # @return [true / false]
54
+ # @param other [Production, ProductionRef]
55
+ # @return [TrueClass, FalseClass]
57
56
  def ==(other)
58
57
  return true if object_id == other.object_id
59
58
 
60
- result = if other.is_a?(ProductionRef)
61
- (production == other.production)
62
- else
63
- (production == other)
64
- end
65
-
66
- return result
59
+ if other.is_a?(ProductionRef)
60
+ production == other.production
61
+ else
62
+ production == other
63
+ end
67
64
  end
68
65
 
69
66
  # Produce a hash value.
70
67
  # A reference has no identity on its own,
71
68
  # the method returns the hash value of the
72
69
  # referenced production
73
- # @return [Fixnum] the hash value
70
+ # @return [Integer] the hash value
74
71
  def hash
75
72
  raise StandardError, 'Nil production' if production.nil?
76
73
 
77
- return production.hash
74
+ production.hash
78
75
  end
79
76
 
80
77
  # Make this reference point to the given production.
81
- # @param aProduction [Production or ProductionRef] the production
78
+ # @param aProduction [Production, ProductionRef] the production
82
79
  # to refer to
83
80
  def bind_to(aProduction)
84
81
  return if aProduction == @production
85
82
 
86
83
  production&.decr_refcount
87
- unless aProduction.kind_of?(Production)
84
+ unless aProduction.is_a?(Production)
88
85
  raise StandardError, "Illegal production type #{aProduction.class}"
89
86
  end
90
87
 
@@ -93,20 +90,21 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
93
90
  end
94
91
 
95
92
  # Clear the reference to the target production.
93
+ # @return [NilClass]
96
94
  def unbind
97
95
  production.decr_refcount
98
96
  @production = nil
99
97
  end
100
98
 
101
99
  # Check that this object doesn't refer to any production.
102
- # @return [true / false] true when this object doesn't
100
+ # @return [TrueClass, FalseClass] true when this object doesn't
103
101
  # point to a production.
104
102
  def unbound?
105
- return production.nil?
103
+ production.nil?
106
104
  end
107
105
 
108
106
  # Part of the 'visitee' role in the Visitor design pattern.
109
- # @param aVisitor [GrammarVisitor] the visitor
107
+ # @param aVisitor [Sequitur::GrammarVisitor] the visitor
110
108
  def accept(aVisitor)
111
109
  aVisitor.visit_prod_ref(self)
112
110
  end