sequitur 0.1.03 → 0.1.05

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,80 +1,98 @@
1
1
  module Sequitur # Module for classes implementing the Sequitur algorithm
2
2
 
3
3
  # A visitor class dedicated to the visit of a Grammar.
4
-
5
4
  class GrammarVisitor
5
+ # Link to the grammar to visit
6
6
  attr_reader(:grammar)
7
-
7
+
8
+ # List of objects that subscribed to the visit event notification.
8
9
  attr_reader(:subscribers)
9
-
10
- # Constructor.
11
- # [aGrammar] a DynamicGrammar-like instance.
10
+
11
+ # Build a visitor for the given grammar.
12
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
12
13
  def initialize(aGrammar)
13
14
  @grammar = aGrammar
14
15
  @subscribers = []
15
16
  end
16
-
17
+
17
18
  public
18
-
19
- # Add a subscriber to the list.
19
+
20
+ # Add a subscriber for the visit event notification.
21
+ # @param aSubscriber [Object]
20
22
  def subscribe(aSubscriber)
21
23
  subscribers << aSubscriber
22
24
  end
23
-
25
+
26
+ # Remove the given object from the subscription list.
27
+ # The object won't be notified of visit events.
28
+ # @param aSubscriber [Object]
24
29
  def unsubscribe(aSubscriber)
25
30
  subscribers.delete_if { |entry| entry == aSubscriber }
26
31
  end
27
-
32
+
28
33
  # The signal to start the visit.
29
34
  def start()
30
- grammar.send(:accept, self)
35
+ grammar.accept(self)
31
36
  end
32
37
 
33
-
38
+
39
+ # Visit event. The visitor is about to visit the grammar.
40
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
34
41
  def start_visit_grammar(aGrammar)
35
42
  broadcast(:before_grammar, aGrammar)
36
43
  end
37
-
38
44
 
45
+
46
+ # Visit event. The visitor is about to visit the given production.
47
+ # @param aProduction [Production] the production to visit.
39
48
  def start_visit_production(aProduction)
40
49
  broadcast(:before_production, aProduction)
41
50
  broadcast(:before_rhs, aProduction.rhs)
42
51
  end
43
52
 
44
-
53
+ # Visit event. The visitor is visiting the
54
+ # given reference production (= non-terminal symbol).
55
+ # @param aProdRef [ProductionRef] the production reference to visit.
45
56
  def visit_prod_ref(aProdRef)
46
57
  production = aProdRef.production
47
58
  broadcast(:before_non_terminal, production)
48
59
  broadcast(:after_non_terminal, production)
49
60
  end
50
61
 
62
+ # Visit event. The visitor is visiting the
63
+ # given terminal symbol.
64
+ # @param aTerminal [Object] the terminal to visit.
51
65
  def visit_terminal(aTerminal)
52
66
  broadcast(:before_terminal, aTerminal)
53
67
  broadcast(:after_terminal, aTerminal)
54
- end
55
-
68
+ end
56
69
 
70
+ # Visit event. The visitor has completed its visit of the given production.
71
+ # @param aProduction [Production] the production to visit.
57
72
  def end_visit_production(aProduction)
58
73
  broadcast(:after_rhs, aProduction.rhs)
59
74
  broadcast(:after_production, aProduction)
60
75
 
61
- end
62
-
63
-
76
+ end
77
+
78
+ # Visit event. The visitor has completed the visit of the grammar.
79
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
64
80
  def end_visit_grammar(aGrammar)
65
81
  broadcast(:after_grammar, aGrammar)
66
82
  end
67
-
68
- private
69
83
 
84
+ private
85
+ # Send a notification to all subscribers.
86
+ # @param msg [Symbol] event to notify
87
+ # @param args [Array] arguments of the notification.
70
88
  def broadcast(msg, *args)
71
89
  subscribers.each do |a_subscriber|
72
90
  next unless a_subscriber.respond_to?(msg)
73
91
  a_subscriber.send(msg, *args)
74
92
  end
75
93
  end
76
-
77
-
94
+
95
+
78
96
  end # class
79
97
 
80
98
  end # module
@@ -11,7 +11,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
11
11
  # The rule stipulates that the LHS is equivalent to the RHS,
12
12
  # in other words every occurrence of the LHS can be replaced by the
13
13
  # corresponding RHS.
14
- # The object id of the production is taken as its LHS.
14
+ # Implementation note: the object id of the production is taken as its LHS.
15
15
  class Production
16
16
  # The right-hand side (rhs) consists of a sequence of grammar symbols
17
17
  attr_reader(:rhs)
@@ -22,7 +22,8 @@ class Production
22
22
  # The sequence of digrams appearing in the RHS
23
23
  attr_reader(:digrams)
24
24
 
25
- # Constructor. Build a production with an empty RHS.
25
+ # Constructor.
26
+ # Build a production with an empty RHS.
26
27
  def initialize()
27
28
  clear_rhs
28
29
  @refcount = 0
@@ -31,6 +32,9 @@ class Production
31
32
 
32
33
  public
33
34
 
35
+ # Identity testing.
36
+ # @param other [Production or ProductionRef] another production or production reference.
37
+ # @return true when the receiver and other are the same.
34
38
  def ==(other)
35
39
  return true if object_id == other.object_id
36
40
 
@@ -45,33 +49,40 @@ class Production
45
49
 
46
50
 
47
51
  # Is the rhs empty?
52
+ # @return [true/false] true if the rhs has no members.
48
53
  def empty?
49
54
  return rhs.empty?
50
55
  end
51
56
 
57
+ # Increment the reference count by one.
52
58
  def incr_refcount()
53
59
  @refcount += 1
54
60
  end
55
61
 
62
+ # Decrement the reference count by one.
56
63
  def decr_refcount()
57
64
  fail StandardError, 'Internal error' if @refcount == 0
58
65
  @refcount -= 1
59
66
  end
60
67
 
61
68
 
62
- # Return the set of references to production appearing in the rhs.
69
+ # Select the references to production appearing in the rhs.
70
+ # @return [Array of ProductionRef]
63
71
  def references()
64
72
  return rhs.select { |symb| symb.is_a?(ProductionRef) }
65
73
  end
66
74
 
67
- # Return the set of references to a given production
75
+ # Look in the rhs for all the references to the production passed as argument.
76
+ # @param aProduction [Production or ProductionRef] The production to search for.
77
+ # @return [Array] the array of ProductionRef to the passed production
68
78
  def references_of(aProduction)
69
79
  refs = references
70
80
  return refs.select { |a_ref| a_ref == aProduction }
71
81
  end
72
82
 
73
83
 
74
- # Return the list digrams found in rhs of this production.
84
+ # Enumerate the digrams appearing in the right-hand side (rhs)
85
+ # @return [Array] the list of digrams found in rhs of this production.
75
86
  def recalc_digrams()
76
87
  return [] if rhs.size < 2
77
88
 
@@ -84,6 +95,7 @@ class Production
84
95
 
85
96
 
86
97
  # Does the rhs have exactly one digram only (= 2 symbols)?
98
+ # @return [true/false] true when the rhs contains exactly two symbols.
87
99
  def single_digram?
88
100
  return rhs.size == 2
89
101
  end
@@ -92,7 +104,8 @@ class Production
92
104
  # Detect whether the last digram occurs twice
93
105
  # Assumption: when a digram occurs twice in a production then it must occur
94
106
  # at the end of the rhs
95
- def repeated_digram?
107
+ # @return [true/false] true when the digram occurs twice in rhs.
108
+ def repeated_digram?()
96
109
  return false if rhs.size < 3
97
110
 
98
111
  my_digrams = digrams
@@ -102,7 +115,8 @@ class Production
102
115
  return !same_key_found.nil?
103
116
  end
104
117
 
105
- # Return the last digram appearing in the RHS.
118
+ # Retrieve the last digram appearing in the RHS (if any).
119
+ # @return [Digram] last digram in the rhs otherwise nil.
106
120
  def last_digram()
107
121
  result = digrams.empty? ? nil : digrams.last
108
122
  return result
@@ -113,6 +127,7 @@ class Production
113
127
  # Emit a text representation of the production rule.
114
128
  # Text is of the form:
115
129
  # object id of production : rhs as space-separated sequence of symbols.
130
+ # @return [String]
116
131
  def to_string()
117
132
  rhs_text = rhs.map do |elem|
118
133
  case elem
@@ -125,6 +140,7 @@ class Production
125
140
  end
126
141
 
127
142
  # Add a (grammar) symbol at the end of the RHS.
143
+ # @param aSymbol [Object] A (grammar) symbol to add.
128
144
  def append_symbol(aSymbol)
129
145
  case aSymbol
130
146
  when Production
@@ -145,22 +161,27 @@ class Production
145
161
  end
146
162
 
147
163
  # Clear the right-hand side.
148
- # Any referenced production has its back reference counter decremented
164
+ # Any referenced production has its reference counter decremented.
149
165
  def clear_rhs()
150
166
  if rhs
151
167
  refs = references
152
- refs.each { |a_ref| a_ref.unbind }
168
+ refs.each(&:unbind)
153
169
  end
154
170
  @rhs = []
155
171
  end
156
172
 
157
173
  # Find all the positions where the digram occurs in the rhs
158
- # Synopsis:
159
- # Given the production p -> a b c a b a b d
160
- # Then p.positions_of(a, b) should returns [0, 3, 5]
161
- # Caution: "overlapping" digrams shouldn't be counted
162
- # Given the production p -> a a b a a a c d
163
- # Then p.positions_of(a, a) should returns [0, 3]
174
+ # @param symb1 [Object] first symbol of the digram
175
+ # @param symb2 [Object] second symbol of the digram
176
+ # @return [Array] the list of indices where the digram occurs in rhs.
177
+ # @example
178
+ # # Given the production p : a b c a b a b d
179
+ # #Then ...
180
+ # p.positions_of(a, b) # => [0, 3, 5]
181
+ # # Caution: "overlapping" digrams shouldn't be counted
182
+ # # Given the production p : a a b a a a c d
183
+ # # Then ...
184
+ # p.positions_of(a, a) # => [0, 3]
164
185
  def positions_of(symb1, symb2)
165
186
 
166
187
  # Find the positions where the digram occur in rhs
@@ -176,11 +197,12 @@ class Production
176
197
  end
177
198
 
178
199
 
179
- # Substitute in self all occurrences of the digram that
180
- # appears in the rhs of the other production
181
- # Pre-condition:
182
- # another has a rhs with exactly one digram (= a two-symbol sequence).
183
- def replace_digram(another)
200
+ # Given that the production P passed as argument has exactly 2 symbols
201
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
202
+ # s1 s2 by a reference to P.
203
+ # @param another [Production or ProductionRef] a production that
204
+ # consists exactly of one digram (= 2 symbols).
205
+ def reduce_step(another)
184
206
  (symb1, symb2) = another.rhs
185
207
  pos = positions_of(symb1, symb2).reverse
186
208
 
@@ -199,14 +221,17 @@ class Production
199
221
  recalc_digrams
200
222
  end
201
223
 
202
- # Replace every occurrence of 'another' production in rhs by
203
- # the rhs of 'another'.
204
- # Given the production p_A -> a p_B b p_B c
205
- # And the production p_B -> x y
206
- # Then the call p_A.replace_production(p_B)
207
- # Modifies p_A as into:
208
- # p_A -> a x y b x y c
209
- def replace_production(another)
224
+ # Replace every occurrence of 'another' production in self.rhs by
225
+ # the symbols in the rhs of 'another'.
226
+ # @param another [Production or ProductionRef] a production that
227
+ # consists exactly of one digram (= 2 symbols).
228
+ # @example Synopsis
229
+ # # Given the production p_A : a p_B b p_B c
230
+ # # And the production p_B : x y
231
+ # # Then...
232
+ # p_A.derive_step(p_B)
233
+ # # Modifies p_A into: p_A : a x y b x y c
234
+ def derive_step(another)
210
235
  (0...rhs.size).to_a.reverse.each do |index|
211
236
  next unless rhs[index] == another
212
237
 
@@ -223,8 +248,8 @@ class Production
223
248
  end
224
249
 
225
250
 
226
- # Part of the 'visitee' role.
227
- # [aVisitor] a GrammarVisitor instance
251
+ # Part of the 'visitee' role in Visitor design pattern.
252
+ # @param aVisitor [GrammarVisitor] the visitor
228
253
  def accept(aVisitor)
229
254
  aVisitor.start_visit_production(self)
230
255
 
@@ -9,24 +9,40 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
9
9
  # in the appropriate position in the RHS of P1.
10
10
  # In the literature, production references are also called non terminal
11
11
  # symbols
12
+ # @example
13
+ # # Given a production rule...
14
+ # prod = Sequitur::Production.new
15
+ # puts prod.refcount # outputs 0
16
+ # # ... Build a reference to it
17
+ # ref = Sequitur::ProductionRef.new(prod)
18
+ # # ... Production reference count is updated...
19
+ # puts prod.refcount # outputs 1
12
20
  class ProductionRef
13
21
 
14
- # Link to the production to reference
22
+ # Link to the production to reference.
15
23
  attr_reader(:production)
16
24
 
17
25
  # Constructor
18
- # [target] The production that is being referenced.
26
+ # @param target [Production or ProductionRef]
27
+ # The production that is being referenced.
19
28
  def initialize(target)
20
29
  bind_to(target)
21
30
  end
22
-
23
- # Copy constructor invoked by dup or clone methods
31
+
32
+ # Copy constructor invoked by dup or clone methods.
33
+ # @param orig [ProductionRef]
34
+ # @example
35
+ # prod = Sequitur::Production.new
36
+ # ref = Sequitur::ProductionRef.new(prod)
37
+ # copy_ref = ref.dup
38
+ # puts prod.refcount # outputs 2
24
39
  def initialize_copy(orig)
25
40
  @production = nil
26
41
  bind_to(orig.production)
27
42
  end
28
43
 
29
- # Return the text representation of a production reference.
44
+ # Emit the text representation of a production reference.
45
+ # @return [String]
30
46
  def to_s()
31
47
  return "#{production.object_id}"
32
48
  end
@@ -35,9 +51,11 @@ class ProductionRef
35
51
 
36
52
 
37
53
  # Equality testing.
38
- # A production ref is equal to another one when its
39
- # refers to the same production or when it is compared to
40
- # the production it refers to.
54
+ # A production ref is equal to another one when it
55
+ # refers to the same production or when it is compared to
56
+ # the production it refers to.
57
+ # @param other [ProductionRef]
58
+ # @return [true / false]
41
59
  def ==(other)
42
60
  return true if object_id == other.object_id
43
61
 
@@ -50,42 +68,48 @@ class ProductionRef
50
68
  return result
51
69
  end
52
70
 
53
- # Generates a Fixnum value as hash value.
54
- # As a reference has no identity on its own,
55
- # the method returns the hash value of the
56
- # referenced production
71
+ # Produce a hash value.
72
+ # A reference has no identity on its own,
73
+ # the method returns the hash value of the
74
+ # referenced production
75
+ # @return [Fixnum] the hash value
57
76
  def hash()
58
77
  fail StandardError, 'Nil production' if production.nil?
59
78
  return production.hash
60
79
  end
61
-
62
- # Make this reference points to the given production
80
+
81
+ # Make this reference point to the given production.
82
+ # @param aProduction [Production or ProductionRef] the production
83
+ # to refer to
63
84
  def bind_to(aProduction)
64
85
  return if aProduction == @production
65
-
86
+
66
87
  production.decr_refcount if production
67
88
  unless aProduction.kind_of?(Production)
68
89
  fail StandardError, "Illegal production type #{aProduction.class}"
69
90
  end
70
- @production = aProduction
91
+ @production = aProduction
71
92
  production.incr_refcount
72
93
  end
73
94
 
74
- # Clear the reference to the target production
95
+
96
+ # Clear the reference to the target production.
75
97
  def unbind()
76
98
  production.decr_refcount
77
99
  @production = nil
78
100
  end
79
101
 
80
102
  # Check that this object doesn't refer to any production.
103
+ # @return [true / false] true when this object doesn't
104
+ # point to a production.
81
105
  def unbound?()
82
106
  return production.nil?
83
107
  end
84
-
85
- # Part of the 'visitee' role.
86
- # [aVisitor] a GrammarVisitor instance
108
+
109
+ # Part of the 'visitee' role in the Visitor design pattern.
110
+ # @param aVisitor [GrammarVisitor] the visitor
87
111
  def accept(aVisitor)
88
- aVisitor.visit_prod_ref(self)
112
+ aVisitor.visit_prod_ref(self)
89
113
  end
90
114
 
91
115
  end # class
@@ -3,13 +3,18 @@ require_relative 'dynamic_grammar'
3
3
 
4
4
  module Sequitur # Module for classes implementing the Sequitur algorithm
5
5
 
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
6
9
  class SequiturGrammar < DynamicGrammar
7
10
 
8
- # Constructor. Build the grammar from an enumerator of tokens
11
+ # Build the grammar from an enumerator of tokens.
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
+ # over the input tokens.
9
14
  def initialize(anEnum)
10
15
  super()
11
16
  # Make start production compliant with utility rule
12
- 2.times { root.incr_refcount }
17
+ 2.times { start.incr_refcount }
13
18
 
14
19
  # Read the input sequence and apply the Sequitur algorithm
15
20
  anEnum.each do |a_token|
@@ -18,10 +23,14 @@ class SequiturGrammar < DynamicGrammar
18
23
  end
19
24
  end
20
25
 
21
- public
26
+ private
22
27
 
23
-
24
- CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
25
34
 
26
35
 
27
36
  # Assuming that a new input token was added to the start production,
@@ -37,7 +46,7 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
37
46
  # end
38
47
  # end until digram unicity and rule utility are met
39
48
  def enforce_rules()
40
- begin
49
+ loop do
41
50
  unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
42
51
  restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
43
52
 
@@ -46,8 +55,8 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
46
55
 
47
56
  unicity_diagnosis = detect_collision
48
57
  useless_prod = detect_useless_production
49
-
50
- end while unicity_diagnosis.collision_found || useless_prod
58
+ break unless unicity_diagnosis.collision_found || useless_prod
59
+ end
51
60
  end
52
61
 
53
62
  # Check whether a digram is used twice in the grammar.
@@ -88,15 +97,11 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
88
97
  # Then create a new production that will have
89
98
  # the symbols of d as its rhs members.
90
99
  def restore_unicity(aDiagnosis)
91
- return if aDiagnosis.nil?
92
-
93
100
  digr = aDiagnosis.digram
94
101
  prods = aDiagnosis.productions
95
102
  if prods.any?(&:single_digram?)
96
- (simple, compound) = prods.partition do |a_prod|
97
- a_prod.single_digram?
98
- end
99
- compound[0].replace_digram(simple[0])
103
+ (simple, compound) = prods.partition(&:single_digram?)
104
+ compound[0].reduce_step(simple[0])
100
105
  else
101
106
  # Create a new production with the digram's symbols as its
102
107
  # sole rhs members.
@@ -104,9 +109,9 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
104
109
  digr.symbols.each { |sym| new_prod.append_symbol(sym) }
105
110
  add_production(new_prod)
106
111
  if prods[0] == prods[1]
107
- prods[0].replace_digram(new_prod)
112
+ prods[0].reduce_step(new_prod)
108
113
  else
109
- prods.each { |a_prod| a_prod.replace_digram(new_prod) }
114
+ prods.each { |a_prod| a_prod.reduce_step(new_prod) }
110
115
  end
111
116
  end
112
117
  end
@@ -136,7 +141,7 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
136
141
  break
137
142
  end
138
143
 
139
- referencing.replace_production(useless_prod)
144
+ referencing.derive_step(useless_prod)
140
145
  remove_production(index)
141
146
  end
142
147