sequitur 0.1.03 → 0.1.05

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,80 +1,98 @@
1
1
  module Sequitur # Module for classes implementing the Sequitur algorithm
2
2
 
3
3
  # A visitor class dedicated to the visit of a Grammar.
4
-
5
4
  class GrammarVisitor
5
+ # Link to the grammar to visit
6
6
  attr_reader(:grammar)
7
-
7
+
8
+ # List of objects that subscribed to the visit event notification.
8
9
  attr_reader(:subscribers)
9
-
10
- # Constructor.
11
- # [aGrammar] a DynamicGrammar-like instance.
10
+
11
+ # Build a visitor for the given grammar.
12
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
12
13
  def initialize(aGrammar)
13
14
  @grammar = aGrammar
14
15
  @subscribers = []
15
16
  end
16
-
17
+
17
18
  public
18
-
19
- # Add a subscriber to the list.
19
+
20
+ # Add a subscriber for the visit event notification.
21
+ # @param aSubscriber [Object]
20
22
  def subscribe(aSubscriber)
21
23
  subscribers << aSubscriber
22
24
  end
23
-
25
+
26
+ # Remove the given object from the subscription list.
27
+ # The object won't be notified of visit events.
28
+ # @param aSubscriber [Object]
24
29
  def unsubscribe(aSubscriber)
25
30
  subscribers.delete_if { |entry| entry == aSubscriber }
26
31
  end
27
-
32
+
28
33
  # The signal to start the visit.
29
34
  def start()
30
- grammar.send(:accept, self)
35
+ grammar.accept(self)
31
36
  end
32
37
 
33
-
38
+
39
+ # Visit event. The visitor is about to visit the grammar.
40
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
34
41
  def start_visit_grammar(aGrammar)
35
42
  broadcast(:before_grammar, aGrammar)
36
43
  end
37
-
38
44
 
45
+
46
+ # Visit event. The visitor is about to visit the given production.
47
+ # @param aProduction [Production] the production to visit.
39
48
  def start_visit_production(aProduction)
40
49
  broadcast(:before_production, aProduction)
41
50
  broadcast(:before_rhs, aProduction.rhs)
42
51
  end
43
52
 
44
-
53
+ # Visit event. The visitor is visiting the
54
+ # given reference production (= non-terminal symbol).
55
+ # @param aProdRef [ProductionRef] the production reference to visit.
45
56
  def visit_prod_ref(aProdRef)
46
57
  production = aProdRef.production
47
58
  broadcast(:before_non_terminal, production)
48
59
  broadcast(:after_non_terminal, production)
49
60
  end
50
61
 
62
+ # Visit event. The visitor is visiting the
63
+ # given terminal symbol.
64
+ # @param aTerminal [Object] the terminal to visit.
51
65
  def visit_terminal(aTerminal)
52
66
  broadcast(:before_terminal, aTerminal)
53
67
  broadcast(:after_terminal, aTerminal)
54
- end
55
-
68
+ end
56
69
 
70
+ # Visit event. The visitor has completed its visit of the given production.
71
+ # @param aProduction [Production] the production to visit.
57
72
  def end_visit_production(aProduction)
58
73
  broadcast(:after_rhs, aProduction.rhs)
59
74
  broadcast(:after_production, aProduction)
60
75
 
61
- end
62
-
63
-
76
+ end
77
+
78
+ # Visit event. The visitor has completed the visit of the grammar.
79
+ # @param aGrammar [DynamicGrammar-like] the grammar to visit.
64
80
  def end_visit_grammar(aGrammar)
65
81
  broadcast(:after_grammar, aGrammar)
66
82
  end
67
-
68
- private
69
83
 
84
+ private
85
+ # Send a notification to all subscribers.
86
+ # @param msg [Symbol] event to notify
87
+ # @param args [Array] arguments of the notification.
70
88
  def broadcast(msg, *args)
71
89
  subscribers.each do |a_subscriber|
72
90
  next unless a_subscriber.respond_to?(msg)
73
91
  a_subscriber.send(msg, *args)
74
92
  end
75
93
  end
76
-
77
-
94
+
95
+
78
96
  end # class
79
97
 
80
98
  end # module
@@ -11,7 +11,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
11
11
  # The rule stipulates that the LHS is equivalent to the RHS,
12
12
  # in other words every occurrence of the LHS can be substituted to
13
13
  # corresponding RHS.
14
- # The object id of the production is taken as its LHS.
14
+ # Implementation note: the object id of the production is taken as its LHS.
15
15
  class Production
16
16
  # The right-hand side (rhs) consists of a sequence of grammar symbols
17
17
  attr_reader(:rhs)
@@ -22,7 +22,8 @@ class Production
22
22
  # The sequence of digrams appearing in the RHS
23
23
  attr_reader(:digrams)
24
24
 
25
- # Constructor. Build a production with an empty RHS.
25
+ # Constructor.
26
+ # Build a production with an empty RHS.
26
27
  def initialize()
27
28
  clear_rhs
28
29
  @refcount = 0
@@ -31,6 +32,9 @@ class Production
31
32
 
32
33
  public
33
34
 
35
+ # Identity testing.
36
+ # @param other [Production or ProductionRef] another production or production reference.
37
+ # @return true when the receiver and other are the same.
34
38
  def ==(other)
35
39
  return true if object_id == other.object_id
36
40
 
@@ -45,33 +49,40 @@ class Production
45
49
 
46
50
 
47
51
  # Is the rhs empty?
52
+ # @return true if the rhs has no members.
48
53
  def empty?
49
54
  return rhs.empty?
50
55
  end
51
56
 
57
+ # Increment the reference count by one.
52
58
  def incr_refcount()
53
59
  @refcount += 1
54
60
  end
55
61
 
62
+ # Decrement the reference count by one.
56
63
  def decr_refcount()
57
64
  fail StandardError, 'Internal error' if @refcount == 0
58
65
  @refcount -= 1
59
66
  end
60
67
 
61
68
 
62
- # Return the set of references to production appearing in the rhs.
69
+ # Select the references to production appearing in the rhs.
70
+ # @return [Array<ProductionRef>]
63
71
  def references()
64
72
  return rhs.select { |symb| symb.is_a?(ProductionRef) }
65
73
  end
66
74
 
67
- # Return the set of references to a given production
75
+ # Look in the rhs for all the references to the production passed as argument.
76
+ # @param aProduction [Production or ProductionRef] The production to search for.
77
+ # @return [Array] the array of ProductionRef to the passed production
68
78
  def references_of(aProduction)
69
79
  refs = references
70
80
  return refs.select { |a_ref| a_ref == aProduction }
71
81
  end
72
82
 
73
83
 
74
- # Return the list digrams found in rhs of this production.
84
+ # Enumerate the digrams appearing in the right-hand side (rhs)
85
+ # @return [Array] the list of digrams found in rhs of this production.
75
86
  def recalc_digrams()
76
87
  return [] if rhs.size < 2
77
88
 
@@ -84,6 +95,7 @@ class Production
84
95
 
85
96
 
86
97
  # Does the rhs have exactly one digram only (= 2 symbols)?
98
+ # @return [true/false] true when the rhs contains exactly two symbols.
87
99
  def single_digram?
88
100
  return rhs.size == 2
89
101
  end
@@ -92,7 +104,8 @@ class Production
92
104
  # Detect whether the last digram occurs twice
93
105
  # Assumption: when a digram occurs twice in a production then it must occur
94
106
  # at the end of the rhs
95
- def repeated_digram?
107
+ # @return [true/false] true when the digram occurs twice in rhs.
108
+ def repeated_digram?()
96
109
  return false if rhs.size < 3
97
110
 
98
111
  my_digrams = digrams
@@ -102,7 +115,8 @@ class Production
102
115
  return !same_key_found.nil?
103
116
  end
104
117
 
105
- # Return the last digram appearing in the RHS.
118
+ # Retrieve the last digram appearing in the RHS (if any).
119
+ # @return [Digram, nil] the last digram in the rhs, or nil when there is none.
106
120
  def last_digram()
107
121
  result = digrams.empty? ? nil : digrams.last
108
122
  return result
@@ -113,6 +127,7 @@ class Production
113
127
  # Emit a text representation of the production rule.
114
128
  # Text is of the form:
115
129
  # object id of production : rhs as space-separated sequence of symbols.
130
+ # @return [String]
116
131
  def to_string()
117
132
  rhs_text = rhs.map do |elem|
118
133
  case elem
@@ -125,6 +140,7 @@ class Production
125
140
  end
126
141
 
127
142
  # Add a (grammar) symbol at the end of the RHS.
143
+ # @param aSymbol [Object] A (grammar) symbol to add.
128
144
  def append_symbol(aSymbol)
129
145
  case aSymbol
130
146
  when Production
@@ -145,22 +161,27 @@ class Production
145
161
  end
146
162
 
147
163
  # Clear the right-hand side.
148
- # Any referenced production has its back reference counter decremented
164
+ # Any referenced production has its reference counter decremented.
149
165
  def clear_rhs()
150
166
  if rhs
151
167
  refs = references
152
- refs.each { |a_ref| a_ref.unbind }
168
+ refs.each(&:unbind)
153
169
  end
154
170
  @rhs = []
155
171
  end
156
172
 
157
173
  # Find all the positions where the digram occurs in the rhs
158
- # Synopsis:
159
- # Given the production p -> a b c a b a b d
160
- # Then p.positions_of(a, b) should returns [0, 3, 5]
161
- # Caution: "overlapping" digrams shouldn't be counted
162
- # Given the production p -> a a b a a a c d
163
- # Then p.positions_of(a, a) should returns [0, 3]
174
+ # @param symb1 [Object] first symbol of the digram
175
+ # @param symb2 [Object] second symbol of the digram
176
+ # @return [Array] the list of indices where the digram occurs in rhs.
177
+ # @example
178
+ # # Given the production p : a b c a b a b d
179
+ # # Then ...
180
+ # p.positions_of(a, b) # => [0, 3, 5]
181
+ # # Caution: "overlapping" digrams shouldn't be counted
182
+ # # Given the production p : a a b a a a c d
183
+ # # Then ...
184
+ # p.positions_of(a, a) # => [0, 3]
164
185
  def positions_of(symb1, symb2)
165
186
 
166
187
  # Find the positions where the digram occur in rhs
@@ -176,11 +197,12 @@ class Production
176
197
  end
177
198
 
178
199
 
179
- # Substitute in self all occurrences of the digram that
180
- # appears in the rhs of the other production
181
- # Pre-condition:
182
- # another has a rhs with exactly one digram (= a two-symbol sequence).
183
- def replace_digram(another)
200
+ # Given that the production P passed as argument has exactly 2 symbols
201
+ # in its rhs s1 s2, substitute in the rhs of self all occurrences of
202
+ # s1 s2 by a reference to P.
203
+ # @param another [Production or ProductionRef] a production that
204
+ # consists exactly of one digram (= 2 symbols).
205
+ def reduce_step(another)
184
206
  (symb1, symb2) = another.rhs
185
207
  pos = positions_of(symb1, symb2).reverse
186
208
 
@@ -199,14 +221,17 @@ class Production
199
221
  recalc_digrams
200
222
  end
201
223
 
202
- # Replace every occurrence of 'another' production in rhs by
203
- # the rhs of 'another'.
204
- # Given the production p_A -> a p_B b p_B c
205
- # And the production p_B -> x y
206
- # Then the call p_A.replace_production(p_B)
207
- # Modifies p_A as into:
208
- # p_A -> a x y b x y c
209
- def replace_production(another)
224
+ # Replace every occurrence of 'another' production in self.rhs by
225
+ # the symbols in the rhs of 'another'.
226
+ # @param another [Production or ProductionRef] the production whose
227
+ # occurrences in self.rhs are replaced by the symbols of its rhs.
228
+ # @example Synopsis
229
+ # # Given the production p_A : a p_B b p_B c
230
+ # # And the production p_B : x y
231
+ # # Then...
232
+ # p_A.derive_step(p_B)
233
+ # # Modifies p_A into: p_A : a x y b x y c
234
+ def derive_step(another)
210
235
  (0...rhs.size).to_a.reverse.each do |index|
211
236
  next unless rhs[index] == another
212
237
 
@@ -223,8 +248,8 @@ class Production
223
248
  end
224
249
 
225
250
 
226
- # Part of the 'visitee' role.
227
- # [aVisitor] a GrammarVisitor instance
251
+ # Part of the 'visitee' role in Visitor design pattern.
252
+ # @param aVisitor[GrammarVisitor]
228
253
  def accept(aVisitor)
229
254
  aVisitor.start_visit_production(self)
230
255
 
@@ -9,24 +9,40 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
9
9
  # in the appropriate position in the RHS of P1.
10
10
  # In the literature, production references are also called non terminal
11
11
  # symbols
12
+ # @example
13
+ # # Given a production rule...
14
+ # prod = Sequitur::Production.new
15
+ # puts prod.refcount # outputs 0
16
+ # # ... Build a reference to it
17
+ # ref = Sequitur::ProductionRef.new(prod)
18
+ # # ... Production reference count is updated...
19
+ # puts prod.refcount # outputs 1
12
20
  class ProductionRef
13
21
 
14
- # Link to the production to reference
22
+ # Link to the production to reference.
15
23
  attr_reader(:production)
16
24
 
17
25
  # Constructor
18
- # [target] The production that is being referenced.
26
+ # @param target [Production or ProductionRef]
27
+ # The production that is being referenced.
19
28
  def initialize(target)
20
29
  bind_to(target)
21
30
  end
22
-
23
- # Copy constructor invoked by dup or clone methods
31
+
32
+ # Copy constructor invoked by dup or clone methods.
33
+ # @param orig [ProductionRef]
34
+ # @example
35
+ # prod = Sequitur::Production.new
36
+ # ref = Sequitur::ProductionRef.new(prod)
37
+ # copy_ref = ref.dup
38
+ # puts prod.refcount # outputs 2
24
39
  def initialize_copy(orig)
25
40
  @production = nil
26
41
  bind_to(orig.production)
27
42
  end
28
43
 
29
- # Return the text representation of a production reference.
44
+ # Emit the text representation of a production reference.
45
+ # @return [String]
30
46
  def to_s()
31
47
  return "#{production.object_id}"
32
48
  end
@@ -35,9 +51,11 @@ class ProductionRef
35
51
 
36
52
 
37
53
  # Equality testing.
38
- # A production ref is equal to another one when its
39
- # refers to the same production or when it is compared to
40
- # the production it refers to.
54
+ # A production ref is equal to another one when it
55
+ # refers to the same production or when it is compared to
56
+ # the production it refers to.
57
+ # @param other [Production or ProductionRef]
58
+ # @return [true / false]
41
59
  def ==(other)
42
60
  return true if object_id == other.object_id
43
61
 
@@ -50,42 +68,48 @@ class ProductionRef
50
68
  return result
51
69
  end
52
70
 
53
- # Generates a Fixnum value as hash value.
54
- # As a reference has no identity on its own,
55
- # the method returns the hash value of the
56
- # referenced production
71
+ # Produce a hash value.
72
+ # A reference has no identity on its own,
73
+ # the method returns the hash value of the
74
+ # referenced production
75
+ # @return [Fixnum] the hash value
57
76
  def hash()
58
77
  fail StandardError, 'Nil production' if production.nil?
59
78
  return production.hash
60
79
  end
61
-
62
- # Make this reference points to the given production
80
+
81
+ # Make this reference point to the given production.
82
+ # @param aProduction [Production or ProductionRef] the production
83
+ # to refer to
63
84
  def bind_to(aProduction)
64
85
  return if aProduction == @production
65
-
86
+
66
87
  production.decr_refcount if production
67
88
  unless aProduction.kind_of?(Production)
68
89
  fail StandardError, "Illegal production type #{aProduction.class}"
69
90
  end
70
- @production = aProduction
91
+ @production = aProduction
71
92
  production.incr_refcount
72
93
  end
73
94
 
74
- # Clear the reference to the target production
95
+
96
+ # Clear the reference to the target production.
75
97
  def unbind()
76
98
  production.decr_refcount
77
99
  @production = nil
78
100
  end
79
101
 
80
102
  # Check that this object doesn't refer to any production.
103
+ # @return [true / false] true when this object doesn't
104
+ # point to a production.
81
105
  def unbound?()
82
106
  return production.nil?
83
107
  end
84
-
85
- # Part of the 'visitee' role.
86
- # [aVisitor] a GrammarVisitor instance
108
+
109
+ # Part of the 'visitee' role in the Visitor design pattern.
110
+ # @param aVisitor [GrammarVisitor] the visitor
87
111
  def accept(aVisitor)
88
- aVisitor.visit_prod_ref(self)
112
+ aVisitor.visit_prod_ref(self)
89
113
  end
90
114
 
91
115
  end # class
@@ -3,13 +3,18 @@ require_relative 'dynamic_grammar'
3
3
 
4
4
  module Sequitur # Module for classes implementing the Sequitur algorithm
5
5
 
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
6
9
  class SequiturGrammar < DynamicGrammar
7
10
 
8
- # Constructor. Build the grammar from an enumerator of tokens
11
+ # Build the grammar from an enumerator of tokens.
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
+ # over the input tokens.
9
14
  def initialize(anEnum)
10
15
  super()
11
16
  # Make start production compliant with utility rule
12
- 2.times { root.incr_refcount }
17
+ 2.times { start.incr_refcount }
13
18
 
14
19
  # Read the input sequence and apply the Sequitur algorithm
15
20
  anEnum.each do |a_token|
@@ -18,10 +23,14 @@ class SequiturGrammar < DynamicGrammar
18
23
  end
19
24
  end
20
25
 
21
- public
26
+ private
22
27
 
23
-
24
- CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
25
34
 
26
35
 
27
36
  # Assuming that a new input token was added to the start production,
@@ -37,7 +46,7 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
37
46
  # end
38
47
  # end until digram unicity and rule utility are met
39
48
  def enforce_rules()
40
- begin
49
+ loop do
41
50
  unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
42
51
  restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
43
52
 
@@ -46,8 +55,8 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
46
55
 
47
56
  unicity_diagnosis = detect_collision
48
57
  useless_prod = detect_useless_production
49
-
50
- end while unicity_diagnosis.collision_found || useless_prod
58
+ break unless unicity_diagnosis.collision_found || useless_prod
59
+ end
51
60
  end
52
61
 
53
62
  # Check whether a digram is used twice in the grammar.
@@ -88,15 +97,11 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
88
97
  # Then create a new production that will have
89
98
  # the symbols of d as its rhs members.
90
99
  def restore_unicity(aDiagnosis)
91
- return if aDiagnosis.nil?
92
-
93
100
  digr = aDiagnosis.digram
94
101
  prods = aDiagnosis.productions
95
102
  if prods.any?(&:single_digram?)
96
- (simple, compound) = prods.partition do |a_prod|
97
- a_prod.single_digram?
98
- end
99
- compound[0].replace_digram(simple[0])
103
+ (simple, compound) = prods.partition(&:single_digram?)
104
+ compound[0].reduce_step(simple[0])
100
105
  else
101
106
  # Create a new production with the digram's symbols as its
102
107
  # sole rhs members.
@@ -104,9 +109,9 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
104
109
  digr.symbols.each { |sym| new_prod.append_symbol(sym) }
105
110
  add_production(new_prod)
106
111
  if prods[0] == prods[1]
107
- prods[0].replace_digram(new_prod)
112
+ prods[0].reduce_step(new_prod)
108
113
  else
109
- prods.each { |a_prod| a_prod.replace_digram(new_prod) }
114
+ prods.each { |a_prod| a_prod.reduce_step(new_prod) }
110
115
  end
111
116
  end
112
117
  end
@@ -136,7 +141,7 @@ CollisionDiagnosis = Struct.new(:collision_found, :digram, :productions)
136
141
  break
137
142
  end
138
143
 
139
- referencing.replace_production(useless_prod)
144
+ referencing.derive_step(useless_prod)
140
145
  remove_production(index)
141
146
  end
142
147