sequitur 0.1.10 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,119 +1,119 @@
1
-
2
-
3
- module Sequitur # Module for classes implementing the Sequitur algorithm
4
-
5
- # A production reference is a grammar symbol that may appear in the right-hand
6
- # side of a production P1 and that refers to a production P2.
7
- # Every time a production P2 appears in the right-hand side of
8
- # production P1, this is implemented by inserting a production reference to P2
9
- # in the appropriate position in the RHS of P1.
10
- # In the literature, production references are also called non terminal
11
- # symbols
12
- # @example
13
- # # Given a production rule...
14
- # prod = Sequitur::Production.new
15
- # puts prod.refcount # outputs 0
16
- # # ... Build a reference to it
17
- # ref = Sequitur::ProductionRef.new(prod)
18
- # # ... Production reference count is updated...
19
- # puts prod.refcount # outputs 1
20
- class ProductionRef
21
-
22
- # Link to the production to reference.
23
- attr_reader(:production)
24
-
25
- # Constructor
26
- # @param target [Production or ProductionRef]
27
- # The production that is being referenced.
28
- def initialize(target)
29
- bind_to(target)
30
- end
31
-
32
- # Copy constructor invoked by dup or clone methods.
33
- # @param orig [ProductionRef]
34
- # @example
35
- # prod = Sequitur::Production.new
36
- # ref = Sequitur::ProductionRef.new(prod)
37
- # copy_ref = ref.dup
38
- # puts prod.refcount # outputs 2
39
- def initialize_copy(orig)
40
- @production = nil
41
- bind_to(orig.production)
42
- end
43
-
44
- # Emit the text representation of a production reference.
45
- # @return [String]
46
- def to_s()
47
- return "#{production.object_id}"
48
- end
49
-
50
- alias_method :to_string, :to_s
51
-
52
-
53
- # Equality testing.
54
- # A production ref is equal to another one when it
55
- # refers to the same production or when it is compared to
56
- # the production it refers to.
57
- # @param other [ProductionRef]
58
- # @return [true / false]
59
- def ==(other)
60
- return true if object_id == other.object_id
61
-
62
- if other.is_a?(ProductionRef)
63
- result = (production == other.production)
64
- else
65
- result = (production == other)
66
- end
67
-
68
- return result
69
- end
70
-
71
- # Produce a hash value.
72
- # A reference has no identity on its own,
73
- # the method returns the hash value of the
74
- # referenced production
75
- # @return [Fixnum] the hash value
76
- def hash()
77
- fail StandardError, 'Nil production' if production.nil?
78
- return production.hash
79
- end
80
-
81
- # Make this reference point to the given production.
82
- # @param aProduction [Production or ProductionRef] the production
83
- # to refer to
84
- def bind_to(aProduction)
85
- return if aProduction == @production
86
-
87
- production.decr_refcount if production
88
- unless aProduction.kind_of?(Production)
89
- fail StandardError, "Illegal production type #{aProduction.class}"
90
- end
91
- @production = aProduction
92
- production.incr_refcount
93
- end
94
-
95
-
96
- # Clear the reference to the target production.
97
- def unbind()
98
- production.decr_refcount
99
- @production = nil
100
- end
101
-
102
- # Check that this object doesn't refer to any production.
103
- # @return [true / false] true when this object doesn't
104
- # point to a production.
105
- def unbound?()
106
- return production.nil?
107
- end
108
-
109
- # Part of the 'visitee' role in the Visitor design pattern.
110
- # @param aVisitor [GrammarVisitor] the visitor
111
- def accept(aVisitor)
112
- aVisitor.visit_prod_ref(self)
113
- end
114
-
115
- end # class
116
-
117
- end # module
118
-
119
- # End of file
1
+
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
+ # A production reference is a grammar symbol that may appear in the right-hand
6
+ # side of a production P1 and that refers to a production P2.
7
+ # Every time a production P2 appears in the right-hand side of
8
+ # production P1, this is implemented by inserting a production reference to P2
9
+ # in the appropriate position in the RHS of P1.
10
+ # In the literature, production references are also called non terminal
11
+ # symbols
12
+ # @example
13
+ # # Given a production rule...
14
+ # prod = Sequitur::Production.new
15
+ # puts prod.refcount # outputs 0
16
+ # # ... Build a reference to it
17
+ # ref = Sequitur::ProductionRef.new(prod)
18
+ # # ... Production reference count is updated...
19
+ # puts prod.refcount # outputs 1
20
+ class ProductionRef
21
+
22
+ # Link to the production to reference.
23
+ attr_reader(:production)
24
+
25
+ # Constructor
26
+ # @param target [Production or ProductionRef]
27
+ # The production that is being referenced.
28
+ def initialize(target)
29
+ bind_to(target)
30
+ end
31
+
32
+ # Copy constructor invoked by dup or clone methods.
33
+ # @param orig [ProductionRef]
34
+ # @example
35
+ # prod = Sequitur::Production.new
36
+ # ref = Sequitur::ProductionRef.new(prod)
37
+ # copy_ref = ref.dup
38
+ # puts prod.refcount # outputs 2
39
+ def initialize_copy(orig)
40
+ @production = nil
41
+ bind_to(orig.production)
42
+ end
43
+
44
+ # Emit the text representation of a production reference.
45
+ # @return [String]
46
+ def to_s()
47
+ return "#{production.object_id}"
48
+ end
49
+
50
+ alias_method :to_string, :to_s
51
+
52
+
53
+ # Equality testing.
54
+ # A production ref is equal to another one when it
55
+ # refers to the same production or when it is compared to
56
+ # the production it refers to.
57
+ # @param other [ProductionRef]
58
+ # @return [true / false]
59
+ def ==(other)
60
+ return true if object_id == other.object_id
61
+
62
+ if other.is_a?(ProductionRef)
63
+ result = (production == other.production)
64
+ else
65
+ result = (production == other)
66
+ end
67
+
68
+ return result
69
+ end
70
+
71
+ # Produce a hash value.
72
+ # A reference has no identity on its own,
73
+ # the method returns the hash value of the
74
+ # referenced production
75
+ # @return [Fixnum] the hash value
76
+ def hash()
77
+ fail StandardError, 'Nil production' if production.nil?
78
+ return production.hash
79
+ end
80
+
81
+ # Make this reference point to the given production.
82
+ # @param aProduction [Production or ProductionRef] the production
83
+ # to refer to
84
+ def bind_to(aProduction)
85
+ return if aProduction == @production
86
+
87
+ production.decr_refcount if production
88
+ unless aProduction.kind_of?(Production)
89
+ fail StandardError, "Illegal production type #{aProduction.class}"
90
+ end
91
+ @production = aProduction
92
+ production.incr_refcount
93
+ end
94
+
95
+
96
+ # Clear the reference to the target production.
97
+ def unbind()
98
+ production.decr_refcount
99
+ @production = nil
100
+ end
101
+
102
+ # Check that this object doesn't refer to any production.
103
+ # @return [true / false] true when this object doesn't
104
+ # point to a production.
105
+ def unbound?()
106
+ return production.nil?
107
+ end
108
+
109
+ # Part of the 'visitee' role in the Visitor design pattern.
110
+ # @param aVisitor [GrammarVisitor] the visitor
111
+ def accept(aVisitor)
112
+ aVisitor.visit_prod_ref(self)
113
+ end
114
+
115
+ end # class
116
+
117
+ end # module
118
+
119
+ # End of file
@@ -1,158 +1,158 @@
1
- require_relative 'dynamic_grammar'
2
-
3
-
4
- module Sequitur # Module for classes implementing the Sequitur algorithm
5
-
6
- # Specialization of the DynamicGrammar class.
7
- # A Sequitur grammar is a context-free grammar that is entirely built
8
- # from a sequence of input tokens through the Sequitur algorithm.
9
- class SequiturGrammar < DynamicGrammar
10
-
11
- # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
13
- # over the input tokens.
14
- def initialize(anEnum)
15
- super()
16
- # Make start production compliant with utility rule
17
- 2.times { start.incr_refcount }
18
-
19
- # Read the input sequence and apply the Sequitur algorithm
20
- anEnum.each do |a_token|
21
- add_token(a_token)
22
- enforce_rules
23
- end
24
- end
25
-
26
- private
27
-
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions # The productions where the digram occurs
33
- )
34
-
35
-
36
- # Assuming that a new input token was added to the start production,
37
- # enforce the digram unicity and rule utility rules
38
- # begin
39
- # if a digram D occurs twice in the grammar then
40
- # add a production P : D (if not already there)
41
- # replace both Ds with P (reduction step).
42
- # end
43
- # if a production P : RHS is referenced only once then
44
- # replace P by its RHS (derivation step)
45
- # remove P from grammar
46
- # end
47
- # end until digram unicity and rule utility are met
48
- def enforce_rules()
49
- loop do
50
- unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
- restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- prod_index = detect_useless_production
54
- restore_utility(prod_index) unless prod_index.nil?
55
-
56
- unicity_diagnosis = detect_collision
57
- prod_index = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
- end
60
- end
61
-
62
- # Check whether a digram is used twice in the grammar.
63
- # Return a CollisionDiagnosis with collision_found false if each digram appears once.
64
- # Otherwise collision_found is true, digram holds the repeated digram and
65
- # productions holds [Pi, Pk], the two productions where the digram occurs.
66
- def detect_collision()
67
- diagnosis = CollisionDiagnosis.new(false)
68
- found_so_far = {}
69
- productions.each do |a_prod|
70
- prod_digrams = a_prod.digrams
71
- prod_digrams.each do |a_digr|
72
- its_key = a_digr.key
73
- if found_so_far.include? its_key
74
- orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
76
- if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
- (orig_digr == a_digr))
78
- next
79
- end
80
-
81
- diagnosis.digram = orig_digr
82
- diagnosis.productions = [orig_digr.production, a_prod]
83
- diagnosis.collision_found = true
84
- break
85
- else
86
- found_so_far[its_key] = a_digr
87
- end
88
- end
89
- break if diagnosis.collision_found
90
- end
91
-
92
- return diagnosis
93
- end
94
-
95
- # When a collision diagnosis indicates that a given
96
- # digram d occurs twice in the grammar
97
- # Then create a new production that will have
98
- # the symbols of d as its rhs members.
99
- def restore_unicity(aDiagnosis)
100
- prods = aDiagnosis.productions
101
- if prods.any?(&:single_digram?)
102
- (simple, compound) = prods.partition(&:single_digram?)
103
- compound[0].reduce_step(simple[0])
104
- else
105
- # Create a new production with the digram's symbols as its
106
- # sole rhs members.
107
- new_prod = build_production_for(aDiagnosis.digram)
108
- prods[0].reduce_step(new_prod)
109
- prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
110
- end
111
- end
112
-
113
- # Return the index of a production that is used less than twice in the grammar, or nil if there is none (index 0 is never reported).
114
- def detect_useless_production()
115
- useless = productions.index { |prod| prod.refcount < 2 }
116
- unless useless.nil?
117
- useless = nil if useless == 0
118
- end
119
- return useless
120
- end
121
-
122
- # Given the passed production P is referenced only once.
123
- # Then replace P by its RHS where it is referenced.
124
- # And delete P
125
- def restore_utility(prod_index)
126
- # Retrieve useless prod from its index
127
- useless_prod = productions[prod_index]
128
-
129
- # Retrieve production referencing useless one
130
- referencing = nil
131
- productions.reverse.each do |a_prod|
132
- # Next line assumes non-recursive productions
133
- next if a_prod == useless_prod
134
-
135
- refs = a_prod.references_of(useless_prod)
136
- next if refs.empty?
137
- referencing = a_prod
138
- break
139
- end
140
-
141
- referencing.derive_step(useless_prod)
142
- remove_production(prod_index)
143
- end
144
-
145
- # Create a new production that will have the symbols from digram
146
- # as its rhs members.
147
- def build_production_for(aDigram)
148
- new_prod = Production.new
149
- aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
- add_production(new_prod)
151
-
152
- return new_prod
153
- end
154
- end # class
155
-
156
- end # module
157
-
158
- # End of file
1
+ require_relative 'dynamic_grammar'
2
+
3
+
4
+ module Sequitur # Module for classes implementing the Sequitur algorithm
5
+
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
9
+ class SequiturGrammar < DynamicGrammar
10
+
11
+ # Build the grammar from an enumerator of tokens.
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
+ # over the input tokens.
14
+ def initialize(anEnum)
15
+ super()
16
+ # Make start production compliant with utility rule
17
+ 2.times { start.incr_refcount }
18
+
19
+ # Read the input sequence and apply the Sequitur algorithm
20
+ anEnum.each do |a_token|
21
+ add_token(a_token)
22
+ enforce_rules
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
34
+
35
+
36
+ # Assuming that a new input token was added to the start production,
37
+ # enforce the digram unicity and rule utility rules
38
+ # begin
39
+ # if a digram D occurs twice in the grammar then
40
+ # add a production P : D (if not already there)
41
+ # replace both Ds with P (reduction step).
42
+ # end
43
+ # if a production P : RHS is referenced only once then
44
+ # replace P by its RHS (derivation step)
45
+ # remove P from grammar
46
+ # end
47
+ # end until digram unicity and rule utility are met
48
+ def enforce_rules()
49
+ loop do
50
+ unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
+ restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
+
53
+ prod_index = detect_useless_production
54
+ restore_utility(prod_index) unless prod_index.nil?
55
+
56
+ unicity_diagnosis = detect_collision
57
+ prod_index = detect_useless_production
58
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
+ end
60
+ end
61
+
62
+ # Check whether a digram is used twice in the grammar.
63
+ # Return a CollisionDiagnosis with collision_found false if each digram appears once.
64
+ # Otherwise collision_found is true, digram holds the repeated digram and
65
+ # productions holds [Pi, Pk], the two productions where the digram occurs.
66
+ def detect_collision()
67
+ diagnosis = CollisionDiagnosis.new(false)
68
+ found_so_far = {}
69
+ productions.each do |a_prod|
70
+ prod_digrams = a_prod.digrams
71
+ prod_digrams.each do |a_digr|
72
+ its_key = a_digr.key
73
+ if found_so_far.include? its_key
74
+ orig_digr = found_so_far[its_key]
75
+ # Disregard sequence like a a a
76
+ if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
+ (orig_digr == a_digr))
78
+ next
79
+ end
80
+
81
+ diagnosis.digram = orig_digr
82
+ diagnosis.productions = [orig_digr.production, a_prod]
83
+ diagnosis.collision_found = true
84
+ break
85
+ else
86
+ found_so_far[its_key] = a_digr
87
+ end
88
+ end
89
+ break if diagnosis.collision_found
90
+ end
91
+
92
+ return diagnosis
93
+ end
94
+
95
+ # When a collision diagnosis indicates that a given
96
+ # digram d occurs twice in the grammar
97
+ # Then create a new production that will have
98
+ # the symbols of d as its rhs members.
99
+ def restore_unicity(aDiagnosis)
100
+ prods = aDiagnosis.productions
101
+ if prods.any?(&:single_digram?)
102
+ (simple, compound) = prods.partition(&:single_digram?)
103
+ compound[0].reduce_step(simple[0])
104
+ else
105
+ # Create a new production with the digram's symbols as its
106
+ # sole rhs members.
107
+ new_prod = build_production_for(aDiagnosis.digram)
108
+ prods[0].reduce_step(new_prod)
109
+ prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
110
+ end
111
+ end
112
+
113
+ # Return the index of a production that is used less than twice in the grammar, or nil if there is none (index 0 is never reported).
114
+ def detect_useless_production()
115
+ useless = productions.index { |prod| prod.refcount < 2 }
116
+ unless useless.nil?
117
+ useless = nil if useless == 0
118
+ end
119
+ return useless
120
+ end
121
+
122
+ # Given the passed production P is referenced only once.
123
+ # Then replace P by its RHS where it is referenced.
124
+ # And delete P
125
+ def restore_utility(prod_index)
126
+ # Retrieve useless prod from its index
127
+ useless_prod = productions[prod_index]
128
+
129
+ # Retrieve production referencing useless one
130
+ referencing = nil
131
+ productions.reverse.each do |a_prod|
132
+ # Next line assumes non-recursive productions
133
+ next if a_prod == useless_prod
134
+
135
+ refs = a_prod.references_of(useless_prod)
136
+ next if refs.empty?
137
+ referencing = a_prod
138
+ break
139
+ end
140
+
141
+ referencing.derive_step(useless_prod)
142
+ remove_production(prod_index)
143
+ end
144
+
145
+ # Create a new production that will have the symbols from digram
146
+ # as its rhs members.
147
+ def build_production_for(aDigram)
148
+ new_prod = Production.new
149
+ aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
+ add_production(new_prod)
151
+
152
+ return new_prod
153
+ end
154
+ end # class
155
+
156
+ end # module
157
+
158
+ # End of file