sequitur 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,119 +1,119 @@
1
-
2
-
3
- module Sequitur # Module for classes implementing the Sequitur algorithm
4
-
5
- # A production reference is a grammar symbol that may appear in the right-hand
6
- # side of a production P1 and that refers to a production P2.
7
- # Every time a production P2 appears in the left-hand side of
8
- # production P1, this is implemented by inserting a production reference to P2
9
- # in the appropriate position in the RHS of P1.
10
- # In the literature, production references are also called non terminal
11
- # symbols
12
- # @example
13
- # # Given a production rule...
14
- # prod = Sequitur::Production.new
15
- # puts prod.refcount # outputs 0
16
- # # ... Build a reference to it
17
- # ref = Sequitur::ProductionRef.new(prod)
18
- # # ... Production reference count is updated...
19
- # puts prod.refcount # outputs 1
20
- class ProductionRef
21
-
22
- # Link to the production to reference.
23
- attr_reader(:production)
24
-
25
- # Constructor
26
- # @param target [Production or ProductionRef]
27
- # The production that is being referenced.
28
- def initialize(target)
29
- bind_to(target)
30
- end
31
-
32
- # Copy constructor invoked by dup or clone methods.
33
- # @param orig [ProductionRef]
34
- # @example
35
- # prod = Sequitur::Production.new
36
- # ref = Sequitur::ProductionRef.new(prod)
37
- # copy_ref = ref.dup
38
- # puts prod.refcount # outputs 2
39
- def initialize_copy(orig)
40
- @production = nil
41
- bind_to(orig.production)
42
- end
43
-
44
- # Emit the text representation of a production reference.
45
- # @return [String]
46
- def to_s()
47
- return "#{production.object_id}"
48
- end
49
-
50
- alias_method :to_string, :to_s
51
-
52
-
53
- # Equality testing.
54
- # A production ref is equal to another one when its
55
- # refers to the same production or when it is compared to
56
- # the production it refers to.
57
- # @param other [ProductionRef]
58
- # @return [true / false]
59
- def ==(other)
60
- return true if object_id == other.object_id
61
-
62
- if other.is_a?(ProductionRef)
63
- result = (production == other.production)
64
- else
65
- result = (production == other)
66
- end
67
-
68
- return result
69
- end
70
-
71
- # Produce a hash value.
72
- # A reference has no identity on its own,
73
- # the method returns the hash value of the
74
- # referenced production
75
- # @return [Fixnum] the hash value
76
- def hash()
77
- fail StandardError, 'Nil production' if production.nil?
78
- return production.hash
79
- end
80
-
81
- # Make this reference point to the given production.
82
- # @param aProduction [Production or ProductionRef] the production
83
- # to refer to
84
- def bind_to(aProduction)
85
- return if aProduction == @production
86
-
87
- production.decr_refcount if production
88
- unless aProduction.kind_of?(Production)
89
- fail StandardError, "Illegal production type #{aProduction.class}"
90
- end
91
- @production = aProduction
92
- production.incr_refcount
93
- end
94
-
95
-
96
- # Clear the reference to the target production.
97
- def unbind()
98
- production.decr_refcount
99
- @production = nil
100
- end
101
-
102
- # Check that the this object doesn't refer to any production.
103
- # @return [true / false] true when this object doesn't
104
- # point to a production.
105
- def unbound?()
106
- return production.nil?
107
- end
108
-
109
- # Part of the 'visitee' role in the Visitor design pattern.
110
- # @param aVisitor [GrammarVisitor] the visitor
111
- def accept(aVisitor)
112
- aVisitor.visit_prod_ref(self)
113
- end
114
-
115
- end # class
116
-
117
- end # module
118
-
119
- # End of file
1
+
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
+ # A production reference is a grammar symbol that may appear in the right-hand
6
+ # side of a production P1 and that refers to a production P2.
7
+ # Every time a production P2 appears in the left-hand side of
8
+ # production P1, this is implemented by inserting a production reference to P2
9
+ # in the appropriate position in the RHS of P1.
10
+ # In the literature, production references are also called non terminal
11
+ # symbols
12
+ # @example
13
+ # # Given a production rule...
14
+ # prod = Sequitur::Production.new
15
+ # puts prod.refcount # outputs 0
16
+ # # ... Build a reference to it
17
+ # ref = Sequitur::ProductionRef.new(prod)
18
+ # # ... Production reference count is updated...
19
+ # puts prod.refcount # outputs 1
20
+ class ProductionRef
21
+
22
+ # Link to the production to reference.
23
+ attr_reader(:production)
24
+
25
+ # Constructor
26
+ # @param target [Production or ProductionRef]
27
+ # The production that is being referenced.
28
+ def initialize(target)
29
+ bind_to(target)
30
+ end
31
+
32
+ # Copy constructor invoked by dup or clone methods.
33
+ # @param orig [ProductionRef]
34
+ # @example
35
+ # prod = Sequitur::Production.new
36
+ # ref = Sequitur::ProductionRef.new(prod)
37
+ # copy_ref = ref.dup
38
+ # puts prod.refcount # outputs 2
39
+ def initialize_copy(orig)
40
+ @production = nil
41
+ bind_to(orig.production)
42
+ end
43
+
44
+ # Emit the text representation of a production reference.
45
+ # @return [String]
46
+ def to_s()
47
+ return "#{production.object_id}"
48
+ end
49
+
50
+ alias_method :to_string, :to_s
51
+
52
+
53
+ # Equality testing.
54
+ # A production ref is equal to another one when its
55
+ # refers to the same production or when it is compared to
56
+ # the production it refers to.
57
+ # @param other [ProductionRef]
58
+ # @return [true / false]
59
+ def ==(other)
60
+ return true if object_id == other.object_id
61
+
62
+ if other.is_a?(ProductionRef)
63
+ result = (production == other.production)
64
+ else
65
+ result = (production == other)
66
+ end
67
+
68
+ return result
69
+ end
70
+
71
+ # Produce a hash value.
72
+ # A reference has no identity on its own,
73
+ # the method returns the hash value of the
74
+ # referenced production
75
+ # @return [Fixnum] the hash value
76
+ def hash()
77
+ fail StandardError, 'Nil production' if production.nil?
78
+ return production.hash
79
+ end
80
+
81
+ # Make this reference point to the given production.
82
+ # @param aProduction [Production or ProductionRef] the production
83
+ # to refer to
84
+ def bind_to(aProduction)
85
+ return if aProduction == @production
86
+
87
+ production.decr_refcount if production
88
+ unless aProduction.kind_of?(Production)
89
+ fail StandardError, "Illegal production type #{aProduction.class}"
90
+ end
91
+ @production = aProduction
92
+ production.incr_refcount
93
+ end
94
+
95
+
96
+ # Clear the reference to the target production.
97
+ def unbind()
98
+ production.decr_refcount
99
+ @production = nil
100
+ end
101
+
102
+ # Check that the this object doesn't refer to any production.
103
+ # @return [true / false] true when this object doesn't
104
+ # point to a production.
105
+ def unbound?()
106
+ return production.nil?
107
+ end
108
+
109
+ # Part of the 'visitee' role in the Visitor design pattern.
110
+ # @param aVisitor [GrammarVisitor] the visitor
111
+ def accept(aVisitor)
112
+ aVisitor.visit_prod_ref(self)
113
+ end
114
+
115
+ end # class
116
+
117
+ end # module
118
+
119
+ # End of file
@@ -1,158 +1,158 @@
1
- require_relative 'dynamic_grammar'
2
-
3
-
4
- module Sequitur # Module for classes implementing the Sequitur algorithm
5
-
6
- # Specialization of the DynamicGrammar class.
7
- # A Sequitur grammar is a context-free grammar that is entirely built
8
- # from a sequence of input tokens through the Sequitur algorithm.
9
- class SequiturGrammar < DynamicGrammar
10
-
11
- # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
13
- # over the input tokens.
14
- def initialize(anEnum)
15
- super()
16
- # Make start production compliant with utility rule
17
- 2.times { start.incr_refcount }
18
-
19
- # Read the input sequence and apply the Sequitur algorithm
20
- anEnum.each do |a_token|
21
- add_token(a_token)
22
- enforce_rules
23
- end
24
- end
25
-
26
- private
27
-
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions # The productions where the digram occurs
33
- )
34
-
35
-
36
- # Assuming that a new input token was added to the start production,
37
- # enforce the digram unicity and rule utility rules
38
- # begin
39
- # if a digram D occurs twice in the grammar then
40
- # add a production P : D (if not already there)
41
- # replace both Ds with R (reduction step).
42
- # end
43
- # if a production P : RHS in referenced only once then
44
- # replace P by its RHS (derivation step)
45
- # remove P from grammar
46
- # end
47
- # end until digram unicity and rule utility are met
48
- def enforce_rules()
49
- loop do
50
- unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
- restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- prod_index = detect_useless_production
54
- restore_utility(prod_index) unless prod_index.nil?
55
-
56
- unicity_diagnosis = detect_collision
57
- prod_index = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
- end
60
- end
61
-
62
- # Check whether a digram is used twice in the grammar.
63
- # Return an empty Hash if each digram appears once.
64
- # Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
65
- # Where Pi, Pk are two productions where the digram occurs.
66
- def detect_collision()
67
- diagnosis = CollisionDiagnosis.new(false)
68
- found_so_far = {}
69
- productions.each do |a_prod|
70
- prod_digrams = a_prod.digrams
71
- prod_digrams.each do |a_digr|
72
- its_key = a_digr.key
73
- if found_so_far.include? its_key
74
- orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
76
- if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
- (orig_digr == a_digr))
78
- next
79
- end
80
-
81
- diagnosis.digram = orig_digr
82
- diagnosis.productions = [orig_digr.production, a_prod]
83
- diagnosis.collision_found = true
84
- break
85
- else
86
- found_so_far[its_key] = a_digr
87
- end
88
- end
89
- break if diagnosis.collision_found
90
- end
91
-
92
- return diagnosis
93
- end
94
-
95
- # When a collision diagnosis indicates that a given
96
- # digram d occurs twice in the grammar
97
- # Then create a new production that will have
98
- # the symbols of d as its rhs members.
99
- def restore_unicity(aDiagnosis)
100
- prods = aDiagnosis.productions
101
- if prods.any?(&:single_digram?)
102
- (simple, compound) = prods.partition(&:single_digram?)
103
- compound[0].reduce_step(simple[0])
104
- else
105
- # Create a new production with the digram's symbols as its
106
- # sole rhs members.
107
- new_prod = build_production_for(aDiagnosis.digram)
108
- prods[0].reduce_step(new_prod)
109
- prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
110
- end
111
- end
112
-
113
- # Return a production that is used less than twice in the grammar.
114
- def detect_useless_production()
115
- useless = productions.index { |prod| prod.refcount < 2 }
116
- unless useless.nil?
117
- useless = nil if useless == 0
118
- end
119
- return useless
120
- end
121
-
122
- # Given the passed production P is referenced only once.
123
- # Then replace P by its RHS where it is referenced.
124
- # And delete P
125
- def restore_utility(prod_index)
126
- # Retrieve useless prod from its index
127
- useless_prod = productions[prod_index]
128
-
129
- # Retrieve production referencing useless one
130
- referencing = nil
131
- productions.reverse.each do |a_prod|
132
- # Next line assumes non-recursive productions
133
- next if a_prod == useless_prod
134
-
135
- refs = a_prod.references_of(useless_prod)
136
- next if refs.empty?
137
- referencing = a_prod
138
- break
139
- end
140
-
141
- referencing.derive_step(useless_prod)
142
- remove_production(prod_index)
143
- end
144
-
145
- # Create a new production that will have the symbols from digram
146
- # as its rhs members.
147
- def build_production_for(aDigram)
148
- new_prod = Production.new
149
- aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
- add_production(new_prod)
151
-
152
- return new_prod
153
- end
154
- end # class
155
-
156
- end # module
157
-
158
- # End of file
1
+ require_relative 'dynamic_grammar'
2
+
3
+
4
+ module Sequitur # Module for classes implementing the Sequitur algorithm
5
+
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
9
+ class SequiturGrammar < DynamicGrammar
10
+
11
+ # Build the grammar from an enumerator of tokens.
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
+ # over the input tokens.
14
+ def initialize(anEnum)
15
+ super()
16
+ # Make start production compliant with utility rule
17
+ 2.times { start.incr_refcount }
18
+
19
+ # Read the input sequence and apply the Sequitur algorithm
20
+ anEnum.each do |a_token|
21
+ add_token(a_token)
22
+ enforce_rules
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
34
+
35
+
36
+ # Assuming that a new input token was added to the start production,
37
+ # enforce the digram unicity and rule utility rules
38
+ # begin
39
+ # if a digram D occurs twice in the grammar then
40
+ # add a production P : D (if not already there)
41
+ # replace both Ds with R (reduction step).
42
+ # end
43
+ # if a production P : RHS in referenced only once then
44
+ # replace P by its RHS (derivation step)
45
+ # remove P from grammar
46
+ # end
47
+ # end until digram unicity and rule utility are met
48
+ def enforce_rules()
49
+ loop do
50
+ unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
+ restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
+
53
+ prod_index = detect_useless_production
54
+ restore_utility(prod_index) unless prod_index.nil?
55
+
56
+ unicity_diagnosis = detect_collision
57
+ prod_index = detect_useless_production
58
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
+ end
60
+ end
61
+
62
+ # Check whether a digram is used twice in the grammar.
63
+ # Return an empty Hash if each digram appears once.
64
+ # Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
65
+ # Where Pi, Pk are two productions where the digram occurs.
66
+ def detect_collision()
67
+ diagnosis = CollisionDiagnosis.new(false)
68
+ found_so_far = {}
69
+ productions.each do |a_prod|
70
+ prod_digrams = a_prod.digrams
71
+ prod_digrams.each do |a_digr|
72
+ its_key = a_digr.key
73
+ if found_so_far.include? its_key
74
+ orig_digr = found_so_far[its_key]
75
+ # Disregard sequence like a a a
76
+ if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
+ (orig_digr == a_digr))
78
+ next
79
+ end
80
+
81
+ diagnosis.digram = orig_digr
82
+ diagnosis.productions = [orig_digr.production, a_prod]
83
+ diagnosis.collision_found = true
84
+ break
85
+ else
86
+ found_so_far[its_key] = a_digr
87
+ end
88
+ end
89
+ break if diagnosis.collision_found
90
+ end
91
+
92
+ return diagnosis
93
+ end
94
+
95
+ # When a collision diagnosis indicates that a given
96
+ # digram d occurs twice in the grammar
97
+ # Then create a new production that will have
98
+ # the symbols of d as its rhs members.
99
+ def restore_unicity(aDiagnosis)
100
+ prods = aDiagnosis.productions
101
+ if prods.any?(&:single_digram?)
102
+ (simple, compound) = prods.partition(&:single_digram?)
103
+ compound[0].reduce_step(simple[0])
104
+ else
105
+ # Create a new production with the digram's symbols as its
106
+ # sole rhs members.
107
+ new_prod = build_production_for(aDiagnosis.digram)
108
+ prods[0].reduce_step(new_prod)
109
+ prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
110
+ end
111
+ end
112
+
113
+ # Return a production that is used less than twice in the grammar.
114
+ def detect_useless_production()
115
+ useless = productions.index { |prod| prod.refcount < 2 }
116
+ unless useless.nil?
117
+ useless = nil if useless == 0
118
+ end
119
+ return useless
120
+ end
121
+
122
+ # Given the passed production P is referenced only once.
123
+ # Then replace P by its RHS where it is referenced.
124
+ # And delete P
125
+ def restore_utility(prod_index)
126
+ # Retrieve useless prod from its index
127
+ useless_prod = productions[prod_index]
128
+
129
+ # Retrieve production referencing useless one
130
+ referencing = nil
131
+ productions.reverse.each do |a_prod|
132
+ # Next line assumes non-recursive productions
133
+ next if a_prod == useless_prod
134
+
135
+ refs = a_prod.references_of(useless_prod)
136
+ next if refs.empty?
137
+ referencing = a_prod
138
+ break
139
+ end
140
+
141
+ referencing.derive_step(useless_prod)
142
+ remove_production(prod_index)
143
+ end
144
+
145
+ # Create a new production that will have the symbols from digram
146
+ # as its rhs members.
147
+ def build_production_for(aDigram)
148
+ new_prod = Production.new
149
+ aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
+ add_production(new_prod)
151
+
152
+ return new_prod
153
+ end
154
+ end # class
155
+
156
+ end # module
157
+
158
+ # End of file