sequitur 0.1.18 → 0.1.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +430 -56
- data/.travis.yml +19 -13
- data/CHANGELOG.md +33 -0
- data/Gemfile +4 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -2
- data/Rakefile +2 -0
- data/appveyor.yml +20 -12
- data/examples/integer_sample.rb +8 -7
- data/examples/porridge.rb +6 -6
- data/examples/simple_case.rb +7 -6
- data/examples/symbol_sample.rb +8 -9
- data/examples/word_sample.rb +4 -3
- data/lib/sequitur/constants.rb +5 -3
- data/lib/sequitur/digram.rb +45 -43
- data/lib/sequitur/dynamic_grammar.rb +93 -95
- data/lib/sequitur/formatter/base_formatter.rb +3 -1
- data/lib/sequitur/formatter/base_text.rb +3 -1
- data/lib/sequitur/formatter/debug.rb +5 -3
- data/lib/sequitur/grammar_visitor.rb +99 -98
- data/lib/sequitur/production.rb +14 -19
- data/lib/sequitur/production_ref.rb +107 -106
- data/lib/sequitur/sequitur_grammar.rb +9 -6
- data/lib/sequitur/symbol_sequence.rb +10 -11
- data/lib/sequitur.rb +2 -0
- data/spec/sequitur/digram_spec.rb +10 -8
- data/spec/sequitur/dynamic_grammar_spec.rb +2 -0
- data/spec/sequitur/formatter/base_text_spec.rb +4 -2
- data/spec/sequitur/formatter/debug_spec.rb +4 -2
- data/spec/sequitur/grammar_visitor_spec.rb +2 -0
- data/spec/sequitur/production_ref_spec.rb +2 -0
- data/spec/sequitur/production_spec.rb +10 -8
- data/spec/sequitur/sequitur_grammar_spec.rb +13 -13
- data/spec/sequitur/symbol_sequence_spec.rb +6 -4
- data/spec/spec_helper.rb +2 -12
- metadata +18 -46
- data/.ruby-version +0 -1
- data/.simplecov +0 -7
data/lib/sequitur/production.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'digram'
|
2
4
|
require_relative 'symbol_sequence'
|
3
5
|
require_relative 'production_ref'
|
@@ -23,7 +25,7 @@ class Production
|
|
23
25
|
|
24
26
|
# Constructor.
|
25
27
|
# Build a production with an empty RHS.
|
26
|
-
def initialize
|
28
|
+
def initialize
|
27
29
|
@rhs = SymbolSequence.new
|
28
30
|
@refcount = 0
|
29
31
|
@digrams = []
|
@@ -44,7 +46,6 @@ class Production
|
|
44
46
|
return result
|
45
47
|
end
|
46
48
|
|
47
|
-
|
48
49
|
# Is the rhs empty?
|
49
50
|
# @ return true if the rhs has no members.
|
50
51
|
def empty?
|
@@ -52,20 +53,20 @@ class Production
|
|
52
53
|
end
|
53
54
|
|
54
55
|
# Increment the reference count by one.
|
55
|
-
def incr_refcount
|
56
|
+
def incr_refcount
|
56
57
|
@refcount += 1
|
57
58
|
end
|
58
59
|
|
59
60
|
# Decrement the reference count by one.
|
60
|
-
def decr_refcount
|
61
|
+
def decr_refcount
|
61
62
|
raise StandardError, 'Internal error' if @refcount.zero?
|
63
|
+
|
62
64
|
@refcount -= 1
|
63
65
|
end
|
64
66
|
|
65
|
-
|
66
67
|
# Select the references to production appearing in the rhs.
|
67
68
|
# @return [Array of ProductionRef]
|
68
|
-
def references
|
69
|
+
def references
|
69
70
|
return rhs.references
|
70
71
|
end
|
71
72
|
|
@@ -77,10 +78,9 @@ class Production
|
|
77
78
|
return rhs.references_of(real_prod)
|
78
79
|
end
|
79
80
|
|
80
|
-
|
81
81
|
# Enumerate the digrams appearing in the right-hand side (rhs)
|
82
82
|
# @return [Array] the list of digrams found in rhs of this production.
|
83
|
-
def recalc_digrams
|
83
|
+
def recalc_digrams
|
84
84
|
return [] if rhs.size < 2
|
85
85
|
|
86
86
|
result = []
|
@@ -88,20 +88,17 @@ class Production
|
|
88
88
|
@digrams = result
|
89
89
|
end
|
90
90
|
|
91
|
-
|
92
|
-
|
93
91
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
94
92
|
# @return [true/false] true when the rhs contains exactly two symbols.
|
95
93
|
def single_digram?
|
96
94
|
return rhs.size == 2
|
97
95
|
end
|
98
96
|
|
99
|
-
|
100
97
|
# Detect whether the last digram occurs twice
|
101
98
|
# Assumption: when a digram occurs twice in a production then it must occur
|
102
99
|
# at the end of the rhs
|
103
100
|
# @return [true/false] true when the digram occurs twice in rhs.
|
104
|
-
def repeated_digram?
|
101
|
+
def repeated_digram?
|
105
102
|
return false if rhs.size < 3
|
106
103
|
|
107
104
|
my_digrams = digrams
|
@@ -113,17 +110,16 @@ class Production
|
|
113
110
|
|
114
111
|
# Retrieve the last digram appearing in the RHS (if any).
|
115
112
|
# @return [Digram] last digram in the rhs otherwise nil.
|
116
|
-
def last_digram
|
113
|
+
def last_digram
|
117
114
|
result = digrams.empty? ? nil : digrams.last
|
118
115
|
return result
|
119
116
|
end
|
120
117
|
|
121
|
-
|
122
118
|
# Emit a text representation of the production rule.
|
123
119
|
# Text is of the form:
|
124
120
|
# object id of production : rhs as space-separated sequence of symbols.
|
125
121
|
# @return [String]
|
126
|
-
def to_string
|
122
|
+
def to_string
|
127
123
|
return "#{object_id} : #{rhs.to_string}."
|
128
124
|
end
|
129
125
|
|
@@ -150,7 +146,7 @@ class Production
|
|
150
146
|
|
151
147
|
# Clear the right-hand side.
|
152
148
|
# Any referenced production has its reference counter decremented.
|
153
|
-
def clear_rhs
|
149
|
+
def clear_rhs
|
154
150
|
rhs.clear
|
155
151
|
end
|
156
152
|
|
@@ -168,9 +164,10 @@ class Production
|
|
168
164
|
# p.positions_of(a, a) # => [0, 3]
|
169
165
|
def positions_of(symb1, symb2)
|
170
166
|
# Find the positions where the digram occur in rhs
|
171
|
-
indices = [
|
167
|
+
indices = [-2] # Dummy index!
|
172
168
|
(0...rhs.size).each do |i|
|
173
169
|
next if i == indices.last + 1
|
170
|
+
|
174
171
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
175
172
|
end
|
176
173
|
|
@@ -179,7 +176,6 @@ class Production
|
|
179
176
|
return indices
|
180
177
|
end
|
181
178
|
|
182
|
-
|
183
179
|
# Given that the production P passed as argument has exactly 2 symbols
|
184
180
|
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
185
181
|
# s1 s2 by a reference to P.
|
@@ -217,7 +213,6 @@ class Production
|
|
217
213
|
recalc_digrams
|
218
214
|
end
|
219
215
|
|
220
|
-
|
221
216
|
# Part of the 'visitee' role in Visitor design pattern.
|
222
217
|
# @param aVisitor[GrammarVisitor]
|
223
218
|
def accept(aVisitor)
|
@@ -1,115 +1,116 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
-
# side of a production P1 and that refers to a production P2.
|
6
|
-
# Every time a production P2 appears in the left-hand side of
|
7
|
-
# production P1, this is implemented by inserting a production reference to P2
|
8
|
-
# in the appropriate position in the RHS of P1.
|
9
|
-
# In the literature, production references are also called non terminal
|
10
|
-
# symbols
|
11
|
-
# @example
|
12
|
-
# # Given a production rule...
|
13
|
-
# prod = Sequitur::Production.new
|
14
|
-
# puts prod.refcount # outputs 0
|
15
|
-
# # ... Build a reference to it
|
16
|
-
# ref = Sequitur::ProductionRef.new(prod)
|
17
|
-
# # ... Production reference count is updated...
|
18
|
-
# puts prod.refcount # outputs 1
|
19
|
-
class ProductionRef
|
20
|
-
# Link to the production to reference.
|
21
|
-
attr_reader(:production)
|
22
|
-
|
23
|
-
# Constructor
|
24
|
-
# @param target [Production or ProductionRef]
|
25
|
-
# The production that is being referenced.
|
26
|
-
def initialize(target)
|
27
|
-
bind_to(target)
|
28
|
-
end
|
29
|
-
|
30
|
-
# Copy constructor invoked by dup or clone methods.
|
31
|
-
# @param orig [ProductionRef]
|
4
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
+
# side of a production P1 and that refers to a production P2.
|
6
|
+
# Every time a production P2 appears in the left-hand side of
|
7
|
+
# production P1, this is implemented by inserting a production reference to P2
|
8
|
+
# in the appropriate position in the RHS of P1.
|
9
|
+
# In the literature, production references are also called non terminal
|
10
|
+
# symbols
|
32
11
|
# @example
|
12
|
+
# # Given a production rule...
|
33
13
|
# prod = Sequitur::Production.new
|
14
|
+
# puts prod.refcount # outputs 0
|
15
|
+
# # ... Build a reference to it
|
34
16
|
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
-
#
|
36
|
-
# puts prod.refcount # outputs
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
17
|
+
# # ... Production reference count is updated...
|
18
|
+
# puts prod.refcount # outputs 1
|
19
|
+
class ProductionRef
|
20
|
+
# Link to the production to reference.
|
21
|
+
attr_reader(:production)
|
22
|
+
|
23
|
+
# Constructor
|
24
|
+
# @param target [Production or ProductionRef]
|
25
|
+
# The production that is being referenced.
|
26
|
+
def initialize(target)
|
27
|
+
bind_to(target)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Copy constructor invoked by dup or clone methods.
|
31
|
+
# @param orig [ProductionRef]
|
32
|
+
# @example
|
33
|
+
# prod = Sequitur::Production.new
|
34
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
+
# copy_ref = ref.dup
|
36
|
+
# puts prod.refcount # outputs 2
|
37
|
+
def initialize_copy(orig)
|
38
|
+
@production = nil
|
39
|
+
bind_to(orig.production)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Emit the text representation of a production reference.
|
43
|
+
# @return [String]
|
44
|
+
def to_s
|
45
|
+
return production.object_id.to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
alias to_string to_s
|
49
|
+
|
50
|
+
|
51
|
+
# Equality testing.
|
52
|
+
# A production ref is equal to another one when its
|
53
|
+
# refers to the same production or when it is compared to
|
54
|
+
# the production it refers to.
|
55
|
+
# @param other [ProductionRef]
|
56
|
+
# @return [true / false]
|
57
|
+
def ==(other)
|
58
|
+
return true if object_id == other.object_id
|
59
|
+
|
60
|
+
result = if other.is_a?(ProductionRef)
|
61
|
+
(production == other.production)
|
62
|
+
else
|
63
|
+
(production == other)
|
64
|
+
end
|
65
|
+
|
66
|
+
return result
|
67
|
+
end
|
68
|
+
|
69
|
+
# Produce a hash value.
|
70
|
+
# A reference has no identity on its own,
|
71
|
+
# the method returns the hash value of the
|
72
|
+
# referenced production
|
73
|
+
# @return [Fixnum] the hash value
|
74
|
+
def hash
|
75
|
+
raise StandardError, 'Nil production' if production.nil?
|
76
|
+
|
77
|
+
return production.hash
|
78
|
+
end
|
79
|
+
|
80
|
+
# Make this reference point to the given production.
|
81
|
+
# @param aProduction [Production or ProductionRef] the production
|
82
|
+
# to refer to
|
83
|
+
def bind_to(aProduction)
|
84
|
+
return if aProduction == @production
|
85
|
+
|
86
|
+
production&.decr_refcount
|
87
|
+
unless aProduction.kind_of?(Production)
|
88
|
+
raise StandardError, "Illegal production type #{aProduction.class}"
|
89
|
+
end
|
90
|
+
|
91
|
+
@production = aProduction
|
92
|
+
production.incr_refcount
|
93
|
+
end
|
94
|
+
|
95
|
+
# Clear the reference to the target production.
|
96
|
+
def unbind
|
97
|
+
production.decr_refcount
|
98
|
+
@production = nil
|
99
|
+
end
|
100
|
+
|
101
|
+
# Check that the this object doesn't refer to any production.
|
102
|
+
# @return [true / false] true when this object doesn't
|
103
|
+
# point to a production.
|
104
|
+
def unbound?
|
105
|
+
return production.nil?
|
106
|
+
end
|
107
|
+
|
108
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
109
|
+
# @param aVisitor [GrammarVisitor] the visitor
|
110
|
+
def accept(aVisitor)
|
111
|
+
aVisitor.visit_prod_ref(self)
|
88
112
|
end
|
89
|
-
|
90
|
-
production.incr_refcount
|
91
|
-
end
|
92
|
-
|
93
|
-
|
94
|
-
# Clear the reference to the target production.
|
95
|
-
def unbind()
|
96
|
-
production.decr_refcount
|
97
|
-
@production = nil
|
98
|
-
end
|
99
|
-
|
100
|
-
# Check that the this object doesn't refer to any production.
|
101
|
-
# @return [true / false] true when this object doesn't
|
102
|
-
# point to a production.
|
103
|
-
def unbound?()
|
104
|
-
return production.nil?
|
105
|
-
end
|
106
|
-
|
107
|
-
# Part of the 'visitee' role in the Visitor design pattern.
|
108
|
-
# @param aVisitor [GrammarVisitor] the visitor
|
109
|
-
def accept(aVisitor)
|
110
|
-
aVisitor.visit_prod_ref(self)
|
111
|
-
end
|
112
|
-
end # class
|
113
|
+
end # class
|
113
114
|
end # module
|
114
115
|
|
115
116
|
# End of file
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'dynamic_grammar'
|
2
4
|
|
3
5
|
|
@@ -27,8 +29,8 @@ class SequiturGrammar < DynamicGrammar
|
|
27
29
|
CollisionDiagnosis = Struct.new(
|
28
30
|
:collision_found, # true if collision detected
|
29
31
|
:digram, # The digram involved in a collision
|
30
|
-
:productions # The productions where the digram occurs
|
31
|
-
|
32
|
+
:productions) # The productions where the digram occurs
|
33
|
+
|
32
34
|
|
33
35
|
|
34
36
|
# Assuming that a new input token was added to the start production,
|
@@ -43,7 +45,7 @@ class SequiturGrammar < DynamicGrammar
|
|
43
45
|
# remove P from grammar
|
44
46
|
# end
|
45
47
|
# end until digram unicity and rule utility are met
|
46
|
-
def enforce_rules
|
48
|
+
def enforce_rules
|
47
49
|
loop do
|
48
50
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
51
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
@@ -61,7 +63,7 @@ class SequiturGrammar < DynamicGrammar
|
|
61
63
|
# Return an empty Hash if each digram appears once.
|
62
64
|
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
63
65
|
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
-
def detect_collision
|
66
|
+
def detect_collision
|
65
67
|
diagnosis = CollisionDiagnosis.new(false)
|
66
68
|
found_so_far = {}
|
67
69
|
productions.each do |a_prod|
|
@@ -109,9 +111,9 @@ class SequiturGrammar < DynamicGrammar
|
|
109
111
|
end
|
110
112
|
|
111
113
|
# Return a production that is used less than twice in the grammar.
|
112
|
-
def detect_useless_production
|
114
|
+
def detect_useless_production
|
113
115
|
useless = productions.index { |prod| prod.refcount < 2 }
|
114
|
-
useless = nil if useless
|
116
|
+
useless = nil if useless&.zero?
|
115
117
|
|
116
118
|
return useless
|
117
119
|
end
|
@@ -131,6 +133,7 @@ class SequiturGrammar < DynamicGrammar
|
|
131
133
|
|
132
134
|
refs = a_prod.references_of(useless_prod)
|
133
135
|
next if refs.empty?
|
136
|
+
|
134
137
|
referencing = a_prod
|
135
138
|
break
|
136
139
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
2
4
|
# Represents a sequence (concatenation) of grammar symbols
|
3
5
|
# as they appear in rhs of productions
|
@@ -6,7 +8,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
6
8
|
attr_reader(:symbols)
|
7
9
|
|
8
10
|
# Create an empty sequence
|
9
|
-
def initialize
|
11
|
+
def initialize
|
10
12
|
@symbols = []
|
11
13
|
end
|
12
14
|
|
@@ -21,7 +23,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
21
23
|
end
|
22
24
|
|
23
25
|
# Clear the symbol sequence.
|
24
|
-
def clear
|
26
|
+
def clear
|
25
27
|
refs = references
|
26
28
|
refs.each(&:unbind)
|
27
29
|
@symbols = []
|
@@ -30,13 +32,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
30
32
|
|
31
33
|
# Tell whether the sequence is empty.
|
32
34
|
# @return [true / false] true only if the sequence has no symbol in it.
|
33
|
-
def empty?
|
35
|
+
def empty?
|
34
36
|
return symbols.empty?
|
35
37
|
end
|
36
38
|
|
37
39
|
# Count the number of elements in the sequence.
|
38
40
|
# @return [Fixnum] the number of elements
|
39
|
-
def size
|
41
|
+
def size
|
40
42
|
return symbols.size
|
41
43
|
end
|
42
44
|
|
@@ -76,29 +78,27 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
78
|
return same
|
77
79
|
end
|
78
80
|
|
79
|
-
|
80
81
|
# Select the references to production appearing in the rhs.
|
81
82
|
# @return [Array of ProductionRef]
|
82
|
-
def references
|
83
|
+
def references
|
83
84
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
84
85
|
return @memo_references
|
85
86
|
end
|
86
87
|
|
87
|
-
|
88
88
|
# Select the references of the given production appearing in the rhs.
|
89
89
|
# @param aProduction [Production]
|
90
90
|
# @return [Array of ProductionRef]
|
91
91
|
def references_of(aProduction)
|
92
92
|
return [] if references.empty?
|
93
|
+
|
93
94
|
result = references.select { |a_ref| a_ref == aProduction }
|
94
95
|
return result
|
95
96
|
end
|
96
97
|
|
97
|
-
|
98
98
|
# Emit a text representation of the symbol sequence.
|
99
99
|
# Text is of the form: space-separated sequence of symbols.
|
100
100
|
# @return [String]
|
101
|
-
def to_string
|
101
|
+
def to_string
|
102
102
|
rhs_text = symbols.map do |elem|
|
103
103
|
case elem
|
104
104
|
when String then "'#{elem}'"
|
@@ -150,7 +150,6 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
150
150
|
symbols.delete_at(position)
|
151
151
|
end
|
152
152
|
|
153
|
-
|
154
153
|
# Part of the 'visitee' role in Visitor design pattern.
|
155
154
|
# @param aVisitor[GrammarVisitor]
|
156
155
|
def accept(aVisitor)
|
@@ -170,7 +169,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
170
169
|
|
171
170
|
private
|
172
171
|
|
173
|
-
def invalidate_refs
|
172
|
+
def invalidate_refs
|
174
173
|
@memo_references = nil
|
175
174
|
@lookup_references = nil
|
176
175
|
end
|