sequitur 0.1.18 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +430 -56
- data/.travis.yml +19 -13
- data/CHANGELOG.md +33 -0
- data/Gemfile +4 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -2
- data/Rakefile +2 -0
- data/appveyor.yml +20 -12
- data/examples/integer_sample.rb +8 -7
- data/examples/porridge.rb +6 -6
- data/examples/simple_case.rb +7 -6
- data/examples/symbol_sample.rb +8 -9
- data/examples/word_sample.rb +4 -3
- data/lib/sequitur/constants.rb +5 -3
- data/lib/sequitur/digram.rb +45 -43
- data/lib/sequitur/dynamic_grammar.rb +93 -95
- data/lib/sequitur/formatter/base_formatter.rb +3 -1
- data/lib/sequitur/formatter/base_text.rb +3 -1
- data/lib/sequitur/formatter/debug.rb +5 -3
- data/lib/sequitur/grammar_visitor.rb +99 -98
- data/lib/sequitur/production.rb +14 -19
- data/lib/sequitur/production_ref.rb +107 -106
- data/lib/sequitur/sequitur_grammar.rb +9 -6
- data/lib/sequitur/symbol_sequence.rb +10 -11
- data/lib/sequitur.rb +2 -0
- data/spec/sequitur/digram_spec.rb +10 -8
- data/spec/sequitur/dynamic_grammar_spec.rb +2 -0
- data/spec/sequitur/formatter/base_text_spec.rb +4 -2
- data/spec/sequitur/formatter/debug_spec.rb +4 -2
- data/spec/sequitur/grammar_visitor_spec.rb +2 -0
- data/spec/sequitur/production_ref_spec.rb +2 -0
- data/spec/sequitur/production_spec.rb +10 -8
- data/spec/sequitur/sequitur_grammar_spec.rb +13 -13
- data/spec/sequitur/symbol_sequence_spec.rb +6 -4
- data/spec/spec_helper.rb +2 -12
- metadata +18 -46
- data/.ruby-version +0 -1
- data/.simplecov +0 -7
data/lib/sequitur/production.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'digram'
|
2
4
|
require_relative 'symbol_sequence'
|
3
5
|
require_relative 'production_ref'
|
@@ -23,7 +25,7 @@ class Production
|
|
23
25
|
|
24
26
|
# Constructor.
|
25
27
|
# Build a production with an empty RHS.
|
26
|
-
def initialize
|
28
|
+
def initialize
|
27
29
|
@rhs = SymbolSequence.new
|
28
30
|
@refcount = 0
|
29
31
|
@digrams = []
|
@@ -44,7 +46,6 @@ class Production
|
|
44
46
|
return result
|
45
47
|
end
|
46
48
|
|
47
|
-
|
48
49
|
# Is the rhs empty?
|
49
50
|
# @return [true/false] true if the rhs has no members.
|
50
51
|
def empty?
|
@@ -52,20 +53,20 @@ class Production
|
|
52
53
|
end
|
53
54
|
|
54
55
|
# Increment the reference count by one.
|
55
|
-
def incr_refcount
|
56
|
+
def incr_refcount
|
56
57
|
@refcount += 1
|
57
58
|
end
|
58
59
|
|
59
60
|
# Decrement the reference count by one.
|
60
|
-
def decr_refcount
|
61
|
+
def decr_refcount
|
61
62
|
raise StandardError, 'Internal error' if @refcount.zero?
|
63
|
+
|
62
64
|
@refcount -= 1
|
63
65
|
end
|
64
66
|
|
65
|
-
|
66
67
|
# Select the references to production appearing in the rhs.
|
67
68
|
# @return [Array of ProductionRef]
|
68
|
-
def references
|
69
|
+
def references
|
69
70
|
return rhs.references
|
70
71
|
end
|
71
72
|
|
@@ -77,10 +78,9 @@ class Production
|
|
77
78
|
return rhs.references_of(real_prod)
|
78
79
|
end
|
79
80
|
|
80
|
-
|
81
81
|
# Enumerate the digrams appearing in the right-hand side (rhs)
|
82
82
|
# @return [Array] the list of digrams found in rhs of this production.
|
83
|
-
def recalc_digrams
|
83
|
+
def recalc_digrams
|
84
84
|
return [] if rhs.size < 2
|
85
85
|
|
86
86
|
result = []
|
@@ -88,20 +88,17 @@ class Production
|
|
88
88
|
@digrams = result
|
89
89
|
end
|
90
90
|
|
91
|
-
|
92
|
-
|
93
91
|
# Does the rhs have exactly one digram only (= 2 symbols)?
|
94
92
|
# @return [true/false] true when the rhs contains exactly two symbols.
|
95
93
|
def single_digram?
|
96
94
|
return rhs.size == 2
|
97
95
|
end
|
98
96
|
|
99
|
-
|
100
97
|
# Detect whether the last digram occurs twice
|
101
98
|
# Assumption: when a digram occurs twice in a production then it must occur
|
102
99
|
# at the end of the rhs
|
103
100
|
# @return [true/false] true when the digram occurs twice in rhs.
|
104
|
-
def repeated_digram?
|
101
|
+
def repeated_digram?
|
105
102
|
return false if rhs.size < 3
|
106
103
|
|
107
104
|
my_digrams = digrams
|
@@ -113,17 +110,16 @@ class Production
|
|
113
110
|
|
114
111
|
# Retrieve the last digram appearing in the RHS (if any).
|
115
112
|
# @return [Digram] last digram in the rhs otherwise nil.
|
116
|
-
def last_digram
|
113
|
+
def last_digram
|
117
114
|
result = digrams.empty? ? nil : digrams.last
|
118
115
|
return result
|
119
116
|
end
|
120
117
|
|
121
|
-
|
122
118
|
# Emit a text representation of the production rule.
|
123
119
|
# Text is of the form:
|
124
120
|
# object id of production : rhs as space-separated sequence of symbols.
|
125
121
|
# @return [String]
|
126
|
-
def to_string
|
122
|
+
def to_string
|
127
123
|
return "#{object_id} : #{rhs.to_string}."
|
128
124
|
end
|
129
125
|
|
@@ -150,7 +146,7 @@ class Production
|
|
150
146
|
|
151
147
|
# Clear the right-hand side.
|
152
148
|
# Any referenced production has its reference counter decremented.
|
153
|
-
def clear_rhs
|
149
|
+
def clear_rhs
|
154
150
|
rhs.clear
|
155
151
|
end
|
156
152
|
|
@@ -168,9 +164,10 @@ class Production
|
|
168
164
|
# p.positions_of(a, a) # => [0, 3]
|
169
165
|
def positions_of(symb1, symb2)
|
170
166
|
# Find the positions where the digram occurs in rhs
|
171
|
-
indices = [
|
167
|
+
indices = [-2] # Dummy index!
|
172
168
|
(0...rhs.size).each do |i|
|
173
169
|
next if i == indices.last + 1
|
170
|
+
|
174
171
|
indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
|
175
172
|
end
|
176
173
|
|
@@ -179,7 +176,6 @@ class Production
|
|
179
176
|
return indices
|
180
177
|
end
|
181
178
|
|
182
|
-
|
183
179
|
# Given that the production P passed as argument has exactly 2 symbols
|
184
180
|
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
185
181
|
# s1 s2 by a reference to P.
|
@@ -217,7 +213,6 @@ class Production
|
|
217
213
|
recalc_digrams
|
218
214
|
end
|
219
215
|
|
220
|
-
|
221
216
|
# Part of the 'visitee' role in Visitor design pattern.
|
222
217
|
# @param aVisitor [GrammarVisitor] the visitor
|
223
218
|
def accept(aVisitor)
|
@@ -1,115 +1,116 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
-
# side of a production P1 and that refers to a production P2.
|
6
|
-
# Every time a production P2 appears in the right-hand side of
|
7
|
-
# production P1, this is implemented by inserting a production reference to P2
|
8
|
-
# in the appropriate position in the RHS of P1.
|
9
|
-
# In the literature, production references are also called non terminal
|
10
|
-
# symbols
|
11
|
-
# @example
|
12
|
-
# # Given a production rule...
|
13
|
-
# prod = Sequitur::Production.new
|
14
|
-
# puts prod.refcount # outputs 0
|
15
|
-
# # ... Build a reference to it
|
16
|
-
# ref = Sequitur::ProductionRef.new(prod)
|
17
|
-
# # ... Production reference count is updated...
|
18
|
-
# puts prod.refcount # outputs 1
|
19
|
-
class ProductionRef
|
20
|
-
# Link to the production to reference.
|
21
|
-
attr_reader(:production)
|
22
|
-
|
23
|
-
# Constructor
|
24
|
-
# @param target [Production or ProductionRef]
|
25
|
-
# The production that is being referenced.
|
26
|
-
def initialize(target)
|
27
|
-
bind_to(target)
|
28
|
-
end
|
29
|
-
|
30
|
-
# Copy constructor invoked by dup or clone methods.
|
31
|
-
# @param orig [ProductionRef]
|
4
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
5
|
+
# side of a production P1 and that refers to a production P2.
|
6
|
+
# Every time a production P2 appears in the right-hand side of
|
7
|
+
# production P1, this is implemented by inserting a production reference to P2
|
8
|
+
# in the appropriate position in the RHS of P1.
|
9
|
+
# In the literature, production references are also called non terminal
|
10
|
+
# symbols
|
32
11
|
# @example
|
12
|
+
# # Given a production rule...
|
33
13
|
# prod = Sequitur::Production.new
|
14
|
+
# puts prod.refcount # outputs 0
|
15
|
+
# # ... Build a reference to it
|
34
16
|
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
-
#
|
36
|
-
# puts prod.refcount # outputs
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
17
|
+
# # ... Production reference count is updated...
|
18
|
+
# puts prod.refcount # outputs 1
|
19
|
+
class ProductionRef
|
20
|
+
# Link to the production to reference.
|
21
|
+
attr_reader(:production)
|
22
|
+
|
23
|
+
# Constructor
|
24
|
+
# @param target [Production or ProductionRef]
|
25
|
+
# The production that is being referenced.
|
26
|
+
def initialize(target)
|
27
|
+
bind_to(target)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Copy constructor invoked by dup or clone methods.
|
31
|
+
# @param orig [ProductionRef]
|
32
|
+
# @example
|
33
|
+
# prod = Sequitur::Production.new
|
34
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
35
|
+
# copy_ref = ref.dup
|
36
|
+
# puts prod.refcount # outputs 2
|
37
|
+
def initialize_copy(orig)
|
38
|
+
@production = nil
|
39
|
+
bind_to(orig.production)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Emit the text representation of a production reference.
|
43
|
+
# @return [String]
|
44
|
+
def to_s
|
45
|
+
return production.object_id.to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
alias to_string to_s
|
49
|
+
|
50
|
+
|
51
|
+
# Equality testing.
|
52
|
+
# A production ref is equal to another one when it
|
53
|
+
# refers to the same production or when it is compared to
|
54
|
+
# the production it refers to.
|
55
|
+
# @param other [ProductionRef]
|
56
|
+
# @return [true / false]
|
57
|
+
def ==(other)
|
58
|
+
return true if object_id == other.object_id
|
59
|
+
|
60
|
+
result = if other.is_a?(ProductionRef)
|
61
|
+
(production == other.production)
|
62
|
+
else
|
63
|
+
(production == other)
|
64
|
+
end
|
65
|
+
|
66
|
+
return result
|
67
|
+
end
|
68
|
+
|
69
|
+
# Produce a hash value.
|
70
|
+
# A reference has no identity on its own,
|
71
|
+
# the method returns the hash value of the
|
72
|
+
# referenced production
|
73
|
+
# @return [Integer] the hash value
|
74
|
+
def hash
|
75
|
+
raise StandardError, 'Nil production' if production.nil?
|
76
|
+
|
77
|
+
return production.hash
|
78
|
+
end
|
79
|
+
|
80
|
+
# Make this reference point to the given production.
|
81
|
+
# @param aProduction [Production or ProductionRef] the production
|
82
|
+
# to refer to
|
83
|
+
def bind_to(aProduction)
|
84
|
+
return if aProduction == @production
|
85
|
+
|
86
|
+
production&.decr_refcount
|
87
|
+
unless aProduction.kind_of?(Production)
|
88
|
+
raise StandardError, "Illegal production type #{aProduction.class}"
|
89
|
+
end
|
90
|
+
|
91
|
+
@production = aProduction
|
92
|
+
production.incr_refcount
|
93
|
+
end
|
94
|
+
|
95
|
+
# Clear the reference to the target production.
|
96
|
+
def unbind
|
97
|
+
production.decr_refcount
|
98
|
+
@production = nil
|
99
|
+
end
|
100
|
+
|
101
|
+
# Check that this object doesn't refer to any production.
|
102
|
+
# @return [true / false] true when this object doesn't
|
103
|
+
# point to a production.
|
104
|
+
def unbound?
|
105
|
+
return production.nil?
|
106
|
+
end
|
107
|
+
|
108
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
109
|
+
# @param aVisitor [GrammarVisitor] the visitor
|
110
|
+
def accept(aVisitor)
|
111
|
+
aVisitor.visit_prod_ref(self)
|
88
112
|
end
|
89
|
-
|
90
|
-
production.incr_refcount
|
91
|
-
end
|
92
|
-
|
93
|
-
|
94
|
-
# Clear the reference to the target production.
|
95
|
-
def unbind()
|
96
|
-
production.decr_refcount
|
97
|
-
@production = nil
|
98
|
-
end
|
99
|
-
|
100
|
-
# Check that this object doesn't refer to any production.
|
101
|
-
# @return [true / false] true when this object doesn't
|
102
|
-
# point to a production.
|
103
|
-
def unbound?()
|
104
|
-
return production.nil?
|
105
|
-
end
|
106
|
-
|
107
|
-
# Part of the 'visitee' role in the Visitor design pattern.
|
108
|
-
# @param aVisitor [GrammarVisitor] the visitor
|
109
|
-
def accept(aVisitor)
|
110
|
-
aVisitor.visit_prod_ref(self)
|
111
|
-
end
|
112
|
-
end # class
|
113
|
+
end # class
|
113
114
|
end # module
|
114
115
|
|
115
116
|
# End of file
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'dynamic_grammar'
|
2
4
|
|
3
5
|
|
@@ -27,8 +29,8 @@ class SequiturGrammar < DynamicGrammar
|
|
27
29
|
CollisionDiagnosis = Struct.new(
|
28
30
|
:collision_found, # true if collision detected
|
29
31
|
:digram, # The digram involved in a collision
|
30
|
-
:productions # The productions where the digram occurs
|
31
|
-
|
32
|
+
:productions) # The productions where the digram occurs
|
33
|
+
|
32
34
|
|
33
35
|
|
34
36
|
# Assuming that a new input token was added to the start production,
|
@@ -43,7 +45,7 @@ class SequiturGrammar < DynamicGrammar
|
|
43
45
|
# remove P from grammar
|
44
46
|
# end
|
45
47
|
# end until digram unicity and rule utility are met
|
46
|
-
def enforce_rules
|
48
|
+
def enforce_rules
|
47
49
|
loop do
|
48
50
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
51
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
@@ -61,7 +63,7 @@ class SequiturGrammar < DynamicGrammar
|
|
61
63
|
# Return an empty Hash if each digram appears once.
|
62
64
|
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
63
65
|
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
-
def detect_collision
|
66
|
+
def detect_collision
|
65
67
|
diagnosis = CollisionDiagnosis.new(false)
|
66
68
|
found_so_far = {}
|
67
69
|
productions.each do |a_prod|
|
@@ -109,9 +111,9 @@ class SequiturGrammar < DynamicGrammar
|
|
109
111
|
end
|
110
112
|
|
111
113
|
# Return a production that is used less than twice in the grammar.
|
112
|
-
def detect_useless_production
|
114
|
+
def detect_useless_production
|
113
115
|
useless = productions.index { |prod| prod.refcount < 2 }
|
114
|
-
useless = nil if useless
|
116
|
+
useless = nil if useless&.zero?
|
115
117
|
|
116
118
|
return useless
|
117
119
|
end
|
@@ -131,6 +133,7 @@ class SequiturGrammar < DynamicGrammar
|
|
131
133
|
|
132
134
|
refs = a_prod.references_of(useless_prod)
|
133
135
|
next if refs.empty?
|
136
|
+
|
134
137
|
referencing = a_prod
|
135
138
|
break
|
136
139
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
2
4
|
# Represents a sequence (concatenation) of grammar symbols
|
3
5
|
# as they appear in rhs of productions
|
@@ -6,7 +8,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
6
8
|
attr_reader(:symbols)
|
7
9
|
|
8
10
|
# Create an empty sequence
|
9
|
-
def initialize
|
11
|
+
def initialize
|
10
12
|
@symbols = []
|
11
13
|
end
|
12
14
|
|
@@ -21,7 +23,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
21
23
|
end
|
22
24
|
|
23
25
|
# Clear the symbol sequence.
|
24
|
-
def clear
|
26
|
+
def clear
|
25
27
|
refs = references
|
26
28
|
refs.each(&:unbind)
|
27
29
|
@symbols = []
|
@@ -30,13 +32,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
30
32
|
|
31
33
|
# Tell whether the sequence is empty.
|
32
34
|
# @return [true / false] true only if the sequence has no symbol in it.
|
33
|
-
def empty?
|
35
|
+
def empty?
|
34
36
|
return symbols.empty?
|
35
37
|
end
|
36
38
|
|
37
39
|
# Count the number of elements in the sequence.
|
38
40
|
# @return [Integer] the number of elements
|
39
|
-
def size
|
41
|
+
def size
|
40
42
|
return symbols.size
|
41
43
|
end
|
42
44
|
|
@@ -76,29 +78,27 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
78
|
return same
|
77
79
|
end
|
78
80
|
|
79
|
-
|
80
81
|
# Select the references to production appearing in the rhs.
|
81
82
|
# @return [Array of ProductionRef]
|
82
|
-
def references
|
83
|
+
def references
|
83
84
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
84
85
|
return @memo_references
|
85
86
|
end
|
86
87
|
|
87
|
-
|
88
88
|
# Select the references of the given production appearing in the rhs.
|
89
89
|
# @param aProduction [Production]
|
90
90
|
# @return [Array of ProductionRef]
|
91
91
|
def references_of(aProduction)
|
92
92
|
return [] if references.empty?
|
93
|
+
|
93
94
|
result = references.select { |a_ref| a_ref == aProduction }
|
94
95
|
return result
|
95
96
|
end
|
96
97
|
|
97
|
-
|
98
98
|
# Emit a text representation of the symbol sequence.
|
99
99
|
# Text is of the form: space-separated sequence of symbols.
|
100
100
|
# @return [String]
|
101
|
-
def to_string
|
101
|
+
def to_string
|
102
102
|
rhs_text = symbols.map do |elem|
|
103
103
|
case elem
|
104
104
|
when String then "'#{elem}'"
|
@@ -150,7 +150,6 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
150
150
|
symbols.delete_at(position)
|
151
151
|
end
|
152
152
|
|
153
|
-
|
154
153
|
# Part of the 'visitee' role in Visitor design pattern.
|
155
154
|
# @param aVisitor [GrammarVisitor] the visitor
|
156
155
|
def accept(aVisitor)
|
@@ -170,7 +169,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
170
169
|
|
171
170
|
private
|
172
171
|
|
173
|
-
def invalidate_refs
|
172
|
+
def invalidate_refs
|
174
173
|
@memo_references = nil
|
175
174
|
@lookup_references = nil
|
176
175
|
end
|