sequitur 0.1.09 → 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +3 -0
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +1 -1
- data/lib/sequitur/grammar_visitor.rb +8 -7
- data/lib/sequitur/production.rb +4 -4
- data/lib/sequitur/sequitur_grammar.rb +24 -21
- data/lib/sequitur/symbol_sequence.rb +42 -10
- data/spec/sequitur/production_spec.rb +4 -3
- data/spec/sequitur/symbol_sequence_spec.rb +46 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZGMxY2RmOWZlOWI3MzljMjNmZmEyZDFjZDUzZDg2YzQzNzM2MmRhZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzNhZGJmNWY4YTg3MWY3ODI5YzE4ZTg5MTUyNTlkMGFmMzhlNjdiYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NWY1NDhiZjUzNjJjNTdlMmE2ZjI0ODc3MDNlMjQ2MGM3ZjEzMWQxNzUxZDgz
|
10
|
+
YmE4ZmFlNzI4NDc2NGE0YjQzOTJhNmViYmQ4MThjZTg0YmEyN2JiOTlkMzk4
|
11
|
+
MTc4MWUwMWE3ZWY0NTQ0Y2ZlYjU3MmVkODNiZjUwMzgxZWNlMDQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWI3N2U3ZGU5N2Y4ZTEzNDdmYjQyYjExOWIzYWRjMWE0ZDUxYmM2MjI4YzU0
|
14
|
+
ODZiZTQ3ZDlkMWMxMmY2ZWQwM2JjNmZiNmU1ZDRhMTFhMjY4ZjI5MjJmNWFl
|
15
|
+
MDYxZmU0ZWUwOTdhNDBkODE4NDJjNzQ0MGRmMWQwMDIxMjkwMTI=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
### 0.1.10 / 2014-10-05
|
2
|
+
* [CHANGE] Code refactoring for performance. Impacted classes: `SequiturGrammar`, `SymbolSequence` and `Production`.
|
3
|
+
|
1
4
|
### 0.1.09 / 2014-10-03
|
2
5
|
* [NEW] Class `SymbolSequence`. Part of code refactoring that reduces code complexity reported by CodeClimate.
|
3
6
|
* [CHANGE] Class `Production` refactored to use a SymbolSequence instance as its rhs.
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -27,7 +27,7 @@ class Digram
|
|
27
27
|
# the sequence symbol1 symbol2 appears.
|
28
28
|
def initialize(symbol1, symbol2, aProduction)
|
29
29
|
@symbols = [symbol1, symbol2]
|
30
|
-
@key =
|
30
|
+
@key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
|
31
31
|
@production = aProduction
|
32
32
|
end
|
33
33
|
|
@@ -48,14 +48,14 @@ class GrammarVisitor
|
|
48
48
|
def start_visit_production(aProduction)
|
49
49
|
broadcast(:before_production, aProduction)
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
# Visit event. The visitor is about to visit the given rhs of production.
|
53
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
53
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
54
54
|
def start_visit_rhs(rhs)
|
55
|
-
broadcast(:before_rhs, rhs)
|
55
|
+
broadcast(:before_rhs, rhs)
|
56
56
|
end
|
57
57
|
|
58
|
-
# Visit event. The visitor is visiting the
|
58
|
+
# Visit event. The visitor is visiting the
|
59
59
|
# given reference production (= non-terminal symbol).
|
60
60
|
# @param aProdRef [ProductionRef] the production reference to visit.
|
61
61
|
def visit_prod_ref(aProdRef)
|
@@ -64,18 +64,18 @@ class GrammarVisitor
|
|
64
64
|
broadcast(:after_non_terminal, production)
|
65
65
|
end
|
66
66
|
|
67
|
-
# Visit event. The visitor is visiting the
|
67
|
+
# Visit event. The visitor is visiting the
|
68
68
|
# given terminal symbol.
|
69
69
|
# @param aTerminal [Object] the terminal to visit.
|
70
70
|
def visit_terminal(aTerminal)
|
71
71
|
broadcast(:before_terminal, aTerminal)
|
72
72
|
broadcast(:after_terminal, aTerminal)
|
73
73
|
end
|
74
|
-
|
74
|
+
|
75
75
|
# Visit event. The visitor has completed its visit of the given rhs.
|
76
76
|
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
77
77
|
def end_visit_rhs(rhs)
|
78
|
-
broadcast(:after_rhs, rhs)
|
78
|
+
broadcast(:after_rhs, rhs)
|
79
79
|
end
|
80
80
|
|
81
81
|
# Visit event. The visitor has completed its visit of the given production.
|
@@ -91,6 +91,7 @@ class GrammarVisitor
|
|
91
91
|
end
|
92
92
|
|
93
93
|
private
|
94
|
+
|
94
95
|
# Send a notification to all subscribers.
|
95
96
|
# @param msg [Symbol] event to notify
|
96
97
|
# @param args [Array] arguments of the notification.
|
data/lib/sequitur/production.rb
CHANGED
@@ -78,9 +78,9 @@ class Production
|
|
78
78
|
# Look in the rhs all the references to a production passed a argument.
|
79
79
|
# aProduction [aProduction or ProductionRef] The production to search for.
|
80
80
|
# @return [Array] the array of ProductionRef to the passed production
|
81
|
-
def references_of(
|
82
|
-
|
83
|
-
return
|
81
|
+
def references_of(a_prod)
|
82
|
+
real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
|
83
|
+
return rhs.references_of(real_prod)
|
84
84
|
end
|
85
85
|
|
86
86
|
|
@@ -158,7 +158,7 @@ class Production
|
|
158
158
|
# Clear the right-hand side.
|
159
159
|
# Any referenced production has its reference counter decremented.
|
160
160
|
def clear_rhs()
|
161
|
-
rhs.clear
|
161
|
+
rhs.clear
|
162
162
|
end
|
163
163
|
|
164
164
|
# Find all the positions where the digram occurs in the rhs
|
@@ -9,7 +9,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
9
9
|
class SequiturGrammar < DynamicGrammar
|
10
10
|
|
11
11
|
# Build the grammar from an enumerator of tokens.
|
12
|
-
# @param anEnum [Enumerator] an enumerator that will iterate
|
12
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
13
13
|
# over the input tokens.
|
14
14
|
def initialize(anEnum)
|
15
15
|
super()
|
@@ -25,12 +25,12 @@ class SequiturGrammar < DynamicGrammar
|
|
25
25
|
|
26
26
|
private
|
27
27
|
|
28
|
-
# Struct used for internal purposes
|
29
|
-
CollisionDiagnosis = Struct.new(
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
)
|
28
|
+
# Struct used for internal purposes
|
29
|
+
CollisionDiagnosis = Struct.new(
|
30
|
+
:collision_found, # true if collision detected
|
31
|
+
:digram, # The digram involved in a collision
|
32
|
+
:productions # The productions where the digram occurs
|
33
|
+
)
|
34
34
|
|
35
35
|
|
36
36
|
# Assuming that a new input token was added to the start production,
|
@@ -49,13 +49,13 @@ CollisionDiagnosis = Struct.new(
|
|
49
49
|
loop do
|
50
50
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
51
51
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
52
|
-
|
53
|
-
|
54
|
-
restore_utility(
|
52
|
+
|
53
|
+
prod_index = detect_useless_production
|
54
|
+
restore_utility(prod_index) unless prod_index.nil?
|
55
55
|
|
56
56
|
unicity_diagnosis = detect_collision
|
57
|
-
|
58
|
-
break unless unicity_diagnosis.collision_found ||
|
57
|
+
prod_index = detect_useless_production
|
58
|
+
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
@@ -72,7 +72,7 @@ CollisionDiagnosis = Struct.new(
|
|
72
72
|
its_key = a_digr.key
|
73
73
|
if found_so_far.include? its_key
|
74
74
|
orig_digr = found_so_far[its_key]
|
75
|
-
# Disregard sequence like a a a
|
75
|
+
# Disregard sequence like a a a
|
76
76
|
if ((orig_digr.production == a_prod) && a_digr.repeating? &&
|
77
77
|
(orig_digr == a_digr))
|
78
78
|
next
|
@@ -112,20 +112,23 @@ CollisionDiagnosis = Struct.new(
|
|
112
112
|
|
113
113
|
# Return a production that is used less than twice in the grammar.
|
114
114
|
def detect_useless_production()
|
115
|
-
useless = productions.
|
116
|
-
|
115
|
+
useless = productions.index { |prod| prod.refcount < 2 }
|
116
|
+
unless useless.nil?
|
117
|
+
useless = nil if useless == 0
|
118
|
+
end
|
119
|
+
return useless
|
117
120
|
end
|
118
121
|
|
119
122
|
# Given the passed production P is referenced only once.
|
120
123
|
# Then replace P by its RHS where it is referenced.
|
121
124
|
# And delete P
|
122
|
-
def restore_utility(
|
123
|
-
# Retrieve
|
124
|
-
|
125
|
+
def restore_utility(prod_index)
|
126
|
+
# Retrieve useless prod from its index
|
127
|
+
useless_prod = productions[prod_index]
|
125
128
|
|
126
129
|
# Retrieve production referencing useless one
|
127
130
|
referencing = nil
|
128
|
-
productions.each do |a_prod|
|
131
|
+
productions.reverse.each do |a_prod|
|
129
132
|
# Next line assumes non-recursive productions
|
130
133
|
next if a_prod == useless_prod
|
131
134
|
|
@@ -136,7 +139,7 @@ CollisionDiagnosis = Struct.new(
|
|
136
139
|
end
|
137
140
|
|
138
141
|
referencing.derive_step(useless_prod)
|
139
|
-
remove_production(
|
142
|
+
remove_production(prod_index)
|
140
143
|
end
|
141
144
|
|
142
145
|
# Create a new production that will have the symbols from digram
|
@@ -145,7 +148,7 @@ CollisionDiagnosis = Struct.new(
|
|
145
148
|
new_prod = Production.new
|
146
149
|
aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
|
147
150
|
add_production(new_prod)
|
148
|
-
|
151
|
+
|
149
152
|
return new_prod
|
150
153
|
end
|
151
154
|
end # class
|
@@ -17,6 +17,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
17
17
|
@symbols = orig.symbols.map do |sym|
|
18
18
|
sym.is_a?(Symbol) ? sym : sym.dup
|
19
19
|
end
|
20
|
+
invalidate_refs
|
20
21
|
end
|
21
22
|
|
22
23
|
public
|
@@ -26,6 +27,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
26
27
|
refs = references
|
27
28
|
refs.each(&:unbind)
|
28
29
|
@symbols = []
|
30
|
+
invalidate_refs
|
29
31
|
end
|
30
32
|
|
31
33
|
# Tell whether the sequence is empty.
|
@@ -44,6 +46,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
44
46
|
# @param aSymbol [Object] The symbol to append.
|
45
47
|
def <<(aSymbol)
|
46
48
|
symbols << aSymbol
|
49
|
+
if aSymbol.is_a?(ProductionRef)
|
50
|
+
@memo_references ||= []
|
51
|
+
@memo_references << aSymbol
|
52
|
+
end
|
47
53
|
end
|
48
54
|
|
49
55
|
# Retrieve the element from the sequence at given position.
|
@@ -58,13 +64,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
58
64
|
# @return true when an item from self equals the corresponding
|
59
65
|
# item from 'other'
|
60
66
|
def ==(other)
|
61
|
-
return true if
|
67
|
+
return true if object_id == other.object_id
|
62
68
|
|
63
69
|
case other
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
when SymbolSequence
|
71
|
+
same = symbols == other.symbols
|
72
|
+
when Array
|
73
|
+
same = symbols == other
|
68
74
|
else
|
69
75
|
same = false
|
70
76
|
end
|
@@ -76,7 +82,18 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
82
|
# Select the references to production appearing in the rhs.
|
77
83
|
# @return [Array of ProductionRef]
|
78
84
|
def references()
|
79
|
-
|
85
|
+
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
86
|
+
return @memo_references
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
# Select the references of the given production appearing in the rhs.
|
91
|
+
# @param aProduction [Production]
|
92
|
+
# @return [Array of ProductionRef]
|
93
|
+
def references_of(aProduction)
|
94
|
+
return [] if references.empty?
|
95
|
+
result = references.select { |a_ref| a_ref == aProduction }
|
96
|
+
return result
|
80
97
|
end
|
81
98
|
|
82
99
|
|
@@ -96,11 +113,12 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
96
113
|
|
97
114
|
# Insert at position the elements from another sequence.
|
98
115
|
# @param position [Fixnum] A zero-based index of the symbols to replace.
|
99
|
-
# @param another [
|
116
|
+
# @param another [SymbolSequence] A production with a two-elements rhs
|
100
117
|
# (a single digram).
|
101
118
|
def insert_at(position, another)
|
102
119
|
klone = another.dup
|
103
120
|
symbols.insert(position, *klone.symbols)
|
121
|
+
invalidate_refs
|
104
122
|
end
|
105
123
|
|
106
124
|
# Given that the production P passed as argument has exactly 2 symbols
|
@@ -114,16 +132,23 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
114
132
|
if symbols[index].is_a?(ProductionRef)
|
115
133
|
symbols[index].bind_to(aProduction)
|
116
134
|
else
|
117
|
-
|
135
|
+
new_ref = ProductionRef.new(aProduction)
|
136
|
+
symbols[index] = new_ref
|
137
|
+
@memo_references ||= []
|
138
|
+
@memo_references << new_ref
|
118
139
|
end
|
119
140
|
index1 = index + 1
|
120
|
-
|
141
|
+
if symbols[index1].is_a?(ProductionRef)
|
142
|
+
symbols[index1].unbind
|
143
|
+
invalidate_refs
|
144
|
+
end
|
121
145
|
delete_at(index1)
|
122
146
|
end
|
123
147
|
|
124
148
|
# Remove the element at given position
|
125
149
|
# @param position [Fixnum] a zero-based index.
|
126
150
|
def delete_at(position)
|
151
|
+
invalidate_refs if symbols[position].is_a?(ProductionRef)
|
127
152
|
symbols.delete_at(position)
|
128
153
|
end
|
129
154
|
|
@@ -144,7 +169,14 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
144
169
|
|
145
170
|
aVisitor.end_visit_rhs(self)
|
146
171
|
end
|
172
|
+
|
173
|
+
private
|
174
|
+
|
175
|
+
def invalidate_refs()
|
176
|
+
@memo_references = nil
|
177
|
+
@lookup_references = nil
|
178
|
+
end
|
147
179
|
|
148
180
|
end # class
|
149
181
|
|
150
|
-
end # module
|
182
|
+
end # module
|
@@ -77,18 +77,19 @@ describe Production do
|
|
77
77
|
# Case 2: production with one reference
|
78
78
|
subject.append_symbol(p_a)
|
79
79
|
expect(subject.references).to eq([p_a])
|
80
|
-
expect(subject.references_of(p_a)).to eq([p_a])
|
80
|
+
expect(subject.references_of(p_a).map(&:production)).to eq([p_a])
|
81
|
+
|
81
82
|
|
82
83
|
# Case 3: production with repeated references
|
83
84
|
subject.append_symbol(p_a) # second time
|
84
85
|
expect(subject.references).to eq([p_a, p_a])
|
85
|
-
expect(subject.references_of(p_a)).to eq([p_a, p_a])
|
86
|
+
expect(subject.references_of(p_a).map(&:production)).to eq([p_a, p_a])
|
86
87
|
|
87
88
|
|
88
89
|
# Case 4: production with multiple distinct references
|
89
90
|
subject.append_symbol(p_bc)
|
90
91
|
expect(subject.references).to eq([p_a, p_a, p_bc])
|
91
|
-
expect(subject.references_of(p_bc)).to eq([p_bc])
|
92
|
+
expect(subject.references_of(p_bc).map(&:production)).to eq([p_bc])
|
92
93
|
end
|
93
94
|
|
94
95
|
it 'should know the position(s) of a given digram' do
|
@@ -9,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
9
9
|
|
10
10
|
describe SymbolSequence do
|
11
11
|
|
12
|
-
let(:instance) { SymbolSequence.new }
|
12
|
+
let(:instance) { SymbolSequence.new }
|
13
13
|
|
14
14
|
context 'Creation and initialization:' do
|
15
15
|
|
@@ -24,34 +24,35 @@ describe SymbolSequence do
|
|
24
24
|
end # context
|
25
25
|
|
26
26
|
context 'Provided services:' do
|
27
|
+
let(:a_prod) { Production.new }
|
28
|
+
|
27
29
|
subject do
|
28
30
|
an_instance = SymbolSequence.new
|
29
31
|
[:a, :b, :c].each { |a_sym| an_instance << a_sym }
|
30
32
|
an_instance
|
31
33
|
end
|
32
|
-
|
34
|
+
|
33
35
|
it 'should deep-copy clone itself' do
|
34
|
-
a_prod = Production.new
|
35
36
|
ref = ProductionRef.new(a_prod)
|
36
|
-
|
37
|
+
|
37
38
|
a, c = 'a', 'c'
|
38
39
|
[a, ref, c].each { |ch| instance << ch }
|
39
40
|
clone_a = instance.clone
|
40
|
-
|
41
|
+
|
41
42
|
# Check that cloning works
|
42
43
|
expect(clone_a).to eq(instance)
|
43
|
-
|
44
|
+
|
44
45
|
# Reference objects are distinct but points to same production
|
45
46
|
expect(clone_a.symbols[1].object_id).not_to eq(instance.symbols[1])
|
46
|
-
|
47
|
+
|
47
48
|
# Modifying the clone...
|
48
49
|
clone_a.symbols[1] = 'diff'
|
49
50
|
expect(clone_a).not_to eq(instance)
|
50
|
-
|
51
|
+
|
51
52
|
# ... should leave original unchanged
|
52
53
|
expect(instance.symbols[1]).to eq(ref)
|
53
54
|
end
|
54
|
-
|
55
|
+
|
55
56
|
|
56
57
|
it 'should tell that it is equal to itself' do
|
57
58
|
# Case: Non-empty sequence
|
@@ -65,25 +66,53 @@ describe SymbolSequence do
|
|
65
66
|
expect(instance).to eq(instance)
|
66
67
|
|
67
68
|
expect(subject).not_to eq(instance)
|
68
|
-
[:a, :b, :c].each { |a_sym| instance << a_sym }
|
69
|
+
[:a, :b, :c].each { |a_sym| instance << a_sym }
|
69
70
|
expect(subject).to eq(instance)
|
70
|
-
|
71
|
+
|
71
72
|
# Check that element order is relevant
|
72
73
|
instance.symbols.rotate!
|
73
|
-
expect(subject).not_to eq(instance)
|
74
|
+
expect(subject).not_to eq(instance)
|
74
75
|
end
|
75
|
-
|
76
|
+
|
76
77
|
it 'should know whether it is equal to an array' do
|
77
78
|
expect(subject).to eq([:a, :b, :c])
|
78
|
-
|
79
|
+
|
79
80
|
# Check that element order is relevant
|
80
|
-
expect(subject).not_to eq([:c, :b, :a])
|
81
|
+
expect(subject).not_to eq([:c, :b, :a])
|
81
82
|
end
|
82
|
-
|
83
|
+
|
83
84
|
it 'should know that is not equal to something else' do
|
84
85
|
expect(subject).not_to eq(:abc)
|
85
86
|
end
|
86
87
|
|
88
|
+
|
89
|
+
it 'should know its references' do
|
90
|
+
ref = ProductionRef.new(a_prod)
|
91
|
+
2.times { subject << ref }
|
92
|
+
|
93
|
+
refs = subject.references
|
94
|
+
expect(refs.size).to eq(2)
|
95
|
+
expect(refs).to eq([ref, ref])
|
96
|
+
|
97
|
+
refs = subject.references
|
98
|
+
expect(refs.size).to eq(2)
|
99
|
+
expect(refs).to eq([ref, ref])
|
100
|
+
specific_refs = subject.references_of(a_prod)
|
101
|
+
expect(specific_refs).to eq(refs)
|
102
|
+
|
103
|
+
|
104
|
+
another = Production.new
|
105
|
+
another_ref = ProductionRef.new(another)
|
106
|
+
subject << another_ref
|
107
|
+
refs = subject.references
|
108
|
+
expect(refs.size).to eq(3)
|
109
|
+
expect(refs).to eq([ref, ref, another])
|
110
|
+
specific_refs = subject.references_of(a_prod)
|
111
|
+
expect(specific_refs).to eq([ref, ref])
|
112
|
+
specific_refs = subject.references_of(another)
|
113
|
+
expect(specific_refs).to eq([another])
|
114
|
+
end
|
115
|
+
|
87
116
|
end # context
|
88
117
|
|
89
118
|
|
@@ -92,4 +121,4 @@ end # describe
|
|
92
121
|
|
93
122
|
end # module
|
94
123
|
|
95
|
-
# End of file
|
124
|
+
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|