sequitur 0.1.09 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +3 -0
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +1 -1
- data/lib/sequitur/grammar_visitor.rb +8 -7
- data/lib/sequitur/production.rb +4 -4
- data/lib/sequitur/sequitur_grammar.rb +24 -21
- data/lib/sequitur/symbol_sequence.rb +42 -10
- data/spec/sequitur/production_spec.rb +4 -3
- data/spec/sequitur/symbol_sequence_spec.rb +46 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZGMxY2RmOWZlOWI3MzljMjNmZmEyZDFjZDUzZDg2YzQzNzM2MmRhZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzNhZGJmNWY4YTg3MWY3ODI5YzE4ZTg5MTUyNTlkMGFmMzhlNjdiYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NWY1NDhiZjUzNjJjNTdlMmE2ZjI0ODc3MDNlMjQ2MGM3ZjEzMWQxNzUxZDgz
|
10
|
+
YmE4ZmFlNzI4NDc2NGE0YjQzOTJhNmViYmQ4MThjZTg0YmEyN2JiOTlkMzk4
|
11
|
+
MTc4MWUwMWE3ZWY0NTQ0Y2ZlYjU3MmVkODNiZjUwMzgxZWNlMDQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWI3N2U3ZGU5N2Y4ZTEzNDdmYjQyYjExOWIzYWRjMWE0ZDUxYmM2MjI4YzU0
|
14
|
+
ODZiZTQ3ZDlkMWMxMmY2ZWQwM2JjNmZiNmU1ZDRhMTFhMjY4ZjI5MjJmNWFl
|
15
|
+
MDYxZmU0ZWUwOTdhNDBkODE4NDJjNzQ0MGRmMWQwMDIxMjkwMTI=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
### 0.1.10 / 2014-10-05
|
2
|
+
* [CHANGE] Code refactoring for performance. Impacted classes: `SequiturGrammar`, `SymbolSequence` and `Production`.
|
3
|
+
|
1
4
|
### 0.1.09 / 2014-10-03
|
2
5
|
* [NEW] Class `SymbolSequence`. Part of code refactoring that reduces code complexity reported by CodeClimate.
|
3
6
|
* [CHANGE] Class `Production` refactored to use a SymbolSequence instance as its rhs.
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -27,7 +27,7 @@ class Digram
|
|
27
27
|
# the sequence symbol1 symbol2 appears.
|
28
28
|
def initialize(symbol1, symbol2, aProduction)
|
29
29
|
@symbols = [symbol1, symbol2]
|
30
|
-
@key =
|
30
|
+
@key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
|
31
31
|
@production = aProduction
|
32
32
|
end
|
33
33
|
|
@@ -48,14 +48,14 @@ class GrammarVisitor
|
|
48
48
|
def start_visit_production(aProduction)
|
49
49
|
broadcast(:before_production, aProduction)
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
# Visit event. The visitor is about to visit the given rhs of production.
|
53
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
53
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
54
54
|
def start_visit_rhs(rhs)
|
55
|
-
broadcast(:before_rhs, rhs)
|
55
|
+
broadcast(:before_rhs, rhs)
|
56
56
|
end
|
57
57
|
|
58
|
-
# Visit event. The visitor is visiting the
|
58
|
+
# Visit event. The visitor is visiting the
|
59
59
|
# given reference production (= non-terminal symbol).
|
60
60
|
# @param aProdRef [ProductionRef] the production reference to visit.
|
61
61
|
def visit_prod_ref(aProdRef)
|
@@ -64,18 +64,18 @@ class GrammarVisitor
|
|
64
64
|
broadcast(:after_non_terminal, production)
|
65
65
|
end
|
66
66
|
|
67
|
-
# Visit event. The visitor is visiting the
|
67
|
+
# Visit event. The visitor is visiting the
|
68
68
|
# given terminal symbol.
|
69
69
|
# @param aTerminal [Object] the terminal to visit.
|
70
70
|
def visit_terminal(aTerminal)
|
71
71
|
broadcast(:before_terminal, aTerminal)
|
72
72
|
broadcast(:after_terminal, aTerminal)
|
73
73
|
end
|
74
|
-
|
74
|
+
|
75
75
|
# Visit event. The visitor has completed its visit of the given rhs.
|
76
76
|
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
77
77
|
def end_visit_rhs(rhs)
|
78
|
-
broadcast(:after_rhs, rhs)
|
78
|
+
broadcast(:after_rhs, rhs)
|
79
79
|
end
|
80
80
|
|
81
81
|
# Visit event. The visitor has completed its visit of the given production.
|
@@ -91,6 +91,7 @@ class GrammarVisitor
|
|
91
91
|
end
|
92
92
|
|
93
93
|
private
|
94
|
+
|
94
95
|
# Send a notification to all subscribers.
|
95
96
|
# @param msg [Symbol] event to notify
|
96
97
|
# @param args [Array] arguments of the notification.
|
data/lib/sequitur/production.rb
CHANGED
@@ -78,9 +78,9 @@ class Production
|
|
78
78
|
# Look in the rhs for all the references to a production passed as argument.
|
79
79
|
# @param a_prod [Production or ProductionRef] The production to search for.
|
80
80
|
# @return [Array] the array of ProductionRef to the passed production
|
81
|
-
def references_of(
|
82
|
-
|
83
|
-
return
|
81
|
+
def references_of(a_prod)
|
82
|
+
real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
|
83
|
+
return rhs.references_of(real_prod)
|
84
84
|
end
|
85
85
|
|
86
86
|
|
@@ -158,7 +158,7 @@ class Production
|
|
158
158
|
# Clear the right-hand side.
|
159
159
|
# Any referenced production has its reference counter decremented.
|
160
160
|
def clear_rhs()
|
161
|
-
rhs.clear
|
161
|
+
rhs.clear
|
162
162
|
end
|
163
163
|
|
164
164
|
# Find all the positions where the digram occurs in the rhs
|
@@ -9,7 +9,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
9
9
|
class SequiturGrammar < DynamicGrammar
|
10
10
|
|
11
11
|
# Build the grammar from an enumerator of tokens.
|
12
|
-
# @param anEnum [Enumerator] an enumerator that will iterate
|
12
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
13
13
|
# over the input tokens.
|
14
14
|
def initialize(anEnum)
|
15
15
|
super()
|
@@ -25,12 +25,12 @@ class SequiturGrammar < DynamicGrammar
|
|
25
25
|
|
26
26
|
private
|
27
27
|
|
28
|
-
# Struct used for internal purposes
|
29
|
-
CollisionDiagnosis = Struct.new(
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
)
|
28
|
+
# Struct used for internal purposes
|
29
|
+
CollisionDiagnosis = Struct.new(
|
30
|
+
:collision_found, # true if collision detected
|
31
|
+
:digram, # The digram involved in a collision
|
32
|
+
:productions # The productions where the digram occurs
|
33
|
+
)
|
34
34
|
|
35
35
|
|
36
36
|
# Assuming that a new input token was added to the start production,
|
@@ -49,13 +49,13 @@ CollisionDiagnosis = Struct.new(
|
|
49
49
|
loop do
|
50
50
|
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
51
51
|
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
52
|
-
|
53
|
-
|
54
|
-
restore_utility(
|
52
|
+
|
53
|
+
prod_index = detect_useless_production
|
54
|
+
restore_utility(prod_index) unless prod_index.nil?
|
55
55
|
|
56
56
|
unicity_diagnosis = detect_collision
|
57
|
-
|
58
|
-
break unless unicity_diagnosis.collision_found ||
|
57
|
+
prod_index = detect_useless_production
|
58
|
+
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
@@ -72,7 +72,7 @@ CollisionDiagnosis = Struct.new(
|
|
72
72
|
its_key = a_digr.key
|
73
73
|
if found_so_far.include? its_key
|
74
74
|
orig_digr = found_so_far[its_key]
|
75
|
-
# Disregard sequence like a a a
|
75
|
+
# Disregard sequence like a a a
|
76
76
|
if ((orig_digr.production == a_prod) && a_digr.repeating? &&
|
77
77
|
(orig_digr == a_digr))
|
78
78
|
next
|
@@ -112,20 +112,23 @@ CollisionDiagnosis = Struct.new(
|
|
112
112
|
|
113
113
|
# Return the index of a production that is used less than twice in the grammar (nil when there is none; index 0 is never reported).
|
114
114
|
def detect_useless_production()
|
115
|
-
useless = productions.
|
116
|
-
|
115
|
+
useless = productions.index { |prod| prod.refcount < 2 }
|
116
|
+
unless useless.nil?
|
117
|
+
useless = nil if useless == 0
|
118
|
+
end
|
119
|
+
return useless
|
117
120
|
end
|
118
121
|
|
119
122
|
# Given the passed production P is referenced only once.
|
120
123
|
# Then replace P by its RHS where it is referenced.
|
121
124
|
# And delete P
|
122
|
-
def restore_utility(
|
123
|
-
# Retrieve
|
124
|
-
|
125
|
+
def restore_utility(prod_index)
|
126
|
+
# Retrieve useless prod from its index
|
127
|
+
useless_prod = productions[prod_index]
|
125
128
|
|
126
129
|
# Retrieve production referencing useless one
|
127
130
|
referencing = nil
|
128
|
-
productions.each do |a_prod|
|
131
|
+
productions.reverse.each do |a_prod|
|
129
132
|
# Next line assumes non-recursive productions
|
130
133
|
next if a_prod == useless_prod
|
131
134
|
|
@@ -136,7 +139,7 @@ CollisionDiagnosis = Struct.new(
|
|
136
139
|
end
|
137
140
|
|
138
141
|
referencing.derive_step(useless_prod)
|
139
|
-
remove_production(
|
142
|
+
remove_production(prod_index)
|
140
143
|
end
|
141
144
|
|
142
145
|
# Create a new production that will have the symbols from digram
|
@@ -145,7 +148,7 @@ CollisionDiagnosis = Struct.new(
|
|
145
148
|
new_prod = Production.new
|
146
149
|
aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
|
147
150
|
add_production(new_prod)
|
148
|
-
|
151
|
+
|
149
152
|
return new_prod
|
150
153
|
end
|
151
154
|
end # class
|
@@ -17,6 +17,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
17
17
|
@symbols = orig.symbols.map do |sym|
|
18
18
|
sym.is_a?(Symbol) ? sym : sym.dup
|
19
19
|
end
|
20
|
+
invalidate_refs
|
20
21
|
end
|
21
22
|
|
22
23
|
public
|
@@ -26,6 +27,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
26
27
|
refs = references
|
27
28
|
refs.each(&:unbind)
|
28
29
|
@symbols = []
|
30
|
+
invalidate_refs
|
29
31
|
end
|
30
32
|
|
31
33
|
# Tell whether the sequence is empty.
|
@@ -44,6 +46,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
44
46
|
# @param aSymbol [Object] The symbol to append.
|
45
47
|
def <<(aSymbol)
|
46
48
|
symbols << aSymbol
|
49
|
+
if aSymbol.is_a?(ProductionRef)
|
50
|
+
@memo_references ||= []
|
51
|
+
@memo_references << aSymbol
|
52
|
+
end
|
47
53
|
end
|
48
54
|
|
49
55
|
# Retrieve the element from the sequence at given position.
|
@@ -58,13 +64,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
58
64
|
# @return true when an item from self equals the corresponding
|
59
65
|
# item from 'other'
|
60
66
|
def ==(other)
|
61
|
-
return true if
|
67
|
+
return true if object_id == other.object_id
|
62
68
|
|
63
69
|
case other
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
when SymbolSequence
|
71
|
+
same = symbols == other.symbols
|
72
|
+
when Array
|
73
|
+
same = symbols == other
|
68
74
|
else
|
69
75
|
same = false
|
70
76
|
end
|
@@ -76,7 +82,18 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
76
82
|
# Select the references to production appearing in the rhs.
|
77
83
|
# @return [Array of ProductionRef]
|
78
84
|
def references()
|
79
|
-
|
85
|
+
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
86
|
+
return @memo_references
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
# Select the references of the given production appearing in the rhs.
|
91
|
+
# @param aProduction [Production]
|
92
|
+
# @return [Array of ProductionRef]
|
93
|
+
def references_of(aProduction)
|
94
|
+
return [] if references.empty?
|
95
|
+
result = references.select { |a_ref| a_ref == aProduction }
|
96
|
+
return result
|
80
97
|
end
|
81
98
|
|
82
99
|
|
@@ -96,11 +113,12 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
96
113
|
|
97
114
|
# Insert at position the elements from another sequence.
|
98
115
|
# @param position [Fixnum] A zero-based index of the symbols to replace.
|
99
|
-
# @param another [
|
116
|
+
# @param another [SymbolSequence] A production with a two-elements rhs
|
100
117
|
# (a single digram).
|
101
118
|
def insert_at(position, another)
|
102
119
|
klone = another.dup
|
103
120
|
symbols.insert(position, *klone.symbols)
|
121
|
+
invalidate_refs
|
104
122
|
end
|
105
123
|
|
106
124
|
# Given that the production P passed as argument has exactly 2 symbols
|
@@ -114,16 +132,23 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
114
132
|
if symbols[index].is_a?(ProductionRef)
|
115
133
|
symbols[index].bind_to(aProduction)
|
116
134
|
else
|
117
|
-
|
135
|
+
new_ref = ProductionRef.new(aProduction)
|
136
|
+
symbols[index] = new_ref
|
137
|
+
@memo_references ||= []
|
138
|
+
@memo_references << new_ref
|
118
139
|
end
|
119
140
|
index1 = index + 1
|
120
|
-
|
141
|
+
if symbols[index1].is_a?(ProductionRef)
|
142
|
+
symbols[index1].unbind
|
143
|
+
invalidate_refs
|
144
|
+
end
|
121
145
|
delete_at(index1)
|
122
146
|
end
|
123
147
|
|
124
148
|
# Remove the element at given position
|
125
149
|
# @param position [Fixnum] a zero-based index.
|
126
150
|
def delete_at(position)
|
151
|
+
invalidate_refs if symbols[position].is_a?(ProductionRef)
|
127
152
|
symbols.delete_at(position)
|
128
153
|
end
|
129
154
|
|
@@ -144,7 +169,14 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
144
169
|
|
145
170
|
aVisitor.end_visit_rhs(self)
|
146
171
|
end
|
172
|
+
|
173
|
+
private
|
174
|
+
|
175
|
+
def invalidate_refs()
|
176
|
+
@memo_references = nil
|
177
|
+
@lookup_references = nil
|
178
|
+
end
|
147
179
|
|
148
180
|
end # class
|
149
181
|
|
150
|
-
end # module
|
182
|
+
end # module
|
@@ -77,18 +77,19 @@ describe Production do
|
|
77
77
|
# Case 2: production with one reference
|
78
78
|
subject.append_symbol(p_a)
|
79
79
|
expect(subject.references).to eq([p_a])
|
80
|
-
expect(subject.references_of(p_a)).to eq([p_a])
|
80
|
+
expect(subject.references_of(p_a).map(&:production)).to eq([p_a])
|
81
|
+
|
81
82
|
|
82
83
|
# Case 3: production with repeated references
|
83
84
|
subject.append_symbol(p_a) # second time
|
84
85
|
expect(subject.references).to eq([p_a, p_a])
|
85
|
-
expect(subject.references_of(p_a)).to eq([p_a, p_a])
|
86
|
+
expect(subject.references_of(p_a).map(&:production)).to eq([p_a, p_a])
|
86
87
|
|
87
88
|
|
88
89
|
# Case 4: production with multiple distinct references
|
89
90
|
subject.append_symbol(p_bc)
|
90
91
|
expect(subject.references).to eq([p_a, p_a, p_bc])
|
91
|
-
expect(subject.references_of(p_bc)).to eq([p_bc])
|
92
|
+
expect(subject.references_of(p_bc).map(&:production)).to eq([p_bc])
|
92
93
|
end
|
93
94
|
|
94
95
|
it 'should know the position(s) of a given digram' do
|
@@ -9,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
9
9
|
|
10
10
|
describe SymbolSequence do
|
11
11
|
|
12
|
-
let(:instance) { SymbolSequence.new }
|
12
|
+
let(:instance) { SymbolSequence.new }
|
13
13
|
|
14
14
|
context 'Creation and initialization:' do
|
15
15
|
|
@@ -24,34 +24,35 @@ describe SymbolSequence do
|
|
24
24
|
end # context
|
25
25
|
|
26
26
|
context 'Provided services:' do
|
27
|
+
let(:a_prod) { Production.new }
|
28
|
+
|
27
29
|
subject do
|
28
30
|
an_instance = SymbolSequence.new
|
29
31
|
[:a, :b, :c].each { |a_sym| an_instance << a_sym }
|
30
32
|
an_instance
|
31
33
|
end
|
32
|
-
|
34
|
+
|
33
35
|
it 'should deep-copy clone itself' do
|
34
|
-
a_prod = Production.new
|
35
36
|
ref = ProductionRef.new(a_prod)
|
36
|
-
|
37
|
+
|
37
38
|
a, c = 'a', 'c'
|
38
39
|
[a, ref, c].each { |ch| instance << ch }
|
39
40
|
clone_a = instance.clone
|
40
|
-
|
41
|
+
|
41
42
|
# Check that cloning works
|
42
43
|
expect(clone_a).to eq(instance)
|
43
|
-
|
44
|
+
|
44
45
|
# Reference objects are distinct but point to the same production
|
45
46
|
expect(clone_a.symbols[1].object_id).not_to eq(instance.symbols[1])
|
46
|
-
|
47
|
+
|
47
48
|
# Modifying the clone...
|
48
49
|
clone_a.symbols[1] = 'diff'
|
49
50
|
expect(clone_a).not_to eq(instance)
|
50
|
-
|
51
|
+
|
51
52
|
# ... should leave original unchanged
|
52
53
|
expect(instance.symbols[1]).to eq(ref)
|
53
54
|
end
|
54
|
-
|
55
|
+
|
55
56
|
|
56
57
|
it 'should tell that it is equal to itself' do
|
57
58
|
# Case: Non-empty sequence
|
@@ -65,25 +66,53 @@ describe SymbolSequence do
|
|
65
66
|
expect(instance).to eq(instance)
|
66
67
|
|
67
68
|
expect(subject).not_to eq(instance)
|
68
|
-
[:a, :b, :c].each { |a_sym| instance << a_sym }
|
69
|
+
[:a, :b, :c].each { |a_sym| instance << a_sym }
|
69
70
|
expect(subject).to eq(instance)
|
70
|
-
|
71
|
+
|
71
72
|
# Check that element order is relevant
|
72
73
|
instance.symbols.rotate!
|
73
|
-
expect(subject).not_to eq(instance)
|
74
|
+
expect(subject).not_to eq(instance)
|
74
75
|
end
|
75
|
-
|
76
|
+
|
76
77
|
it 'should know whether it is equal to an array' do
|
77
78
|
expect(subject).to eq([:a, :b, :c])
|
78
|
-
|
79
|
+
|
79
80
|
# Check that element order is relevant
|
80
|
-
expect(subject).not_to eq([:c, :b, :a])
|
81
|
+
expect(subject).not_to eq([:c, :b, :a])
|
81
82
|
end
|
82
|
-
|
83
|
+
|
83
84
|
it 'should know that is not equal to something else' do
|
84
85
|
expect(subject).not_to eq(:abc)
|
85
86
|
end
|
86
87
|
|
88
|
+
|
89
|
+
it 'should know its references' do
|
90
|
+
ref = ProductionRef.new(a_prod)
|
91
|
+
2.times { subject << ref }
|
92
|
+
|
93
|
+
refs = subject.references
|
94
|
+
expect(refs.size).to eq(2)
|
95
|
+
expect(refs).to eq([ref, ref])
|
96
|
+
|
97
|
+
refs = subject.references
|
98
|
+
expect(refs.size).to eq(2)
|
99
|
+
expect(refs).to eq([ref, ref])
|
100
|
+
specific_refs = subject.references_of(a_prod)
|
101
|
+
expect(specific_refs).to eq(refs)
|
102
|
+
|
103
|
+
|
104
|
+
another = Production.new
|
105
|
+
another_ref = ProductionRef.new(another)
|
106
|
+
subject << another_ref
|
107
|
+
refs = subject.references
|
108
|
+
expect(refs.size).to eq(3)
|
109
|
+
expect(refs).to eq([ref, ref, another])
|
110
|
+
specific_refs = subject.references_of(a_prod)
|
111
|
+
expect(specific_refs).to eq([ref, ref])
|
112
|
+
specific_refs = subject.references_of(another)
|
113
|
+
expect(specific_refs).to eq([another])
|
114
|
+
end
|
115
|
+
|
87
116
|
end # context
|
88
117
|
|
89
118
|
|
@@ -92,4 +121,4 @@ end # describe
|
|
92
121
|
|
93
122
|
end # module
|
94
123
|
|
95
|
-
# End of file
|
124
|
+
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|