sequitur 0.1.09 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2JmMGMzOGFjNjg3YmQzZDNlMWNjODI2ZmIzNjA4MTM4ZDNkMzUzZA==
4
+ ZGMxY2RmOWZlOWI3MzljMjNmZmEyZDFjZDUzZDg2YzQzNzM2MmRhZQ==
5
5
  data.tar.gz: !binary |-
6
- ZTAyMGI0MDYzMDBkMDJlNWFhNjE2Yjc5NTUyZjhiNDEzZjEzYzg1ZQ==
6
+ YzNhZGJmNWY4YTg3MWY3ODI5YzE4ZTg5MTUyNTlkMGFmMzhlNjdiYw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- OGQ1YjVhY2M2MmM2YWNiODYyYzQxMmRjNmFmYWY2NjYyMGYyMWM5YjVjNzFh
10
- OTVmMDhhZTE2YTRhMWNmODVhYTJjMDUyMDNlYjk0NmJjMjRkN2Q0ODcyYTRj
11
- YzAyZDA2ZDJiY2E1YWE5ZTYyYWExNmJkMzI3ODYyZDgwZGUyMzI=
9
+ NWY1NDhiZjUzNjJjNTdlMmE2ZjI0ODc3MDNlMjQ2MGM3ZjEzMWQxNzUxZDgz
10
+ YmE4ZmFlNzI4NDc2NGE0YjQzOTJhNmViYmQ4MThjZTg0YmEyN2JiOTlkMzk4
11
+ MTc4MWUwMWE3ZWY0NTQ0Y2ZlYjU3MmVkODNiZjUwMzgxZWNlMDQ=
12
12
  data.tar.gz: !binary |-
13
- N2EyZmU3NTNiMTc1YmZkZjk4ODNmMzYyY2ExYzI0YTNmNTFlYWY4OWUxZTQ4
14
- NjJmMWUzYzM3Yjc4ZDU5YzIyNTEwZTI5OWI1ZDlhY2RiYzQ3Yzc5OTgxZDZk
15
- MzY1MzEwMDBjYTliYmQxYTEwYzQ4MjQzYzFjYjVhNDgxMGE4YTA=
13
+ MWI3N2U3ZGU5N2Y4ZTEzNDdmYjQyYjExOWIzYWRjMWE0ZDUxYmM2MjI4YzU0
14
+ ODZiZTQ3ZDlkMWMxMmY2ZWQwM2JjNmZiNmU1ZDRhMTFhMjY4ZjI5MjJmNWFl
15
+ MDYxZmU0ZWUwOTdhNDBkODE4NDJjNzQ0MGRmMWQwMDIxMjkwMTI=
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ### 0.1.10 / 2014-10-05
2
+ * [CHANGE] Code refactoring for performance. Impacted classes: `SequiturGrammar`, `SymbolSequence` and `Production`.
3
+
1
4
  ### 0.1.09 / 2014-10-03
2
5
  * [NEW] Class `SymbolSequence`. Part of code refactoring that reduces code complexity reported by CodeClimate.
3
6
  * [CHANGE] Class `Production` refactored to use a SymbolSequence instance as its rhs.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Sequitur # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.1.09'
6
+ Version = '0.1.10'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = 'Ruby implementation of the Sequitur algorithm'
@@ -27,7 +27,7 @@ class Digram
27
27
  # the sequence symbol1 symbol2 appears.
28
28
  def initialize(symbol1, symbol2, aProduction)
29
29
  @symbols = [symbol1, symbol2]
30
- @key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
30
+ @key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
31
31
  @production = aProduction
32
32
  end
33
33
 
@@ -48,14 +48,14 @@ class GrammarVisitor
48
48
  def start_visit_production(aProduction)
49
49
  broadcast(:before_production, aProduction)
50
50
  end
51
-
51
+
52
52
  # Visit event. The visitor is about to visit the given rhs of production.
53
- # @param rhs [SymbolSequence] the rhs of a production to visit.
53
+ # @param rhs [SymbolSequence] the rhs of a production to visit.
54
54
  def start_visit_rhs(rhs)
55
- broadcast(:before_rhs, rhs)
55
+ broadcast(:before_rhs, rhs)
56
56
  end
57
57
 
58
- # Visit event. The visitor is visiting the
58
+ # Visit event. The visitor is visiting the
59
59
  # given reference production (= non-terminal symbol).
60
60
  # @param aProdRef [ProductionRef] the production reference to visit.
61
61
  def visit_prod_ref(aProdRef)
@@ -64,18 +64,18 @@ class GrammarVisitor
64
64
  broadcast(:after_non_terminal, production)
65
65
  end
66
66
 
67
- # Visit event. The visitor is visiting the
67
+ # Visit event. The visitor is visiting the
68
68
  # given terminal symbol.
69
69
  # @param aTerminal [Object] the terminal to visit.
70
70
  def visit_terminal(aTerminal)
71
71
  broadcast(:before_terminal, aTerminal)
72
72
  broadcast(:after_terminal, aTerminal)
73
73
  end
74
-
74
+
75
75
  # Visit event. The visitor has completed its visit of the given rhs.
76
76
  # @param rhs [SymbolSequence] the rhs of a production to visit.
77
77
  def end_visit_rhs(rhs)
78
- broadcast(:after_rhs, rhs)
78
+ broadcast(:after_rhs, rhs)
79
79
  end
80
80
 
81
81
  # Visit event. The visitor has completed its visit of the given production.
@@ -91,6 +91,7 @@ class GrammarVisitor
91
91
  end
92
92
 
93
93
  private
94
+
94
95
  # Send a notification to all subscribers.
95
96
  # @param msg [Symbol] event to notify
96
97
  # @param args [Array] arguments of the notification.
@@ -78,9 +78,9 @@ class Production
78
78
  # Look in the rhs for all the references to the production passed as argument.
79
79
  # aProduction [Production or ProductionRef] The production to search for.
80
80
  # @return [Array] the array of ProductionRef to the passed production
81
- def references_of(aProduction)
82
- refs = references
83
- return refs.select { |a_ref| a_ref == aProduction }
81
+ def references_of(a_prod)
82
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
83
+ return rhs.references_of(real_prod)
84
84
  end
85
85
 
86
86
 
@@ -158,7 +158,7 @@ class Production
158
158
  # Clear the right-hand side.
159
159
  # Any referenced production has its reference counter decremented.
160
160
  def clear_rhs()
161
- rhs.clear()
161
+ rhs.clear
162
162
  end
163
163
 
164
164
  # Find all the positions where the digram occurs in the rhs
@@ -9,7 +9,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
9
9
  class SequiturGrammar < DynamicGrammar
10
10
 
11
11
  # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
13
  # over the input tokens.
14
14
  def initialize(anEnum)
15
15
  super()
@@ -25,12 +25,12 @@ class SequiturGrammar < DynamicGrammar
25
25
 
26
26
  private
27
27
 
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions # The productions where the digram occurs
33
- )
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
34
34
 
35
35
 
36
36
  # Assuming that a new input token was added to the start production,
@@ -49,13 +49,13 @@ CollisionDiagnosis = Struct.new(
49
49
  loop do
50
50
  unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
51
  restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- useless_prod = detect_useless_production
54
- restore_utility(useless_prod) if useless_prod
52
+
53
+ prod_index = detect_useless_production
54
+ restore_utility(prod_index) unless prod_index.nil?
55
55
 
56
56
  unicity_diagnosis = detect_collision
57
- useless_prod = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || useless_prod
57
+ prod_index = detect_useless_production
58
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
59
  end
60
60
  end
61
61
 
@@ -72,7 +72,7 @@ CollisionDiagnosis = Struct.new(
72
72
  its_key = a_digr.key
73
73
  if found_so_far.include? its_key
74
74
  orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
75
+ # Disregard sequence like a a a
76
76
  if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
77
  (orig_digr == a_digr))
78
78
  next
@@ -112,20 +112,23 @@ CollisionDiagnosis = Struct.new(
112
112
 
113
113
  # Return a production that is used less than twice in the grammar.
114
114
  def detect_useless_production()
115
- useless = productions.find { |prod| prod.refcount < 2 }
116
- return (useless == productions[0]) ? nil : useless
115
+ useless = productions.index { |prod| prod.refcount < 2 }
116
+ unless useless.nil?
117
+ useless = nil if useless == 0
118
+ end
119
+ return useless
117
120
  end
118
121
 
119
122
  # Given the passed production P is referenced only once.
120
123
  # Then replace P by its RHS where it is referenced.
121
124
  # And delete P
122
- def restore_utility(useless_prod)
123
- # Retrieve index of useless_prod
124
- index = productions.index(useless_prod)
125
+ def restore_utility(prod_index)
126
+ # Retrieve useless prod from its index
127
+ useless_prod = productions[prod_index]
125
128
 
126
129
  # Retrieve production referencing useless one
127
130
  referencing = nil
128
- productions.each do |a_prod|
131
+ productions.reverse.each do |a_prod|
129
132
  # Next line assumes non-recursive productions
130
133
  next if a_prod == useless_prod
131
134
 
@@ -136,7 +139,7 @@ CollisionDiagnosis = Struct.new(
136
139
  end
137
140
 
138
141
  referencing.derive_step(useless_prod)
139
- remove_production(index)
142
+ remove_production(prod_index)
140
143
  end
141
144
 
142
145
  # Create a new production that will have the symbols from digram
@@ -145,7 +148,7 @@ CollisionDiagnosis = Struct.new(
145
148
  new_prod = Production.new
146
149
  aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
147
150
  add_production(new_prod)
148
-
151
+
149
152
  return new_prod
150
153
  end
151
154
  end # class
@@ -17,6 +17,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
17
17
  @symbols = orig.symbols.map do |sym|
18
18
  sym.is_a?(Symbol) ? sym : sym.dup
19
19
  end
20
+ invalidate_refs
20
21
  end
21
22
 
22
23
  public
@@ -26,6 +27,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
26
27
  refs = references
27
28
  refs.each(&:unbind)
28
29
  @symbols = []
30
+ invalidate_refs
29
31
  end
30
32
 
31
33
  # Tell whether the sequence is empty.
@@ -44,6 +46,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
44
46
  # @param aSymbol [Object] The symbol to append.
45
47
  def <<(aSymbol)
46
48
  symbols << aSymbol
49
+ if aSymbol.is_a?(ProductionRef)
50
+ @memo_references ||= []
51
+ @memo_references << aSymbol
52
+ end
47
53
  end
48
54
 
49
55
  # Retrieve the element from the sequence at given position.
@@ -58,13 +64,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
58
64
  # @return true when an item from self equals the corresponding
59
65
  # item from 'other'
60
66
  def ==(other)
61
- return true if self.object_id == other.object_id
67
+ return true if object_id == other.object_id
62
68
 
63
69
  case other
64
- when SymbolSequence
65
- same = self.symbols == other.symbols
66
- when Array
67
- same = self.symbols == other
70
+ when SymbolSequence
71
+ same = symbols == other.symbols
72
+ when Array
73
+ same = symbols == other
68
74
  else
69
75
  same = false
70
76
  end
@@ -76,7 +82,18 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
76
82
  # Select the references to production appearing in the rhs.
77
83
  # @return [Array of ProductionRef]
78
84
  def references()
79
- return symbols.select { |symb| symb.is_a?(ProductionRef) }
85
+ @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
+ return @memo_references
87
+ end
88
+
89
+
90
+ # Select the references of the given production appearing in the rhs.
91
+ # @param aProduction [Production]
92
+ # @return [Array of ProductionRef]
93
+ def references_of(aProduction)
94
+ return [] if references.empty?
95
+ result = references.select { |a_ref| a_ref == aProduction }
96
+ return result
80
97
  end
81
98
 
82
99
 
@@ -96,11 +113,12 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
96
113
 
97
114
  # Insert at position the elements from another sequence.
98
115
  # @param position [Fixnum] A zero-based index of the symbols to replace.
99
- # @param another [Production] A production with a two-elements rhs
116
+ # @param another [SymbolSequence] A production with a two-elements rhs
100
117
  # (a single digram).
101
118
  def insert_at(position, another)
102
119
  klone = another.dup
103
120
  symbols.insert(position, *klone.symbols)
121
+ invalidate_refs
104
122
  end
105
123
 
106
124
  # Given that the production P passed as argument has exactly 2 symbols
@@ -114,16 +132,23 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
114
132
  if symbols[index].is_a?(ProductionRef)
115
133
  symbols[index].bind_to(aProduction)
116
134
  else
117
- symbols[index] = ProductionRef.new(aProduction)
135
+ new_ref = ProductionRef.new(aProduction)
136
+ symbols[index] = new_ref
137
+ @memo_references ||= []
138
+ @memo_references << new_ref
118
139
  end
119
140
  index1 = index + 1
120
- symbols[index1].unbind if symbols[index1].is_a?(ProductionRef)
141
+ if symbols[index1].is_a?(ProductionRef)
142
+ symbols[index1].unbind
143
+ invalidate_refs
144
+ end
121
145
  delete_at(index1)
122
146
  end
123
147
 
124
148
  # Remove the element at given position
125
149
  # @param position [Fixnum] a zero-based index.
126
150
  def delete_at(position)
151
+ invalidate_refs if symbols[position].is_a?(ProductionRef)
127
152
  symbols.delete_at(position)
128
153
  end
129
154
 
@@ -144,7 +169,14 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
144
169
 
145
170
  aVisitor.end_visit_rhs(self)
146
171
  end
172
+
173
+ private
174
+
175
+ def invalidate_refs()
176
+ @memo_references = nil
177
+ @lookup_references = nil
178
+ end
147
179
 
148
180
  end # class
149
181
 
150
- end # module
182
+ end # module
@@ -77,18 +77,19 @@ describe Production do
77
77
  # Case 2: production with one reference
78
78
  subject.append_symbol(p_a)
79
79
  expect(subject.references).to eq([p_a])
80
- expect(subject.references_of(p_a)).to eq([p_a])
80
+ expect(subject.references_of(p_a).map(&:production)).to eq([p_a])
81
+
81
82
 
82
83
  # Case 3: production with repeated references
83
84
  subject.append_symbol(p_a) # second time
84
85
  expect(subject.references).to eq([p_a, p_a])
85
- expect(subject.references_of(p_a)).to eq([p_a, p_a])
86
+ expect(subject.references_of(p_a).map(&:production)).to eq([p_a, p_a])
86
87
 
87
88
 
88
89
  # Case 4: production with multiple distinct references
89
90
  subject.append_symbol(p_bc)
90
91
  expect(subject.references).to eq([p_a, p_a, p_bc])
91
- expect(subject.references_of(p_bc)).to eq([p_bc])
92
+ expect(subject.references_of(p_bc).map(&:production)).to eq([p_bc])
92
93
  end
93
94
 
94
95
  it 'should know the position(s) of a given digram' do
@@ -9,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
9
9
 
10
10
  describe SymbolSequence do
11
11
 
12
- let(:instance) { SymbolSequence.new }
12
+ let(:instance) { SymbolSequence.new }
13
13
 
14
14
  context 'Creation and initialization:' do
15
15
 
@@ -24,34 +24,35 @@ describe SymbolSequence do
24
24
  end # context
25
25
 
26
26
  context 'Provided services:' do
27
+ let(:a_prod) { Production.new }
28
+
27
29
  subject do
28
30
  an_instance = SymbolSequence.new
29
31
  [:a, :b, :c].each { |a_sym| an_instance << a_sym }
30
32
  an_instance
31
33
  end
32
-
34
+
33
35
  it 'should deep-copy clone itself' do
34
- a_prod = Production.new
35
36
  ref = ProductionRef.new(a_prod)
36
-
37
+
37
38
  a, c = 'a', 'c'
38
39
  [a, ref, c].each { |ch| instance << ch }
39
40
  clone_a = instance.clone
40
-
41
+
41
42
  # Check that cloning works
42
43
  expect(clone_a).to eq(instance)
43
-
44
+
44
45
  # Reference objects are distinct but points to same production
45
46
  expect(clone_a.symbols[1].object_id).not_to eq(instance.symbols[1])
46
-
47
+
47
48
  # Modifying the clone...
48
49
  clone_a.symbols[1] = 'diff'
49
50
  expect(clone_a).not_to eq(instance)
50
-
51
+
51
52
  # ... should leave original unchanged
52
53
  expect(instance.symbols[1]).to eq(ref)
53
54
  end
54
-
55
+
55
56
 
56
57
  it 'should tell that it is equal to itself' do
57
58
  # Case: Non-empty sequence
@@ -65,25 +66,53 @@ describe SymbolSequence do
65
66
  expect(instance).to eq(instance)
66
67
 
67
68
  expect(subject).not_to eq(instance)
68
- [:a, :b, :c].each { |a_sym| instance << a_sym }
69
+ [:a, :b, :c].each { |a_sym| instance << a_sym }
69
70
  expect(subject).to eq(instance)
70
-
71
+
71
72
  # Check that element order is relevant
72
73
  instance.symbols.rotate!
73
- expect(subject).not_to eq(instance)
74
+ expect(subject).not_to eq(instance)
74
75
  end
75
-
76
+
76
77
  it 'should know whether it is equal to an array' do
77
78
  expect(subject).to eq([:a, :b, :c])
78
-
79
+
79
80
  # Check that element order is relevant
80
- expect(subject).not_to eq([:c, :b, :a])
81
+ expect(subject).not_to eq([:c, :b, :a])
81
82
  end
82
-
83
+
83
84
  it 'should know that is not equal to something else' do
84
85
  expect(subject).not_to eq(:abc)
85
86
  end
86
87
 
88
+
89
+ it 'should know its references' do
90
+ ref = ProductionRef.new(a_prod)
91
+ 2.times { subject << ref }
92
+
93
+ refs = subject.references
94
+ expect(refs.size).to eq(2)
95
+ expect(refs).to eq([ref, ref])
96
+
97
+ refs = subject.references
98
+ expect(refs.size).to eq(2)
99
+ expect(refs).to eq([ref, ref])
100
+ specific_refs = subject.references_of(a_prod)
101
+ expect(specific_refs).to eq(refs)
102
+
103
+
104
+ another = Production.new
105
+ another_ref = ProductionRef.new(another)
106
+ subject << another_ref
107
+ refs = subject.references
108
+ expect(refs.size).to eq(3)
109
+ expect(refs).to eq([ref, ref, another])
110
+ specific_refs = subject.references_of(a_prod)
111
+ expect(specific_refs).to eq([ref, ref])
112
+ specific_refs = subject.references_of(another)
113
+ expect(specific_refs).to eq([another])
114
+ end
115
+
87
116
  end # context
88
117
 
89
118
 
@@ -92,4 +121,4 @@ end # describe
92
121
 
93
122
  end # module
94
123
 
95
- # End of file
124
+ # End of file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequitur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.09
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-03 00:00:00.000000000 Z
11
+ date: 2014-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake