sequitur 0.1.09 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2JmMGMzOGFjNjg3YmQzZDNlMWNjODI2ZmIzNjA4MTM4ZDNkMzUzZA==
4
+ ZGMxY2RmOWZlOWI3MzljMjNmZmEyZDFjZDUzZDg2YzQzNzM2MmRhZQ==
5
5
  data.tar.gz: !binary |-
6
- ZTAyMGI0MDYzMDBkMDJlNWFhNjE2Yjc5NTUyZjhiNDEzZjEzYzg1ZQ==
6
+ YzNhZGJmNWY4YTg3MWY3ODI5YzE4ZTg5MTUyNTlkMGFmMzhlNjdiYw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- OGQ1YjVhY2M2MmM2YWNiODYyYzQxMmRjNmFmYWY2NjYyMGYyMWM5YjVjNzFh
10
- OTVmMDhhZTE2YTRhMWNmODVhYTJjMDUyMDNlYjk0NmJjMjRkN2Q0ODcyYTRj
11
- YzAyZDA2ZDJiY2E1YWE5ZTYyYWExNmJkMzI3ODYyZDgwZGUyMzI=
9
+ NWY1NDhiZjUzNjJjNTdlMmE2ZjI0ODc3MDNlMjQ2MGM3ZjEzMWQxNzUxZDgz
10
+ YmE4ZmFlNzI4NDc2NGE0YjQzOTJhNmViYmQ4MThjZTg0YmEyN2JiOTlkMzk4
11
+ MTc4MWUwMWE3ZWY0NTQ0Y2ZlYjU3MmVkODNiZjUwMzgxZWNlMDQ=
12
12
  data.tar.gz: !binary |-
13
- N2EyZmU3NTNiMTc1YmZkZjk4ODNmMzYyY2ExYzI0YTNmNTFlYWY4OWUxZTQ4
14
- NjJmMWUzYzM3Yjc4ZDU5YzIyNTEwZTI5OWI1ZDlhY2RiYzQ3Yzc5OTgxZDZk
15
- MzY1MzEwMDBjYTliYmQxYTEwYzQ4MjQzYzFjYjVhNDgxMGE4YTA=
13
+ MWI3N2U3ZGU5N2Y4ZTEzNDdmYjQyYjExOWIzYWRjMWE0ZDUxYmM2MjI4YzU0
14
+ ODZiZTQ3ZDlkMWMxMmY2ZWQwM2JjNmZiNmU1ZDRhMTFhMjY4ZjI5MjJmNWFl
15
+ MDYxZmU0ZWUwOTdhNDBkODE4NDJjNzQ0MGRmMWQwMDIxMjkwMTI=
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ### 0.1.10 / 2014-10-05
2
+ * [CHANGE] Code refactoring for performance. Impacted classes: `SequiturGrammar`, `SymbolSequence` and `Production`.
3
+
1
4
  ### 0.1.09 / 2014-10-03
2
5
  * [NEW] Class `SymbolSequence`. Part of code refactoring that reduces code complexity reported by CodeClimate.
3
6
  * [CHANGE] Class `Production` refactored to use a SymbolSequence instance as its rhs.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Sequitur # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.1.09'
6
+ Version = '0.1.10'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = 'Ruby implementation of the Sequitur algorithm'
@@ -27,7 +27,7 @@ class Digram
27
27
  # the sequence symbol1 symbol2 appears.
28
28
  def initialize(symbol1, symbol2, aProduction)
29
29
  @symbols = [symbol1, symbol2]
30
- @key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
30
+ @key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
31
31
  @production = aProduction
32
32
  end
33
33
 
@@ -48,14 +48,14 @@ class GrammarVisitor
48
48
  def start_visit_production(aProduction)
49
49
  broadcast(:before_production, aProduction)
50
50
  end
51
-
51
+
52
52
  # Visit event. The visitor is about to visit the given rhs of production.
53
- # @param rhs [SymbolSequence] the rhs of a production to visit.
53
+ # @param rhs [SymbolSequence] the rhs of a production to visit.
54
54
  def start_visit_rhs(rhs)
55
- broadcast(:before_rhs, rhs)
55
+ broadcast(:before_rhs, rhs)
56
56
  end
57
57
 
58
- # Visit event. The visitor is visiting the
58
+ # Visit event. The visitor is visiting the
59
59
  # given reference production (= non-terminal symbol).
60
60
  # @param aProdRef [ProductionRef] the production reference to visit.
61
61
  def visit_prod_ref(aProdRef)
@@ -64,18 +64,18 @@ class GrammarVisitor
64
64
  broadcast(:after_non_terminal, production)
65
65
  end
66
66
 
67
- # Visit event. The visitor is visiting the
67
+ # Visit event. The visitor is visiting the
68
68
  # given terminal symbol.
69
69
  # @param aTerminal [Object] the terminal to visit.
70
70
  def visit_terminal(aTerminal)
71
71
  broadcast(:before_terminal, aTerminal)
72
72
  broadcast(:after_terminal, aTerminal)
73
73
  end
74
-
74
+
75
75
  # Visit event. The visitor has completed its visit of the given rhs.
76
76
  # @param rhs [SymbolSequence] the rhs of a production to visit.
77
77
  def end_visit_rhs(rhs)
78
- broadcast(:after_rhs, rhs)
78
+ broadcast(:after_rhs, rhs)
79
79
  end
80
80
 
81
81
  # Visit event. The visitor has completed its visit of the given production.
@@ -91,6 +91,7 @@ class GrammarVisitor
91
91
  end
92
92
 
93
93
  private
94
+
94
95
  # Send a notification to all subscribers.
95
96
  # @param msg [Symbol] event to notify
96
97
  # @param args [Array] arguments of the notification.
@@ -78,9 +78,9 @@ class Production
78
78
  # Look in the rhs for all the references to the production passed as argument.
79
79
  # @param a_prod [Production or ProductionRef] The production to search for.
80
80
  # @return [Array] the array of ProductionRef to the passed production
81
- def references_of(aProduction)
82
- refs = references
83
- return refs.select { |a_ref| a_ref == aProduction }
81
+ def references_of(a_prod)
82
+ real_prod = a_prod.is_a?(ProductionRef) ? a_prod.production : a_prod
83
+ return rhs.references_of(real_prod)
84
84
  end
85
85
 
86
86
 
@@ -158,7 +158,7 @@ class Production
158
158
  # Clear the right-hand side.
159
159
  # Any referenced production has its reference counter decremented.
160
160
  def clear_rhs()
161
- rhs.clear()
161
+ rhs.clear
162
162
  end
163
163
 
164
164
  # Find all the positions where the digram occurs in the rhs
@@ -9,7 +9,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
9
9
  class SequiturGrammar < DynamicGrammar
10
10
 
11
11
  # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
12
+ # @param anEnum [Enumerator] an enumerator that will iterate
13
13
  # over the input tokens.
14
14
  def initialize(anEnum)
15
15
  super()
@@ -25,12 +25,12 @@ class SequiturGrammar < DynamicGrammar
25
25
 
26
26
  private
27
27
 
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions # The productions where the digram occurs
33
- )
28
+ # Struct used for internal purposes
29
+ CollisionDiagnosis = Struct.new(
30
+ :collision_found, # true if collision detected
31
+ :digram, # The digram involved in a collision
32
+ :productions # The productions where the digram occurs
33
+ )
34
34
 
35
35
 
36
36
  # Assuming that a new input token was added to the start production,
@@ -49,13 +49,13 @@ CollisionDiagnosis = Struct.new(
49
49
  loop do
50
50
  unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
51
  restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- useless_prod = detect_useless_production
54
- restore_utility(useless_prod) if useless_prod
52
+
53
+ prod_index = detect_useless_production
54
+ restore_utility(prod_index) unless prod_index.nil?
55
55
 
56
56
  unicity_diagnosis = detect_collision
57
- useless_prod = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || useless_prod
57
+ prod_index = detect_useless_production
58
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
59
59
  end
60
60
  end
61
61
 
@@ -72,7 +72,7 @@ CollisionDiagnosis = Struct.new(
72
72
  its_key = a_digr.key
73
73
  if found_so_far.include? its_key
74
74
  orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
75
+ # Disregard sequence like a a a
76
76
  if ((orig_digr.production == a_prod) && a_digr.repeating? &&
77
77
  (orig_digr == a_digr))
78
78
  next
@@ -112,20 +112,23 @@ CollisionDiagnosis = Struct.new(
112
112
 
113
113
  # Return the index of the first production that is used less than twice in the grammar, or nil when there is none or when that index is 0.
114
114
  def detect_useless_production()
115
- useless = productions.find { |prod| prod.refcount < 2 }
116
- return (useless == productions[0]) ? nil : useless
115
+ useless = productions.index { |prod| prod.refcount < 2 }
116
+ unless useless.nil?
117
+ useless = nil if useless == 0
118
+ end
119
+ return useless
117
120
  end
118
121
 
119
122
  # Given the passed production P is referenced only once.
120
123
  # Then replace P by its RHS where it is referenced.
121
124
  # And delete P
122
- def restore_utility(useless_prod)
123
- # Retrieve index of useless_prod
124
- index = productions.index(useless_prod)
125
+ def restore_utility(prod_index)
126
+ # Retrieve useless prod from its index
127
+ useless_prod = productions[prod_index]
125
128
 
126
129
  # Retrieve production referencing useless one
127
130
  referencing = nil
128
- productions.each do |a_prod|
131
+ productions.reverse.each do |a_prod|
129
132
  # Next line assumes non-recursive productions
130
133
  next if a_prod == useless_prod
131
134
 
@@ -136,7 +139,7 @@ CollisionDiagnosis = Struct.new(
136
139
  end
137
140
 
138
141
  referencing.derive_step(useless_prod)
139
- remove_production(index)
142
+ remove_production(prod_index)
140
143
  end
141
144
 
142
145
  # Create a new production that will have the symbols from digram
@@ -145,7 +148,7 @@ CollisionDiagnosis = Struct.new(
145
148
  new_prod = Production.new
146
149
  aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
147
150
  add_production(new_prod)
148
-
151
+
149
152
  return new_prod
150
153
  end
151
154
  end # class
@@ -17,6 +17,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
17
17
  @symbols = orig.symbols.map do |sym|
18
18
  sym.is_a?(Symbol) ? sym : sym.dup
19
19
  end
20
+ invalidate_refs
20
21
  end
21
22
 
22
23
  public
@@ -26,6 +27,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
26
27
  refs = references
27
28
  refs.each(&:unbind)
28
29
  @symbols = []
30
+ invalidate_refs
29
31
  end
30
32
 
31
33
  # Tell whether the sequence is empty.
@@ -44,6 +46,10 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
44
46
  # @param aSymbol [Object] The symbol to append.
45
47
  def <<(aSymbol)
46
48
  symbols << aSymbol
49
+ if aSymbol.is_a?(ProductionRef)
50
+ @memo_references ||= []
51
+ @memo_references << aSymbol
52
+ end
47
53
  end
48
54
 
49
55
  # Retrieve the element from the sequence at given position.
@@ -58,13 +64,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
58
64
  # @return true when an item from self equals the corresponding
59
65
  # item from 'other'
60
66
  def ==(other)
61
- return true if self.object_id == other.object_id
67
+ return true if object_id == other.object_id
62
68
 
63
69
  case other
64
- when SymbolSequence
65
- same = self.symbols == other.symbols
66
- when Array
67
- same = self.symbols == other
70
+ when SymbolSequence
71
+ same = symbols == other.symbols
72
+ when Array
73
+ same = symbols == other
68
74
  else
69
75
  same = false
70
76
  end
@@ -76,7 +82,18 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
76
82
  # Select the references to production appearing in the rhs.
77
83
  # @return [Array of ProductionRef]
78
84
  def references()
79
- return symbols.select { |symb| symb.is_a?(ProductionRef) }
85
+ @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
+ return @memo_references
87
+ end
88
+
89
+
90
+ # Select the references of the given production appearing in the rhs.
91
+ # @param aProduction [Production]
92
+ # @return [Array of ProductionRef]
93
+ def references_of(aProduction)
94
+ return [] if references.empty?
95
+ result = references.select { |a_ref| a_ref == aProduction }
96
+ return result
80
97
  end
81
98
 
82
99
 
@@ -96,11 +113,12 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
96
113
 
97
114
  # Insert at position the elements from another sequence.
98
115
  # @param position [Fixnum] A zero-based index of the symbols to replace.
99
- # @param another [Production] A production with a two-elements rhs
116
+ # @param another [SymbolSequence] A production with a two-elements rhs
100
117
  # (a single digram).
101
118
  def insert_at(position, another)
102
119
  klone = another.dup
103
120
  symbols.insert(position, *klone.symbols)
121
+ invalidate_refs
104
122
  end
105
123
 
106
124
  # Given that the production P passed as argument has exactly 2 symbols
@@ -114,16 +132,23 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
114
132
  if symbols[index].is_a?(ProductionRef)
115
133
  symbols[index].bind_to(aProduction)
116
134
  else
117
- symbols[index] = ProductionRef.new(aProduction)
135
+ new_ref = ProductionRef.new(aProduction)
136
+ symbols[index] = new_ref
137
+ @memo_references ||= []
138
+ @memo_references << new_ref
118
139
  end
119
140
  index1 = index + 1
120
- symbols[index1].unbind if symbols[index1].is_a?(ProductionRef)
141
+ if symbols[index1].is_a?(ProductionRef)
142
+ symbols[index1].unbind
143
+ invalidate_refs
144
+ end
121
145
  delete_at(index1)
122
146
  end
123
147
 
124
148
  # Remove the element at given position
125
149
  # @param position [Fixnum] a zero-based index.
126
150
  def delete_at(position)
151
+ invalidate_refs if symbols[position].is_a?(ProductionRef)
127
152
  symbols.delete_at(position)
128
153
  end
129
154
 
@@ -144,7 +169,14 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
144
169
 
145
170
  aVisitor.end_visit_rhs(self)
146
171
  end
172
+
173
+ private
174
+
175
+ def invalidate_refs()
176
+ @memo_references = nil
177
+ @lookup_references = nil
178
+ end
147
179
 
148
180
  end # class
149
181
 
150
- end # module
182
+ end # module
@@ -77,18 +77,19 @@ describe Production do
77
77
  # Case 2: production with one reference
78
78
  subject.append_symbol(p_a)
79
79
  expect(subject.references).to eq([p_a])
80
- expect(subject.references_of(p_a)).to eq([p_a])
80
+ expect(subject.references_of(p_a).map(&:production)).to eq([p_a])
81
+
81
82
 
82
83
  # Case 3: production with repeated references
83
84
  subject.append_symbol(p_a) # second time
84
85
  expect(subject.references).to eq([p_a, p_a])
85
- expect(subject.references_of(p_a)).to eq([p_a, p_a])
86
+ expect(subject.references_of(p_a).map(&:production)).to eq([p_a, p_a])
86
87
 
87
88
 
88
89
  # Case 4: production with multiple distinct references
89
90
  subject.append_symbol(p_bc)
90
91
  expect(subject.references).to eq([p_a, p_a, p_bc])
91
- expect(subject.references_of(p_bc)).to eq([p_bc])
92
+ expect(subject.references_of(p_bc).map(&:production)).to eq([p_bc])
92
93
  end
93
94
 
94
95
  it 'should know the position(s) of a given digram' do
@@ -9,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
9
9
 
10
10
  describe SymbolSequence do
11
11
 
12
- let(:instance) { SymbolSequence.new }
12
+ let(:instance) { SymbolSequence.new }
13
13
 
14
14
  context 'Creation and initialization:' do
15
15
 
@@ -24,34 +24,35 @@ describe SymbolSequence do
24
24
  end # context
25
25
 
26
26
  context 'Provided services:' do
27
+ let(:a_prod) { Production.new }
28
+
27
29
  subject do
28
30
  an_instance = SymbolSequence.new
29
31
  [:a, :b, :c].each { |a_sym| an_instance << a_sym }
30
32
  an_instance
31
33
  end
32
-
34
+
33
35
  it 'should deep-copy clone itself' do
34
- a_prod = Production.new
35
36
  ref = ProductionRef.new(a_prod)
36
-
37
+
37
38
  a, c = 'a', 'c'
38
39
  [a, ref, c].each { |ch| instance << ch }
39
40
  clone_a = instance.clone
40
-
41
+
41
42
  # Check that cloning works
42
43
  expect(clone_a).to eq(instance)
43
-
44
+
44
45
  # Reference objects are distinct but points to same production
45
46
  expect(clone_a.symbols[1].object_id).not_to eq(instance.symbols[1])
46
-
47
+
47
48
  # Modifying the clone...
48
49
  clone_a.symbols[1] = 'diff'
49
50
  expect(clone_a).not_to eq(instance)
50
-
51
+
51
52
  # ... should leave original unchanged
52
53
  expect(instance.symbols[1]).to eq(ref)
53
54
  end
54
-
55
+
55
56
 
56
57
  it 'should tell that it is equal to itself' do
57
58
  # Case: Non-empty sequence
@@ -65,25 +66,53 @@ describe SymbolSequence do
65
66
  expect(instance).to eq(instance)
66
67
 
67
68
  expect(subject).not_to eq(instance)
68
- [:a, :b, :c].each { |a_sym| instance << a_sym }
69
+ [:a, :b, :c].each { |a_sym| instance << a_sym }
69
70
  expect(subject).to eq(instance)
70
-
71
+
71
72
  # Check that element order is relevant
72
73
  instance.symbols.rotate!
73
- expect(subject).not_to eq(instance)
74
+ expect(subject).not_to eq(instance)
74
75
  end
75
-
76
+
76
77
  it 'should know whether it is equal to an array' do
77
78
  expect(subject).to eq([:a, :b, :c])
78
-
79
+
79
80
  # Check that element order is relevant
80
- expect(subject).not_to eq([:c, :b, :a])
81
+ expect(subject).not_to eq([:c, :b, :a])
81
82
  end
82
-
83
+
83
84
  it 'should know that is not equal to something else' do
84
85
  expect(subject).not_to eq(:abc)
85
86
  end
86
87
 
88
+
89
+ it 'should know its references' do
90
+ ref = ProductionRef.new(a_prod)
91
+ 2.times { subject << ref }
92
+
93
+ refs = subject.references
94
+ expect(refs.size).to eq(2)
95
+ expect(refs).to eq([ref, ref])
96
+
97
+ refs = subject.references
98
+ expect(refs.size).to eq(2)
99
+ expect(refs).to eq([ref, ref])
100
+ specific_refs = subject.references_of(a_prod)
101
+ expect(specific_refs).to eq(refs)
102
+
103
+
104
+ another = Production.new
105
+ another_ref = ProductionRef.new(another)
106
+ subject << another_ref
107
+ refs = subject.references
108
+ expect(refs.size).to eq(3)
109
+ expect(refs).to eq([ref, ref, another])
110
+ specific_refs = subject.references_of(a_prod)
111
+ expect(specific_refs).to eq([ref, ref])
112
+ specific_refs = subject.references_of(another)
113
+ expect(specific_refs).to eq([another])
114
+ end
115
+
87
116
  end # context
88
117
 
89
118
 
@@ -92,4 +121,4 @@ end # describe
92
121
 
93
122
  end # module
94
123
 
95
- # End of file
124
+ # End of file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequitur
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.09
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-03 00:00:00.000000000 Z
11
+ date: 2014-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake