sequitur 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,182 +1,182 @@
1
- module Sequitur # Module for classes implementing the Sequitur algorithm
2
- # Represents a sequence (concatenation) of grammar symbols
3
- # as they appear in rhs of productions
4
- class SymbolSequence
5
- # The sequence of symbols itself
6
- attr_reader(:symbols)
7
-
8
- # Create an empty sequence
9
- def initialize()
10
- @symbols = []
11
- end
12
-
13
- # Copy constructor invoked by dup or clone methods.
14
- # @param orig [SymbolSequence]
15
- def initialize_copy(orig)
16
- # Deep copy to avoid the aliasing of production reference
17
- @symbols = orig.symbols.map do |sym|
18
- sym.is_a?(Symbol) ? sym : sym.dup
19
- end
20
- invalidate_refs
21
- end
22
-
23
- public
24
-
25
- # Clear the symbol sequence.
26
- def clear()
27
- refs = references
28
- refs.each(&:unbind)
29
- @symbols = []
30
- invalidate_refs
31
- end
32
-
33
- # Tell whether the sequence is empty.
34
- # @return [true / false] true only if the sequence has no symbol in it.
35
- def empty?()
36
- return symbols.empty?
37
- end
38
-
39
- # Count the number of elements in the sequence.
40
- # @return [Fixnum] the number of elements
41
- def size()
42
- return symbols.size
43
- end
44
-
45
- # Append a grammar symbol at the end of the sequence.
46
- # @param aSymbol [Object] The symbol to append.
47
- def <<(aSymbol)
48
- symbols << aSymbol
49
- if aSymbol.is_a?(ProductionRef)
50
- @memo_references ||= []
51
- @memo_references << aSymbol
52
- end
53
- end
54
-
55
- # Retrieve the element from the sequence at given position.
56
- # @param anIndex [Fixnum] A zero-based index of the element to access.
57
- def [](anIndex)
58
- return symbols[anIndex]
59
- end
60
-
61
- # Equality testing.
62
- # @param other [SymbolSequence or Array] the other sequence
63
- # to compare to.
64
- # @return true when each item from self equals the corresponding
65
- # item from 'other'
66
- def ==(other)
67
- return true if object_id == other.object_id
68
-
69
- case other
70
- when SymbolSequence
71
- same = symbols == other.symbols
72
- when Array
73
- same = symbols == other
74
- else
75
- same = false
76
- end
77
-
78
- return same
79
- end
80
-
81
-
82
- # Select the references to production appearing in the rhs.
83
- # @return [Array of ProductionRef]
84
- def references()
85
- @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
- return @memo_references
87
- end
88
-
89
-
90
- # Select the references of the given production appearing in the rhs.
91
- # @param aProduction [Production]
92
- # @return [Array of ProductionRef]
93
- def references_of(aProduction)
94
- return [] if references.empty?
95
- result = references.select { |a_ref| a_ref == aProduction }
96
- return result
97
- end
98
-
99
-
100
- # Emit a text representation of the symbol sequence.
101
- # Text is of the form: space-separated sequence of symbols.
102
- # @return [String]
103
- def to_string()
104
- rhs_text = symbols.map do |elem|
105
- case elem
106
- when String then "'#{elem}'"
107
- else elem.to_s
108
- end
109
- end
110
-
111
- return rhs_text.join(' ')
112
- end
113
-
114
- # Insert at position the elements from another sequence.
115
- # @param position [Fixnum] A zero-based index at which the symbols are inserted.
116
- # @param another [SymbolSequence] A production with a two-elements rhs
117
- # (a single digram).
118
- def insert_at(position, another)
119
- klone = another.dup
120
- symbols.insert(position, *klone.symbols)
121
- invalidate_refs
122
- end
123
-
124
- # Given that the production P passed as argument has exactly 2 symbols
125
- # in its rhs s1 s2, substitute in the rhs of self the occurrence of
126
- # s1 s2 located at the given index by a reference to P.
127
- # @param index [Fixnum] the position of a two symbol sequence to be replaced
128
- # by the production
129
- # @param aProduction [Production or ProductionRef] a production that
130
- # consists exactly of one digram (= 2 symbols).
131
- def reduce_step(index, aProduction)
132
- if symbols[index].is_a?(ProductionRef)
133
- symbols[index].bind_to(aProduction)
134
- else
135
- new_ref = ProductionRef.new(aProduction)
136
- symbols[index] = new_ref
137
- @memo_references ||= []
138
- @memo_references << new_ref
139
- end
140
- index1 = index + 1
141
- if symbols[index1].is_a?(ProductionRef)
142
- symbols[index1].unbind
143
- invalidate_refs
144
- end
145
- delete_at(index1)
146
- end
147
-
148
- # Remove the element at given position
149
- # @param position [Fixnum] a zero-based index.
150
- def delete_at(position)
151
- invalidate_refs if symbols[position].is_a?(ProductionRef)
152
- symbols.delete_at(position)
153
- end
154
-
155
-
156
- # Part of the 'visitee' role in Visitor design pattern.
157
- # @param aVisitor [GrammarVisitor]
158
- def accept(aVisitor)
159
- aVisitor.start_visit_rhs(self)
160
-
161
- # Let's proceed with the visit of productions
162
- symbols.each do |a_symb|
163
- if a_symb.is_a?(ProductionRef)
164
- a_symb.accept(aVisitor)
165
- else
166
- aVisitor.visit_terminal(a_symb)
167
- end
168
- end
169
-
170
- aVisitor.end_visit_rhs(self)
171
- end
172
-
173
- private
174
-
175
- def invalidate_refs()
176
- @memo_references = nil
177
- @lookup_references = nil
178
- end
179
-
180
- end # class
181
-
182
- end # module
1
+ module Sequitur # Module for classes implementing the Sequitur algorithm
2
+ # Represents a sequence (concatenation) of grammar symbols
3
+ # as they appear in rhs of productions
4
+ class SymbolSequence
5
+ # The sequence of symbols itself
6
+ attr_reader(:symbols)
7
+
8
+ # Create an empty sequence
9
+ def initialize()
10
+ @symbols = []
11
+ end
12
+
13
+ # Copy constructor invoked by dup or clone methods.
14
+ # @param orig [SymbolSequence]
15
+ def initialize_copy(orig)
16
+ # Deep copy to avoid the aliasing of production reference
17
+ @symbols = orig.symbols.map do |sym|
18
+ sym.is_a?(ProductionRef) ? sym.dup : sym
19
+ end
20
+ invalidate_refs
21
+ end
22
+
23
+ public
24
+
25
+ # Clear the symbol sequence.
26
+ def clear()
27
+ refs = references
28
+ refs.each(&:unbind)
29
+ @symbols = []
30
+ invalidate_refs
31
+ end
32
+
33
+ # Tell whether the sequence is empty.
34
+ # @return [true / false] true only if the sequence has no symbol in it.
35
+ def empty?()
36
+ return symbols.empty?
37
+ end
38
+
39
+ # Count the number of elements in the sequence.
40
+ # @return [Fixnum] the number of elements
41
+ def size()
42
+ return symbols.size
43
+ end
44
+
45
+ # Append a grammar symbol at the end of the sequence.
46
+ # @param aSymbol [Object] The symbol to append.
47
+ def <<(aSymbol)
48
+ symbols << aSymbol
49
+ if aSymbol.is_a?(ProductionRef)
50
+ @memo_references ||= []
51
+ @memo_references << aSymbol
52
+ end
53
+ end
54
+
55
+ # Retrieve the element from the sequence at given position.
56
+ # @param anIndex [Fixnum] A zero-based index of the element to access.
57
+ def [](anIndex)
58
+ return symbols[anIndex]
59
+ end
60
+
61
+ # Equality testing.
62
+ # @param other [SymbolSequence or Array] the other sequence
63
+ # to compare to.
64
+ # @return true when each item from self equals the corresponding
65
+ # item from 'other'
66
+ def ==(other)
67
+ return true if object_id == other.object_id
68
+
69
+ case other
70
+ when SymbolSequence
71
+ same = symbols == other.symbols
72
+ when Array
73
+ same = symbols == other
74
+ else
75
+ same = false
76
+ end
77
+
78
+ return same
79
+ end
80
+
81
+
82
+ # Select the references to production appearing in the rhs.
83
+ # @return [Array of ProductionRef]
84
+ def references()
85
+ @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
+ return @memo_references
87
+ end
88
+
89
+
90
+ # Select the references of the given production appearing in the rhs.
91
+ # @param aProduction [Production]
92
+ # @return [Array of ProductionRef]
93
+ def references_of(aProduction)
94
+ return [] if references.empty?
95
+ result = references.select { |a_ref| a_ref == aProduction }
96
+ return result
97
+ end
98
+
99
+
100
+ # Emit a text representation of the symbol sequence.
101
+ # Text is of the form: space-separated sequence of symbols.
102
+ # @return [String]
103
+ def to_string()
104
+ rhs_text = symbols.map do |elem|
105
+ case elem
106
+ when String then "'#{elem}'"
107
+ else elem.to_s
108
+ end
109
+ end
110
+
111
+ return rhs_text.join(' ')
112
+ end
113
+
114
+ # Insert at position the elements from another sequence.
115
+ # @param position [Fixnum] A zero-based index at which the symbols are inserted.
116
+ # @param another [SymbolSequence] A production with a two-elements rhs
117
+ # (a single digram).
118
+ def insert_at(position, another)
119
+ klone = another.dup
120
+ symbols.insert(position, *klone.symbols)
121
+ invalidate_refs
122
+ end
123
+
124
+ # Given that the production P passed as argument has exactly 2 symbols
125
+ # in its rhs s1 s2, substitute in the rhs of self the occurrence of
126
+ # s1 s2 located at the given index by a reference to P.
127
+ # @param index [Fixnum] the position of a two symbol sequence to be replaced
128
+ # by the production
129
+ # @param aProduction [Production or ProductionRef] a production that
130
+ # consists exactly of one digram (= 2 symbols).
131
+ def reduce_step(index, aProduction)
132
+ if symbols[index].is_a?(ProductionRef)
133
+ symbols[index].bind_to(aProduction)
134
+ else
135
+ new_ref = ProductionRef.new(aProduction)
136
+ symbols[index] = new_ref
137
+ @memo_references ||= []
138
+ @memo_references << new_ref
139
+ end
140
+ index1 = index + 1
141
+ if symbols[index1].is_a?(ProductionRef)
142
+ symbols[index1].unbind
143
+ invalidate_refs
144
+ end
145
+ delete_at(index1)
146
+ end
147
+
148
+ # Remove the element at given position
149
+ # @param position [Fixnum] a zero-based index.
150
+ def delete_at(position)
151
+ invalidate_refs if symbols[position].is_a?(ProductionRef)
152
+ symbols.delete_at(position)
153
+ end
154
+
155
+
156
+ # Part of the 'visitee' role in Visitor design pattern.
157
+ # @param aVisitor [GrammarVisitor]
158
+ def accept(aVisitor)
159
+ aVisitor.start_visit_rhs(self)
160
+
161
+ # Let's proceed with the visit of productions
162
+ symbols.each do |a_symb|
163
+ if a_symb.is_a?(ProductionRef)
164
+ a_symb.accept(aVisitor)
165
+ else
166
+ aVisitor.visit_terminal(a_symb)
167
+ end
168
+ end
169
+
170
+ aVisitor.end_visit_rhs(self)
171
+ end
172
+
173
+ private
174
+
175
+ def invalidate_refs()
176
+ @memo_references = nil
177
+ @lookup_references = nil
178
+ end
179
+
180
+ end # class
181
+
182
+ end # module
@@ -74,7 +74,7 @@ describe SequiturGrammar do
74
74
  expect(p_a.rhs).to eq([:a, :b, :c])
75
75
  expect(instance.start.rhs).to eq([p_a, p_a])
76
76
  end
77
-
77
+
78
78
  it 'should cope with a pattern that caused an exception' do
79
79
  input = 'aaac' # This sequence raised an exception
80
80
 
@@ -102,6 +102,78 @@ describe SequiturGrammar do
102
102
  expect(p3.rhs).to eq(['b', p2, 'e'])
103
103
  end
104
104
 
105
+ it 'should work with strings instead of single char input tokens' do
106
+ # Raw input is sequence of chars
107
+ raw_input = 'bbebeebebebbebee'
108
+
109
+ # Convert them into multichar strings
110
+ input = raw_input.chars.map do |ch|
111
+ 'letter_' + ch
112
+ end
113
+
114
+ # Creation
115
+ instance = SequiturGrammar.new(input.to_enum)
116
+
117
+ # Expectations:
118
+ # S: P3 P2 P3
119
+ # P1: b e
120
+ # P2: P1 P1
121
+ # P3: b P2 e
122
+ expect(instance.productions.size).to eq(4)
123
+ (p1, p2, p3) = instance.productions[1..3]
124
+ expect(instance.start.rhs).to eq([p3, p2, p3])
125
+ expect(p1.rhs).to eq(%w(letter_b letter_e))
126
+ expect(p2.rhs).to eq([p1, p1])
127
+ expect(p3.rhs).to eq(['letter_b',p2, 'letter_e'])
128
+ end
129
+
130
+ it 'should work with Symbol instead of single char input tokens' do
131
+ # Raw input is sequence of single characters
132
+ raw_input = 'bbebeebebebbebee'
133
+
134
+ # Convert them into symbols
135
+ input = raw_input.chars.map(&:to_sym)
136
+
137
+ # Creation
138
+ instance = SequiturGrammar.new(input.to_enum)
139
+
140
+ # Expectations:
141
+ # S: P3 P2 P3
142
+ # P1: b e
143
+ # P2: P1 P1
144
+ # P3: b P2 e
145
+ expect(instance.productions.size).to eq(4)
146
+ (p1, p2, p3) = instance.productions[1..3]
147
+ expect(instance.start.rhs).to eq([p3, p2, p3])
148
+ expect(p1.rhs).to eq([:b, :e])
149
+ expect(p2.rhs).to eq([p1, p1])
150
+ expect(p3.rhs).to eq([:b, p2, :e])
151
+ end
152
+
153
+
154
+ it 'should work with integer values as input tokens' do
155
+ # Raw input is sequence of hex digits
156
+ raw_input = 'bbebeebebebbebee'
157
+
158
+ # Convert them into Fixnums
159
+ input = raw_input.chars.map { |ch| ch.to_i(16) }
160
+
161
+ # Creation
162
+ instance = SequiturGrammar.new(input.to_enum)
163
+
164
+ # Expectations:
165
+ # S: P3 P2 P3
166
+ # P1: b e
167
+ # P2: P1 P1
168
+ # P3: b P2 e
169
+ expect(instance.productions.size).to eq(4)
170
+ (p1, p2, p3) = instance.productions[1..3]
171
+ expect(instance.start.rhs).to eq([p3, p2, p3])
172
+ expect(p1.rhs).to eq([0xb, 0xe])
173
+ expect(p2.rhs).to eq([p1, p1])
174
+ expect(p3.rhs).to eq([0xb, p2, 0xe])
175
+ end
176
+
105
177
  it 'should cope with the example from sequitur.info website' do
106
178
  input = 'abcabdabcabd'
107
179
  instance = SequiturGrammar.new(input.chars)
@@ -153,7 +225,7 @@ SNIPPET
153
225
  # 2 → h o t hot
154
226
  # 3 → 10 1 ,↵pease_porridge_
155
227
  # 4 → c 11 cold
156
- # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
228
+ # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
157
229
  # in_the_pot,↵nine_days_old.↵
158
230
  # 6 → s o m 9 l i k 9 i t _ some_like_it_
159
231
  # 7 → 10 6 ,↵some_like_it_
@@ -196,7 +268,7 @@ SNIPPET
196
268
  ].flatten
197
269
  expect(p12.rhs).to eq(p12_expectation) # Rule 5 above
198
270
  end
199
-
271
+
200
272
  it 'should work with a sequence of Ruby Symbols' do
201
273
  input = 'abcabdabcabd'.chars.map(&:to_sym)
202
274
  instance = SequiturGrammar.new(input.to_enum)