sequitur 0.1.10 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,182 +1,182 @@
1
- module Sequitur # Module for classes implementing the Sequitur algorithm
2
- # Represents a sequence (concatenation) of grammar symbols
3
- # as they appear in rhs of productions
4
- class SymbolSequence
5
- # The sequence of symbols itself
6
- attr_reader(:symbols)
7
-
8
- # Create an empty sequence
9
- def initialize()
10
- @symbols = []
11
- end
12
-
13
- # Copy constructor invoked by dup or clone methods.
14
- # @param orig [SymbolSequence]
15
- def initialize_copy(orig)
16
- # Deep copy to avoid the aliasing of production reference
17
- @symbols = orig.symbols.map do |sym|
18
- sym.is_a?(Symbol) ? sym : sym.dup
19
- end
20
- invalidate_refs
21
- end
22
-
23
- public
24
-
25
- # Clear the symbol sequence.
26
- def clear()
27
- refs = references
28
- refs.each(&:unbind)
29
- @symbols = []
30
- invalidate_refs
31
- end
32
-
33
- # Tell whether the sequence is empty.
34
- # @return [true / false] true only if the sequence has no symbol in it.
35
- def empty?()
36
- return symbols.empty?
37
- end
38
-
39
- # Count the number of elements in the sequence.
40
- # @return [Fixnum] the number of elements
41
- def size()
42
- return symbols.size
43
- end
44
-
45
- # Append a grammar symbol at the end of the sequence.
46
- # @param aSymbol [Object] The symbol to append.
47
- def <<(aSymbol)
48
- symbols << aSymbol
49
- if aSymbol.is_a?(ProductionRef)
50
- @memo_references ||= []
51
- @memo_references << aSymbol
52
- end
53
- end
54
-
55
- # Retrieve the element from the sequence at given position.
56
- # @param anIndex [Fixnum] A zero-based index of the element to access.
57
- def [](anIndex)
58
- return symbols[anIndex]
59
- end
60
-
61
- # Equality testing.
62
- # @param other [SymbolSequence or Array] the other other sequence
63
- # to compare to.
64
- # @return true when an item from self equals the corresponding
65
- # item from 'other'
66
- def ==(other)
67
- return true if object_id == other.object_id
68
-
69
- case other
70
- when SymbolSequence
71
- same = symbols == other.symbols
72
- when Array
73
- same = symbols == other
74
- else
75
- same = false
76
- end
77
-
78
- return same
79
- end
80
-
81
-
82
- # Select the references to production appearing in the rhs.
83
- # @return [Array of ProductionRef]
84
- def references()
85
- @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
- return @memo_references
87
- end
88
-
89
-
90
- # Select the references of the given production appearing in the rhs.
91
- # @param aProduction [Production]
92
- # @return [Array of ProductionRef]
93
- def references_of(aProduction)
94
- return [] if references.empty?
95
- result = references.select { |a_ref| a_ref == aProduction }
96
- return result
97
- end
98
-
99
-
100
- # Emit a text representation of the symbol sequence.
101
- # Text is of the form: space-separated sequence of symbols.
102
- # @return [String]
103
- def to_string()
104
- rhs_text = symbols.map do |elem|
105
- case elem
106
- when String then "'#{elem}'"
107
- else elem.to_s
108
- end
109
- end
110
-
111
- return rhs_text.join(' ')
112
- end
113
-
114
- # Insert at position the elements from another sequence.
115
- # @param position [Fixnum] A zero-based index of the symbols to replace.
116
- # @param another [SymbolSequence] A production with a two-elements rhs
117
- # (a single digram).
118
- def insert_at(position, another)
119
- klone = another.dup
120
- symbols.insert(position, *klone.symbols)
121
- invalidate_refs
122
- end
123
-
124
- # Given that the production P passed as argument has exactly 2 symbols
125
- # in its rhs s1 s2, substitute in the rhs of self all occurrences of
126
- # s1 s2 by a reference to P.
127
- # @param index [Fixnum] the position of a two symbol sequence to be replaced
128
- # by the production
129
- # @param aProduction [Production or ProductionRef] a production that
130
- # consists exactly of one digram (= 2 symbols).
131
- def reduce_step(index, aProduction)
132
- if symbols[index].is_a?(ProductionRef)
133
- symbols[index].bind_to(aProduction)
134
- else
135
- new_ref = ProductionRef.new(aProduction)
136
- symbols[index] = new_ref
137
- @memo_references ||= []
138
- @memo_references << new_ref
139
- end
140
- index1 = index + 1
141
- if symbols[index1].is_a?(ProductionRef)
142
- symbols[index1].unbind
143
- invalidate_refs
144
- end
145
- delete_at(index1)
146
- end
147
-
148
- # Remove the element at given position
149
- # @param position [Fixnum] a zero-based index.
150
- def delete_at(position)
151
- invalidate_refs if symbols[position].is_a?(ProductionRef)
152
- symbols.delete_at(position)
153
- end
154
-
155
-
156
- # Part of the 'visitee' role in Visitor design pattern.
157
- # @param aVisitor[GrammarVisitor]
158
- def accept(aVisitor)
159
- aVisitor.start_visit_rhs(self)
160
-
161
- # Let's proceed with the visit of productions
162
- symbols.each do |a_symb|
163
- if a_symb.is_a?(ProductionRef)
164
- a_symb.accept(aVisitor)
165
- else
166
- aVisitor.visit_terminal(a_symb)
167
- end
168
- end
169
-
170
- aVisitor.end_visit_rhs(self)
171
- end
172
-
173
- private
174
-
175
- def invalidate_refs()
176
- @memo_references = nil
177
- @lookup_references = nil
178
- end
179
-
180
- end # class
181
-
182
- end # module
1
+ module Sequitur # Module for classes implementing the Sequitur algorithm
2
+ # Represents a sequence (concatenation) of grammar symbols
3
+ # as they appear in rhs of productions
4
+ class SymbolSequence
5
+ # The sequence of symbols itself
6
+ attr_reader(:symbols)
7
+
8
+ # Create an empty sequence
9
+ def initialize()
10
+ @symbols = []
11
+ end
12
+
13
+ # Copy constructor invoked by dup or clone methods.
14
+ # @param orig [SymbolSequence]
15
+ def initialize_copy(orig)
16
+ # Deep copy to avoid the aliasing of production reference
17
+ @symbols = orig.symbols.map do |sym|
18
+ sym.is_a?(ProductionRef) ? sym.dup : sym
19
+ end
20
+ invalidate_refs
21
+ end
22
+
23
+ public
24
+
25
+ # Clear the symbol sequence.
26
+ def clear()
27
+ refs = references
28
+ refs.each(&:unbind)
29
+ @symbols = []
30
+ invalidate_refs
31
+ end
32
+
33
+ # Tell whether the sequence is empty.
34
+ # @return [true / false] true only if the sequence has no symbol in it.
35
+ def empty?()
36
+ return symbols.empty?
37
+ end
38
+
39
+ # Count the number of elements in the sequence.
40
+ # @return [Fixnum] the number of elements
41
+ def size()
42
+ return symbols.size
43
+ end
44
+
45
+ # Append a grammar symbol at the end of the sequence.
46
+ # @param aSymbol [Object] The symbol to append.
47
+ def <<(aSymbol)
48
+ symbols << aSymbol
49
+ if aSymbol.is_a?(ProductionRef)
50
+ @memo_references ||= []
51
+ @memo_references << aSymbol
52
+ end
53
+ end
54
+
55
+ # Retrieve the element from the sequence at given position.
56
+ # @param anIndex [Fixnum] A zero-based index of the element to access.
57
+ def [](anIndex)
58
+ return symbols[anIndex]
59
+ end
60
+
61
+ # Equality testing.
62
+ # @param other [SymbolSequence or Array] the other sequence
63
+ # to compare to.
64
+ # @return [true / false] true when every item from self equals the corresponding
65
+ # item from 'other'
66
+ def ==(other)
67
+ return true if object_id == other.object_id
68
+
69
+ case other
70
+ when SymbolSequence
71
+ same = symbols == other.symbols
72
+ when Array
73
+ same = symbols == other
74
+ else
75
+ same = false
76
+ end
77
+
78
+ return same
79
+ end
80
+
81
+
82
+ # Select the references to production appearing in the rhs.
83
+ # @return [Array of ProductionRef]
84
+ def references()
85
+ @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
86
+ return @memo_references
87
+ end
88
+
89
+
90
+ # Select the references of the given production appearing in the rhs.
91
+ # @param aProduction [Production]
92
+ # @return [Array of ProductionRef]
93
+ def references_of(aProduction)
94
+ return [] if references.empty?
95
+ result = references.select { |a_ref| a_ref == aProduction }
96
+ return result
97
+ end
98
+
99
+
100
+ # Emit a text representation of the symbol sequence.
101
+ # Text is of the form: space-separated sequence of symbols.
102
+ # @return [String]
103
+ def to_string()
104
+ rhs_text = symbols.map do |elem|
105
+ case elem
106
+ when String then "'#{elem}'"
107
+ else elem.to_s
108
+ end
109
+ end
110
+
111
+ return rhs_text.join(' ')
112
+ end
113
+
114
+ # Insert at position the elements from another sequence.
115
+ # @param position [Fixnum] A zero-based index at which the symbols are inserted.
116
+ # @param another [SymbolSequence] A production with a two-elements rhs
117
+ # (a single digram).
118
+ def insert_at(position, another)
119
+ klone = another.dup
120
+ symbols.insert(position, *klone.symbols)
121
+ invalidate_refs
122
+ end
123
+
124
+ # Given that the production P passed as argument has exactly 2 symbols
125
+ # in its rhs s1 s2, substitute in the rhs of self the occurrence at index of
126
+ # s1 s2 by a reference to P.
127
+ # @param index [Fixnum] the position of a two symbol sequence to be replaced
128
+ # by the production
129
+ # @param aProduction [Production or ProductionRef] a production that
130
+ # consists exactly of one digram (= 2 symbols).
131
+ def reduce_step(index, aProduction)
132
+ if symbols[index].is_a?(ProductionRef)
133
+ symbols[index].bind_to(aProduction)
134
+ else
135
+ new_ref = ProductionRef.new(aProduction)
136
+ symbols[index] = new_ref
137
+ @memo_references ||= []
138
+ @memo_references << new_ref
139
+ end
140
+ index1 = index + 1
141
+ if symbols[index1].is_a?(ProductionRef)
142
+ symbols[index1].unbind
143
+ invalidate_refs
144
+ end
145
+ delete_at(index1)
146
+ end
147
+
148
+ # Remove the element at given position
149
+ # @param position [Fixnum] a zero-based index.
150
+ def delete_at(position)
151
+ invalidate_refs if symbols[position].is_a?(ProductionRef)
152
+ symbols.delete_at(position)
153
+ end
154
+
155
+
156
+ # Part of the 'visitee' role in Visitor design pattern.
157
+ # @param aVisitor [GrammarVisitor]
158
+ def accept(aVisitor)
159
+ aVisitor.start_visit_rhs(self)
160
+
161
+ # Let's proceed with the visit of productions
162
+ symbols.each do |a_symb|
163
+ if a_symb.is_a?(ProductionRef)
164
+ a_symb.accept(aVisitor)
165
+ else
166
+ aVisitor.visit_terminal(a_symb)
167
+ end
168
+ end
169
+
170
+ aVisitor.end_visit_rhs(self)
171
+ end
172
+
173
+ private
174
+
175
+ def invalidate_refs()
176
+ @memo_references = nil
177
+ @lookup_references = nil
178
+ end
179
+
180
+ end # class
181
+
182
+ end # module
@@ -74,7 +74,7 @@ describe SequiturGrammar do
74
74
  expect(p_a.rhs).to eq([:a, :b, :c])
75
75
  expect(instance.start.rhs).to eq([p_a, p_a])
76
76
  end
77
-
77
+
78
78
  it 'should cope with a pattern that caused an exception' do
79
79
  input = 'aaac' # This sequence raised an exception
80
80
 
@@ -102,6 +102,78 @@ describe SequiturGrammar do
102
102
  expect(p3.rhs).to eq(['b', p2, 'e'])
103
103
  end
104
104
 
105
+ it 'should work with strings instead of single char input tokens' do
106
+ # Raw input is sequence of chars
107
+ raw_input = 'bbebeebebebbebee'
108
+
109
+ # Convert them into multichar strings
110
+ input = raw_input.chars.map do |ch|
111
+ 'letter_' + ch
112
+ end
113
+
114
+ # Creation
115
+ instance = SequiturGrammar.new(input.to_enum)
116
+
117
+ # Expectations:
118
+ # S: P3 P2 P3
119
+ # P1: b e
120
+ # P2: P1 P1
121
+ # P3: b P2 e
122
+ expect(instance.productions.size).to eq(4)
123
+ (p1, p2, p3) = instance.productions[1..3]
124
+ expect(instance.start.rhs).to eq([p3, p2, p3])
125
+ expect(p1.rhs).to eq(%w(letter_b letter_e))
126
+ expect(p2.rhs).to eq([p1, p1])
127
+ expect(p3.rhs).to eq(['letter_b',p2, 'letter_e'])
128
+ end
129
+
130
+ it 'should work with Symbol instead of single char input tokens' do
131
+ # Raw input is sequence of single characters
132
+ raw_input = 'bbebeebebebbebee'
133
+
134
+ # Convert them into symbols
135
+ input = raw_input.chars.map(&:to_sym)
136
+
137
+ # Creation
138
+ instance = SequiturGrammar.new(input.to_enum)
139
+
140
+ # Expectations:
141
+ # S: P3 P2 P3
142
+ # P1: b e
143
+ # P2: P1 P1
144
+ # P3: b P2 e
145
+ expect(instance.productions.size).to eq(4)
146
+ (p1, p2, p3) = instance.productions[1..3]
147
+ expect(instance.start.rhs).to eq([p3, p2, p3])
148
+ expect(p1.rhs).to eq([:b, :e])
149
+ expect(p2.rhs).to eq([p1, p1])
150
+ expect(p3.rhs).to eq([:b, p2, :e])
151
+ end
152
+
153
+
154
+ it 'should work with integer values as input tokens' do
155
+ # Raw input is sequence of hex digits
156
+ raw_input = 'bbebeebebebbebee'
157
+
158
+ # Convert them into Fixnums
159
+ input = raw_input.chars.map { |ch| ch.to_i(16) }
160
+
161
+ # Creation
162
+ instance = SequiturGrammar.new(input.to_enum)
163
+
164
+ # Expectations:
165
+ # S: P3 P2 P3
166
+ # P1: b e
167
+ # P2: P1 P1
168
+ # P3: b P2 e
169
+ expect(instance.productions.size).to eq(4)
170
+ (p1, p2, p3) = instance.productions[1..3]
171
+ expect(instance.start.rhs).to eq([p3, p2, p3])
172
+ expect(p1.rhs).to eq([0xb, 0xe])
173
+ expect(p2.rhs).to eq([p1, p1])
174
+ expect(p3.rhs).to eq([0xb, p2, 0xe])
175
+ end
176
+
105
177
  it 'should cope with the example from sequitur.info website' do
106
178
  input = 'abcabdabcabd'
107
179
  instance = SequiturGrammar.new(input.chars)
@@ -153,7 +225,7 @@ SNIPPET
153
225
  # 2 → h o t hot
154
226
  # 3 → 10 1 ,↵pease_porridge_
155
227
  # 4 → c 11 cold
156
- # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
228
+ # 5 → 12 _ t h 8 t 10 n 12 9 d a y s _ 11 . ↵
157
229
  # in_the_pot,↵nine_days_old.↵
158
230
  # 6 → s o m 9 l i k 9 i t _ some_like_it_
159
231
  # 7 → 10 6 ,↵some_like_it_
@@ -196,7 +268,7 @@ SNIPPET
196
268
  ].flatten
197
269
  expect(p12.rhs).to eq(p12_expectation) # Rule 5 above
198
270
  end
199
-
271
+
200
272
  it 'should work with a sequence of Ruby Symbols' do
201
273
  input = 'abcabdabcabd'.chars.map(&:to_sym)
202
274
  instance = SequiturGrammar.new(input.to_enum)