piglet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +22 -0
- data/LICENSE +20 -0
- data/README.rdoc +293 -0
- data/Rakefile +50 -0
- data/bin/piglet +9 -0
- data/examples/analysis.rb +311 -0
- data/examples/scratch.rb +11 -0
- data/examples/spike1.rb +43 -0
- data/examples/spike2.rb +40 -0
- data/examples/test1.rb +3 -0
- data/examples/test2.rb +5 -0
- data/examples/test3.rb +4 -0
- data/lib/piglet/assignment.rb +13 -0
- data/lib/piglet/cogroup.rb +31 -0
- data/lib/piglet/cross.rb +22 -0
- data/lib/piglet/describe.rb +5 -0
- data/lib/piglet/distinct.rb +16 -0
- data/lib/piglet/dump.rb +5 -0
- data/lib/piglet/explain.rb +13 -0
- data/lib/piglet/field.rb +40 -0
- data/lib/piglet/field_expression_functions.rb +62 -0
- data/lib/piglet/field_function_expression.rb +19 -0
- data/lib/piglet/field_infix_expression.rb +17 -0
- data/lib/piglet/field_prefix_expression.rb +21 -0
- data/lib/piglet/field_rename.rb +11 -0
- data/lib/piglet/field_suffix_expression.rb +17 -0
- data/lib/piglet/filter.rb +13 -0
- data/lib/piglet/foreach.rb +19 -0
- data/lib/piglet/group.rb +21 -0
- data/lib/piglet/illustrate.rb +5 -0
- data/lib/piglet/interpreter.rb +108 -0
- data/lib/piglet/join.rb +20 -0
- data/lib/piglet/limit.rb +13 -0
- data/lib/piglet/load.rb +31 -0
- data/lib/piglet/load_and_store.rb +16 -0
- data/lib/piglet/order.rb +29 -0
- data/lib/piglet/relation.rb +177 -0
- data/lib/piglet/sample.rb +13 -0
- data/lib/piglet/split.rb +41 -0
- data/lib/piglet/store.rb +17 -0
- data/lib/piglet/storing.rb +13 -0
- data/lib/piglet/stream.rb +5 -0
- data/lib/piglet/union.rb +19 -0
- data/lib/piglet.rb +45 -0
- data/spec/piglet/field_spec.rb +130 -0
- data/spec/piglet/interpreter_spec.rb +413 -0
- data/spec/piglet/relation_spec.rb +79 -0
- data/spec/piglet/split_spec.rb +34 -0
- data/spec/piglet_spec.rb +7 -0
- data/spec/spec.opts +3 -0
- data/spec/spec_helper.rb +14 -0
- metadata +123 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Field do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@field = Piglet::Field.new('field')
|
8
|
+
end
|
9
|
+
|
10
|
+
describe '#to_s' do
|
11
|
+
it 'returns a string with the field name (as a string)' do
|
12
|
+
@field.to_s.should eql("field")
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'returns a string with the field name (as a symbol)' do
|
16
|
+
@field = Piglet::Field.new(:field)
|
17
|
+
@field.to_s.should eql("field")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
context 'eval/aggregate functions' do
|
22
|
+
%w(avg count diff max min size sum tokenize).each do |function_name|
|
23
|
+
it "supports \"#{function_name.upcase}\" through ##{function_name}" do
|
24
|
+
@field.send(function_name).to_s.should eql("#{function_name.upcase}(field)")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'supports "IsEmpty" through #empty?' do
|
29
|
+
@field.empty?.to_s.should eql("IsEmpty(field)")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context 'nested expressions' do
|
34
|
+
it 'handles nested expressions' do
|
35
|
+
@field.max.min.avg.empty?.tokenize.to_s.should eql("TOKENIZE(IsEmpty(AVG(MIN(MAX(field)))))")
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'field renaming' do
|
40
|
+
it 'supports renaming a field' do
|
41
|
+
@field.as('x').to_s.should eql('field AS x')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'supports renaming a derived field' do
|
45
|
+
@field.x.y.z.as('b').to_s.should eql('field.x.y.z AS b')
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'supports renaming a calculated field' do
|
49
|
+
@field.max.as('m').to_s.should eql('MAX(field) AS m')
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'infix and unary operators' do
|
54
|
+
before do
|
55
|
+
@field1 = Piglet::Field.new('field1')
|
56
|
+
@field2 = Piglet::Field.new('field2')
|
57
|
+
end
|
58
|
+
|
59
|
+
[:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/].each do |op|
|
60
|
+
it "supports #{op} on a field" do
|
61
|
+
@field1.send(op, @field2).to_s.should eql("field1 #{op} field2")
|
62
|
+
end
|
63
|
+
|
64
|
+
it "supports #{op} on an expression" do
|
65
|
+
(@field1 + (@field1.send(op, @field2))).to_s.should eql("field1 + (field1 #{op} field2)")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'supports != through #ne on a field' do
|
70
|
+
@field1.ne(@field2).to_s.should eql("field1 != field2")
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'supports != through #ne on an expression' do
|
74
|
+
(@field1 + (@field1.ne(@field2))).to_s.should eql("field1 + (field1 != field2)")
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'supports "matches" on a field with a regex' do
|
78
|
+
@field1.matches(/.*\.pig$/).to_s.should eql("field1 matches '.*\\.pig$'")
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'supports "matches" on a field with a string' do
|
82
|
+
@field1.matches('.*\.pig$').to_s.should eql("field1 matches '.*\\.pig$'")
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'supports "matches" on an expression' do
|
86
|
+
(@field1 + @field2).matches(/.*\.pig$/).to_s.should eql("(field1 + field2) matches '.*\\.pig$'")
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'supports "is null" on a field' do
|
90
|
+
@field1.null?.to_s.should eql("field1 is null")
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'supports "is null" on an expression' do
|
94
|
+
(@field1 + @field2).null?.to_s.should eql("(field1 + field2) is null")
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'supports "is not null" on a field' do
|
98
|
+
@field1.not_null?.to_s.should eql("field1 is not null")
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'supports "is not null" on an expression' do
|
102
|
+
(@field1 + @field2).not_null?.to_s.should eql("(field1 + field2) is not null")
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'supports "NOT" on a field' do
|
106
|
+
@field1.not.to_s.should eql("NOT field1")
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'supports "NOT" on an expression' do
|
110
|
+
(@field1 == @field2).not.to_s.should eql("NOT (field1 == field2)")
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'supports unary - through #neg on a field' do
|
114
|
+
@field1.neg.to_s.should eql("-field1")
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'supports unary - through #neg on an expression' do
|
118
|
+
(@field1 + @field2).neg.to_s.should eql("-(field1 + field2)")
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'supports casts on a field' do
|
122
|
+
@field1.cast(:chararray).to_s.should eql("(chararray) field1")
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'supports casts on an expression' do
|
126
|
+
(@field1 + @field2).cast(:chararray).to_s.should eql("(chararray) (field1 + field2)")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
@@ -0,0 +1,413 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Interpreter do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@interpreter = Piglet::Interpreter.new
|
8
|
+
end
|
9
|
+
|
10
|
+
context 'basic usage' do
|
11
|
+
it 'interprets a block given to #new' do
|
12
|
+
output = Piglet::Interpreter.new { store(load('some/path'), 'out') }
|
13
|
+
output.to_pig_latin.should_not be_empty
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'interprets a block given to #interpret' do
|
17
|
+
output = @interpreter.interpret { store(load('some/path'), 'out') }
|
18
|
+
output.to_pig_latin.should_not be_empty
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'does nothing with no commands' do
|
22
|
+
@interpreter.interpret.to_pig_latin.should be_empty
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'load & store operators:' do
|
27
|
+
describe 'LOAD' do
|
28
|
+
it 'outputs a LOAD statement' do
|
29
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
30
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path'")
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'outputs a LOAD statement without a USING clause if none specified' do
|
34
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
35
|
+
@interpreter.to_pig_latin.should_not include('USING')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'outputs a LOAD statement with a USING clause with a specified function' do
|
39
|
+
@interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
|
40
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
|
41
|
+
end
|
42
|
+
|
43
|
+
Piglet::LoadAndStore::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
|
44
|
+
it "knows that the load method :#{symbolic_name} means #{function}" do
|
45
|
+
@interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
|
46
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'outputs a LOAD statement with an AS clause' do
|
51
|
+
@interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
|
52
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'outputs a LOAD statement with an AS clause with types' do
|
56
|
+
@interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
|
57
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
|
61
|
+
@interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
|
62
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe 'STORE' do
|
67
|
+
it 'outputs a STORE statement' do
|
68
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
69
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'outputs a STORE statement without a USING clause if none specified' do
|
73
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
74
|
+
@interpreter.to_pig_latin.should_not include("USING")
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'outputs a STORE statement with a USING clause with a specified function' do
|
78
|
+
@interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
|
79
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'knows that the load method :pig_storage means PigStorage' do
|
83
|
+
@interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
|
84
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe 'DUMP' do
|
89
|
+
it 'outputs a DUMP statement' do
|
90
|
+
@interpreter.interpret { dump(load('some/path')) }
|
91
|
+
@interpreter.to_pig_latin.should match(/DUMP \w+/)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
context 'diagnostic operators:' do
|
97
|
+
describe 'ILLUSTRATE' do
|
98
|
+
it 'outputs an ILLUSTRATE statement' do
|
99
|
+
@interpreter.interpret { illustrate(load('some/path')) }
|
100
|
+
@interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
describe 'DESCRIBE' do
|
105
|
+
it 'outputs a DESCRIBE statement' do
|
106
|
+
@interpreter.interpret { describe(load('some/path')) }
|
107
|
+
@interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe 'EXPLAIN' do
|
112
|
+
it 'outputs an EXPLAIN statement' do
|
113
|
+
@interpreter.interpret { explain(load('some/path')) }
|
114
|
+
@interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'outputs an EXPLAIN statement without an alias' do
|
118
|
+
@interpreter.interpret { explain }
|
119
|
+
@interpreter.to_pig_latin.should match(/EXPLAIN;/)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context 'relation operators:' do
|
125
|
+
describe 'GROUP' do
|
126
|
+
it 'outputs a GROUP statement with one grouping field' do
|
127
|
+
@interpreter.interpret { store(load('in').group(:a), 'out') }
|
128
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'outputs a GROUP statement with more than one grouping field' do
|
132
|
+
@interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
|
133
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'outputs a GROUP statement with a PARALLEL clause' do
|
137
|
+
@interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
|
138
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
describe 'DISTINCT' do
|
143
|
+
it 'outputs a DISTINCT statement' do
|
144
|
+
@interpreter.interpret { store(load('in').distinct, 'out') }
|
145
|
+
@interpreter.to_pig_latin.should match(/DISTINCT \w+/)
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'outputs a DISTINCT statement with a PARALLEL clause' do
|
149
|
+
@interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
|
150
|
+
@interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
describe 'CROSS' do
|
155
|
+
it 'outputs a CROSS statement with two relations' do
|
156
|
+
@interpreter.interpret do
|
157
|
+
a = load('in1')
|
158
|
+
b = load('in2')
|
159
|
+
c = a.cross(b)
|
160
|
+
dump(c)
|
161
|
+
end
|
162
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'outputs a CROSS statement with many relations' do
|
166
|
+
@interpreter.interpret do
|
167
|
+
a = load('in1')
|
168
|
+
b = load('in2')
|
169
|
+
c = load('in3')
|
170
|
+
d = load('in4')
|
171
|
+
e = a.cross(b, c, d)
|
172
|
+
dump(e)
|
173
|
+
end
|
174
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'outputs a CROSS statement with a PARALLEL clause' do
|
178
|
+
@interpreter.interpret do
|
179
|
+
a = load('in1')
|
180
|
+
b = load('in2')
|
181
|
+
c = load('in3')
|
182
|
+
d = a.cross([b, c], :parallel => 4)
|
183
|
+
dump(d)
|
184
|
+
end
|
185
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
describe 'UNION' do
|
190
|
+
it 'outputs a UNION statement with two relations' do
|
191
|
+
@interpreter.interpret do
|
192
|
+
a = load('in1')
|
193
|
+
b = load('in2')
|
194
|
+
c = a.union(b)
|
195
|
+
dump(c)
|
196
|
+
end
|
197
|
+
@interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
|
198
|
+
end
|
199
|
+
|
200
|
+
it 'outputs a UNION statement with many relations' do
|
201
|
+
@interpreter.interpret do
|
202
|
+
a = load('in1')
|
203
|
+
b = load('in2')
|
204
|
+
c = load('in3')
|
205
|
+
d = load('in4')
|
206
|
+
e = a.union(b, c, d)
|
207
|
+
dump(e)
|
208
|
+
end
|
209
|
+
@interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
describe 'SAMPLE' do
|
214
|
+
it 'outputs a SAMPLE statement' do
|
215
|
+
@interpreter.interpret { dump(load('in').sample(10)) }
|
216
|
+
@interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
describe 'LIMIT' do
|
221
|
+
it 'outputs a LIMIT statement' do
|
222
|
+
@interpreter.interpret { dump(load('in').limit(42)) }
|
223
|
+
@interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
describe 'FOREACH … GENERATE' do
|
228
|
+
it 'outputs a FOREACH … GENERATE statement' do
|
229
|
+
@interpreter.interpret { dump(load('in').foreach { |r| :a }) }
|
230
|
+
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
|
231
|
+
end
|
232
|
+
|
233
|
+
it 'outputs a FOREACH … GENERATE statement with a list of fields' do
|
234
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
|
235
|
+
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
|
236
|
+
end
|
237
|
+
|
238
|
+
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
|
239
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
|
240
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
|
244
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
|
245
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
|
246
|
+
end
|
247
|
+
|
248
|
+
it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
|
249
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
|
250
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
|
251
|
+
end
|
252
|
+
|
253
|
+
it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
|
254
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
|
255
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
|
256
|
+
end
|
257
|
+
|
258
|
+
it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
|
259
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
|
260
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
describe 'FILTER' do
|
265
|
+
it 'outputs a FILTER statement' do
|
266
|
+
@interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
|
267
|
+
@interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
|
268
|
+
end
|
269
|
+
|
270
|
+
# it 'outputs a FILTER statement with a complex test' do
|
271
|
+
# @interpreter.interpret { dump(load('in').filter { |r| r.a > r.b && r.c != 3 }) }
|
272
|
+
# @interpreter.to_pig_latin.should match(/FILTER \w+ BY a > b AND c != 3/)
|
273
|
+
# end
|
274
|
+
end
|
275
|
+
|
276
|
+
describe 'SPLIT' do
|
277
|
+
it 'outputs a SPLIT statement' do
|
278
|
+
@interpreter.interpret do
|
279
|
+
a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
|
280
|
+
dump(a)
|
281
|
+
dump(b)
|
282
|
+
end
|
283
|
+
@interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
describe 'ORDER' do
|
288
|
+
it 'outputs an ORDER statement' do
|
289
|
+
@interpreter.interpret { dump(load('in').order(:a)) }
|
290
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
|
291
|
+
end
|
292
|
+
|
293
|
+
it 'outputs an ORDER statement with multiple fields' do
|
294
|
+
@interpreter.interpret { dump(load('in').order(:a, :b)) }
|
295
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
|
296
|
+
end
|
297
|
+
|
298
|
+
it 'outputs an ORDER statement with ASC and DESC' do
|
299
|
+
@interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
|
300
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
|
301
|
+
end
|
302
|
+
end
|
303
|
+
|
304
|
+
describe 'JOIN' do
|
305
|
+
it 'outputs a JOIN statement' do
|
306
|
+
@interpreter.interpret do
|
307
|
+
a = load('in1')
|
308
|
+
b = load('in2')
|
309
|
+
c = a.join(a => :x, b => :y)
|
310
|
+
dump(c)
|
311
|
+
end
|
312
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
|
313
|
+
end
|
314
|
+
|
315
|
+
it 'outputs a JOIN statement with a PARALLEL clause' do
|
316
|
+
@interpreter.interpret do
|
317
|
+
a = load('in1')
|
318
|
+
b = load('in2')
|
319
|
+
c = a.join(a => :x, b => :y, :parallel => 5)
|
320
|
+
dump(c)
|
321
|
+
end
|
322
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
|
323
|
+
end
|
324
|
+
|
325
|
+
it 'outputs a JOIN statement with a USING clause' do
|
326
|
+
@interpreter.interpret do
|
327
|
+
a = load('in1')
|
328
|
+
b = load('in2')
|
329
|
+
c = a.join(a => :x, b => :y, :using => :replicated)
|
330
|
+
dump(c)
|
331
|
+
end
|
332
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
|
333
|
+
end
|
334
|
+
end
|
335
|
+
|
336
|
+
describe 'COGROUP' do
|
337
|
+
it 'outputs a COGROUP statement' do
|
338
|
+
@interpreter.interpret do
|
339
|
+
a = load('in1')
|
340
|
+
b = load('in2')
|
341
|
+
c = a.cogroup(a => :x, b => :y)
|
342
|
+
dump(c)
|
343
|
+
end
|
344
|
+
@interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
|
345
|
+
end
|
346
|
+
|
347
|
+
it 'outputs a COGROUP statement with multiple join fields' do
|
348
|
+
@interpreter.interpret do
|
349
|
+
a = load('in1')
|
350
|
+
b = load('in2')
|
351
|
+
c = a.cogroup(a => :x, b => [:y, :z, :w])
|
352
|
+
dump(c)
|
353
|
+
end
|
354
|
+
@interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
|
355
|
+
end
|
356
|
+
|
357
|
+
it 'outputs a COGROUP statement with a PARALLEL clause' do
|
358
|
+
@interpreter.interpret do
|
359
|
+
a = load('in1')
|
360
|
+
b = load('in2')
|
361
|
+
c = a.cogroup(a => :x, b => :y, :parallel => 5)
|
362
|
+
dump(c)
|
363
|
+
end
|
364
|
+
@interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
|
365
|
+
end
|
366
|
+
|
367
|
+
it 'outputs a COGROUP statement with INNER and OUTER' do
|
368
|
+
@interpreter.interpret do
|
369
|
+
a = load('in1')
|
370
|
+
b = load('in2')
|
371
|
+
c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
|
372
|
+
dump(c)
|
373
|
+
end
|
374
|
+
@interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
|
375
|
+
@interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
|
376
|
+
end
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
context 'aliasing & multiple statements' do
|
381
|
+
it 'aliases the loaded relation and uses the same alias in the STORE statement' do
|
382
|
+
@interpreter.interpret { store(load('in'), 'out') }
|
383
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
|
384
|
+
end
|
385
|
+
|
386
|
+
it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
|
387
|
+
@interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
|
388
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
|
389
|
+
end
|
390
|
+
|
391
|
+
it 'aliases a whole row of statements' do
|
392
|
+
@interpreter.interpret do
|
393
|
+
a = load('in', :schema => [:a])
|
394
|
+
b = a.group(:a)
|
395
|
+
c = b.group(:a)
|
396
|
+
d = c.group(:a)
|
397
|
+
store(d, 'out')
|
398
|
+
end
|
399
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
|
400
|
+
end
|
401
|
+
|
402
|
+
it 'outputs the statements for an alias only once, regardless of home many times it is stored' do
|
403
|
+
@interpreter.interpret do
|
404
|
+
a = load('in')
|
405
|
+
b = a.distinct
|
406
|
+
store(b, 'out1')
|
407
|
+
store(b, 'out2')
|
408
|
+
end
|
409
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Relation do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@relation = Object.new
|
8
|
+
@relation.extend Piglet::Relation
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'has a alias' do
|
12
|
+
@relation.alias.should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'has a unique alias' do
|
16
|
+
aliases = { }
|
17
|
+
1000.times do
|
18
|
+
@relation = Object.new
|
19
|
+
@relation.extend Piglet::Relation
|
20
|
+
aliases.should_not have_key(@relation.alias)
|
21
|
+
aliases[@relation.alias] = @relation
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#group' do
|
26
|
+
it 'returns a new relation with the target relation as source' do
|
27
|
+
@relation.group(:a).sources.should include(@relation)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#distinct' do
|
32
|
+
it 'returns a new relation with the target relation as source' do
|
33
|
+
@relation.distinct.sources.should include(@relation)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe '#cross' do
|
38
|
+
it 'returns a new relation with the target relation as one of the sources' do
|
39
|
+
other = Object.new
|
40
|
+
other.extend Piglet::Relation
|
41
|
+
@relation.cross(other).sources.should include(@relation)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe '#union' do
|
46
|
+
it 'returns a new relation with the target relation as one of the sources' do
|
47
|
+
other = Object.new
|
48
|
+
other.extend Piglet::Relation
|
49
|
+
@relation.union(other).sources.should include(@relation)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe '#sample' do
|
54
|
+
it 'returns a new relation with the target relation as source' do
|
55
|
+
@relation.sample(10).sources.should include(@relation)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe '#limit' do
|
60
|
+
it 'returns a new relation with the target relation as source' do
|
61
|
+
@relation.limit(42).sources.should include(@relation)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'fields' do
|
66
|
+
it 'returns a field for a message that does not correspond to a method' do
|
67
|
+
@relation.a.should_not be_nil
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'returns fields that have the correct name' do
|
71
|
+
@relation.a.to_s.should eql('a')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'returns fields with positional notation' do
|
75
|
+
@relation[1].to_s.should eql('$1')
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Split do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@relation = mock('source')
|
8
|
+
@expr1 = mock('expr1')
|
9
|
+
@expr2 = mock('expr2')
|
10
|
+
@relation.stub!(:alias).and_return('rel')
|
11
|
+
@expr1.stub!(:to_s).and_return('y')
|
12
|
+
@expr2.stub!(:to_s).and_return('w')
|
13
|
+
@split = Piglet::Split.new(@relation, [@expr1, @expr2])
|
14
|
+
end
|
15
|
+
|
16
|
+
describe '#to_s' do
|
17
|
+
it 'outputs all x IF y expressions' do
|
18
|
+
@split.to_s.should match(/SPLIT rel INTO \w+ IF [yw], \w+ IF [yw]/)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'contains the names of all the shard relations' do
|
22
|
+
@shards = @split.shards
|
23
|
+
@split.to_s.should include("#{@shards[0].alias} IF y")
|
24
|
+
@split.to_s.should include("#{@shards[1].alias} IF w")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe '#shards' do
|
29
|
+
it 'returns the same number of shards as there are expressions' do
|
30
|
+
@split.shards.size.should == 2
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/spec/piglet_spec.rb
ADDED
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
|
4
|
+
|
5
|
+
require 'piglet'
|
6
|
+
require 'spec'
|
7
|
+
require 'spec/autorun'
|
8
|
+
|
9
|
+
|
10
|
+
require 'piglet/interpreter'
|
11
|
+
|
12
|
+
Spec::Runner.configure do |config|
|
13
|
+
|
14
|
+
end
|