piglet 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -74,6 +74,10 @@ describe Piglet::Relation::Relation do
74
74
  it 'returns fields with positional notation' do
75
75
  @relation[1].to_s.should eql('$1')
76
76
  end
77
+
78
+ it 'returns fields through a direct call to #field' do
79
+ @relation.field(:a).to_s.should eql('a')
80
+ end
77
81
  end
78
82
 
79
83
  end
@@ -0,0 +1,37 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ describe Piglet::Relation::Union do
5
+
6
+ before do
7
+ @relation1 = Object.new
8
+ @relation1.extend Piglet::Relation::Relation
9
+ @relation2 = mock('relation2')
10
+ @relation3 = mock('relation3')
11
+ @relation1.stub!(:alias).and_return('relation1')
12
+ @relation2.stub!(:alias).and_return('relation2')
13
+ @relation3.stub!(:alias).and_return('relation3')
14
+ end
15
+
16
+ describe '#to_s' do
17
+ it 'outputs the names of all the relations (given as separate arguments)' do
18
+ pig_latin = @relation1.union(@relation2, @relation3).to_s
19
+ pig_latin.should include('relation1')
20
+ pig_latin.should include('relation2')
21
+ pig_latin.should include('relation3')
22
+ end
23
+
24
+ it 'outputs the names of all the relations (given as an array)' do
25
+ pig_latin = @relation1.union([@relation2, @relation3]).to_s
26
+ pig_latin.should include('relation1')
27
+ pig_latin.should include('relation2')
28
+ pig_latin.should include('relation3')
29
+ end
30
+
31
+ it 'outputs a UNION statement with the right number of relations' do
32
+ pig_latin = @relation1.union(@relation2, @relation3).to_s
33
+ pig_latin.should match(/UNION \w+, \w+, \w+/)
34
+ end
35
+ end
36
+
37
+ end
@@ -0,0 +1,121 @@
1
+ require File.dirname(__FILE__) + '/../../spec_helper'
2
+
3
+
4
+ include Piglet::Schema
5
+
6
+
7
+ describe Tuple do
8
+
9
+ describe '.parse' do
10
+ it 'can parse a non-typed, single field description' do
11
+ tuple = Tuple.parse([:a])
12
+ tuple.field_names.should eql([:a])
13
+ end
14
+
15
+ it 'can parse a non-typed, multiple field description' do
16
+ tuple = Tuple.parse([:a, :b, :c])
17
+ tuple.field_names.should eql([:a, :b, :c])
18
+ end
19
+
20
+ it 'can parse a typed, single field description' do
21
+ tuple = Tuple.parse([[:a, :chararray]])
22
+ tuple.field_names.should eql([:a])
23
+ tuple.field_type(:a).should eql(:chararray)
24
+ end
25
+
26
+ it 'can parse a typed, multiple field description' do
27
+ tuple = Tuple.parse([[:a, :chararray], [:b, :double]])
28
+ tuple.field_names.should eql([:a, :b])
29
+ tuple.field_type(:a).should eql(:chararray)
30
+ tuple.field_type(:b).should eql(:double)
31
+ end
32
+
33
+ it 'can parse a mixed typed and non-typed field description' do
34
+ tuple = Tuple.parse([:a, [:b, :double]])
35
+ tuple.field_names.should eql([:a, :b])
36
+ tuple.field_type(:b).should eql(:double)
37
+ end
38
+
39
+ it 'defaults to :bytearray for untyped fields' do
40
+ tuple = Tuple.parse([:a])
41
+ tuple.field_type(:a).should eql(:bytearray)
42
+ end
43
+
44
+ it 'accepts a Tuple object as the type of a field' do
45
+ tuple = Tuple.parse([[:a, Tuple.parse([:c, :d])]])
46
+ tuple.field_type(:a).should be_a(Tuple)
47
+ tuple.field_type(:a).field_names.should eql([:c, :d])
48
+ end
49
+
50
+ it 'can parse a Tuple from a field typed as :tuple' do
51
+ tuple = Tuple.parse([[:a, :tuple, [:c, :d]]])
52
+ tuple.field_type(:a).should be_a(Tuple)
53
+ tuple.field_type(:a).field_names.should eql([:c, :d])
54
+ end
55
+
56
+ it 'accepts a Bag object as the type of a field' do
57
+ tuple = Tuple.parse([[:a, Bag.new(Tuple.parse([:c, :d]))]])
58
+ tuple.field_type(:a).should be_a(Bag)
59
+ tuple.field_type(:a).field_names.should eql([:c, :d])
60
+ end
61
+
62
+ it 'can parse a Bag from a field typed as :bag' do
63
+ tuple = Tuple.parse([[:a, :bag, [:c, :d]]])
64
+ tuple.field_type(:a).should be_a(Bag)
65
+ tuple.field_type(:a).field_names.should eql([:c, :d])
66
+ end
67
+
68
+ it 'can parse a description that lacks field names (and fall back to making the fields accessible by index)' do
69
+ tuple = Tuple.parse([[nil, :chararray], [nil, :int]])
70
+ tuple.field_type(1).should eql(:int)
71
+ end
72
+ end
73
+
74
+ describe '#union' do
75
+ it 'creates a new tuple with the fields from two tuples' do
76
+ t1 = Tuple.parse([:a, :b, :c])
77
+ t2 = Tuple.parse([:d, :e, :f])
78
+ t3 = t1.union(t2)
79
+ t3.field_names.should eql([:a, :b, :c, :d, :e, :f])
80
+ end
81
+
82
+ it 'creates a new tuple with the fields from three tuples' do
83
+ t1 = Tuple.parse([:a, :b, :c])
84
+ t2 = Tuple.parse([:d, :e, :f])
85
+ t3 = Tuple.parse([:g, :h, :i])
86
+ t4 = t1.union(t2, t3)
87
+ t4.field_names.should eql([:a, :b, :c, :d, :e, :f, :g, :h, :i])
88
+ end
89
+
90
+ it 'creates a new tuple with the fields from three tuples (arguments as an array)' do
91
+ t1 = Tuple.parse([:a, :b, :c])
92
+ t2 = Tuple.parse([:d, :e, :f])
93
+ t3 = Tuple.parse([:g, :h, :i])
94
+ t4 = t1.union([t2, t3])
95
+ t4.field_names.should eql([:a, :b, :c, :d, :e, :f, :g, :h, :i])
96
+ end
97
+
98
+ it 'retains all the fields even if some have the same name' do
99
+ t1 = Tuple.parse([:a, :b, :c])
100
+ t2 = Tuple.parse([:b, :c, :d])
101
+ t3 = t1.union(t2)
102
+ t3.field_names.should eql([:a, :b, :c, :b, :c, :d])
103
+ end
104
+ end
105
+
106
+ describe '#to_s' do
107
+ it 'returns the schema string for a simple untyped schema' do
108
+ Tuple.parse([:a, :b]).to_s.should eql('(a:bytearray, b:bytearray)')
109
+ end
110
+
111
+ it 'returns the schema string for a simple typed schema' do
112
+ Tuple.parse([[:a, :chararray], [:b, :int]]).to_s.should eql('(a:chararray, b:int)')
113
+ end
114
+
115
+ it 'returns the schema string for a nested schema' do
116
+ description = [[:a, :tuple, [[:x, :int], [:y, :float]]], [:b, :bag, [[:w, :bytearray]]]]
117
+ Tuple.parse(description).to_s.should eql('(a:tuple (x:int, y:float), b:bag {w:bytearray})')
118
+ end
119
+ end
120
+
121
+ end
data/spec/piglet_spec.rb CHANGED
@@ -3,5 +3,669 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
4
  describe Piglet do
5
5
 
6
+ before do
7
+ @interpreter = Piglet::Interpreter.new
8
+ end
9
+
10
+ context 'load & store operators:' do
11
+ describe 'LOAD' do
12
+ it 'outputs a LOAD statement' do
13
+ @interpreter.interpret { store(load('some/path'), 'out') }
14
+ @interpreter.to_pig_latin.should include("LOAD 'some/path'")
15
+ end
16
+
17
+ it 'outputs a LOAD statement without a USING clause if none specified' do
18
+ @interpreter.interpret { store(load('some/path'), 'out') }
19
+ @interpreter.to_pig_latin.should_not include('USING')
20
+ end
21
+
22
+ it 'outputs a LOAD statement with a USING clause with a specified function' do
23
+ @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
24
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
25
+ end
26
+
27
+ Piglet::Inout::StorageTypes::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
28
+ it "knows that the load method :#{symbolic_name} means #{function}" do
29
+ @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
30
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
31
+ end
32
+ end
33
+
34
+ it 'outputs a LOAD statement with an AS clause' do
35
+ @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
36
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
37
+ end
38
+
39
+ it 'outputs a LOAD statement with an AS clause with types' do
40
+ @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
41
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
42
+ end
43
+
44
+ it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
45
+ @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
46
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
47
+ end
48
+ end
49
+
50
+ describe 'STORE' do
51
+ it 'outputs a STORE statement' do
52
+ @interpreter.interpret { store(load('some/path'), 'out') }
53
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
54
+ end
55
+
56
+ it 'outputs a STORE statement without a USING clause if none specified' do
57
+ @interpreter.interpret { store(load('some/path'), 'out') }
58
+ @interpreter.to_pig_latin.should_not include("USING")
59
+ end
60
+
61
+ it 'outputs a STORE statement with a USING clause with a specified function' do
62
+ @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
63
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
64
+ end
65
+
66
+ it 'knows that the store method :pig_storage means PigStorage' do
67
+ @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
68
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
69
+ end
70
+ end
71
+
72
+ describe 'DUMP' do
73
+ it 'outputs a DUMP statement' do
74
+ @interpreter.interpret { dump(load('some/path')) }
75
+ @interpreter.to_pig_latin.should match(/DUMP \w+/)
76
+ end
77
+ end
78
+ end
79
+
80
+ context 'diagnostic operators:' do
81
+ describe 'ILLUSTRATE' do
82
+ it 'outputs an ILLUSTRATE statement' do
83
+ @interpreter.interpret { illustrate(load('some/path')) }
84
+ @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
85
+ end
86
+ end
87
+
88
+ describe 'DESCRIBE' do
89
+ it 'outputs a DESCRIBE statement' do
90
+ @interpreter.interpret { describe(load('some/path')) }
91
+ @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
92
+ end
93
+ end
94
+
95
+ describe 'EXPLAIN' do
96
+ it 'outputs an EXPLAIN statement' do
97
+ @interpreter.interpret { explain(load('some/path')) }
98
+ @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
99
+ end
100
+
101
+ it 'outputs an EXPLAIN statement without an alias' do
102
+ @interpreter.interpret { explain }
103
+ @interpreter.to_pig_latin.should match(/EXPLAIN;/)
104
+ end
105
+ end
106
+ end
107
+
108
+ context 'relation operators:' do
109
+ describe 'GROUP' do
110
+ it 'outputs a GROUP statement with one grouping field' do
111
+ @interpreter.interpret { store(load('in').group(:a), 'out') }
112
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
113
+ end
114
+
115
+ it 'outputs a GROUP statement with more than one grouping field' do
116
+ @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
117
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
118
+ end
119
+
120
+ it 'outputs a GROUP statement with a PARALLEL clause' do
121
+ @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
122
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
123
+ end
124
+ end
125
+
126
+ describe 'DISTINCT' do
127
+ it 'outputs a DISTINCT statement' do
128
+ @interpreter.interpret { store(load('in').distinct, 'out') }
129
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
130
+ end
131
+
132
+ it 'outputs a DISTINCT statement with a PARALLEL clause' do
133
+ @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
134
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
135
+ end
136
+ end
137
+
138
+ describe 'CROSS' do
139
+ it 'outputs a CROSS statement with two relations' do
140
+ @interpreter.interpret do
141
+ a = load('in1')
142
+ b = load('in2')
143
+ c = a.cross(b)
144
+ dump(c)
145
+ end
146
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
147
+ end
148
+
149
+ it 'outputs a CROSS statement with many relations' do
150
+ @interpreter.interpret do
151
+ a = load('in1')
152
+ b = load('in2')
153
+ c = load('in3')
154
+ d = load('in4')
155
+ e = a.cross(b, c, d)
156
+ dump(e)
157
+ end
158
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
159
+ end
160
+
161
+ it 'outputs a CROSS statement with a PARALLEL clause' do
162
+ @interpreter.interpret do
163
+ a = load('in1')
164
+ b = load('in2')
165
+ c = load('in3')
166
+ d = a.cross([b, c], :parallel => 4)
167
+ dump(d)
168
+ end
169
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
170
+ end
171
+ end
172
+
173
+ describe 'UNION' do
174
+ it 'outputs a UNION statement with two relations' do
175
+ @interpreter.interpret do
176
+ a = load('in1')
177
+ b = load('in2')
178
+ c = a.union(b)
179
+ dump(c)
180
+ end
181
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
182
+ end
183
+
184
+ it 'outputs a UNION statement with many relations' do
185
+ @interpreter.interpret do
186
+ a = load('in1')
187
+ b = load('in2')
188
+ c = load('in3')
189
+ d = load('in4')
190
+ e = a.union(b, c, d)
191
+ dump(e)
192
+ end
193
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
194
+ end
195
+ end
196
+
197
+ describe 'SAMPLE' do
198
+ it 'outputs a SAMPLE statement' do
199
+ @interpreter.interpret { dump(load('in').sample(10)) }
200
+ @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
201
+ end
202
+ end
203
+
204
+ describe 'LIMIT' do
205
+ it 'outputs a LIMIT statement' do
206
+ @interpreter.interpret { dump(load('in').limit(42)) }
207
+ @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
208
+ end
209
+ end
210
+
211
+ describe 'FOREACH … GENERATE' do
212
+ it 'outputs a FOREACH … GENERATE statement' do
213
+ @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
214
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
215
+ end
216
+
217
+ it 'outputs a FOREACH … GENERATE statement with a list of fields' do
218
+ @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
219
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
220
+ end
221
+
222
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
223
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
224
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
225
+ end
226
+
227
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
228
+ @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
229
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
230
+ end
231
+
232
+ it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
233
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
234
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
235
+ end
236
+
237
+ it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
238
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
239
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
240
+ end
241
+
242
+ it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
243
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
244
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
245
+ end
246
+ end
247
+
248
+ describe 'FILTER' do
249
+ it 'outputs a FILTER statement' do
250
+ @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
251
+ @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
252
+ end
253
+
254
+ it 'outputs a FILTER statement with a complex test' do
255
+ @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) }
256
+ @interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/)
257
+ end
258
+ end
259
+
260
+ describe 'SPLIT' do
261
+ it 'outputs a SPLIT statement' do
262
+ @interpreter.interpret do
263
+ a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
264
+ dump(a)
265
+ dump(b)
266
+ end
267
+ @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
268
+ end
269
+ end
270
+
271
+ describe 'ORDER' do
272
+ it 'outputs an ORDER statement' do
273
+ @interpreter.interpret { dump(load('in').order(:a)) }
274
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
275
+ end
276
+
277
+ it 'outputs an ORDER statement with multiple fields' do
278
+ @interpreter.interpret { dump(load('in').order(:a, :b)) }
279
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
280
+ end
281
+
282
+ it 'outputs an ORDER statement with ASC and DESC' do
283
+ @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
284
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
285
+ end
286
+ end
287
+
288
+ describe 'JOIN' do
289
+ it 'outputs a JOIN statement' do
290
+ @interpreter.interpret do
291
+ a = load('in1')
292
+ b = load('in2')
293
+ c = a.join(a => :x, b => :y)
294
+ dump(c)
295
+ end
296
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
297
+ end
298
+
299
+ it 'outputs a JOIN statement with a PARALLEL clause' do
300
+ @interpreter.interpret do
301
+ a = load('in1')
302
+ b = load('in2')
303
+ c = a.join(a => :x, b => :y, :parallel => 5)
304
+ dump(c)
305
+ end
306
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
307
+ end
308
+
309
+ it 'outputs a JOIN statement with a USING clause' do
310
+ @interpreter.interpret do
311
+ a = load('in1')
312
+ b = load('in2')
313
+ c = a.join(a => :x, b => :y, :using => :replicated)
314
+ dump(c)
315
+ end
316
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
317
+ end
318
+ end
319
+
320
+ describe 'COGROUP' do
321
+ it 'outputs a COGROUP statement' do
322
+ @interpreter.interpret do
323
+ a = load('in1')
324
+ b = load('in2')
325
+ c = a.cogroup(a => :x, b => :y)
326
+ dump(c)
327
+ end
328
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
329
+ end
330
+
331
+ it 'outputs a COGROUP statement with multiple join fields' do
332
+ @interpreter.interpret do
333
+ a = load('in1')
334
+ b = load('in2')
335
+ c = a.cogroup(a => :x, b => [:y, :z, :w])
336
+ dump(c)
337
+ end
338
+ @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
339
+ end
340
+
341
+ it 'outputs a COGROUP statement with a PARALLEL clause' do
342
+ @interpreter.interpret do
343
+ a = load('in1')
344
+ b = load('in2')
345
+ c = a.cogroup(a => :x, b => :y, :parallel => 5)
346
+ dump(c)
347
+ end
348
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
349
+ end
350
+
351
+ it 'outputs a COGROUP statement with INNER and OUTER' do
352
+ @interpreter.interpret do
353
+ a = load('in1')
354
+ b = load('in2')
355
+ c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
356
+ dump(c)
357
+ end
358
+ @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
359
+ @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
360
+ end
361
+ end
362
+ end
363
+
364
+ context 'aliasing & multiple statements' do
365
+ it 'aliases the loaded relation and uses the same alias in the STORE statement' do
366
+ @interpreter.interpret { store(load('in'), 'out') }
367
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
368
+ end
369
+
370
+ it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
371
+ @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
372
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
373
+ end
374
+
375
+ it 'aliases a whole row of statements' do
376
+ @interpreter.interpret do
377
+ a = load('in', :schema => [:a])
378
+ b = a.group(:a)
379
+ c = b.group(:a)
380
+ d = c.group(:a)
381
+ store(d, 'out')
382
+ end
383
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
384
+ end
385
+
386
+ it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
387
+ @interpreter.interpret do
388
+ a = load('in')
389
+ b = a.distinct
390
+ store(b, 'out1')
391
+ store(b, 'out2')
392
+ end
393
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
394
+ end
395
+ end
396
+
397
+ context 'long and complex scripts' do
398
+ before do
399
+ @interpreter.interpret do
400
+ sessions = load('sessions', :schema => [
401
+ [:ad_id, :chararray],
402
+ [:site, :chararray],
403
+ [:size, :chararray],
404
+ [:name, :chararray],
405
+ [:impression, :int],
406
+ [:engagement, :int],
407
+ [:click_thru, :int]
408
+ ])
409
+ %w(site size name).each do |dimension|
410
+ result = sessions.group(:ad_id, dimension).foreach do |r|
411
+ [
412
+ r[0].ad_id.as(:ad_id),
413
+ literal(dimension).as(:dimension),
414
+ r[0].field(dimension).as(:value),
415
+ r[1].exposure.sum.as(:exposures),
416
+ r[1].impression.sum.as(:impressions),
417
+ r[1].engagement.sum.as(:engagements),
418
+ r[1].click_thru.sum.as(:click_thrus)
419
+ ]
420
+ end
421
+ store(result, "report_metrics-#{dimension}")
422
+ end
423
+ end
424
+ @output = @interpreter.to_pig_latin
425
+ end
426
+
427
+ it 'outputs the correct number of LOAD statements' do
428
+ @output.scan(/LOAD/).size.should eql(1)
429
+ end
430
+
431
+ it 'outputs the correct number of STORE statements' do
432
+ @output.scan(/STORE/).size.should eql(3)
433
+ end
434
+
435
+ it 'doesn\'t assign to the same relation twice' do
436
+ @assignments = @output.scan(/^(\w+)(?=\s*=)/).flatten
437
+ @assignments.uniq.should eql(@assignments)
438
+ end
439
+ end
440
+
441
+ context 'schemas' do
442
+ it 'knows the schema of a relation returned by #load, with types' do
443
+ schema = catch(:schema) do
444
+ @interpreter.interpret do
445
+ schema = load('in', :schema => [[:a, :chararray], [:b, :chararray]]).schema
446
+ throw :schema, schema
447
+ end
448
+ end
449
+ schema.field_names.should eql([:a, :b])
450
+ schema.field_type(:a).should eql(:chararray)
451
+ end
452
+
453
+ it 'knows the schema of a relation returned by #load, without types' do
454
+ schema = catch(:schema) do
455
+ @interpreter.interpret do
456
+ schema = load('in', :schema => [:a, :b]).schema
457
+ throw :schema, schema
458
+ end
459
+ end
460
+ schema.field_names.should eql([:a, :b])
461
+ schema.field_type(:a).should eql(:bytearray)
462
+ end
463
+
464
+ it 'knows the schema of a relation returned by #load, with and without types' do
465
+ schema = catch(:schema) do
466
+ @interpreter.interpret do
467
+ schema = load('in', :schema => [[:a, :float], :b]).schema
468
+ throw :schema, schema
469
+ end
470
+ end
471
+ schema.field_names.should eql([:a, :b])
472
+ schema.field_type(:a).should eql(:float)
473
+ end
474
+
475
+ it 'does not know anything about the schema of a relation returned by #load if no schema was given' do
476
+ relation = catch(:relation) do
477
+ @interpreter.interpret do
478
+ throw :relation, load('in')
479
+ end
480
+ end
481
+ relation.schema.should be_nil
482
+ end
483
+
484
+ it 'knows the schema of a relation derived through non-schema-changing operations' do
485
+ schema = catch(:schema) do
486
+ @interpreter.interpret do
487
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).limit(3).sample(0.1).distinct.order(:a)
488
+ throw :schema, relation.schema
489
+ end
490
+ end
491
+ schema.field_names.should eql([:a, :b])
492
+ schema.field_type(:a).should eql(:float)
493
+ schema.field_type(:b).should eql(:int)
494
+ end
495
+
496
+ it 'knows the schema of a relation grouped on one field' do
497
+ relation = catch(:relation) do
498
+ @interpreter.interpret do
499
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).group(:a)
500
+ throw :relation, relation
501
+ end
502
+ end
503
+ source_relation_name = relation.sources.first.alias.to_sym
504
+ relation.schema.field_names.should eql([:group, source_relation_name])
505
+ relation.schema.field_type(:group).should eql(:float)
506
+ relation.schema.field_type(source_relation_name).should be_a(Piglet::Schema::Bag)
507
+ relation.schema.field_type(source_relation_name).field_names.should eql([:a, :b])
508
+ relation.schema.field_type(source_relation_name).field_type(:a).should eql(:float)
509
+ end
510
+
511
+ it 'knows the schema of a relation grouped on more than one field' do
512
+ relation = catch(:relation) do
513
+ @interpreter.interpret do
514
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).group(:a, :b)
515
+ throw :relation, relation
516
+ end
517
+ end
518
+ source_relation_name = relation.sources.first.alias.to_sym
519
+ relation.schema.field_names.should eql([:group, source_relation_name])
520
+ relation.schema.field_type(:group).should be_a(Piglet::Schema::Tuple)
521
+ relation.schema.field_type(:group).field_names.should eql([:a, :b])
522
+ relation.schema.field_type(:group).field_type(:a).should eql(:float)
523
+ relation.schema.field_type(source_relation_name).should be_a(Piglet::Schema::Bag)
524
+ relation.schema.field_type(source_relation_name).field_names.should eql([:a, :b])
525
+ relation.schema.field_type(source_relation_name).field_type(:b).should eql(:int)
526
+ end
527
+
528
+ it 'knows the schema of a relation cross joined with itself' do
529
+ schema = catch(:schema) do
530
+ @interpreter.interpret do
531
+ relation = load('in', :schema => [[:a, :float], [:b, :int]])
532
+ relation = relation.cross(relation)
533
+ throw :schema, relation.schema
534
+ end
535
+ end
536
+ schema.field_names.should eql([:a, :b, :a, :b])
537
+ schema.field_type(:a).should eql(:float)
538
+ schema.field_type(:b).should eql(:int)
539
+ end
540
+
541
+ it 'knows the schema of a relation cross joined with another' do
542
+ schema = catch(:schema) do
543
+ @interpreter.interpret do
544
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
545
+ relation2 = load('in2', :schema => [[:c, :chararray], [:d, :double]])
546
+ relation3 = relation1.cross(relation2)
547
+ throw :schema, relation3.schema
548
+ end
549
+ end
550
+ schema.field_names.should eql([:a, :b, :c, :d])
551
+ schema.field_type(:a).should eql(:float)
552
+ schema.field_type(:b).should eql(:int)
553
+ schema.field_type(:c).should eql(:chararray)
554
+ schema.field_type(:d).should eql(:double)
555
+ end
556
+
557
+ it 'knows the schema of a relation joined with another' do
558
+ schema = catch(:schema) do
559
+ @interpreter.interpret do
560
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
561
+ relation2 = load('in2', :schema => [[:c, :int], [:d, :double]])
562
+ relation3 = relation1.join(relation1 => :b, relation2 => :c)
563
+ throw :schema, relation3.schema
564
+ end
565
+ end
566
+ schema.field_names.should eql([:a, :b, :c, :d])
567
+ schema.field_type(:a).should eql(:float)
568
+ schema.field_type(:b).should eql(:int)
569
+ schema.field_type(:c).should eql(:int)
570
+ schema.field_type(:d).should eql(:double)
571
+ end
572
+
573
+ it 'knows the schema of a relation cogrouped with another' do
574
+ relation1, relation2, relation3 = catch(:relations) do
575
+ @interpreter.interpret do
576
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
577
+ relation2 = load('in2', :schema => [[:c, :int], [:d, :double]])
578
+ relation3 = relation1.cogroup(relation1 => :b, relation2 => :c)
579
+ throw :relations, [relation1, relation2, relation3]
580
+ end
581
+ end
582
+ relation3.schema.field_names.should eql([:group, relation1.alias.to_sym, relation2.alias.to_sym])
583
+ relation3.schema.field_type(relation1.alias.to_sym).should be_a(Piglet::Schema::Bag)
584
+ relation3.schema.field_type(relation2.alias.to_sym).should be_a(Piglet::Schema::Bag)
585
+ relation3.schema.field_type(relation1.alias.to_sym).field_names.should eql([:a, :b])
586
+ relation3.schema.field_type(relation2.alias.to_sym).field_names.should eql([:c, :d])
587
+ end
588
+
589
+ it 'knows the schema of a relation projection' do
590
+ schema = catch(:schema) do
591
+ @interpreter.interpret do
592
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
593
+ relation2 = relation1.foreach { |r| [r.a] }
594
+ throw :schema, relation2.schema
595
+ end
596
+ end
597
+ schema.field_names.should eql([:a])
598
+ schema.field_type(:a).should eql(:float)
599
+ end
600
+
601
+ it 'knows the schema of a relation projection containing a call to MAX' do
602
+ schema = catch(:schema) do
603
+ @interpreter.interpret do
604
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
605
+ relation2 = relation1.foreach { |r| [r.a.max] }
606
+ throw :schema, relation2.schema
607
+ end
608
+ end
609
+ schema.field_names.should eql([nil])
610
+ schema.field_type(0).should eql(:float)
611
+ end
612
+
613
+ it 'knows the schema of a relation projection containing a call to COUNT' do
614
+ schema = catch(:schema) do
615
+ @interpreter.interpret do
616
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
617
+ relation2 = relation1.foreach { |r| [r.a.count] }
618
+ throw :schema, relation2.schema
619
+ end
620
+ end
621
+ schema.field_names.should eql([nil])
622
+ schema.field_type(0).should eql(:long)
623
+ end
624
+
625
+ it 'knows the schema of a relation projection containing a field rename' do
626
+ schema = catch(:schema) do
627
+ @interpreter.interpret do
628
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
629
+ relation2 = relation1.foreach { |r| [r.a.count.as(:x)] }
630
+ throw :schema, relation2.schema
631
+ end
632
+ end
633
+ schema.field_names.should eql([:x])
634
+ end
635
+
636
+ it 'knows the schema of a relation projection containing a literal string' do
637
+ schema = catch(:schema) do
638
+ @interpreter.interpret do
639
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
640
+ relation2 = relation1.foreach { |r| [literal('blipp')] }
641
+ throw :schema, relation2.schema
642
+ end
643
+ end
644
+ schema.field_type(0).should eql(:chararray)
645
+ end
646
+
647
+ it 'knows the schema of a relation projection containing a literal integer' do
648
+ schema = catch(:schema) do
649
+ @interpreter.interpret do
650
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
651
+ relation2 = relation1.foreach { |r| [literal(4)] }
652
+ throw :schema, relation2.schema
653
+ end
654
+ end
655
+ schema.field_type(0).should eql(:int)
656
+ end
657
+
658
+ it 'knows the schema of a relation projection containing a literal float' do
659
+ schema = catch(:schema) do
660
+ @interpreter.interpret do
661
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
662
+ relation2 = relation1.foreach { |r| [literal(3.14)] }
663
+ throw :schema, relation2.schema
664
+ end
665
+ end
666
+ schema.field_type(0).should eql(:double)
667
+ end
668
+
669
+ end
6
670
 
7
671
  end