piglet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,10 @@ describe Piglet::Relation::Relation do
74
74
  it 'returns fields with positional notation' do
75
75
  @relation[1].to_s.should eql('$1')
76
76
  end
77
+
78
+ it 'returns fields through a direct call to #field' do
79
+ @relation.field(:a).to_s.should eql('a')
80
+ end
77
81
  end
78
82
 
79
83
  end
@@ -0,0 +1,37 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ describe Piglet::Relation::Union do
5
+
6
+ before do
7
+ @relation1 = Object.new
8
+ @relation1.extend Piglet::Relation::Relation
9
+ @relation2 = mock('relation2')
10
+ @relation3 = mock('relation3')
11
+ @relation1.stub!(:alias).and_return('relation1')
12
+ @relation2.stub!(:alias).and_return('relation2')
13
+ @relation3.stub!(:alias).and_return('relation3')
14
+ end
15
+
16
+ describe '#to_s' do
17
+ it 'outputs the names of all the relations (given as separate arguments)' do
18
+ pig_latin = @relation1.union(@relation2, @relation3).to_s
19
+ pig_latin.should include('relation1')
20
+ pig_latin.should include('relation2')
21
+ pig_latin.should include('relation3')
22
+ end
23
+
24
+ it 'outputs the names of all the relations (given as an array)' do
25
+ pig_latin = @relation1.union([@relation2, @relation3]).to_s
26
+ pig_latin.should include('relation1')
27
+ pig_latin.should include('relation2')
28
+ pig_latin.should include('relation3')
29
+ end
30
+
31
+ it 'outputs a UNION statement with the right number of relations' do
32
+ pig_latin = @relation1.union(@relation2, @relation3).to_s
33
+ pig_latin.should match(/UNION \w+, \w+, \w+/)
34
+ end
35
+ end
36
+
37
+ end
@@ -0,0 +1,121 @@
1
+ require File.dirname(__FILE__) + '/../../spec_helper'
2
+
3
+
4
+ include Piglet::Schema
5
+
6
+
7
+ describe Tuple do
8
+
9
+ describe '.parse' do
10
+ it 'can parse a non-typed, single field description' do
11
+ tuple = Tuple.parse([:a])
12
+ tuple.field_names.should eql([:a])
13
+ end
14
+
15
+ it 'can parse a non-typed, multiple field description' do
16
+ tuple = Tuple.parse([:a, :b, :c])
17
+ tuple.field_names.should eql([:a, :b, :c])
18
+ end
19
+
20
+ it 'can parse a typed, single field description' do
21
+ tuple = Tuple.parse([[:a, :chararray]])
22
+ tuple.field_names.should eql([:a])
23
+ tuple.field_type(:a).should eql(:chararray)
24
+ end
25
+
26
+ it 'can parse a typed, multiple field description' do
27
+ tuple = Tuple.parse([[:a, :chararray], [:b, :double]])
28
+ tuple.field_names.should eql([:a, :b])
29
+ tuple.field_type(:a).should eql(:chararray)
30
+ tuple.field_type(:b).should eql(:double)
31
+ end
32
+
33
+ it 'can parse a mixed typed and non-typed field description' do
34
+ tuple = Tuple.parse([:a, [:b, :double]])
35
+ tuple.field_names.should eql([:a, :b])
36
+ tuple.field_type(:b).should eql(:double)
37
+ end
38
+
39
+ it 'defaults to :bytearray for untyped fields' do
40
+ tuple = Tuple.parse([:a])
41
+ tuple.field_type(:a).should eql(:bytearray)
42
+ end
43
+
44
+ it 'accepts a Tuple object as the type of a field' do
45
+ tuple = Tuple.parse([[:a, Tuple.parse([:c, :d])]])
46
+ tuple.field_type(:a).should be_a(Tuple)
47
+ tuple.field_type(:a).field_names.should eql([:c, :d])
48
+ end
49
+
50
+ it 'can parse a Tuple from a field typed as :tuple' do
51
+ tuple = Tuple.parse([[:a, :tuple, [:c, :d]]])
52
+ tuple.field_type(:a).should be_a(Tuple)
53
+ tuple.field_type(:a).field_names.should eql([:c, :d])
54
+ end
55
+
56
+ it 'accepts a Bag object as the type of a field' do
57
+ tuple = Tuple.parse([[:a, Bag.new(Tuple.parse([:c, :d]))]])
58
+ tuple.field_type(:a).should be_a(Bag)
59
+ tuple.field_type(:a).field_names.should eql([:c, :d])
60
+ end
61
+
62
+ it 'can parse a Bag from a field typed as :bag' do
63
+ tuple = Tuple.parse([[:a, :bag, [:c, :d]]])
64
+ tuple.field_type(:a).should be_a(Bag)
65
+ tuple.field_type(:a).field_names.should eql([:c, :d])
66
+ end
67
+
68
+ it 'can parse a description that lacks field names (and fall back to making the fields accessible by index)' do
69
+ tuple = Tuple.parse([[nil, :chararray], [nil, :int]])
70
+ tuple.field_type(1).should eql(:int)
71
+ end
72
+ end
73
+
74
+ describe '#union' do
75
+ it 'creates a new tuple with the fields from two tuples' do
76
+ t1 = Tuple.parse([:a, :b, :c])
77
+ t2 = Tuple.parse([:d, :e, :f])
78
+ t3 = t1.union(t2)
79
+ t3.field_names.should eql([:a, :b, :c, :d, :e, :f])
80
+ end
81
+
82
+ it 'creates a new tuple with the fields from three tuples' do
83
+ t1 = Tuple.parse([:a, :b, :c])
84
+ t2 = Tuple.parse([:d, :e, :f])
85
+ t3 = Tuple.parse([:g, :h, :i])
86
+ t4 = t1.union(t2, t3)
87
+ t4.field_names.should eql([:a, :b, :c, :d, :e, :f, :g, :h, :i])
88
+ end
89
+
90
+ it 'creates a new tuple with the fields from three tuples (arguments as an array)' do
91
+ t1 = Tuple.parse([:a, :b, :c])
92
+ t2 = Tuple.parse([:d, :e, :f])
93
+ t3 = Tuple.parse([:g, :h, :i])
94
+ t4 = t1.union([t2, t3])
95
+ t4.field_names.should eql([:a, :b, :c, :d, :e, :f, :g, :h, :i])
96
+ end
97
+
98
+ it 'retains all the fields even if some have the same name' do
99
+ t1 = Tuple.parse([:a, :b, :c])
100
+ t2 = Tuple.parse([:b, :c, :d])
101
+ t3 = t1.union(t2)
102
+ t3.field_names.should eql([:a, :b, :c, :b, :c, :d])
103
+ end
104
+ end
105
+
106
+ describe '#to_s' do
107
+ it 'returns the schema string for a simple untyped schema' do
108
+ Tuple.parse([:a, :b]).to_s.should eql('(a:bytearray, b:bytearray)')
109
+ end
110
+
111
+ it 'returns the schema string for a simple typed schema' do
112
+ Tuple.parse([[:a, :chararray], [:b, :int]]).to_s.should eql('(a:chararray, b:int)')
113
+ end
114
+
115
+ it 'returns the schema string for a nested schema' do
116
+ description = [[:a, :tuple, [[:x, :int], [:y, :float]]], [:b, :bag, [[:w, :bytearray]]]]
117
+ Tuple.parse(description).to_s.should eql('(a:tuple (x:int, y:float), b:bag {w:bytearray})')
118
+ end
119
+ end
120
+
121
+ end
data/spec/piglet_spec.rb CHANGED
@@ -3,5 +3,669 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
4
  describe Piglet do
5
5
 
6
+ before do
7
+ @interpreter = Piglet::Interpreter.new
8
+ end
9
+
10
+ context 'load & store operators:' do
11
+ describe 'LOAD' do
12
+ it 'outputs a LOAD statement' do
13
+ @interpreter.interpret { store(load('some/path'), 'out') }
14
+ @interpreter.to_pig_latin.should include("LOAD 'some/path'")
15
+ end
16
+
17
+ it 'outputs a LOAD statement without a USING clause if none specified' do
18
+ @interpreter.interpret { store(load('some/path'), 'out') }
19
+ @interpreter.to_pig_latin.should_not include('USING')
20
+ end
21
+
22
+ it 'outputs a LOAD statement with a USING clause with a specified function' do
23
+ @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
24
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
25
+ end
26
+
27
+ Piglet::Inout::StorageTypes::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
28
+ it "knows that the load method :#{symbolic_name} means #{function}" do
29
+ @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
30
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
31
+ end
32
+ end
33
+
34
+ it 'outputs a LOAD statement with an AS clause' do
35
+ @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
36
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
37
+ end
38
+
39
+ it 'outputs a LOAD statement with an AS clause with types' do
40
+ @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
41
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
42
+ end
43
+
44
+ it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
45
+ @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
46
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
47
+ end
48
+ end
49
+
50
+ describe 'STORE' do
51
+ it 'outputs a STORE statement' do
52
+ @interpreter.interpret { store(load('some/path'), 'out') }
53
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
54
+ end
55
+
56
+ it 'outputs a STORE statement without a USING clause if none specified' do
57
+ @interpreter.interpret { store(load('some/path'), 'out') }
58
+ @interpreter.to_pig_latin.should_not include("USING")
59
+ end
60
+
61
+ it 'outputs a STORE statement with a USING clause with a specified function' do
62
+ @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
63
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
64
+ end
65
+
66
+ it 'knows that the store method :pig_storage means PigStorage' do
67
+ @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
68
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
69
+ end
70
+ end
71
+
72
+ describe 'DUMP' do
73
+ it 'outputs a DUMP statement' do
74
+ @interpreter.interpret { dump(load('some/path')) }
75
+ @interpreter.to_pig_latin.should match(/DUMP \w+/)
76
+ end
77
+ end
78
+ end
79
+
80
+ context 'diagnostic operators:' do
81
+ describe 'ILLUSTRATE' do
82
+ it 'outputs an ILLUSTRATE statement' do
83
+ @interpreter.interpret { illustrate(load('some/path')) }
84
+ @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
85
+ end
86
+ end
87
+
88
+ describe 'DESCRIBE' do
89
+ it 'outputs a DESCRIBE statement' do
90
+ @interpreter.interpret { describe(load('some/path')) }
91
+ @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
92
+ end
93
+ end
94
+
95
+ describe 'EXPLAIN' do
96
+ it 'outputs an EXPLAIN statement' do
97
+ @interpreter.interpret { explain(load('some/path')) }
98
+ @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
99
+ end
100
+
101
+ it 'outputs an EXPLAIN statement without an alias' do
102
+ @interpreter.interpret { explain }
103
+ @interpreter.to_pig_latin.should match(/EXPLAIN;/)
104
+ end
105
+ end
106
+ end
107
+
108
+ context 'relation operators:' do
109
+ describe 'GROUP' do
110
+ it 'outputs a GROUP statement with one grouping field' do
111
+ @interpreter.interpret { store(load('in').group(:a), 'out') }
112
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
113
+ end
114
+
115
+ it 'outputs a GROUP statement with more than one grouping field' do
116
+ @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
117
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
118
+ end
119
+
120
+ it 'outputs a GROUP statement with a PARALLEL clause' do
121
+ @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
122
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
123
+ end
124
+ end
125
+
126
+ describe 'DISTINCT' do
127
+ it 'outputs a DISTINCT statement' do
128
+ @interpreter.interpret { store(load('in').distinct, 'out') }
129
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
130
+ end
131
+
132
+ it 'outputs a DISTINCT statement with a PARALLEL clause' do
133
+ @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
134
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
135
+ end
136
+ end
137
+
138
+ describe 'CROSS' do
139
+ it 'outputs a CROSS statement with two relations' do
140
+ @interpreter.interpret do
141
+ a = load('in1')
142
+ b = load('in2')
143
+ c = a.cross(b)
144
+ dump(c)
145
+ end
146
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
147
+ end
148
+
149
+ it 'outputs a CROSS statement with many relations' do
150
+ @interpreter.interpret do
151
+ a = load('in1')
152
+ b = load('in2')
153
+ c = load('in3')
154
+ d = load('in4')
155
+ e = a.cross(b, c, d)
156
+ dump(e)
157
+ end
158
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
159
+ end
160
+
161
+ it 'outputs a CROSS statement with a PARALLEL clause' do
162
+ @interpreter.interpret do
163
+ a = load('in1')
164
+ b = load('in2')
165
+ c = load('in3')
166
+ d = a.cross([b, c], :parallel => 4)
167
+ dump(d)
168
+ end
169
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
170
+ end
171
+ end
172
+
173
+ describe 'UNION' do
174
+ it 'outputs a UNION statement with two relations' do
175
+ @interpreter.interpret do
176
+ a = load('in1')
177
+ b = load('in2')
178
+ c = a.union(b)
179
+ dump(c)
180
+ end
181
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
182
+ end
183
+
184
+ it 'outputs a UNION statement with many relations' do
185
+ @interpreter.interpret do
186
+ a = load('in1')
187
+ b = load('in2')
188
+ c = load('in3')
189
+ d = load('in4')
190
+ e = a.union(b, c, d)
191
+ dump(e)
192
+ end
193
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
194
+ end
195
+ end
196
+
197
+ describe 'SAMPLE' do
198
+ it 'outputs a SAMPLE statement' do
199
+ @interpreter.interpret { dump(load('in').sample(10)) }
200
+ @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
201
+ end
202
+ end
203
+
204
+ describe 'LIMIT' do
205
+ it 'outputs a LIMIT statement' do
206
+ @interpreter.interpret { dump(load('in').limit(42)) }
207
+ @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
208
+ end
209
+ end
210
+
211
+ describe 'FOREACH … GENERATE' do
212
+ it 'outputs a FOREACH … GENERATE statement' do
213
+ @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
214
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
215
+ end
216
+
217
+ it 'outputs a FOREACH … GENERATE statement with a list of fields' do
218
+ @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
219
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
220
+ end
221
+
222
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
223
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
224
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
225
+ end
226
+
227
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
228
+ @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
229
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
230
+ end
231
+
232
+ it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
233
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
234
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
235
+ end
236
+
237
+ it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
238
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
239
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
240
+ end
241
+
242
+ it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
243
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
244
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
245
+ end
246
+ end
247
+
248
+ describe 'FILTER' do
249
+ it 'outputs a FILTER statement' do
250
+ @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
251
+ @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
252
+ end
253
+
254
+ it 'outputs a FILTER statement with a complex test' do
255
+ @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) }
256
+ @interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/)
257
+ end
258
+ end
259
+
260
+ describe 'SPLIT' do
261
+ it 'outputs a SPLIT statement' do
262
+ @interpreter.interpret do
263
+ a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
264
+ dump(a)
265
+ dump(b)
266
+ end
267
+ @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
268
+ end
269
+ end
270
+
271
+ describe 'ORDER' do
272
+ it 'outputs an ORDER statement' do
273
+ @interpreter.interpret { dump(load('in').order(:a)) }
274
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
275
+ end
276
+
277
+ it 'outputs an ORDER statement with multiple fields' do
278
+ @interpreter.interpret { dump(load('in').order(:a, :b)) }
279
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
280
+ end
281
+
282
+ it 'outputs an ORDER statement with ASC and DESC' do
283
+ @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
284
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
285
+ end
286
+ end
287
+
288
+ describe 'JOIN' do
289
+ it 'outputs a JOIN statement' do
290
+ @interpreter.interpret do
291
+ a = load('in1')
292
+ b = load('in2')
293
+ c = a.join(a => :x, b => :y)
294
+ dump(c)
295
+ end
296
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
297
+ end
298
+
299
+ it 'outputs a JOIN statement with a PARALLEL clause' do
300
+ @interpreter.interpret do
301
+ a = load('in1')
302
+ b = load('in2')
303
+ c = a.join(a => :x, b => :y, :parallel => 5)
304
+ dump(c)
305
+ end
306
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
307
+ end
308
+
309
+ it 'outputs a JOIN statement with a USING clause' do
310
+ @interpreter.interpret do
311
+ a = load('in1')
312
+ b = load('in2')
313
+ c = a.join(a => :x, b => :y, :using => :replicated)
314
+ dump(c)
315
+ end
316
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
317
+ end
318
+ end
319
+
320
+ describe 'COGROUP' do
321
+ it 'outputs a COGROUP statement' do
322
+ @interpreter.interpret do
323
+ a = load('in1')
324
+ b = load('in2')
325
+ c = a.cogroup(a => :x, b => :y)
326
+ dump(c)
327
+ end
328
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
329
+ end
330
+
331
+ it 'outputs a COGROUP statement with multiple join fields' do
332
+ @interpreter.interpret do
333
+ a = load('in1')
334
+ b = load('in2')
335
+ c = a.cogroup(a => :x, b => [:y, :z, :w])
336
+ dump(c)
337
+ end
338
+ @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
339
+ end
340
+
341
+ it 'outputs a COGROUP statement with a PARALLEL clause' do
342
+ @interpreter.interpret do
343
+ a = load('in1')
344
+ b = load('in2')
345
+ c = a.cogroup(a => :x, b => :y, :parallel => 5)
346
+ dump(c)
347
+ end
348
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
349
+ end
350
+
351
+ it 'outputs a COGROUP statement with INNER and OUTER' do
352
+ @interpreter.interpret do
353
+ a = load('in1')
354
+ b = load('in2')
355
+ c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
356
+ dump(c)
357
+ end
358
+ @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
359
+ @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
360
+ end
361
+ end
362
+ end
363
+
364
+ context 'aliasing & multiple statements' do
365
+ it 'aliases the loaded relation and uses the same alias in the STORE statement' do
366
+ @interpreter.interpret { store(load('in'), 'out') }
367
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
368
+ end
369
+
370
+ it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
371
+ @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
372
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
373
+ end
374
+
375
+ it 'aliases a whole row of statements' do
376
+ @interpreter.interpret do
377
+ a = load('in', :schema => [:a])
378
+ b = a.group(:a)
379
+ c = b.group(:a)
380
+ d = c.group(:a)
381
+ store(d, 'out')
382
+ end
383
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
384
+ end
385
+
386
+ it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
387
+ @interpreter.interpret do
388
+ a = load('in')
389
+ b = a.distinct
390
+ store(b, 'out1')
391
+ store(b, 'out2')
392
+ end
393
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
394
+ end
395
+ end
396
+
397
+ context 'long and complex scripts' do
398
+ before do
399
+ @interpreter.interpret do
400
+ sessions = load('sessions', :schema => [
401
+ [:ad_id, :chararray],
402
+ [:site, :chararray],
403
+ [:size, :chararray],
404
+ [:name, :chararray],
405
+ [:impression, :int],
406
+ [:engagement, :int],
407
+ [:click_thru, :int]
408
+ ])
409
+ %w(site size name).each do |dimension|
410
+ result = sessions.group(:ad_id, dimension).foreach do |r|
411
+ [
412
+ r[0].ad_id.as(:ad_id),
413
+ literal(dimension).as(:dimension),
414
+ r[0].field(dimension).as(:value),
415
+ r[1].exposure.sum.as(:exposures),
416
+ r[1].impression.sum.as(:impressions),
417
+ r[1].engagement.sum.as(:engagements),
418
+ r[1].click_thru.sum.as(:click_thrus)
419
+ ]
420
+ end
421
+ store(result, "report_metrics-#{dimension}")
422
+ end
423
+ end
424
+ @output = @interpreter.to_pig_latin
425
+ end
426
+
427
+ it 'outputs the correct number of LOAD statements' do
428
+ @output.scan(/LOAD/).size.should eql(1)
429
+ end
430
+
431
+ it 'outputs the correct number of STORE statements' do
432
+ @output.scan(/STORE/).size.should eql(3)
433
+ end
434
+
435
+ it 'doesn\'t assign to the same relation twice' do
436
+ @assignments = @output.scan(/^(\w+)(?=\s*=)/).flatten
437
+ @assignments.uniq.should eql(@assignments)
438
+ end
439
+ end
440
+
441
+ context 'schemas' do
442
+ it 'knows the schema of a relation returned by #load, with types' do
443
+ schema = catch(:schema) do
444
+ @interpreter.interpret do
445
+ schema = load('in', :schema => [[:a, :chararray], [:b, :chararray]]).schema
446
+ throw :schema, schema
447
+ end
448
+ end
449
+ schema.field_names.should eql([:a, :b])
450
+ schema.field_type(:a).should eql(:chararray)
451
+ end
452
+
453
+ it 'knows the schema of a relation returned by #load, without types' do
454
+ schema = catch(:schema) do
455
+ @interpreter.interpret do
456
+ schema = load('in', :schema => [:a, :b]).schema
457
+ throw :schema, schema
458
+ end
459
+ end
460
+ schema.field_names.should eql([:a, :b])
461
+ schema.field_type(:a).should eql(:bytearray)
462
+ end
463
+
464
+ it 'knows the schema of a relation returned by #load, with and without types' do
465
+ schema = catch(:schema) do
466
+ @interpreter.interpret do
467
+ schema = load('in', :schema => [[:a, :float], :b]).schema
468
+ throw :schema, schema
469
+ end
470
+ end
471
+ schema.field_names.should eql([:a, :b])
472
+ schema.field_type(:a).should eql(:float)
473
+ end
474
+
475
+ it 'does not know anything about the schema of a relation returned by #load if no schema was given' do
476
+ relation = catch(:relation) do
477
+ @interpreter.interpret do
478
+ throw :relation, load('in')
479
+ end
480
+ end
481
+ relation.schema.should be_nil
482
+ end
483
+
484
+ it 'knows the schema of a relation derived through non-schema-changing operations' do
485
+ schema = catch(:schema) do
486
+ @interpreter.interpret do
487
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).limit(3).sample(0.1).distinct.order(:a)
488
+ throw :schema, relation.schema
489
+ end
490
+ end
491
+ schema.field_names.should eql([:a, :b])
492
+ schema.field_type(:a).should eql(:float)
493
+ schema.field_type(:b).should eql(:int)
494
+ end
495
+
496
+ it 'knows the schema of a relation grouped on one field' do
497
+ relation = catch(:relation) do
498
+ @interpreter.interpret do
499
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).group(:a)
500
+ throw :relation, relation
501
+ end
502
+ end
503
+ source_relation_name = relation.sources.first.alias.to_sym
504
+ relation.schema.field_names.should eql([:group, source_relation_name])
505
+ relation.schema.field_type(:group).should eql(:float)
506
+ relation.schema.field_type(source_relation_name).should be_a(Piglet::Schema::Bag)
507
+ relation.schema.field_type(source_relation_name).field_names.should eql([:a, :b])
508
+ relation.schema.field_type(source_relation_name).field_type(:a).should eql(:float)
509
+ end
510
+
511
+ it 'knows the schema of a relation grouped on more than one field' do
512
+ relation = catch(:relation) do
513
+ @interpreter.interpret do
514
+ relation = load('in', :schema => [[:a, :float], [:b, :int]]).group(:a, :b)
515
+ throw :relation, relation
516
+ end
517
+ end
518
+ source_relation_name = relation.sources.first.alias.to_sym
519
+ relation.schema.field_names.should eql([:group, source_relation_name])
520
+ relation.schema.field_type(:group).should be_a(Piglet::Schema::Tuple)
521
+ relation.schema.field_type(:group).field_names.should eql([:a, :b])
522
+ relation.schema.field_type(:group).field_type(:a).should eql(:float)
523
+ relation.schema.field_type(source_relation_name).should be_a(Piglet::Schema::Bag)
524
+ relation.schema.field_type(source_relation_name).field_names.should eql([:a, :b])
525
+ relation.schema.field_type(source_relation_name).field_type(:b).should eql(:int)
526
+ end
527
+
528
+ it 'knows the schema of a relation cross joined with itself' do
529
+ schema = catch(:schema) do
530
+ @interpreter.interpret do
531
+ relation = load('in', :schema => [[:a, :float], [:b, :int]])
532
+ relation = relation.cross(relation)
533
+ throw :schema, relation.schema
534
+ end
535
+ end
536
+ schema.field_names.should eql([:a, :b, :a, :b])
537
+ schema.field_type(:a).should eql(:float)
538
+ schema.field_type(:b).should eql(:int)
539
+ end
540
+
541
+ it 'knows the schema of a relation cross joined with another' do
542
+ schema = catch(:schema) do
543
+ @interpreter.interpret do
544
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
545
+ relation2 = load('in2', :schema => [[:c, :chararray], [:d, :double]])
546
+ relation3 = relation1.cross(relation2)
547
+ throw :schema, relation3.schema
548
+ end
549
+ end
550
+ schema.field_names.should eql([:a, :b, :c, :d])
551
+ schema.field_type(:a).should eql(:float)
552
+ schema.field_type(:b).should eql(:int)
553
+ schema.field_type(:c).should eql(:chararray)
554
+ schema.field_type(:d).should eql(:double)
555
+ end
556
+
557
+ it 'knows the schema of a relation joined with another' do
558
+ schema = catch(:schema) do
559
+ @interpreter.interpret do
560
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
561
+ relation2 = load('in2', :schema => [[:c, :int], [:d, :double]])
562
+ relation3 = relation1.join(relation1 => :b, relation2 => :c)
563
+ throw :schema, relation3.schema
564
+ end
565
+ end
566
+ schema.field_names.should eql([:a, :b, :c, :d])
567
+ schema.field_type(:a).should eql(:float)
568
+ schema.field_type(:b).should eql(:int)
569
+ schema.field_type(:c).should eql(:int)
570
+ schema.field_type(:d).should eql(:double)
571
+ end
572
+
573
+ it 'knows the schema of a relation cogrouped with another' do
574
+ relation1, relation2, relation3 = catch(:relations) do
575
+ @interpreter.interpret do
576
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
577
+ relation2 = load('in2', :schema => [[:c, :int], [:d, :double]])
578
+ relation3 = relation1.cogroup(relation1 => :b, relation2 => :c)
579
+ throw :relations, [relation1, relation2, relation3]
580
+ end
581
+ end
582
+ relation3.schema.field_names.should eql([:group, relation1.alias.to_sym, relation2.alias.to_sym])
583
+ relation3.schema.field_type(relation1.alias.to_sym).should be_a(Piglet::Schema::Bag)
584
+ relation3.schema.field_type(relation2.alias.to_sym).should be_a(Piglet::Schema::Bag)
585
+ relation3.schema.field_type(relation1.alias.to_sym).field_names.should eql([:a, :b])
586
+ relation3.schema.field_type(relation2.alias.to_sym).field_names.should eql([:c, :d])
587
+ end
588
+
589
+ it 'knows the schema of a relation projection' do
590
+ schema = catch(:schema) do
591
+ @interpreter.interpret do
592
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
593
+ relation2 = relation1.foreach { |r| [r.a] }
594
+ throw :schema, relation2.schema
595
+ end
596
+ end
597
+ schema.field_names.should eql([:a])
598
+ schema.field_type(:a).should eql(:float)
599
+ end
600
+
601
+ it 'knows the schema of a relation projection containing a call to MAX' do
602
+ schema = catch(:schema) do
603
+ @interpreter.interpret do
604
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
605
+ relation2 = relation1.foreach { |r| [r.a.max] }
606
+ throw :schema, relation2.schema
607
+ end
608
+ end
609
+ schema.field_names.should eql([nil])
610
+ schema.field_type(0).should eql(:float)
611
+ end
612
+
613
+ it 'knows the schema of a relation projection containing a call to COUNT' do
614
+ schema = catch(:schema) do
615
+ @interpreter.interpret do
616
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
617
+ relation2 = relation1.foreach { |r| [r.a.count] }
618
+ throw :schema, relation2.schema
619
+ end
620
+ end
621
+ schema.field_names.should eql([nil])
622
+ schema.field_type(0).should eql(:long)
623
+ end
624
+
625
+ it 'knows the schema of a relation projection containing a field rename' do
626
+ schema = catch(:schema) do
627
+ @interpreter.interpret do
628
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
629
+ relation2 = relation1.foreach { |r| [r.a.count.as(:x)] }
630
+ throw :schema, relation2.schema
631
+ end
632
+ end
633
+ schema.field_names.should eql([:x])
634
+ end
635
+
636
+ it 'knows the schema of a relation projection containing a literal string' do
637
+ schema = catch(:schema) do
638
+ @interpreter.interpret do
639
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
640
+ relation2 = relation1.foreach { |r| [literal('blipp')] }
641
+ throw :schema, relation2.schema
642
+ end
643
+ end
644
+ schema.field_type(0).should eql(:chararray)
645
+ end
646
+
647
+ it 'knows the schema of a relation projection containing a literal integer' do
648
+ schema = catch(:schema) do
649
+ @interpreter.interpret do
650
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
651
+ relation2 = relation1.foreach { |r| [literal(4)] }
652
+ throw :schema, relation2.schema
653
+ end
654
+ end
655
+ schema.field_type(0).should eql(:int)
656
+ end
657
+
658
+ it 'knows the schema of a relation projection containing a literal float' do
659
+ schema = catch(:schema) do
660
+ @interpreter.interpret do
661
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
662
+ relation2 = relation1.foreach { |r| [literal(3.14)] }
663
+ throw :schema, relation2.schema
664
+ end
665
+ end
666
+ schema.field_type(0).should eql(:double)
667
+ end
668
+
669
+ end
6
670
 
7
671
  end