piglet 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
+ describe Literal do
8
+
9
+ describe '#type' do
10
+ it 'knows that the type of a string is chararray' do
11
+ Literal.new("hello world").type.should eql(:chararray)
12
+ end
13
+
14
+ it 'knows that the type of an integer is int' do
15
+ Literal.new(3).type.should eql(:int)
16
+ end
17
+
18
+ it 'knows that the type of a float is double' do
19
+ Literal.new(3.14).type.should eql(:double)
20
+ end
21
+
22
+ it 'uses the specified type instead of the inferred' do
23
+ Literal.new(3.14, :type => :float).type.should eql(:float)
24
+ end
25
+ end
26
+
27
+ end
@@ -19,7 +19,7 @@ describe Piglet::Field::Reference do
19
19
  end
20
20
 
21
21
  context 'eval/aggregate functions' do
22
- %w(avg count diff max min size sum tokenize).each do |function_name|
22
+ %w(avg count max min size sum tokenize).each do |function_name|
23
23
  it "supports \"#{function_name.upcase}\" through ##{function_name}" do
24
24
  @field.send(function_name).to_s.should eql("#{function_name.upcase}(field)")
25
25
  end
@@ -36,6 +36,20 @@ describe Piglet::Field::Reference do
36
36
  end
37
37
  end
38
38
 
39
+ context 'nested fields' do
40
+ it 'handles nested field access' do
41
+ @field.a.to_s.should eql('field.a')
42
+ end
43
+
44
+ it 'handles nested field access through #field' do
45
+ @field.field(:a).to_s.should eql('field.a')
46
+ end
47
+
48
+ it 'handles nested field access through []' do
49
+ @field[0].to_s.should eql('field.$0')
50
+ end
51
+ end
52
+
39
53
  context 'field renaming' do
40
54
  it 'supports renaming a field' do
41
55
  @field.as('x').to_s.should eql('field AS x')
@@ -23,395 +23,8 @@ describe Piglet::Interpreter do
23
23
  end
24
24
  end
25
25
 
26
- context 'load & store operators:' do
27
- describe 'LOAD' do
28
- it 'outputs a LOAD statement' do
29
- @interpreter.interpret { store(load('some/path'), 'out') }
30
- @interpreter.to_pig_latin.should include("LOAD 'some/path'")
31
- end
32
-
33
- it 'outputs a LOAD statement without a USING clause if none specified' do
34
- @interpreter.interpret { store(load('some/path'), 'out') }
35
- @interpreter.to_pig_latin.should_not include('USING')
36
- end
37
-
38
- it 'outputs a LOAD statement with a USING clause with a specified function' do
39
- @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
40
- @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
41
- end
42
-
43
- Piglet::Inout::StorageTypes::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
44
- it "knows that the load method :#{symbolic_name} means #{function}" do
45
- @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
46
- @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
47
- end
48
- end
49
-
50
- it 'outputs a LOAD statement with an AS clause' do
51
- @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
52
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
53
- end
54
-
55
- it 'outputs a LOAD statement with an AS clause with types' do
56
- @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
57
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
58
- end
59
-
60
- it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
61
- @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
62
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
63
- end
64
- end
65
-
66
- describe 'STORE' do
67
- it 'outputs a STORE statement' do
68
- @interpreter.interpret { store(load('some/path'), 'out') }
69
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
70
- end
71
-
72
- it 'outputs a STORE statement without a USING clause if none specified' do
73
- @interpreter.interpret { store(load('some/path'), 'out') }
74
- @interpreter.to_pig_latin.should_not include("USING")
75
- end
76
-
77
- it 'outputs a STORE statement with a USING clause with a specified function' do
78
- @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
79
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
80
- end
81
-
82
- it 'knows that the load method :pig_storage means PigStorage' do
83
- @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
84
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
85
- end
86
- end
87
-
88
- describe 'DUMP' do
89
- it 'outputs a DUMP statement' do
90
- @interpreter.interpret { dump(load('some/path')) }
91
- @interpreter.to_pig_latin.should match(/DUMP \w+/)
92
- end
93
- end
94
- end
95
-
96
- context 'diagnostic operators:' do
97
- describe 'ILLUSTRATE' do
98
- it 'outputs an ILLUSTRATE statement' do
99
- @interpreter.interpret { illustrate(load('some/path')) }
100
- @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
101
- end
102
- end
103
-
104
- describe 'DESCRIBE' do
105
- it 'outputs a DESCRIBE statement' do
106
- @interpreter.interpret { describe(load('some/path')) }
107
- @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
108
- end
109
- end
110
-
111
- describe 'EXPLAIN' do
112
- it 'outputs an EXPLAIN statement' do
113
- @interpreter.interpret { explain(load('some/path')) }
114
- @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
115
- end
116
-
117
- it 'outputs an EXPLAIN statement without an alias' do
118
- @interpreter.interpret { explain }
119
- @interpreter.to_pig_latin.should match(/EXPLAIN;/)
120
- end
121
- end
122
- end
123
-
124
- context 'relation operators:' do
125
- describe 'GROUP' do
126
- it 'outputs a GROUP statement with one grouping field' do
127
- @interpreter.interpret { store(load('in').group(:a), 'out') }
128
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
129
- end
130
-
131
- it 'outputs a GROUP statement with more than one grouping field' do
132
- @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
133
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
134
- end
135
-
136
- it 'outputs a GROUP statement with a PARALLEL clause' do
137
- @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
138
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
139
- end
140
- end
141
-
142
- describe 'DISTINCT' do
143
- it 'outputs a DISTINCT statement' do
144
- @interpreter.interpret { store(load('in').distinct, 'out') }
145
- @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
146
- end
147
-
148
- it 'outputs a DISTINCT statement with a PARALLEL clause' do
149
- @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
150
- @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
151
- end
152
- end
153
-
154
- describe 'CROSS' do
155
- it 'outputs a CROSS statement with two relations' do
156
- @interpreter.interpret do
157
- a = load('in1')
158
- b = load('in2')
159
- c = a.cross(b)
160
- dump(c)
161
- end
162
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
163
- end
164
-
165
- it 'outputs a CROSS statement with many relations' do
166
- @interpreter.interpret do
167
- a = load('in1')
168
- b = load('in2')
169
- c = load('in3')
170
- d = load('in4')
171
- e = a.cross(b, c, d)
172
- dump(e)
173
- end
174
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
175
- end
176
-
177
- it 'outputs a CROSS statement with a PARALLEL clause' do
178
- @interpreter.interpret do
179
- a = load('in1')
180
- b = load('in2')
181
- c = load('in3')
182
- d = a.cross([b, c], :parallel => 4)
183
- dump(d)
184
- end
185
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
186
- end
187
- end
188
-
189
- describe 'UNION' do
190
- it 'outputs a UNION statement with two relations' do
191
- @interpreter.interpret do
192
- a = load('in1')
193
- b = load('in2')
194
- c = a.union(b)
195
- dump(c)
196
- end
197
- @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
198
- end
199
-
200
- it 'outputs a UNION statement with many relations' do
201
- @interpreter.interpret do
202
- a = load('in1')
203
- b = load('in2')
204
- c = load('in3')
205
- d = load('in4')
206
- e = a.union(b, c, d)
207
- dump(e)
208
- end
209
- @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
210
- end
211
- end
212
-
213
- describe 'SAMPLE' do
214
- it 'outputs a SAMPLE statement' do
215
- @interpreter.interpret { dump(load('in').sample(10)) }
216
- @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
217
- end
218
- end
219
-
220
- describe 'LIMIT' do
221
- it 'outputs a LIMIT statement' do
222
- @interpreter.interpret { dump(load('in').limit(42)) }
223
- @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
224
- end
225
- end
226
-
227
- describe 'FOREACH … GENERATE' do
228
- it 'outputs a FOREACH … GENERATE statement' do
229
- @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
230
- @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
231
- end
232
-
233
- it 'outputs a FOREACH … GENERATE statement with a list of fields' do
234
- @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
235
- @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
236
- end
237
-
238
- it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
239
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
240
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
241
- end
242
-
243
- it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
244
- @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
245
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
246
- end
247
-
248
- it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
249
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
250
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
251
- end
252
-
253
- it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
254
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
255
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
256
- end
257
-
258
- it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
259
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
260
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
261
- end
262
- end
263
-
264
- describe 'FILTER' do
265
- it 'outputs a FILTER statement' do
266
- @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
267
- @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
268
- end
269
-
270
- it 'outputs a FILTER statement with a complex test' do
271
- @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) }
272
- @interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/)
273
- end
274
- end
275
-
276
- describe 'SPLIT' do
277
- it 'outputs a SPLIT statement' do
278
- @interpreter.interpret do
279
- a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
280
- dump(a)
281
- dump(b)
282
- end
283
- @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
284
- end
285
- end
286
-
287
- describe 'ORDER' do
288
- it 'outputs an ORDER statement' do
289
- @interpreter.interpret { dump(load('in').order(:a)) }
290
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
291
- end
292
-
293
- it 'outputs an ORDER statement with multiple fields' do
294
- @interpreter.interpret { dump(load('in').order(:a, :b)) }
295
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
296
- end
297
-
298
- it 'outputs an ORDER statement with ASC and DESC' do
299
- @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
300
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
301
- end
302
- end
303
-
304
- describe 'JOIN' do
305
- it 'outputs a JOIN statement' do
306
- @interpreter.interpret do
307
- a = load('in1')
308
- b = load('in2')
309
- c = a.join(a => :x, b => :y)
310
- dump(c)
311
- end
312
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
313
- end
314
-
315
- it 'outputs a JOIN statement with a PARALLEL clause' do
316
- @interpreter.interpret do
317
- a = load('in1')
318
- b = load('in2')
319
- c = a.join(a => :x, b => :y, :parallel => 5)
320
- dump(c)
321
- end
322
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
323
- end
324
-
325
- it 'outputs a JOIN statement with a USING clause' do
326
- @interpreter.interpret do
327
- a = load('in1')
328
- b = load('in2')
329
- c = a.join(a => :x, b => :y, :using => :replicated)
330
- dump(c)
331
- end
332
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
333
- end
334
- end
335
-
336
- describe 'COGROUP' do
337
- it 'outputs a COGROUP statement' do
338
- @interpreter.interpret do
339
- a = load('in1')
340
- b = load('in2')
341
- c = a.cogroup(a => :x, b => :y)
342
- dump(c)
343
- end
344
- @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
345
- end
346
-
347
- it 'outputs a COGROUP statement with multiple join fields' do
348
- @interpreter.interpret do
349
- a = load('in1')
350
- b = load('in2')
351
- c = a.cogroup(a => :x, b => [:y, :z, :w])
352
- dump(c)
353
- end
354
- @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
355
- end
356
-
357
- it 'outputs a COGROUP statement with a PARALLEL clause' do
358
- @interpreter.interpret do
359
- a = load('in1')
360
- b = load('in2')
361
- c = a.cogroup(a => :x, b => :y, :parallel => 5)
362
- dump(c)
363
- end
364
- @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
365
- end
366
-
367
- it 'outputs a COGROUP statement with INNER and OUTER' do
368
- @interpreter.interpret do
369
- a = load('in1')
370
- b = load('in2')
371
- c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
372
- dump(c)
373
- end
374
- @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
375
- @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
376
- end
377
- end
378
- end
379
-
380
- context 'aliasing & multiple statements' do
381
- it 'aliases the loaded relation and uses the same alias in the STORE statement' do
382
- @interpreter.interpret { store(load('in'), 'out') }
383
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
384
- end
385
-
386
- it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
387
- @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
388
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
389
- end
390
-
391
- it 'aliases a whole row of statements' do
392
- @interpreter.interpret do
393
- a = load('in', :schema => [:a])
394
- b = a.group(:a)
395
- c = b.group(:a)
396
- d = c.group(:a)
397
- store(d, 'out')
398
- end
399
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
400
- end
401
-
402
- it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
403
- @interpreter.interpret do
404
- a = load('in')
405
- b = a.distinct
406
- store(b, 'out1')
407
- store(b, 'out2')
408
- end
409
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
410
- end
411
- end
412
-
413
- context 'misc. operators' do
414
- it 'outputs a binary conditional when using #test' do
26
+ describe '#test' do
27
+ it 'outputs a binary conditional' do
415
28
  @interpreter.interpret do
416
29
  dump(load('in').foreach { |r| [test(r.a == r.b, r.a, r.b)]})
417
30
  end
@@ -419,36 +32,36 @@ describe Piglet::Interpreter do
419
32
  end
420
33
  end
421
34
 
422
- context 'literals' do
423
- it 'outputs a literal string when passing a string to #literal' do
35
+ describe '#literal' do
36
+ it 'outputs a literal string' do
424
37
  @interpreter.interpret do
425
38
  dump(load('in').foreach { |r| [literal('hello').as(:world)]})
426
39
  end
427
40
  @interpreter.to_pig_latin.should include("'hello' AS world")
428
41
  end
429
42
 
430
- it 'outputs a literal integer when passing an integer to #literal' do
43
+ it 'outputs a literal integer' do
431
44
  @interpreter.interpret do
432
45
  dump(load('in').foreach { |r| [literal(3).as(:n)]})
433
46
  end
434
47
  @interpreter.to_pig_latin.should include("3 AS n")
435
48
  end
436
49
 
437
- it 'outputs a literal float when passing a float to #literal' do
50
+ it 'outputs a literal float' do
438
51
  @interpreter.interpret do
439
52
  dump(load('in').foreach { |r| [literal(3.14).as(:pi)]})
440
53
  end
441
54
  @interpreter.to_pig_latin.should include("3.14 AS pi")
442
55
  end
443
56
 
444
- it 'outputs a literal string when passing an arbitrary object to #literal' do
57
+ it 'outputs a literal string when passed an arbitrary object' do
445
58
  @interpreter.interpret do
446
59
  dump(load('in').foreach { |r| [literal(self).as(:interpreter)]})
447
60
  end
448
61
  @interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
449
62
  end
450
63
 
451
- it 'escapes single quotes in literal strings' do
64
+ it 'escapes single quotes' do
452
65
  @interpreter.interpret do
453
66
  dump(load('in').foreach { |r| [literal("hello 'world'").as(:str)]})
454
67
  end