piglet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
+ describe Literal do
8
+
9
+ describe '#type' do
10
+ it 'knows that the type of a string is chararray' do
11
+ Literal.new("hello world").type.should eql(:chararray)
12
+ end
13
+
14
+ it 'knows that the type of an integer is int' do
15
+ Literal.new(3).type.should eql(:int)
16
+ end
17
+
18
+ it 'knows that the type of a float is double' do
19
+ Literal.new(3.14).type.should eql(:double)
20
+ end
21
+
22
+ it 'uses the specified type instead of the inferred' do
23
+ Literal.new(3.14, :type => :float).type.should eql(:float)
24
+ end
25
+ end
26
+
27
+ end
@@ -19,7 +19,7 @@ describe Piglet::Field::Reference do
19
19
  end
20
20
 
21
21
  context 'eval/aggregate functions' do
22
- %w(avg count diff max min size sum tokenize).each do |function_name|
22
+ %w(avg count max min size sum tokenize).each do |function_name|
23
23
  it "supports \"#{function_name.upcase}\" through ##{function_name}" do
24
24
  @field.send(function_name).to_s.should eql("#{function_name.upcase}(field)")
25
25
  end
@@ -36,6 +36,20 @@ describe Piglet::Field::Reference do
36
36
  end
37
37
  end
38
38
 
39
+ context 'nested fields' do
40
+ it 'handles nested field access' do
41
+ @field.a.to_s.should eql('field.a')
42
+ end
43
+
44
+ it 'handles nested field access through #field' do
45
+ @field.field(:a).to_s.should eql('field.a')
46
+ end
47
+
48
+ it 'handles nested field access throuh []' do
49
+ @field[0].to_s.should eql('field.$0')
50
+ end
51
+ end
52
+
39
53
  context 'field renaming' do
40
54
  it 'supports renaming a field' do
41
55
  @field.as('x').to_s.should eql('field AS x')
@@ -23,395 +23,8 @@ describe Piglet::Interpreter do
23
23
  end
24
24
  end
25
25
 
26
- context 'load & store operators:' do
27
- describe 'LOAD' do
28
- it 'outputs a LOAD statement' do
29
- @interpreter.interpret { store(load('some/path'), 'out') }
30
- @interpreter.to_pig_latin.should include("LOAD 'some/path'")
31
- end
32
-
33
- it 'outputs a LOAD statement without a USING clause if none specified' do
34
- @interpreter.interpret { store(load('some/path'), 'out') }
35
- @interpreter.to_pig_latin.should_not include('USING')
36
- end
37
-
38
- it 'outputs a LOAD statement with a USING clause with a specified function' do
39
- @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
40
- @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
41
- end
42
-
43
- Piglet::Inout::StorageTypes::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
44
- it "knows that the load method :#{symbolic_name} means #{function}" do
45
- @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
46
- @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
47
- end
48
- end
49
-
50
- it 'outputs a LOAD statement with an AS clause' do
51
- @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
52
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
53
- end
54
-
55
- it 'outputs a LOAD statement with an AS clause with types' do
56
- @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
57
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
58
- end
59
-
60
- it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
61
- @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
62
- @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
63
- end
64
- end
65
-
66
- describe 'STORE' do
67
- it 'outputs a STORE statement' do
68
- @interpreter.interpret { store(load('some/path'), 'out') }
69
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
70
- end
71
-
72
- it 'outputs a STORE statement without a USING clause if none specified' do
73
- @interpreter.interpret { store(load('some/path'), 'out') }
74
- @interpreter.to_pig_latin.should_not include("USING")
75
- end
76
-
77
- it 'outputs a STORE statement with a USING clause with a specified function' do
78
- @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
79
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
80
- end
81
-
82
- it 'knows that the load method :pig_storage means PigStorage' do
83
- @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
84
- @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
85
- end
86
- end
87
-
88
- describe 'DUMP' do
89
- it 'outputs a DUMP statement' do
90
- @interpreter.interpret { dump(load('some/path')) }
91
- @interpreter.to_pig_latin.should match(/DUMP \w+/)
92
- end
93
- end
94
- end
95
-
96
- context 'diagnostic operators:' do
97
- describe 'ILLUSTRATE' do
98
- it 'outputs an ILLUSTRATE statement' do
99
- @interpreter.interpret { illustrate(load('some/path')) }
100
- @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
101
- end
102
- end
103
-
104
- describe 'DESCRIBE' do
105
- it 'outputs a DESCRIBE statement' do
106
- @interpreter.interpret { describe(load('some/path')) }
107
- @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
108
- end
109
- end
110
-
111
- describe 'EXPLAIN' do
112
- it 'outputs an EXPLAIN statement' do
113
- @interpreter.interpret { explain(load('some/path')) }
114
- @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
115
- end
116
-
117
- it 'outputs an EXPLAIN statement without an alias' do
118
- @interpreter.interpret { explain }
119
- @interpreter.to_pig_latin.should match(/EXPLAIN;/)
120
- end
121
- end
122
- end
123
-
124
- context 'relation operators:' do
125
- describe 'GROUP' do
126
- it 'outputs a GROUP statement with one grouping field' do
127
- @interpreter.interpret { store(load('in').group(:a), 'out') }
128
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
129
- end
130
-
131
- it 'outputs a GROUP statement with more than one grouping field' do
132
- @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
133
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
134
- end
135
-
136
- it 'outputs a GROUP statement with a PARALLEL clause' do
137
- @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
138
- @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
139
- end
140
- end
141
-
142
- describe 'DISTINCT' do
143
- it 'outputs a DISTINCT statement' do
144
- @interpreter.interpret { store(load('in').distinct, 'out') }
145
- @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
146
- end
147
-
148
- it 'outputs a DISTINCT statement with a PARALLEL clause' do
149
- @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
150
- @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
151
- end
152
- end
153
-
154
- describe 'CROSS' do
155
- it 'outputs a CROSS statement with two relations' do
156
- @interpreter.interpret do
157
- a = load('in1')
158
- b = load('in2')
159
- c = a.cross(b)
160
- dump(c)
161
- end
162
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
163
- end
164
-
165
- it 'outputs a CROSS statement with many relations' do
166
- @interpreter.interpret do
167
- a = load('in1')
168
- b = load('in2')
169
- c = load('in3')
170
- d = load('in4')
171
- e = a.cross(b, c, d)
172
- dump(e)
173
- end
174
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
175
- end
176
-
177
- it 'outputs a CROSS statement with a PARALLEL clause' do
178
- @interpreter.interpret do
179
- a = load('in1')
180
- b = load('in2')
181
- c = load('in3')
182
- d = a.cross([b, c], :parallel => 4)
183
- dump(d)
184
- end
185
- @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
186
- end
187
- end
188
-
189
- describe 'UNION' do
190
- it 'outputs a UNION statement with two relations' do
191
- @interpreter.interpret do
192
- a = load('in1')
193
- b = load('in2')
194
- c = a.union(b)
195
- dump(c)
196
- end
197
- @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
198
- end
199
-
200
- it 'outputs a UNION statement with many relations' do
201
- @interpreter.interpret do
202
- a = load('in1')
203
- b = load('in2')
204
- c = load('in3')
205
- d = load('in4')
206
- e = a.union(b, c, d)
207
- dump(e)
208
- end
209
- @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
210
- end
211
- end
212
-
213
- describe 'SAMPLE' do
214
- it 'outputs a SAMPLE statement' do
215
- @interpreter.interpret { dump(load('in').sample(10)) }
216
- @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
217
- end
218
- end
219
-
220
- describe 'LIMIT' do
221
- it 'outputs a LIMIT statement' do
222
- @interpreter.interpret { dump(load('in').limit(42)) }
223
- @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
224
- end
225
- end
226
-
227
- describe 'FOREACH … GENERATE' do
228
- it 'outputs a FOREACH … GENERATE statement' do
229
- @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
230
- @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
231
- end
232
-
233
- it 'outputs a FOREACH … GENERATE statement with a list of fields' do
234
- @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
235
- @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
236
- end
237
-
238
- it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
239
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
240
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
241
- end
242
-
243
- it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
244
- @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
245
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
246
- end
247
-
248
- it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
249
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
250
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
251
- end
252
-
253
- it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
254
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
255
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
256
- end
257
-
258
- it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
259
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
260
- @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
261
- end
262
- end
263
-
264
- describe 'FILTER' do
265
- it 'outputs a FILTER statement' do
266
- @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
267
- @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
268
- end
269
-
270
- it 'outputs a FILTER statement with a complex test' do
271
- @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) }
272
- @interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/)
273
- end
274
- end
275
-
276
- describe 'SPLIT' do
277
- it 'outputs a SPLIT statement' do
278
- @interpreter.interpret do
279
- a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
280
- dump(a)
281
- dump(b)
282
- end
283
- @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
284
- end
285
- end
286
-
287
- describe 'ORDER' do
288
- it 'outputs an ORDER statement' do
289
- @interpreter.interpret { dump(load('in').order(:a)) }
290
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
291
- end
292
-
293
- it 'outputs an ORDER statement with multiple fields' do
294
- @interpreter.interpret { dump(load('in').order(:a, :b)) }
295
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
296
- end
297
-
298
- it 'outputs an ORDER statement with ASC and DESC' do
299
- @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
300
- @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
301
- end
302
- end
303
-
304
- describe 'JOIN' do
305
- it 'outputs a JOIN statement' do
306
- @interpreter.interpret do
307
- a = load('in1')
308
- b = load('in2')
309
- c = a.join(a => :x, b => :y)
310
- dump(c)
311
- end
312
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
313
- end
314
-
315
- it 'outputs a JOIN statement with a PARALLEL clause' do
316
- @interpreter.interpret do
317
- a = load('in1')
318
- b = load('in2')
319
- c = a.join(a => :x, b => :y, :parallel => 5)
320
- dump(c)
321
- end
322
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
323
- end
324
-
325
- it 'outputs a JOIN statement with a USING clause' do
326
- @interpreter.interpret do
327
- a = load('in1')
328
- b = load('in2')
329
- c = a.join(a => :x, b => :y, :using => :replicated)
330
- dump(c)
331
- end
332
- @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
333
- end
334
- end
335
-
336
- describe 'COGROUP' do
337
- it 'outputs a COGROUP statement' do
338
- @interpreter.interpret do
339
- a = load('in1')
340
- b = load('in2')
341
- c = a.cogroup(a => :x, b => :y)
342
- dump(c)
343
- end
344
- @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
345
- end
346
-
347
- it 'outputs a COGROUP statement with multiple join fields' do
348
- @interpreter.interpret do
349
- a = load('in1')
350
- b = load('in2')
351
- c = a.cogroup(a => :x, b => [:y, :z, :w])
352
- dump(c)
353
- end
354
- @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
355
- end
356
-
357
- it 'outputs a COGROUP statement with a PARALLEL clause' do
358
- @interpreter.interpret do
359
- a = load('in1')
360
- b = load('in2')
361
- c = a.cogroup(a => :x, b => :y, :parallel => 5)
362
- dump(c)
363
- end
364
- @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
365
- end
366
-
367
- it 'outputs a COGROUP statement with INNER and OUTER' do
368
- @interpreter.interpret do
369
- a = load('in1')
370
- b = load('in2')
371
- c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
372
- dump(c)
373
- end
374
- @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
375
- @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
376
- end
377
- end
378
- end
379
-
380
- context 'aliasing & multiple statements' do
381
- it 'aliases the loaded relation and uses the same alias in the STORE statement' do
382
- @interpreter.interpret { store(load('in'), 'out') }
383
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
384
- end
385
-
386
- it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
387
- @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
388
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
389
- end
390
-
391
- it 'aliases a whole row of statements' do
392
- @interpreter.interpret do
393
- a = load('in', :schema => [:a])
394
- b = a.group(:a)
395
- c = b.group(:a)
396
- d = c.group(:a)
397
- store(d, 'out')
398
- end
399
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
400
- end
401
-
402
- it 'outputs the statements for an alias only once, regardless of home many times it is stored' do
403
- @interpreter.interpret do
404
- a = load('in')
405
- b = a.distinct
406
- store(b, 'out1')
407
- store(b, 'out2')
408
- end
409
- @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
410
- end
411
- end
412
-
413
- context 'misc. operators' do
414
- it 'outputs a binary conditional when using #test' do
26
+ describe '#test' do
27
+ it 'outputs a binary conditional' do
415
28
  @interpreter.interpret do
416
29
  dump(load('in').foreach { |r| [test(r.a == r.b, r.a, r.b)]})
417
30
  end
@@ -419,36 +32,36 @@ describe Piglet::Interpreter do
419
32
  end
420
33
  end
421
34
 
422
- context 'literals' do
423
- it 'outputs a literal string when passing a string to #literal' do
35
+ describe '#literal' do
36
+ it 'outputs a literal string' do
424
37
  @interpreter.interpret do
425
38
  dump(load('in').foreach { |r| [literal('hello').as(:world)]})
426
39
  end
427
40
  @interpreter.to_pig_latin.should include("'hello' AS world")
428
41
  end
429
42
 
430
- it 'outputs a literal integer when passing an integer to #literal' do
43
+ it 'outputs a literal integer' do
431
44
  @interpreter.interpret do
432
45
  dump(load('in').foreach { |r| [literal(3).as(:n)]})
433
46
  end
434
47
  @interpreter.to_pig_latin.should include("3 AS n")
435
48
  end
436
49
 
437
- it 'outputs a literal float when passing a float to #literal' do
50
+ it 'outputs a literal float' do
438
51
  @interpreter.interpret do
439
52
  dump(load('in').foreach { |r| [literal(3.14).as(:pi)]})
440
53
  end
441
54
  @interpreter.to_pig_latin.should include("3.14 AS pi")
442
55
  end
443
56
 
444
- it 'outputs a literal string when passing an arbitrary object to #literal' do
57
+ it 'outputs a literal string when passed an arbitrary object' do
445
58
  @interpreter.interpret do
446
59
  dump(load('in').foreach { |r| [literal(self).as(:interpreter)]})
447
60
  end
448
61
  @interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
449
62
  end
450
63
 
451
- it 'escapes single quotes in literal strings' do
64
+ it 'escapes single quotes' do
452
65
  @interpreter.interpret do
453
66
  dump(load('in').foreach { |r| [literal("hello 'world'").as(:str)]})
454
67
  end