piglet 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.document +5 -0
  2. data/.gitignore +22 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +293 -0
  5. data/Rakefile +50 -0
  6. data/bin/piglet +9 -0
  7. data/examples/analysis.rb +311 -0
  8. data/examples/scratch.rb +11 -0
  9. data/examples/spike1.rb +43 -0
  10. data/examples/spike2.rb +40 -0
  11. data/examples/test1.rb +3 -0
  12. data/examples/test2.rb +5 -0
  13. data/examples/test3.rb +4 -0
  14. data/lib/piglet/assignment.rb +13 -0
  15. data/lib/piglet/cogroup.rb +31 -0
  16. data/lib/piglet/cross.rb +22 -0
  17. data/lib/piglet/describe.rb +5 -0
  18. data/lib/piglet/distinct.rb +16 -0
  19. data/lib/piglet/dump.rb +5 -0
  20. data/lib/piglet/explain.rb +13 -0
  21. data/lib/piglet/field.rb +40 -0
  22. data/lib/piglet/field_expression_functions.rb +62 -0
  23. data/lib/piglet/field_function_expression.rb +19 -0
  24. data/lib/piglet/field_infix_expression.rb +17 -0
  25. data/lib/piglet/field_prefix_expression.rb +21 -0
  26. data/lib/piglet/field_rename.rb +11 -0
  27. data/lib/piglet/field_suffix_expression.rb +17 -0
  28. data/lib/piglet/filter.rb +13 -0
  29. data/lib/piglet/foreach.rb +19 -0
  30. data/lib/piglet/group.rb +21 -0
  31. data/lib/piglet/illustrate.rb +5 -0
  32. data/lib/piglet/interpreter.rb +108 -0
  33. data/lib/piglet/join.rb +20 -0
  34. data/lib/piglet/limit.rb +13 -0
  35. data/lib/piglet/load.rb +31 -0
  36. data/lib/piglet/load_and_store.rb +16 -0
  37. data/lib/piglet/order.rb +29 -0
  38. data/lib/piglet/relation.rb +177 -0
  39. data/lib/piglet/sample.rb +13 -0
  40. data/lib/piglet/split.rb +41 -0
  41. data/lib/piglet/store.rb +17 -0
  42. data/lib/piglet/storing.rb +13 -0
  43. data/lib/piglet/stream.rb +5 -0
  44. data/lib/piglet/union.rb +19 -0
  45. data/lib/piglet.rb +45 -0
  46. data/spec/piglet/field_spec.rb +130 -0
  47. data/spec/piglet/interpreter_spec.rb +413 -0
  48. data/spec/piglet/relation_spec.rb +79 -0
  49. data/spec/piglet/split_spec.rb +34 -0
  50. data/spec/piglet_spec.rb +7 -0
  51. data/spec/spec.opts +3 -0
  52. data/spec/spec_helper.rb +14 -0
  53. metadata +123 -0
@@ -0,0 +1,130 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')


# Checks the strings produced by #to_s on Piglet::Field and on the
# expressions built from fields (function calls, renames, operators, casts).
describe Piglet::Field do

  before do
    @field = Piglet::Field.new('field')
  end

  describe '#to_s' do
    it 'returns a string with the field name (as a string)' do
      rendered = @field.to_s
      rendered.should eql("field")
    end

    it 'returns a string with the field name (as a symbol)' do
      symbol_field = Piglet::Field.new(:field)
      symbol_field.to_s.should eql("field")
    end
  end

  context 'eval/aggregate functions' do
    # One example is generated per function: the Ruby method name is expected
    # to render as its upper-cased form wrapped around the field.
    function_names = %w[avg count diff max min size sum tokenize]

    function_names.each do |function_name|
      pig_name = function_name.upcase

      it "supports \"#{pig_name}\" through ##{function_name}" do
        call = @field.send(function_name)
        call.to_s.should eql("#{pig_name}(field)")
      end
    end

    it 'supports "IsEmpty" through #empty?' do
      @field.empty?.to_s.should eql("IsEmpty(field)")
    end
  end

  context 'nested expressions' do
    it 'handles nested expressions' do
      nested = @field.max.min.avg.empty?.tokenize
      nested.to_s.should eql("TOKENIZE(IsEmpty(AVG(MIN(MAX(field)))))")
    end
  end

  context 'field renaming' do
    it 'supports renaming a field' do
      renamed = @field.as('x')
      renamed.to_s.should eql('field AS x')
    end

    it 'supports renaming a derived field' do
      renamed = @field.x.y.z.as('b')
      renamed.to_s.should eql('field.x.y.z AS b')
    end

    it 'supports renaming a calculated field' do
      renamed = @field.max.as('m')
      renamed.to_s.should eql('MAX(field) AS m')
    end
  end

  context 'infix and unary operators' do
    before do
      @left = Piglet::Field.new('field1')
      @right = Piglet::Field.new('field2')
    end

    # Two examples per operator: applied directly to fields, and nested as the
    # right-hand operand of a sum (where it is expected to be parenthesized).
    binary_operators = [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/]

    binary_operators.each do |op|
      it "supports #{op} on a field" do
        expression = @left.send(op, @right)
        expression.to_s.should eql("field1 #{op} field2")
      end

      it "supports #{op} on an expression" do
        inner = @left.send(op, @right)
        (@left + inner).to_s.should eql("field1 + (field1 #{op} field2)")
      end
    end

    it 'supports != through #ne on a field' do
      expression = @left.ne(@right)
      expression.to_s.should eql("field1 != field2")
    end

    it 'supports != through #ne on an expression' do
      inner = @left.ne(@right)
      (@left + inner).to_s.should eql("field1 + (field1 != field2)")
    end

    it 'supports "matches" on a field with a regex' do
      expression = @left.matches(/.*\.pig$/)
      expression.to_s.should eql("field1 matches '.*\\.pig$'")
    end

    it 'supports "matches" on a field with a string' do
      expression = @left.matches('.*\.pig$')
      expression.to_s.should eql("field1 matches '.*\\.pig$'")
    end

    it 'supports "matches" on an expression' do
      sum = @left + @right
      sum.matches(/.*\.pig$/).to_s.should eql("(field1 + field2) matches '.*\\.pig$'")
    end

    it 'supports "is null" on a field' do
      @left.null?.to_s.should eql("field1 is null")
    end

    it 'supports "is null" on an expression' do
      sum = @left + @right
      sum.null?.to_s.should eql("(field1 + field2) is null")
    end

    it 'supports "is not null" on a field' do
      @left.not_null?.to_s.should eql("field1 is not null")
    end

    it 'supports "is not null" on an expression' do
      sum = @left + @right
      sum.not_null?.to_s.should eql("(field1 + field2) is not null")
    end

    it 'supports "NOT" on a field' do
      @left.not.to_s.should eql("NOT field1")
    end

    it 'supports "NOT" on an expression' do
      comparison = (@left == @right)
      comparison.not.to_s.should eql("NOT (field1 == field2)")
    end

    it 'supports unary - through #neg on a field' do
      @left.neg.to_s.should eql("-field1")
    end

    it 'supports unary - through #neg on an expression' do
      sum = @left + @right
      sum.neg.to_s.should eql("-(field1 + field2)")
    end

    it 'supports casts on a field' do
      @left.cast(:chararray).to_s.should eql("(chararray) field1")
    end

    it 'supports casts on an expression' do
      sum = @left + @right
      sum.cast(:chararray).to_s.should eql("(chararray) (field1 + field2)")
    end
  end

end
@@ -0,0 +1,413 @@
# encoding: utf-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')


# Specs for Piglet::Interpreter: each example interprets a block of Piglet
# statements and checks the text returned by #to_pig_latin.
#
# The encoding comment above is needed because some example descriptions
# contain a non-ASCII ellipsis.
describe Piglet::Interpreter do

  before do
    @interpreter = Piglet::Interpreter.new
  end

  context 'basic usage' do
    it 'interprets a block given to #new' do
      output = Piglet::Interpreter.new { store(load('some/path'), 'out') }
      output.to_pig_latin.should_not be_empty
    end

    it 'interprets a block given to #interpret' do
      output = @interpreter.interpret { store(load('some/path'), 'out') }
      output.to_pig_latin.should_not be_empty
    end

    it 'does nothing with no commands' do
      @interpreter.interpret.to_pig_latin.should be_empty
    end
  end

  context 'load & store operators:' do
    describe 'LOAD' do
      it 'outputs a LOAD statement' do
        @interpreter.interpret { store(load('some/path'), 'out') }
        @interpreter.to_pig_latin.should include("LOAD 'some/path'")
      end

      it 'outputs a LOAD statement without a USING clause if none specified' do
        @interpreter.interpret { store(load('some/path'), 'out') }
        @interpreter.to_pig_latin.should_not include('USING')
      end

      it 'outputs a LOAD statement with a USING clause with a specified function' do
        @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
        @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
      end

      # One example per entry in the symbolic-name => function-name table.
      Piglet::LoadAndStore::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
        it "knows that the load method :#{symbolic_name} means #{function}" do
          @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
          @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
        end
      end

      it 'outputs a LOAD statement with an AS clause' do
        @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
        @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
      end

      it 'outputs a LOAD statement with an AS clause with types' do
        @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
        @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
      end

      it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
        @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
        @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
      end
    end

    describe 'STORE' do
      it 'outputs a STORE statement' do
        @interpreter.interpret { store(load('some/path'), 'out') }
        @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
      end

      it 'outputs a STORE statement without a USING clause if none specified' do
        @interpreter.interpret { store(load('some/path'), 'out') }
        @interpreter.to_pig_latin.should_not include("USING")
      end

      it 'outputs a STORE statement with a USING clause with a specified function' do
        @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
        @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
      end

      # This example exercises the :using option of store, not load.
      it 'knows that the store method :pig_storage means PigStorage' do
        @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
        @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
      end
    end

    describe 'DUMP' do
      it 'outputs a DUMP statement' do
        @interpreter.interpret { dump(load('some/path')) }
        @interpreter.to_pig_latin.should match(/DUMP \w+/)
      end
    end
  end

  context 'diagnostic operators:' do
    describe 'ILLUSTRATE' do
      it 'outputs an ILLUSTRATE statement' do
        @interpreter.interpret { illustrate(load('some/path')) }
        @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
      end
    end

    describe 'DESCRIBE' do
      it 'outputs a DESCRIBE statement' do
        @interpreter.interpret { describe(load('some/path')) }
        @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
      end
    end

    describe 'EXPLAIN' do
      it 'outputs an EXPLAIN statement' do
        @interpreter.interpret { explain(load('some/path')) }
        @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
      end

      it 'outputs an EXPLAIN statement without an alias' do
        @interpreter.interpret { explain }
        @interpreter.to_pig_latin.should match(/EXPLAIN;/)
      end
    end
  end

  context 'relation operators:' do
    describe 'GROUP' do
      it 'outputs a GROUP statement with one grouping field' do
        @interpreter.interpret { store(load('in').group(:a), 'out') }
        @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
      end

      it 'outputs a GROUP statement with more than one grouping field' do
        @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
        @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
      end

      it 'outputs a GROUP statement with a PARALLEL clause' do
        @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
        @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
      end
    end

    describe 'DISTINCT' do
      it 'outputs a DISTINCT statement' do
        @interpreter.interpret { store(load('in').distinct, 'out') }
        @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
      end

      it 'outputs a DISTINCT statement with a PARALLEL clause' do
        @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
        @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
      end
    end

    describe 'CROSS' do
      it 'outputs a CROSS statement with two relations' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.cross(b)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
      end

      it 'outputs a CROSS statement with many relations' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = load('in3')
          d = load('in4')
          e = a.cross(b, c, d)
          dump(e)
        end
        @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
      end

      it 'outputs a CROSS statement with a PARALLEL clause' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = load('in3')
          d = a.cross([b, c], :parallel => 4)
          dump(d)
        end
        @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
      end
    end

    describe 'UNION' do
      it 'outputs a UNION statement with two relations' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.union(b)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
      end

      it 'outputs a UNION statement with many relations' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = load('in3')
          d = load('in4')
          e = a.union(b, c, d)
          dump(e)
        end
        @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
      end
    end

    describe 'SAMPLE' do
      it 'outputs a SAMPLE statement' do
        @interpreter.interpret { dump(load('in').sample(10)) }
        @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
      end
    end

    describe 'LIMIT' do
      it 'outputs a LIMIT statement' do
        @interpreter.interpret { dump(load('in').limit(42)) }
        @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
      end
    end

    describe 'FOREACH … GENERATE' do
      it 'outputs a FOREACH … GENERATE statement' do
        @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
        @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
      end

      it 'outputs a FOREACH … GENERATE statement with a list of fields' do
        @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
        @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
      end

      it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
        @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
        @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
      end

      it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
        @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
        @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
      end

      it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
        @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
        @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
      end

      it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
        @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
        # Dots are escaped so that only a literal "a.b" satisfies the pattern.
        @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b, b\.c, c\.d/)
      end

      it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
        @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
        # Dots are escaped so that only a literal "a.b" satisfies the pattern.
        @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b AS c, a\.b AS d/)
      end
    end

    describe 'FILTER' do
      it 'outputs a FILTER statement' do
        @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
        @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
      end

      # Disabled example, kept for reference.
      # NOTE(review): presumably disabled because Ruby's && cannot be
      # overridden, so the block would yield only its right-hand operand;
      # confirm before enabling.
      # it 'outputs a FILTER statement with a complex test' do
      #   @interpreter.interpret { dump(load('in').filter { |r| r.a > r.b && r.c != 3 }) }
      #   @interpreter.to_pig_latin.should match(/FILTER \w+ BY a > b AND c != 3/)
      # end
    end

    describe 'SPLIT' do
      it 'outputs a SPLIT statement' do
        @interpreter.interpret do
          a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
          dump(a)
          dump(b)
        end
        @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
      end
    end

    describe 'ORDER' do
      it 'outputs an ORDER statement' do
        @interpreter.interpret { dump(load('in').order(:a)) }
        @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
      end

      it 'outputs an ORDER statement with multiple fields' do
        @interpreter.interpret { dump(load('in').order(:a, :b)) }
        @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
      end

      it 'outputs an ORDER statement with ASC and DESC' do
        @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
        @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
      end
    end

    describe 'JOIN' do
      it 'outputs a JOIN statement' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.join(a => :x, b => :y)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
      end

      it 'outputs a JOIN statement with a PARALLEL clause' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.join(a => :x, b => :y, :parallel => 5)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
      end

      it 'outputs a JOIN statement with a USING clause' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.join(a => :x, b => :y, :using => :replicated)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
      end
    end

    describe 'COGROUP' do
      it 'outputs a COGROUP statement' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.cogroup(a => :x, b => :y)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
      end

      it 'outputs a COGROUP statement with multiple join fields' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.cogroup(a => :x, b => [:y, :z, :w])
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
      end

      it 'outputs a COGROUP statement with a PARALLEL clause' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.cogroup(a => :x, b => :y, :parallel => 5)
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
      end

      it 'outputs a COGROUP statement with INNER and OUTER' do
        @interpreter.interpret do
          a = load('in1')
          b = load('in2')
          c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
          dump(c)
        end
        @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
        @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
      end
    end
  end

  context 'aliasing & multiple statements' do
    # The back-references (\1, \2, ...) assert that the alias assigned by one
    # statement is the one consumed by the next.
    it 'aliases the loaded relation and uses the same alias in the STORE statement' do
      @interpreter.interpret { store(load('in'), 'out') }
      @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
    end

    it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
      @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
      @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
    end

    it 'aliases a whole row of statements' do
      @interpreter.interpret do
        a = load('in', :schema => [:a])
        b = a.group(:a)
        c = b.group(:a)
        d = c.group(:a)
        store(d, 'out')
      end
      @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
    end

    it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
      @interpreter.interpret do
        a = load('in')
        b = a.distinct
        store(b, 'out1')
        store(b, 'out2')
      end
      @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
    end
  end

end
@@ -0,0 +1,79 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')


# Specs for the Piglet::Relation mixin, exercised on plain objects that have
# been extended with the module.
describe Piglet::Relation do

  # Returns a fresh object extended with Piglet::Relation
  # (Object#extend returns its receiver).
  def new_relation
    Object.new.extend(Piglet::Relation)
  end

  before do
    @relation = new_relation
  end

  it 'has an alias' do
    @relation.alias.should_not be_nil
  end

  it 'has a unique alias' do
    seen = {}
    1000.times do
      candidate = new_relation
      # Each new relation's alias must not have been handed out before.
      seen.should_not have_key(candidate.alias)
      seen[candidate.alias] = candidate
    end
  end

  describe '#group' do
    it 'returns a new relation with the target relation as source' do
      @relation.group(:a).sources.should include(@relation)
    end
  end

  describe '#distinct' do
    it 'returns a new relation with the target relation as source' do
      @relation.distinct.sources.should include(@relation)
    end
  end

  describe '#cross' do
    it 'returns a new relation with the target relation as one of the sources' do
      other = new_relation
      @relation.cross(other).sources.should include(@relation)
    end
  end

  describe '#union' do
    it 'returns a new relation with the target relation as one of the sources' do
      other = new_relation
      @relation.union(other).sources.should include(@relation)
    end
  end

  describe '#sample' do
    it 'returns a new relation with the target relation as source' do
      @relation.sample(10).sources.should include(@relation)
    end
  end

  describe '#limit' do
    it 'returns a new relation with the target relation as source' do
      @relation.limit(42).sources.should include(@relation)
    end
  end

  context 'fields' do
    it 'returns a field for a message that does not correspond to a method' do
      @relation.a.should_not be_nil
    end

    it 'returns fields that have the correct name' do
      @relation.a.to_s.should eql('a')
    end

    it 'returns fields with positional notation' do
      @relation[1].to_s.should eql('$1')
    end
  end

end
@@ -0,0 +1,34 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')


# Specs for Piglet::Split, built from a mocked source relation (alias 'rel')
# and two mocked condition expressions that render as 'y' and 'w'.
describe Piglet::Split do

  before do
    @source = mock('source')
    @source.stub!(:alias).and_return('rel')

    @first_condition = mock('expr1')
    @first_condition.stub!(:to_s).and_return('y')

    @second_condition = mock('expr2')
    @second_condition.stub!(:to_s).and_return('w')

    conditions = [@first_condition, @second_condition]
    @split = Piglet::Split.new(@source, conditions)
  end

  describe '#to_s' do
    it 'outputs all x IF y expressions' do
      expected_shape = /SPLIT rel INTO \w+ IF [yw], \w+ IF [yw]/
      @split.to_s.should match(expected_shape)
    end

    it 'contains the names of all the shard relations' do
      shards = @split.shards
      @split.to_s.should include("#{shards[0].alias} IF y")
      @split.to_s.should include("#{shards[1].alias} IF w")
    end
  end

  describe '#shards' do
    it 'returns the same number of shards as there are expressions' do
      shard_count = @split.shards.size
      shard_count.should == 2
    end
  end

end
@@ -0,0 +1,7 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')


# Placeholder example group for the top-level Piglet module; it defines no
# examples.
describe(Piglet) { }
data/spec/spec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format specdoc
3
+ --backtrace
@@ -0,0 +1,14 @@
# Shared setup for the specs: puts this directory and ../lib on the load path,
# then loads Piglet and the Spec runner.
spec_dir = File.dirname(__FILE__)
lib_dir = File.join(spec_dir, '..', 'lib')

# Prepending both in one call leaves lib ahead of the spec directory, the same
# order two consecutive unshift calls (spec first, then lib) would produce.
$LOAD_PATH.unshift(lib_dir, spec_dir)


require 'piglet'
require 'spec'
require 'spec/autorun'


require 'piglet/interpreter'

# No runner options are configured.
Spec::Runner.configure { |config| }