piglet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/.document +5 -0
  2. data/.gitignore +22 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +293 -0
  5. data/Rakefile +50 -0
  6. data/bin/piglet +9 -0
  7. data/examples/analysis.rb +311 -0
  8. data/examples/scratch.rb +11 -0
  9. data/examples/spike1.rb +43 -0
  10. data/examples/spike2.rb +40 -0
  11. data/examples/test1.rb +3 -0
  12. data/examples/test2.rb +5 -0
  13. data/examples/test3.rb +4 -0
  14. data/lib/piglet/assignment.rb +13 -0
  15. data/lib/piglet/cogroup.rb +31 -0
  16. data/lib/piglet/cross.rb +22 -0
  17. data/lib/piglet/describe.rb +5 -0
  18. data/lib/piglet/distinct.rb +16 -0
  19. data/lib/piglet/dump.rb +5 -0
  20. data/lib/piglet/explain.rb +13 -0
  21. data/lib/piglet/field.rb +40 -0
  22. data/lib/piglet/field_expression_functions.rb +62 -0
  23. data/lib/piglet/field_function_expression.rb +19 -0
  24. data/lib/piglet/field_infix_expression.rb +17 -0
  25. data/lib/piglet/field_prefix_expression.rb +21 -0
  26. data/lib/piglet/field_rename.rb +11 -0
  27. data/lib/piglet/field_suffix_expression.rb +17 -0
  28. data/lib/piglet/filter.rb +13 -0
  29. data/lib/piglet/foreach.rb +19 -0
  30. data/lib/piglet/group.rb +21 -0
  31. data/lib/piglet/illustrate.rb +5 -0
  32. data/lib/piglet/interpreter.rb +108 -0
  33. data/lib/piglet/join.rb +20 -0
  34. data/lib/piglet/limit.rb +13 -0
  35. data/lib/piglet/load.rb +31 -0
  36. data/lib/piglet/load_and_store.rb +16 -0
  37. data/lib/piglet/order.rb +29 -0
  38. data/lib/piglet/relation.rb +177 -0
  39. data/lib/piglet/sample.rb +13 -0
  40. data/lib/piglet/split.rb +41 -0
  41. data/lib/piglet/store.rb +17 -0
  42. data/lib/piglet/storing.rb +13 -0
  43. data/lib/piglet/stream.rb +5 -0
  44. data/lib/piglet/union.rb +19 -0
  45. data/lib/piglet.rb +45 -0
  46. data/spec/piglet/field_spec.rb +130 -0
  47. data/spec/piglet/interpreter_spec.rb +413 -0
  48. data/spec/piglet/relation_spec.rb +79 -0
  49. data/spec/piglet/split_spec.rb +34 -0
  50. data/spec/piglet_spec.rb +7 -0
  51. data/spec/spec.opts +3 -0
  52. data/spec/spec_helper.rb +14 -0
  53. metadata +123 -0
data/spec/piglet/field_spec.rb ADDED
@@ -0,0 +1,130 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+
4
+ describe Piglet::Field do
5
+
6
+ before do
7
+ @field = Piglet::Field.new('field')
8
+ end
9
+
10
+ describe '#to_s' do
11
+ it 'returns a string with the field name (as a string)' do
12
+ @field.to_s.should eql("field")
13
+ end
14
+
15
+ it 'returns a string with the field name (as a symbol)' do
16
+ @field = Piglet::Field.new(:field)
17
+ @field.to_s.should eql("field")
18
+ end
19
+ end
20
+
21
+ context 'eval/aggregate functions' do
22
+ %w(avg count diff max min size sum tokenize).each do |function_name|
23
+ it "supports \"#{function_name.upcase}\" through ##{function_name}" do
24
+ @field.send(function_name).to_s.should eql("#{function_name.upcase}(field)")
25
+ end
26
+ end
27
+
28
+ it 'supports "IsEmpty" through #empty?' do
29
+ @field.empty?.to_s.should eql("IsEmpty(field)")
30
+ end
31
+ end
32
+
33
+ context 'nested expressions' do
34
+ it 'handles nested expressions' do
35
+ @field.max.min.avg.empty?.tokenize.to_s.should eql("TOKENIZE(IsEmpty(AVG(MIN(MAX(field)))))")
36
+ end
37
+ end
38
+
39
+ context 'field renaming' do
40
+ it 'supports renaming a field' do
41
+ @field.as('x').to_s.should eql('field AS x')
42
+ end
43
+
44
+ it 'supports renaming a derived field' do
45
+ @field.x.y.z.as('b').to_s.should eql('field.x.y.z AS b')
46
+ end
47
+
48
+ it 'supports renaming a calculated field' do
49
+ @field.max.as('m').to_s.should eql('MAX(field) AS m')
50
+ end
51
+ end
52
+
53
+ context 'infix and unary operators' do
54
+ before do
55
+ @field1 = Piglet::Field.new('field1')
56
+ @field2 = Piglet::Field.new('field2')
57
+ end
58
+
59
+ [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/].each do |op|
60
+ it "supports #{op} on a field" do
61
+ @field1.send(op, @field2).to_s.should eql("field1 #{op} field2")
62
+ end
63
+
64
+ it "supports #{op} on an expression" do
65
+ (@field1 + (@field1.send(op, @field2))).to_s.should eql("field1 + (field1 #{op} field2)")
66
+ end
67
+ end
68
+
69
+ it 'supports != through #ne on a field' do
70
+ @field1.ne(@field2).to_s.should eql("field1 != field2")
71
+ end
72
+
73
+ it 'supports != through #ne on an expression' do
74
+ (@field1 + (@field1.ne(@field2))).to_s.should eql("field1 + (field1 != field2)")
75
+ end
76
+
77
+ it 'supports "matches" on a field with a regex' do
78
+ @field1.matches(/.*\.pig$/).to_s.should eql("field1 matches '.*\\.pig$'")
79
+ end
80
+
81
+ it 'supports "matches" on a field with a string' do
82
+ @field1.matches('.*\.pig$').to_s.should eql("field1 matches '.*\\.pig$'")
83
+ end
84
+
85
+ it 'supports "matches" on an expression' do
86
+ (@field1 + @field2).matches(/.*\.pig$/).to_s.should eql("(field1 + field2) matches '.*\\.pig$'")
87
+ end
88
+
89
+ it 'supports "is null" on a field' do
90
+ @field1.null?.to_s.should eql("field1 is null")
91
+ end
92
+
93
+ it 'supports "is null" on an expression' do
94
+ (@field1 + @field2).null?.to_s.should eql("(field1 + field2) is null")
95
+ end
96
+
97
+ it 'supports "is not null" on a field' do
98
+ @field1.not_null?.to_s.should eql("field1 is not null")
99
+ end
100
+
101
+ it 'supports "is not null" on an expression' do
102
+ (@field1 + @field2).not_null?.to_s.should eql("(field1 + field2) is not null")
103
+ end
104
+
105
+ it 'supports "NOT" on a field' do
106
+ @field1.not.to_s.should eql("NOT field1")
107
+ end
108
+
109
+ it 'supports "NOT" on an expression' do
110
+ (@field1 == @field2).not.to_s.should eql("NOT (field1 == field2)")
111
+ end
112
+
113
+ it 'supports unary - through #neg on a field' do
114
+ @field1.neg.to_s.should eql("-field1")
115
+ end
116
+
117
+ it 'supports unary - through #neg on an expression' do
118
+ (@field1 + @field2).neg.to_s.should eql("-(field1 + field2)")
119
+ end
120
+
121
+ it 'supports casts on a field' do
122
+ @field1.cast(:chararray).to_s.should eql("(chararray) field1")
123
+ end
124
+
125
+ it 'supports casts on an expression' do
126
+ (@field1 + @field2).cast(:chararray).to_s.should eql("(chararray) (field1 + field2)")
127
+ end
128
+ end
129
+
130
+ end
data/spec/piglet/interpreter_spec.rb ADDED
@@ -0,0 +1,413 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+
4
+ describe Piglet::Interpreter do
5
+
6
+ before do
7
+ @interpreter = Piglet::Interpreter.new
8
+ end
9
+
10
+ context 'basic usage' do
11
+ it 'interprets a block given to #new' do
12
+ output = Piglet::Interpreter.new { store(load('some/path'), 'out') }
13
+ output.to_pig_latin.should_not be_empty
14
+ end
15
+
16
+ it 'interprets a block given to #interpret' do
17
+ output = @interpreter.interpret { store(load('some/path'), 'out') }
18
+ output.to_pig_latin.should_not be_empty
19
+ end
20
+
21
+ it 'does nothing with no commands' do
22
+ @interpreter.interpret.to_pig_latin.should be_empty
23
+ end
24
+ end
25
+
26
+ context 'load & store operators:' do
27
+ describe 'LOAD' do
28
+ it 'outputs a LOAD statement' do
29
+ @interpreter.interpret { store(load('some/path'), 'out') }
30
+ @interpreter.to_pig_latin.should include("LOAD 'some/path'")
31
+ end
32
+
33
+ it 'outputs a LOAD statement without a USING clause if none specified' do
34
+ @interpreter.interpret { store(load('some/path'), 'out') }
35
+ @interpreter.to_pig_latin.should_not include('USING')
36
+ end
37
+
38
+ it 'outputs a LOAD statement with a USING clause with a specified function' do
39
+ @interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
40
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
41
+ end
42
+
43
+ Piglet::LoadAndStore::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
44
+ it "knows that the load method :#{symbolic_name} means #{function}" do
45
+ @interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
46
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
47
+ end
48
+ end
49
+
50
+ it 'outputs a LOAD statement with an AS clause' do
51
+ @interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
52
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
53
+ end
54
+
55
+ it 'outputs a LOAD statement with an AS clause with types' do
56
+ @interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
57
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
58
+ end
59
+
60
+ it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
61
+ @interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
62
+ @interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
63
+ end
64
+ end
65
+
66
+ describe 'STORE' do
67
+ it 'outputs a STORE statement' do
68
+ @interpreter.interpret { store(load('some/path'), 'out') }
69
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
70
+ end
71
+
72
+ it 'outputs a STORE statement without a USING clause if none specified' do
73
+ @interpreter.interpret { store(load('some/path'), 'out') }
74
+ @interpreter.to_pig_latin.should_not include("USING")
75
+ end
76
+
77
+ it 'outputs a STORE statement with a USING clause with a specified function' do
78
+ @interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
79
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
80
+ end
81
+
82
+ it 'knows that the store method :pig_storage means PigStorage' do
83
+ @interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
84
+ @interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
85
+ end
86
+ end
87
+
88
+ describe 'DUMP' do
89
+ it 'outputs a DUMP statement' do
90
+ @interpreter.interpret { dump(load('some/path')) }
91
+ @interpreter.to_pig_latin.should match(/DUMP \w+/)
92
+ end
93
+ end
94
+ end
95
+
96
+ context 'diagnostic operators:' do
97
+ describe 'ILLUSTRATE' do
98
+ it 'outputs an ILLUSTRATE statement' do
99
+ @interpreter.interpret { illustrate(load('some/path')) }
100
+ @interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
101
+ end
102
+ end
103
+
104
+ describe 'DESCRIBE' do
105
+ it 'outputs a DESCRIBE statement' do
106
+ @interpreter.interpret { describe(load('some/path')) }
107
+ @interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
108
+ end
109
+ end
110
+
111
+ describe 'EXPLAIN' do
112
+ it 'outputs an EXPLAIN statement' do
113
+ @interpreter.interpret { explain(load('some/path')) }
114
+ @interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
115
+ end
116
+
117
+ it 'outputs an EXPLAIN statement without an alias' do
118
+ @interpreter.interpret { explain }
119
+ @interpreter.to_pig_latin.should match(/EXPLAIN;/)
120
+ end
121
+ end
122
+ end
123
+
124
+ context 'relation operators:' do
125
+ describe 'GROUP' do
126
+ it 'outputs a GROUP statement with one grouping field' do
127
+ @interpreter.interpret { store(load('in').group(:a), 'out') }
128
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
129
+ end
130
+
131
+ it 'outputs a GROUP statement with more than one grouping field' do
132
+ @interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
133
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
134
+ end
135
+
136
+ it 'outputs a GROUP statement with a PARALLEL clause' do
137
+ @interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
138
+ @interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
139
+ end
140
+ end
141
+
142
+ describe 'DISTINCT' do
143
+ it 'outputs a DISTINCT statement' do
144
+ @interpreter.interpret { store(load('in').distinct, 'out') }
145
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+/)
146
+ end
147
+
148
+ it 'outputs a DISTINCT statement with a PARALLEL clause' do
149
+ @interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
150
+ @interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
151
+ end
152
+ end
153
+
154
+ describe 'CROSS' do
155
+ it 'outputs a CROSS statement with two relations' do
156
+ @interpreter.interpret do
157
+ a = load('in1')
158
+ b = load('in2')
159
+ c = a.cross(b)
160
+ dump(c)
161
+ end
162
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
163
+ end
164
+
165
+ it 'outputs a CROSS statement with many relations' do
166
+ @interpreter.interpret do
167
+ a = load('in1')
168
+ b = load('in2')
169
+ c = load('in3')
170
+ d = load('in4')
171
+ e = a.cross(b, c, d)
172
+ dump(e)
173
+ end
174
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
175
+ end
176
+
177
+ it 'outputs a CROSS statement with a PARALLEL clause' do
178
+ @interpreter.interpret do
179
+ a = load('in1')
180
+ b = load('in2')
181
+ c = load('in3')
182
+ d = a.cross([b, c], :parallel => 4)
183
+ dump(d)
184
+ end
185
+ @interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
186
+ end
187
+ end
188
+
189
+ describe 'UNION' do
190
+ it 'outputs a UNION statement with two relations' do
191
+ @interpreter.interpret do
192
+ a = load('in1')
193
+ b = load('in2')
194
+ c = a.union(b)
195
+ dump(c)
196
+ end
197
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
198
+ end
199
+
200
+ it 'outputs a UNION statement with many relations' do
201
+ @interpreter.interpret do
202
+ a = load('in1')
203
+ b = load('in2')
204
+ c = load('in3')
205
+ d = load('in4')
206
+ e = a.union(b, c, d)
207
+ dump(e)
208
+ end
209
+ @interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
210
+ end
211
+ end
212
+
213
+ describe 'SAMPLE' do
214
+ it 'outputs a SAMPLE statement' do
215
+ @interpreter.interpret { dump(load('in').sample(10)) }
216
+ @interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
217
+ end
218
+ end
219
+
220
+ describe 'LIMIT' do
221
+ it 'outputs a LIMIT statement' do
222
+ @interpreter.interpret { dump(load('in').limit(42)) }
223
+ @interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
224
+ end
225
+ end
226
+
227
+ describe 'FOREACH … GENERATE' do
228
+ it 'outputs a FOREACH … GENERATE statement' do
229
+ @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
230
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
231
+ end
232
+
233
+ it 'outputs a FOREACH … GENERATE statement with a list of fields' do
234
+ @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
235
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
236
+ end
237
+
238
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
239
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
240
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
241
+ end
242
+
243
+ it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
244
+ @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
245
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
246
+ end
247
+
248
+ it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
249
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
250
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
251
+ end
252
+
253
+ it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
254
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
255
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/)
256
+ end
257
+
258
+ it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
259
+ @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
260
+ @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/)
261
+ end
262
+ end
263
+
264
+ describe 'FILTER' do
265
+ it 'outputs a FILTER statement' do
266
+ @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
267
+ @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
268
+ end
269
+
270
+ # it 'outputs a FILTER statement with a complex test' do
271
+ # @interpreter.interpret { dump(load('in').filter { |r| r.a > r.b && r.c != 3 }) }
272
+ # @interpreter.to_pig_latin.should match(/FILTER \w+ BY a > b AND c != 3/)
273
+ # end
274
+ end
275
+
276
+ describe 'SPLIT' do
277
+ it 'outputs a SPLIT statement' do
278
+ @interpreter.interpret do
279
+ a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
280
+ dump(a)
281
+ dump(b)
282
+ end
283
+ @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
284
+ end
285
+ end
286
+
287
+ describe 'ORDER' do
288
+ it 'outputs an ORDER statement' do
289
+ @interpreter.interpret { dump(load('in').order(:a)) }
290
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
291
+ end
292
+
293
+ it 'outputs an ORDER statement with multiple fields' do
294
+ @interpreter.interpret { dump(load('in').order(:a, :b)) }
295
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
296
+ end
297
+
298
+ it 'outputs an ORDER statement with ASC and DESC' do
299
+ @interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
300
+ @interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
301
+ end
302
+ end
303
+
304
+ describe 'JOIN' do
305
+ it 'outputs a JOIN statement' do
306
+ @interpreter.interpret do
307
+ a = load('in1')
308
+ b = load('in2')
309
+ c = a.join(a => :x, b => :y)
310
+ dump(c)
311
+ end
312
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
313
+ end
314
+
315
+ it 'outputs a JOIN statement with a PARALLEL clause' do
316
+ @interpreter.interpret do
317
+ a = load('in1')
318
+ b = load('in2')
319
+ c = a.join(a => :x, b => :y, :parallel => 5)
320
+ dump(c)
321
+ end
322
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
323
+ end
324
+
325
+ it 'outputs a JOIN statement with a USING clause' do
326
+ @interpreter.interpret do
327
+ a = load('in1')
328
+ b = load('in2')
329
+ c = a.join(a => :x, b => :y, :using => :replicated)
330
+ dump(c)
331
+ end
332
+ @interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
333
+ end
334
+ end
335
+
336
+ describe 'COGROUP' do
337
+ it 'outputs a COGROUP statement' do
338
+ @interpreter.interpret do
339
+ a = load('in1')
340
+ b = load('in2')
341
+ c = a.cogroup(a => :x, b => :y)
342
+ dump(c)
343
+ end
344
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
345
+ end
346
+
347
+ it 'outputs a COGROUP statement with multiple join fields' do
348
+ @interpreter.interpret do
349
+ a = load('in1')
350
+ b = load('in2')
351
+ c = a.cogroup(a => :x, b => [:y, :z, :w])
352
+ dump(c)
353
+ end
354
+ @interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
355
+ end
356
+
357
+ it 'outputs a COGROUP statement with a PARALLEL clause' do
358
+ @interpreter.interpret do
359
+ a = load('in1')
360
+ b = load('in2')
361
+ c = a.cogroup(a => :x, b => :y, :parallel => 5)
362
+ dump(c)
363
+ end
364
+ @interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
365
+ end
366
+
367
+ it 'outputs a COGROUP statement with INNER and OUTER' do
368
+ @interpreter.interpret do
369
+ a = load('in1')
370
+ b = load('in2')
371
+ c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
372
+ dump(c)
373
+ end
374
+ @interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
375
+ @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
376
+ end
377
+ end
378
+ end
379
+
380
+ context 'aliasing & multiple statements' do
381
+ it 'aliases the loaded relation and uses the same alias in the STORE statement' do
382
+ @interpreter.interpret { store(load('in'), 'out') }
383
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
384
+ end
385
+
386
+ it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
387
+ @interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
388
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
389
+ end
390
+
391
+ it 'aliases a whole row of statements' do
392
+ @interpreter.interpret do
393
+ a = load('in', :schema => [:a])
394
+ b = a.group(:a)
395
+ c = b.group(:a)
396
+ d = c.group(:a)
397
+ store(d, 'out')
398
+ end
399
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
400
+ end
401
+
402
+ it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
403
+ @interpreter.interpret do
404
+ a = load('in')
405
+ b = a.distinct
406
+ store(b, 'out1')
407
+ store(b, 'out2')
408
+ end
409
+ @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
410
+ end
411
+ end
412
+
413
+ end
data/spec/piglet/relation_spec.rb ADDED
@@ -0,0 +1,79 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+
4
+ describe Piglet::Relation do
5
+
6
+ before do
7
+ @relation = Object.new
8
+ @relation.extend Piglet::Relation
9
+ end
10
+
11
+ it 'has an alias' do
12
+ @relation.alias.should_not be_nil
13
+ end
14
+
15
+ it 'has a unique alias' do
16
+ aliases = { }
17
+ 1000.times do
18
+ @relation = Object.new
19
+ @relation.extend Piglet::Relation
20
+ aliases.should_not have_key(@relation.alias)
21
+ aliases[@relation.alias] = @relation
22
+ end
23
+ end
24
+
25
+ describe '#group' do
26
+ it 'returns a new relation with the target relation as source' do
27
+ @relation.group(:a).sources.should include(@relation)
28
+ end
29
+ end
30
+
31
+ describe '#distinct' do
32
+ it 'returns a new relation with the target relation as source' do
33
+ @relation.distinct.sources.should include(@relation)
34
+ end
35
+ end
36
+
37
+ describe '#cross' do
38
+ it 'returns a new relation with the target relation as one of the sources' do
39
+ other = Object.new
40
+ other.extend Piglet::Relation
41
+ @relation.cross(other).sources.should include(@relation)
42
+ end
43
+ end
44
+
45
+ describe '#union' do
46
+ it 'returns a new relation with the target relation as one of the sources' do
47
+ other = Object.new
48
+ other.extend Piglet::Relation
49
+ @relation.union(other).sources.should include(@relation)
50
+ end
51
+ end
52
+
53
+ describe '#sample' do
54
+ it 'returns a new relation with the target relation as source' do
55
+ @relation.sample(10).sources.should include(@relation)
56
+ end
57
+ end
58
+
59
+ describe '#limit' do
60
+ it 'returns a new relation with the target relation as source' do
61
+ @relation.limit(42).sources.should include(@relation)
62
+ end
63
+ end
64
+
65
+ context 'fields' do
66
+ it 'returns a field for a message that does not correspond to a method' do
67
+ @relation.a.should_not be_nil
68
+ end
69
+
70
+ it 'returns fields that have the correct name' do
71
+ @relation.a.to_s.should eql('a')
72
+ end
73
+
74
+ it 'returns fields with positional notation' do
75
+ @relation[1].to_s.should eql('$1')
76
+ end
77
+ end
78
+
79
+ end
data/spec/piglet/split_spec.rb ADDED
@@ -0,0 +1,34 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+
4
+ describe Piglet::Split do
5
+
6
+ before do
7
+ @relation = mock('source')
8
+ @expr1 = mock('expr1')
9
+ @expr2 = mock('expr2')
10
+ @relation.stub!(:alias).and_return('rel')
11
+ @expr1.stub!(:to_s).and_return('y')
12
+ @expr2.stub!(:to_s).and_return('w')
13
+ @split = Piglet::Split.new(@relation, [@expr1, @expr2])
14
+ end
15
+
16
+ describe '#to_s' do
17
+ it 'outputs all x IF y expressions' do
18
+ @split.to_s.should match(/SPLIT rel INTO \w+ IF [yw], \w+ IF [yw]/)
19
+ end
20
+
21
+ it 'contains the names of all the shard relations' do
22
+ @shards = @split.shards
23
+ @split.to_s.should include("#{@shards[0].alias} IF y")
24
+ @split.to_s.should include("#{@shards[1].alias} IF w")
25
+ end
26
+ end
27
+
28
+ describe '#shards' do
29
+ it 'returns the same number of shards as there are expressions' do
30
+ @split.shards.size.should == 2
31
+ end
32
+ end
33
+
34
+ end
data/spec/piglet_spec.rb ADDED
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+
4
+ describe Piglet do
5
+
6
+
7
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format specdoc
3
+ --backtrace
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,14 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+
4
+
5
+ require 'piglet'
6
+ require 'spec'
7
+ require 'spec/autorun'
8
+
9
+
10
+ require 'piglet/interpreter'
11
+
12
+ Spec::Runner.configure do |config|
13
+
14
+ end