piglet 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +22 -0
- data/LICENSE +20 -0
- data/README.rdoc +293 -0
- data/Rakefile +50 -0
- data/bin/piglet +9 -0
- data/examples/analysis.rb +311 -0
- data/examples/scratch.rb +11 -0
- data/examples/spike1.rb +43 -0
- data/examples/spike2.rb +40 -0
- data/examples/test1.rb +3 -0
- data/examples/test2.rb +5 -0
- data/examples/test3.rb +4 -0
- data/lib/piglet/assignment.rb +13 -0
- data/lib/piglet/cogroup.rb +31 -0
- data/lib/piglet/cross.rb +22 -0
- data/lib/piglet/describe.rb +5 -0
- data/lib/piglet/distinct.rb +16 -0
- data/lib/piglet/dump.rb +5 -0
- data/lib/piglet/explain.rb +13 -0
- data/lib/piglet/field.rb +40 -0
- data/lib/piglet/field_expression_functions.rb +62 -0
- data/lib/piglet/field_function_expression.rb +19 -0
- data/lib/piglet/field_infix_expression.rb +17 -0
- data/lib/piglet/field_prefix_expression.rb +21 -0
- data/lib/piglet/field_rename.rb +11 -0
- data/lib/piglet/field_suffix_expression.rb +17 -0
- data/lib/piglet/filter.rb +13 -0
- data/lib/piglet/foreach.rb +19 -0
- data/lib/piglet/group.rb +21 -0
- data/lib/piglet/illustrate.rb +5 -0
- data/lib/piglet/interpreter.rb +108 -0
- data/lib/piglet/join.rb +20 -0
- data/lib/piglet/limit.rb +13 -0
- data/lib/piglet/load.rb +31 -0
- data/lib/piglet/load_and_store.rb +16 -0
- data/lib/piglet/order.rb +29 -0
- data/lib/piglet/relation.rb +177 -0
- data/lib/piglet/sample.rb +13 -0
- data/lib/piglet/split.rb +41 -0
- data/lib/piglet/store.rb +17 -0
- data/lib/piglet/storing.rb +13 -0
- data/lib/piglet/stream.rb +5 -0
- data/lib/piglet/union.rb +19 -0
- data/lib/piglet.rb +45 -0
- data/spec/piglet/field_spec.rb +130 -0
- data/spec/piglet/interpreter_spec.rb +413 -0
- data/spec/piglet/relation_spec.rb +79 -0
- data/spec/piglet/split_spec.rb +34 -0
- data/spec/piglet_spec.rb +7 -0
- data/spec/spec.opts +3 -0
- data/spec/spec_helper.rb +14 -0
- metadata +123 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Field do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@field = Piglet::Field.new('field')
|
8
|
+
end
|
9
|
+
|
10
|
+
describe '#to_s' do
|
11
|
+
it 'returns a string with the field name (as a string)' do
|
12
|
+
@field.to_s.should eql("field")
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'returns a string with the field name (as a symbol)' do
|
16
|
+
@field = Piglet::Field.new(:field)
|
17
|
+
@field.to_s.should eql("field")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
context 'eval/aggregate functions' do
|
22
|
+
%w(avg count diff max min size sum tokenize).each do |function_name|
|
23
|
+
it "supports \"#{function_name.upcase}\" through ##{function_name}" do
|
24
|
+
@field.send(function_name).to_s.should eql("#{function_name.upcase}(field)")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'supports "IsEmpty" through #empty?' do
|
29
|
+
@field.empty?.to_s.should eql("IsEmpty(field)")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context 'nested expressions' do
|
34
|
+
it 'handles nested expressions' do
|
35
|
+
@field.max.min.avg.empty?.tokenize.to_s.should eql("TOKENIZE(IsEmpty(AVG(MIN(MAX(field)))))")
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'field renaming' do
|
40
|
+
it 'supports renaming a field' do
|
41
|
+
@field.as('x').to_s.should eql('field AS x')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'supports renaming a derived field' do
|
45
|
+
@field.x.y.z.as('b').to_s.should eql('field.x.y.z AS b')
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'supports renaming a calculated field' do
|
49
|
+
@field.max.as('m').to_s.should eql('MAX(field) AS m')
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'infix and unary operators' do
|
54
|
+
before do
|
55
|
+
@field1 = Piglet::Field.new('field1')
|
56
|
+
@field2 = Piglet::Field.new('field2')
|
57
|
+
end
|
58
|
+
|
59
|
+
[:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/].each do |op|
|
60
|
+
it "supports #{op} on a field" do
|
61
|
+
@field1.send(op, @field2).to_s.should eql("field1 #{op} field2")
|
62
|
+
end
|
63
|
+
|
64
|
+
it "supports #{op} on an expression" do
|
65
|
+
(@field1 + (@field1.send(op, @field2))).to_s.should eql("field1 + (field1 #{op} field2)")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'supports != through #ne on a field' do
|
70
|
+
@field1.ne(@field2).to_s.should eql("field1 != field2")
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'supports != through #ne on an expression' do
|
74
|
+
(@field1 + (@field1.ne(@field2))).to_s.should eql("field1 + (field1 != field2)")
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'supports "matches" on a field with a regex' do
|
78
|
+
@field1.matches(/.*\.pig$/).to_s.should eql("field1 matches '.*\\.pig$'")
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'supports "matches" on a field with a string' do
|
82
|
+
@field1.matches('.*\.pig$').to_s.should eql("field1 matches '.*\\.pig$'")
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'supports "matches" on an expression' do
|
86
|
+
(@field1 + @field2).matches(/.*\.pig$/).to_s.should eql("(field1 + field2) matches '.*\\.pig$'")
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'supports "is null" on a field' do
|
90
|
+
@field1.null?.to_s.should eql("field1 is null")
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'supports "is null" on an expression' do
|
94
|
+
(@field1 + @field2).null?.to_s.should eql("(field1 + field2) is null")
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'supports "is not null" on a field' do
|
98
|
+
@field1.not_null?.to_s.should eql("field1 is not null")
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'supports "is not null" on an expression' do
|
102
|
+
(@field1 + @field2).not_null?.to_s.should eql("(field1 + field2) is not null")
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'supports "NOT" on a field' do
|
106
|
+
@field1.not.to_s.should eql("NOT field1")
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'supports "NOT" on an expression' do
|
110
|
+
(@field1 == @field2).not.to_s.should eql("NOT (field1 == field2)")
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'supports unary - through #neg on a field' do
|
114
|
+
@field1.neg.to_s.should eql("-field1")
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'supports unary - through #neg on an expression' do
|
118
|
+
(@field1 + @field2).neg.to_s.should eql("-(field1 + field2)")
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'supports casts on a field' do
|
122
|
+
@field1.cast(:chararray).to_s.should eql("(chararray) field1")
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'supports casts on an expression' do
|
126
|
+
(@field1 + @field2).cast(:chararray).to_s.should eql("(chararray) (field1 + field2)")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
@@ -0,0 +1,413 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Interpreter do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@interpreter = Piglet::Interpreter.new
|
8
|
+
end
|
9
|
+
|
10
|
+
context 'basic usage' do
|
11
|
+
it 'interprets a block given to #new' do
|
12
|
+
output = Piglet::Interpreter.new { store(load('some/path'), 'out') }
|
13
|
+
output.to_pig_latin.should_not be_empty
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'interprets a block given to #interpret' do
|
17
|
+
output = @interpreter.interpret { store(load('some/path'), 'out') }
|
18
|
+
output.to_pig_latin.should_not be_empty
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'does nothing with no commands' do
|
22
|
+
@interpreter.interpret.to_pig_latin.should be_empty
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'load & store operators:' do
|
27
|
+
describe 'LOAD' do
|
28
|
+
it 'outputs a LOAD statement' do
|
29
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
30
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path'")
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'outputs a LOAD statement without a USING clause if none specified' do
|
34
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
35
|
+
@interpreter.to_pig_latin.should_not include('USING')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'outputs a LOAD statement with a USING clause with a specified function' do
|
39
|
+
@interpreter.interpret { store(load('some/path', :using => 'XYZ'), 'out') }
|
40
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' USING XYZ;")
|
41
|
+
end
|
42
|
+
|
43
|
+
Piglet::LoadAndStore::LOAD_STORE_FUNCTIONS.each do |symbolic_name, function|
|
44
|
+
it "knows that the load method :#{symbolic_name} means #{function}" do
|
45
|
+
@interpreter.interpret { store(load('some/path', :using => symbolic_name), 'out') }
|
46
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' USING #{function};")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'outputs a LOAD statement with an AS clause' do
|
51
|
+
@interpreter.interpret { store(load('some/path', :schema => %w(a b c)), 'out') }
|
52
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b, c);")
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'outputs a LOAD statement with an AS clause with types' do
|
56
|
+
@interpreter.interpret { store(load('some/path', :schema => [:a, [:b, :chararray], :c]), 'out') }
|
57
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'outputs a LOAD statement with an AS clause with types specified as both strings and symbols' do
|
61
|
+
@interpreter.interpret { store(load('some/path', :schema => [:a, %w(b chararray), :c]), 'out') }
|
62
|
+
@interpreter.to_pig_latin.should include("LOAD 'some/path' AS (a, b:chararray, c);")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe 'STORE' do
|
67
|
+
it 'outputs a STORE statement' do
|
68
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
69
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out'/)
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'outputs a STORE statement without a USING clause if none specified' do
|
73
|
+
@interpreter.interpret { store(load('some/path'), 'out') }
|
74
|
+
@interpreter.to_pig_latin.should_not include("USING")
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'outputs a STORE statement with a USING clause with a specified function' do
|
78
|
+
@interpreter.interpret { store(load('some/path'), 'out', :using => 'XYZ') }
|
79
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING XYZ/)
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'knows that the store method :pig_storage means PigStorage' do
|
83
|
+
@interpreter.interpret { store(load('some/path'), 'out', :using => :pig_storage) }
|
84
|
+
@interpreter.to_pig_latin.should match(/STORE \w+ INTO 'out' USING PigStorage/)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe 'DUMP' do
|
89
|
+
it 'outputs a DUMP statement' do
|
90
|
+
@interpreter.interpret { dump(load('some/path')) }
|
91
|
+
@interpreter.to_pig_latin.should match(/DUMP \w+/)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
context 'diagnostic operators:' do
|
97
|
+
describe 'ILLUSTRATE' do
|
98
|
+
it 'outputs an ILLUSTRATE statement' do
|
99
|
+
@interpreter.interpret { illustrate(load('some/path')) }
|
100
|
+
@interpreter.to_pig_latin.should match(/ILLUSTRATE \w+/)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
describe 'DESCRIBE' do
|
105
|
+
it 'outputs a DESCRIBE statement' do
|
106
|
+
@interpreter.interpret { describe(load('some/path')) }
|
107
|
+
@interpreter.to_pig_latin.should match(/DESCRIBE \w+/)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe 'EXPLAIN' do
|
112
|
+
it 'outputs an EXPLAIN statement' do
|
113
|
+
@interpreter.interpret { explain(load('some/path')) }
|
114
|
+
@interpreter.to_pig_latin.should match(/EXPLAIN \w+/)
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'outputs an EXPLAIN statement without an alias' do
|
118
|
+
@interpreter.interpret { explain }
|
119
|
+
@interpreter.to_pig_latin.should match(/EXPLAIN;/)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context 'relation operators:' do
|
125
|
+
describe 'GROUP' do
|
126
|
+
it 'outputs a GROUP statement with one grouping field' do
|
127
|
+
@interpreter.interpret { store(load('in').group(:a), 'out') }
|
128
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY a/)
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'outputs a GROUP statement with more than one grouping field' do
|
132
|
+
@interpreter.interpret { store(load('in').group(:a, :b, :c), 'out') }
|
133
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\)/)
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'outputs a GROUP statement with a PARALLEL clause' do
|
137
|
+
@interpreter.interpret { store(load('in').group([:a, :b, :c], :parallel => 3), 'out') }
|
138
|
+
@interpreter.to_pig_latin.should match(/GROUP \w+ BY \(a, b, c\) PARALLEL 3/)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
describe 'DISTINCT' do
|
143
|
+
it 'outputs a DISTINCT statement' do
|
144
|
+
@interpreter.interpret { store(load('in').distinct, 'out') }
|
145
|
+
@interpreter.to_pig_latin.should match(/DISTINCT \w+/)
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'outputs a DISTINCT statement with a PARALLEL clause' do
|
149
|
+
@interpreter.interpret { store(load('in').distinct(:parallel => 4), 'out') }
|
150
|
+
@interpreter.to_pig_latin.should match(/DISTINCT \w+ PARALLEL 4/)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
describe 'CROSS' do
|
155
|
+
it 'outputs a CROSS statement with two relations' do
|
156
|
+
@interpreter.interpret do
|
157
|
+
a = load('in1')
|
158
|
+
b = load('in2')
|
159
|
+
c = a.cross(b)
|
160
|
+
dump(c)
|
161
|
+
end
|
162
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+/)
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'outputs a CROSS statement with many relations' do
|
166
|
+
@interpreter.interpret do
|
167
|
+
a = load('in1')
|
168
|
+
b = load('in2')
|
169
|
+
c = load('in3')
|
170
|
+
d = load('in4')
|
171
|
+
e = a.cross(b, c, d)
|
172
|
+
dump(e)
|
173
|
+
end
|
174
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+, \w+/)
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'outputs a CROSS statement with a PARALLEL clause' do
|
178
|
+
@interpreter.interpret do
|
179
|
+
a = load('in1')
|
180
|
+
b = load('in2')
|
181
|
+
c = load('in3')
|
182
|
+
d = a.cross([b, c], :parallel => 4)
|
183
|
+
dump(d)
|
184
|
+
end
|
185
|
+
@interpreter.to_pig_latin.should match(/CROSS \w+, \w+, \w+ PARALLEL 4/)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
describe 'UNION' do
|
190
|
+
it 'outputs a UNION statement with two relations' do
|
191
|
+
@interpreter.interpret do
|
192
|
+
a = load('in1')
|
193
|
+
b = load('in2')
|
194
|
+
c = a.union(b)
|
195
|
+
dump(c)
|
196
|
+
end
|
197
|
+
@interpreter.to_pig_latin.should match(/UNION \w+, \w+/)
|
198
|
+
end
|
199
|
+
|
200
|
+
it 'outputs a UNION statement with many relations' do
|
201
|
+
@interpreter.interpret do
|
202
|
+
a = load('in1')
|
203
|
+
b = load('in2')
|
204
|
+
c = load('in3')
|
205
|
+
d = load('in4')
|
206
|
+
e = a.union(b, c, d)
|
207
|
+
dump(e)
|
208
|
+
end
|
209
|
+
@interpreter.to_pig_latin.should match(/UNION \w+, \w+, \w+, \w+/)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
describe 'SAMPLE' do
|
214
|
+
it 'outputs a SAMPLE statement' do
|
215
|
+
@interpreter.interpret { dump(load('in').sample(10)) }
|
216
|
+
@interpreter.to_pig_latin.should match(/SAMPLE \w+ 10/)
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
describe 'LIMIT' do
|
221
|
+
it 'outputs a LIMIT statement' do
|
222
|
+
@interpreter.interpret { dump(load('in').limit(42)) }
|
223
|
+
@interpreter.to_pig_latin.should match(/LIMIT \w+ 42/)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
describe 'FOREACH … GENERATE' do
|
228
|
+
it 'outputs a FOREACH … GENERATE statement' do
|
229
|
+
@interpreter.interpret { dump(load('in').foreach { |r| :a }) }
|
230
|
+
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
|
231
|
+
end
|
232
|
+
|
233
|
+
it 'outputs a FOREACH … GENERATE statement with a list of fields' do
|
234
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
|
235
|
+
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
|
236
|
+
end
|
237
|
+
|
238
|
+
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
|
239
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
|
240
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
|
244
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
|
245
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
|
246
|
+
end
|
247
|
+
|
248
|
+
it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
|
249
|
+
@interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
|
250
|
+
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
|
251
|
+
end
|
252
|
+
|
253
|
+
# Checks that nested field access (r.a.b) is emitted as a dotted path in the
# GENERATE clause. The dots in the expected pattern are escaped so that only
# a literal "." satisfies the match; an unescaped "." would accept any
# character between the field names.
it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
  @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
  @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b, b\.c, c\.d/)
end
|
257
|
+
|
258
|
+
# Checks that aliased nested fields are emitted as "<path> AS <name>".
# The dots in the expected pattern are escaped so that only a literal "."
# between the path segments satisfies the match.
it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
  @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
  @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b AS c, a\.b AS d/)
end
|
262
|
+
end
|
263
|
+
|
264
|
+
describe 'FILTER' do
|
265
|
+
it 'outputs a FILTER statement' do
|
266
|
+
@interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
|
267
|
+
@interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
|
268
|
+
end
|
269
|
+
|
270
|
+
# it 'outputs a FILTER statement with a complex test' do
|
271
|
+
# @interpreter.interpret { dump(load('in').filter { |r| r.a > r.b && r.c != 3 }) }
|
272
|
+
# @interpreter.to_pig_latin.should match(/FILTER \w+ BY a > b AND c != 3/)
|
273
|
+
# end
|
274
|
+
end
|
275
|
+
|
276
|
+
describe 'SPLIT' do
|
277
|
+
it 'outputs a SPLIT statement' do
|
278
|
+
@interpreter.interpret do
|
279
|
+
a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
|
280
|
+
dump(a)
|
281
|
+
dump(b)
|
282
|
+
end
|
283
|
+
@interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
describe 'ORDER' do
|
288
|
+
it 'outputs an ORDER statement' do
|
289
|
+
@interpreter.interpret { dump(load('in').order(:a)) }
|
290
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a/)
|
291
|
+
end
|
292
|
+
|
293
|
+
it 'outputs an ORDER statement with multiple fields' do
|
294
|
+
@interpreter.interpret { dump(load('in').order(:a, :b)) }
|
295
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a, b/)
|
296
|
+
end
|
297
|
+
|
298
|
+
it 'outputs an ORDER statement with ASC and DESC' do
|
299
|
+
@interpreter.interpret { dump(load('in').order([:a, :asc], [:b, :desc])) }
|
300
|
+
@interpreter.to_pig_latin.should match(/ORDER \w+ BY a ASC, b DESC/)
|
301
|
+
end
|
302
|
+
end
|
303
|
+
|
304
|
+
describe 'JOIN' do
|
305
|
+
it 'outputs a JOIN statement' do
|
306
|
+
@interpreter.interpret do
|
307
|
+
a = load('in1')
|
308
|
+
b = load('in2')
|
309
|
+
c = a.join(a => :x, b => :y)
|
310
|
+
dump(c)
|
311
|
+
end
|
312
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+/)
|
313
|
+
end
|
314
|
+
|
315
|
+
it 'outputs a JOIN statement with a PARALLEL clause' do
|
316
|
+
@interpreter.interpret do
|
317
|
+
a = load('in1')
|
318
|
+
b = load('in2')
|
319
|
+
c = a.join(a => :x, b => :y, :parallel => 5)
|
320
|
+
dump(c)
|
321
|
+
end
|
322
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
|
323
|
+
end
|
324
|
+
|
325
|
+
it 'outputs a JOIN statement with a USING clause' do
|
326
|
+
@interpreter.interpret do
|
327
|
+
a = load('in1')
|
328
|
+
b = load('in2')
|
329
|
+
c = a.join(a => :x, b => :y, :using => :replicated)
|
330
|
+
dump(c)
|
331
|
+
end
|
332
|
+
@interpreter.to_pig_latin.should match(/JOIN \w+ BY \w+, \w+ BY \w+ USING "replicated"/)
|
333
|
+
end
|
334
|
+
end
|
335
|
+
|
336
|
+
describe 'COGROUP' do
|
337
|
+
it 'outputs a COGROUP statement' do
|
338
|
+
@interpreter.interpret do
|
339
|
+
a = load('in1')
|
340
|
+
b = load('in2')
|
341
|
+
c = a.cogroup(a => :x, b => :y)
|
342
|
+
dump(c)
|
343
|
+
end
|
344
|
+
@interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+/)
|
345
|
+
end
|
346
|
+
|
347
|
+
it 'outputs a COGROUP statement with multiple join fields' do
|
348
|
+
@interpreter.interpret do
|
349
|
+
a = load('in1')
|
350
|
+
b = load('in2')
|
351
|
+
c = a.cogroup(a => :x, b => [:y, :z, :w])
|
352
|
+
dump(c)
|
353
|
+
end
|
354
|
+
@interpreter.to_pig_latin.should match(/\w+ BY \(y, z, w\)/)
|
355
|
+
end
|
356
|
+
|
357
|
+
it 'outputs a COGROUP statement with a PARALLEL clause' do
|
358
|
+
@interpreter.interpret do
|
359
|
+
a = load('in1')
|
360
|
+
b = load('in2')
|
361
|
+
c = a.cogroup(a => :x, b => :y, :parallel => 5)
|
362
|
+
dump(c)
|
363
|
+
end
|
364
|
+
@interpreter.to_pig_latin.should match(/COGROUP \w+ BY \w+, \w+ BY \w+ PARALLEL 5/)
|
365
|
+
end
|
366
|
+
|
367
|
+
it 'outputs a COGROUP statement with INNER and OUTER' do
|
368
|
+
@interpreter.interpret do
|
369
|
+
a = load('in1')
|
370
|
+
b = load('in2')
|
371
|
+
c = a.cogroup(a => [:x, :inner], b => [:y, :outer])
|
372
|
+
dump(c)
|
373
|
+
end
|
374
|
+
@interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
|
375
|
+
@interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
|
376
|
+
end
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
context 'aliasing & multiple statements' do
|
381
|
+
it 'aliases the loaded relation and uses the same alias in the STORE statement' do
|
382
|
+
@interpreter.interpret { store(load('in'), 'out') }
|
383
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
|
384
|
+
end
|
385
|
+
|
386
|
+
it 'aliases both a loaded relation and a grouped relation and uses the latter in the STORE statement' do
|
387
|
+
@interpreter.interpret { store(load('in', :schema => [:a]).group(:a), 'out') }
|
388
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\nSTORE \2 INTO 'out';/)
|
389
|
+
end
|
390
|
+
|
391
|
+
it 'aliases a whole row of statements' do
|
392
|
+
@interpreter.interpret do
|
393
|
+
a = load('in', :schema => [:a])
|
394
|
+
b = a.group(:a)
|
395
|
+
c = b.group(:a)
|
396
|
+
d = c.group(:a)
|
397
|
+
store(d, 'out')
|
398
|
+
end
|
399
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in' AS \(a\);\n(\w+) = GROUP \1 BY a;\n(\w+) = GROUP \2 BY a;\n(\w+) = GROUP \3 BY a;\nSTORE \4 INTO 'out';/)
|
400
|
+
end
|
401
|
+
|
402
|
+
it 'outputs the statements for an alias only once, regardless of how many times it is stored' do
|
403
|
+
@interpreter.interpret do
|
404
|
+
a = load('in')
|
405
|
+
b = a.distinct
|
406
|
+
store(b, 'out1')
|
407
|
+
store(b, 'out2')
|
408
|
+
end
|
409
|
+
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\n(\w+) = DISTINCT \1;\nSTORE \2 INTO 'out1';\nSTORE \2 INTO 'out2';/)
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Relation do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@relation = Object.new
|
8
|
+
@relation.extend Piglet::Relation
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'has an alias' do
|
12
|
+
@relation.alias.should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'has a unique alias' do
|
16
|
+
aliases = { }
|
17
|
+
1000.times do
|
18
|
+
@relation = Object.new
|
19
|
+
@relation.extend Piglet::Relation
|
20
|
+
aliases.should_not have_key(@relation.alias)
|
21
|
+
aliases[@relation.alias] = @relation
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#group' do
|
26
|
+
it 'returns a new relation with the target relation as source' do
|
27
|
+
@relation.group(:a).sources.should include(@relation)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe '#distinct' do
|
32
|
+
it 'returns a new relation with the target relation as source' do
|
33
|
+
@relation.distinct.sources.should include(@relation)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe '#cross' do
|
38
|
+
it 'returns a new relation with the target relation as one of the sources' do
|
39
|
+
other = Object.new
|
40
|
+
other.extend Piglet::Relation
|
41
|
+
@relation.cross(other).sources.should include(@relation)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe '#union' do
|
46
|
+
it 'returns a new relation with the target relation as one of the sources' do
|
47
|
+
other = Object.new
|
48
|
+
other.extend Piglet::Relation
|
49
|
+
@relation.union(other).sources.should include(@relation)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe '#sample' do
|
54
|
+
it 'returns a new relation with the target relation as source' do
|
55
|
+
@relation.sample(10).sources.should include(@relation)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe '#limit' do
|
60
|
+
it 'returns a new relation with the target relation as source' do
|
61
|
+
@relation.limit(42).sources.should include(@relation)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'fields' do
|
66
|
+
it 'returns a field for a message that does not correspond to a method' do
|
67
|
+
@relation.a.should_not be_nil
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'returns fields that have the correct name' do
|
71
|
+
@relation.a.to_s.should eql('a')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'returns fields with positional notation' do
|
75
|
+
@relation[1].to_s.should eql('$1')
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
describe Piglet::Split do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@relation = mock('source')
|
8
|
+
@expr1 = mock('expr1')
|
9
|
+
@expr2 = mock('expr2')
|
10
|
+
@relation.stub!(:alias).and_return('rel')
|
11
|
+
@expr1.stub!(:to_s).and_return('y')
|
12
|
+
@expr2.stub!(:to_s).and_return('w')
|
13
|
+
@split = Piglet::Split.new(@relation, [@expr1, @expr2])
|
14
|
+
end
|
15
|
+
|
16
|
+
describe '#to_s' do
|
17
|
+
it 'outputs all x IF y expressions' do
|
18
|
+
@split.to_s.should match(/SPLIT rel INTO \w+ IF [yw], \w+ IF [yw]/)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'contains the names of all the shard relations' do
|
22
|
+
@shards = @split.shards
|
23
|
+
@split.to_s.should include("#{@shards[0].alias} IF y")
|
24
|
+
@split.to_s.should include("#{@shards[1].alias} IF w")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe '#shards' do
|
29
|
+
it 'returns the same number of shards as there are expressions' do
|
30
|
+
@split.shards.size.should == 2
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/spec/piglet_spec.rb
ADDED
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
|
4
|
+
|
5
|
+
require 'piglet'
|
6
|
+
require 'spec'
|
7
|
+
require 'spec/autorun'
|
8
|
+
|
9
|
+
|
10
|
+
require 'piglet/interpreter'
|
11
|
+
|
12
|
+
Spec::Runner.configure do |config|
|
13
|
+
|
14
|
+
end
|