cascading.jruby 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/HACKING.md +15 -0
- data/History.txt +0 -0
- data/LICENSE.txt +165 -0
- data/README.md +7 -0
- data/Rakefile +45 -0
- data/bin/make_job +81 -0
- data/lib/cascading/assembly.rb +726 -0
- data/lib/cascading/base.rb +63 -0
- data/lib/cascading/cascade.rb +63 -0
- data/lib/cascading/cascading.rb +134 -0
- data/lib/cascading/cascading_exception.rb +30 -0
- data/lib/cascading/expr_stub.rb +33 -0
- data/lib/cascading/ext/array.rb +15 -0
- data/lib/cascading/flow.rb +168 -0
- data/lib/cascading/operations.rb +204 -0
- data/lib/cascading/scope.rb +160 -0
- data/lib/cascading.rb +63 -0
- data/samples/branch.rb +31 -0
- data/samples/cascading.rb +41 -0
- data/samples/copy.rb +18 -0
- data/samples/data/data2.txt +88799 -0
- data/samples/data/data_join1.txt +3 -0
- data/samples/data/data_join2.txt +3 -0
- data/samples/data/data_join3.txt +3 -0
- data/samples/join.rb +32 -0
- data/samples/logwordcount.rb +22 -0
- data/samples/project.rb +24 -0
- data/samples/rename.rb +21 -0
- data/samples/scorenames.rb +20 -0
- data/samples/splitter.rb +20 -0
- data/samples/union.rb +35 -0
- data/spec/cascading_spec.rb +100 -0
- data/spec/expr_spec.rb +10 -0
- data/spec/primary_key_spec.rb +119 -0
- data/spec/resource/join_input.txt +3 -0
- data/spec/resource/test_input.txt +4 -0
- data/spec/scope_spec.rb +174 -0
- data/spec/spec.opts +6 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/spec_util.rb +188 -0
- data/src/cascading/jruby/Main.java +38 -0
- data/src/cascading/jruby/runner.rb +6 -0
- data/tags +238 -0
- data/tasks/ann.rake +80 -0
- data/tasks/ant.rake +11 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +206 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/samples.rake +13 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +59 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +42 -0
- data/test/data/data1.txt +14 -0
- data/test/data/data2.txt +14 -0
- data/test/test_assembly.rb +321 -0
- data/test/test_cascading.rb +49 -0
- data/test/test_flow.rb +15 -0
- metadata +137 -0
data/samples/join.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'join' do
|
8
|
+
flow 'join' do
|
9
|
+
source 'input1', tap('samples/data/data_join1.txt')
|
10
|
+
source 'input2', tap('samples/data/data_join2.txt')
|
11
|
+
source 'input3', tap('samples/data/data_join3.txt')
|
12
|
+
|
13
|
+
assembly 'input1' do
|
14
|
+
split 'line', ['id', 'name']
|
15
|
+
end
|
16
|
+
|
17
|
+
assembly 'input2' do
|
18
|
+
split 'line', ['id', 'age']
|
19
|
+
end
|
20
|
+
|
21
|
+
assembly 'input3' do
|
22
|
+
split 'line', ['id', 'city']
|
23
|
+
end
|
24
|
+
|
25
|
+
assembly 'join' do
|
26
|
+
join 'input1', 'input2', 'input3', :on => 'id'
|
27
|
+
project 'id', 'name', 'age', 'city'
|
28
|
+
end
|
29
|
+
|
30
|
+
sink 'join', tap('output/join', :sink_mode => :replace)
|
31
|
+
end
|
32
|
+
end.complete(sample_properties)
|
data/samples/logwordcount.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'logwordcount' do
|
8
|
+
flow 'logwordcount' do
|
9
|
+
source 'input', tap('http://www.gutenberg.org/files/20417/20417-8.txt')
|
10
|
+
|
11
|
+
assembly 'input' do
|
12
|
+
# TODO: create a helper for RegexSplitGenerator
|
13
|
+
each 'line', :function => regex_split_generator('word', :pattern => /[.,]*\s+/)
|
14
|
+
group_by 'word' do
|
15
|
+
count
|
16
|
+
end
|
17
|
+
group_by 'count', :reverse => true
|
18
|
+
end
|
19
|
+
|
20
|
+
sink 'input', tap('output/logwordcount', :sink_mode => :replace)
|
21
|
+
end
|
22
|
+
end.complete(sample_properties)
|
data/samples/project.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
# History: "project" (verb) used to be known as "restrict"
|
5
|
+
|
6
|
+
require 'cascading'
|
7
|
+
require 'samples/cascading'
|
8
|
+
|
9
|
+
cascade 'project' do
|
10
|
+
flow 'project' do
|
11
|
+
source 'input', tap('samples/data/data2.txt')
|
12
|
+
|
13
|
+
assembly 'input' do
|
14
|
+
split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
15
|
+
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
|
16
|
+
project 'name', 'score1', 'score2'
|
17
|
+
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(3)
|
18
|
+
project 'name', 'score2'
|
19
|
+
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(2)
|
20
|
+
end
|
21
|
+
|
22
|
+
sink 'input', tap('output/project', :sink_mode => :replace)
|
23
|
+
end
|
24
|
+
end.complete(sample_properties)
|
data/samples/rename.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'rename' do
|
8
|
+
flow 'rename' do
|
9
|
+
source 'input', tap('samples/data/data2.txt')
|
10
|
+
|
11
|
+
assembly 'input' do
|
12
|
+
split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
13
|
+
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
|
14
|
+
rename 'name' => 'new_name', 'score1' => 'new_score1', 'score2' => 'new_score2'
|
15
|
+
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
|
16
|
+
puts "Final field names: #{scope.values_fields.to_a.inspect}"
|
17
|
+
end
|
18
|
+
|
19
|
+
sink 'input', tap('output/rename', :sink_mode => :replace)
|
20
|
+
end
|
21
|
+
end.complete(sample_properties)
|
data/samples/scorenames.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'scorenames' do
|
8
|
+
flow 'scorenames' do
|
9
|
+
# You don't have to curl and cache inputs: tap can fetch via HTTP
|
10
|
+
source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
|
11
|
+
|
12
|
+
assembly 'input' do
|
13
|
+
split 'line', ['name', 'val1', 'val2', 'id']
|
14
|
+
insert 'val3' => expr('val2:double < 40.0 ? val1:double : val2:double')
|
15
|
+
project 'name', 'val3', 'id'
|
16
|
+
end
|
17
|
+
|
18
|
+
sink 'input', tap('output/scorenames', :sink_mode => :replace)
|
19
|
+
end
|
20
|
+
end.complete(sample_properties)
|
data/samples/splitter.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'splitter' do
|
8
|
+
flow 'splitter' do
|
9
|
+
source 'input', tap('samples/data/data2.txt')
|
10
|
+
|
11
|
+
assembly 'input' do
|
12
|
+
split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
13
|
+
group_by 'score1' do
|
14
|
+
count
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
sink 'input', tap('output/splitter', :sink_mode => :replace)
|
19
|
+
end
|
20
|
+
end.complete(sample_properties)
|
data/samples/union.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
cascade 'union' do
|
8
|
+
flow 'union' do
|
9
|
+
source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
|
10
|
+
|
11
|
+
assembly 'input' do
|
12
|
+
split 'line', ['name', 'score1', 'score2', 'id']
|
13
|
+
|
14
|
+
branch 'branch1' do
|
15
|
+
group_by 'score1', 'name' do
|
16
|
+
count
|
17
|
+
end
|
18
|
+
rename 'score1' => 'score'
|
19
|
+
end
|
20
|
+
|
21
|
+
branch 'branch2' do
|
22
|
+
group_by 'score2', 'name' do
|
23
|
+
count
|
24
|
+
end
|
25
|
+
rename 'score2' => 'score'
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
assembly 'union' do
|
30
|
+
union 'branch1', 'branch2'
|
31
|
+
end
|
32
|
+
|
33
|
+
sink 'union', tap('output/union', :sink_mode => :replace)
|
34
|
+
end
|
35
|
+
end.complete(sample_properties)
|
data/spec/cascading_spec.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe Cascading do
|
4
|
+
it 'should dedup field names from multiple sources' do
|
5
|
+
left_names = ['a', 'b', 'c', 'd', 'e']
|
6
|
+
mid_names = ['a', 'f']
|
7
|
+
right_names = ['a', 'g']
|
8
|
+
|
9
|
+
field_names = dedup_field_names(left_names, mid_names, right_names)
|
10
|
+
field_names.should == [
|
11
|
+
'a', 'b', 'c', 'd', 'e',
|
12
|
+
'a_', 'f',
|
13
|
+
'a__', 'g'
|
14
|
+
]
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should fail to resolve duplicate fields' do
|
18
|
+
incoming = fields(['line'])
|
19
|
+
declared = fields(['line'])
|
20
|
+
outgoing = all_fields
|
21
|
+
lambda do
|
22
|
+
begin
|
23
|
+
resolved = Java::CascadingTuple::Fields.resolve(outgoing, [incoming, declared].to_java(Java::CascadingTuple::Fields))
|
24
|
+
rescue NativeException => e
|
25
|
+
raise e.cause
|
26
|
+
end
|
27
|
+
end.should raise_error Java::CascadingTuple::TupleException, 'field name already exists: line'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should find branches to sink' do
|
31
|
+
cascade 'branched_pass' do
|
32
|
+
flow 'branched_pass' do
|
33
|
+
source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
|
34
|
+
assembly 'input' do
|
35
|
+
branch 'branched_input' do
|
36
|
+
project 'line'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
|
40
|
+
end
|
41
|
+
end.complete
|
42
|
+
|
43
|
+
ilc = `wc -l spec/resource/test_input.txt`.split(/\s+/).first
|
44
|
+
olc = `wc -l #{OUTPUT_DIR}/branched_pass_out/part-00000`.split(/\s+/).first
|
45
|
+
ilc.should == olc
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should create an isolated namespace per cascade' do
|
49
|
+
cascade 'double' do
|
50
|
+
flow 'double' do
|
51
|
+
source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
|
52
|
+
assembly 'input' do # Dup name
|
53
|
+
insert 'doubled' => expr('line:string + "," + line:string')
|
54
|
+
project 'doubled'
|
55
|
+
end
|
56
|
+
sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
cascade 'pass' do
|
61
|
+
flow 'pass' do
|
62
|
+
source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
|
63
|
+
assembly 'input' do # Dup name
|
64
|
+
project 'line'
|
65
|
+
end
|
66
|
+
sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
Cascade.get('double').complete
|
71
|
+
Cascade.get('pass').complete
|
72
|
+
diff = `diff #{OUTPUT_DIR}/double_out/part-00000 #{OUTPUT_DIR}/pass_out/part-00000`
|
73
|
+
diff.should_not be_empty
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'should support joins in branches' do
|
77
|
+
cascade 'branch_join' do
|
78
|
+
flow 'branch_join' do
|
79
|
+
source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
|
80
|
+
source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
|
81
|
+
|
82
|
+
assembly 'left' do
|
83
|
+
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
84
|
+
project 'x', 'y', 'z'
|
85
|
+
end
|
86
|
+
|
87
|
+
assembly 'right' do
|
88
|
+
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
89
|
+
project 'x', 'y', 'z'
|
90
|
+
|
91
|
+
branch 'branch_join' do
|
92
|
+
join 'left', 'right', :on => 'x'
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
|
97
|
+
end
|
98
|
+
end.complete
|
99
|
+
end
|
100
|
+
end
|
data/spec/expr_spec.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe Object do
|
4
|
+
it 'should allow expr syntax' do
|
5
|
+
test_assembly do
|
6
|
+
insert 'foo' => 1, 'bar' => expr('offset:int')
|
7
|
+
check_scope :values_fields => ['offset', 'line', 'bar', 'foo']
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
data/spec/primary_key_spec.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe Cascading::Scope do
|
4
|
+
it 'should allow override of primary key' do
|
5
|
+
test_assembly do
|
6
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
7
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
8
|
+
:primary_key_fields => ['offset']
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should pass primary key through Each' do
|
13
|
+
test_assembly do
|
14
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
15
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
16
|
+
:primary_key_fields => ['offset']
|
17
|
+
pass
|
18
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
19
|
+
:primary_key_fields => ['offset']
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should support renaming primary keys' do
|
24
|
+
test_assembly do
|
25
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
26
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
27
|
+
:primary_key_fields => ['offset']
|
28
|
+
rename 'offset' => 'primary_key', 'line' => 'data'
|
29
|
+
check_scope :values_fields => ['primary_key', 'data', 'x', 'y'],
|
30
|
+
:primary_key_fields => ['primary_key']
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should clear primary keys when a subset of their fields are discarded' do
|
35
|
+
test_assembly do
|
36
|
+
primary 'offset', 'line' # Make primary keys interesting
|
37
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
38
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
39
|
+
:primary_key_fields => ['offset', 'line']
|
40
|
+
project 'line', 'x', 'y'
|
41
|
+
check_scope :values_fields => ['line', 'x', 'y'],
|
42
|
+
:primary_key_fields => nil
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should pass primary key through branch' do
|
47
|
+
test_assembly do
|
48
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
49
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
50
|
+
:primary_key_fields => ['offset']
|
51
|
+
|
52
|
+
branch 'check_keys' do
|
53
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
54
|
+
:primary_key_fields => ['offset']
|
55
|
+
pass
|
56
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
57
|
+
:primary_key_fields => ['offset']
|
58
|
+
end
|
59
|
+
|
60
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
61
|
+
:primary_key_fields => ['offset']
|
62
|
+
pass
|
63
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
64
|
+
:primary_key_fields => ['offset']
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should pass primary key through GroupBy followed by Each' do
|
69
|
+
test_assembly do
|
70
|
+
group_by 'offset'
|
71
|
+
check_scope :values_fields => ['offset', 'line'],
|
72
|
+
:grouping_fields => ['offset'],
|
73
|
+
:primary_key_fields => ['offset']
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'should pass primary key through GroupBy followed by Every' do
|
78
|
+
test_assembly do
|
79
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
80
|
+
group_by 'offset', 'x' do
|
81
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
82
|
+
:grouping_fields => ['offset', 'x'],
|
83
|
+
:primary_key_fields => ['offset'],
|
84
|
+
:grouping_primary_key_fields => ['offset', 'x']
|
85
|
+
count 'line'
|
86
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
87
|
+
:grouping_fields => ['offset', 'x', 'line'],
|
88
|
+
:primary_key_fields => ['offset'],
|
89
|
+
:grouping_primary_key_fields => ['offset', 'x']
|
90
|
+
count 'y'
|
91
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
92
|
+
:grouping_fields => ['offset', 'x', 'line', 'y'],
|
93
|
+
:primary_key_fields => ['offset', 'x'], # FIXME: why has the pk changed?
|
94
|
+
:grouping_primary_key_fields => ['offset', 'x']
|
95
|
+
end
|
96
|
+
check_scope :values_fields => ['offset', 'x', 'line', 'y'],
|
97
|
+
:primary_key_fields => ['offset', 'x']
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'should not clear primary key when grouping on other fields' do
|
102
|
+
test_assembly do
|
103
|
+
group_by 'line'
|
104
|
+
check_scope :values_fields => ['offset', 'line'],
|
105
|
+
:grouping_fields => ['line'],
|
106
|
+
:primary_key_fields => ['offset'],
|
107
|
+
:grouping_primary_key_fields => ['line']
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'should pass primary key through CoGroup' do
|
112
|
+
test_join_assembly do
|
113
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
114
|
+
:grouping_fields => ['x'],
|
115
|
+
:primary_key_fields => ['offset'],
|
116
|
+
:grouping_primary_key_fields => ['x']
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
data/spec/scope_spec.rb
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe Cascading::Scope do
|
4
|
+
it 'should match Cascading fields names from source tap scheme' do
|
5
|
+
test_assembly do
|
6
|
+
# Pass that uses our scope instead of all_fields
|
7
|
+
operation = Java::CascadingOperation::Identity.new
|
8
|
+
make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
|
9
|
+
|
10
|
+
check_scope :values_fields => ['offset', 'line']
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should match Cascading fields names after CoGroup' do
|
15
|
+
test_join_assembly do
|
16
|
+
# Pass that uses our scope instead of all_fields
|
17
|
+
operation = Java::CascadingOperation::Identity.new
|
18
|
+
make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
|
19
|
+
|
20
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'should match Cascading fields names after Every' do
|
25
|
+
test_join_assembly do
|
26
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
27
|
+
|
28
|
+
# Pass that uses our grouping fields instead of all_fields
|
29
|
+
operation = Java::CascadingOperation::Identity.new
|
30
|
+
make_each(# FIXME: names of grouping fields are not what we'd expect!
|
31
|
+
Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)
|
32
|
+
|
33
|
+
check_scope :values_fields => [0, 'x_sum']
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'should pick up names from source tap scheme' do
|
38
|
+
test_assembly do
|
39
|
+
pass
|
40
|
+
|
41
|
+
check_scope :values_fields => ['offset', 'line']
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'should propagate names through Each' do
|
46
|
+
test_assembly do
|
47
|
+
check_scope :values_fields => ['offset', 'line']
|
48
|
+
assert_size_equals 2
|
49
|
+
|
50
|
+
split 'line', ['x', 'y'], :pattern => /,/
|
51
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y']
|
52
|
+
assert_size_equals 4
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should allow field filtration at Each' do
|
57
|
+
test_assembly do
|
58
|
+
check_scope :values_fields => ['offset', 'line']
|
59
|
+
assert_size_equals 2
|
60
|
+
|
61
|
+
split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
|
62
|
+
check_scope :values_fields => ['x', 'y']
|
63
|
+
assert_size_equals 2
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'should propagate names through CoGroup' do
|
68
|
+
test_join_assembly do
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'should pass grouping fields to Every' do
|
73
|
+
test_join_assembly do
|
74
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
75
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
76
|
+
:grouping_fields => ['x', 'x_sum']
|
77
|
+
assert_group_size_equals 1
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'should pass grouping fields through chained Every' do
|
82
|
+
test_join_assembly do
|
83
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
84
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
85
|
+
:grouping_fields => ['x', 'x_sum']
|
86
|
+
assert_group_size_equals 1
|
87
|
+
|
88
|
+
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
89
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
90
|
+
:grouping_fields => ['x', 'x_sum', 'y_sum']
|
91
|
+
assert_group_size_equals 1
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
it 'should propagate names through Every' do
|
96
|
+
test_join_assembly do
|
97
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
98
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
99
|
+
:grouping_fields => ['x', 'x_sum']
|
100
|
+
assert_group_size_equals 1
|
101
|
+
|
102
|
+
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
103
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
104
|
+
:grouping_fields => ['x', 'x_sum', 'y_sum']
|
105
|
+
assert_group_size_equals 1
|
106
|
+
|
107
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
108
|
+
:grouping_fields => ['x', 'x_sum', 'y_sum']
|
109
|
+
assert_size_equals 3
|
110
|
+
|
111
|
+
# No rename service provided unless you use the block form of join!
|
112
|
+
check_scope :values_fields => [0, 'x_sum', 'y_sum']
|
113
|
+
|
114
|
+
# Mimic rename service
|
115
|
+
bind_names ['x', 'x_sum', 'y_sum']
|
116
|
+
check_scope :values_fields => ['x', 'x_sum', 'y_sum']
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
|
121
|
+
test_join_assembly do
|
122
|
+
assert_size_equals 10
|
123
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'should fail to pass grouping fields to Every immediately following Each' do
|
128
|
+
lambda do # Composition fails
|
129
|
+
test_join_assembly do
|
130
|
+
pass
|
131
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
132
|
+
begin
|
133
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
134
|
+
rescue CascadingException => e
|
135
|
+
raise e.cause(3)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
|
139
|
+
end
|
140
|
+
|
141
|
+
it 'should propagate values fields and field names into branch' do
|
142
|
+
test_join_assembly(:branches => ['data_tuple']) do
|
143
|
+
branch 'data_tuple' do
|
144
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
145
|
+
:grouping_fields => ['x']
|
146
|
+
assert_size_equals 10
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
it 'should fail to propagate grouping fields to branch' do
|
152
|
+
lambda do # Execution fails
|
153
|
+
begin
|
154
|
+
test_join_assembly(:branches => ['attempt_group']) do
|
155
|
+
branch 'attempt_group' do
|
156
|
+
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
157
|
+
:grouping_fields => ['x']
|
158
|
+
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
159
|
+
end
|
160
|
+
end
|
161
|
+
rescue CascadingException => e
|
162
|
+
raise e.cause(4)
|
163
|
+
end
|
164
|
+
end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
|
165
|
+
end
|
166
|
+
|
167
|
+
it 'should propagate names through GroupBy' do
|
168
|
+
test_assembly do
|
169
|
+
group_by 'line'
|
170
|
+
check_scope :values_fields => ['offset', 'line'],
|
171
|
+
:grouping_fields => ['line']
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
data/spec/spec.opts
ADDED