cascading.jruby 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HACKING.md +15 -0
- data/History.txt +0 -0
- data/LICENSE.txt +165 -0
- data/README.md +7 -0
- data/Rakefile +45 -0
- data/bin/make_job +81 -0
- data/lib/cascading/assembly.rb +726 -0
- data/lib/cascading/base.rb +63 -0
- data/lib/cascading/cascade.rb +63 -0
- data/lib/cascading/cascading.rb +134 -0
- data/lib/cascading/cascading_exception.rb +30 -0
- data/lib/cascading/expr_stub.rb +33 -0
- data/lib/cascading/ext/array.rb +15 -0
- data/lib/cascading/flow.rb +168 -0
- data/lib/cascading/operations.rb +204 -0
- data/lib/cascading/scope.rb +160 -0
- data/lib/cascading.rb +63 -0
- data/samples/branch.rb +31 -0
- data/samples/cascading.rb +41 -0
- data/samples/copy.rb +18 -0
- data/samples/data/data2.txt +88799 -0
- data/samples/data/data_join1.txt +3 -0
- data/samples/data/data_join2.txt +3 -0
- data/samples/data/data_join3.txt +3 -0
- data/samples/join.rb +32 -0
- data/samples/logwordcount.rb +22 -0
- data/samples/project.rb +24 -0
- data/samples/rename.rb +21 -0
- data/samples/scorenames.rb +20 -0
- data/samples/splitter.rb +20 -0
- data/samples/union.rb +35 -0
- data/spec/cascading_spec.rb +100 -0
- data/spec/expr_spec.rb +10 -0
- data/spec/primary_key_spec.rb +119 -0
- data/spec/resource/join_input.txt +3 -0
- data/spec/resource/test_input.txt +4 -0
- data/spec/scope_spec.rb +174 -0
- data/spec/spec.opts +6 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/spec_util.rb +188 -0
- data/src/cascading/jruby/Main.java +38 -0
- data/src/cascading/jruby/runner.rb +6 -0
- data/tags +238 -0
- data/tasks/ann.rake +80 -0
- data/tasks/ant.rake +11 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +206 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/samples.rake +13 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +59 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +42 -0
- data/test/data/data1.txt +14 -0
- data/test/data/data2.txt +14 -0
- data/test/test_assembly.rb +321 -0
- data/test/test_cascading.rb +49 -0
- data/test/test_flow.rb +15 -0
- metadata +137 -0
data/samples/join.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: three inputs are each split from 'line' into an 'id' field plus one
# other field, joined on 'id', projected down to four fields, and written to
# output/join (existing output is replaced).
join_cascade = cascade('join') do
  flow('join') do
    # One source tap per input file.
    source('input1', tap('samples/data/data_join1.txt'))
    source('input2', tap('samples/data/data_join2.txt'))
    source('input3', tap('samples/data/data_join3.txt'))

    # Each assembly is named after the source it consumes.
    assembly('input1') { split('line', %w[id name]) }
    assembly('input2') { split('line', %w[id age]) }
    assembly('input3') { split('line', %w[id city]) }

    # Join the three assemblies on their shared 'id' field.
    assembly('join') do
      join('input1', 'input2', 'input3', :on => 'id')
      project('id', 'name', 'age', 'city')
    end

    sink('join', tap('output/join', :sink_mode => :replace))
  end
end

join_cascade.complete(sample_properties)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: split every 'line' of a remote text into 'word' values, count the
# occurrences of each word, then group on 'count' with :reverse => true.
word_count_cascade = cascade('logwordcount') do
  flow('logwordcount') do
    source('input', tap('http://www.gutenberg.org/files/20417/20417-8.txt'))

    assembly('input') do
      # TODO: create a helper for RegexSplitGenerator
      word_splitter = regex_split_generator('word', :pattern => /[.,]*\s+/)
      each('line', :function => word_splitter)
      group_by('word') { count }
      group_by('count', :reverse => true)
    end

    sink('input', tap('output/logwordcount', :sink_mode => :replace))
  end
end

word_count_cascade.complete(sample_properties)
|
data/samples/project.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
# History: "project" (verb) used to be known as "restrict"
|
5
|
+
|
6
|
+
require 'cascading'
|
7
|
+
require 'samples/cascading'
|
8
|
+
|
9
|
+
# Sample: split 'line' into four fields, then narrow the tuple with successive
# `project` calls, asserting the tuple size after every step.
project_cascade = cascade('project') do
  flow('project') do
    source('input', tap('samples/data/data2.txt'))

    assembly('input') do
      split('line', %w[name score1 score2 id], :output => %w[name score1 score2 id])
      assert(Java::CascadingOperationAssertion::AssertSizeEquals.new(4))

      project('name', 'score1', 'score2')
      assert(Java::CascadingOperationAssertion::AssertSizeEquals.new(3))

      project('name', 'score2')
      assert(Java::CascadingOperationAssertion::AssertSizeEquals.new(2))
    end

    sink('input', tap('output/project', :sink_mode => :replace))
  end
end

project_cascade.complete(sample_properties)
|
data/samples/rename.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: split 'line' into four fields, rename three of them, and print the
# resulting field names. The tuple size is asserted to be 4 before and after
# the rename.
rename_cascade = cascade('rename') do
  flow('rename') do
    source('input', tap('samples/data/data2.txt'))

    assembly('input') do
      split('line', %w[name score1 score2 id], :output => %w[name score1 score2 id])
      assert(Java::CascadingOperationAssertion::AssertSizeEquals.new(4))

      rename('name' => 'new_name', 'score1' => 'new_score1', 'score2' => 'new_score2')
      assert(Java::CascadingOperationAssertion::AssertSizeEquals.new(4))

      # Printed while the assembly block is being evaluated.
      puts("Final field names: #{scope.values_fields.to_a.inspect}")
    end

    sink('input', tap('output/rename', :sink_mode => :replace))
  end
end

rename_cascade.complete(sample_properties)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: split 'line' into four fields, insert a computed 'val3' field from
# an expression over 'val1' and 'val2', and keep only 'name', 'val3' and 'id'.
score_names_cascade = cascade('scorenames') do
  flow('scorenames') do
    # You don't have to curl and cache inputs: tap can fetch via HTTP
    source('input', tap('http://www.census.gov/genealogy/names/dist.all.last'))

    assembly('input') do
      split('line', %w[name val1 val2 id])
      insert('val3' => expr('val2:double < 40.0 ? val1:double : val2:double'))
      project('name', 'val3', 'id')
    end

    sink('input', tap('output/scorenames', :sink_mode => :replace))
  end
end

score_names_cascade.complete(sample_properties)
|
data/samples/splitter.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: split 'line' into four fields (keeping only those four as output),
# then group on 'score1' and count each group.
splitter_cascade = cascade('splitter') do
  flow('splitter') do
    source('input', tap('samples/data/data2.txt'))

    assembly('input') do
      split('line', %w[name score1 score2 id], :output => %w[name score1 score2 id])
      group_by('score1') { count }
    end

    sink('input', tap('output/splitter', :sink_mode => :replace))
  end
end

splitter_cascade.complete(sample_properties)
|
data/samples/union.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'cascading'
|
5
|
+
require 'samples/cascading'
|
6
|
+
|
7
|
+
# Sample: split 'line' into four fields, fork two branches that each group and
# count on a different score field (renamed to the common name 'score'), then
# union the two branches and sink the result.
union_cascade = cascade('union') do
  flow('union') do
    source('input', tap('http://www.census.gov/genealogy/names/dist.all.last'))

    assembly('input') do
      split('line', %w[name score1 score2 id])

      branch('branch1') do
        group_by('score1', 'name') { count }
        rename('score1' => 'score')
      end

      branch('branch2') do
        group_by('score2', 'name') { count }
        rename('score2' => 'score')
      end
    end

    # Both branches are referenced here by the names given above.
    assembly('union') { union('branch1', 'branch2') }

    sink('union', tap('output/union', :sink_mode => :replace))
  end
end

union_cascade.complete(sample_properties)
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Specs for the top-level Cascading helpers and for cascade/flow composition:
# field-name de-duplication, duplicate-field resolution failures, sinking a
# branch, per-cascade assembly namespaces, and joins declared inside a branch.
describe Cascading do
  it 'should dedup field names from multiple sources' do
    left_names = ['a', 'b', 'c', 'd', 'e']
    mid_names = ['a', 'f']
    right_names = ['a', 'g']

    field_names = dedup_field_names(left_names, mid_names, right_names)
    field_names.should == [
      'a', 'b', 'c', 'd', 'e',
      'a_', 'f',
      'a__', 'g'
    ]
  end

  it 'should fail to resolve duplicate fields' do
    incoming = fields(['line'])
    declared = fields(['line'])
    outgoing = all_fields
    lambda do
      begin
        # Only the raised exception matters here; the resolved value is discarded.
        Java::CascadingTuple::Fields.resolve(outgoing, [incoming, declared].to_java(Java::CascadingTuple::Fields))
      rescue NativeException => e
        # Re-raise the wrapped cause so raise_error can match on its class.
        raise e.cause
      end
    end.should raise_error Java::CascadingTuple::TupleException, 'field name already exists: line'
  end

  it 'should find branches to sink' do
    cascade 'branched_pass' do
      flow 'branched_pass' do
        source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
        assembly 'input' do
          branch 'branched_input' do
            project 'line'
          end
        end
        sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
      end
    end.complete

    # Count lines in Ruby rather than parsing `wc -l`: where wc pads its count
    # with leading blanks (e.g. BSD/macOS), split(/\s+/).first returns "" for
    # both files and the comparison passes without checking anything.
    ilc = File.readlines('spec/resource/test_input.txt').size
    olc = File.readlines("#{OUTPUT_DIR}/branched_pass_out/part-00000").size
    ilc.should == olc
  end

  it 'should create an isolated namespace per cascade' do
    cascade 'double' do
      flow 'double' do
        source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
        assembly 'input' do # Dup name
          insert 'doubled' => expr('line:string + "," + line:string')
          project 'doubled'
        end
        sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
      end
    end

    cascade 'pass' do
      flow 'pass' do
        source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
        assembly 'input' do # Dup name
          project 'line'
        end
        sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
      end
    end

    Cascade.get('double').complete
    Cascade.get('pass').complete

    # The two cascades reuse the assembly name 'input'; their outputs must
    # differ. Compared in-process so the spec does not depend on a `diff`
    # binary.
    double_out = File.read("#{OUTPUT_DIR}/double_out/part-00000")
    pass_out = File.read("#{OUTPUT_DIR}/pass_out/part-00000")
    double_out.should_not == pass_out
  end

  it 'should support joins in branches' do
    # No explicit expectation: the example passes if the cascade completes
    # without raising.
    cascade 'branch_join' do
      flow 'branch_join' do
        source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
        source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)

        assembly 'left' do
          split 'line', ['x', 'y', 'z'], :pattern => /,/
          project 'x', 'y', 'z'
        end

        assembly 'right' do
          split 'line', ['x', 'y', 'z'], :pattern => /,/
          project 'x', 'y', 'z'

          branch 'branch_join' do
            join 'left', 'right', :on => 'x'
          end
        end

        sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
      end
    end.complete
  end
end
|
data/spec/expr_spec.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Spec: `expr(...)` can be used as an inserted field's value alongside a
# literal value.
describe Object do
  it 'should allow expr syntax' do
    test_assembly do
      insert('foo' => 1, 'bar' => expr('offset:int'))
      check_scope(:values_fields => %w[offset line bar foo])
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Specs for how the primary-key fields tracked by the scope are carried
# through each kind of pipe (Each, branch, GroupBy, Every, CoGroup).
describe Cascading::Scope do
  it 'should allow override of primary key' do
    test_assembly do
      split('line', %w[x y], :pattern => /,/)
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
    end
  end

  it 'should pass primary key through Each' do
    test_assembly do
      split('line', %w[x y], :pattern => /,/)
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
      pass
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
    end
  end

  it 'should support renaming primary keys' do
    test_assembly do
      split('line', %w[x y], :pattern => /,/)
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
      rename('offset' => 'primary_key', 'line' => 'data')
      check_scope(:values_fields => %w[primary_key data x y],
                  :primary_key_fields => %w[primary_key])
    end
  end

  it 'should clear primary keys when a subset of their fields are discarded' do
    test_assembly do
      primary('offset', 'line') # Make primary keys interesting
      split('line', %w[x y], :pattern => /,/)
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset line])
      project('line', 'x', 'y')
      check_scope(:values_fields => %w[line x y],
                  :primary_key_fields => nil)
    end
  end

  it 'should pass primary key through branch' do
    test_assembly do
      split('line', %w[x y], :pattern => /,/)
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])

      branch('check_keys') do
        check_scope(:values_fields => %w[offset line x y],
                    :primary_key_fields => %w[offset])
        pass
        check_scope(:values_fields => %w[offset line x y],
                    :primary_key_fields => %w[offset])
      end

      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
      pass
      check_scope(:values_fields => %w[offset line x y],
                  :primary_key_fields => %w[offset])
    end
  end

  it 'should pass primary key through GroupBy followed by Each' do
    test_assembly do
      group_by('offset')
      check_scope(:values_fields => %w[offset line],
                  :grouping_fields => %w[offset],
                  :primary_key_fields => %w[offset])
    end
  end

  it 'should pass primary key through GroupBy followed by Every' do
    test_assembly do
      split('line', %w[x y], :pattern => /,/)
      group_by('offset', 'x') do
        check_scope(:values_fields => %w[offset line x y],
                    :grouping_fields => %w[offset x],
                    :primary_key_fields => %w[offset],
                    :grouping_primary_key_fields => %w[offset x])
        count('line')
        check_scope(:values_fields => %w[offset line x y],
                    :grouping_fields => %w[offset x line],
                    :primary_key_fields => %w[offset],
                    :grouping_primary_key_fields => %w[offset x])
        count('y')
        check_scope(:values_fields => %w[offset line x y],
                    :grouping_fields => %w[offset x line y],
                    :primary_key_fields => %w[offset x], # FIXME: why has the pk changed?
                    :grouping_primary_key_fields => %w[offset x])
      end
      check_scope(:values_fields => %w[offset x line y],
                  :primary_key_fields => %w[offset x])
    end
  end

  it 'should not clear primary key when grouping on other fields' do
    test_assembly do
      group_by('line')
      check_scope(:values_fields => %w[offset line],
                  :grouping_fields => %w[line],
                  :primary_key_fields => %w[offset],
                  :grouping_primary_key_fields => %w[line])
    end
  end

  it 'should pass primary key through CoGroup' do
    test_join_assembly do
      check_scope(:values_fields => %w[offset line x y z offset_ line_ x_ y_ z_],
                  :grouping_fields => %w[x],
                  :primary_key_fields => %w[offset],
                  :grouping_primary_key_fields => %w[x])
    end
  end
end
|
data/spec/scope_spec.rb
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Specs for how the scope's values fields and grouping fields are tracked as
# pipes (Each, CoGroup, Every, GroupBy, branch) are appended to an assembly.
describe Cascading::Scope do
  it 'should match Cascading fields names from source tap scheme' do
    test_assembly do
      # Pass that uses our scope instead of all_fields
      operation = Java::CascadingOperation::Identity.new
      make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)

      check_scope :values_fields => ['offset', 'line']
    end
  end

  it 'should match Cascading fields names after CoGroup' do
    test_join_assembly do
      # Pass that uses our scope instead of all_fields
      operation = Java::CascadingOperation::Identity.new
      make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)

      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
    end
  end

  it 'should match Cascading fields names after Every' do
    test_join_assembly do
      sum :mapping => {'x' => 'x_sum'}, :type => :int

      # Pass that uses our grouping fields instead of all_fields
      operation = Java::CascadingOperation::Identity.new
      # FIXME: names of grouping fields are not what we'd expect!
      make_each(Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)

      check_scope :values_fields => [0, 'x_sum']
    end
  end

  it 'should pick up names from source tap scheme' do
    test_assembly do
      pass

      check_scope :values_fields => ['offset', 'line']
    end
  end

  it 'should propagate names through Each' do
    test_assembly do
      check_scope :values_fields => ['offset', 'line']
      assert_size_equals 2

      split 'line', ['x', 'y'], :pattern => /,/
      check_scope :values_fields => ['offset', 'line', 'x', 'y']
      assert_size_equals 4
    end
  end

  it 'should allow field filtration at Each' do
    test_assembly do
      check_scope :values_fields => ['offset', 'line']
      assert_size_equals 2

      split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
      check_scope :values_fields => ['x', 'y']
      assert_size_equals 2
    end
  end

  it 'should propagate names through CoGroup' do
    test_join_assembly do
      # This example previously had an empty body and asserted nothing. It now
      # checks the values and grouping fields that other examples already
      # expect inside test_join_assembly.
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x']
    end
  end

  it 'should pass grouping fields to Every' do
    test_join_assembly do
      sum :mapping => {'x' => 'x_sum'}, :type => :int
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum']
      assert_group_size_equals 1
    end
  end

  it 'should pass grouping fields through chained Every' do
    test_join_assembly do
      sum :mapping => {'x' => 'x_sum'}, :type => :int
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum']
      assert_group_size_equals 1

      sum :mapping => {'y' => 'y_sum'}, :type => :int
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum', 'y_sum']
      assert_group_size_equals 1
    end
  end

  it 'should propagate names through Every' do
    test_join_assembly do
      sum :mapping => {'x' => 'x_sum'}, :type => :int
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum']
      assert_group_size_equals 1

      sum :mapping => {'y' => 'y_sum'}, :type => :int
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum', 'y_sum']
      assert_group_size_equals 1

      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
        :grouping_fields => ['x', 'x_sum', 'y_sum']
      assert_size_equals 3

      # No rename service provided unless you use the block form of join!
      check_scope :values_fields => [0, 'x_sum', 'y_sum']

      # Mimic rename service
      bind_names ['x', 'x_sum', 'y_sum']
      check_scope :values_fields => ['x', 'x_sum', 'y_sum']
    end
  end

  it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
    test_join_assembly do
      assert_size_equals 10
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
    end
  end

  it 'should fail to pass grouping fields to Every immediately following Each' do
    lambda do # Composition fails
      test_join_assembly do
        pass
        check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
        begin
          sum :mapping => {'x' => 'x_sum'}, :type => :int
        rescue CascadingException => e
          # Re-raise the nested cause so raise_error can match the Java exception.
          raise e.cause(3)
        end
      end
    end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
  end

  it 'should propagate values fields and field names into branch' do
    test_join_assembly(:branches => ['data_tuple']) do
      branch 'data_tuple' do
        check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
          :grouping_fields => ['x']
        assert_size_equals 10
      end
    end
  end

  it 'should fail to propagate grouping fields to branch' do
    lambda do # Execution fails
      begin
        test_join_assembly(:branches => ['attempt_group']) do
          branch 'attempt_group' do
            check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
              :grouping_fields => ['x']
            sum :mapping => {'x' => 'x_sum'}, :type => :int
          end
        end
      rescue CascadingException => e
        # Re-raise the nested cause so raise_error can match the Java exception.
        raise e.cause(4)
      end
    end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
  end

  it 'should propagate names through GroupBy' do
    test_assembly do
      group_by 'line'
      check_scope :values_fields => ['offset', 'line'],
        :grouping_fields => ['line']
    end
  end
end
|
data/spec/spec.opts
ADDED