cascading.jruby 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/HACKING.md +15 -0
  2. data/History.txt +0 -0
  3. data/LICENSE.txt +165 -0
  4. data/README.md +7 -0
  5. data/Rakefile +45 -0
  6. data/bin/make_job +81 -0
  7. data/lib/cascading/assembly.rb +726 -0
  8. data/lib/cascading/base.rb +63 -0
  9. data/lib/cascading/cascade.rb +63 -0
  10. data/lib/cascading/cascading.rb +134 -0
  11. data/lib/cascading/cascading_exception.rb +30 -0
  12. data/lib/cascading/expr_stub.rb +33 -0
  13. data/lib/cascading/ext/array.rb +15 -0
  14. data/lib/cascading/flow.rb +168 -0
  15. data/lib/cascading/operations.rb +204 -0
  16. data/lib/cascading/scope.rb +160 -0
  17. data/lib/cascading.rb +63 -0
  18. data/samples/branch.rb +31 -0
  19. data/samples/cascading.rb +41 -0
  20. data/samples/copy.rb +18 -0
  21. data/samples/data/data2.txt +88799 -0
  22. data/samples/data/data_join1.txt +3 -0
  23. data/samples/data/data_join2.txt +3 -0
  24. data/samples/data/data_join3.txt +3 -0
  25. data/samples/join.rb +32 -0
  26. data/samples/logwordcount.rb +22 -0
  27. data/samples/project.rb +24 -0
  28. data/samples/rename.rb +21 -0
  29. data/samples/scorenames.rb +20 -0
  30. data/samples/splitter.rb +20 -0
  31. data/samples/union.rb +35 -0
  32. data/spec/cascading_spec.rb +100 -0
  33. data/spec/expr_spec.rb +10 -0
  34. data/spec/primary_key_spec.rb +119 -0
  35. data/spec/resource/join_input.txt +3 -0
  36. data/spec/resource/test_input.txt +4 -0
  37. data/spec/scope_spec.rb +174 -0
  38. data/spec/spec.opts +6 -0
  39. data/spec/spec_helper.rb +5 -0
  40. data/spec/spec_util.rb +188 -0
  41. data/src/cascading/jruby/Main.java +38 -0
  42. data/src/cascading/jruby/runner.rb +6 -0
  43. data/tags +238 -0
  44. data/tasks/ann.rake +80 -0
  45. data/tasks/ant.rake +11 -0
  46. data/tasks/bones.rake +20 -0
  47. data/tasks/gem.rake +206 -0
  48. data/tasks/git.rake +40 -0
  49. data/tasks/notes.rake +27 -0
  50. data/tasks/post_load.rake +34 -0
  51. data/tasks/rdoc.rake +50 -0
  52. data/tasks/rubyforge.rake +55 -0
  53. data/tasks/samples.rake +13 -0
  54. data/tasks/setup.rb +300 -0
  55. data/tasks/spec.rake +59 -0
  56. data/tasks/svn.rake +47 -0
  57. data/tasks/test.rake +42 -0
  58. data/test/data/data1.txt +14 -0
  59. data/test/data/data2.txt +14 -0
  60. data/test/test_assembly.rb +321 -0
  61. data/test/test_cascading.rb +49 -0
  62. data/test/test_flow.rb +15 -0
  63. metadata +137 -0
data/samples/data/data_join1.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 Grégoire
2
+ 2 Mathias
3
+ 3 Stéphane
data/samples/data/data_join2.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 33
2
+ 2 30
3
+ 3 25
data/samples/data/data_join3.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 Cannes
2
+ 2 Boston
3
+ 3 Paris
data/samples/join.rb ADDED
@@ -0,0 +1,32 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'join' do
8
+ flow 'join' do
9
+ source 'input1', tap('samples/data/data_join1.txt')
10
+ source 'input2', tap('samples/data/data_join2.txt')
11
+ source 'input3', tap('samples/data/data_join3.txt')
12
+
13
+ assembly 'input1' do
14
+ split 'line', ['id', 'name']
15
+ end
16
+
17
+ assembly 'input2' do
18
+ split 'line', ['id', 'age']
19
+ end
20
+
21
+ assembly 'input3' do
22
+ split 'line', ['id', 'city']
23
+ end
24
+
25
+ assembly 'join' do
26
+ join 'input1', 'input2', 'input3', :on => 'id'
27
+ project 'id', 'name', 'age', 'city'
28
+ end
29
+
30
+ sink 'join', tap('output/join', :sink_mode => :replace)
31
+ end
32
+ end.complete(sample_properties)
data/samples/logwordcount.rb ADDED
@@ -0,0 +1,22 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'logwordcount' do
8
+ flow 'logwordcount' do
9
+ source 'input', tap('http://www.gutenberg.org/files/20417/20417-8.txt')
10
+
11
+ assembly 'input' do
12
+ # TODO: create a helper for RegexSplitGenerator
13
+ each 'line', :function => regex_split_generator('word', :pattern => /[.,]*\s+/)
14
+ group_by 'word' do
15
+ count
16
+ end
17
+ group_by 'count', :reverse => true
18
+ end
19
+
20
+ sink 'input', tap('output/logwordcount', :sink_mode => :replace)
21
+ end
22
+ end.complete(sample_properties)
data/samples/project.rb ADDED
@@ -0,0 +1,24 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ # History: "project" (verb) used to be known as "restrict"
5
+
6
+ require 'cascading'
7
+ require 'samples/cascading'
8
+
9
+ cascade 'project' do
10
+ flow 'project' do
11
+ source 'input', tap('samples/data/data2.txt')
12
+
13
+ assembly 'input' do
14
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
15
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
16
+ project 'name', 'score1', 'score2'
17
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(3)
18
+ project 'name', 'score2'
19
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(2)
20
+ end
21
+
22
+ sink 'input', tap('output/project', :sink_mode => :replace)
23
+ end
24
+ end.complete(sample_properties)
data/samples/rename.rb ADDED
@@ -0,0 +1,21 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'rename' do
8
+ flow 'rename' do
9
+ source 'input', tap('samples/data/data2.txt')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
13
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
14
+ rename 'name' => 'new_name', 'score1' => 'new_score1', 'score2' => 'new_score2'
15
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
16
+ puts "Final field names: #{scope.values_fields.to_a.inspect}"
17
+ end
18
+
19
+ sink 'input', tap('output/rename', :sink_mode => :replace)
20
+ end
21
+ end.complete(sample_properties)
data/samples/scorenames.rb ADDED
@@ -0,0 +1,20 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'scorenames' do
8
+ flow 'scorenames' do
9
+ # You don't have to curl and cache inputs: tap can fetch via HTTP
10
+ source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
11
+
12
+ assembly 'input' do
13
+ split 'line', ['name', 'val1', 'val2', 'id']
14
+ insert 'val3' => expr('val2:double < 40.0 ? val1:double : val2:double')
15
+ project 'name', 'val3', 'id'
16
+ end
17
+
18
+ sink 'input', tap('output/scorenames', :sink_mode => :replace)
19
+ end
20
+ end.complete(sample_properties)
data/samples/splitter.rb ADDED
@@ -0,0 +1,20 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'splitter' do
8
+ flow 'splitter' do
9
+ source 'input', tap('samples/data/data2.txt')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
13
+ group_by 'score1' do
14
+ count
15
+ end
16
+ end
17
+
18
+ sink 'input', tap('output/splitter', :sink_mode => :replace)
19
+ end
20
+ end.complete(sample_properties)
data/samples/union.rb ADDED
@@ -0,0 +1,35 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'union' do
8
+ flow 'union' do
9
+ source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id']
13
+
14
+ branch 'branch1' do
15
+ group_by 'score1', 'name' do
16
+ count
17
+ end
18
+ rename 'score1' => 'score'
19
+ end
20
+
21
+ branch 'branch2' do
22
+ group_by 'score2', 'name' do
23
+ count
24
+ end
25
+ rename 'score2' => 'score'
26
+ end
27
+ end
28
+
29
+ assembly 'union' do
30
+ union 'branch1', 'branch2'
31
+ end
32
+
33
+ sink 'union', tap('output/union', :sink_mode => :replace)
34
+ end
35
+ end.complete(sample_properties)
data/spec/cascading_spec.rb ADDED
@@ -0,0 +1,100 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading do
4
+ it 'should dedup field names from multiple sources' do
5
+ left_names = ['a', 'b', 'c', 'd', 'e']
6
+ mid_names = ['a', 'f']
7
+ right_names = ['a', 'g']
8
+
9
+ field_names = dedup_field_names(left_names, mid_names, right_names)
10
+ field_names.should == [
11
+ 'a', 'b', 'c', 'd', 'e',
12
+ 'a_', 'f',
13
+ 'a__', 'g'
14
+ ]
15
+ end
16
+
17
+ it 'should fail to resolve duplicate fields' do
18
+ incoming = fields(['line'])
19
+ declared = fields(['line'])
20
+ outgoing = all_fields
21
+ lambda do
22
+ begin
23
+ resolved = Java::CascadingTuple::Fields.resolve(outgoing, [incoming, declared].to_java(Java::CascadingTuple::Fields))
24
+ rescue NativeException => e
25
+ raise e.cause
26
+ end
27
+ end.should raise_error Java::CascadingTuple::TupleException, 'field name already exists: line'
28
+ end
29
+
30
+ it 'should find branches to sink' do
31
+ cascade 'branched_pass' do
32
+ flow 'branched_pass' do
33
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
34
+ assembly 'input' do
35
+ branch 'branched_input' do
36
+ project 'line'
37
+ end
38
+ end
39
+ sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
40
+ end
41
+ end.complete
42
+
43
+ ilc = `wc -l spec/resource/test_input.txt`.split(/\s+/).first
44
+ olc = `wc -l #{OUTPUT_DIR}/branched_pass_out/part-00000`.split(/\s+/).first
45
+ ilc.should == olc
46
+ end
47
+
48
+ it 'should create an isolated namespace per cascade' do
49
+ cascade 'double' do
50
+ flow 'double' do
51
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
52
+ assembly 'input' do # Dup name
53
+ insert 'doubled' => expr('line:string + "," + line:string')
54
+ project 'doubled'
55
+ end
56
+ sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
57
+ end
58
+ end
59
+
60
+ cascade 'pass' do
61
+ flow 'pass' do
62
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
63
+ assembly 'input' do # Dup name
64
+ project 'line'
65
+ end
66
+ sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
67
+ end
68
+ end
69
+
70
+ Cascade.get('double').complete
71
+ Cascade.get('pass').complete
72
+ diff = `diff #{OUTPUT_DIR}/double_out/part-00000 #{OUTPUT_DIR}/pass_out/part-00000`
73
+ diff.should_not be_empty
74
+ end
75
+
76
+ it 'should support joins in branches' do
77
+ cascade 'branch_join' do
78
+ flow 'branch_join' do
79
+ source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
80
+ source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
81
+
82
+ assembly 'left' do
83
+ split 'line', ['x', 'y', 'z'], :pattern => /,/
84
+ project 'x', 'y', 'z'
85
+ end
86
+
87
+ assembly 'right' do
88
+ split 'line', ['x', 'y', 'z'], :pattern => /,/
89
+ project 'x', 'y', 'z'
90
+
91
+ branch 'branch_join' do
92
+ join 'left', 'right', :on => 'x'
93
+ end
94
+ end
95
+
96
+ sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
97
+ end
98
+ end.complete
99
+ end
100
+ end
data/spec/expr_spec.rb ADDED
@@ -0,0 +1,10 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Object do
4
+ it 'should allow expr syntax' do
5
+ test_assembly do
6
+ insert 'foo' => 1, 'bar' => expr('offset:int')
7
+ check_scope :values_fields => ['offset', 'line', 'bar', 'foo']
8
+ end
9
+ end
10
+ end
data/spec/primary_key_spec.rb ADDED
@@ -0,0 +1,119 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading::Scope do
4
+ it 'should allow override of primary key' do
5
+ test_assembly do
6
+ split 'line', ['x', 'y'], :pattern => /,/
7
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
8
+ :primary_key_fields => ['offset']
9
+ end
10
+ end
11
+
12
+ it 'should pass primary key through Each' do
13
+ test_assembly do
14
+ split 'line', ['x', 'y'], :pattern => /,/
15
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
16
+ :primary_key_fields => ['offset']
17
+ pass
18
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
19
+ :primary_key_fields => ['offset']
20
+ end
21
+ end
22
+
23
+ it 'should support renaming primary keys' do
24
+ test_assembly do
25
+ split 'line', ['x', 'y'], :pattern => /,/
26
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
27
+ :primary_key_fields => ['offset']
28
+ rename 'offset' => 'primary_key', 'line' => 'data'
29
+ check_scope :values_fields => ['primary_key', 'data', 'x', 'y'],
30
+ :primary_key_fields => ['primary_key']
31
+ end
32
+ end
33
+
34
+ it 'should clear primary keys when a subset of their fields are discarded' do
35
+ test_assembly do
36
+ primary 'offset', 'line' # Make primary keys interesting
37
+ split 'line', ['x', 'y'], :pattern => /,/
38
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
39
+ :primary_key_fields => ['offset', 'line']
40
+ project 'line', 'x', 'y'
41
+ check_scope :values_fields => ['line', 'x', 'y'],
42
+ :primary_key_fields => nil
43
+ end
44
+ end
45
+
46
+ it 'should pass primary key through branch' do
47
+ test_assembly do
48
+ split 'line', ['x', 'y'], :pattern => /,/
49
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
50
+ :primary_key_fields => ['offset']
51
+
52
+ branch 'check_keys' do
53
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
54
+ :primary_key_fields => ['offset']
55
+ pass
56
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
57
+ :primary_key_fields => ['offset']
58
+ end
59
+
60
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
61
+ :primary_key_fields => ['offset']
62
+ pass
63
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
64
+ :primary_key_fields => ['offset']
65
+ end
66
+ end
67
+
68
+ it 'should pass primary key through GroupBy followed by Each' do
69
+ test_assembly do
70
+ group_by 'offset'
71
+ check_scope :values_fields => ['offset', 'line'],
72
+ :grouping_fields => ['offset'],
73
+ :primary_key_fields => ['offset']
74
+ end
75
+ end
76
+
77
+ it 'should pass primary key through GroupBy followed by Every' do
78
+ test_assembly do
79
+ split 'line', ['x', 'y'], :pattern => /,/
80
+ group_by 'offset', 'x' do
81
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
82
+ :grouping_fields => ['offset', 'x'],
83
+ :primary_key_fields => ['offset'],
84
+ :grouping_primary_key_fields => ['offset', 'x']
85
+ count 'line'
86
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
87
+ :grouping_fields => ['offset', 'x', 'line'],
88
+ :primary_key_fields => ['offset'],
89
+ :grouping_primary_key_fields => ['offset', 'x']
90
+ count 'y'
91
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
92
+ :grouping_fields => ['offset', 'x', 'line', 'y'],
93
+ :primary_key_fields => ['offset', 'x'], # FIXME: why has the pk changed?
94
+ :grouping_primary_key_fields => ['offset', 'x']
95
+ end
96
+ check_scope :values_fields => ['offset', 'x', 'line', 'y'],
97
+ :primary_key_fields => ['offset', 'x']
98
+ end
99
+ end
100
+
101
+ it 'should not clear primary key when grouping on other fields' do
102
+ test_assembly do
103
+ group_by 'line'
104
+ check_scope :values_fields => ['offset', 'line'],
105
+ :grouping_fields => ['line'],
106
+ :primary_key_fields => ['offset'],
107
+ :grouping_primary_key_fields => ['line']
108
+ end
109
+ end
110
+
111
+ it 'should pass primary key through CoGroup' do
112
+ test_join_assembly do
113
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
114
+ :grouping_fields => ['x'],
115
+ :primary_key_fields => ['offset'],
116
+ :grouping_primary_key_fields => ['x']
117
+ end
118
+ end
119
+ end
data/spec/resource/join_input.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1,4,world
2
+ 2,3,bar
3
+ 3,8,baz
data/spec/resource/test_input.txt ADDED
@@ -0,0 +1,4 @@
1
+ hello,world
2
+ foo,bar
3
+ foo,bar
4
+ biz,baz
data/spec/scope_spec.rb ADDED
@@ -0,0 +1,174 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading::Scope do
4
+ it 'should match Cascading fields names from source tap scheme' do
5
+ test_assembly do
6
+ # Pass that uses our scope instead of all_fields
7
+ operation = Java::CascadingOperation::Identity.new
8
+ make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
9
+
10
+ check_scope :values_fields => ['offset', 'line']
11
+ end
12
+ end
13
+
14
+ it 'should match Cascading fields names after CoGroup' do
15
+ test_join_assembly do
16
+ # Pass that uses our scope instead of all_fields
17
+ operation = Java::CascadingOperation::Identity.new
18
+ make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
19
+
20
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
21
+ end
22
+ end
23
+
24
+ it 'should match Cascading fields names after Every' do
25
+ test_join_assembly do
26
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
27
+
28
+ # Pass that uses our grouping fields instead of all_fields
29
+ operation = Java::CascadingOperation::Identity.new
30
+ make_each(# FIXME: names of grouping fields are not what we'd expect!
31
+ Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)
32
+
33
+ check_scope :values_fields => [0, 'x_sum']
34
+ end
35
+ end
36
+
37
+ it 'should pick up names from source tap scheme' do
38
+ test_assembly do
39
+ pass
40
+
41
+ check_scope :values_fields => ['offset', 'line']
42
+ end
43
+ end
44
+
45
+ it 'should propagate names through Each' do
46
+ test_assembly do
47
+ check_scope :values_fields => ['offset', 'line']
48
+ assert_size_equals 2
49
+
50
+ split 'line', ['x', 'y'], :pattern => /,/
51
+ check_scope :values_fields => ['offset', 'line', 'x', 'y']
52
+ assert_size_equals 4
53
+ end
54
+ end
55
+
56
+ it 'should allow field filtration at Each' do
57
+ test_assembly do
58
+ check_scope :values_fields => ['offset', 'line']
59
+ assert_size_equals 2
60
+
61
+ split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
62
+ check_scope :values_fields => ['x', 'y']
63
+ assert_size_equals 2
64
+ end
65
+ end
66
+
67
+ it 'should propagate names through CoGroup' do
68
+ test_join_assembly do
69
+ end
70
+ end
71
+
72
+ it 'should pass grouping fields to Every' do
73
+ test_join_assembly do
74
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
75
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
76
+ :grouping_fields => ['x', 'x_sum']
77
+ assert_group_size_equals 1
78
+ end
79
+ end
80
+
81
+ it 'should pass grouping fields through chained Every' do
82
+ test_join_assembly do
83
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
84
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
85
+ :grouping_fields => ['x', 'x_sum']
86
+ assert_group_size_equals 1
87
+
88
+ sum :mapping => {'y' => 'y_sum'}, :type => :int
89
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
90
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
91
+ assert_group_size_equals 1
92
+ end
93
+ end
94
+
95
+ it 'should propagate names through Every' do
96
+ test_join_assembly do
97
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
98
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
99
+ :grouping_fields => ['x', 'x_sum']
100
+ assert_group_size_equals 1
101
+
102
+ sum :mapping => {'y' => 'y_sum'}, :type => :int
103
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
104
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
105
+ assert_group_size_equals 1
106
+
107
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
108
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
109
+ assert_size_equals 3
110
+
111
+ # No rename service provided unless you use the block form of join!
112
+ check_scope :values_fields => [0, 'x_sum', 'y_sum']
113
+
114
+ # Mimic rename service
115
+ bind_names ['x', 'x_sum', 'y_sum']
116
+ check_scope :values_fields => ['x', 'x_sum', 'y_sum']
117
+ end
118
+ end
119
+
120
+ it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
121
+ test_join_assembly do
122
+ assert_size_equals 10
123
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
124
+ end
125
+ end
126
+
127
+ it 'should fail to pass grouping fields to Every immediately following Each' do
128
+ lambda do # Composition fails
129
+ test_join_assembly do
130
+ pass
131
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
132
+ begin
133
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
134
+ rescue CascadingException => e
135
+ raise e.cause(3)
136
+ end
137
+ end
138
+ end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
139
+ end
140
+
141
+ it 'should propagate values fields and field names into branch' do
142
+ test_join_assembly(:branches => ['data_tuple']) do
143
+ branch 'data_tuple' do
144
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
145
+ :grouping_fields => ['x']
146
+ assert_size_equals 10
147
+ end
148
+ end
149
+ end
150
+
151
+ it 'should fail to propagate grouping fields to branch' do
152
+ lambda do # Execution fails
153
+ begin
154
+ test_join_assembly(:branches => ['attempt_group']) do
155
+ branch 'attempt_group' do
156
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
157
+ :grouping_fields => ['x']
158
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
159
+ end
160
+ end
161
+ rescue CascadingException => e
162
+ raise e.cause(4)
163
+ end
164
+ end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
165
+ end
166
+
167
+ it 'should propagate names through GroupBy' do
168
+ test_assembly do
169
+ group_by 'line'
170
+ check_scope :values_fields => ['offset', 'line'],
171
+ :grouping_fields => ['line']
172
+ end
173
+ end
174
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,6 @@
1
+ --colour
2
+ --format
3
+ progress
4
+ --loadby
5
+ mtime
6
+ --reverse
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'spec'
2
+ require 'rubygems'
3
+ require 'cascading'
4
+
5
+ require File.expand_path(File.dirname(__FILE__) + '/spec_util')