cascading.jruby 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/HACKING.md +15 -0
  2. data/History.txt +0 -0
  3. data/LICENSE.txt +165 -0
  4. data/README.md +7 -0
  5. data/Rakefile +45 -0
  6. data/bin/make_job +81 -0
  7. data/lib/cascading/assembly.rb +726 -0
  8. data/lib/cascading/base.rb +63 -0
  9. data/lib/cascading/cascade.rb +63 -0
  10. data/lib/cascading/cascading.rb +134 -0
  11. data/lib/cascading/cascading_exception.rb +30 -0
  12. data/lib/cascading/expr_stub.rb +33 -0
  13. data/lib/cascading/ext/array.rb +15 -0
  14. data/lib/cascading/flow.rb +168 -0
  15. data/lib/cascading/operations.rb +204 -0
  16. data/lib/cascading/scope.rb +160 -0
  17. data/lib/cascading.rb +63 -0
  18. data/samples/branch.rb +31 -0
  19. data/samples/cascading.rb +41 -0
  20. data/samples/copy.rb +18 -0
  21. data/samples/data/data2.txt +88799 -0
  22. data/samples/data/data_join1.txt +3 -0
  23. data/samples/data/data_join2.txt +3 -0
  24. data/samples/data/data_join3.txt +3 -0
  25. data/samples/join.rb +32 -0
  26. data/samples/logwordcount.rb +22 -0
  27. data/samples/project.rb +24 -0
  28. data/samples/rename.rb +21 -0
  29. data/samples/scorenames.rb +20 -0
  30. data/samples/splitter.rb +20 -0
  31. data/samples/union.rb +35 -0
  32. data/spec/cascading_spec.rb +100 -0
  33. data/spec/expr_spec.rb +10 -0
  34. data/spec/primary_key_spec.rb +119 -0
  35. data/spec/resource/join_input.txt +3 -0
  36. data/spec/resource/test_input.txt +4 -0
  37. data/spec/scope_spec.rb +174 -0
  38. data/spec/spec.opts +6 -0
  39. data/spec/spec_helper.rb +5 -0
  40. data/spec/spec_util.rb +188 -0
  41. data/src/cascading/jruby/Main.java +38 -0
  42. data/src/cascading/jruby/runner.rb +6 -0
  43. data/tags +238 -0
  44. data/tasks/ann.rake +80 -0
  45. data/tasks/ant.rake +11 -0
  46. data/tasks/bones.rake +20 -0
  47. data/tasks/gem.rake +206 -0
  48. data/tasks/git.rake +40 -0
  49. data/tasks/notes.rake +27 -0
  50. data/tasks/post_load.rake +34 -0
  51. data/tasks/rdoc.rake +50 -0
  52. data/tasks/rubyforge.rake +55 -0
  53. data/tasks/samples.rake +13 -0
  54. data/tasks/setup.rb +300 -0
  55. data/tasks/spec.rake +59 -0
  56. data/tasks/svn.rake +47 -0
  57. data/tasks/test.rake +42 -0
  58. data/test/data/data1.txt +14 -0
  59. data/test/data/data2.txt +14 -0
  60. data/test/test_assembly.rb +321 -0
  61. data/test/test_cascading.rb +49 -0
  62. data/test/test_flow.rb +15 -0
  63. metadata +137 -0
data/samples/data/data_join1.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 Grégoire
2
+ 2 Mathias
3
+ 3 Stéphane
data/samples/data/data_join2.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 33
2
+ 2 30
3
+ 3 25
data/samples/data/data_join3.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1 Cannes
2
+ 2 Boston
3
+ 3 Paris
data/samples/join.rb ADDED
@@ -0,0 +1,32 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'join' do
8
+ flow 'join' do
9
+ source 'input1', tap('samples/data/data_join1.txt')
10
+ source 'input2', tap('samples/data/data_join2.txt')
11
+ source 'input3', tap('samples/data/data_join3.txt')
12
+
13
+ assembly 'input1' do
14
+ split 'line', ['id', 'name']
15
+ end
16
+
17
+ assembly 'input2' do
18
+ split 'line', ['id', 'age']
19
+ end
20
+
21
+ assembly 'input3' do
22
+ split 'line', ['id', 'city']
23
+ end
24
+
25
+ assembly 'join' do
26
+ join 'input1', 'input2', 'input3', :on => 'id'
27
+ project 'id', 'name', 'age', 'city'
28
+ end
29
+
30
+ sink 'join', tap('output/join', :sink_mode => :replace)
31
+ end
32
+ end.complete(sample_properties)
data/samples/logwordcount.rb ADDED
@@ -0,0 +1,22 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'logwordcount' do
8
+ flow 'logwordcount' do
9
+ source 'input', tap('http://www.gutenberg.org/files/20417/20417-8.txt')
10
+
11
+ assembly 'input' do
12
+ # TODO: create a helper for RegexSplitGenerator
13
+ each 'line', :function => regex_split_generator('word', :pattern => /[.,]*\s+/)
14
+ group_by 'word' do
15
+ count
16
+ end
17
+ group_by 'count', :reverse => true
18
+ end
19
+
20
+ sink 'input', tap('output/logwordcount', :sink_mode => :replace)
21
+ end
22
+ end.complete(sample_properties)
data/samples/project.rb ADDED
@@ -0,0 +1,24 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ # History: "project" (verb) used to be known as "restrict"
5
+
6
+ require 'cascading'
7
+ require 'samples/cascading'
8
+
9
+ cascade 'project' do
10
+ flow 'project' do
11
+ source 'input', tap('samples/data/data2.txt')
12
+
13
+ assembly 'input' do
14
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
15
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
16
+ project 'name', 'score1', 'score2'
17
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(3)
18
+ project 'name', 'score2'
19
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(2)
20
+ end
21
+
22
+ sink 'input', tap('output/project', :sink_mode => :replace)
23
+ end
24
+ end.complete(sample_properties)
data/samples/rename.rb ADDED
@@ -0,0 +1,21 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'rename' do
8
+ flow 'rename' do
9
+ source 'input', tap('samples/data/data2.txt')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
13
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
14
+ rename 'name' => 'new_name', 'score1' => 'new_score1', 'score2' => 'new_score2'
15
+ assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
16
+ puts "Final field names: #{scope.values_fields.to_a.inspect}"
17
+ end
18
+
19
+ sink 'input', tap('output/rename', :sink_mode => :replace)
20
+ end
21
+ end.complete(sample_properties)
data/samples/scorenames.rb ADDED
@@ -0,0 +1,20 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'scorenames' do
8
+ flow 'scorenames' do
9
+ # You don't have to curl and cache inputs: tap can fetch via HTTP
10
+ source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
11
+
12
+ assembly 'input' do
13
+ split 'line', ['name', 'val1', 'val2', 'id']
14
+ insert 'val3' => expr('val2:double < 40.0 ? val1:double : val2:double')
15
+ project 'name', 'val3', 'id'
16
+ end
17
+
18
+ sink 'input', tap('output/scorenames', :sink_mode => :replace)
19
+ end
20
+ end.complete(sample_properties)
data/samples/splitter.rb ADDED
@@ -0,0 +1,20 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'splitter' do
8
+ flow 'splitter' do
9
+ source 'input', tap('samples/data/data2.txt')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
13
+ group_by 'score1' do
14
+ count
15
+ end
16
+ end
17
+
18
+ sink 'input', tap('output/splitter', :sink_mode => :replace)
19
+ end
20
+ end.complete(sample_properties)
data/samples/union.rb ADDED
@@ -0,0 +1,35 @@
1
+ #! /usr/bin/env jruby
2
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'cascading'
5
+ require 'samples/cascading'
6
+
7
+ cascade 'union' do
8
+ flow 'union' do
9
+ source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
10
+
11
+ assembly 'input' do
12
+ split 'line', ['name', 'score1', 'score2', 'id']
13
+
14
+ branch 'branch1' do
15
+ group_by 'score1', 'name' do
16
+ count
17
+ end
18
+ rename 'score1' => 'score'
19
+ end
20
+
21
+ branch 'branch2' do
22
+ group_by 'score2', 'name' do
23
+ count
24
+ end
25
+ rename 'score2' => 'score'
26
+ end
27
+ end
28
+
29
+ assembly 'union' do
30
+ union 'branch1', 'branch2'
31
+ end
32
+
33
+ sink 'union', tap('output/union', :sink_mode => :replace)
34
+ end
35
+ end.complete(sample_properties)
data/spec/cascading_spec.rb ADDED
@@ -0,0 +1,100 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading do
4
+ it 'should dedup field names from multiple sources' do
5
+ left_names = ['a', 'b', 'c', 'd', 'e']
6
+ mid_names = ['a', 'f']
7
+ right_names = ['a', 'g']
8
+
9
+ field_names = dedup_field_names(left_names, mid_names, right_names)
10
+ field_names.should == [
11
+ 'a', 'b', 'c', 'd', 'e',
12
+ 'a_', 'f',
13
+ 'a__', 'g'
14
+ ]
15
+ end
16
+
17
+ it 'should fail to resolve duplicate fields' do
18
+ incoming = fields(['line'])
19
+ declared = fields(['line'])
20
+ outgoing = all_fields
21
+ lambda do
22
+ begin
23
+ resolved = Java::CascadingTuple::Fields.resolve(outgoing, [incoming, declared].to_java(Java::CascadingTuple::Fields))
24
+ rescue NativeException => e
25
+ raise e.cause
26
+ end
27
+ end.should raise_error Java::CascadingTuple::TupleException, 'field name already exists: line'
28
+ end
29
+
30
+ it 'should find branches to sink' do
31
+ cascade 'branched_pass' do
32
+ flow 'branched_pass' do
33
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
34
+ assembly 'input' do
35
+ branch 'branched_input' do
36
+ project 'line'
37
+ end
38
+ end
39
+ sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
40
+ end
41
+ end.complete
42
+
43
+ ilc = `wc -l spec/resource/test_input.txt`.split(/\s+/).first
44
+ olc = `wc -l #{OUTPUT_DIR}/branched_pass_out/part-00000`.split(/\s+/).first
45
+ ilc.should == olc
46
+ end
47
+
48
+ it 'should create an isolated namespace per cascade' do
49
+ cascade 'double' do
50
+ flow 'double' do
51
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
52
+ assembly 'input' do # Dup name
53
+ insert 'doubled' => expr('line:string + "," + line:string')
54
+ project 'doubled'
55
+ end
56
+ sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
57
+ end
58
+ end
59
+
60
+ cascade 'pass' do
61
+ flow 'pass' do
62
+ source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
63
+ assembly 'input' do # Dup name
64
+ project 'line'
65
+ end
66
+ sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
67
+ end
68
+ end
69
+
70
+ Cascade.get('double').complete
71
+ Cascade.get('pass').complete
72
+ diff = `diff #{OUTPUT_DIR}/double_out/part-00000 #{OUTPUT_DIR}/pass_out/part-00000`
73
+ diff.should_not be_empty
74
+ end
75
+
76
+ it 'should support joins in branches' do
77
+ cascade 'branch_join' do
78
+ flow 'branch_join' do
79
+ source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
80
+ source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
81
+
82
+ assembly 'left' do
83
+ split 'line', ['x', 'y', 'z'], :pattern => /,/
84
+ project 'x', 'y', 'z'
85
+ end
86
+
87
+ assembly 'right' do
88
+ split 'line', ['x', 'y', 'z'], :pattern => /,/
89
+ project 'x', 'y', 'z'
90
+
91
+ branch 'branch_join' do
92
+ join 'left', 'right', :on => 'x'
93
+ end
94
+ end
95
+
96
+ sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
97
+ end
98
+ end.complete
99
+ end
100
+ end
data/spec/expr_spec.rb ADDED
@@ -0,0 +1,10 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Object do
4
+ it 'should allow expr syntax' do
5
+ test_assembly do
6
+ insert 'foo' => 1, 'bar' => expr('offset:int')
7
+ check_scope :values_fields => ['offset', 'line', 'bar', 'foo']
8
+ end
9
+ end
10
+ end
data/spec/primary_key_spec.rb ADDED
@@ -0,0 +1,119 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading::Scope do
4
+ it 'should allow override of primary key' do
5
+ test_assembly do
6
+ split 'line', ['x', 'y'], :pattern => /,/
7
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
8
+ :primary_key_fields => ['offset']
9
+ end
10
+ end
11
+
12
+ it 'should pass primary key through Each' do
13
+ test_assembly do
14
+ split 'line', ['x', 'y'], :pattern => /,/
15
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
16
+ :primary_key_fields => ['offset']
17
+ pass
18
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
19
+ :primary_key_fields => ['offset']
20
+ end
21
+ end
22
+
23
+ it 'should support renaming primary keys' do
24
+ test_assembly do
25
+ split 'line', ['x', 'y'], :pattern => /,/
26
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
27
+ :primary_key_fields => ['offset']
28
+ rename 'offset' => 'primary_key', 'line' => 'data'
29
+ check_scope :values_fields => ['primary_key', 'data', 'x', 'y'],
30
+ :primary_key_fields => ['primary_key']
31
+ end
32
+ end
33
+
34
+ it 'should clear primary keys when a subset of their fields are discarded' do
35
+ test_assembly do
36
+ primary 'offset', 'line' # Make primary keys interesting
37
+ split 'line', ['x', 'y'], :pattern => /,/
38
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
39
+ :primary_key_fields => ['offset', 'line']
40
+ project 'line', 'x', 'y'
41
+ check_scope :values_fields => ['line', 'x', 'y'],
42
+ :primary_key_fields => nil
43
+ end
44
+ end
45
+
46
+ it 'should pass primary key through branch' do
47
+ test_assembly do
48
+ split 'line', ['x', 'y'], :pattern => /,/
49
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
50
+ :primary_key_fields => ['offset']
51
+
52
+ branch 'check_keys' do
53
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
54
+ :primary_key_fields => ['offset']
55
+ pass
56
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
57
+ :primary_key_fields => ['offset']
58
+ end
59
+
60
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
61
+ :primary_key_fields => ['offset']
62
+ pass
63
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
64
+ :primary_key_fields => ['offset']
65
+ end
66
+ end
67
+
68
+ it 'should pass primary key through GroupBy followed by Each' do
69
+ test_assembly do
70
+ group_by 'offset'
71
+ check_scope :values_fields => ['offset', 'line'],
72
+ :grouping_fields => ['offset'],
73
+ :primary_key_fields => ['offset']
74
+ end
75
+ end
76
+
77
+ it 'should pass primary key through GroupBy followed by Every' do
78
+ test_assembly do
79
+ split 'line', ['x', 'y'], :pattern => /,/
80
+ group_by 'offset', 'x' do
81
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
82
+ :grouping_fields => ['offset', 'x'],
83
+ :primary_key_fields => ['offset'],
84
+ :grouping_primary_key_fields => ['offset', 'x']
85
+ count 'line'
86
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
87
+ :grouping_fields => ['offset', 'x', 'line'],
88
+ :primary_key_fields => ['offset'],
89
+ :grouping_primary_key_fields => ['offset', 'x']
90
+ count 'y'
91
+ check_scope :values_fields => ['offset', 'line', 'x', 'y'],
92
+ :grouping_fields => ['offset', 'x', 'line', 'y'],
93
+ :primary_key_fields => ['offset', 'x'], # FIXME: why has the pk changed?
94
+ :grouping_primary_key_fields => ['offset', 'x']
95
+ end
96
+ check_scope :values_fields => ['offset', 'x', 'line', 'y'],
97
+ :primary_key_fields => ['offset', 'x']
98
+ end
99
+ end
100
+
101
+ it 'should not clear primary key when grouping on other fields' do
102
+ test_assembly do
103
+ group_by 'line'
104
+ check_scope :values_fields => ['offset', 'line'],
105
+ :grouping_fields => ['line'],
106
+ :primary_key_fields => ['offset'],
107
+ :grouping_primary_key_fields => ['line']
108
+ end
109
+ end
110
+
111
+ it 'should pass primary key through CoGroup' do
112
+ test_join_assembly do
113
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
114
+ :grouping_fields => ['x'],
115
+ :primary_key_fields => ['offset'],
116
+ :grouping_primary_key_fields => ['x']
117
+ end
118
+ end
119
+ end
data/spec/resource/join_input.txt ADDED
@@ -0,0 +1,3 @@
1
+ 1,4,world
2
+ 2,3,bar
3
+ 3,8,baz
data/spec/resource/test_input.txt ADDED
@@ -0,0 +1,4 @@
1
+ hello,world
2
+ foo,bar
3
+ foo,bar
4
+ biz,baz
data/spec/scope_spec.rb ADDED
@@ -0,0 +1,174 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Cascading::Scope do
4
+ it 'should match Cascading fields names from source tap scheme' do
5
+ test_assembly do
6
+ # Pass that uses our scope instead of all_fields
7
+ operation = Java::CascadingOperation::Identity.new
8
+ make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
9
+
10
+ check_scope :values_fields => ['offset', 'line']
11
+ end
12
+ end
13
+
14
+ it 'should match Cascading fields names after CoGroup' do
15
+ test_join_assembly do
16
+ # Pass that uses our scope instead of all_fields
17
+ operation = Java::CascadingOperation::Identity.new
18
+ make_each(Java::CascadingPipe::Each, tail_pipe, scope.values_fields, operation)
19
+
20
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
21
+ end
22
+ end
23
+
24
+ it 'should match Cascading fields names after Every' do
25
+ test_join_assembly do
26
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
27
+
28
+ # Pass that uses our grouping fields instead of all_fields
29
+ operation = Java::CascadingOperation::Identity.new
30
+ make_each(# FIXME: names of grouping fields are not what we'd expect!
31
+ Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)
32
+
33
+ check_scope :values_fields => [0, 'x_sum']
34
+ end
35
+ end
36
+
37
+ it 'should pick up names from source tap scheme' do
38
+ test_assembly do
39
+ pass
40
+
41
+ check_scope :values_fields => ['offset', 'line']
42
+ end
43
+ end
44
+
45
+ it 'should propagate names through Each' do
46
+ test_assembly do
47
+ check_scope :values_fields => ['offset', 'line']
48
+ assert_size_equals 2
49
+
50
+ split 'line', ['x', 'y'], :pattern => /,/
51
+ check_scope :values_fields => ['offset', 'line', 'x', 'y']
52
+ assert_size_equals 4
53
+ end
54
+ end
55
+
56
+ it 'should allow field filtration at Each' do
57
+ test_assembly do
58
+ check_scope :values_fields => ['offset', 'line']
59
+ assert_size_equals 2
60
+
61
+ split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
62
+ check_scope :values_fields => ['x', 'y']
63
+ assert_size_equals 2
64
+ end
65
+ end
66
+
67
+ it 'should propagate names through CoGroup' do
68
+ test_join_assembly do
69
+ end
70
+ end
71
+
72
+ it 'should pass grouping fields to Every' do
73
+ test_join_assembly do
74
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
75
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
76
+ :grouping_fields => ['x', 'x_sum']
77
+ assert_group_size_equals 1
78
+ end
79
+ end
80
+
81
+ it 'should pass grouping fields through chained Every' do
82
+ test_join_assembly do
83
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
84
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
85
+ :grouping_fields => ['x', 'x_sum']
86
+ assert_group_size_equals 1
87
+
88
+ sum :mapping => {'y' => 'y_sum'}, :type => :int
89
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
90
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
91
+ assert_group_size_equals 1
92
+ end
93
+ end
94
+
95
+ it 'should propagate names through Every' do
96
+ test_join_assembly do
97
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
98
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
99
+ :grouping_fields => ['x', 'x_sum']
100
+ assert_group_size_equals 1
101
+
102
+ sum :mapping => {'y' => 'y_sum'}, :type => :int
103
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
104
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
105
+ assert_group_size_equals 1
106
+
107
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
108
+ :grouping_fields => ['x', 'x_sum', 'y_sum']
109
+ assert_size_equals 3
110
+
111
+ # No rename service provided unless you use the block form of join!
112
+ check_scope :values_fields => [0, 'x_sum', 'y_sum']
113
+
114
+ # Mimic rename service
115
+ bind_names ['x', 'x_sum', 'y_sum']
116
+ check_scope :values_fields => ['x', 'x_sum', 'y_sum']
117
+ end
118
+ end
119
+
120
+ it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
121
+ test_join_assembly do
122
+ assert_size_equals 10
123
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
124
+ end
125
+ end
126
+
127
+ it 'should fail to pass grouping fields to Every immediately following Each' do
128
+ lambda do # Composition fails
129
+ test_join_assembly do
130
+ pass
131
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
132
+ begin
133
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
134
+ rescue CascadingException => e
135
+ raise e.cause(3)
136
+ end
137
+ end
138
+ end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
139
+ end
140
+
141
+ it 'should propagate values fields and field names into branch' do
142
+ test_join_assembly(:branches => ['data_tuple']) do
143
+ branch 'data_tuple' do
144
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
145
+ :grouping_fields => ['x']
146
+ assert_size_equals 10
147
+ end
148
+ end
149
+ end
150
+
151
+ it 'should fail to propagate grouping fields to branch' do
152
+ lambda do # Execution fails
153
+ begin
154
+ test_join_assembly(:branches => ['attempt_group']) do
155
+ branch 'attempt_group' do
156
+ check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
157
+ :grouping_fields => ['x']
158
+ sum :mapping => {'x' => 'x_sum'}, :type => :int
159
+ end
160
+ end
161
+ rescue CascadingException => e
162
+ raise e.cause(4)
163
+ end
164
+ end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
165
+ end
166
+
167
+ it 'should propagate names through GroupBy' do
168
+ test_assembly do
169
+ group_by 'line'
170
+ check_scope :values_fields => ['offset', 'line'],
171
+ :grouping_fields => ['line']
172
+ end
173
+ end
174
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,6 @@
1
+ --colour
2
+ --format
3
+ progress
4
+ --loadby
5
+ mtime
6
+ --reverse
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'spec'
2
+ require 'rubygems'
3
+ require 'cascading'
4
+
5
+ require File.expand_path(File.dirname(__FILE__) + '/spec_util')