cascading.jruby 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/samples/branch.rb CHANGED
@@ -4,7 +4,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
4
 
5
5
  require 'cascading'
6
6
 
7
- cascade 'branch' do
7
+ cascade 'branch', :mode => :local do
8
8
  flow 'branch' do
9
9
  source 'input', tap('samples/data/data2.txt')
10
10
 
@@ -27,4 +27,4 @@ cascade 'branch' do
27
27
  sink 'branch1', tap('output/branch1', :sink_mode => :replace)
28
28
  sink 'branch2', tap('output/branch2', :sink_mode => :replace)
29
29
  end
30
- end.complete(local_properties('build/sample'))
30
+ end.complete
data/samples/copy.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'copy' do
6
+ cascade 'copy', :mode => :local do
7
7
  flow 'copy' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -17,4 +17,4 @@ cascade 'copy' do
17
17
 
18
18
  sink 'input', tap('output/copy', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
data/samples/group_by.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'group_by' do
6
+ cascade 'group_by', :mode => :local do
7
7
  flow 'group_by' do
8
8
  source 'input', tap('samples/data/data_group_by.txt')
9
9
 
@@ -53,9 +53,9 @@ cascade 'group_by' do
53
53
  sink 'empty_group_by', tap('output/empty_group_by', :sink_mode => :replace)
54
54
  sink 'blockless_group_by', tap('output/blockless_group_by', :sink_mode => :replace)
55
55
  sink 'aggregate_by', tap('output/aggregate_by', :sink_mode => :replace)
56
- sink 'empty_aggregate_by', tap('output/empty_aggregate_by', :sink_mode => :replace)
57
- sink 'blockless_aggregate_by', tap('output/blockless_aggregate_by', :sink_mode => :replace)
56
+ #sink 'empty_aggregate_by', tap('output/empty_aggregate_by', :sink_mode => :replace)
57
+ #sink 'blockless_aggregate_by', tap('output/blockless_aggregate_by', :sink_mode => :replace)
58
58
  sink 'unique', tap('output/unique', :sink_mode => :replace)
59
59
  end
60
- #end.draw(ARGV[0], local_properties('build/sample'))
61
- end.complete(local_properties('build/sample'))
60
+ #end.draw(ARGV[0])
61
+ end.complete
data/samples/join.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'join' do
6
+ cascade 'join', :mode => :local do
7
7
  flow 'join' do
8
8
  source 'input1', tap('samples/data/data_join1.txt')
9
9
  source 'input2', tap('samples/data/data_join2.txt')
@@ -28,4 +28,4 @@ cascade 'join' do
28
28
 
29
29
  sink 'join', tap('output/join', :sink_mode => :replace)
30
30
  end
31
- end.complete(local_properties('build/sample'))
31
+ end.complete
@@ -3,15 +3,14 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'logwordcount' do
6
+ cascade 'logwordcount', :mode => :local do
7
7
  flow 'logwordcount' do
8
8
  # This works just as well, but will get you blocked by Project Gutenberg
9
9
  #source 'input', tap('http://www.gutenberg.org/files/20417/20417-8.txt')
10
10
  source 'input', tap('samples/data/gutenberg/the_outline_of_science_vol_1')
11
11
 
12
12
  assembly 'input' do
13
- # TODO: create a helper for RegexSplitGenerator
14
- each 'line', :function => regex_split_generator('word', :pattern => /[.,]*\s+/)
13
+ split_rows 'line', 'word', :pattern => /[.,]*\s+/, :output => 'word'
15
14
  group_by 'word' do
16
15
  count
17
16
  end
@@ -20,4 +19,4 @@ cascade 'logwordcount' do
20
19
 
21
20
  sink 'input', tap('output/logwordcount', :sink_mode => :replace)
22
21
  end
23
- end.complete(local_properties('build/sample'))
22
+ end.complete
data/samples/project.rb CHANGED
@@ -5,7 +5,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
5
5
 
6
6
  require 'cascading'
7
7
 
8
- cascade 'project' do
8
+ cascade 'project', :mode => :local do
9
9
  flow 'project' do
10
10
  source 'input', tap('samples/data/data2.txt')
11
11
 
@@ -20,4 +20,4 @@ cascade 'project' do
20
20
 
21
21
  sink 'input', tap('output/project', :sink_mode => :replace)
22
22
  end
23
- end.complete(local_properties('build/sample'))
23
+ end.complete
data/samples/rename.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'rename' do
6
+ cascade 'rename', :mode => :local do
7
7
  flow 'rename' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -17,4 +17,4 @@ cascade 'rename' do
17
17
 
18
18
  sink 'input', tap('output/rename', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'scorenames' do
6
+ cascade 'scorenames', :mode => :local do
7
7
  flow 'scorenames' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -17,4 +17,4 @@ cascade 'scorenames' do
17
17
 
18
18
  sink 'input', tap('output/scorenames', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
data/samples/splitter.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'splitter' do
6
+ cascade 'splitter', :mode => :local do
7
7
  flow 'splitter' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -16,4 +16,4 @@ cascade 'splitter' do
16
16
 
17
17
  sink 'input', tap('output/splitter', :sink_mode => :replace)
18
18
  end
19
- end.complete(local_properties('build/sample'))
19
+ end.complete
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'sub_assembly' do
6
+ cascade 'sub_assembly', :mode => :local do
7
7
  flow 'sub_assembly' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -27,4 +27,4 @@ cascade 'sub_assembly' do
27
27
 
28
28
  sink 'input', tap('output/sub_assembly', :sink_mode => :replace)
29
29
  end
30
- end.complete(local_properties('build/sample'))
30
+ end.complete
data/samples/union.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'union' do
6
+ cascade 'union', :mode => :local do
7
7
  flow 'union' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -33,4 +33,4 @@ cascade 'union' do
33
33
 
34
34
  sink 'union', tap('output/union', :sink_mode => :replace)
35
35
  end
36
- end.complete(local_properties('build/sample'))
36
+ end.complete
@@ -33,56 +33,56 @@ context Cascading do
33
33
  end
34
34
 
35
35
  it 'should find branches to sink' do
36
- cascade 'branched_pass' do
36
+ cascade 'branched_pass', :mode => :local do
37
37
  flow 'branched_pass' do
38
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
38
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
39
39
  assembly 'input' do
40
40
  branch 'branched_input' do
41
41
  project 'line'
42
42
  end
43
43
  end
44
- sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
44
+ sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :sink_mode => :replace)
45
45
  end
46
46
  end.complete
47
47
 
48
- ilc = `wc -l spec/resource/test_input.txt`.split(/\s+/).first
49
- olc = `wc -l #{OUTPUT_DIR}/branched_pass_out/part-00000`.split(/\s+/).first
50
- ilc.should == olc
48
+ ilc = `wc -l spec/resource/test_input.txt`.strip.split(/\s+/).first
49
+ olc = `wc -l #{OUTPUT_DIR}/branched_pass_out`.strip.split(/\s+/).first
50
+ olc.should == ilc
51
51
  end
52
52
 
53
53
  it 'should create an isolated namespace per cascade' do
54
- cascade 'double' do
54
+ cascade 'double', :mode => :local do
55
55
  flow 'double' do
56
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
56
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
57
57
  assembly 'input' do # Dup name
58
58
  insert 'doubled' => expr('line:string + "," + line:string')
59
59
  project 'doubled'
60
60
  end
61
- sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
61
+ sink 'input', tap("#{OUTPUT_DIR}/double_out", :sink_mode => :replace)
62
62
  end
63
63
  end
64
64
 
65
- cascade 'pass' do
65
+ cascade 'pass', :mode => :local do
66
66
  flow 'pass' do
67
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
67
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
68
68
  assembly 'input' do # Dup name
69
69
  project 'line'
70
70
  end
71
- sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
71
+ sink 'input', tap("#{OUTPUT_DIR}/pass_out", :sink_mode => :replace)
72
72
  end
73
73
  end
74
74
 
75
75
  Cascade.get('double').complete
76
76
  Cascade.get('pass').complete
77
- diff = `diff #{OUTPUT_DIR}/double_out/part-00000 #{OUTPUT_DIR}/pass_out/part-00000`
77
+ diff = `diff #{OUTPUT_DIR}/double_out #{OUTPUT_DIR}/pass_out`
78
78
  diff.should_not be_empty
79
79
  end
80
80
 
81
81
  it 'should support joins in branches' do
82
- cascade 'branch_join' do
82
+ cascade 'branch_join', :mode => :local do
83
83
  flow 'branch_join' do
84
- source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
85
- source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
84
+ source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
85
+ source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
86
86
 
87
87
  assembly 'left' do
88
88
  split 'line', ['x', 'y', 'z'], :pattern => /,/
@@ -98,7 +98,7 @@ context Cascading do
98
98
  end
99
99
  end
100
100
 
101
- sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
101
+ sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :sink_mode => :replace)
102
102
  end
103
103
  end.complete
104
104
  end
data/spec/spec_util.rb CHANGED
@@ -23,28 +23,28 @@ module Cascading
23
23
  end
24
24
 
25
25
  def test_flow(&block)
26
- cascade = cascade 'test_app' do
26
+ cascade = cascade 'test_app', :mode => :local do
27
27
  flow 'test', &block
28
28
  end
29
- cascade.complete(local_properties(BUILD_DIR))
29
+ cascade.complete
30
30
  end
31
31
 
32
32
  def test_assembly(params = {}, &block)
33
33
  branches = params[:branches] || []
34
34
 
35
35
  test_flow do
36
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
36
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
37
37
 
38
38
  # Default Fields defined by TextLineScheme
39
39
  check_scope :source => 'input', :values_fields => ['offset', 'line']
40
40
 
41
41
  assembly 'input', &block
42
42
 
43
- sink 'input', tap("#{OUTPUT_DIR}/out.txt", :kind => :lfs, :sink_mode => :replace)
43
+ sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
44
44
 
45
45
  # Branches must be sunk so that they (and their assertions) will be run
46
46
  branches.each do |branch|
47
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :kind => :lfs, :sink_mode => :replace)
47
+ sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
48
48
  end
49
49
  end
50
50
  end
@@ -54,8 +54,8 @@ def test_join_assembly(params = {}, &block)
54
54
  post_join_block = params[:post_join_block]
55
55
 
56
56
  test_flow do
57
- source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
58
- source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
57
+ source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
58
+ source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
59
59
 
60
60
  # Default Fields defined by TextLineScheme
61
61
  check_scope :source => 'left', :values_fields => ['offset', 'line']
@@ -82,11 +82,11 @@ def test_join_assembly(params = {}, &block)
82
82
  instance_eval &post_join_block if post_join_block
83
83
  end
84
84
 
85
- sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :kind => :lfs, :sink_mode => :replace)
85
+ sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
86
86
 
87
87
  # Branches must be sunk so that they (and their assertions) will be run
88
88
  branches.each do |branch|
89
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :kind => :lfs, :sink_mode => :replace)
89
+ sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
90
90
  end
91
91
  end
92
92
  end