cascading.jruby 0.0.8 → 0.0.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/samples/branch.rb CHANGED
@@ -4,7 +4,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
4
 
5
5
  require 'cascading'
6
6
 
7
- cascade 'branch' do
7
+ cascade 'branch', :mode => :local do
8
8
  flow 'branch' do
9
9
  source 'input', tap('samples/data/data2.txt')
10
10
 
@@ -27,4 +27,4 @@ cascade 'branch' do
27
27
  sink 'branch1', tap('output/branch1', :sink_mode => :replace)
28
28
  sink 'branch2', tap('output/branch2', :sink_mode => :replace)
29
29
  end
30
- end.complete(local_properties('build/sample'))
30
+ end.complete
data/samples/copy.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'copy' do
6
+ cascade 'copy', :mode => :local do
7
7
  flow 'copy' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -17,4 +17,4 @@ cascade 'copy' do
17
17
 
18
18
  sink 'input', tap('output/copy', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
data/samples/group_by.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'group_by' do
6
+ cascade 'group_by', :mode => :local do
7
7
  flow 'group_by' do
8
8
  source 'input', tap('samples/data/data_group_by.txt')
9
9
 
@@ -53,9 +53,9 @@ cascade 'group_by' do
53
53
  sink 'empty_group_by', tap('output/empty_group_by', :sink_mode => :replace)
54
54
  sink 'blockless_group_by', tap('output/blockless_group_by', :sink_mode => :replace)
55
55
  sink 'aggregate_by', tap('output/aggregate_by', :sink_mode => :replace)
56
- sink 'empty_aggregate_by', tap('output/empty_aggregate_by', :sink_mode => :replace)
57
- sink 'blockless_aggregate_by', tap('output/blockless_aggregate_by', :sink_mode => :replace)
56
+ #sink 'empty_aggregate_by', tap('output/empty_aggregate_by', :sink_mode => :replace)
57
+ #sink 'blockless_aggregate_by', tap('output/blockless_aggregate_by', :sink_mode => :replace)
58
58
  sink 'unique', tap('output/unique', :sink_mode => :replace)
59
59
  end
60
- #end.draw(ARGV[0], local_properties('build/sample'))
61
- end.complete(local_properties('build/sample'))
60
+ #end.draw(ARGV[0])
61
+ end.complete
data/samples/join.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'join' do
6
+ cascade 'join', :mode => :local do
7
7
  flow 'join' do
8
8
  source 'input1', tap('samples/data/data_join1.txt')
9
9
  source 'input2', tap('samples/data/data_join2.txt')
@@ -28,4 +28,4 @@ cascade 'join' do
28
28
 
29
29
  sink 'join', tap('output/join', :sink_mode => :replace)
30
30
  end
31
- end.complete(local_properties('build/sample'))
31
+ end.complete
@@ -3,15 +3,14 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'logwordcount' do
6
+ cascade 'logwordcount', :mode => :local do
7
7
  flow 'logwordcount' do
8
8
  # This works just as well, but will get you blocked by Project Gutenberg
9
9
  #source 'input', tap('http://www.gutenberg.org/files/20417/20417-8.txt')
10
10
  source 'input', tap('samples/data/gutenberg/the_outline_of_science_vol_1')
11
11
 
12
12
  assembly 'input' do
13
- # TODO: create a helper for RegexSplitGenerator
14
- each 'line', :function => regex_split_generator('word', :pattern => /[.,]*\s+/)
13
+ split_rows 'line', 'word', :pattern => /[.,]*\s+/, :output => 'word'
15
14
  group_by 'word' do
16
15
  count
17
16
  end
@@ -20,4 +19,4 @@ cascade 'logwordcount' do
20
19
 
21
20
  sink 'input', tap('output/logwordcount', :sink_mode => :replace)
22
21
  end
23
- end.complete(local_properties('build/sample'))
22
+ end.complete
data/samples/project.rb CHANGED
@@ -5,7 +5,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
5
5
 
6
6
  require 'cascading'
7
7
 
8
- cascade 'project' do
8
+ cascade 'project', :mode => :local do
9
9
  flow 'project' do
10
10
  source 'input', tap('samples/data/data2.txt')
11
11
 
@@ -20,4 +20,4 @@ cascade 'project' do
20
20
 
21
21
  sink 'input', tap('output/project', :sink_mode => :replace)
22
22
  end
23
- end.complete(local_properties('build/sample'))
23
+ end.complete
data/samples/rename.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'rename' do
6
+ cascade 'rename', :mode => :local do
7
7
  flow 'rename' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -17,4 +17,4 @@ cascade 'rename' do
17
17
 
18
18
  sink 'input', tap('output/rename', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'scorenames' do
6
+ cascade 'scorenames', :mode => :local do
7
7
  flow 'scorenames' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -17,4 +17,4 @@ cascade 'scorenames' do
17
17
 
18
18
  sink 'input', tap('output/scorenames', :sink_mode => :replace)
19
19
  end
20
- end.complete(local_properties('build/sample'))
20
+ end.complete
data/samples/splitter.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'splitter' do
6
+ cascade 'splitter', :mode => :local do
7
7
  flow 'splitter' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -16,4 +16,4 @@ cascade 'splitter' do
16
16
 
17
17
  sink 'input', tap('output/splitter', :sink_mode => :replace)
18
18
  end
19
- end.complete(local_properties('build/sample'))
19
+ end.complete
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'sub_assembly' do
6
+ cascade 'sub_assembly', :mode => :local do
7
7
  flow 'sub_assembly' do
8
8
  source 'input', tap('samples/data/data2.txt')
9
9
 
@@ -27,4 +27,4 @@ cascade 'sub_assembly' do
27
27
 
28
28
  sink 'input', tap('output/sub_assembly', :sink_mode => :replace)
29
29
  end
30
- end.complete(local_properties('build/sample'))
30
+ end.complete
data/samples/union.rb CHANGED
@@ -3,7 +3,7 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
3
 
4
4
  require 'cascading'
5
5
 
6
- cascade 'union' do
6
+ cascade 'union', :mode => :local do
7
7
  flow 'union' do
8
8
  # You don't have to curl and cache inputs: tap can fetch via HTTP
9
9
  #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
@@ -33,4 +33,4 @@ cascade 'union' do
33
33
 
34
34
  sink 'union', tap('output/union', :sink_mode => :replace)
35
35
  end
36
- end.complete(local_properties('build/sample'))
36
+ end.complete
@@ -33,56 +33,56 @@ context Cascading do
33
33
  end
34
34
 
35
35
  it 'should find branches to sink' do
36
- cascade 'branched_pass' do
36
+ cascade 'branched_pass', :mode => :local do
37
37
  flow 'branched_pass' do
38
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
38
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
39
39
  assembly 'input' do
40
40
  branch 'branched_input' do
41
41
  project 'line'
42
42
  end
43
43
  end
44
- sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :kind => :lfs, :sink_mode => :replace)
44
+ sink 'branched_input', tap("#{OUTPUT_DIR}/branched_pass_out", :sink_mode => :replace)
45
45
  end
46
46
  end.complete
47
47
 
48
- ilc = `wc -l spec/resource/test_input.txt`.split(/\s+/).first
49
- olc = `wc -l #{OUTPUT_DIR}/branched_pass_out/part-00000`.split(/\s+/).first
50
- ilc.should == olc
48
+ ilc = `wc -l spec/resource/test_input.txt`.strip.split(/\s+/).first
49
+ olc = `wc -l #{OUTPUT_DIR}/branched_pass_out`.strip.split(/\s+/).first
50
+ olc.should == ilc
51
51
  end
52
52
 
53
53
  it 'should create an isolated namespace per cascade' do
54
- cascade 'double' do
54
+ cascade 'double', :mode => :local do
55
55
  flow 'double' do
56
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
56
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
57
57
  assembly 'input' do # Dup name
58
58
  insert 'doubled' => expr('line:string + "," + line:string')
59
59
  project 'doubled'
60
60
  end
61
- sink 'input', tap("#{OUTPUT_DIR}/double_out", :kind => :lfs, :sink_mode => :replace)
61
+ sink 'input', tap("#{OUTPUT_DIR}/double_out", :sink_mode => :replace)
62
62
  end
63
63
  end
64
64
 
65
- cascade 'pass' do
65
+ cascade 'pass', :mode => :local do
66
66
  flow 'pass' do
67
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
67
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
68
68
  assembly 'input' do # Dup name
69
69
  project 'line'
70
70
  end
71
- sink 'input', tap("#{OUTPUT_DIR}/pass_out", :kind => :lfs, :sink_mode => :replace)
71
+ sink 'input', tap("#{OUTPUT_DIR}/pass_out", :sink_mode => :replace)
72
72
  end
73
73
  end
74
74
 
75
75
  Cascade.get('double').complete
76
76
  Cascade.get('pass').complete
77
- diff = `diff #{OUTPUT_DIR}/double_out/part-00000 #{OUTPUT_DIR}/pass_out/part-00000`
77
+ diff = `diff #{OUTPUT_DIR}/double_out #{OUTPUT_DIR}/pass_out`
78
78
  diff.should_not be_empty
79
79
  end
80
80
 
81
81
  it 'should support joins in branches' do
82
- cascade 'branch_join' do
82
+ cascade 'branch_join', :mode => :local do
83
83
  flow 'branch_join' do
84
- source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
85
- source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
84
+ source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
85
+ source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
86
86
 
87
87
  assembly 'left' do
88
88
  split 'line', ['x', 'y', 'z'], :pattern => /,/
@@ -98,7 +98,7 @@ context Cascading do
98
98
  end
99
99
  end
100
100
 
101
- sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :kind => :lfs, :sink_mode => :replace)
101
+ sink 'branch_join', tap("#{OUTPUT_DIR}/branch_join_out.txt", :sink_mode => :replace)
102
102
  end
103
103
  end.complete
104
104
  end
data/spec/spec_util.rb CHANGED
@@ -23,28 +23,28 @@ module Cascading
23
23
  end
24
24
 
25
25
  def test_flow(&block)
26
- cascade = cascade 'test_app' do
26
+ cascade = cascade 'test_app', :mode => :local do
27
27
  flow 'test', &block
28
28
  end
29
- cascade.complete(local_properties(BUILD_DIR))
29
+ cascade.complete
30
30
  end
31
31
 
32
32
  def test_assembly(params = {}, &block)
33
33
  branches = params[:branches] || []
34
34
 
35
35
  test_flow do
36
- source 'input', tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
36
+ source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
37
37
 
38
38
  # Default Fields defined by TextLineScheme
39
39
  check_scope :source => 'input', :values_fields => ['offset', 'line']
40
40
 
41
41
  assembly 'input', &block
42
42
 
43
- sink 'input', tap("#{OUTPUT_DIR}/out.txt", :kind => :lfs, :sink_mode => :replace)
43
+ sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
44
44
 
45
45
  # Branches must be sunk so that they (and their assertions) will be run
46
46
  branches.each do |branch|
47
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :kind => :lfs, :sink_mode => :replace)
47
+ sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
48
48
  end
49
49
  end
50
50
  end
@@ -54,8 +54,8 @@ def test_join_assembly(params = {}, &block)
54
54
  post_join_block = params[:post_join_block]
55
55
 
56
56
  test_flow do
57
- source 'left', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
58
- source 'right', tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
57
+ source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
58
+ source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
59
59
 
60
60
  # Default Fields defined by TextLineScheme
61
61
  check_scope :source => 'left', :values_fields => ['offset', 'line']
@@ -82,11 +82,11 @@ def test_join_assembly(params = {}, &block)
82
82
  instance_eval &post_join_block if post_join_block
83
83
  end
84
84
 
85
- sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :kind => :lfs, :sink_mode => :replace)
85
+ sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
86
86
 
87
87
  # Branches must be sunk so that they (and their assertions) will be run
88
88
  branches.each do |branch|
89
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :kind => :lfs, :sink_mode => :replace)
89
+ sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
90
90
  end
91
91
  end
92
92
  end