rubadoop 0.7.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +43 -0
- data/README.rdoc +7 -0
- data/Rakefile +25 -0
- data/lib/rubadoop.rb +26 -0
- data/lib/rubadoop/base_dsl.rb +31 -0
- data/lib/rubadoop/emr.rb +23 -0
- data/lib/rubadoop/emr/jobflow_builder.rb +36 -0
- data/lib/rubadoop/emr/jobflow_builder/bootstrap_action.rb +27 -0
- data/lib/rubadoop/emr/jobflow_builder/job_spec.rb +77 -0
- data/lib/rubadoop/emr/jobflow_builder/step.rb +41 -0
- data/lib/rubadoop/map_reduce.rb +23 -0
- data/lib/rubadoop/map_reduce/call_java.rb +112 -0
- data/lib/rubadoop/map_reduce/call_streaming.rb +55 -0
- data/lib/rubadoop/map_reduce/identity.rb +30 -0
- data/lib/rubadoop/map_reduce/io.rb +128 -0
- data/lib/rubadoop/map_reduce/job_conf_environment.rb +9 -0
- data/lib/rubadoop/map_reduce/mappable.rb +59 -0
- data/lib/rubadoop/map_reduce/mapper.rb +15 -0
- data/lib/rubadoop/map_reduce/reducable.rb +74 -0
- data/lib/rubadoop/map_reduce/reducer.rb +12 -0
- data/lib/rubadoop/map_reduce/test_assist.rb +65 -0
- data/lib/rubadoop/map_reduce/utils.rb +29 -0
- data/lib/rubadoop/oozie/workflow_builder.rb +42 -0
- data/lib/rubadoop/oozie/workflow_builder/job_properties.rb +19 -0
- data/lib/rubadoop/version.rb +3 -0
- data/test/rubadoop/base_dsl_test.rb +27 -0
- data/test/rubadoop/emr/jobflow_builder_test.rb +184 -0
- data/test/rubadoop/map_reduce/call_java_test.rb +122 -0
- data/test/rubadoop/map_reduce/call_streaming_test.rb +81 -0
- data/test/rubadoop/map_reduce/identity_test.rb +40 -0
- data/test/rubadoop/map_reduce/io_test.rb +51 -0
- data/test/rubadoop/map_reduce/job_conf_environment_test.rb +28 -0
- data/test/rubadoop/map_reduce/mappable_test.rb +62 -0
- data/test/rubadoop/map_reduce/mapper_test.rb +76 -0
- data/test/rubadoop/map_reduce/reducable_test.rb +12 -0
- data/test/rubadoop/map_reduce/reducer_test.rb +137 -0
- data/test/rubadoop/map_reduce/test_assist_test.rb +76 -0
- data/test/rubadoop/oozie/workflow_builder_test.rb +21 -0
- data/test/test_helper.rb +10 -0
- metadata +140 -0
module Rubadoop
  module MapReduce
    # Convenience mixin for map/reduce scripts: line splitting plus
    # shortcuts for writing counters, status, errors and entries to the
    # shared MapReduce output object.
    module Utils
      # Split a raw input line into [key, value] on the first tab only.
      # Lines without a tab yield a one-element array; "" yields [].
      def key_value_split(line)
        line.split("\t", 2)
      end

      # Increment a Hadoop counter in the given group by amount.
      def log_counter(group, counter, amount)
        mr_out.counter(group, counter, amount)
      end

      # Report a free-form status message.
      def log_status(status)
        mr_out.status(status)
      end

      # Report an error message.
      def log_error(message)
        mr_out.error(message)
      end

      # Emit a single output value.
      def out_entry(value)
        mr_out.entry(value)
      end

      # Emit a key/value output pair.
      def out_map_entry(key, value)
        mr_out.map_entry(key, value)
      end

      private

      # Single access point for the shared output sink.
      def mr_out
        ::Rubadoop::MapReduce.out
      end
    end
  end
end
module Rubadoop
  module Oozie
    # Builds Oozie job.properties objects via a small block DSL.
    module WorkflowBuilder
      extend ActiveSupport::Autoload

      autoload :JobProperties

      class << self
        # Build and configure a JobProperties instance.
        #
        # params - Hash passed through to JobProperties.new. The key
        #          :skip_aws_keys suppresses injection of the AWS
        #          credential properties below.
        # block  - optional configuration block; with arity 1 it receives
        #          the JobProperties instance, otherwise it is
        #          instance_eval'd against it.
        #
        # Returns the configured JobProperties.
        def new_job_properties(params = {}, &block)
          job_spec = JobProperties.new(params)

          if block_given?
            if block.arity == 1
              yield job_spec
            else
              # Parenthesized to avoid Ruby's "`&' interpreted as argument
              # prefix" ambiguity warning.
              job_spec.instance_eval(&block)
            end
          end

          # NOTE(review): only the *presence* of :skip_aws_keys matters, so
          # `skip_aws_keys: false` still skips injection — confirm intended.
          unless params.key?(:skip_aws_keys)
            require 'aws-sdk' # lazy: only loaded when credentials are injected
            aws_config = AWS.config.credentials
            job_spec.prop 'fs.s3n.awsAccessKeyId', aws_config[:access_key_id]
            job_spec.prop 'fs.s3.awsAccessKeyId', aws_config[:access_key_id]
            job_spec.prop 'fs.s3n.awsSecretAccessKey', aws_config[:secret_access_key]
            job_spec.prop 'fs.s3.awsSecretAccessKey', aws_config[:secret_access_key]
          end

          job_spec
        end

        # Evaluate a string of DSL source against a fresh JobProperties.
        # Obfuscated parameter names keep the user-supplied code from
        # accidentally shadowing them.
        def load_job_properties(__params__ = {}, __spec_code__)
          new_job_properties(__params__) do |dsl|
            dsl.instance_eval __spec_code__
          end
        end
      end
    end
  end
end
module Rubadoop
  module Oozie
    module WorkflowBuilder
      # DSL object that accumulates Oozie job properties as name => value.
      class JobProperties < Rubadoop::BaseDsl
        # Record a single property. Later calls with the same name
        # overwrite earlier ones.
        def prop(name, value)
          props[name] = value
        end

        # All properties recorded so far (an empty Hash when none).
        def to_h
          props
        end

        private

        # Lazily-initialized backing store shared by prop and to_h.
        def props
          @props ||= {}
        end
      end
    end
  end
end
require 'test_helper'

module Rubadoop
  # Exercises the params accessor contract provided by BaseDsl.
  class BaseDslTest < MiniTest::Spec

    # Unknown param keys raise RuntimeError instead of returning nil.
    def test_params_missing
      # Capture the test instance: inside instance_eval `self` is the DSL
      # object, so assertions must go through this local.
      spec = self
      BaseDsl.new(a: true, 'dude' => 'rubadoop').instance_eval do
        spec.assert params[:a]
        spec.assert_raises RuntimeError do
          params[:nope]
        end
      end
    end

    # Params are readable by either string or symbol key, regardless of
    # which form was used at construction time.
    def test_params_with_indifferent_access
      spec = self
      BaseDsl.new(a: true, 'dude' => 'rubadoop').instance_eval do
        spec.assert params[:a]
        spec.assert params['a']
        spec.assert_equal 'rubadoop', params[:dude]
        spec.assert_equal 'rubadoop', params['dude']
      end
    end
  end
end
require 'test_helper'

module Rubadoop
  module Emr
    # Tests the JobflowBuilder DSL: each test builds a job spec through the
    # block DSL and checks the EMR API command hash it renders.
    class JobflowBuilderTest < MiniTest::Spec

      # An empty spec renders an empty create command; instance variables
      # assigned inside the block become command fields.
      def test_base
        command = JobflowBuilder.new_job_spec do; end.to_create_command
        assert_equal command, Hash.new

        command = JobflowBuilder.new_job_spec do
          my_name = "testink"
          @name = my_name
        end.to_create_command
        assert_equal command, Hash[name: 'testink']
      end

      # Params passed to new_job_spec are readable via `params` in the block.
      def test_param
        command = JobflowBuilder.new_job_spec(name: 'testink') do
          @name = params[:name]
        end.to_create_command
        assert_equal command, Hash[name: 'testink']
      end

      # Unknown param keys raise RuntimeError; with an explicit block arg,
      # a bare `params` is not in scope and raises NameError instead.
      def test_missing_param
        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec(name: 'testink') do
            @name = params[:namey]
          end.to_create_command
        end

        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec(name: 'testink') do |s|
            s.name = s.params[:namey]
          end.to_create_command
        end

        assert_raises NameError do
          JobflowBuilder.new_job_spec(name: 'testink') do |s|
            s.name = params[:namey]
          end.to_create_command
        end
      end

      # NOTE(review): instance_count is given as 1 but the expected command
      # has instance_count 2 — presumably with_instances adds the master
      # node to the count; confirm against JobSpec#with_instances.
      def test_with_instances
        command = JobflowBuilder.new_job_spec(name: 'something') do
          optional_param :instance_type, 'c1.medium'
          optional_param :instance_count, 1

          with_instances 'c1.medium', params[:instance_type], params[:instance_count]
        end.to_create_command
        assert_equal command, {:instances=>{:master_instance_type=>"c1.medium", :slave_instance_type=>"c1.medium", :instance_count=>2}}
      end

      # Bootstrap actions: attribute-style block, no block (default name),
      # and instance_eval'd DSL block all render the same command shape.
      def test_bootstrap

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location" do |b|
            b.name = 'bootstrap name'
            b.args = ['arg1', 'arg2']
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"bootstrap name",
                                   script_bootstrap_action:{path:"bootstrap location",
                                   args:["arg1", "arg2"]}}]]

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location"
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"Bootstrap Action",
                                   script_bootstrap_action:{path:"bootstrap location"}}]]

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location" do
            @name = 'bootstrap name'
            arg "arg1", "arg2"
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"bootstrap name",
                                   script_bootstrap_action:{path:"bootstrap location",
                                   args:["arg1", "arg2"]}}]]
      end

      # Spec params are reachable from bootstrap blocks, both through the
      # block argument and through instance_eval.
      def test_bootstrap_param
        command = JobflowBuilder.new_job_spec(bs_name: "total bs") do
          with_bootstrap_action "bootstrap location" do |b|
            b.name = b.params[:bs_name]
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"total bs",
                                   script_bootstrap_action:{path:"bootstrap location"}}]]

        command = JobflowBuilder.new_job_spec(bs_name: "total bs") do
          with_bootstrap_action "bootstrap location" do
            @name = params[:bs_name]
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"total bs",
                                   script_bootstrap_action:{path:"bootstrap location"}}]]
      end

      # Jar steps carry name, jar, main class, args and the failure action.
      def test_jar_steps
        command = JobflowBuilder.new_job_spec do
          add_jar_step 'jar name', 'jar jar', 'Binks' do |s|
            s.args = ["Meesa", "Stupid"]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                       args:["Meesa", "Stupid"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # Streaming steps expand mapper/reducer/input/output into streaming-jar
      # arguments, with extra args appended after them.
      def test_streaming_steps
        command = JobflowBuilder.new_job_spec do
          add_streaming_step "streaming name" do |s|
            s.mapper = 'mapit'
            s.reducer = 'reduceit'
            s.input = 'fromhere'
            s.output = 'tothere'
            s.args = [ 'arg1', 'arg2',]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"streaming name", hadoop_jar_step:{jar: JobflowBuilder::JobSpec::STREAMING_JAR_LOCATION,
                       args:["-input", "fromhere", "-output","tothere", "-mapper", "mapit", "-reducer", "reduceit", "arg1", "arg2"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # Same expectation as test_streaming_steps, configured via instance
      # variables inside an instance_eval'd block.
      def test_streaming_steps2
        command = JobflowBuilder.new_job_spec do
          add_streaming_step "streaming name" do
            @mapper = 'mapit'
            @reducer = 'reduceit'
            @input = 'fromhere'
            @output = 'tothere'
            @args = [ 'arg1', 'arg2',]
            @action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"streaming name", hadoop_jar_step:{jar: JobflowBuilder::JobSpec::STREAMING_JAR_LOCATION,
                       args:["-input", "fromhere", "-output","tothere", "-mapper", "mapit", "-reducer", "reduceit", "arg1", "arg2"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end


      # Step blocks can read spec params too (block-arg and DSL styles).
      def test_steps_params
        command = JobflowBuilder.new_job_spec(arg1: 'aack') do
          add_jar_step 'jar name', 'jar jar', 'Binks' do |s|
            s.arg s.params[:arg1]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                       args:["aack"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command

        command = JobflowBuilder.new_job_spec(arg1: 'aack') do
          add_jar_step 'jar name', 'jar jar', 'Binks' do
            arg params[:arg1]
            @action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                       args:["aack"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # keep_alive requires a real boolean; a truthy string must raise.
      def test_keepalive
        command = JobflowBuilder.new_job_spec do |job|
          job.keep_alive true
        end.to_create_command
        assert_equal command, Hash[instances: {keep_job_flow_alive_when_no_steps: true}]

        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec do
            keep_alive "false" #if this doesn't check boolean, result would be true
          end
        end
      end

    end
  end
end
require 'test_helper'

module Rubadoop
  module MapReduce

    # Tests the CallJava DSL and its two renderings: to_hadoop_cli (an argv
    # array for the `hadoop jar` command line) and to_h (a plain hash).
    class CallJavaTest < MiniTest::Spec

      # Minimal call: only the jar is required.
      def test_simple
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
        end
        assert_equal "hadoop jar hadoop-pie.jar", call.to_hadoop_cli.join(' ')
      end

      # env entries render as `-cmdenv name=value`.
      def test_env
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.env 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cmdenv abcd=efgh", call.to_hadoop_cli.join(' ')
      end

      # conf entries render as `-Dname=value`.
      def test_conf
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=efgh", call.to_hadoop_cli.join(' ')
      end

      # conf_concat appends additional -D flags for the same key, and works
      # whether or not the key was first set via conf.
      def test_conf_multi
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf 'abcd', 'efgh'
          s.conf_concat 'abcd', 'ijkl'
          s.conf_concat 'abcd', 'mnop'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=efgh -Dabcd=ijkl -Dabcd=mnop", call.to_hadoop_cli.join(' ')

        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf_concat 'abcd', 'mnop'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=mnop", call.to_hadoop_cli.join(' ')
      end

      # Plain args are appended verbatim.
      def test_arg
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          arg 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar abcd efgh", call.to_hadoop_cli.join(' ')
      end

      # Archives render as `-cacheArchive path#alias`.
      def test_archive
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          archive 'dungeon', 'd'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cacheArchive dungeon#d", call.to_hadoop_cli.join(' ')
      end

      # Files render as `-cacheFile path#alias`.
      def test_files
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          file 'phile', 'f'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cacheFile phile#f", call.to_hadoop_cli.join(' ')
      end

      # The CLI is rendered in a fixed section order (jar, main class, confs,
      # envs, files, archives, args) regardless of DSL call order.
      def test_order
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          @main_class = 'com.java.package.DoItLive'
          env 'e1', 'dudio'
          env 'e2', 'dudi-rio'
          file 'phile', 'f'
          conf 'c1', 'conf'
          conf 'c2', 'cronf'
          arg 'seriously'
          archive 'dungeon', 'd'
          file 'phile2', 'f2'
        end
        assert_equal "hadoop jar hadoop-pie.jar com.java.package.DoItLive -Dc1=conf -Dc2=cronf -cmdenv e1=dudio -cmdenv e2=dudi-rio -cacheFile phile#f -cacheFile phile2#f2 -cacheArchive dungeon#d seriously", call.to_hadoop_cli.join(' ')
      end

      # A missing jar is rejected by both renderings.
      def test_validation
        assert_raises RuntimeError do
          CallJava.new_java_call() do
            @main_class = 'com.java.package.DoItLive'
          end.to_hadoop_cli
        end

        assert_raises RuntimeError do
          CallJava.new_java_call() do
            @main_class = 'com.java.package.DoItLive'
          end.to_h
        end
      end

      # to_h keeps declared fields only — unknown ivars like @poopsy are
      # dropped; duplicate file entries are preserved as-is.
      def test_hash
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          @main_class = 'com.java.package.DoItLive'
          env 'e1', 'dudio'
          env 'e2', 'dudi-rio'
          file 'phile', 'f'
          conf 'c1', 'conf'
          conf 'c2', 'cronf'
          arg 'seriously'
          archive 'dungeon', 'd'
          file 'phile', 'f'
          @poopsy = 'poo'
        end
        assert_equal Hash[jar: "hadoop-pie.jar", archives: ["dungeon#d"],
                          main_class: "com.java.package.DoItLive", envs: {e1: "dudio", e2: "dudi-rio"},
                          files: ["phile#f", "phile#f"], confs: {c1: "conf", c2: "cronf"}, args: ["seriously"]], call.to_h
      end

    end
  end
end
require 'test_helper'

module Rubadoop
  module MapReduce

    # Tests the CallStreaming DSL: CLI rendering order, hash rendering, and
    # validation of the required streaming fields.
    class CallStreamingTest < MiniTest::Spec

      # Happy path: jar, input, output, mapper and reducer render in order.
      def test_simple
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.input = 'input'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
        end
        assert_equal "hadoop jar hadoop-streaming.jar -input input -output output -mapper /bin/cat -reducer /bin/wc", streaming.to_hadoop_cli.join(' ')
      end

      # to_h includes unset declared fields as nil and drops unknown ivars
      # such as @test.
      def test_hash
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.inputformat = 'green'
          s.input = 'input'
          s.outputformat = 'blue'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
          @test = 'noshow'
        end
        assert_equal Hash[jar: "hadoop-streaming.jar", main_class: nil, envs: nil, args: nil, confs: nil, files: nil, archives: nil,
                          mapper: "/bin/cat", reducer: "/bin/wc", inputformat: "green", input: "input", outputformat: "blue", output: "output"], streaming.to_h
      end

      # Full CLI as an argv array, with inputformat/outputformat flags.
      def test_cli
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.inputformat = 'green'
          s.input = 'input'
          s.outputformat = 'blue'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
          @test = 'noshow'
        end
        assert_equal ["hadoop", "jar", "hadoop-streaming.jar", "-inputformat", "green", "-input", "input", "-outputformat", "blue",
                      "-output", "output", "-mapper", "/bin/cat", "-reducer", "/bin/wc"], streaming.to_hadoop_cli
      end

      # Missing mapper (first case) or missing output (second case) raises
      # from either rendering.
      def test_validation
        assert_raises RuntimeError do
          CallStreaming.new_streaming_call do |s|
            s.jar = 'hadoop-streaming.jar'
            s.input = 'input'
            s.output = 'output'
            s.reducer = '/bin/wc'
          end.to_hadoop_cli
        end

        assert_raises RuntimeError do
          CallStreaming.new_streaming_call() do
            @jar = 'hadoop-streaming.jar'
            @input = 'input'
            @mapper = '/bin/cat'
            @reducer = '/bin/wc'
          end.to_h
        end
      end

      # The jar requirement inherited from CallJava is still enforced.
      def test_validation_inherited
        assert_raises RuntimeError do
          CallStreaming.new_streaming_call() do |s|
            s.input = 'input'
            s.output = 'output'
            s.mapper = '/bin/cat'
            s.reducer = '/bin/wc'
          end.to_hadoop_cli
        end
      end
    end
  end
end