rubadoop 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +43 -0
- data/README.rdoc +7 -0
- data/Rakefile +25 -0
- data/lib/rubadoop.rb +26 -0
- data/lib/rubadoop/base_dsl.rb +31 -0
- data/lib/rubadoop/emr.rb +23 -0
- data/lib/rubadoop/emr/jobflow_builder.rb +36 -0
- data/lib/rubadoop/emr/jobflow_builder/bootstrap_action.rb +27 -0
- data/lib/rubadoop/emr/jobflow_builder/job_spec.rb +77 -0
- data/lib/rubadoop/emr/jobflow_builder/step.rb +41 -0
- data/lib/rubadoop/map_reduce.rb +23 -0
- data/lib/rubadoop/map_reduce/call_java.rb +112 -0
- data/lib/rubadoop/map_reduce/call_streaming.rb +55 -0
- data/lib/rubadoop/map_reduce/identity.rb +30 -0
- data/lib/rubadoop/map_reduce/io.rb +128 -0
- data/lib/rubadoop/map_reduce/job_conf_environment.rb +9 -0
- data/lib/rubadoop/map_reduce/mappable.rb +59 -0
- data/lib/rubadoop/map_reduce/mapper.rb +15 -0
- data/lib/rubadoop/map_reduce/reducable.rb +74 -0
- data/lib/rubadoop/map_reduce/reducer.rb +12 -0
- data/lib/rubadoop/map_reduce/test_assist.rb +65 -0
- data/lib/rubadoop/map_reduce/utils.rb +29 -0
- data/lib/rubadoop/oozie/workflow_builder.rb +42 -0
- data/lib/rubadoop/oozie/workflow_builder/job_properties.rb +19 -0
- data/lib/rubadoop/version.rb +3 -0
- data/test/rubadoop/base_dsl_test.rb +27 -0
- data/test/rubadoop/emr/jobflow_builder_test.rb +184 -0
- data/test/rubadoop/map_reduce/call_java_test.rb +122 -0
- data/test/rubadoop/map_reduce/call_streaming_test.rb +81 -0
- data/test/rubadoop/map_reduce/identity_test.rb +40 -0
- data/test/rubadoop/map_reduce/io_test.rb +51 -0
- data/test/rubadoop/map_reduce/job_conf_environment_test.rb +28 -0
- data/test/rubadoop/map_reduce/mappable_test.rb +62 -0
- data/test/rubadoop/map_reduce/mapper_test.rb +76 -0
- data/test/rubadoop/map_reduce/reducable_test.rb +12 -0
- data/test/rubadoop/map_reduce/reducer_test.rb +137 -0
- data/test/rubadoop/map_reduce/test_assist_test.rb +76 -0
- data/test/rubadoop/oozie/workflow_builder_test.rb +21 -0
- data/test/test_helper.rb +10 -0
- metadata +140 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module Rubadoop
  module MapReduce
    # Mixin of small helpers shared by mapper/reducer implementations:
    # input-line splitting plus thin delegators onto the shared
    # ::Rubadoop::MapReduce.out output sink.
    module Utils
      # Split a raw streaming input line into a [key, value] pair at the
      # first tab; the value keeps any further tabs intact.
      def key_value_split(line)
        line.split("\t", 2)
      end

      # Increment a Hadoop counter through the shared output sink.
      def log_counter(group, counter, amount)
        sink = ::Rubadoop::MapReduce.out
        sink.counter(group, counter, amount)
      end

      # Report task status through the shared output sink.
      def log_status(status)
        sink = ::Rubadoop::MapReduce.out
        sink.status(status)
      end

      # Report an error message through the shared output sink.
      def log_error(message)
        sink = ::Rubadoop::MapReduce.out
        sink.error(message)
      end

      # Emit a plain output record.
      def out_entry(value)
        sink = ::Rubadoop::MapReduce.out
        sink.entry(value)
      end

      # Emit a key/value output record.
      def out_map_entry(key, value)
        sink = ::Rubadoop::MapReduce.out
        sink.map_entry(key, value)
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module Rubadoop
  module Oozie
    # Builder DSL for Oozie job.properties documents.
    module WorkflowBuilder
      extend ActiveSupport::Autoload

      autoload :JobProperties

      class << self
        # Build a JobProperties spec. The block may either take the spec as
        # an argument (arity 1) or be instance_eval'd against it (arity 0).
        #
        # Unless +params+ contains the :skip_aws_keys key — note it is the
        # PRESENCE of the key that matters, not its value, so
        # `skip_aws_keys: false` still skips — the current AWS SDK
        # credentials are copied into the standard fs.s3/fs.s3n properties.
        #
        # params - Hash handed to JobProperties.new (available as DSL params).
        # block  - optional configuration block.
        #
        # Returns the configured JobProperties instance.
        def new_job_properties(params = {}, &block)
          job_spec = JobProperties.new(params)

          if block_given?
            if block.arity == 1
              yield job_spec
            else
              # Parenthesized to avoid Ruby's "ambiguous first argument"
              # warning for a bare `&block`.
              job_spec.instance_eval(&block)
            end
          end

          unless params.key? :skip_aws_keys
            require 'aws-sdk'
            aws_config = AWS.config.credentials
            job_spec.prop 'fs.s3n.awsAccessKeyId', aws_config[:access_key_id]
            job_spec.prop 'fs.s3.awsAccessKeyId', aws_config[:access_key_id]
            job_spec.prop 'fs.s3n.awsSecretAccessKey', aws_config[:secret_access_key]
            job_spec.prop 'fs.s3.awsSecretAccessKey', aws_config[:secret_access_key]
          end

          job_spec
        end

        # Evaluate a string of spec code (e.g. read from a file) against a
        # fresh JobProperties built with __params__. The double-underscore
        # names keep the method's locals out of the eval'd code's way.
        def load_job_properties(__params__ = {}, __spec_code__)
          new_job_properties(__params__) do |dsl|
            dsl.instance_eval __spec_code__
          end
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Rubadoop
  module Oozie
    module WorkflowBuilder
      # DSL object that accumulates name/value job properties.
      class JobProperties < Rubadoop::BaseDsl
        # Record a single property under +name+.
        def prop(name, value)
          props[name] = value
        end

        # The properties recorded so far (returns the live backing hash).
        def to_h
          props
        end

        private

        # Lazily-created backing store, shared by prop and to_h.
        def props
          @props ||= {}
        end
      end

    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'test_helper'

module Rubadoop
  # Exercises the params behaviour exposed by BaseDsl to instance_eval'd
  # DSL blocks: unknown keys raise, known keys allow indifferent access.
  class BaseDslTest < MiniTest::Spec

    def test_params_missing
      spec = self # capture the test so assertions work inside instance_eval
      BaseDsl.new(a: true, 'dude' => 'rubadoop').instance_eval do
        spec.assert params[:a]
        spec.assert_raises(RuntimeError) { params[:nope] }
      end
    end

    def test_params_with_indifferent_access
      spec = self
      BaseDsl.new(a: true, 'dude' => 'rubadoop').instance_eval do
        # Symbol and string keys must both resolve, in both directions.
        spec.assert params[:a]
        spec.assert params['a']
        spec.assert_equal 'rubadoop', params[:dude]
        spec.assert_equal 'rubadoop', params['dude']
      end
    end
  end
end
|
|
27
|
+
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
require 'test_helper'

module Rubadoop
  module Emr
    # Covers the JobflowBuilder DSL: jobflow creation commands, bootstrap
    # actions, jar/streaming steps and keep-alive validation.
    class JobflowBuilderTest < MiniTest::Spec

      # An empty spec yields an empty create command; instance variables
      # set in an arity-0 block become command fields.
      def test_base
        command = JobflowBuilder.new_job_spec do; end.to_create_command
        assert_equal command, Hash.new

        command = JobflowBuilder.new_job_spec do
          my_name = "testink"
          @name = my_name
        end.to_create_command
        assert_equal command, Hash[name: 'testink']
      end

      # params passed to new_job_spec are readable inside the block.
      def test_param
        command = JobflowBuilder.new_job_spec(name: 'testink') do
          @name = params[:name]
        end.to_create_command
        assert_equal command, Hash[name: 'testink']
      end

      # Unknown param keys raise; note the arity-1 form must go through
      # s.params — a bare `params` there is a NameError in the test scope.
      def test_missing_param
        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec(name: 'testink') do
            @name = params[:namey]
          end.to_create_command
        end

        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec(name: 'testink') do |s|
            s.name = s.params[:namey]
          end.to_create_command
        end

        assert_raises NameError do
          JobflowBuilder.new_job_spec(name: 'testink') do |s|
            s.name = params[:namey]
          end.to_create_command
        end
      end

      # with_instances(master, slave, count) — the expected count is 2 for
      # an input count of 1, presumably master + slaves; confirm against
      # JobSpec#with_instances.
      def test_with_instances
        command = JobflowBuilder.new_job_spec(name: 'something') do
          optional_param :instance_type, 'c1.medium'
          optional_param :instance_count, 1

          with_instances 'c1.medium', params[:instance_type], params[:instance_count]
        end.to_create_command
        assert_equal command, {:instances=>{:master_instance_type=>"c1.medium", :slave_instance_type=>"c1.medium", :instance_count=>2}}
      end

      # Bootstrap actions: arity-1 block, bare action (defaults the name to
      # "Bootstrap Action"), and arity-0 block with ivars + arg helper.
      def test_bootstrap

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location" do |b|
            b.name = 'bootstrap name'
            b.args = ['arg1', 'arg2']
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"bootstrap name",
                                                       script_bootstrap_action:{path:"bootstrap location",
                                                                                args:["arg1", "arg2"]}}]]

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location"
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"Bootstrap Action",
                                                       script_bootstrap_action:{path:"bootstrap location"}}]]

        command = JobflowBuilder.new_job_spec do
          with_bootstrap_action "bootstrap location" do
            @name = 'bootstrap name'
            arg "arg1", "arg2"
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"bootstrap name",
                                                       script_bootstrap_action:{path:"bootstrap location",
                                                                                args:["arg1", "arg2"]}}]]
      end

      # Outer job params are visible inside nested bootstrap blocks, via
      # b.params (arity 1) or params (arity 0).
      def test_bootstrap_param
        command = JobflowBuilder.new_job_spec(bs_name: "total bs") do
          with_bootstrap_action "bootstrap location" do |b|
            b.name = b.params[:bs_name]
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"total bs",
                                                       script_bootstrap_action:{path:"bootstrap location"}}]]

        command = JobflowBuilder.new_job_spec(bs_name: "total bs") do
          with_bootstrap_action "bootstrap location" do
            @name = params[:bs_name]
          end
        end.to_create_command
        assert_equal command, Hash[bootstrap_actions:[{name:"total bs",
                                                       script_bootstrap_action:{path:"bootstrap location"}}]]
      end

      # add_jar_step(name, jar, main_class) produces a hadoop_jar_step entry.
      def test_jar_steps
        command = JobflowBuilder.new_job_spec do
          add_jar_step 'jar name', 'jar jar', 'Binks' do |s|
            s.args = ["Meesa", "Stupid"]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                                                         args:["Meesa", "Stupid"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # Streaming step (arity-1 form): mapper/reducer/input/output become
      # -flag args ahead of the extra args, jar is the stock streaming jar.
      def test_streaming_steps
        command = JobflowBuilder.new_job_spec do
          add_streaming_step "streaming name" do |s|
            s.mapper = 'mapit'
            s.reducer = 'reduceit'
            s.input = 'fromhere'
            s.output = 'tothere'
            s.args = [ 'arg1', 'arg2',]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"streaming name", hadoop_jar_step:{jar: JobflowBuilder::JobSpec::STREAMING_JAR_LOCATION,
                                                               args:["-input", "fromhere", "-output","tothere", "-mapper", "mapit", "-reducer", "reduceit", "arg1", "arg2"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # Same as test_streaming_steps but via the arity-0 / ivar form.
      def test_streaming_steps2
        command = JobflowBuilder.new_job_spec do
          add_streaming_step "streaming name" do
            @mapper = 'mapit'
            @reducer = 'reduceit'
            @input = 'fromhere'
            @output = 'tothere'
            @args = [ 'arg1', 'arg2',]
            @action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"streaming name", hadoop_jar_step:{jar: JobflowBuilder::JobSpec::STREAMING_JAR_LOCATION,
                                                               args:["-input", "fromhere", "-output","tothere", "-mapper", "mapit", "-reducer", "reduceit", "arg1", "arg2"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end


      # Job params flow through to step blocks in both block forms.
      def test_steps_params
        command = JobflowBuilder.new_job_spec(arg1: 'aack') do
          add_jar_step 'jar name', 'jar jar', 'Binks' do |s|
            s.arg s.params[:arg1]
            s.action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                                                         args:["aack"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command

        command = JobflowBuilder.new_job_spec(arg1: 'aack') do
          add_jar_step 'jar name', 'jar jar', 'Binks' do
            arg params[:arg1]
            @action_on_failure = 'CANCEL_AND_WAIT'
          end
        end.to_steps_command
        assert_equal [{name:"jar name", hadoop_jar_step:{jar:"jar jar", main_class:"Binks",
                                                         args:["aack"]},
                       action_on_failure:"CANCEL_AND_WAIT"}], command
      end

      # keep_alive accepts only real booleans; a truthy string must raise.
      def test_keepalive
        command = JobflowBuilder.new_job_spec do |job|
          job.keep_alive true
        end.to_create_command
        assert_equal command, Hash[instances: {keep_job_flow_alive_when_no_steps: true}]

        assert_raises RuntimeError do
          JobflowBuilder.new_job_spec do
            keep_alive "false" #if this doesn't check boolean, result would be true
          end
        end
      end

    end
  end
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
require 'test_helper'

module Rubadoop
  module MapReduce

    # Covers CallJava's DSL and its two renderings: to_hadoop_cli (argv
    # array) and to_h (plain hash).
    class CallJavaTest < MiniTest::Spec

      # Minimal call: only a jar.
      def test_simple
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
        end
        assert_equal "hadoop jar hadoop-pie.jar", call.to_hadoop_cli.join(' ')
      end

      # env renders as -cmdenv NAME=VALUE.
      def test_env
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.env 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cmdenv abcd=efgh", call.to_hadoop_cli.join(' ')
      end

      # conf renders as -DNAME=VALUE.
      def test_conf
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=efgh", call.to_hadoop_cli.join(' ')
      end

      # conf_concat appends repeated -D flags for the same key, and also
      # works without a preceding conf for that key.
      def test_conf_multi
        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf 'abcd', 'efgh'
          s.conf_concat 'abcd', 'ijkl'
          s.conf_concat 'abcd', 'mnop'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=efgh -Dabcd=ijkl -Dabcd=mnop", call.to_hadoop_cli.join(' ')

        call = CallJava.new_java_call() do |s|
          s.jar = 'hadoop-pie.jar'
          s.conf_concat 'abcd', 'mnop'
        end
        assert_equal "hadoop jar hadoop-pie.jar -Dabcd=mnop", call.to_hadoop_cli.join(' ')
      end

      # Bare args are appended verbatim (arity-0 / ivar block form).
      def test_arg
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          arg 'abcd', 'efgh'
        end
        assert_equal "hadoop jar hadoop-pie.jar abcd efgh", call.to_hadoop_cli.join(' ')
      end

      # archive renders as -cacheArchive path#alias.
      def test_archive
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          archive 'dungeon', 'd'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cacheArchive dungeon#d", call.to_hadoop_cli.join(' ')
      end

      # file renders as -cacheFile path#alias.
      def test_files
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          file 'phile', 'f'
        end
        assert_equal "hadoop jar hadoop-pie.jar -cacheFile phile#f", call.to_hadoop_cli.join(' ')
      end

      # CLI ordering is fixed regardless of DSL call order:
      # jar, main_class, confs, envs, files, archives, then bare args.
      def test_order
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          @main_class = 'com.java.package.DoItLive'
          env 'e1', 'dudio'
          env 'e2', 'dudi-rio'
          file 'phile', 'f'
          conf 'c1', 'conf'
          conf 'c2', 'cronf'
          arg 'seriously'
          archive 'dungeon', 'd'
          file 'phile2', 'f2'
        end
        assert_equal "hadoop jar hadoop-pie.jar com.java.package.DoItLive -Dc1=conf -Dc2=cronf -cmdenv e1=dudio -cmdenv e2=dudi-rio -cacheFile phile#f -cacheFile phile2#f2 -cacheArchive dungeon#d seriously", call.to_hadoop_cli.join(' ')
      end

      # A call without a jar is invalid for both renderings.
      def test_validation
        assert_raises RuntimeError do
          CallJava.new_java_call() do
            @main_class = 'com.java.package.DoItLive'
          end.to_hadoop_cli
        end

        assert_raises RuntimeError do
          CallJava.new_java_call() do
            @main_class = 'com.java.package.DoItLive'
          end.to_h
        end
      end

      # to_h keeps only the known fields — stray ivars (@poopsy) are
      # dropped; duplicate file entries are preserved as-is.
      def test_hash
        call = CallJava.new_java_call() do
          @jar = 'hadoop-pie.jar'
          @main_class = 'com.java.package.DoItLive'
          env 'e1', 'dudio'
          env 'e2', 'dudi-rio'
          file 'phile', 'f'
          conf 'c1', 'conf'
          conf 'c2', 'cronf'
          arg 'seriously'
          archive 'dungeon', 'd'
          file 'phile', 'f'
          @poopsy = 'poo'
        end
        assert_equal Hash[jar: "hadoop-pie.jar", archives: ["dungeon#d"],
                          main_class: "com.java.package.DoItLive", envs: {e1: "dudio", e2: "dudi-rio"},
                          files: ["phile#f", "phile#f"], confs: {c1: "conf", c2: "cronf"}, args: ["seriously"]], call.to_h
      end

    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
require 'test_helper'

module Rubadoop
  module MapReduce

    # Covers CallStreaming, the streaming-job specialization of CallJava.
    class CallStreamingTest < MiniTest::Spec

      # Minimal valid streaming call: jar, input, output, mapper, reducer.
      def test_simple
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.input = 'input'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
        end
        assert_equal "hadoop jar hadoop-streaming.jar -input input -output output -mapper /bin/cat -reducer /bin/wc", streaming.to_hadoop_cli.join(' ')
      end

      # to_h includes every known field, nil for the unset ones. With the
      # arity-1 block, @test lands on the test instance — not the spec —
      # so it cannot show up in to_h.
      def test_hash
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.inputformat = 'green'
          s.input = 'input'
          s.outputformat = 'blue'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
          @test = 'noshow'
        end
        assert_equal Hash[jar: "hadoop-streaming.jar", main_class: nil, envs: nil, args: nil, confs: nil, files: nil, archives: nil,
                          mapper: "/bin/cat", reducer: "/bin/wc", inputformat: "green", input: "input", outputformat: "blue", output: "output"], streaming.to_h
      end

      # Full argv rendering: -inputformat/-input/-outputformat/-output
      # precede -mapper/-reducer.
      def test_cli
        streaming = CallStreaming.new_streaming_call do |s|
          s.jar = 'hadoop-streaming.jar'
          s.inputformat = 'green'
          s.input = 'input'
          s.outputformat = 'blue'
          s.output = 'output'
          s.mapper = '/bin/cat'
          s.reducer = '/bin/wc'
          @test = 'noshow'
        end
        assert_equal ["hadoop", "jar", "hadoop-streaming.jar", "-inputformat", "green", "-input", "input", "-outputformat", "blue",
                      "-output", "output", "-mapper", "/bin/cat", "-reducer", "/bin/wc"], streaming.to_hadoop_cli
      end

      # Missing mapper (first case) or missing output (second case) must
      # raise from either rendering.
      def test_validation
        assert_raises RuntimeError do
          CallStreaming.new_streaming_call do |s|
            s.jar = 'hadoop-streaming.jar'
            s.input = 'input'
            s.output = 'output'
            s.reducer = '/bin/wc'
          end.to_hadoop_cli
        end

        assert_raises RuntimeError do
          CallStreaming.new_streaming_call() do
            @jar = 'hadoop-streaming.jar'
            @input = 'input'
            @mapper = '/bin/cat'
            @reducer = '/bin/wc'
          end.to_h
        end
      end

      # The jar requirement inherited from CallJava still applies.
      def test_validation_inherited
        assert_raises RuntimeError do
          CallStreaming.new_streaming_call() do |s|
            s.input = 'input'
            s.output = 'output'
            s.mapper = '/bin/cat'
            s.reducer = '/bin/wc'
          end.to_hadoop_cli
        end
      end
    end
  end
end
|