elasticity 1.0.1 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +2 -0
- data/HISTORY.mediawiki +12 -0
- data/README.mediawiki +20 -2
- data/elasticity.gemspec +3 -0
- data/lib/elasticity.rb +1 -0
- data/lib/elasticity/hive_job.rb +85 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/hive_job_spec.rb +77 -0
- metadata +57 -11
data/.autotest
ADDED
data/HISTORY.mediawiki
ADDED
data/README.mediawiki
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently wrap the API operations in a manner that makes working with EMR job flows from Ruby more enjoyable. At the very least, using Elasticity allows you to easily experiment with the EMR API :)
|
1
|
+
Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently wrap the API operations in a manner that makes working with EMR job flows from Ruby more productive and more enjoyable, without having to understand the nuts and bolts of the EMR REST API. At the very least, using Elasticity allows you to easily experiment with the EMR API :)
|
2
2
|
|
3
3
|
'''BACKLOG''': Have a look at the [https://www.pivotaltracker.com/projects/272429 backlog] to see where this is headed.
|
4
4
|
|
@@ -12,7 +12,25 @@ Elasticity provides programmatic access to Amazon's Elastic Map Reduce service.
|
|
12
12
|
|
13
13
|
All you have to do is <code>require 'elasticity'</code> and you're all set!
|
14
14
|
|
15
|
-
= API Reference =
|
15
|
+
= Simplified API Reference =
|
16
|
+
|
17
|
+
Elasticity currently provides simplified access to launching Hive job flows. Eventually I'd like to add several simplified wrappers, but I only need Hive at the moment :)
|
18
|
+
|
19
|
+
== Hive ==
|
20
|
+
|
21
|
+
HiveJob allows you to quickly launch Hive jobs without having to understand the ins and outs of the EMR API. Specify only the Hive script location and (optionally) variables to make available to the Hive script.
|
22
|
+
|
23
|
+
<pre>
|
24
|
+
hive = Elasticity::HiveJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
|
25
|
+
hive.run("s3n://slif-hive/test.q", {
|
26
|
+
"LIB" => "s3n://slif-test/lib",
|
27
|
+
"OUTPUT" => "s3n://slif-test/output"
|
28
|
+
})
|
29
|
+
|
30
|
+
> "j-129V5AQFMKO1C"
|
31
|
+
</pre>
|
32
|
+
|
33
|
+
= Amazon API Reference =
|
16
34
|
|
17
35
|
Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
|
18
36
|
|
data/elasticity.gemspec
CHANGED
@@ -16,10 +16,13 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.add_dependency("rest-client")
|
17
17
|
s.add_dependency("nokogiri")
|
18
18
|
|
19
|
+
s.add_development_dependency("autotest-fsevent")
|
20
|
+
s.add_development_dependency("autotest-growl")
|
19
21
|
s.add_development_dependency("rake")
|
20
22
|
s.add_development_dependency("rspec", ">= 2.5.0")
|
21
23
|
s.add_development_dependency("vcr", ">= 1.5.1")
|
22
24
|
s.add_development_dependency("webmock", ">= 1.6.2")
|
25
|
+
s.add_development_dependency("ZenTest")
|
23
26
|
|
24
27
|
s.files = `git ls-files`.split("\n")
|
25
28
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/elasticity.rb
CHANGED
data/lib/elasticity/hive_job.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# HiveJob allows you to quickly and easily kick off a Hive jobflow without
|
4
|
+
# having to understand the entirety of the EMR API.
|
5
|
+
class HiveJob
|
6
|
+
|
7
|
+
attr_accessor :aws_access_key_id
|
8
|
+
attr_accessor :aws_secret_access_key
|
9
|
+
attr_accessor :ec2_key_name
|
10
|
+
attr_accessor :name
|
11
|
+
attr_accessor :hadoop_version
|
12
|
+
attr_accessor :instance_count
|
13
|
+
attr_accessor :master_instance_type
|
14
|
+
attr_accessor :slave_instance_type
|
15
|
+
|
16
|
+
def initialize(aws_access_key_id, aws_secret_access_key)
|
17
|
+
@aws_access_key_id = aws_access_key_id
|
18
|
+
@aws_secret_access_key = aws_secret_access_key
|
19
|
+
@ec2_key_name = "default"
|
20
|
+
@hadoop_version = "0.20"
|
21
|
+
@instance_count = 2
|
22
|
+
@master_instance_type = "m1.small"
|
23
|
+
@name = "Elasticity Hive Job"
|
24
|
+
@slave_instance_type = "m1.small"
|
25
|
+
|
26
|
+
@aws_request = Elasticity::AwsRequest.new(aws_access_key_id, aws_secret_access_key)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Run the specified Hive script with the specified variables.
|
30
|
+
#
|
31
|
+
# hive = Elasticity::HiveJob.new("access", "secret")
|
32
|
+
# jobflow_id = hive.run('s3n://slif-hive/test.q', {
|
33
|
+
# 'SCRIPTS' => 's3n://slif-test/scripts',
|
34
|
+
# 'OUTPUT' => 's3n://slif-test/output',
|
35
|
+
# 'XREFS' => 's3n://slif-test/xrefs'
|
36
|
+
# })
|
37
|
+
#
|
38
|
+
# The variables are accessible within your Hive scripts by using the
|
39
|
+
# standard ${NAME} syntax. E.g.
|
40
|
+
#
|
41
|
+
# ADD JAR ${SCRIPTS}/jsonserde.jar;
|
42
|
+
def run(hive_script, hive_variables={})
|
43
|
+
script_arguments = ["s3://elasticmapreduce/libs/hive/hive-script", "--run-hive-script", "--args"]
|
44
|
+
script_arguments.concat(["-f", hive_script])
|
45
|
+
hive_variables.each do |variable_name, value|
|
46
|
+
script_arguments.concat(["-d", "#{variable_name}=#{value}"])
|
47
|
+
end
|
48
|
+
jobflow_config = {
|
49
|
+
:name => @name,
|
50
|
+
:instances => {
|
51
|
+
:ec2_key_name => @ec2_key_name,
|
52
|
+
:hadoop_version => @hadoop_version,
|
53
|
+
:instance_count => @instance_count,
|
54
|
+
:master_instance_type => @master_instance_type,
|
55
|
+
:slave_instance_type => @slave_instance_type,
|
56
|
+
},
|
57
|
+
:steps => [
|
58
|
+
{
|
59
|
+
:action_on_failure => "TERMINATE_JOB_FLOW",
|
60
|
+
:hadoop_jar_step => {
|
61
|
+
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
62
|
+
:args => [
|
63
|
+
"s3://elasticmapreduce/libs/hive/hive-script",
|
64
|
+
"--base-path", "s3://elasticmapreduce/libs/hive/",
|
65
|
+
"--install-hive"
|
66
|
+
],
|
67
|
+
},
|
68
|
+
:name => "Setup Hive"
|
69
|
+
},
|
70
|
+
{
|
71
|
+
:action_on_failure => "TERMINATE_JOB_FLOW",
|
72
|
+
:hadoop_jar_step => {
|
73
|
+
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
74
|
+
:args => script_arguments,
|
75
|
+
},
|
76
|
+
:name => "Run Hive Script"
|
77
|
+
}
|
78
|
+
]
|
79
|
+
}
|
80
|
+
@aws_request.run_job_flow(jobflow_config)
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
data/lib/elasticity/version.rb
CHANGED
data/spec/lib/elasticity/hive_job_spec.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Elasticity::HiveJob do
|
4
|
+
|
5
|
+
describe ".new" do
|
6
|
+
|
7
|
+
it "should have good defaults" do
|
8
|
+
hive = Elasticity::HiveJob.new("access", "secret")
|
9
|
+
hive.aws_access_key_id.should == "access"
|
10
|
+
hive.aws_secret_access_key.should == "secret"
|
11
|
+
hive.ec2_key_name.should == "default"
|
12
|
+
hive.hadoop_version.should == "0.20"
|
13
|
+
hive.instance_count.should == 2
|
14
|
+
hive.master_instance_type.should == "m1.small"
|
15
|
+
hive.name.should == "Elasticity Hive Job"
|
16
|
+
hive.slave_instance_type.should == "m1.small"
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "#run" do
|
22
|
+
|
23
|
+
it "run the script with the specified variables and return the jobflow_id" do
|
24
|
+
aws = Elasticity::AwsRequest.new("", "")
|
25
|
+
aws.should_receive(:run_job_flow).with({
|
26
|
+
:name => "Elasticity Hive Job",
|
27
|
+
:instances => {
|
28
|
+
:ec2_key_name => "default",
|
29
|
+
:hadoop_version => "0.20",
|
30
|
+
:instance_count => 2,
|
31
|
+
:master_instance_type => "m1.small",
|
32
|
+
:slave_instance_type => "m1.small",
|
33
|
+
},
|
34
|
+
:steps => [
|
35
|
+
{
|
36
|
+
:action_on_failure => "TERMINATE_JOB_FLOW",
|
37
|
+
:hadoop_jar_step => {
|
38
|
+
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
39
|
+
:args => [
|
40
|
+
"s3://elasticmapreduce/libs/hive/hive-script",
|
41
|
+
"--base-path",
|
42
|
+
"s3://elasticmapreduce/libs/hive/",
|
43
|
+
"--install-hive"
|
44
|
+
],
|
45
|
+
},
|
46
|
+
:name => "Setup Hive"
|
47
|
+
},
|
48
|
+
{
|
49
|
+
:action_on_failure => "TERMINATE_JOB_FLOW",
|
50
|
+
:hadoop_jar_step => {
|
51
|
+
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
52
|
+
:args => [
|
53
|
+
"s3://elasticmapreduce/libs/hive/hive-script",
|
54
|
+
"--run-hive-script",
|
55
|
+
"--args",
|
56
|
+
"-f", "s3n://slif-hive/test.q",
|
57
|
+
"-d", "XREFS=s3n://slif-test/xrefs",
|
58
|
+
"-d", "OUTPUT=s3n://slif-test/output"
|
59
|
+
],
|
60
|
+
},
|
61
|
+
:name => "Run Hive Script"
|
62
|
+
}
|
63
|
+
]
|
64
|
+
}).and_return("new_jobflow_id")
|
65
|
+
Elasticity::AwsRequest.should_receive(:new).with("access", "secret").and_return(aws)
|
66
|
+
|
67
|
+
hive = Elasticity::HiveJob.new("access", "secret")
|
68
|
+
jobflow_id = hive.run('s3n://slif-hive/test.q', {
|
69
|
+
'OUTPUT' => 's3n://slif-test/output',
|
70
|
+
'XREFS' => 's3n://slif-test/xrefs'
|
71
|
+
})
|
72
|
+
jobflow_id.should == "new_jobflow_id"
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
version: "1.1"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Robert Slifka
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2011-04-
|
17
|
+
date: 2011-04-24 00:00:00 -07:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -47,7 +46,7 @@ dependencies:
|
|
47
46
|
type: :runtime
|
48
47
|
version_requirements: *id002
|
49
48
|
- !ruby/object:Gem::Dependency
|
50
|
-
name: rake
|
49
|
+
name: autotest-fsevent
|
51
50
|
prerelease: false
|
52
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
53
52
|
none: false
|
@@ -61,9 +60,37 @@ dependencies:
|
|
61
60
|
type: :development
|
62
61
|
version_requirements: *id003
|
63
62
|
- !ruby/object:Gem::Dependency
|
64
|
-
name: rspec
|
63
|
+
name: autotest-growl
|
65
64
|
prerelease: false
|
66
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: rake
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
type: :development
|
89
|
+
version_requirements: *id005
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: rspec
|
92
|
+
prerelease: false
|
93
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
67
94
|
none: false
|
68
95
|
requirements:
|
69
96
|
- - ">="
|
@@ -75,11 +102,11 @@ dependencies:
|
|
75
102
|
- 0
|
76
103
|
version: 2.5.0
|
77
104
|
type: :development
|
78
|
-
version_requirements: *id004
|
105
|
+
version_requirements: *id006
|
79
106
|
- !ruby/object:Gem::Dependency
|
80
107
|
name: vcr
|
81
108
|
prerelease: false
|
82
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
109
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
83
110
|
none: false
|
84
111
|
requirements:
|
85
112
|
- - ">="
|
@@ -91,11 +118,11 @@ dependencies:
|
|
91
118
|
- 1
|
92
119
|
version: 1.5.1
|
93
120
|
type: :development
|
94
|
-
version_requirements: *id005
|
121
|
+
version_requirements: *id007
|
95
122
|
- !ruby/object:Gem::Dependency
|
96
123
|
name: webmock
|
97
124
|
prerelease: false
|
98
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
125
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
99
126
|
none: false
|
100
127
|
requirements:
|
101
128
|
- - ">="
|
@@ -107,7 +134,21 @@ dependencies:
|
|
107
134
|
- 2
|
108
135
|
version: 1.6.2
|
109
136
|
type: :development
|
110
|
-
version_requirements: *id006
|
137
|
+
version_requirements: *id008
|
138
|
+
- !ruby/object:Gem::Dependency
|
139
|
+
name: ZenTest
|
140
|
+
prerelease: false
|
141
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
142
|
+
none: false
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
hash: 3
|
147
|
+
segments:
|
148
|
+
- 0
|
149
|
+
version: "0"
|
150
|
+
type: :development
|
151
|
+
version_requirements: *id009
|
111
152
|
description: Programmatic access to Amazon's Elastic Map Reduce service.
|
112
153
|
email:
|
113
154
|
executables: []
|
@@ -117,10 +158,12 @@ extensions: []
|
|
117
158
|
extra_rdoc_files: []
|
118
159
|
|
119
160
|
files:
|
161
|
+
- .autotest
|
120
162
|
- .gitignore
|
121
163
|
- .rspec
|
122
164
|
- .rvmrc
|
123
165
|
- Gemfile
|
166
|
+
- HISTORY.mediawiki
|
124
167
|
- LICENSE
|
125
168
|
- README.mediawiki
|
126
169
|
- Rakefile
|
@@ -128,6 +171,7 @@ files:
|
|
128
171
|
- lib/elasticity.rb
|
129
172
|
- lib/elasticity/aws_request.rb
|
130
173
|
- lib/elasticity/emr.rb
|
174
|
+
- lib/elasticity/hive_job.rb
|
131
175
|
- lib/elasticity/job_flow.rb
|
132
176
|
- lib/elasticity/job_flow_step.rb
|
133
177
|
- lib/elasticity/version.rb
|
@@ -143,6 +187,7 @@ files:
|
|
143
187
|
- spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml
|
144
188
|
- spec/lib/elasticity/aws_request_spec.rb
|
145
189
|
- spec/lib/elasticity/emr_spec.rb
|
190
|
+
- spec/lib/elasticity/hive_job_spec.rb
|
146
191
|
- spec/lib/elasticity/job_flow_spec.rb
|
147
192
|
- spec/lib/elasticity/job_flow_step_spec.rb
|
148
193
|
- spec/spec_helper.rb
|
@@ -193,6 +238,7 @@ test_files:
|
|
193
238
|
- spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml
|
194
239
|
- spec/lib/elasticity/aws_request_spec.rb
|
195
240
|
- spec/lib/elasticity/emr_spec.rb
|
241
|
+
- spec/lib/elasticity/hive_job_spec.rb
|
196
242
|
- spec/lib/elasticity/job_flow_spec.rb
|
197
243
|
- spec/lib/elasticity/job_flow_step_spec.rb
|
198
244
|
- spec/spec_helper.rb
|