elasticity 1.0.1 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.autotest ADDED
@@ -0,0 +1,2 @@
1
+ require 'autotest/fsevent'
2
+ require 'autotest/growl'
data/HISTORY.mediawiki ADDED
@@ -0,0 +1,12 @@
1
+ === 1.0 ===
2
+
3
+ * Released!
4
+
5
+ === 1.0.1 ===
6
+
7
+ * Added LICENSE.
8
+
9
+ === 1.1 ===
10
+
11
+ * Added HiveJob, a simplified way to launch basic Hive job flows.
12
+ * Added HISTORY.
data/README.mediawiki CHANGED
@@ -1,4 +1,4 @@
1
- Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently wrap the API operations in a manner that makes working with EMR job flows from Ruby more enjoyable. At the very least, using Elasticity allows you to easily experiment with the EMR API :)
1
+ Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently wrap the API operations in a manner that makes working with EMR job flows from Ruby more productive and more enjoyable, without having to understand the nuts and bolts of the EMR REST API. At the very least, using Elasticity allows you to easily experiment with the EMR API :)
2
2
 
3
3
  '''BACKLOG''': Have a look at the [https://www.pivotaltracker.com/projects/272429 backlog] to see where this is headed.
4
4
 
@@ -12,7 +12,25 @@ Elasticity provides programmatic access to Amazon's Elastic Map Reduce service.
12
12
 
13
13
  All you have to do is <code>require 'elasticity'</code> and you're all set!
14
14
 
15
- = API Reference =
15
+ = Simplified API Reference =
16
+
17
+ Elasticity currently provides simplified access to launching Hive job flows. Eventually I'd like to add several simplified wrappers, but I only need Hive at the moment :)
18
+
19
+ == Hive ==
20
+
21
+ HiveJob allows you to quickly launch Hive jobs without having to understand the ins and outs of the EMR API. Specify only the Hive script location and (optionally) variables to make available to the Hive script.
22
+
23
+ <pre>
24
+ hive = Elasticity::HiveJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
25
+ hive.run("s3n://slif-hive/test.q", {
26
+ "LIB" => "s3n://slif-test/lib",
27
+ "OUTPUT" => "s3n://slif-test/output"
28
+ })
29
+
30
+ > "j-129V5AQFMKO1C"
31
+ </pre>
32
+
33
+ = Amazon API Reference =
16
34
 
17
35
  Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
18
36
 
data/elasticity.gemspec CHANGED
@@ -16,10 +16,13 @@ Gem::Specification.new do |s|
16
16
  s.add_dependency("rest-client")
17
17
  s.add_dependency("nokogiri")
18
18
 
19
+ s.add_development_dependency("autotest-fsevent")
20
+ s.add_development_dependency("autotest-growl")
19
21
  s.add_development_dependency("rake")
20
22
  s.add_development_dependency("rspec", ">= 2.5.0")
21
23
  s.add_development_dependency("vcr", ">= 1.5.1")
22
24
  s.add_development_dependency("webmock", ">= 1.6.2")
25
+ s.add_development_dependency("ZenTest")
23
26
 
24
27
  s.files = `git ls-files`.split("\n")
25
28
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
data/lib/elasticity.rb CHANGED
@@ -5,6 +5,7 @@ require 'nokogiri'
5
5
 
6
6
  require 'elasticity/aws_request'
7
7
  require 'elasticity/emr'
8
+ require 'elasticity/hive_job'
8
9
  require 'elasticity/job_flow'
9
10
  require 'elasticity/job_flow_step'
10
11
 
@@ -0,0 +1,85 @@
1
+ module Elasticity
2
+
3
+ # HiveJob allows you to quickly and easily kick off a Hive jobflow without
4
+ # having to understand the entirety of the EMR API.
5
+ class HiveJob
6
+
7
+ attr_accessor :aws_access_key_id
8
+ attr_accessor :aws_secret_access_key
9
+ attr_accessor :ec2_key_name
10
+ attr_accessor :name
11
+ attr_accessor :hadoop_version
12
+ attr_accessor :instance_count
13
+ attr_accessor :master_instance_type
14
+ attr_accessor :slave_instance_type
15
+
16
+ def initialize(aws_access_key_id, aws_secret_access_key)
17
+ @aws_access_key_id = aws_access_key_id
18
+ @aws_secret_access_key = aws_secret_access_key
19
+ @ec2_key_name = "default"
20
+ @hadoop_version = "0.20"
21
+ @instance_count = 2
22
+ @master_instance_type = "m1.small"
23
+ @name = "Elasticity Hive Job"
24
+ @slave_instance_type = "m1.small"
25
+
26
+ @aws_request = Elasticity::AwsRequest.new(aws_access_key_id, aws_secret_access_key)
27
+ end
28
+
29
+ # Run the specified Hive script with the specified variables.
30
+ #
31
+ # hive = Elasticity::HiveJob.new("access", "secret")
32
+ # jobflow_id = hive.run('s3n://slif-hive/test.q', {
33
+ # 'SCRIPTS' => 's3n://slif-test/scripts',
34
+ # 'OUTPUT' => 's3n://slif-test/output',
35
+ # 'XREFS' => 's3n://slif-test/xrefs'
36
+ # })
37
+ #
38
+ # The variables are accessible within your Hive scripts by using the
39
+ # standard ${NAME} syntax. E.g.
40
+ #
41
+ # ADD JAR ${SCRIPTS}/jsonserde.jar;
42
+ def run(hive_script, hive_variables={})
43
+ script_arguments = ["s3://elasticmapreduce/libs/hive/hive-script", "--run-hive-script", "--args"]
44
+ script_arguments.concat(["-f", hive_script])
45
+ hive_variables.each do |variable_name, value|
46
+ script_arguments.concat(["-d", "#{variable_name}=#{value}"])
47
+ end
48
+ jobflow_config = {
49
+ :name => @name,
50
+ :instances => {
51
+ :ec2_key_name => @ec2_key_name,
52
+ :hadoop_version => @hadoop_version,
53
+ :instance_count => @instance_count,
54
+ :master_instance_type => @master_instance_type,
55
+ :slave_instance_type => @slave_instance_type,
56
+ },
57
+ :steps => [
58
+ {
59
+ :action_on_failure => "TERMINATE_JOB_FLOW",
60
+ :hadoop_jar_step => {
61
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
62
+ :args => [
63
+ "s3://elasticmapreduce/libs/hive/hive-script",
64
+ "--base-path", "s3://elasticmapreduce/libs/hive/",
65
+ "--install-hive"
66
+ ],
67
+ },
68
+ :name => "Setup Hive"
69
+ },
70
+ {
71
+ :action_on_failure => "TERMINATE_JOB_FLOW",
72
+ :hadoop_jar_step => {
73
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
74
+ :args => script_arguments,
75
+ },
76
+ :name => "Run Hive Script"
77
+ }
78
+ ]
79
+ }
80
+ @aws_request.run_job_flow(jobflow_config)
81
+ end
82
+
83
+ end
84
+
85
+ end
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = "1.0.1"
2
+ VERSION = "1.1"
3
3
  end
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+
3
+ describe Elasticity::HiveJob do
4
+
5
+ describe ".new" do
6
+
7
+ it "should have good defaults" do
8
+ hive = Elasticity::HiveJob.new("access", "secret")
9
+ hive.aws_access_key_id.should == "access"
10
+ hive.aws_secret_access_key.should == "secret"
11
+ hive.ec2_key_name.should == "default"
12
+ hive.hadoop_version.should == "0.20"
13
+ hive.instance_count.should == 2
14
+ hive.master_instance_type.should == "m1.small"
15
+ hive.name.should == "Elasticity Hive Job"
16
+ hive.slave_instance_type.should == "m1.small"
17
+ end
18
+
19
+ end
20
+
21
+ describe "#run" do
22
+
23
+ it "run the script with the specified variables and return the jobflow_id" do
24
+ aws = Elasticity::AwsRequest.new("", "")
25
+ aws.should_receive(:run_job_flow).with({
26
+ :name => "Elasticity Hive Job",
27
+ :instances => {
28
+ :ec2_key_name => "default",
29
+ :hadoop_version => "0.20",
30
+ :instance_count => 2,
31
+ :master_instance_type => "m1.small",
32
+ :slave_instance_type => "m1.small",
33
+ },
34
+ :steps => [
35
+ {
36
+ :action_on_failure => "TERMINATE_JOB_FLOW",
37
+ :hadoop_jar_step => {
38
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
39
+ :args => [
40
+ "s3://elasticmapreduce/libs/hive/hive-script",
41
+ "--base-path",
42
+ "s3://elasticmapreduce/libs/hive/",
43
+ "--install-hive"
44
+ ],
45
+ },
46
+ :name => "Setup Hive"
47
+ },
48
+ {
49
+ :action_on_failure => "TERMINATE_JOB_FLOW",
50
+ :hadoop_jar_step => {
51
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
52
+ :args => [
53
+ "s3://elasticmapreduce/libs/hive/hive-script",
54
+ "--run-hive-script",
55
+ "--args",
56
+ "-f", "s3n://slif-hive/test.q",
57
+ "-d", "XREFS=s3n://slif-test/xrefs",
58
+ "-d", "OUTPUT=s3n://slif-test/output"
59
+ ],
60
+ },
61
+ :name => "Run Hive Script"
62
+ }
63
+ ]
64
+ }).and_return("new_jobflow_id")
65
+ Elasticity::AwsRequest.should_receive(:new).with("access", "secret").and_return(aws)
66
+
67
+ hive = Elasticity::HiveJob.new("access", "secret")
68
+ jobflow_id = hive.run('s3n://slif-hive/test.q', {
69
+ 'OUTPUT' => 's3n://slif-test/output',
70
+ 'XREFS' => 's3n://slif-test/xrefs'
71
+ })
72
+ jobflow_id.should == "new_jobflow_id"
73
+ end
74
+
75
+ end
76
+
77
+ end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 13
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 0
9
8
  - 1
10
- version: 1.0.1
9
+ version: "1.1"
11
10
  platform: ruby
12
11
  authors:
13
12
  - Robert Slifka
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2011-04-22 00:00:00 -07:00
17
+ date: 2011-04-24 00:00:00 -07:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -47,7 +46,7 @@ dependencies:
47
46
  type: :runtime
48
47
  version_requirements: *id002
49
48
  - !ruby/object:Gem::Dependency
50
- name: rake
49
+ name: autotest-fsevent
51
50
  prerelease: false
52
51
  requirement: &id003 !ruby/object:Gem::Requirement
53
52
  none: false
@@ -61,9 +60,37 @@ dependencies:
61
60
  type: :development
62
61
  version_requirements: *id003
63
62
  - !ruby/object:Gem::Dependency
64
- name: rspec
63
+ name: autotest-growl
65
64
  prerelease: false
66
65
  requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: rake
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ type: :development
89
+ version_requirements: *id005
90
+ - !ruby/object:Gem::Dependency
91
+ name: rspec
92
+ prerelease: false
93
+ requirement: &id006 !ruby/object:Gem::Requirement
67
94
  none: false
68
95
  requirements:
69
96
  - - ">="
@@ -75,11 +102,11 @@ dependencies:
75
102
  - 0
76
103
  version: 2.5.0
77
104
  type: :development
78
- version_requirements: *id004
105
+ version_requirements: *id006
79
106
  - !ruby/object:Gem::Dependency
80
107
  name: vcr
81
108
  prerelease: false
82
- requirement: &id005 !ruby/object:Gem::Requirement
109
+ requirement: &id007 !ruby/object:Gem::Requirement
83
110
  none: false
84
111
  requirements:
85
112
  - - ">="
@@ -91,11 +118,11 @@ dependencies:
91
118
  - 1
92
119
  version: 1.5.1
93
120
  type: :development
94
- version_requirements: *id005
121
+ version_requirements: *id007
95
122
  - !ruby/object:Gem::Dependency
96
123
  name: webmock
97
124
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
125
+ requirement: &id008 !ruby/object:Gem::Requirement
99
126
  none: false
100
127
  requirements:
101
128
  - - ">="
@@ -107,7 +134,21 @@ dependencies:
107
134
  - 2
108
135
  version: 1.6.2
109
136
  type: :development
110
- version_requirements: *id006
137
+ version_requirements: *id008
138
+ - !ruby/object:Gem::Dependency
139
+ name: ZenTest
140
+ prerelease: false
141
+ requirement: &id009 !ruby/object:Gem::Requirement
142
+ none: false
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ hash: 3
147
+ segments:
148
+ - 0
149
+ version: "0"
150
+ type: :development
151
+ version_requirements: *id009
111
152
  description: Programmatic access to Amazon's Elastic Map Reduce service.
112
153
  email:
113
154
  executables: []
@@ -117,10 +158,12 @@ extensions: []
117
158
  extra_rdoc_files: []
118
159
 
119
160
  files:
161
+ - .autotest
120
162
  - .gitignore
121
163
  - .rspec
122
164
  - .rvmrc
123
165
  - Gemfile
166
+ - HISTORY.mediawiki
124
167
  - LICENSE
125
168
  - README.mediawiki
126
169
  - Rakefile
@@ -128,6 +171,7 @@ files:
128
171
  - lib/elasticity.rb
129
172
  - lib/elasticity/aws_request.rb
130
173
  - lib/elasticity/emr.rb
174
+ - lib/elasticity/hive_job.rb
131
175
  - lib/elasticity/job_flow.rb
132
176
  - lib/elasticity/job_flow_step.rb
133
177
  - lib/elasticity/version.rb
@@ -143,6 +187,7 @@ files:
143
187
  - spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml
144
188
  - spec/lib/elasticity/aws_request_spec.rb
145
189
  - spec/lib/elasticity/emr_spec.rb
190
+ - spec/lib/elasticity/hive_job_spec.rb
146
191
  - spec/lib/elasticity/job_flow_spec.rb
147
192
  - spec/lib/elasticity/job_flow_step_spec.rb
148
193
  - spec/spec_helper.rb
@@ -193,6 +238,7 @@ test_files:
193
238
  - spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml
194
239
  - spec/lib/elasticity/aws_request_spec.rb
195
240
  - spec/lib/elasticity/emr_spec.rb
241
+ - spec/lib/elasticity/hive_job_spec.rb
196
242
  - spec/lib/elasticity/job_flow_spec.rb
197
243
  - spec/lib/elasticity/job_flow_step_spec.rb
198
244
  - spec/spec_helper.rb