elasticity 1.1.1 → 1.2

@@ -1,18 +1,22 @@
- === 1.0 ===
+ === 1.2 ===
 
- * Released!
+ * Added PigJob!
 
- === 1.0.1 ===
+ === 1.1.1 ===
 
- * Added LICENSE.
+ * HiveJob critical bug fixed, now it works :)
+ * Added log_uri and action_on_failure as options to HiveJob.
+ * Added integration tests to HiveJob.
 
  === 1.1 ===
 
  * Added HiveJob, a simplified way to launch basic Hive job flows.
  * Added HISTORY.
 
- === 1.1.1 ===
+ === 1.0.1 ===
 
- * HiveJob critical bug fixed, now it works :)
- * Added log_uri and action_on_failure as options to HiveJob.
- * Added integration tests to HiveJob.
+ * Added LICENSE.
+
+ === 1.0 ===
+
+ * Released!
@@ -14,7 +14,19 @@ All you have to do is <code>require 'elasticity'</code> and you're all set!
 
  = Simplified API Reference =
 
- Elasticity currently provides simplified access to launching Hive job flows. Eventually I'd like to add several simplified wrappers, but I only need Hive at the moment :)
+ Elasticity currently provides simplified access to launching Hive and Pig job flows, specifying several default values that you may optionally override:
+
+ <pre>
+ @action_on_failure = "TERMINATE_JOB_FLOW"
+ @ec2_key_name = "default"
+ @hadoop_version = "0.20"
+ @instance_count = 2
+ @master_instance_type = "m1.small"
+ @name = "Elasticity Job"
+ @slave_instance_type = "m1.small"
+ </pre>
+
+ These are all accessible from HiveJob and PigJob instances. See the PigJob description for an example.
 
  == Hive ==
 
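
Since these defaults are plain attr_accessor values on the new SimpleJob base class, overriding them is just assignment on a HiveJob or PigJob instance. A minimal sketch (the key pair name and instance sizes below are hypothetical, not values from the gem):

<pre>
hive = Elasticity::HiveJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
hive.ec2_key_name = "my_keypair"         # hypothetical EC2 key pair
hive.instance_count = 10                 # default is 2
hive.master_instance_type = "m1.large"   # default is "m1.small"
hive.action_on_failure = "CONTINUE"      # default is "TERMINATE_JOB_FLOW"
</pre>
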
@@ -30,6 +42,22 @@ HiveJob allows you to quickly launch Hive jobs without having to understand the
  > "j-129V5AQFMKO1C"
  </pre>
 
+ == Pig ==
+
+ Like HiveJob, PigJob allows you to quickly launch Pig jobs :)
+
+ <pre>
+ pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
+ pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
+ pig.ec2_key_name = "slif_dev"
+ pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+   "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+   "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
+ })
+
+ > "j-16PZ24OED71C6"
+ </pre>
+
  = Amazon API Reference =
 
  Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
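
All of the simplified classes funnel into the same Elasticity::EMR client (the @emr instance SimpleJob creates), so the wrapped calls can also be driven directly. A minimal sketch of calling run_job_flow by hand, reusing the hash layout that PigJob#run builds below; the job name and key pair are hypothetical:

<pre>
emr = Elasticity::EMR.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
jobflow_id = emr.run_job_flow({
  :name => "Hand-rolled Job Flow",        # hypothetical name
  :instances => {
    :ec2_key_name => "my_keypair",        # hypothetical key pair
    :hadoop_version => "0.20",
    :instance_count => 2,
    :master_instance_type => "m1.small",
    :slave_instance_type => "m1.small"
  },
  :steps => [
    {
      :action_on_failure => "TERMINATE_JOB_FLOW",
      :name => "Setup Pig",
      :hadoop_jar_step => {
        :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
        :args => ["s3://elasticmapreduce/libs/pig/pig-script",
                  "--base-path", "s3://elasticmapreduce/libs/pig/",
                  "--install-pig"]
      }
    }
  ]
})
</pre>
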
@@ -5,9 +5,12 @@ require 'nokogiri'
 
  require 'elasticity/aws_request'
  require 'elasticity/emr'
- require 'elasticity/hive_job'
  require 'elasticity/job_flow'
  require 'elasticity/job_flow_step'
 
+ require 'elasticity/simple_job'
+ require 'elasticity/hive_job'
+ require 'elasticity/pig_job'
+
  module Elasticity
  end
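
Note that simple_job is required ahead of hive_job and pig_job: both job classes now subclass Elasticity::SimpleJob, so it has to be loaded first. A quick irb-style check of the new hierarchy (assuming the gem is installed):

<pre>
require 'elasticity'

Elasticity::HiveJob.ancestors.include?(Elasticity::SimpleJob)  # => true
Elasticity::PigJob.ancestors.include?(Elasticity::SimpleJob)   # => true
</pre>
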
@@ -2,31 +2,11 @@ module Elasticity
 
    # HiveJob allows you quickly easily kick off a Hive jobflow without
    # having to understand the entirety of the EMR API.
-   class HiveJob
-
-     attr_accessor :action_on_failure
-     attr_accessor :aws_access_key_id
-     attr_accessor :aws_secret_access_key
-     attr_accessor :ec2_key_name
-     attr_accessor :name
-     attr_accessor :hadoop_version
-     attr_accessor :instance_count
-     attr_accessor :log_uri
-     attr_accessor :master_instance_type
-     attr_accessor :slave_instance_type
+   class HiveJob < Elasticity::SimpleJob
 
      def initialize(aws_access_key_id, aws_secret_access_key)
-       @action_on_failure = "TERMINATE_JOB_FLOW"
-       @aws_access_key_id = aws_access_key_id
-       @aws_secret_access_key = aws_secret_access_key
-       @ec2_key_name = "default"
-       @hadoop_version = "0.20"
-       @instance_count = 2
-       @master_instance_type = "m1.small"
+       super
        @name = "Elasticity Hive Job"
-       @slave_instance_type = "m1.small"
-
-       @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
      end
 
      # Run the specified Hive script with the specified variables.
@@ -0,0 +1,67 @@
+ module Elasticity
+
+   class PigJob < Elasticity::SimpleJob
+
+     def initialize(aws_access_key_id, aws_secret_access_key)
+       super
+       @name = "Elasticity Pig Job"
+     end
+
+     # Run the specified Pig script with the specified variables.
+     #
+     #   pig = Elasticity::PigJob.new("access", "secret")
+     #   jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
+     #     'SCRIPTS' => 's3n://slif-pig-test/scripts',
+     #     'OUTPUT' => 's3n://slif-pig-test/output',
+     #     'XREFS' => 's3n://slif-pig-test/xrefs'
+     #   })
+     #
+     # The variables are accessible within your Pig scripts by using the
+     # standard ${NAME} syntax.
+     def run(pig_script, pig_variables={})
+       script_arguments = ["s3://elasticmapreduce/libs/pig/pig-script", "--run-pig-script", "--args"]
+       pig_variables.keys.sort.each do |variable_name|
+         script_arguments.concat(["-p", "#{variable_name}=#{pig_variables[variable_name]}"])
+       end
+       script_arguments << pig_script
+       jobflow_config = {
+         :name => @name,
+         :instances => {
+           :ec2_key_name => @ec2_key_name,
+           :hadoop_version => @hadoop_version,
+           :instance_count => @instance_count,
+           :master_instance_type => @master_instance_type,
+           :slave_instance_type => @slave_instance_type,
+         },
+         :steps => [
+           {
+             :action_on_failure => "TERMINATE_JOB_FLOW",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--base-path", "s3://elasticmapreduce/libs/pig/",
+                 "--install-pig"
+               ],
+             },
+             :name => "Setup Pig"
+           },
+           {
+             :action_on_failure => @action_on_failure,
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => script_arguments,
+             },
+             :name => "Run Pig Script"
+           }
+         ]
+       }
+
+       jobflow_config.merge!(:log_uri => @log_uri) if @log_uri
+
+       @emr.run_job_flow(jobflow_config)
+     end
+
+   end
+
+ end
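
Worth noting from PigJob#run above: the Pig variables are appended in sorted key order as repeated "-p NAME=value" arguments, with the script path last. A small worked sketch of that assembly (bucket and script names are hypothetical):

<pre>
pig_variables = { "OUTPUT" => "s3n://my-bucket/out", "INPUT" => "s3n://my-bucket/in" }
script_arguments = ["s3://elasticmapreduce/libs/pig/pig-script", "--run-pig-script", "--args"]
pig_variables.keys.sort.each do |variable_name|
  script_arguments.concat(["-p", "#{variable_name}=#{pig_variables[variable_name]}"])
end
script_arguments << "s3n://my-bucket/script.pig"
# => ["s3://elasticmapreduce/libs/pig/pig-script", "--run-pig-script", "--args",
#     "-p", "INPUT=s3n://my-bucket/in", "-p", "OUTPUT=s3n://my-bucket/out",
#     "s3n://my-bucket/script.pig"]
</pre>
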
@@ -0,0 +1,32 @@
+ module Elasticity
+
+   class SimpleJob
+
+     attr_accessor :action_on_failure
+     attr_accessor :aws_access_key_id
+     attr_accessor :aws_secret_access_key
+     attr_accessor :ec2_key_name
+     attr_accessor :name
+     attr_accessor :hadoop_version
+     attr_accessor :instance_count
+     attr_accessor :log_uri
+     attr_accessor :master_instance_type
+     attr_accessor :slave_instance_type
+
+     def initialize(aws_access_key_id, aws_secret_access_key)
+       @action_on_failure = "TERMINATE_JOB_FLOW"
+       @aws_access_key_id = aws_access_key_id
+       @aws_secret_access_key = aws_secret_access_key
+       @ec2_key_name = "default"
+       @hadoop_version = "0.20"
+       @instance_count = 2
+       @master_instance_type = "m1.small"
+       @name = "Elasticity Job"
+       @slave_instance_type = "m1.small"
+
+       @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
+     end
+
+   end
+
+ end
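
SimpleJob now owns the shared accessors, the defaults, and the EMR client, while HiveJob and PigJob just call super and override @name. A hypothetical subclass following the same pattern (MyCustomJob is illustrative only, not part of the gem):

<pre>
module Elasticity
  class MyCustomJob < Elasticity::SimpleJob
    def initialize(aws_access_key_id, aws_secret_access_key)
      super                             # shared defaults + @emr client
      @name = "Elasticity Custom Job"
      @instance_count = 4               # override any default
    end
  end
end
</pre>
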
@@ -1,3 +1,3 @@
  module Elasticity
-   VERSION = "1.1.1"
+   VERSION = "1.2"
  end
@@ -0,0 +1,35 @@
+ ---
+ - !ruby/struct:VCR::HTTPInteraction
+   request: !ruby/struct:VCR::Request
+     method: :get
+     uri: !ruby/regexp /^http:\/\/elasticmapreduce\.amazonaws\.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&Instances\.Ec2KeyName=sharethrough_dev&Instances\.HadoopVersion=0\.20&Instances\.InstanceCount=2&Instances\.MasterInstanceType=m1\.small&Instances\.SlaveInstanceType=m1\.small&LogUri=s3n:\/\/slif-elasticity\/pig-apache\/logs&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps\.member\.1\.ActionOnFailure=TERMINATE_JOB_FLOW&Steps\.member\.1\.HadoopJarStep\.Args\.member\.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps\.member\.1\.HadoopJarStep\.Args\.member\.2=--base-path&Steps\.member\.1\.HadoopJarStep\.Args\.member\.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps\.member\.1\.HadoopJarStep\.Args\.member\.4=--install-pig&Steps\.member\.1\.HadoopJarStep\.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner\.jar&Steps\.member\.1\.Name=Setup%20Pig&Steps\.member\.2\.ActionOnFailure=TERMINATE_JOB_FLOW&Steps\.member\.2\.HadoopJarStep\.Args\.member\.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps\.member\.2\.HadoopJarStep\.Args\.member\.2=--run-pig-script&Steps\.member\.2\.HadoopJarStep\.Args\.member\.3=--args&Steps\.member\.2\.HadoopJarStep\.Args\.member\.4=-p&Steps\.member\.2\.HadoopJarStep\.Args\.member\.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps\.member\.2\.HadoopJarStep\.Args\.member\.6=-p&Steps\.member\.2\.HadoopJarStep\.Args\.member\.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-04&Steps\.member\.2\.HadoopJarStep\.Args\.member\.8=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports\.pig&Steps\.member\.2\.HadoopJarStep\.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner\.jar&Steps\.member\.2\.Name=Run%20Pig%20Script/
+     body:
+     headers:
+       accept:
+       - "*/*; q=0.5, application/xml"
+       accept-encoding:
+       - gzip, deflate
+   response: !ruby/struct:VCR::Response
+     status: !ruby/struct:VCR::ResponseStatus
+       code: 200
+       message: OK
+     headers:
+       x-amzn-requestid:
+       - b237df84-761d-11e0-b625-05a26eeda1d8
+       content-type:
+       - text/xml
+       date:
+       - Wed, 04 May 2011 07:11:13 GMT
+       content-length:
+       - "297"
+     body: |
+       <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
+         <RunJobFlowResult>
+           <JobFlowId>j-16PZ24OED71C6</JobFlowId>
+         </RunJobFlowResult>
+         <ResponseMetadata>
+           <RequestId>b237df84-761d-11e0-b625-05a26eeda1d8</RequestId>
+         </ResponseMetadata>
+       </RunJobFlowResponse>
+
+   http_version: "1.1"
@@ -0,0 +1,96 @@
+ require 'spec_helper'
+
+ describe Elasticity::PigJob do
+
+   describe ".new" do
+
+     it "should have good defaults" do
+       hive = Elasticity::PigJob.new("access", "secret")
+       hive.aws_access_key_id.should == "access"
+       hive.aws_secret_access_key.should == "secret"
+       hive.ec2_key_name.should == "default"
+       hive.hadoop_version.should == "0.20"
+       hive.instance_count.should == 2
+       hive.master_instance_type.should == "m1.small"
+       hive.name.should == "Elasticity Pig Job"
+       hive.slave_instance_type.should == "m1.small"
+       hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
+       hive.log_uri.should == nil
+     end
+
+   end
+
+   describe "#run" do
+
+     it "should run the script with the specified variables and return the jobflow_id" do
+       aws = Elasticity::EMR.new("", "")
+       aws.should_receive(:run_job_flow).with({
+         :name => "Elasticity Pig Job",
+         :log_uri => "s3n://slif-test/output/logs",
+         :instances => {
+           :ec2_key_name => "default",
+           :hadoop_version => "0.20",
+           :instance_count => 2,
+           :master_instance_type => "m1.small",
+           :slave_instance_type => "m1.small",
+         },
+         :steps => [
+           {
+             :action_on_failure => "TERMINATE_JOB_FLOW",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--base-path",
+                 "s3://elasticmapreduce/libs/pig/",
+                 "--install-pig"
+               ],
+             },
+             :name => "Setup Pig"
+           },
+           {
+             :action_on_failure => "CONTINUE",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--run-pig-script",
+                 "--args",
+                 "-p", "OUTPUT=s3n://slif-pig-test/output",
+                 "-p", "XREFS=s3n://slif-pig-test/xrefs",
+                 "s3n://slif-pig-test/test.pig"
+               ],
+             },
+             :name => "Run Pig Script"
+           }
+         ]
+       }).and_return("new_jobflow_id")
+       Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
+
+       pig = Elasticity::PigJob.new("access", "secret")
+       pig.log_uri = "s3n://slif-test/output/logs"
+       pig.action_on_failure = "CONTINUE"
+       jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
+         'OUTPUT' => 's3n://slif-pig-test/output',
+         'XREFS' => 's3n://slif-pig-test/xrefs'
+       })
+       jobflow_id.should == "new_jobflow_id"
+     end
+
+   end
+
+   describe "integration happy path" do
+     use_vcr_cassette "pig_job/apache_log_reports", :record => :none
+     it "should kick off the sample Amazion EMR Pig application" do
+       pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
+       pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
+       pig.ec2_key_name = "sharethrough_dev"
+       jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+         "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+         "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
+       })
+       jobflow_id.should == "j-16PZ24OED71C6"
+     end
+   end
+
+ end
metadata CHANGED
@@ -1,13 +1,12 @@
  --- !ruby/object:Gem::Specification
  name: elasticity
  version: !ruby/object:Gem::Version
-   hash: 17
+   hash: 11
    prerelease:
    segments:
    - 1
-   - 1
-   - 1
-   version: 1.1.1
+   - 2
+   version: "1.2"
  platform: ruby
  authors:
  - Robert Slifka
@@ -15,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2011-04-25 00:00:00 -07:00
+ date: 2011-05-04 00:00:00 -07:00
  default_executable:
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -175,6 +174,8 @@ files:
  - lib/elasticity/hive_job.rb
  - lib/elasticity/job_flow.rb
  - lib/elasticity/job_flow_step.rb
+ - lib/elasticity/pig_job.rb
+ - lib/elasticity/simple_job.rb
  - lib/elasticity/version.rb
  - spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml
  - spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml
@@ -183,6 +184,7 @@ files:
  - spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -192,6 +194,7 @@ files:
  - spec/lib/elasticity/hive_job_spec.rb
  - spec/lib/elasticity/job_flow_spec.rb
  - spec/lib/elasticity/job_flow_step_spec.rb
+ - spec/lib/elasticity/pig_job_spec.rb
  - spec/spec_helper.rb
  has_rdoc: true
  homepage: http://www.github.com/rslifka/elasticity
@@ -235,6 +238,7 @@ test_files:
  - spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -244,4 +248,5 @@ test_files:
  - spec/lib/elasticity/hive_job_spec.rb
  - spec/lib/elasticity/job_flow_spec.rb
  - spec/lib/elasticity/job_flow_step_spec.rb
+ - spec/lib/elasticity/pig_job_spec.rb
  - spec/spec_helper.rb