elasticity 1.1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,22 @@
- === 1.0 ===
+ === 1.2 ===
 
- * Released!
+ * Added PigJob!
 
- === 1.0.1 ===
+ === 1.1.1 ===
 
- * Added LICENSE.
+ * HiveJob critical bug fixed, now it works :)
+ * Added log_uri and action_on_failure as options to HiveJob.
+ * Added integration tests to HiveJob.
 
  === 1.1 ===
 
  * Added HiveJob, a simplified way to launch basic Hive job flows.
  * Added HISTORY.
 
- === 1.1.1 ===
+ === 1.0.1 ===
 
- * HiveJob critical bug fixed, now it works :)
- * Added log_uri and action_on_failure as options to HiveJob.
- * Added integration tests to HiveJob.
+ * Added LICENSE.
+
+ === 1.0 ===
+
+ * Released!
@@ -14,7 +14,19 @@ All you have to do is <code>require 'elasticity'</code> and you're all set!
 
  = Simplified API Reference =
 
- Elasticity currently provides simplified access to launching Hive job flows. Eventually I'd like to add several simplified wrappers, but I only need Hive at the moment :)
+ Elasticity currently provides simplified access to launching Hive and Pig job flows, specifying several default values that you may optionally override:
+
+ <pre>
+ @action_on_failure = "TERMINATE_JOB_FLOW"
+ @ec2_key_name = "default"
+ @hadoop_version = "0.20"
+ @instance_count = 2
+ @master_instance_type = "m1.small"
+ @name = "Elasticity Job"
+ @slave_instance_type = "m1.small"
+ </pre>
+
+ These are all accessible from HiveJob and PigJob instances. See the PigJob description for an example.
 
  == Hive ==
 
@@ -30,6 +42,22 @@ HiveJob allows you to quickly launch Hive jobs without having to understand the
  > "j-129V5AQFMKO1C"
  </pre>
 
+ == Pig ==
+
+ Like HiveJob, PigJob allows you to quickly launch Pig jobs :)
+
+ <pre>
+ pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
+ pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
+ pig.ec2_key_name = "slif_dev"
+ pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+   "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+   "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
+ })
+
+ > "j-16PZ24OED71C6"
+ </pre>
+
  = Amazon API Reference =
 
  Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
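The README hunk above lists the defaults that HiveJob and PigJob now share and expose as plain accessors. A minimal sketch of overriding them before launching a job, assuming the accessor names from this diff; the bucket paths and chosen values here are illustrative, not part of the gem:

require 'elasticity'

# Hypothetical override values; only the accessor names come from the diff above.
hive = Elasticity::HiveJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
hive.instance_count       = 5            # default is 2
hive.master_instance_type = "m1.large"   # default is "m1.small"
hive.action_on_failure    = "CONTINUE"   # default is "TERMINATE_JOB_FLOW"
hive.log_uri              = "s3n://my-bucket/hive/logs"  # unset by default
# hive.run(...) would then launch the job flow with these settings,
# as in the README's Hive and Pig examples.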
@@ -5,9 +5,12 @@ require 'nokogiri'
 
  require 'elasticity/aws_request'
  require 'elasticity/emr'
- require 'elasticity/hive_job'
  require 'elasticity/job_flow'
  require 'elasticity/job_flow_step'
 
+ require 'elasticity/simple_job'
+ require 'elasticity/hive_job'
+ require 'elasticity/pig_job'
+
  module Elasticity
  end
@@ -2,31 +2,11 @@ module Elasticity
 
    # HiveJob allows you quickly easily kick off a Hive jobflow without
    # having to understand the entirety of the EMR API.
-   class HiveJob
-
-     attr_accessor :action_on_failure
-     attr_accessor :aws_access_key_id
-     attr_accessor :aws_secret_access_key
-     attr_accessor :ec2_key_name
-     attr_accessor :name
-     attr_accessor :hadoop_version
-     attr_accessor :instance_count
-     attr_accessor :log_uri
-     attr_accessor :master_instance_type
-     attr_accessor :slave_instance_type
+   class HiveJob < Elasticity::SimpleJob
 
      def initialize(aws_access_key_id, aws_secret_access_key)
-       @action_on_failure = "TERMINATE_JOB_FLOW"
-       @aws_access_key_id = aws_access_key_id
-       @aws_secret_access_key = aws_secret_access_key
-       @ec2_key_name = "default"
-       @hadoop_version = "0.20"
-       @instance_count = 2
-       @master_instance_type = "m1.small"
+       super
        @name = "Elasticity Hive Job"
-       @slave_instance_type = "m1.small"
-
-       @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
      end
 
      # Run the specified Hive script with the specified variables.
@@ -0,0 +1,67 @@
+ module Elasticity
+
+   class PigJob < Elasticity::SimpleJob
+
+     def initialize(aws_access_key_id, aws_secret_access_key)
+       super
+       @name = "Elasticity Pig Job"
+     end
+
+     # Run the specified Pig script with the specified variables.
+     #
+     #   pig = Elasticity::PigJob.new("access", "secret")
+     #   jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
+     #     'SCRIPTS' => 's3n://slif-pig-test/scripts',
+     #     'OUTPUT' => 's3n://slif-pig-test/output',
+     #     'XREFS' => 's3n://slif-pig-test/xrefs'
+     #   })
+     #
+     # The variables are accessible within your Pig scripts by using the
+     # standard ${NAME} syntax.
+     def run(pig_script, pig_variables={})
+       script_arguments = ["s3://elasticmapreduce/libs/pig/pig-script", "--run-pig-script", "--args"]
+       pig_variables.keys.sort.each do |variable_name|
+         script_arguments.concat(["-p", "#{variable_name}=#{pig_variables[variable_name]}"])
+       end
+       script_arguments << pig_script
+       jobflow_config = {
+         :name => @name,
+         :instances => {
+           :ec2_key_name => @ec2_key_name,
+           :hadoop_version => @hadoop_version,
+           :instance_count => @instance_count,
+           :master_instance_type => @master_instance_type,
+           :slave_instance_type => @slave_instance_type,
+         },
+         :steps => [
+           {
+             :action_on_failure => "TERMINATE_JOB_FLOW",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--base-path", "s3://elasticmapreduce/libs/pig/",
+                 "--install-pig"
+               ],
+             },
+             :name => "Setup Pig"
+           },
+           {
+             :action_on_failure => @action_on_failure,
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => script_arguments,
+             },
+             :name => "Run Pig Script"
+           }
+         ]
+       }
+
+       jobflow_config.merge!(:log_uri => @log_uri) if @log_uri
+
+       @emr.run_job_flow(jobflow_config)
+     end
+
+   end
+
+ end
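For reference, a small sketch of the argument assembly PigJob#run performs, mirroring the loop in the new file above; variables are sorted by key, expanded into -p NAME=value pairs, and the script path goes last. Bucket names here are illustrative only:

# Illustrative only -- mirrors the argument assembly in PigJob#run above.
pig_variables = { "OUTPUT" => "s3n://example-bucket/out", "INPUT" => "s3n://example-bucket/in" }
script_arguments = ["s3://elasticmapreduce/libs/pig/pig-script", "--run-pig-script", "--args"]
pig_variables.keys.sort.each do |name|
  script_arguments.concat(["-p", "#{name}=#{pig_variables[name]}"])
end
script_arguments << "s3n://example-bucket/script.pig"
# => [..., "--args", "-p", "INPUT=s3n://example-bucket/in",
#     "-p", "OUTPUT=s3n://example-bucket/out", "s3n://example-bucket/script.pig"]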
@@ -0,0 +1,32 @@
+ module Elasticity
+
+   class SimpleJob
+
+     attr_accessor :action_on_failure
+     attr_accessor :aws_access_key_id
+     attr_accessor :aws_secret_access_key
+     attr_accessor :ec2_key_name
+     attr_accessor :name
+     attr_accessor :hadoop_version
+     attr_accessor :instance_count
+     attr_accessor :log_uri
+     attr_accessor :master_instance_type
+     attr_accessor :slave_instance_type
+
+     def initialize(aws_access_key_id, aws_secret_access_key)
+       @action_on_failure = "TERMINATE_JOB_FLOW"
+       @aws_access_key_id = aws_access_key_id
+       @aws_secret_access_key = aws_secret_access_key
+       @ec2_key_name = "default"
+       @hadoop_version = "0.20"
+       @instance_count = 2
+       @master_instance_type = "m1.small"
+       @name = "Elasticity Job"
+       @slave_instance_type = "m1.small"
+
+       @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
+     end
+
+   end
+
+ end
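SimpleJob is the shared base that HiveJob and PigJob now inherit from: it owns the accessors, the defaults, and the EMR client. A hedged sketch of how a further job type could follow the same pattern as the constructors in this diff; StreamingJob is hypothetical and not part of the gem:

module Elasticity
  # Hypothetical subclass; only the SimpleJob behaviour it relies on is from the diff.
  class StreamingJob < Elasticity::SimpleJob
    def initialize(aws_access_key_id, aws_secret_access_key)
      super  # sets the shared defaults and builds @emr
      @name = "Elasticity Streaming Job"
    end
  end
end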
@@ -1,3 +1,3 @@
  module Elasticity
-   VERSION = "1.1.1"
+   VERSION = "1.2"
  end
@@ -0,0 +1,35 @@
+ ---
+ - !ruby/struct:VCR::HTTPInteraction
+   request: !ruby/struct:VCR::Request
+     method: :get
+     uri: !ruby/regexp /^http:\/\/elasticmapreduce\.amazonaws\.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&Instances\.Ec2KeyName=sharethrough_dev&Instances\.HadoopVersion=0\.20&Instances\.InstanceCount=2&Instances\.MasterInstanceType=m1\.small&Instances\.SlaveInstanceType=m1\.small&LogUri=s3n:\/\/slif-elasticity\/pig-apache\/logs&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps\.member\.1\.ActionOnFailure=TERMINATE_JOB_FLOW&Steps\.member\.1\.HadoopJarStep\.Args\.member\.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps\.member\.1\.HadoopJarStep\.Args\.member\.2=--base-path&Steps\.member\.1\.HadoopJarStep\.Args\.member\.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps\.member\.1\.HadoopJarStep\.Args\.member\.4=--install-pig&Steps\.member\.1\.HadoopJarStep\.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner\.jar&Steps\.member\.1\.Name=Setup%20Pig&Steps\.member\.2\.ActionOnFailure=TERMINATE_JOB_FLOW&Steps\.member\.2\.HadoopJarStep\.Args\.member\.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps\.member\.2\.HadoopJarStep\.Args\.member\.2=--run-pig-script&Steps\.member\.2\.HadoopJarStep\.Args\.member\.3=--args&Steps\.member\.2\.HadoopJarStep\.Args\.member\.4=-p&Steps\.member\.2\.HadoopJarStep\.Args\.member\.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps\.member\.2\.HadoopJarStep\.Args\.member\.6=-p&Steps\.member\.2\.HadoopJarStep\.Args\.member\.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-04&Steps\.member\.2\.HadoopJarStep\.Args\.member\.8=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports\.pig&Steps\.member\.2\.HadoopJarStep\.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner\.jar&Steps\.member\.2\.Name=Run%20Pig%20Script/
+     body:
+     headers:
+       accept:
+       - "*/*; q=0.5, application/xml"
+       accept-encoding:
+       - gzip, deflate
+   response: !ruby/struct:VCR::Response
+     status: !ruby/struct:VCR::ResponseStatus
+       code: 200
+       message: OK
+     headers:
+       x-amzn-requestid:
+       - b237df84-761d-11e0-b625-05a26eeda1d8
+       content-type:
+       - text/xml
+       date:
+       - Wed, 04 May 2011 07:11:13 GMT
+       content-length:
+       - "297"
+     body: |
+       <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
+       <RunJobFlowResult>
+       <JobFlowId>j-16PZ24OED71C6</JobFlowId>
+       </RunJobFlowResult>
+       <ResponseMetadata>
+       <RequestId>b237df84-761d-11e0-b625-05a26eeda1d8</RequestId>
+       </ResponseMetadata>
+       </RunJobFlowResponse>
+
+     http_version: "1.1"
@@ -0,0 +1,96 @@
+ require 'spec_helper'
+
+ describe Elasticity::PigJob do
+
+   describe ".new" do
+
+     it "should have good defaults" do
+       hive = Elasticity::PigJob.new("access", "secret")
+       hive.aws_access_key_id.should == "access"
+       hive.aws_secret_access_key.should == "secret"
+       hive.ec2_key_name.should == "default"
+       hive.hadoop_version.should == "0.20"
+       hive.instance_count.should == 2
+       hive.master_instance_type.should == "m1.small"
+       hive.name.should == "Elasticity Pig Job"
+       hive.slave_instance_type.should == "m1.small"
+       hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
+       hive.log_uri.should == nil
+     end
+
+   end
+
+   describe "#run" do
+
+     it "should run the script with the specified variables and return the jobflow_id" do
+       aws = Elasticity::EMR.new("", "")
+       aws.should_receive(:run_job_flow).with({
+         :name => "Elasticity Pig Job",
+         :log_uri => "s3n://slif-test/output/logs",
+         :instances => {
+           :ec2_key_name => "default",
+           :hadoop_version => "0.20",
+           :instance_count => 2,
+           :master_instance_type => "m1.small",
+           :slave_instance_type => "m1.small",
+         },
+         :steps => [
+           {
+             :action_on_failure => "TERMINATE_JOB_FLOW",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--base-path",
+                 "s3://elasticmapreduce/libs/pig/",
+                 "--install-pig"
+               ],
+             },
+             :name => "Setup Pig"
+           },
+           {
+             :action_on_failure => "CONTINUE",
+             :hadoop_jar_step => {
+               :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+               :args => [
+                 "s3://elasticmapreduce/libs/pig/pig-script",
+                 "--run-pig-script",
+                 "--args",
+                 "-p", "OUTPUT=s3n://slif-pig-test/output",
+                 "-p", "XREFS=s3n://slif-pig-test/xrefs",
+                 "s3n://slif-pig-test/test.pig"
+               ],
+             },
+             :name => "Run Pig Script"
+           }
+         ]
+       }).and_return("new_jobflow_id")
+       Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
+
+       pig = Elasticity::PigJob.new("access", "secret")
+       pig.log_uri = "s3n://slif-test/output/logs"
+       pig.action_on_failure = "CONTINUE"
+       jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
+         'OUTPUT' => 's3n://slif-pig-test/output',
+         'XREFS' => 's3n://slif-pig-test/xrefs'
+       })
+       jobflow_id.should == "new_jobflow_id"
+     end
+
+   end
+
+   describe "integration happy path" do
+     use_vcr_cassette "pig_job/apache_log_reports", :record => :none
+     it "should kick off the sample Amazion EMR Pig application" do
+       pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
+       pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
+       pig.ec2_key_name = "sharethrough_dev"
+       jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+         "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+         "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
+       })
+       jobflow_id.should == "j-16PZ24OED71C6"
+     end
+   end
+
+ end
metadata CHANGED
@@ -1,13 +1,12 @@
  --- !ruby/object:Gem::Specification
  name: elasticity
  version: !ruby/object:Gem::Version
-   hash: 17
+   hash: 11
    prerelease:
    segments:
    - 1
-   - 1
-   - 1
-   version: 1.1.1
+   - 2
+   version: "1.2"
  platform: ruby
  authors:
  - Robert Slifka
@@ -15,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2011-04-25 00:00:00 -07:00
+ date: 2011-05-04 00:00:00 -07:00
  default_executable:
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -175,6 +174,8 @@ files:
  - lib/elasticity/hive_job.rb
  - lib/elasticity/job_flow.rb
  - lib/elasticity/job_flow_step.rb
+ - lib/elasticity/pig_job.rb
+ - lib/elasticity/simple_job.rb
  - lib/elasticity/version.rb
  - spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml
  - spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml
@@ -183,6 +184,7 @@ files:
  - spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -192,6 +194,7 @@ files:
  - spec/lib/elasticity/hive_job_spec.rb
  - spec/lib/elasticity/job_flow_spec.rb
  - spec/lib/elasticity/job_flow_step_spec.rb
+ - spec/lib/elasticity/pig_job_spec.rb
  - spec/spec_helper.rb
  has_rdoc: true
  homepage: http://www.github.com/rslifka/elasticity
@@ -235,6 +238,7 @@ test_files:
  - spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -244,4 +248,5 @@ test_files:
  - spec/lib/elasticity/hive_job_spec.rb
  - spec/lib/elasticity/job_flow_spec.rb
  - spec/lib/elasticity/job_flow_step_spec.rb
+ - spec/lib/elasticity/pig_job_spec.rb
  - spec/spec_helper.rb