buzzoink 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.rdoc +50 -0
- data/Rakefile +27 -0
- data/config/buzzoink.yml +0 -0
- data/lib/buzzoink/configuration.rb +192 -0
- data/lib/buzzoink/job.rb +273 -0
- data/lib/buzzoink/version.rb +3 -0
- data/lib/buzzoink.rb +54 -0
- data/lib/tasks/buzzoink_tasks.rake +4 -0
- metadata +158 -0
data/MIT-LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright 2012 YOURNAME
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
= Buzzoink
|
|
2
|
+
|
|
3
|
+
A very simple interface for Amazon's EMR service. It will start interactive Hive, Pig and Streaming sessions.
|
|
4
|
+
It only exposes a few pieces of information like state, name and DNS name for the master machine. It also allows only a few
|
|
5
|
+
machine configurations
|
|
6
|
+
|
|
7
|
+
{<img src="https://secure.travis-ci.org/Raybeam/buzzoink.png?branch=master" />}[http://travis-ci.org/Raybeam/buzzoink]
|
|
8
|
+
|
|
9
|
+
== Cluster sizes
|
|
10
|
+
|
|
11
|
+
[test] Very small, 1 master and 1 slave. It is basically for running tests on the gem
|
|
12
|
+
|
|
13
|
+
[development] Around 10 slaves. It's for testing small EMR jobs
|
|
14
|
+
|
|
15
|
+
[production] At least 20 machines. Up to 60 if the spot reservations come online
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
== Examples
|
|
19
|
+
|
|
20
|
+
=== Configuration
|
|
21
|
+
|
|
22
|
+
For a full list of configuration options, check the documentation.
|
|
23
|
+
|
|
24
|
+
Buzzoink.configure do | c |
|
|
25
|
+
c.aws_access_key_id = 'accesskey'
|
|
26
|
+
c.aws_secret_access_key = 'secretkey'
|
|
27
|
+
c.instance_settings = :production
|
|
28
|
+
c.max_instances = 20 # This will be 60 if spot reservations fire
|
|
29
|
+
c.key_name = 'mysecretkey'
|
|
30
|
+
c.epoch = DateTime.now.ago(1.hour)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
=== Start a hive instance
|
|
35
|
+
|
|
36
|
+
You can also start a Pig or Streaming instance
|
|
37
|
+
|
|
38
|
+
hive = Buzzoink::Job.find_or_start_hive
|
|
39
|
+
begin
|
|
40
|
+
sleep(20)
|
|
41
|
+
end until hive.ready?
|
|
42
|
+
puts "Connect here: #{hive.public_dns}"
|
|
43
|
+
|
|
44
|
+
=== Kill all jobs managed by Buzzoink
|
|
45
|
+
|
|
46
|
+
This kills jobs of all types (e.g. Hive, Pig, etc)
|
|
47
|
+
|
|
48
|
+
Buzzoink::Job.kill_all
|
|
49
|
+
|
|
50
|
+
This project uses MIT-LICENSE.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env rake
|
|
2
|
+
begin
|
|
3
|
+
require 'bundler/setup'
|
|
4
|
+
rescue LoadError
|
|
5
|
+
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
|
6
|
+
end
|
|
7
|
+
begin
|
|
8
|
+
require 'rdoc/task'
|
|
9
|
+
rescue LoadError
|
|
10
|
+
require 'rdoc/rdoc'
|
|
11
|
+
require 'rake/rdoctask'
|
|
12
|
+
RDoc::Task = Rake::RDocTask
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
|
16
|
+
rdoc.rdoc_dir = 'rdoc'
|
|
17
|
+
rdoc.title = 'Buzzoink'
|
|
18
|
+
rdoc.options << '--line-numbers'
|
|
19
|
+
rdoc.rdoc_files.include('README.rdoc')
|
|
20
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
Bundler::GemHelper.install_tasks
|
|
27
|
+
|
data/config/buzzoink.yml
ADDED
|
File without changes
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
require 'fog'
|
|
2
|
+
|
|
3
|
+
module Buzzoink
|
|
4
|
+
class Configuration
|
|
5
|
+
|
|
6
|
+
# Sets the prefix of the process name. It's
|
|
7
|
+
# a way to keep track of all buzzoink processes.
|
|
8
|
+
# Don't change this unless you know what you're
|
|
9
|
+
# doing. Use "name=" instead if you want to just
|
|
10
|
+
# set your process apart.
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# Buzzoink.configure do | c |
|
|
14
|
+
# c.name_prefix = "My favorite hive process"
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# @default Managed Buzzoink process
|
|
18
|
+
#
|
|
19
|
+
attr_writer :name_prefix
|
|
20
|
+
def name_prefix
|
|
21
|
+
@name_prefix ||= 'Managed Buzzoink process'
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Sets the process name.
|
|
25
|
+
#
|
|
26
|
+
# @example
|
|
27
|
+
# Buzzoink.configure do | c |
|
|
28
|
+
# c.name = "One off test"
|
|
29
|
+
# end
|
|
30
|
+
#
|
|
31
|
+
# @default Main EMR process
|
|
32
|
+
#
|
|
33
|
+
attr_writer :name
|
|
34
|
+
def name
|
|
35
|
+
@name ||= 'Main EMR process'
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Full name of the process
|
|
39
|
+
def full_name *args
|
|
40
|
+
suffix_options = args.extract_options!
|
|
41
|
+
|
|
42
|
+
"#{name_prefix} : #{name}".tap do | str |
|
|
43
|
+
unless suffix_options.blank?
|
|
44
|
+
suffix = " " + suffix_options.map { | k, v | "#{k} => #{v}" }.join(", ")
|
|
45
|
+
str << suffix
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Sets a backstop datetime for all EMR
|
|
51
|
+
# queries. Set it to a time before which
|
|
52
|
+
# you care nothing about.
|
|
53
|
+
#
|
|
54
|
+
# @example
|
|
55
|
+
# Buzzoink.configure do | c |
|
|
56
|
+
# c.epoch = DateTime.now.ago(3.weeks)
|
|
57
|
+
# end
|
|
58
|
+
#
|
|
59
|
+
# @default One day ago
|
|
60
|
+
#
|
|
61
|
+
attr_writer :epoch
|
|
62
|
+
def epoch
|
|
63
|
+
@epoch ||= DateTime.now.ago(1.day)
|
|
64
|
+
@epoch.iso8601
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Sets the max number of on demand slaves
|
|
68
|
+
# for this job flow.
|
|
69
|
+
# If there are spot instances included
|
|
70
|
+
# in the deployment, their number will
|
|
71
|
+
# be double this setting
|
|
72
|
+
#
|
|
73
|
+
# For example, if you leave the default of 20
|
|
74
|
+
# set, there will be 20 on demand instances,
|
|
75
|
+
# 40 spot instances and a master resulting
|
|
76
|
+
# in 61 total machines.
|
|
77
|
+
#
|
|
78
|
+
attr_writer :max_instances
|
|
79
|
+
def max_instances
|
|
80
|
+
@max_instances ||= 20
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
attr_accessor :aws_access_key_id
|
|
84
|
+
attr_accessor :aws_secret_access_key
|
|
85
|
+
attr_accessor :key_name
|
|
86
|
+
attr_accessor :key_path
|
|
87
|
+
attr_accessor :s3_log_location
|
|
88
|
+
|
|
89
|
+
def emr
|
|
90
|
+
@emr ||= Fog::AWS::EMR.new(
|
|
91
|
+
:aws_access_key_id => @aws_access_key_id,
|
|
92
|
+
:aws_secret_access_key => @aws_secret_access_key
|
|
93
|
+
)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def full_emr_configuration
|
|
97
|
+
instances = {
|
|
98
|
+
:ec2_key_name => key_name,
|
|
99
|
+
:instance_groups => instance_settings_config,
|
|
100
|
+
:keep_job_flow_alive_when_no_steps => true,
|
|
101
|
+
:termination_protected => false
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
config = {
|
|
105
|
+
:ami_version => '2.0', # Latest
|
|
106
|
+
:instances => instances,
|
|
107
|
+
}
|
|
108
|
+
config[:log_uri] = s3_log_location unless s3_log_location.nil?
|
|
109
|
+
|
|
110
|
+
Buzzoink.convert_hash_keys(config)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Gets one of the EMR preset configurations
|
|
114
|
+
# for instance groups. The presets are currently
|
|
115
|
+
# test and production. Test is two machines (1 master, 1 core), Production
|
|
116
|
+
# is 61 machines by default (1 master, 20 core, 40 spot).
|
|
117
|
+
#
|
|
118
|
+
# @example
|
|
119
|
+
# Buzzoink.configure do | c |
|
|
120
|
+
# c.instance_settings = :production
|
|
121
|
+
# end
|
|
122
|
+
#
|
|
123
|
+
# @default :test
|
|
124
|
+
attr_writer :instance_settings
|
|
125
|
+
def instance_settings *args
|
|
126
|
+
@instance_settings ||= :test
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def instance_settings_config
|
|
130
|
+
meth = "instance_settings_#{instance_settings.to_s}"
|
|
131
|
+
self.send(meth)
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
def instance_settings_production
|
|
135
|
+
instance_groups = []
|
|
136
|
+
|
|
137
|
+
# Master
|
|
138
|
+
instance_groups << {
|
|
139
|
+
:instance_count => 1,
|
|
140
|
+
:instance_role => 'MASTER',
|
|
141
|
+
:instance_type => 'm1.small',
|
|
142
|
+
:market => 'ON_DEMAND',
|
|
143
|
+
:name => 'Master group'
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
# Core
|
|
147
|
+
instance_groups << {
|
|
148
|
+
:instance_count => max_instances,
|
|
149
|
+
:instance_role => 'CORE',
|
|
150
|
+
:instance_type => 'c1.medium',
|
|
151
|
+
:market => 'ON_DEMAND',
|
|
152
|
+
:name => 'Compute group'
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
# Task :: Spot instances
|
|
156
|
+
instance_groups << {
|
|
157
|
+
:instance_count => (max_instances * 2),
|
|
158
|
+
:bid_price => '0.08',
|
|
159
|
+
:instance_role => 'TASK',
|
|
160
|
+
:instance_type => 'c1.medium',
|
|
161
|
+
:market => 'SPOT',
|
|
162
|
+
:name => 'Flex group'
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
instance_groups
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def instance_settings_test
|
|
169
|
+
instance_groups = []
|
|
170
|
+
|
|
171
|
+
# Master
|
|
172
|
+
instance_groups << {
|
|
173
|
+
:instance_count => 1,
|
|
174
|
+
:instance_role => 'MASTER',
|
|
175
|
+
:instance_type => 'm1.small',
|
|
176
|
+
:market => 'ON_DEMAND',
|
|
177
|
+
:name => 'Master group'
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# Core
|
|
181
|
+
instance_groups << {
|
|
182
|
+
:instance_count => 1,
|
|
183
|
+
:instance_role => 'CORE',
|
|
184
|
+
:instance_type => 'c1.medium',
|
|
185
|
+
:market => 'ON_DEMAND',
|
|
186
|
+
:name => 'Compute group'
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
instance_groups
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
data/lib/buzzoink/job.rb
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
module Buzzoink
|
|
2
|
+
# A representation of the job in Amazon's EMR system. There are a few dynamic methods
|
|
3
|
+
# created in this object. They are basically candy backed by another method in this class
|
|
4
|
+
#
|
|
5
|
+
# [start_hive] start(:type => :hive)
|
|
6
|
+
# [start_pig] start(:type => :pig)
|
|
7
|
+
# [find_or_start_hive] find_or_start(:type => :hive)
|
|
8
|
+
# [find_or_start_pig] find_or_start(:type => :pig)
|
|
9
|
+
#
|
|
10
|
+
class Job
|
|
11
|
+
attr_reader :job_flow_id
|
|
12
|
+
attr_reader :body
|
|
13
|
+
|
|
14
|
+
READY_STATES = ['RUNNING', 'WAITING']
|
|
15
|
+
PENDING_STATES = ['STARTING', 'BOOTSTRAPPING']
|
|
16
|
+
ACTIVE_STATES = READY_STATES + PENDING_STATES
|
|
17
|
+
|
|
18
|
+
class << self
|
|
19
|
+
# Gets an EMR job based on job flow ID
|
|
20
|
+
# This is not limited to Buzzoink jobs
|
|
21
|
+
#
|
|
22
|
+
# @example
|
|
23
|
+
# Buzzoink::Job.get 'j-234324325'
|
|
24
|
+
#
|
|
25
|
+
#
|
|
26
|
+
def get job_id
|
|
27
|
+
begin
|
|
28
|
+
job_flows = Buzzoink.emr.describe_job_flows('JobFlowIds' => [job_id]).body
|
|
29
|
+
|
|
30
|
+
self.new(job_flows['JobFlows'].first)
|
|
31
|
+
rescue Excon::Errors::Error => e
|
|
32
|
+
raise Buzzoink::NoJobError if e.message =~ /Specified job flow ID not valid/
|
|
33
|
+
raise e
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Get all EMR jobs created after the epoch. The EMR
|
|
38
|
+
# jobs are instantiated as Buzzoink::Job objects
|
|
39
|
+
# Not limited to Buzzoink jobs
|
|
40
|
+
#
|
|
41
|
+
# @example
|
|
42
|
+
# jobs = Buzzoink::Job.get_jobs
|
|
43
|
+
#
|
|
44
|
+
#
|
|
45
|
+
def get_jobs
|
|
46
|
+
jobs = []
|
|
47
|
+
raw = Buzzoink.emr.describe_job_flows('CreatedAfter' => Buzzoink.configure.epoch).body
|
|
48
|
+
raw['JobFlows'].each do | jf |
|
|
49
|
+
jobs << self.new(jf)
|
|
50
|
+
end
|
|
51
|
+
jobs
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Gets all jobs managed by Buzzoink. Pass
|
|
55
|
+
# a :type to get only certain job types
|
|
56
|
+
#
|
|
57
|
+
# @example
|
|
58
|
+
# all_buzz_jobs = Buzzoink::Job.get_managed_jobs
|
|
59
|
+
# just_hive_jobs = Buzzoink::Job.get_managed_jobs :type => :hive
|
|
60
|
+
#
|
|
61
|
+
# @default all Buzzoink jobs
|
|
62
|
+
def get_managed_jobs *args
|
|
63
|
+
options = args.extract_options!
|
|
64
|
+
jobs = get_jobs
|
|
65
|
+
|
|
66
|
+
return jobs.select { | j | j.body['Name'] =~ /^#{Buzzoink.configure.name_prefix}/} if options.blank?
|
|
67
|
+
|
|
68
|
+
full_name = Buzzoink.configure.full_name(options)
|
|
69
|
+
jobs.select { | j | j.body['Name'] == full_name}
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Gets all active jobs managed by Buzzoink. Pass
|
|
73
|
+
# a :type to get only certain job types
|
|
74
|
+
#
|
|
75
|
+
# @example
|
|
76
|
+
# all_buzz_jobs = Buzzoink::Job.get_active_managed_jobs
|
|
77
|
+
# just_hive_jobs = Buzzoink::Job.get_active_managed_jobs :type => :hive
|
|
78
|
+
#
|
|
79
|
+
# @default all active Buzzoink jobs
|
|
80
|
+
def get_active_managed_jobs *args
|
|
81
|
+
options = args.extract_options!
|
|
82
|
+
jobs = get_managed_jobs options
|
|
83
|
+
|
|
84
|
+
jobs.select { | j | ACTIVE_STATES.include?(j.body['ExecutionStatusDetail']['State'])}
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# If a job type is already running, return it. Otherwise start
|
|
88
|
+
# a new job of that type
|
|
89
|
+
#
|
|
90
|
+
# @example
|
|
91
|
+
# job = Buzzoink::Job.find_or_start :type => :hive
|
|
92
|
+
#
|
|
93
|
+
# @default :type => :pig
|
|
94
|
+
def find_or_start *args
|
|
95
|
+
options = args.extract_options!
|
|
96
|
+
options.reverse_merge! :type => :pig
|
|
97
|
+
|
|
98
|
+
jobs = get_active_managed_jobs :type => options[:type]
|
|
99
|
+
return jobs.first unless jobs.blank?
|
|
100
|
+
|
|
101
|
+
self.start options
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Kill all Buzzoink managed jobs
|
|
105
|
+
#
|
|
106
|
+
# @example
|
|
107
|
+
# Buzzoink::Job.kill_all
|
|
108
|
+
#
|
|
109
|
+
def kill_all
|
|
110
|
+
jobs = get_managed_jobs
|
|
111
|
+
return false if jobs.blank?
|
|
112
|
+
|
|
113
|
+
ids = jobs.map(&:id)
|
|
114
|
+
kill_jobs ids
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Kill jobs based on job flow ID.
|
|
118
|
+
# This is not limited to Buzzoink jobs
|
|
119
|
+
#
|
|
120
|
+
# @example
|
|
121
|
+
# true_or_false = Buzzoink::Job.kill_jobs ['j-23432432', 'j-54654']
|
|
122
|
+
#
|
|
123
|
+
def kill_jobs job_flow_ids
|
|
124
|
+
# In case a single ID is sent
|
|
125
|
+
flows = []
|
|
126
|
+
flows << job_flow_ids
|
|
127
|
+
|
|
128
|
+
result = Buzzoink.emr.terminate_job_flows('JobFlowIds' => flows.flatten)
|
|
129
|
+
!!result.body['RequestId']
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Starts a job flow in EMR. This job flow
|
|
133
|
+
# can either be streaming, hive or pig.
|
|
134
|
+
#
|
|
135
|
+
# @example
|
|
136
|
+
# Buzzoink::Job.start(:type => :hive)
|
|
137
|
+
#
|
|
138
|
+
# @default :type => :pig
|
|
139
|
+
#
|
|
140
|
+
def start *args
|
|
141
|
+
options = args.extract_options!
|
|
142
|
+
options.reverse_merge! :type => :pig
|
|
143
|
+
|
|
144
|
+
active_jobs = get_active_managed_jobs :type => options[:type]
|
|
145
|
+
raise Buzzoink::DuplicateJobError unless active_jobs.blank?
|
|
146
|
+
|
|
147
|
+
conf = Buzzoink.configure
|
|
148
|
+
job = Buzzoink.emr.run_job_flow(conf.full_name(:type => options[:type]), conf.full_emr_configuration)
|
|
149
|
+
|
|
150
|
+
# Add appropriate step for interactive job
|
|
151
|
+
if options[:type] == :hive
|
|
152
|
+
Buzzoink.emr.add_job_flow_steps(job.body['JobFlowId'], {'Steps' => [hive_step]})
|
|
153
|
+
elsif options[:type] == :pig
|
|
154
|
+
Buzzoink.emr.add_job_flow_steps(job.body['JobFlowId'], {'Steps' => [pig_step]})
|
|
155
|
+
else
|
|
156
|
+
raise Buzzoink::BuzzoinkError, "Current type is invalid :: #{options[:type]}" # Buzzoink::Error is not defined; BuzzoinkError is the gem's base error class
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
get job.body['JobFlowId']
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
# :nodoc:
|
|
163
|
+
def hive_step
|
|
164
|
+
{
|
|
165
|
+
'Name' => 'Hive',
|
|
166
|
+
'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
|
|
167
|
+
'HadoopJarStep' => {
|
|
168
|
+
'Jar' => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
|
169
|
+
'Args' => [
|
|
170
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
|
171
|
+
'--base-path', 's3://elasticmapreduce/libs/hive/',
|
|
172
|
+
'--install-hive',
|
|
173
|
+
'--hive-versions', '0.7.1.1'
|
|
174
|
+
]
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# :nodoc:
|
|
180
|
+
def pig_step
|
|
181
|
+
{
|
|
182
|
+
'Name' => 'Pig',
|
|
183
|
+
'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
|
|
184
|
+
'HadoopJarStep' => {
|
|
185
|
+
'Jar' => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
|
186
|
+
'Args' => [
|
|
187
|
+
's3://elasticmapreduce/libs/pig/pig-script',
|
|
188
|
+
'--base-path', 's3://elasticmapreduce/libs/pig/',
|
|
189
|
+
'--install-pig'
|
|
190
|
+
]
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# Some helper methods are included for each type of
|
|
196
|
+
# job.
|
|
197
|
+
#
|
|
198
|
+
# @example
|
|
199
|
+
# j = Buzzoink::Job.start_hive
|
|
200
|
+
# j = Buzzoink::Job.find_or_start_pig
|
|
201
|
+
#
|
|
202
|
+
[:hive, :pig, :streaming].each do | type |
|
|
203
|
+
self.class_eval <<-RUBY
|
|
204
|
+
def start_#{type.to_s}
|
|
205
|
+
start(:type => :#{type})
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def find_or_start_#{type.to_s}
|
|
209
|
+
find_or_start(:type => :#{type})
|
|
210
|
+
end
|
|
211
|
+
RUBY
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def initialize body
|
|
216
|
+
@job_flow_id = body['JobFlowId']
|
|
217
|
+
@body = body
|
|
218
|
+
end
|
|
219
|
+
alias_method :id, :job_flow_id
|
|
220
|
+
|
|
221
|
+
# Kills the current job
|
|
222
|
+
#
|
|
223
|
+
# @example
|
|
224
|
+
# job.kill
|
|
225
|
+
#
|
|
226
|
+
#
|
|
227
|
+
def kill
|
|
228
|
+
self.class.kill_jobs self.id
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
# Pulls the latest description for this
|
|
232
|
+
# job from EMR
|
|
233
|
+
def refresh!
|
|
234
|
+
new_job = self.class.get id
|
|
235
|
+
@body = new_job.body
|
|
236
|
+
self
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# The current boot state of the instance
|
|
240
|
+
def state
|
|
241
|
+
body['ExecutionStatusDetail']['State']
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Boolean for determining whether the instance
|
|
245
|
+
# is ready for operation
|
|
246
|
+
def ready?
|
|
247
|
+
READY_STATES.include?(state)
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def steps
|
|
251
|
+
body['Steps']
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# Address to use for connection
|
|
255
|
+
def public_dns
|
|
256
|
+
body['Instances']['MasterPublicDnsName']
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Is this instance protected from termination
|
|
260
|
+
def termination_protected?
|
|
261
|
+
body['Instances']['TerminationProtected'] == 'true'
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# The type of job (e.g. Hive, Pig, etc)
|
|
265
|
+
def type
|
|
266
|
+
if body['Name'] =~ /type => (\w+)/
|
|
267
|
+
return $1.to_sym
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
return nil
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
end
|
data/lib/buzzoink.rb
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'active_support/core_ext'
|
|
2
|
+
|
|
3
|
+
require 'buzzoink/configuration'
|
|
4
|
+
require 'buzzoink/job'
|
|
5
|
+
|
|
6
|
+
module Buzzoink
|
|
7
|
+
extend self
|
|
8
|
+
|
|
9
|
+
def configure
|
|
10
|
+
@configuration ||= Configuration.new
|
|
11
|
+
if block_given?
|
|
12
|
+
yield @configuration
|
|
13
|
+
end
|
|
14
|
+
@configuration
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def emr
|
|
18
|
+
configure.emr
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def clear_configuration
|
|
22
|
+
@configuration = nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
#
|
|
26
|
+
# Candy so I can use underscore symbol keys
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
# Camel case an underscore symbol
|
|
30
|
+
def camelcase_key(k)
|
|
31
|
+
k.to_s.camelize
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Recursively set hash keys
|
|
35
|
+
def convert_hash_keys(value)
|
|
36
|
+
case value
|
|
37
|
+
when Array
|
|
38
|
+
value.map { |v| convert_hash_keys(v) }
|
|
39
|
+
when Hash
|
|
40
|
+
Hash[value.map { |k, v| [camelcase_key(k), convert_hash_keys(v)] }]
|
|
41
|
+
else
|
|
42
|
+
value
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
class BuzzoinkError < StandardError
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
class DuplicateJobError < BuzzoinkError
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
class NoJobError < BuzzoinkError
|
|
53
|
+
end
|
|
54
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: buzzoink
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
prerelease:
|
|
5
|
+
version: 0.0.3
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Chris Hagar
|
|
9
|
+
- Bob Briski
|
|
10
|
+
autorequire:
|
|
11
|
+
bindir: bin
|
|
12
|
+
cert_chain: []
|
|
13
|
+
|
|
14
|
+
date: 2012-02-10 00:00:00 Z
|
|
15
|
+
dependencies:
|
|
16
|
+
- !ruby/object:Gem::Dependency
|
|
17
|
+
name: activesupport
|
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
19
|
+
none: false
|
|
20
|
+
requirements:
|
|
21
|
+
- - ~>
|
|
22
|
+
- !ruby/object:Gem::Version
|
|
23
|
+
version: "3.0"
|
|
24
|
+
type: :runtime
|
|
25
|
+
prerelease: false
|
|
26
|
+
version_requirements: *id001
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: addressable
|
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
30
|
+
none: false
|
|
31
|
+
requirements:
|
|
32
|
+
- - ~>
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: "2.2"
|
|
35
|
+
type: :runtime
|
|
36
|
+
prerelease: false
|
|
37
|
+
version_requirements: *id002
|
|
38
|
+
- !ruby/object:Gem::Dependency
|
|
39
|
+
name: andand
|
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
|
41
|
+
none: false
|
|
42
|
+
requirements:
|
|
43
|
+
- - ~>
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: "1.3"
|
|
46
|
+
type: :runtime
|
|
47
|
+
prerelease: false
|
|
48
|
+
version_requirements: *id003
|
|
49
|
+
- !ruby/object:Gem::Dependency
|
|
50
|
+
name: fog
|
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
|
52
|
+
none: false
|
|
53
|
+
requirements:
|
|
54
|
+
- - ~>
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: 1.1.2
|
|
57
|
+
type: :runtime
|
|
58
|
+
prerelease: false
|
|
59
|
+
version_requirements: *id004
|
|
60
|
+
- !ruby/object:Gem::Dependency
|
|
61
|
+
name: rspec-rails
|
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
|
63
|
+
none: false
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: "0"
|
|
68
|
+
type: :development
|
|
69
|
+
prerelease: false
|
|
70
|
+
version_requirements: *id005
|
|
71
|
+
- !ruby/object:Gem::Dependency
|
|
72
|
+
name: webmock
|
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
|
74
|
+
none: false
|
|
75
|
+
requirements:
|
|
76
|
+
- - ">="
|
|
77
|
+
- !ruby/object:Gem::Version
|
|
78
|
+
version: "0"
|
|
79
|
+
type: :development
|
|
80
|
+
prerelease: false
|
|
81
|
+
version_requirements: *id006
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: fabrication
|
|
84
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
|
85
|
+
none: false
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: "0"
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: *id007
|
|
93
|
+
- !ruby/object:Gem::Dependency
|
|
94
|
+
name: vcr
|
|
95
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
|
96
|
+
none: false
|
|
97
|
+
requirements:
|
|
98
|
+
- - ">="
|
|
99
|
+
- !ruby/object:Gem::Version
|
|
100
|
+
version: "0"
|
|
101
|
+
type: :development
|
|
102
|
+
prerelease: false
|
|
103
|
+
version_requirements: *id008
|
|
104
|
+
description: Simple interface for Pig, Hive and Streaming over EMR.
|
|
105
|
+
email:
|
|
106
|
+
- chagar@raybeam.com
|
|
107
|
+
- bbriski@raybeam.com
|
|
108
|
+
executables: []
|
|
109
|
+
|
|
110
|
+
extensions: []
|
|
111
|
+
|
|
112
|
+
extra_rdoc_files: []
|
|
113
|
+
|
|
114
|
+
files:
|
|
115
|
+
- config/buzzoink.yml
|
|
116
|
+
- lib/buzzoink/configuration.rb
|
|
117
|
+
- lib/buzzoink/job.rb
|
|
118
|
+
- lib/buzzoink/version.rb
|
|
119
|
+
- lib/buzzoink.rb
|
|
120
|
+
- lib/tasks/buzzoink_tasks.rake
|
|
121
|
+
- MIT-LICENSE
|
|
122
|
+
- Rakefile
|
|
123
|
+
- README.rdoc
|
|
124
|
+
homepage: https://github.com/Raybeam/buzzoink
|
|
125
|
+
licenses: []
|
|
126
|
+
|
|
127
|
+
post_install_message:
|
|
128
|
+
rdoc_options: []
|
|
129
|
+
|
|
130
|
+
require_paths:
|
|
131
|
+
- lib
|
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
|
+
none: false
|
|
134
|
+
requirements:
|
|
135
|
+
- - ">="
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
hash: 3861862602005810237
|
|
138
|
+
segments:
|
|
139
|
+
- 0
|
|
140
|
+
version: "0"
|
|
141
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
|
+
none: false
|
|
143
|
+
requirements:
|
|
144
|
+
- - ">="
|
|
145
|
+
- !ruby/object:Gem::Version
|
|
146
|
+
hash: 3861862602005810237
|
|
147
|
+
segments:
|
|
148
|
+
- 0
|
|
149
|
+
version: "0"
|
|
150
|
+
requirements: []
|
|
151
|
+
|
|
152
|
+
rubyforge_project:
|
|
153
|
+
rubygems_version: 1.8.15
|
|
154
|
+
signing_key:
|
|
155
|
+
specification_version: 3
|
|
156
|
+
summary: Start hive or pig processes in EMR
|
|
157
|
+
test_files: []
|
|
158
|
+
|