buzzoink 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.rdoc +50 -0
- data/Rakefile +27 -0
- data/config/buzzoink.yml +0 -0
- data/lib/buzzoink/configuration.rb +192 -0
- data/lib/buzzoink/job.rb +273 -0
- data/lib/buzzoink/version.rb +3 -0
- data/lib/buzzoink.rb +54 -0
- data/lib/tasks/buzzoink_tasks.rake +4 -0
- metadata +158 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2012 YOURNAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
= Buzzoink
|
2
|
+
|
3
|
+
A very simple interface for Amazon's EMR service. It will start interactive Hive, Pig and Streaming sessions.
|
4
|
+
It only exposes a few pieces of information like state, name and DNS name for the master machine. It also allows only a few
|
5
|
+
machine configurations.
|
6
|
+
|
7
|
+
{<img src="https://secure.travis-ci.org/Raybeam/buzzoink.png?branch=master" />}[http://travis-ci.org/Raybeam/buzzoink]
|
8
|
+
|
9
|
+
== Cluster sizes
|
10
|
+
|
11
|
+
[test] Very small, 1 master and 1 slave. It's basically for running tests on the gem.
|
12
|
+
|
13
|
+
[development] Around 10 slaves. It's for testing small EMR jobs. (Note: this release only implements the test and production presets.)
|
14
|
+
|
15
|
+
[production] At least 20 machines. Up to 60 if the spot reservations come online
|
16
|
+
|
17
|
+
|
18
|
+
== Examples
|
19
|
+
|
20
|
+
=== Configuration
|
21
|
+
|
22
|
+
For a full list of configuration options, check the documentation.
|
23
|
+
|
24
|
+
Buzzoink.configure do | c |
|
25
|
+
c.aws_access_key_id = 'accesskey'
|
26
|
+
c.aws_secret_access_key = 'secretkey'
|
27
|
+
c.instance_settings = :production
|
28
|
+
c.max_instances = 20 # This will be 60 if spot reservations fire
|
29
|
+
c.key_name = 'mysecretkey'
|
30
|
+
c.epoch = DateTime.now.ago(1.hour)
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
=== Start a hive instance
|
35
|
+
|
36
|
+
You can also start a Pig or Streaming instance
|
37
|
+
|
38
|
+
hive = Buzzoink::Job.find_or_start_hive
|
39
|
+
begin
|
40
|
+
sleep(20)
|
41
|
+
end until hive.refresh!.ready?
|
42
|
+
puts "Connect here: #{hive.public_dns}"
|
43
|
+
|
44
|
+
=== Kill all jobs managed by Buzzoink
|
45
|
+
|
46
|
+
This kills jobs of all types (e.g. Hive, Pig, etc)
|
47
|
+
|
48
|
+
Buzzoink::Job.kill_all
|
49
|
+
|
50
|
+
This project uses MIT-LICENSE.
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
begin
|
3
|
+
require 'bundler/setup'
|
4
|
+
rescue LoadError
|
5
|
+
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
6
|
+
end
|
7
|
+
begin
|
8
|
+
require 'rdoc/task'
|
9
|
+
rescue LoadError
|
10
|
+
require 'rdoc/rdoc'
|
11
|
+
require 'rake/rdoctask'
|
12
|
+
RDoc::Task = Rake::RDocTask
|
13
|
+
end
|
14
|
+
|
15
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
16
|
+
rdoc.rdoc_dir = 'rdoc'
|
17
|
+
rdoc.title = 'Buzzoink'
|
18
|
+
rdoc.options << '--line-numbers'
|
19
|
+
rdoc.rdoc_files.include('README.rdoc')
|
20
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
Bundler::GemHelper.install_tasks
|
27
|
+
|
data/config/buzzoink.yml
ADDED
File without changes
|
@@ -0,0 +1,192 @@
|
|
1
|
+
require 'fog'
|
2
|
+
|
3
|
+
module Buzzoink
  # Holds every setting Buzzoink needs to talk to Amazon EMR (credentials,
  # process naming, cluster size) and builds the parameter hash that is sent
  # to EMR when a job flow is started.
  class Configuration

    # Sets the prefix of the process name. It's
    # a way to keep track of all buzzoink processes.
    # Don't change this unless you know what you're
    # doing. Use "name=" instead if you want to just
    # set your process apart.
    #
    # @example
    #   Buzzoink.configure do | c |
    #     c.name_prefix = "My favorite hive process"
    #   end
    #
    # @default Managed Buzzoink process
    #
    attr_writer :name_prefix
    def name_prefix
      @name_prefix ||= 'Managed Buzzoink process'
    end

    # Sets the process name.
    #
    # @example
    #   Buzzoink.configure do | c |
    #     c.name = "One off test"
    #   end
    #
    # @default Main EMR process
    #
    attr_writer :name
    def name
      @name ||= 'Main EMR process'
    end

    # Full name of the process: "<name_prefix> : <name>", optionally followed
    # by a space and the given options rendered as "key => value" pairs
    # joined with ", ".
    #
    # @example
    #   full_name(:type => :hive)
    #   # => "Managed Buzzoink process : Main EMR process type => hive"
    #
    def full_name(*args)
      suffix_options = args.extract_options!
      base = "#{name_prefix} : #{name}"
      return base if suffix_options.blank?

      details = suffix_options.map { | k, v | "#{k} => #{v}" }.join(", ")
      "#{base} #{details}"
    end

    # Sets a backstop datetime for all EMR
    # queries. Set it to a time before which
    # you care nothing about.
    #
    # The reader returns the value formatted with #iso8601.
    #
    # @example
    #   Buzzoink.configure do | c |
    #     c.epoch = DateTime.now.ago(3.weeks)
    #   end
    #
    # @default One day ago
    #
    attr_writer :epoch
    def epoch
      @epoch ||= DateTime.now.ago(1.day)
      @epoch.iso8601
    end

    # Sets the max number of on demand slaves
    # for this job flow.
    # If there are spot instances included
    # in the deployment, their number will
    # double this setting
    #
    # For example, if you leave the default of 20
    # set, there will be 20 on demand instances,
    # 40 spot instances and a master resulting
    # in 61 total machines.
    #
    attr_writer :max_instances
    def max_instances
      @max_instances ||= 20
    end

    # AWS credentials handed to Fog when the EMR connection is created.
    attr_accessor :aws_access_key_id
    attr_accessor :aws_secret_access_key
    # Name of the EC2 key pair, sent to EMR as the :ec2_key_name setting.
    attr_accessor :key_name
    # Stored for the caller's benefit; not read anywhere in this class.
    attr_accessor :key_path
    # When set, sent to EMR as the :log_uri setting.
    attr_accessor :s3_log_location

    # Memoized Fog EMR connection built from the configured credentials.
    def emr
      @emr ||= Fog::AWS::EMR.new(
        :aws_access_key_id => @aws_access_key_id,
        :aws_secret_access_key => @aws_secret_access_key
      )
    end

    # Builds the options hash passed to EMR when starting a job flow. The
    # hash is written with underscore symbol keys and then run through
    # Buzzoink.convert_hash_keys before being returned.
    def full_emr_configuration
      instances = {
        :ec2_key_name => key_name,
        :instance_groups => instance_settings_config,
        :keep_job_flow_alive_when_no_steps => true,
        :termination_protected => false
      }

      config = {
        :ami_version => '2.0', # Latest
        :instances => instances
      }
      config[:log_uri] = s3_log_location unless s3_log_location.nil?

      Buzzoink.convert_hash_keys(config)
    end

    # Selects one of the EMR preset configurations
    # for instance groups. The presets are currently
    # test and production. Test is one master plus one
    # core instance; production is one master, max_instances
    # on demand core instances and twice as many spot
    # task instances.
    #
    # @example
    #   Buzzoink.configure do | c |
    #     c.instance_settings = :production
    #   end
    #
    # @default :test
    attr_writer :instance_settings
    # Extra arguments are accepted for backward compatibility and ignored.
    def instance_settings(*_ignored)
      @instance_settings ||= :test
    end

    # Returns the instance group list for the selected preset by calling the
    # matching public "instance_settings_<preset>" method.
    #
    # Raises ArgumentError when no such preset method exists, instead of
    # leaking a NoMethodError out of the dynamic dispatch.
    def instance_settings_config
      meth = "instance_settings_#{instance_settings}"
      unless respond_to?(meth)
        raise ArgumentError, "Unknown instance_settings preset: #{instance_settings.inspect}"
      end

      send(meth)
    end

    # Production preset: master, on demand core group and spot task group.
    def instance_settings_production
      [
        master_group,
        core_group(max_instances),
        spot_task_group(max_instances * 2)
      ]
    end

    # Test preset: a master and a single core instance.
    def instance_settings_test
      [master_group, core_group(1)]
    end

    private

    # The single on demand master instance shared by every preset.
    def master_group
      {
        :instance_count => 1,
        :instance_role => 'MASTER',
        :instance_type => 'm1.small',
        :market => 'ON_DEMAND',
        :name => 'Master group'
      }
    end

    # On demand core group with the given number of instances.
    def core_group(count)
      {
        :instance_count => count,
        :instance_role => 'CORE',
        :instance_type => 'c1.medium',
        :market => 'ON_DEMAND',
        :name => 'Compute group'
      }
    end

    # Spot-market task group with the given number of instances.
    def spot_task_group(count)
      {
        :instance_count => count,
        :bid_price => '0.08',
        :instance_role => 'TASK',
        :instance_type => 'c1.medium',
        :market => 'SPOT',
        :name => 'Flex group'
      }
    end
  end
end
|
data/lib/buzzoink/job.rb
ADDED
@@ -0,0 +1,273 @@
|
|
1
|
+
module Buzzoink
  # A representation of the job in Amazon's EMR system. There are a few dynamic methods
  # created in this object. They are basically candy backed by another method in this class
  #
  # [start_hive] start(:type => :hive)
  # [start_pig] start(:type => :pig)
  # [find_or_start_hive] find_or_start(:type => :hive)
  # [find_or_start_pig] find_or_start(:type => :pig)
  #
  class Job
    attr_reader :job_flow_id
    attr_reader :body

    READY_STATES = ['RUNNING', 'WAITING'].freeze
    PENDING_STATES = ['STARTING', 'BOOTSTRAPPING'].freeze
    ACTIVE_STATES = (READY_STATES + PENDING_STATES).freeze

    class << self
      # Gets an EMR job based on job flow ID
      # This is not limited to Buzzoink jobs
      #
      # Raises Buzzoink::NoJobError when EMR reports the ID as invalid or
      # returns no job flow for it; any other Excon error is re-raised.
      #
      # @example
      #   Buzzoink::Job.get 'j-234324325'
      #
      def get(job_id)
        begin
          job_flows = Buzzoink.emr.describe_job_flows('JobFlowIds' => [job_id]).body
        rescue Excon::Errors::Error => e
          raise Buzzoink::NoJobError if e.message =~ /Specified job flow ID not valid/
          raise
        end

        flow = job_flows['JobFlows'].first
        # An empty result would otherwise blow up inside #initialize on nil.
        raise Buzzoink::NoJobError if flow.nil?

        new(flow)
      end

      # Get all EMR jobs created after the epoch. The EMR
      # jobs are instantiated as Buzzoink::Job objects
      # Not limited to Buzzoink jobs
      #
      # @example
      #   jobs = Buzzoink::Job.get_jobs
      #
      def get_jobs
        raw = Buzzoink.emr.describe_job_flows('CreatedAfter' => Buzzoink.configure.epoch).body
        raw['JobFlows'].map { | jf | new(jf) }
      end

      # Gets all jobs managed by Buzzoink. Pass
      # a :type to get only certain job types
      #
      # Without options a job counts as managed when its name starts with
      # the configured name prefix; with options the name must equal the
      # configured full name for those options.
      #
      # @example
      #   all_buzz_jobs = Buzzoink::Job.get_managed_jobs
      #   just_hive_jobs = Buzzoink::Job.get_managed_jobs :type => :hive
      #
      # @default all Buzzoink jobs
      def get_managed_jobs(*args)
        options = args.extract_options!
        jobs = get_jobs

        if options.blank?
          # Plain prefix comparison: the prefix is user configurable and must
          # not be interpreted as a regular expression.
          prefix = Buzzoink.configure.name_prefix
          return jobs.select { | j | j.body['Name'].to_s.start_with?(prefix) }
        end

        full_name = Buzzoink.configure.full_name(options)
        jobs.select { | j | j.body['Name'] == full_name }
      end

      # Gets all active jobs managed by Buzzoink. Pass
      # a :type to get only certain job types
      #
      # @example
      #   all_buzz_jobs = Buzzoink::Job.get_active_managed_jobs
      #   just_hive_jobs = Buzzoink::Job.get_active_managed_jobs :type => :hive
      #
      # @default all active Buzzoink jobs
      def get_active_managed_jobs(*args)
        options = args.extract_options!
        jobs = get_managed_jobs options

        jobs.select { | j | ACTIVE_STATES.include?(j.body['ExecutionStatusDetail']['State']) }
      end

      # If a job type is already running, return it. Otherwise start
      # a new job of that type
      #
      # @example
      #   job = Buzzoink::Job.find_or_start :type => :hive
      #
      # @default :type => :pig
      def find_or_start(*args)
        options = args.extract_options!
        options.reverse_merge! :type => :pig

        jobs = get_active_managed_jobs :type => options[:type]
        return jobs.first unless jobs.blank?

        start options
      end

      # Kill all Buzzoink managed jobs. Returns false when there are none.
      #
      # @example
      #   Buzzoink::Job.kill_all
      #
      def kill_all
        jobs = get_managed_jobs
        return false if jobs.blank?

        kill_jobs jobs.map(&:id)
      end

      # Kill jobs based on job flow ID.
      # This is not limited to Buzzoink jobs
      #
      # Accepts a single ID, several IDs or an array of IDs. Returns true
      # when the EMR response carries a request id.
      #
      # @example
      #   true_or_false = Buzzoink::Job.kill_jobs 'j-23432432', 'j-54654'
      #
      def kill_jobs(*job_flow_ids)
        result = Buzzoink.emr.terminate_job_flows('JobFlowIds' => job_flow_ids.flatten)
        !!result.body['RequestId']
      end

      # Starts a job flow in EMR and adds the interactive step for the
      # requested type. Only :hive and :pig have a step defined here.
      #
      # The type is checked before anything is sent to EMR so that an
      # unsupported type cannot leave a job flow running behind the error.
      #
      # Raises Buzzoink::BuzzoinkError for an unsupported type and
      # Buzzoink::DuplicateJobError when an active managed job of that type
      # already exists.
      #
      # @example
      #   Buzzoink::Job.start(:type => :hive)
      #
      # @default :type => :pig
      #
      def start(*args)
        options = args.extract_options!
        options.reverse_merge! :type => :pig
        type = options[:type]

        step = step_for(type)
        raise Buzzoink::BuzzoinkError, "Current type is invalid :: #{type}" if step.nil?

        active_jobs = get_active_managed_jobs :type => type
        raise Buzzoink::DuplicateJobError unless active_jobs.blank?

        conf = Buzzoink.configure
        job = Buzzoink.emr.run_job_flow(conf.full_name(:type => type), conf.full_emr_configuration)
        job_flow_id = job.body['JobFlowId']

        # Add appropriate step for interactive job
        Buzzoink.emr.add_job_flow_steps(job_flow_id, {'Steps' => [step]})

        get job_flow_id
      end

      # :nodoc:
      def hive_step
        {
          'Name' => 'Hive',
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'HadoopJarStep' => {
            'Jar' => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
            'Args' => [
              's3://elasticmapreduce/libs/hive/hive-script',
              '--base-path', 's3://elasticmapreduce/libs/hive/',
              '--install-hive',
              '--hive-versions', '0.7.1.1'
            ]
          }
        }
      end

      # :nodoc:
      def pig_step
        {
          'Name' => 'Pig',
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'HadoopJarStep' => {
            'Jar' => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
            'Args' => [
              's3://elasticmapreduce/libs/pig/pig-script',
              '--base-path', 's3://elasticmapreduce/libs/pig/',
              '--install-pig'
            ]
          }
        }
      end

      # Some helper methods are included for each type of
      # job. The :streaming helpers exist, but #start has no
      # step for that type and rejects it.
      #
      # @example
      #   j = Buzzoink::Job.start_hive
      #   j = Buzzoink::Job.find_or_start_pig
      #
      [:hive, :pig, :streaming].each do | type |
        define_method("start_#{type}") { start(:type => type) }
        define_method("find_or_start_#{type}") { find_or_start(:type => type) }
      end

      # Step definition for a job type, or nil when the type has none.
      def step_for(type)
        case type
        when :hive then hive_step
        when :pig then pig_step
        end
      end
      private :step_for
    end

    # Wraps one job flow description hash as returned by EMR.
    def initialize(body)
      @job_flow_id = body['JobFlowId']
      @body = body
    end
    alias_method :id, :job_flow_id

    # Kills the current job
    #
    # @example
    #   job.kill
    #
    def kill
      self.class.kill_jobs id
    end

    # Pulls the latest description for this
    # job from EMR and returns self. State readers such as #ready? only
    # see new data after this has been called.
    def refresh!
      @body = self.class.get(id).body
      self
    end

    # The current boot state of the instance
    def state
      body['ExecutionStatusDetail']['State']
    end

    # Boolean for determining whether the instance
    # is ready for operation
    def ready?
      READY_STATES.include?(state)
    end

    # The steps recorded in the job description.
    def steps
      body['Steps']
    end

    # Address to use for connection
    def public_dns
      body['Instances']['MasterPublicDnsName']
    end

    # Is this instance protected from termination
    def termination_protected?
      body['Instances']['TerminationProtected'] == 'true'
    end

    # The type of job (e.g. Hive, Pig, etc) parsed from the "type => x"
    # suffix of the job name, or nil when the name carries no type.
    def type
      match = /type => (\w+)/.match(body['Name'].to_s)
      match && match[1].to_sym
    end
  end
end
|
data/lib/buzzoink.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'active_support/core_ext'
|
2
|
+
|
3
|
+
require 'buzzoink/configuration'
|
4
|
+
require 'buzzoink/job'
|
5
|
+
|
6
|
+
# Entry point of the gem: holds the shared configuration, exposes the EMR
# connection and defines the error hierarchy raised by the library.
module Buzzoink
  extend self

  # Returns the shared Configuration, creating it on first use. When a block
  # is given the configuration is yielded to it first.
  #
  # @example
  #   Buzzoink.configure do | c |
  #     c.instance_settings = :production
  #   end
  #
  def configure
    @configuration ||= Configuration.new
    yield @configuration if block_given?
    @configuration
  end

  # The EMR connection of the shared configuration.
  def emr
    configure.emr
  end

  # Drops the shared configuration; the next call to #configure builds a
  # fresh one.
  def clear_configuration
    @configuration = nil
  end

  #
  # Candy so I can use underscore symbol keys
  #

  # Camel case an underscore symbol
  def camelcase_key(k)
    k.to_s.camelize
  end

  # Recursively set hash keys: every key of every hash found in the value
  # (including hashes nested inside arrays) is passed through
  # #camelcase_key. Anything that is neither an Array nor a Hash is
  # returned as is.
  def convert_hash_keys(value)
    case value
    when Array
      value.map { |v| convert_hash_keys(v) }
    when Hash
      Hash[value.map { |k, v| [camelcase_key(k), convert_hash_keys(v)] }]
    else
      value
    end
  end

  # Base class of every error raised by Buzzoink.
  class BuzzoinkError < StandardError
  end

  # Job code refers to the base error as Buzzoink::Error; keep that name
  # resolvable instead of failing with a NameError.
  Error = BuzzoinkError

  # Raised when an active managed job of the requested type already exists.
  class DuplicateJobError < BuzzoinkError
  end

  # Raised when a job flow ID is not known to EMR.
  class NoJobError < BuzzoinkError
  end
end
|
metadata
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: buzzoink
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.3
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Chris Hagar
|
9
|
+
- Bob Briski
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
|
14
|
+
date: 2012-02-10 00:00:00 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activesupport
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ~>
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "3.0"
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: addressable
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ~>
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "2.2"
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: andand
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "1.3"
|
46
|
+
type: :runtime
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: fog
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ~>
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 1.1.2
|
57
|
+
type: :runtime
|
58
|
+
prerelease: false
|
59
|
+
version_requirements: *id004
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: rspec-rails
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
type: :development
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: *id005
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: webmock
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: "0"
|
79
|
+
type: :development
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: *id006
|
82
|
+
- !ruby/object:Gem::Dependency
|
83
|
+
name: fabrication
|
84
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: *id007
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: vcr
|
95
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: "0"
|
101
|
+
type: :development
|
102
|
+
prerelease: false
|
103
|
+
version_requirements: *id008
|
104
|
+
description: Simple interface for Pig, Hive and Streaming over EMR.
|
105
|
+
email:
|
106
|
+
- chagar@raybeam.com
|
107
|
+
- bbriski@raybeam.com
|
108
|
+
executables: []
|
109
|
+
|
110
|
+
extensions: []
|
111
|
+
|
112
|
+
extra_rdoc_files: []
|
113
|
+
|
114
|
+
files:
|
115
|
+
- config/buzzoink.yml
|
116
|
+
- lib/buzzoink/configuration.rb
|
117
|
+
- lib/buzzoink/job.rb
|
118
|
+
- lib/buzzoink/version.rb
|
119
|
+
- lib/buzzoink.rb
|
120
|
+
- lib/tasks/buzzoink_tasks.rake
|
121
|
+
- MIT-LICENSE
|
122
|
+
- Rakefile
|
123
|
+
- README.rdoc
|
124
|
+
homepage: https://github.com/Raybeam/buzzoink
|
125
|
+
licenses: []
|
126
|
+
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
|
130
|
+
require_paths:
|
131
|
+
- lib
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
133
|
+
none: false
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
hash: 3861862602005810237
|
138
|
+
segments:
|
139
|
+
- 0
|
140
|
+
version: "0"
|
141
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
|
+
none: false
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
hash: 3861862602005810237
|
147
|
+
segments:
|
148
|
+
- 0
|
149
|
+
version: "0"
|
150
|
+
requirements: []
|
151
|
+
|
152
|
+
rubyforge_project:
|
153
|
+
rubygems_version: 1.8.15
|
154
|
+
signing_key:
|
155
|
+
specification_version: 3
|
156
|
+
summary: Start hive or pig processes in EMR
|
157
|
+
test_files: []
|
158
|
+
|