elastic-mapreduce 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +51 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +16 -0
- data/LICENSE.txt +393 -0
- data/NOTICE.txt +26 -0
- data/README +1007 -0
- data/Rakefile +35 -0
- data/VERSION +1 -0
- data/bin/elastic-mapreduce +27 -0
- data/cacert.pem +280 -0
- data/elastic-mapreduce.gemspec +104 -0
- data/lib/amazon/aws/exceptions.rb +211 -0
- data/lib/amazon/coral/awsquery.rb +128 -0
- data/lib/amazon/coral/awsquerychainhelper.rb +92 -0
- data/lib/amazon/coral/awsqueryhandler.rb +170 -0
- data/lib/amazon/coral/awsqueryurihandler.rb +34 -0
- data/lib/amazon/coral/call.rb +68 -0
- data/lib/amazon/coral/dispatcher.rb +33 -0
- data/lib/amazon/coral/ec2client.rb +91 -0
- data/lib/amazon/coral/elasticmapreduceclient.rb +198 -0
- data/lib/amazon/coral/handler.rb +20 -0
- data/lib/amazon/coral/httpdelegationhelper.rb +27 -0
- data/lib/amazon/coral/httpdestinationhandler.rb +36 -0
- data/lib/amazon/coral/httphandler.rb +124 -0
- data/lib/amazon/coral/identityhandler.rb +32 -0
- data/lib/amazon/coral/job.rb +25 -0
- data/lib/amazon/coral/logfactory.rb +35 -0
- data/lib/amazon/coral/option.rb +70 -0
- data/lib/amazon/coral/orchestrator.rb +49 -0
- data/lib/amazon/coral/querystringmap.rb +93 -0
- data/lib/amazon/coral/service.rb +130 -0
- data/lib/amazon/coral/simplelog.rb +98 -0
- data/lib/amazon/coral/urlencoding.rb +19 -0
- data/lib/amazon/coral/v0signaturehandler.rb +33 -0
- data/lib/amazon/coral/v0signaturehelper.rb +83 -0
- data/lib/amazon/coral/v1signaturehandler.rb +32 -0
- data/lib/amazon/coral/v1signaturehelper.rb +58 -0
- data/lib/amazon/coral/v2signaturehandler.rb +46 -0
- data/lib/amazon/coral/v2signaturehelper.rb +76 -0
- data/lib/amazon/retry_delegator.rb +66 -0
- data/lib/amazon/stderr_logger.rb +23 -0
- data/lib/client.rb +117 -0
- data/lib/commands.rb +1690 -0
- data/lib/credentials.rb +86 -0
- data/lib/ec2_client_wrapper.rb +73 -0
- data/lib/json/lexer.rb +294 -0
- data/lib/json/objects.rb +200 -0
- data/lib/json.rb +58 -0
- data/lib/simple_executor.rb +11 -0
- data/lib/simple_logger.rb +38 -0
- data/lib/uuidtools/version.rb +32 -0
- data/lib/uuidtools.rb +655 -0
- data/run_tests.rb +8 -0
- data/samples/freebase/code/freebase_jobflow.json +44 -0
- data/samples/similarity/lastfm_jobflow.json +78 -0
- data/samples/wordSplitter.py +18 -0
- data/tests/commands_test.rb +587 -0
- data/tests/credentials.json +7 -0
- data/tests/example.json +14 -0
- metadata +154 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
|
|
4
|
+
require 'amazon/stderr_logger.rb'
|
|
5
|
+
|
|
6
|
+
module Amazon

  # RetryDelegator
  #
  # A wrapper around a client object. Any method that is not defined on the
  # delegator itself is forwarded to the wrapped client; if the call raises a
  # retriable exception (or returns a response that the :retry_if callback
  # flags), the call is repeated after a growing delay.
  #
  # Recognised options (all optional):
  #   :log              - object responding to #info; defaults to StdErrLogger.new
  #   :backoff_seconds  - base delay in seconds (default 2)
  #   :backoff_mult     - initial multiplier applied to the base delay (default 1.5)
  #   :retries          - maximum number of retries per call (default 8)
  #   :retry_if         - callable taking the response; a truthy result forces a retry
  #   :pass_exceptions  - exception classes that are re-raised without retrying
  #   :retry_exceptions - exception classes that are always retried
  class RetryDelegator
    def initialize(client, options={})
      @client           = client
      @log              = options[:log] || StdErrLogger.new
      @backoff_seconds  = options[:backoff_seconds] || 2
      @backoff_mult     = options[:backoff_mult] || 1.5
      @retries          = options[:retries] || 8
      @retry_if         = options[:retry_if]
      @pass_exceptions  = options[:pass_exceptions] || [ScriptError, SignalException, ArgumentError, StandardError]
      @retry_exceptions = options[:retry_exceptions] || [IOError, EOFError, RuntimeError]
    end

    # Decides whether exception +e+ should trigger a retry.
    #
    # The retry list is consulted first, so a class listed there wins even if
    # one of its ancestors appears in the pass list. An exception matching
    # neither list is treated as retriable.
    #
    # NOTE(review): with the default lists this means exceptions outside the
    # StandardError tree that are not listed (e.g. SystemExit, NoMemoryError)
    # are retried - confirm that this is intended.
    def is_retry_exception(e)
      if @retry_exceptions && @retry_exceptions.any? { |klass| e.is_a?(klass) }
        return true
      end
      return false unless @pass_exceptions
      !@pass_exceptions.any? { |klass| e.is_a?(klass) }
    end

    # Reports the wrapped client's methods as our own so that respond_to?
    # agrees with what method_missing will actually forward.
    def respond_to_missing?(method, include_private = false)
      @client.respond_to?(method, include_private) || super
    end

    # Forwards +method+ (with its arguments and block) to the wrapped client,
    # retrying on retriable failures.
    #
    # The delay before a retry is backoff_seconds * multiplier, where the
    # multiplier starts at backoff_mult and doubles after every retry.
    # Returns the client's response; re-raises the last exception once the
    # retries are exhausted or the exception is not retriable.
    def method_missing(method, *args, &block)
      backoff_mult = @backoff_mult
      retries_remaining = @retries
      begin
        response = @client.send(method, *args, &block)
        if @retry_if && @retry_if.call(response)
          # Raised as a RuntimeError so that the default retry list picks it up.
          raise "Retriable invalid response returned from #{method}: #{response.inspect}"
        end
        return response
      rescue Exception => e
        # Exception (not just StandardError) is rescued on purpose: the
        # pass/retry lists may name classes outside the StandardError tree.
        if retries_remaining > 0 && is_retry_exception(e)
          # Compute the delay once so the logged value is exactly what we sleep.
          delay = @backoff_seconds * backoff_mult
          unless @log.nil?
            @log.info "Exception #{e} while calling #{method} on #{@client.class}, retrying in #{delay} seconds."
          end
          sleep(delay)
          backoff_mult *= 2
          retries_remaining -= 1
          retry
        else
          unless @log.nil?
            @log.info "Exception #{e} while calling #{method} on #{@client.class}, failing"
          end
          raise e
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
|
|
4
|
+
module Amazon
  # Minimal logger that writes level-tagged lines to STDERR.
  class StdErrLogger
    # Descriptor for the INFO level: numeric threshold plus the printed tag.
    INFO = { :level => 4, :string => "INFO" }

    # level - numeric threshold; messages whose level is less than or equal
    #         to it are printed. Defaults to the INFO level.
    def initialize(level=nil)
      @level = level || INFO[:level]
      @file = STDERR
    end

    # Writes "<TAG> <msg>" when the given level descriptor passes the
    # configured threshold. +msg+ is interpolated, so non-String objects
    # (symbols, exceptions, numbers) are rendered via #to_s instead of
    # raising TypeError.
    def message(level, msg)
      return unless level[:level] <= @level
      @file.puts("#{level[:string]} #{msg}")
    end

    # Logs a message at INFO level.
    def info(*args)
      message(INFO, *args)
    end
  end
end
|
data/lib/client.rb
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
|
|
4
|
+
require 'credentials'
|
|
5
|
+
require 'amazon/retry_delegator'
|
|
6
|
+
require 'amazon/coral/elasticmapreduceclient'
|
|
7
|
+
|
|
8
|
+
# Thin wrapper around the Elastic MapReduce query client. Every service call
# is traced through the logger, sent through an Amazon::RetryDelegator, and
# converted into a RuntimeError when the response carries an 'Error' entry.
class EmrClient
  attr_accessor :commands, :logger, :options

  # Error codes for which repeating the request is considered safe.
  # note: 'Timeout' is not retryable because the operation might have
  # completed just the connection timed out
  RETRYABLE_ERROR_CODES = ['InternalFailure', 'Throttling', 'ServiceUnavailable']

  # commands     - object whose #global_options supplies endpoint, credentials
  #                and verbosity
  # logger       - object responding to #trace
  # client_class - class responding to .new_aws_query(config)
  def initialize(commands, logger, client_class)
    @commands = commands
    @logger   = logger
    @options  = commands.global_options

    @config = {
      :endpoint            => @options[:endpoint] || "https://elasticmapreduce.amazonaws.com",
      :ca_file             => default_ca_file,
      :aws_access_key      => @options[:aws_access_id],
      :aws_secret_key      => @options[:aws_secret_key],
      :signature_algorithm => :V2,
      :content_type        => 'JSON',
      :verbose             => (@options[:verbose] != nil)
    }

    @client = Amazon::RetryDelegator.new(
      client_class.new_aws_query(@config),
      :retry_if => Proc.new { |*opts| self.is_retryable_error_response(*opts) }
    )
  end

  # True when the response carries an error whose code is listed in
  # RETRYABLE_ERROR_CODES; false for nil or error-free responses.
  def is_retryable_error_response(response)
    return false if response.nil?
    error = response['Error']
    return false unless error
    RETRYABLE_ERROR_CODES.include?(error['Code'])
  end

  # True when the response is non-nil and has an 'Error' key.
  def is_error_response(response)
    !response.nil? && response.key?('Error')
  end

  # Returns the response unchanged, or raises RuntimeError with the service's
  # error message when the response is an error.
  def raise_on_error(response)
    if is_error_response(response)
      raise RuntimeError, response["Error"]["Message"]
    end
    response
  end

  # Returns the first job flow description for +jobflow_id+.
  # Raises RuntimeError on a service error, or when the response is nil or
  # contains no job flows (including a response without a 'JobFlows' entry).
  def describe_jobflow_with_id(jobflow_id)
    result = traced_call("DescribeJobFlows('JobFlowIds' => [ #{jobflow_id} ])") do
      @client.DescribeJobFlows('JobFlowIds' => [ jobflow_id ], 'DescriptionType' => 'EXTENDED')
    end
    raise_on_error(result)
    jobflows = result && result['JobFlows']
    if jobflows.nil? || jobflows.empty?
      raise RuntimeError, "Jobflow with id #{jobflow_id} not found"
    end
    jobflows.first
  end

  # Calls AddJobFlowSteps for +jobflow_id+ with +steps+; raises on error.
  def add_steps(jobflow_id, steps)
    result = traced_call("AddJobFlowSteps('JobFlowId' => #{jobflow_id.inspect}, 'Steps' => #{steps.inspect})") do
      @client.AddJobFlowSteps('JobFlowId' => jobflow_id, 'Steps' => steps)
    end
    raise_on_error(result)
  end

  # Calls RunJobFlow with the +jobflow+ description; raises on error.
  def run_jobflow(jobflow)
    result = traced_call("RunJobFlow(#{jobflow.inspect})") do
      @client.RunJobFlow(jobflow)
    end
    raise_on_error(result)
  end

  # Calls DescribeJobFlows with +options+ plus the EXTENDED description
  # type; raises on error.
  def describe_jobflow(options)
    result = traced_call("DescribeJobFlows(#{options.inspect})") do
      @client.DescribeJobFlows(options.merge('DescriptionType' => 'EXTENDED'))
    end
    raise_on_error(result)
  end

  # Calls SetTerminationProtection for +jobflow_ids+; raises on error.
  def set_termination_protection(jobflow_ids, protected)
    result = traced_call("SetTerminationProtection('JobFlowIds' => #{jobflow_ids.inspect}, 'TerminationProtected' => #{protected})") do
      @client.SetTerminationProtection('JobFlowIds' => jobflow_ids, 'TerminationProtected' => protected)
    end
    raise_on_error(result)
  end

  # Calls TerminateJobFlows for +jobflow_ids+; raises on error.
  def terminate_jobflows(jobflow_ids)
    result = traced_call("TerminateJobFlows('JobFlowIds' => #{jobflow_ids.inspect})") do
      @client.TerminateJobFlows('JobFlowIds' => jobflow_ids)
    end
    raise_on_error(result)
  end

  # Calls ModifyInstanceGroups with +options+; raises on error.
  def modify_instance_groups(options)
    result = traced_call("ModifyInstanceGroups(#{options.inspect})") do
      @client.ModifyInstanceGroups(options)
    end
    raise_on_error(result)
  end

  # Calls AddInstanceGroups with +options+; raises on error.
  def add_instance_groups(options)
    result = traced_call("AddInstanceGroups(#{options.inspect})") do
      @client.AddInstanceGroups(options)
    end
    raise_on_error(result)
  end

  private

  # Traces +description+, runs the block (the actual service call), traces
  # the inspected result and returns it.
  def traced_call(description)
    logger.trace description
    result = yield
    logger.trace result.inspect
    result
  end

  # Locates the CA bundle. The file next to this source file is preferred;
  # some packagings ship cacert.pem one directory above it, so the parent
  # directory is tried next. When neither exists the original location is
  # returned unchanged.
  def default_ca_file
    here = File.dirname(__FILE__)
    bundled = File.join(here, "cacert.pem")
    return bundled if File.exist?(bundled)
    parent = File.join(here, "..", "cacert.pem")
    File.exist?(parent) ? parent : bundled
  end
end
|
|
117
|
+
|