elastic-mapreduce 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. data/CHANGELOG +51 -0
  2. data/Gemfile +13 -0
  3. data/Gemfile.lock +16 -0
  4. data/LICENSE.txt +393 -0
  5. data/NOTICE.txt +26 -0
  6. data/README +1007 -0
  7. data/Rakefile +35 -0
  8. data/VERSION +1 -0
  9. data/bin/elastic-mapreduce +27 -0
  10. data/cacert.pem +280 -0
  11. data/elastic-mapreduce.gemspec +104 -0
  12. data/lib/amazon/aws/exceptions.rb +211 -0
  13. data/lib/amazon/coral/awsquery.rb +128 -0
  14. data/lib/amazon/coral/awsquerychainhelper.rb +92 -0
  15. data/lib/amazon/coral/awsqueryhandler.rb +170 -0
  16. data/lib/amazon/coral/awsqueryurihandler.rb +34 -0
  17. data/lib/amazon/coral/call.rb +68 -0
  18. data/lib/amazon/coral/dispatcher.rb +33 -0
  19. data/lib/amazon/coral/ec2client.rb +91 -0
  20. data/lib/amazon/coral/elasticmapreduceclient.rb +198 -0
  21. data/lib/amazon/coral/handler.rb +20 -0
  22. data/lib/amazon/coral/httpdelegationhelper.rb +27 -0
  23. data/lib/amazon/coral/httpdestinationhandler.rb +36 -0
  24. data/lib/amazon/coral/httphandler.rb +124 -0
  25. data/lib/amazon/coral/identityhandler.rb +32 -0
  26. data/lib/amazon/coral/job.rb +25 -0
  27. data/lib/amazon/coral/logfactory.rb +35 -0
  28. data/lib/amazon/coral/option.rb +70 -0
  29. data/lib/amazon/coral/orchestrator.rb +49 -0
  30. data/lib/amazon/coral/querystringmap.rb +93 -0
  31. data/lib/amazon/coral/service.rb +130 -0
  32. data/lib/amazon/coral/simplelog.rb +98 -0
  33. data/lib/amazon/coral/urlencoding.rb +19 -0
  34. data/lib/amazon/coral/v0signaturehandler.rb +33 -0
  35. data/lib/amazon/coral/v0signaturehelper.rb +83 -0
  36. data/lib/amazon/coral/v1signaturehandler.rb +32 -0
  37. data/lib/amazon/coral/v1signaturehelper.rb +58 -0
  38. data/lib/amazon/coral/v2signaturehandler.rb +46 -0
  39. data/lib/amazon/coral/v2signaturehelper.rb +76 -0
  40. data/lib/amazon/retry_delegator.rb +66 -0
  41. data/lib/amazon/stderr_logger.rb +23 -0
  42. data/lib/client.rb +117 -0
  43. data/lib/commands.rb +1690 -0
  44. data/lib/credentials.rb +86 -0
  45. data/lib/ec2_client_wrapper.rb +73 -0
  46. data/lib/json/lexer.rb +294 -0
  47. data/lib/json/objects.rb +200 -0
  48. data/lib/json.rb +58 -0
  49. data/lib/simple_executor.rb +11 -0
  50. data/lib/simple_logger.rb +38 -0
  51. data/lib/uuidtools/version.rb +32 -0
  52. data/lib/uuidtools.rb +655 -0
  53. data/run_tests.rb +8 -0
  54. data/samples/freebase/code/freebase_jobflow.json +44 -0
  55. data/samples/similarity/lastfm_jobflow.json +78 -0
  56. data/samples/wordSplitter.py +18 -0
  57. data/tests/commands_test.rb +587 -0
  58. data/tests/credentials.json +7 -0
  59. data/tests/example.json +14 -0
  60. metadata +154 -0
@@ -0,0 +1,66 @@
1
+ #
2
+ # Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+
4
+ require 'amazon/stderr_logger.rb'
5
+
6
module Amazon

  # RetryDelegator
  #
  # A wrapper around a client object. Any method that RetryDelegator does not
  # define itself is forwarded to the wrapped client; if the call raises a
  # retryable exception (or returns a response that the :retry_if callback
  # flags), the call is repeated after a growing delay.
  #
  # Options (all optional):
  #   :log              - object responding to #info; defaults to StdErrLogger.new
  #   :backoff_seconds  - base delay in seconds (default 2)
  #   :backoff_mult     - initial delay multiplier (default 1.5)
  #   :retries          - maximum number of retries after the first attempt (default 8)
  #   :retry_if         - callable given the response; a truthy result turns the
  #                       response into a RuntimeError, which is then subject to
  #                       the same retry rules as any other exception
  #   :pass_exceptions  - exception classes that are re-raised without retrying
  #   :retry_exceptions - exception classes that are always retried; checked
  #                       before :pass_exceptions
  class RetryDelegator
    # SystemExit and NoMemoryError are listed explicitly: they are not
    # StandardErrors, so without them an exception matching neither list
    # (e.g. Kernel#exit called inside the client) would be retried.
    DEFAULT_PASS_EXCEPTIONS = [
      ScriptError, SignalException, SystemExit, NoMemoryError, ArgumentError, StandardError
    ].freeze
    DEFAULT_RETRY_EXCEPTIONS = [IOError, EOFError, RuntimeError].freeze

    def initialize(client, options={})
      @client = client
      @log = options[:log] || StdErrLogger.new
      @backoff_seconds = options[:backoff_seconds] || 2
      @backoff_mult = options[:backoff_mult] || 1.5
      @retries = options[:retries] || 8
      @retry_if = options[:retry_if]
      @pass_exceptions = options[:pass_exceptions] || DEFAULT_PASS_EXCEPTIONS
      @retry_exceptions = options[:retry_exceptions] || DEFAULT_RETRY_EXCEPTIONS
    end

    # Returns true when exception +e+ should trigger a retry.
    #
    # The retry list wins over the pass list. An exception matching neither
    # list is retried when a pass list is configured, and not retried otherwise.
    def is_retry_exception(e)
      return true if @retry_exceptions && @retry_exceptions.any? { |klass| e.is_a?(klass) }
      return false unless @pass_exceptions
      !@pass_exceptions.any? { |klass| e.is_a?(klass) }
    end

    # Reports the wrapped client's methods as our own so that respond_to?
    # agrees with what method_missing will actually forward.
    def respond_to_missing?(method, include_private = false)
      @client.respond_to?(method, include_private) || super
    end

    # Forwards +method+ (with its arguments and block) to the wrapped client,
    # retrying up to @retries times. Returns the client's response, or
    # re-raises the last exception once it is not retryable or the retries
    # are exhausted.
    def method_missing(method, *args, &block)
      backoff_mult = @backoff_mult
      retries_remaining = @retries
      begin
        response = @client.send(method, *args, &block)
        if @retry_if && @retry_if.call(response)
          raise "Retriable invalid response returned from #{method}: #{response.inspect}"
        end
        return response
      rescue Exception => e
        # Exception (not just StandardError) is rescued on purpose: which
        # classes are retried is decided solely by is_retry_exception, so
        # callers may list non-StandardError classes in :retry_exceptions.
        if retries_remaining > 0 && is_retry_exception(e)
          # The delay is the base delay times the current multiplier; the
          # multiplier doubles after every retry.
          delay = @backoff_seconds * backoff_mult
          if @log
            @log.info "Exception #{e} while calling #{method} on #{@client.class}, retrying in #{delay} seconds."
          end
          sleep(delay)
          backoff_mult *= 2
          retries_remaining -= 1
          retry
        else
          if @log
            @log.info "Exception #{e} while calling #{method} on #{@client.class}, failing"
          end
          raise
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ #
2
+ # Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+
4
module Amazon
  # Minimal logger that writes level-tagged lines to an IO stream
  # (STDERR unless another stream is supplied).
  class StdErrLogger
    INFO = { :level => 4, :string => "INFO" }.freeze

    # level - numeric threshold; messages whose level is greater than this
    #         are dropped. Defaults to the INFO level.
    # file  - object responding to #puts that receives the output.
    #         Defaults to STDERR.
    def initialize(level=nil, file=STDERR)
      @level = level || INFO[:level]
      @file = file
    end

    # Writes "<LEVEL STRING> <msg>" when +level+ (a hash shaped like INFO)
    # is at or below the configured threshold.
    #
    # Interpolation is used so that non-String messages (for example an
    # exception object) are converted with #to_s instead of raising TypeError.
    def message(level, msg)
      if level[:level] <= @level
        @file.puts("#{level[:string]} #{msg}")
      end
    end

    # Logs a message at INFO level.
    def info(*args)
      message(INFO, *args)
    end
  end
end
data/lib/client.rb ADDED
@@ -0,0 +1,117 @@
1
+ #
2
+ # Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+
4
+ require 'credentials'
5
+ require 'amazon/retry_delegator'
6
+ require 'amazon/coral/elasticmapreduceclient'
7
+
8
# Wrapper around an Elastic MapReduce query client.
#
# The underlying client is built with client_class.new_aws_query and wrapped
# in an Amazon::RetryDelegator, so service calls whose response carries a
# retryable error code are retried. Every call is traced through the logger,
# and error responses are raised as RuntimeError.
class EmrClient
  attr_accessor :commands, :logger, :options

  # Error codes worth retrying.
  # note: 'Timeout' is not retryable because the operation might have
  # completed and only the connection timed out.
  RETRYABLE_ERROR_CODES = ['InternalFailure', 'Throttling', 'ServiceUnavailable'].freeze

  # commands     - object responding to #global_options (a hash of settings)
  # logger       - object responding to #trace
  # client_class - class responding to .new_aws_query(config)
  def initialize(commands, logger, client_class)
    @commands = commands
    @logger = logger
    @options = commands.global_options

    @config = {
      :endpoint => @options[:endpoint] || "https://elasticmapreduce.amazonaws.com",
      :ca_file => default_ca_file,
      :aws_access_key => @options[:aws_access_id],
      :aws_secret_key => @options[:aws_secret_key],
      :signature_algorithm => :V2,
      :content_type => 'JSON',
      :verbose => !@options[:verbose].nil?
    }

    @client = Amazon::RetryDelegator.new(
      client_class.new_aws_query(@config),
      :retry_if => Proc.new { |*opts| self.is_retryable_error_response(*opts) }
    )
  end

  # Returns true when +response+ carries an 'Error' whose 'Code' is one of
  # RETRYABLE_ERROR_CODES; false for nil or error-free responses.
  def is_retryable_error_response(response)
    return false if response.nil?
    error = response['Error']
    return false unless error
    RETRYABLE_ERROR_CODES.include?(error['Code'])
  end

  # Returns true when +response+ is non-nil and has an 'Error' key.
  def is_error_response(response)
    response != nil && response.key?('Error')
  end

  # Raises RuntimeError with the service's error message when +response+
  # is an error response; otherwise returns +response+ unchanged.
  def raise_on_error(response)
    if is_error_response(response)
      raise RuntimeError, response["Error"]["Message"]
    end
    response
  end

  # Returns the description of the single job flow +jobflow_id+.
  # Raises RuntimeError when the service reports an error or when the
  # response contains no job flows (including a missing 'JobFlows' key).
  def describe_jobflow_with_id(jobflow_id)
    result = traced_call("DescribeJobFlows('JobFlowIds' => [ #{jobflow_id} ])") do
      @client.DescribeJobFlows('JobFlowIds' => [ jobflow_id ], 'DescriptionType' => 'EXTENDED')
    end
    jobflows = result && result['JobFlows']
    if jobflows.nil? || jobflows.empty?
      raise RuntimeError, "Jobflow with id #{jobflow_id} not found"
    end
    jobflows.first
  end

  def add_steps(jobflow_id, steps)
    traced_call("AddJobFlowSteps('JobFlowId' => #{jobflow_id.inspect}, 'Steps' => #{steps.inspect})") do
      @client.AddJobFlowSteps('JobFlowId' => jobflow_id, 'Steps' => steps)
    end
  end

  def run_jobflow(jobflow)
    traced_call("RunJobFlow(#{jobflow.inspect})") do
      @client.RunJobFlow(jobflow)
    end
  end

  # Like describe_jobflow_with_id but takes raw request options and returns
  # the whole response; 'DescriptionType' => 'EXTENDED' is always added.
  def describe_jobflow(options)
    traced_call("DescribeJobFlows(#{options.inspect})") do
      @client.DescribeJobFlows(options.merge('DescriptionType' => 'EXTENDED'))
    end
  end

  def set_termination_protection(jobflow_ids, protected)
    traced_call("SetTerminationProtection('JobFlowIds' => #{jobflow_ids.inspect}, 'TerminationProtected' => #{protected})") do
      @client.SetTerminationProtection('JobFlowIds' => jobflow_ids, 'TerminationProtected' => protected)
    end
  end

  def terminate_jobflows(jobflow_ids)
    traced_call("TerminateJobFlows('JobFlowIds' => #{jobflow_ids.inspect})") do
      @client.TerminateJobFlows('JobFlowIds' => jobflow_ids)
    end
  end

  def modify_instance_groups(options)
    traced_call("ModifyInstanceGroups(#{options.inspect})") do
      @client.ModifyInstanceGroups(options)
    end
  end

  def add_instance_groups(options)
    traced_call("AddInstanceGroups(#{options.inspect})") do
      @client.AddInstanceGroups(options)
    end
  end

  private

  # Traces +description+, runs the block (the actual service call), traces
  # the inspected result, and returns it via raise_on_error.
  def traced_call(description)
    logger.trace description
    result = yield
    logger.trace result.inspect
    raise_on_error(result)
  end

  # Locates the bundled CA certificate file. It is looked up next to this
  # file first and then one directory up; when neither exists the path next
  # to this file is returned.
  def default_ca_file
    here = File.dirname(__FILE__)
    candidates = [
      File.join(here, "cacert.pem"),
      File.join(here, "..", "cacert.pem")
    ]
    candidates.find { |path| File.exist?(path) } || candidates.first
  end

end
117
+