spot_build 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c740d2bdc6f1ab910728822d03e4a857e4ffc144f50c5bcb4d15dd402c730d76
4
- data.tar.gz: 167ece42931edd0c434d00eb9409714fe6a0458160da2397f68f89719d2d2bb0
3
+ metadata.gz: b908bf73e6792eb7a8eec7330ff361f0b169a2f41f73b165e2c6aa7974abc579
4
+ data.tar.gz: b022e0447aa52a5653b857d18e6b43faa7478d7d862149af1f3a149484e8852a
5
5
  SHA512:
6
- metadata.gz: d62f1d929dfeef74c78e176d405cae7ba6acb536042b273100edc2a19f79c451ef2fd260f3fb1d8a8ed1797254e87a7e5b57fdccbc12ac3376a964e828f00239
7
- data.tar.gz: 7a037186b55880955ff82fa4101e126137b2e8789d19a6d8421d3ce151ea430dc586ec1a3896026f4510339f1dbfee7734fa21e8f360b69260d608c5dc298d70
6
+ metadata.gz: 7a5c89ebd53903534085a0fb4b0d8530b31460bab4c53aa09ffbae8036ab1eb47e9fa36818747b02974396a776be00dba2db9389b2be8aad919a53d60dbf7ff9
7
+ data.tar.gz: e4c5fd24ad5d37ec89b5ed4f658f1062e81f02f7e8c5e6ac8f438029596141001eadf1c8e633b6cd43ae61f757489078397d601e35c095f5c4aad363faf2c772
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/lib/spot_build.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'spot_build/buildkite_agent'
1
+ require 'spot_build/buildkite_agents'
2
2
  require 'spot_build/spot_instance'
3
3
  require 'spot_build/sqs_event'
4
4
  require 'optparse'
@@ -15,19 +15,25 @@ module SpotBuild
15
15
  checks.push(SqsEvent.new(url: options[:queue_url], timeout: options[:timeout], region: options[:aws_region]))
16
16
  end
17
17
 
18
- agent = BuildkiteAgent.new(options[:token], options[:org_slug])
18
+ agents = BuildkiteAgents.new(options[:token], options[:org_slug])
19
19
  loop do
20
20
  checks.each do |check|
21
21
  terminating = check.shutdown_if_required do
22
- timeout = SpotInstance.scheduled_for_termination? ? (SpotInstance.time_until_termination - 30) : options[:timeout]
23
-
24
- agent.stop
25
- Timeout::timeout(timeout) do
26
- while agent.agent_running?
22
+ agents.stop
23
+ if options[:auto_retries]
24
+ timeout = SpotInstance.scheduled_for_termination? ? (SpotInstance.time_until_termination - 30) : options[:timeout]
25
+
26
+ Timeout::timeout(timeout) do
27
+ while agents.agents_running?
28
+ sleep 5
29
+ end
30
+ end rescue Timeout::Error
31
+ agents.the_end_is_nigh
32
+ else
33
+ while agents.agents_running?
27
34
  sleep 5
28
35
  end
29
- end rescue Timeout::Error
30
- agent.the_end_is_nigh
36
+ end
31
37
  end
32
38
  %x(shutdown -h now) if terminating
33
39
  end
@@ -36,7 +42,7 @@ module SpotBuild
36
42
  end
37
43
 
38
44
  def self.parse_options
39
- options = {}
45
+ options = {auto_retries: true}
40
46
  parser = OptionParser.new do |opts|
41
47
  opts.banner = "Usage: #{__FILE__} [options]"
42
48
  opts.on("-t", "--token TOKEN", "Buildkite API token") { |v| options[:token] = v }
@@ -44,6 +50,7 @@ module SpotBuild
44
50
  opts.on("-s", "--sqs-queue SQS-QUEUE-URL", "The SQS Queue URL we should monitor for events that tell us to shutdown") { |v| options[:queue_url] = v }
45
51
  opts.on("--timeout TIMEOUT", "The amount of time to wait for the buildkite agent to stop before shutting down. Only used if --sqs-queue is specified") { |v| options[:timeout] = v.to_i }
46
52
  opts.on("-r", "--aws-region REGION", "The AWS Region the SQS queue resides in") { |v| options[:aws_region] = v }
53
+ opts.on("-n", "--[no-]auto-retry", "Disable automatic retries") { |v| options[:auto_retries] = v }
47
54
  end
48
55
  parser.parse!
49
56
 
@@ -3,35 +3,62 @@ require 'socket'
3
3
  require 'link_header'
4
4
 
5
5
  module SpotBuild
6
- class BuildkiteAgent
6
+ class BuildkiteAgents
7
7
  def initialize(token, org_slug)
8
8
  @client = Buildkit.new(token: token)
9
9
  @org_slug = org_slug
10
10
  end
11
11
 
12
- def the_end_is_nigh
13
- return unless agent_running?
14
- job = current_job
15
- stop(true)
16
- reschedule_job(job)
12
+ def the_end_is_nigh(host = Socket.gethostname)
13
+ agents = agents_on_this_host(host)
14
+ agents.each do |agent|
15
+ stop_agent(agent, force: true)
16
+ end
17
+ agents.each do |agent|
18
+ reschedule_job(agent.job)
19
+ end
20
+ agents.count
17
21
  end
18
22
 
19
- def stop(force="false")
20
- return unless agent_running?
21
- @client.stop_agent(@org_slug, agent_id, "{\"force\": #{force}}")
23
+ def stop_agent(agent, force: false)
24
+ @client.stop_agent(@org_slug, agent.id, "{\"force\": #{force}}")
22
25
  rescue Buildkit::UnprocessableEntity
23
26
  # Swallow the error, this is generally thrown when the agent has already stopped
24
27
  end
25
28
 
26
- def agent_running?
27
- !agent.nil?
29
+ def agents_running?(host = Socket.gethostname)
30
+ !agents_on_this_host(host).empty?
31
+ end
32
+
33
+ def stop(host = Socket.gethostname)
34
+ agents_on_this_host(host).each do |agent|
35
+ stop_agent(agent, force: false)
36
+ end
28
37
  end
29
38
 
30
39
  private
31
40
 
41
+ RETRY_MESSAGE = /Only failed or timed out jobs can be retried/.freeze
42
+
32
43
  def reschedule_job(job)
33
44
  return if job.nil?
34
- @client.retry_job(@org_slug, job_pipeline(job[:build_url]), job_build(job[:build_url]), job[:id])
45
+ retry_error(Buildkit::BadRequest, RETRY_MESSAGE) do
46
+ @client.retry_job(@org_slug, job_pipeline(job[:build_url]), job_build(job[:build_url]), job[:id])
47
+ end
48
+ end
49
+
50
+ def retry_error(error_class, message_regex, sleep_seconds: 1, retries: 20)
51
+ begin
52
+ yield
53
+ rescue error_class => e
54
+ if retries > 0 && e.message =~ message_regex
55
+ sleep sleep_seconds
56
+ retries -= 1
57
+ retry
58
+ else
59
+ raise
60
+ end
61
+ end
35
62
  end
36
63
 
37
64
  # build_url: https://api.buildkite.com/v2/organizations/my-great-org/pipelines/sleeper/builds/50
@@ -43,19 +70,11 @@ module SpotBuild
43
70
  build_url[%r{organizations/#{@org_slug}/pipelines/[^/]*/builds/([0-9]*)}, 1]
44
71
  end
45
72
 
46
- def current_job
47
- agent.job
48
- end
49
-
50
- def agent_id
51
- @agent_id ||= agent.id
52
- end
53
-
54
- def agent
55
- agents.select { |agent| agent.hostname == Socket.gethostname }.first
73
+ def agents_on_this_host(host)
74
+ all_agents.select { |agent| agent.hostname == host }
56
75
  end
57
76
 
58
- def agents
77
+ def all_agents
59
78
  with_pagination do |options = {}|
60
79
  @client.agents(@org_slug, options)
61
80
  end
@@ -1,12 +1,21 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe SpotBuild::BuildkiteAgent do
3
+ describe SpotBuild::BuildkiteAgents do
4
4
  let(:org_slug) { "envato" }
5
- subject { described_class.new('deadbeef', org_slug) }
6
-
5
+ let(:pipeline) { "my-app" }
6
+ subject(:buildkite_agent) { described_class.new('deadbeef', org_slug) }
7
7
  let(:last_response_stub) { instance_double(Sawyer::Response) }
8
- let(:buildkit_stub) { double("Buildkit", :agents => agent_stubs) }
8
+ let(:buildkit_stub) { instance_double("Buildkit::Client", :agents => agent_stubs) }
9
9
  let(:hostname) { "i-1234567890" }
10
+ let(:build_id) { "12345678" }
11
+
12
+ def agent(id:, build_id: "12345678", job_id: "1")
13
+ double("BuildkiteAgent#{id}",
14
+ hostname: hostname,
15
+ id: id,
16
+ job: {build_url: "organizations/#{org_slug}/pipelines/#{pipeline}/builds/#{build_id}", id: job_id}
17
+ )
18
+ end
10
19
 
11
20
  before do
12
21
  allow(Buildkit).to receive(:new).and_return(buildkit_stub)
@@ -15,50 +24,80 @@ describe SpotBuild::BuildkiteAgent do
15
24
  allow(last_response_stub).to receive(:headers).and_return({"link" => nil})
16
25
  end
17
26
 
27
+ describe '#agents_running?' do
28
+ context 'when agents are running' do
29
+ let(:agent_stubs) { [agent(id: '123', build_id: build_id, job_id: '1')] }
30
+
31
+ it 'returns true' do
32
+ expect(buildkite_agent.agents_running?).to eq true
33
+ end
34
+ end
35
+
36
+ context "when agents aren't running" do
37
+ let(:agent_stubs) { [] }
38
+
39
+ it 'returns false' do
40
+ expect(buildkite_agent.agents_running?).to eq false
41
+ end
42
+ end
43
+ end
44
+
18
45
  describe '#the_end_is_nigh' do
19
46
  context 'the agent is not running' do
20
47
  let(:agent_stubs) { [] }
21
48
 
22
- it 'returns nil' do
23
- expect(subject.the_end_is_nigh).to equal(nil)
49
+ it 'does nothing' do
50
+ expect(buildkit_stub).to_not receive(:stop_agent)
51
+ expect(buildkit_stub).to_not receive(:retry_job)
52
+ buildkite_agent.the_end_is_nigh
24
53
  end
25
54
  end
26
55
 
27
- context 'the agent is running' do
28
- let(:agent_id) { 9876 }
29
- let(:agent_stubs) {
30
- [double("BuildkiteAgent",
31
- hostname: hostname,
32
- id: agent_id,
33
- job: {build_url: "organizations/#{org_slug}/pipelines/my-app/builds/12345678", id: "12345678"}
34
- )]
35
- }
56
+ context 'agents are running' do
57
+ let(:agent_1_id) { '9876' }
58
+ let(:agent_2_id) { '9877' }
59
+ let(:agent_stubs) { [agent(id: agent_1_id, build_id: build_id, job_id: '1'),
60
+ agent(id: agent_2_id, build_id: build_id, job_id: '2')] }
36
61
 
37
62
  before do
38
63
  allow(buildkit_stub).to receive(:stop_agent)
39
64
  allow(buildkit_stub).to receive(:retry_job)
40
65
  end
41
66
 
42
- it 'stops the agent forcefully' do
43
- expect(buildkit_stub).to receive(:stop_agent).with(org_slug, agent_id, '{"force": true}')
44
- subject.the_end_is_nigh
67
+ it 'stops each agent forcefully' do
68
+ expect(buildkit_stub).to receive(:stop_agent).with(org_slug, agent_1_id, '{"force": true}')
69
+ expect(buildkit_stub).to receive(:stop_agent).with(org_slug, agent_2_id, '{"force": true}')
70
+ buildkite_agent.the_end_is_nigh
45
71
  end
46
72
 
47
73
  it 'reschedules the job' do
48
- expect(buildkit_stub).to receive(:retry_job)
49
- subject.the_end_is_nigh
74
+ expect(buildkit_stub).to receive(:retry_job).with(org_slug, pipeline, build_id, '1')
75
+ expect(buildkit_stub).to receive(:retry_job).with(org_slug, pipeline, build_id, '2')
76
+ buildkite_agent.the_end_is_nigh
77
+ end
78
+
79
+ context "when the jobs aren't retryable yet" do
80
+ let(:agent_stubs) { [agent(id: agent_1_id, build_id: build_id, job_id: '1')] }
81
+
82
+ it 'retries' do
83
+ responses = [
84
+ -> { raise Buildkit::BadRequest, {method: 'PUT', url: 'https://api.buildkite.com/v2/organizations/#{org_slug}/pipelines/#{pipeline}/builds/18961/jobs/1/retry', body: 'Only failed or timed out jobs can be retried'} },
85
+ -> { nil }
86
+ ]
87
+ allow(buildkit_stub).to receive(:retry_job).with(org_slug, pipeline, build_id, '1') do
88
+ response = responses.shift
89
+ response.call if response
90
+ end
91
+ buildkite_agent.the_end_is_nigh
92
+ expect(buildkit_stub).to have_received(:retry_job)
93
+ .with(org_slug, pipeline, build_id, '1')
94
+ .twice
95
+ end
50
96
  end
51
97
  end
52
98
 
53
99
  context 'the agent stops while we are trying to stop it' do
54
- let(:agent_id) { 9876 }
55
- let(:agent_stubs) {
56
- [double("BuildkiteAgent",
57
- hostname: hostname,
58
- id: agent_id,
59
- job: {build_url: "organizations/#{org_slug}/pipelines/my-app/builds/12345678", id: "12345678"}
60
- )]
61
- }
100
+ let(:agent_stubs) { [agent(id: '9876')] }
62
101
 
63
102
  before do
64
103
  allow(buildkit_stub).to receive(:stop_agent).and_raise(Buildkit::UnprocessableEntity)
@@ -67,7 +106,7 @@ describe SpotBuild::BuildkiteAgent do
67
106
 
68
107
  it 'retries the job' do
69
108
  expect(buildkit_stub).to receive(:retry_job)
70
- subject.the_end_is_nigh
109
+ buildkite_agent.the_end_is_nigh
71
110
  end
72
111
  end
73
112
  end
data/spot_build.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = 'spot_build'
6
- gem.version = '1.0.0'
6
+ gem.version = '1.1.0'
7
7
  gem.authors = ['Patrick Robinson']
8
8
  gem.email = []
9
9
  gem.description = 'Helps manage Buildkite Agents running on EC2 Spot instances'
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
16
  gem.require_paths = ['lib']
17
17
 
18
- gem.add_dependency 'buildkit', '~> 0.4'
18
+ gem.add_dependency 'buildkit', '~> 1.4'
19
19
  gem.add_dependency 'aws-sdk', '~> 2'
20
20
  gem.add_dependency 'link_header', '~> 0.0.2'
21
21
  gem.add_development_dependency 'rspec', '~> 3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spot_build
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patrick Robinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-19 00:00:00.000000000 Z
11
+ date: 2019-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: buildkit
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.4'
19
+ version: '1.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.4'
26
+ version: '1.4'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: aws-sdk
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -87,16 +87,16 @@ executables:
87
87
  extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
+ - ".gitignore"
90
91
  - ".ruby-version"
91
92
  - ".travis.yml"
92
93
  - Gemfile
93
- - Gemfile.lock
94
94
  - LICENSE
95
95
  - README.md
96
96
  - Rakefile
97
97
  - bin/spot_build
98
98
  - lib/spot_build.rb
99
- - lib/spot_build/buildkite_agent.rb
99
+ - lib/spot_build/buildkite_agents.rb
100
100
  - lib/spot_build/spot_instance.rb
101
101
  - lib/spot_build/sqs_event.rb
102
102
  - spec/buildkite_agent_spec.rb
@@ -120,8 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
120
120
  - !ruby/object:Gem::Version
121
121
  version: '0'
122
122
  requirements: []
123
- rubyforge_project:
124
- rubygems_version: 2.7.6
123
+ rubygems_version: 3.0.3
125
124
  signing_key:
126
125
  specification_version: 4
127
126
  summary: Helps manage Buildkite Agents running on EC2 Spot instances
data/Gemfile.lock DELETED
@@ -1,56 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- spot_build (0.0.4)
5
- aws-sdk (~> 2)
6
- buildkit (~> 0.4)
7
- link_header (~> 0.0.2)
8
-
9
- GEM
10
- remote: https://rubygems.org/
11
- specs:
12
- addressable (2.3.8)
13
- aws-sdk (2.11.33)
14
- aws-sdk-resources (= 2.11.33)
15
- aws-sdk-core (2.11.33)
16
- aws-sigv4 (~> 1.0)
17
- jmespath (~> 1.0)
18
- aws-sdk-resources (2.11.33)
19
- aws-sdk-core (= 2.11.33)
20
- aws-sigv4 (1.0.2)
21
- buildkit (0.4.0)
22
- sawyer (~> 0.6.0)
23
- diff-lcs (1.3)
24
- faraday (0.9.2)
25
- multipart-post (>= 1.2, < 3)
26
- jmespath (1.4.0)
27
- link_header (0.0.8)
28
- multipart-post (2.0.0)
29
- rake (12.3.1)
30
- rspec (3.7.0)
31
- rspec-core (~> 3.7.0)
32
- rspec-expectations (~> 3.7.0)
33
- rspec-mocks (~> 3.7.0)
34
- rspec-core (3.7.1)
35
- rspec-support (~> 3.7.0)
36
- rspec-expectations (3.7.0)
37
- diff-lcs (>= 1.2.0, < 2.0)
38
- rspec-support (~> 3.7.0)
39
- rspec-mocks (3.7.0)
40
- diff-lcs (>= 1.2.0, < 2.0)
41
- rspec-support (~> 3.7.0)
42
- rspec-support (3.7.1)
43
- sawyer (0.6.0)
44
- addressable (~> 2.3.5)
45
- faraday (~> 0.8, < 0.10)
46
-
47
- PLATFORMS
48
- ruby
49
-
50
- DEPENDENCIES
51
- rake
52
- rspec (~> 3)
53
- spot_build!
54
-
55
- BUNDLED WITH
56
- 1.16.1