ey_stonith 0.3.6 → 0.4.1.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. data/.gitignore +10 -0
  2. data/Gemfile +3 -0
  3. data/Rakefile +26 -0
  4. data/ey_stonith.gemspec +36 -0
  5. data/features/check.feature +57 -0
  6. data/features/cron.feature +67 -0
  7. data/features/fixtures/dead.ru +6 -0
  8. data/features/fixtures/healthy.ru +1 -0
  9. data/features/help.feature +7 -0
  10. data/features/not_found.feature +24 -0
  11. data/features/notify.feature +35 -0
  12. data/features/resume.feature +49 -0
  13. data/features/steps/stonith_steps.rb +40 -0
  14. data/features/stop.feature +48 -0
  15. data/features/support/env.rb +77 -0
  16. data/lib/ey_stonith/awsm_notifier.rb +13 -8
  17. data/lib/ey_stonith/commands/abstract.rb +0 -4
  18. data/lib/ey_stonith/commands/check.rb +13 -36
  19. data/lib/ey_stonith/commands/claim.rb +5 -96
  20. data/lib/ey_stonith/commands/cron.rb +3 -5
  21. data/lib/ey_stonith/commands/info.rb +1 -4
  22. data/lib/ey_stonith/commands/not_found.rb +1 -0
  23. data/lib/ey_stonith/commands/notify.rb +16 -55
  24. data/lib/ey_stonith/commands/reset.rb +0 -1
  25. data/lib/ey_stonith/commands/stop.rb +0 -1
  26. data/lib/ey_stonith/commands/takeover.rb +5 -90
  27. data/lib/ey_stonith/commands.rb +1 -1
  28. data/lib/ey_stonith/config.rb +17 -66
  29. data/lib/ey_stonith/rackapp.rb +26 -2
  30. data/lib/ey_stonith.rb +8 -15
  31. data/spec/config_spec.rb +53 -0
  32. data/spec/fixtures/config.yml +11 -0
  33. data/spec/fixtures/empty.yml +1 -0
  34. data/spec/helpers.rb +15 -0
  35. data/spec/history_spec.rb +58 -0
  36. data/spec/rackapp_spec.rb +100 -0
  37. data/spec/spec_helper.rb +24 -0
  38. metadata +240 -60
  39. data/lib/ey_stonith/address_stealer.rb +0 -40
  40. data/lib/ey_stonith/check_recorder.rb +0 -55
  41. data/lib/ey_stonith/data.rb +0 -11
  42. data/lib/ey_stonith/database.rb +0 -78
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ .DS_Store
2
+ .bundle
3
+ *.swp
4
+ *.swo
5
+ pkg
6
+ coverage
7
+ spec/config.yml
8
+ tmp
9
+ vendor
10
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'spec/rake/spectask'
5
+ desc "Run unit specifications"
6
+ Spec::Rake::SpecTask.new do |t|
7
+ t.spec_opts << %w(-fs --color)
8
+ t.spec_opts << '--loadby' << 'random'
9
+ t.spec_files = Dir["spec/**/*_spec.rb"]
10
+ t.rcov = ENV["RCOV"] == "true"
11
+ t.rcov_opts << '--exclude' << 'spec,gems'
12
+ t.rcov_opts << '--text-summary'
13
+ t.rcov_opts << '--sort' << 'coverage' << '--sort-reverse'
14
+ end
15
+
16
+ require 'cucumber/rake/task'
17
+ Cucumber::Rake::Task.new do |t|
18
+ t.cucumber_opts = %w{--format pretty}
19
+ t.rcov = ENV["RCOV"] == "true"
20
+ t.rcov_opts << '--exclude' << 'features,gems'
21
+ t.rcov_opts << '--text-summary'
22
+ t.rcov_opts << '--sort' << 'coverage' << '--sort-reverse'
23
+ end
24
+
25
+ desc "Run specs and cukes"
26
+ task :default => [:spec, :cucumber]
data/ey_stonith.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "ey_stonith"
6
+ s.version = "0.4.1.pre"
7
+ s.summary = "Shoot The Other Node In The Head (with a nerf gun)"
8
+ s.description = s.summary
9
+ s.has_rdoc = true
10
+ s.extra_rdoc_files = ["README.rdoc", "LICENSE"]
11
+ s.authors = ["Ezra Zygmuntowicz", "Larry Diehl", "Martin Emde", "Tim Carey-Smith", "Jason Hansen"]
12
+ s.email = "awsmdev@engineyard.com"
13
+ s.homepage = "http://engineyard.com/cloud"
14
+
15
+ s.add_dependency 'json'
16
+ s.add_dependency 'sinatra', '~>1.0'
17
+ s.add_dependency 'SystemTimer', '~>1.2'
18
+
19
+ s.add_development_dependency 'rspec'
20
+ s.add_development_dependency 'cucumber'
21
+ s.add_development_dependency 'open4'
22
+ s.add_development_dependency 'aruba', '0.3.2'
23
+ s.add_development_dependency 'ruby-debug'
24
+ s.add_development_dependency 'rake'
25
+ s.add_development_dependency 'randexp'
26
+ s.add_development_dependency 'cucumber'
27
+ s.add_development_dependency 'rcov'
28
+ s.add_development_dependency 'realweb', '~>0.1.4'
29
+ s.add_development_dependency 'rack-client'
30
+
31
+ s.files = `git ls-files`.split("\n")
32
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
33
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
34
+ s.require_paths = ["lib"]
35
+ end
36
+
data/features/check.feature ADDED
@@ -0,0 +1,57 @@
1
+ Feature: check
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Unhealthy master without a successful check
19
+ Given a file named "var/run/stonith/success" should not exist
20
+ And a "dead" master in "stonith.yml"
21
+ And a "healthy" endpoint in "stonith.yml"
22
+ When I successfully run "stonith check -c stonith.yml"
23
+ Then the output should contain exactly ""
24
+ And a file named "var/run/stonith/success" should not exist
25
+ When I successfully run "stonith status -c stonith.yml"
26
+ Then the output should contain exactly:
27
+ """
28
+ check
29
+
30
+ """
31
+
32
+ Scenario: Unhealthy master with previous successful check
33
+ Given a "dead" master in "stonith.yml"
34
+ And a "healthy" endpoint in "stonith.yml"
35
+ And an empty file named "var/run/stonith/success"
36
+ When I successfully run "stonith check -c stonith.yml"
37
+ Then the output should contain exactly ""
38
+ Then the file "server.log" should contain "notify i-12345678 __master_hostname__"
39
+ When I successfully run "stonith status -c stonith.yml"
40
+ Then the output should contain exactly:
41
+ """
42
+ notify -> notified
43
+
44
+ """
45
+
46
+ Scenario: Healthy master
47
+ Given a file named "var/run/stonith/success" should not exist
48
+ Given a "healthy" master in "stonith.yml"
49
+ When I successfully run "stonith check -c stonith.yml"
50
+ Then the output should contain exactly ""
51
+ And a file named "var/run/stonith/success" should exist
52
+ When I successfully run "stonith status -c stonith.yml"
53
+ Then the output should contain exactly:
54
+ """
55
+ check
56
+
57
+ """
data/features/cron.feature ADDED
@@ -0,0 +1,67 @@
1
+ Feature: cron startup & stop
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ heartbeat: 1
10
+ endpoint_uri: http://example.com/stonith
11
+ endpoint_token: token-for-i-12345678
12
+ endpoint_id: i-12345678
13
+ monitor_host: localhost
14
+ monitor_path: /health
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+ Given a "healthy" endpoint in "stonith.yml"
18
+
19
+ Scenario: Monitor - Running normally
20
+ Given a "healthy" master in "stonith.yml"
21
+ When I run "stonith cron -c stonith.yml" interactively
22
+ When I wait 5 seconds
23
+ And I successfully run "stonith status -c stonith.yml"
24
+ Then the output should contain exactly:
25
+ """
26
+ check
27
+
28
+ """
29
+ When I successfully run "stonith stop -c stonith.yml"
30
+ When I run "stonith status -c stonith.yml"
31
+ Then it should pass with exactly:
32
+ """
33
+ check -> stop
34
+
35
+ """
36
+ When I successfully run "stonith resume -c stonith.yml"
37
+ When I successfully run "stonith status -c stonith.yml"
38
+ Then the output should contain exactly:
39
+ """
40
+ stop -> resume
41
+
42
+ """
43
+ And I wait 5 seconds
44
+ When I successfully run "stonith status -c stonith.yml"
45
+ Then the output should contain exactly:
46
+ """
47
+ resume -> check
48
+
49
+ """
50
+
51
+ Scenario: Monitor - Resuming without stopping
52
+ Given a "healthy" master in "stonith.yml"
53
+ When I run "stonith cron -c stonith.yml" interactively
54
+ And I wait 5 seconds
55
+ And I successfully run "stonith status -c stonith.yml"
56
+ Then the output should contain exactly:
57
+ """
58
+ check
59
+
60
+ """
61
+ When I successfully run "stonith resume -c stonith.yml"
62
+ When I successfully run "stonith status -c stonith.yml"
63
+ Then the output should contain exactly:
64
+ """
65
+ check
66
+
67
+ """
data/features/fixtures/dead.ru ADDED
@@ -0,0 +1,6 @@
1
+ EY::Stonith.callback_module = Class.new MockMapper do
2
+ def self.healthy?() false end
3
+ def self.notify(_) raise end
4
+ end
5
+
6
+ run EY::Stonith::Rackapp
data/features/fixtures/healthy.ru ADDED
@@ -0,0 +1 @@
1
+ run EY::Stonith::Rackapp
data/features/help.feature ADDED
@@ -0,0 +1,7 @@
1
+ Feature: help
2
+
3
+ Scenario: Printing the help file
4
+ When I successfully run "stonith help"
5
+ Then the output should contain "Usage: stonith [COMMAND]"
6
+ And the output should not contain "Command not found"
7
+ And the output should not contain "Config file not found"
data/features/not_found.feature ADDED
@@ -0,0 +1,24 @@
1
+ Feature: not found
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Running the stonith twitter notifier (command not found)
19
+ When I failingly run "stonith twitter -c stonith.yml"
20
+ Then the stderr should contain "Command not found"
21
+
22
+ Scenario: Config file not found
23
+ When I failingly run "stonith status"
24
+ Then the stderr should contain "Config file /etc/stonith.yml: File not found."
data/features/notify.feature ADDED
@@ -0,0 +1,35 @@
1
+ Feature: notify
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Endpoint is dead
19
+ Given a "dead" endpoint in "stonith.yml"
20
+ When I failingly run "stonith notify -c stonith.yml"
21
+ Then the stderr should contain "Unable to notify provisioning server that master is down."
22
+ When I successfully run "stonith status -c stonith.yml"
23
+ Then the stdout from "stonith status -c stonith.yml" should contain "notify"
24
+
25
+ Scenario: Endpoint is happy
26
+ Given a "healthy" endpoint in "stonith.yml"
27
+ When I successfully run "stonith notify -c stonith.yml"
28
+ Then the output should contain exactly ""
29
+ Then the file "stonith.log" should contain "Provisioning server notified: (notification_count: 1, takeover: false)"
30
+ When I successfully run "stonith status -c stonith.yml"
31
+ Then the output should contain exactly:
32
+ """
33
+ notify -> notified
34
+
35
+ """
data/features/resume.feature ADDED
@@ -0,0 +1,49 @@
1
+ Feature: not found
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Not stopped
19
+ Given a file named "var/run/stonith/history" with:
20
+ """
21
+ check
22
+ """
23
+ When I successfully run "stonith resume -c stonith.yml"
24
+ Then the output should contain exactly ""
25
+ And the following files should not exist:
26
+ | var/run/stonith/stop |
27
+ When I successfully run "stonith status -c stonith.yml"
28
+ Then the output should contain exactly:
29
+ """
30
+ check
31
+
32
+ """
33
+
34
+ Scenario: Already stopped
35
+ Given a file named "var/run/stonith/history" with:
36
+ """
37
+ check -> stop
38
+ """
39
+ And an empty file named "var/run/stonith/stop"
40
+ When I successfully run "stonith resume -c stonith.yml"
41
+ Then the output should contain exactly ""
42
+ And the following files should not exist:
43
+ | var/run/stonith/stop |
44
+ When I successfully run "stonith status -c stonith.yml"
45
+ Then the output should contain exactly:
46
+ """
47
+ stop -> resume
48
+
49
+ """
data/features/steps/stonith_steps.rb ADDED
@@ -0,0 +1,40 @@
1
+ When /^I wait (\d+) seconds$/ do |time|
2
+ sleep time.to_i
3
+ end
4
+
5
+ When /^I failingly run "(.*)"$/ do |cmd|
6
+ And %|I run "#{cmd}"|
7
+ Then %|the exit status should not be 0|
8
+ end
9
+
10
+ When /^I run "(.*)" (\d) seconds? later$/ do |cmd, wait|
11
+ Given %|an executable named "bin/wait_then_run" with:|, <<-SCRIPT
12
+ #!/bin/bash
13
+ sleep #{wait}
14
+ #{cmd}
15
+ SCRIPT
16
+ And %|I run "bin/wait_then_run" interactively|
17
+ end
18
+
19
+ Given /^an executable named "([^\"]*)" with:$/ do |file_name, file_content|
20
+ Given %|a file named "#{file_name}" with:|, file_content
21
+ Given %|I successfully run "chmod a+x #{file_name}"|
22
+ end
23
+
24
+ Transform %r|(.*)__master_hostname__(.*)| do |before, after|
25
+ "#{before}#{master_hostname}#{after}"
26
+ end
27
+
28
+ def master_hostname
29
+ @master_hostname
30
+ end
31
+
32
+ Given /^a "([^\"]+)" master in "(.*)"$/ do |type, path|
33
+ @master_hostname = start_server type
34
+ update_config({"monitor_host" => @master_hostname}, path)
35
+ end
36
+
37
+ Given /^an? "(.*)" endpoint in "(.*)"$/ do |type, path|
38
+ @endpoint_hostname = start_server type
39
+ update_config({'endpoint_uri' => "http://#{@endpoint_hostname}"}, path)
40
+ end
data/features/stop.feature ADDED
@@ -0,0 +1,48 @@
1
+ Feature: stop
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ """
15
+ Given I successfully run "stonith reset -c stonith.yml"
16
+
17
+ Scenario: Already stopped
18
+ Given a file named "var/run/stonith/history" with:
19
+ """
20
+ check -> stop
21
+ """
22
+ And an empty file named "var/run/stonith/stop"
23
+ When I successfully run "stonith stop -c stonith.yml"
24
+ Then the output should contain exactly ""
25
+ And the following files should exist:
26
+ | var/run/stonith/stop |
27
+ When I successfully run "stonith status -c stonith.yml"
28
+ Then the output should contain exactly:
29
+ """
30
+ check -> stop
31
+
32
+ """
33
+
34
+ Scenario: Stopping when not stopped
35
+ Given a file named "var/run/stonith/history" with:
36
+ """
37
+ check
38
+ """
39
+ When I run "echo 'check -> stop' > var/run/stonith/history" 3 seconds later
40
+ When I successfully run "stonith stop -c stonith.yml"
41
+ And the following files should exist:
42
+ | var/run/stonith/stop |
43
+ And I successfully run "stonith status -c stonith.yml"
44
+ Then the output should contain exactly:
45
+ """
46
+ check -> stop
47
+
48
+ """
data/features/support/env.rb ADDED
@@ -0,0 +1,77 @@
1
+ require 'aruba'
2
+ require 'open4'
3
+ require 'realweb'
4
+ require 'aruba/cucumber'
5
+ require File.expand_path('../../../lib/ey_stonith', __FILE__)
6
+
7
+
8
+ ENV['PATH'] = [File.expand_path('../../../bin', __FILE__), ENV['PATH']].join(":")
9
+ ENV['RACK_ENV'] = 'test'
10
+
11
+ class MockMapper
12
+ @@notifications = []
13
+
14
+ def self.log(msg)
15
+ File.open('tmp/aruba/server.log', 'w') { |f| f.puts msg }
16
+ end
17
+ def self.healthy?() true end
18
+ def self.takeover(_) true end
19
+ def self.notify(label, monitor_host)
20
+ log "notify #{label} #{monitor_host}"
21
+ true
22
+ end
23
+ def self.token_for(label) "token-for-#{label}" end
24
+ end
25
+
26
+ module Helpers
27
+ def safe_read(io)
28
+ io.read_nonblock(1000)
29
+ rescue EOFError
30
+ io.read
31
+ rescue Errno::EAGAIN
32
+ ""
33
+ end
34
+
35
+ def start_server(config)
36
+ config_ru = File.expand_path("../../fixtures/#{config}.ru", __FILE__)
37
+ @server = RealWeb.start_server_in_fork(config_ru)
38
+ at_exit { stop_server }
39
+ "127.0.0.1:#{@server.port}"
40
+ end
41
+
42
+ def stop_server
43
+ if @server
44
+ @server.stop
45
+ @server = nil
46
+ end
47
+ end
48
+
49
+ def update_config(options, path = "stonith.yml")
50
+ path = File.expand_path(path, 'tmp/aruba')
51
+ config = YAML.load_file(path).merge(options)
52
+ File.open(path,'w') { |file| YAML.dump(config, file) }
53
+ end
54
+
55
+ def kill_run_in_background
56
+ if @pid
57
+ Process.kill('TERM', @pid)
58
+ Process.wait @pid
59
+ end
60
+ rescue
61
+ # erm... noop
62
+ ensure
63
+ @pid = nil
64
+ end
65
+ end
66
+
67
+ World(Helpers)
68
+
69
+ After do
70
+ kill_run_in_background
71
+ stop_server
72
+ end
73
+
74
+ Before do
75
+ @aruba_timeout_seconds = 8
76
+ EY::Stonith.callback_module = MockMapper
77
+ end
data/lib/ey_stonith/awsm_notifier.rb CHANGED
@@ -6,20 +6,22 @@ require 'timeout'
6
6
  module EY
7
7
  module Stonith
8
8
  class AwsmNotifier
9
- def initialize(instance_id, uri, token)
10
- @instance_id, @uri, @token = instance_id, uri, token
9
+ def initialize(uri, instance_id, token, params)
10
+ @uri, @instance_id, @token, @params = uri, instance_id, token, params
11
11
  end
12
12
 
13
- def notify(success, unreachable, refused)
14
- Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
13
+ def notify(success, unreachable)
14
+ Stonith.logger.info "Attempting to notify #@uri that master is down."
15
15
 
16
16
  code, body = do_request
17
17
 
18
18
  case code
19
19
  when 200...300
20
- success.call
21
- when 410
22
- refused.call body
20
+ if body.empty?
21
+ success.call {}
22
+ else
23
+ success.call JSON.parse(body)
24
+ end
23
25
  else
24
26
  unreachable.call
25
27
  end
@@ -43,7 +45,10 @@ module EY
43
45
  end
44
46
 
45
47
  def payload
46
- {'label' => @instance_id, 'token' => @token}
48
+ @params.merge({
49
+ 'label' => @instance_id,
50
+ 'token' => @token,
51
+ })
47
52
  end
48
53
 
49
54
  # ripped from restclient
data/lib/ey_stonith/commands/abstract.rb CHANGED
@@ -36,10 +36,6 @@ module EY
36
36
  @history ||= History.new(config.history_path)
37
37
  end
38
38
 
39
- def database
40
- @database ||= Database.new(config)
41
- end
42
-
43
39
  def call
44
40
  invoke
45
41
  rescue Config::Error => e
data/lib/ey_stonith/commands/check.rb CHANGED
@@ -14,47 +14,24 @@ module EY
14
14
  end
15
15
 
16
16
  def invoke
17
- database.with_data do |data|
18
- abort_if_master(data.hostname)
17
+ history << :check
19
18
 
20
- history << :check
21
-
22
- begin
23
- timeout(config.monitor_timeout) do
24
- open("http://#{data.hostname}#{config.monitor_path}").read
25
- end
26
- rescue StandardError, Timeout::Error
27
- bad_check(data.key, data.instance_id)
19
+ begin
20
+ timeout(config.monitor_timeout) do
21
+ open("http://#{config.monitor_host}#{config.monitor_path}").read
22
+ end
23
+ rescue StandardError, Timeout::Error => e
24
+ Stonith.logger.error "Got error when checking master: #{e.class}: #{e.message}"
25
+ if config.success_path.exist?
26
+ execute :notify
28
27
  else
29
- good_check(data.key)
28
+ Stonith.logger.error "No successful check against master yet. Not notifying."
30
29
  end
31
-
32
- exit
30
+ else
31
+ config.success_path.open('w') {}
33
32
  end
34
33
 
35
- abort "No master to check!"
36
- end
37
-
38
- def abort_if_master(hostname)
39
- return if config.meta_data_hostname != hostname
40
- abort "This instance is master! Can't check self."
41
- end
42
-
43
- def good_check(key)
44
- check_recorder.good_check!(key)
45
- end
46
-
47
- def bad_check(key, instance_id)
48
- check_recorder.bad_check!(key)
49
-
50
- if check_recorder.limit_exceeded?
51
- Stonith.logger.info "Invoking takeover of instance #{instance_id}"
52
- execute :takeover, "--instance #{instance_id}"
53
- end
54
- end
55
-
56
- def check_recorder
57
- @check_recorder ||= CheckRecorder.new(config.checks_path)
34
+ exit
58
35
  end
59
36
  end
60
37
  end