ey_stonith 0.3.6 → 0.4.1.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/.gitignore +10 -0
  2. data/Gemfile +3 -0
  3. data/Rakefile +26 -0
  4. data/ey_stonith.gemspec +36 -0
  5. data/features/check.feature +57 -0
  6. data/features/cron.feature +67 -0
  7. data/features/fixtures/dead.ru +6 -0
  8. data/features/fixtures/healthy.ru +1 -0
  9. data/features/help.feature +7 -0
  10. data/features/not_found.feature +24 -0
  11. data/features/notify.feature +35 -0
  12. data/features/resume.feature +49 -0
  13. data/features/steps/stonith_steps.rb +40 -0
  14. data/features/stop.feature +48 -0
  15. data/features/support/env.rb +77 -0
  16. data/lib/ey_stonith/awsm_notifier.rb +13 -8
  17. data/lib/ey_stonith/commands/abstract.rb +0 -4
  18. data/lib/ey_stonith/commands/check.rb +13 -36
  19. data/lib/ey_stonith/commands/claim.rb +5 -96
  20. data/lib/ey_stonith/commands/cron.rb +3 -5
  21. data/lib/ey_stonith/commands/info.rb +1 -4
  22. data/lib/ey_stonith/commands/not_found.rb +1 -0
  23. data/lib/ey_stonith/commands/notify.rb +16 -55
  24. data/lib/ey_stonith/commands/reset.rb +0 -1
  25. data/lib/ey_stonith/commands/stop.rb +0 -1
  26. data/lib/ey_stonith/commands/takeover.rb +5 -90
  27. data/lib/ey_stonith/commands.rb +1 -1
  28. data/lib/ey_stonith/config.rb +17 -66
  29. data/lib/ey_stonith/rackapp.rb +26 -2
  30. data/lib/ey_stonith.rb +8 -15
  31. data/spec/config_spec.rb +53 -0
  32. data/spec/fixtures/config.yml +11 -0
  33. data/spec/fixtures/empty.yml +1 -0
  34. data/spec/helpers.rb +15 -0
  35. data/spec/history_spec.rb +58 -0
  36. data/spec/rackapp_spec.rb +100 -0
  37. data/spec/spec_helper.rb +24 -0
  38. metadata +240 -60
  39. data/lib/ey_stonith/address_stealer.rb +0 -40
  40. data/lib/ey_stonith/check_recorder.rb +0 -55
  41. data/lib/ey_stonith/data.rb +0 -11
  42. data/lib/ey_stonith/database.rb +0 -78
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ .DS_Store
2
+ .bundle
3
+ *.swp
4
+ *.swo
5
+ pkg
6
+ coverage
7
+ spec/config.yml
8
+ tmp
9
+ vendor
10
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'spec/rake/spectask'
5
+ desc "Run unit specifications"
6
+ Spec::Rake::SpecTask.new do |t|
7
+ t.spec_opts << %w(-fs --color)
8
+ t.spec_opts << '--loadby' << 'random'
9
+ t.spec_files = Dir["spec/**/*_spec.rb"]
10
+ t.rcov = ENV["RCOV"] == "true"
11
+ t.rcov_opts << '--exclude' << 'spec,gems'
12
+ t.rcov_opts << '--text-summary'
13
+ t.rcov_opts << '--sort' << 'coverage' << '--sort-reverse'
14
+ end
15
+
16
+ require 'cucumber/rake/task'
17
+ Cucumber::Rake::Task.new do |t|
18
+ t.cucumber_opts = %w{--format pretty}
19
+ t.rcov = ENV["RCOV"] == "true"
20
+ t.rcov_opts << '--exclude' << 'features,gems'
21
+ t.rcov_opts << '--text-summary'
22
+ t.rcov_opts << '--sort' << 'coverage' << '--sort-reverse'
23
+ end
24
+
25
+ desc "Run specs and cukes"
26
+ task :default => [:spec, :cucumber]
data/ey_stonith.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "ey_stonith"
6
+ s.version = "0.4.1.pre"
7
+ s.summary = "Shoot The Other Node In The Head (with a nerf gun)"
8
+ s.description = s.summary
9
+ s.has_rdoc = true
10
+ s.extra_rdoc_files = ["README.rdoc", "LICENSE"]
11
+ s.authors = ["Ezra Zygmuntowicz", "Larry Diehl", "Martin Emde", "Tim Carey-Smith", "Jason Hansen"]
12
+ s.email = "awsmdev@engineyard.com"
13
+ s.homepage = "http://engineyard.com/cloud"
14
+
15
+ s.add_dependency 'json'
16
+ s.add_dependency 'sinatra', '~>1.0'
17
+ s.add_dependency 'SystemTimer', '~>1.2'
18
+
19
+ s.add_development_dependency 'rspec'
20
+ s.add_development_dependency 'cucumber'
21
+ s.add_development_dependency 'open4'
22
+ s.add_development_dependency 'aruba', '0.3.2'
23
+ s.add_development_dependency 'ruby-debug'
24
+ s.add_development_dependency 'rake'
25
+ s.add_development_dependency 'randexp'
26
+ s.add_development_dependency 'cucumber'
27
+ s.add_development_dependency 'rcov'
28
+ s.add_development_dependency 'realweb', '~>0.1.4'
29
+ s.add_development_dependency 'rack-client'
30
+
31
+ s.files = `git ls-files`.split("\n")
32
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
33
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
34
+ s.require_paths = ["lib"]
35
+ end
36
+
data/features/check.feature ADDED
@@ -0,0 +1,57 @@
1
+ Feature: check
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Unhealthy master without a successful check
19
+ Given a file named "var/run/stonith/success" should not exist
20
+ And a "dead" master in "stonith.yml"
21
+ And a "healthy" endpoint in "stonith.yml"
22
+ When I successfully run "stonith check -c stonith.yml"
23
+ Then the output should contain exactly ""
24
+ And a file named "var/run/stonith/success" should not exist
25
+ When I successfully run "stonith status -c stonith.yml"
26
+ Then the output should contain exactly:
27
+ """
28
+ check
29
+
30
+ """
31
+
32
+ Scenario: Unhealthy master with previous successful check
33
+ Given a "dead" master in "stonith.yml"
34
+ And a "healthy" endpoint in "stonith.yml"
35
+ And an empty file named "var/run/stonith/success"
36
+ When I successfully run "stonith check -c stonith.yml"
37
+ Then the output should contain exactly ""
38
+ Then the file "server.log" should contain "notify i-12345678 __master_hostname__"
39
+ When I successfully run "stonith status -c stonith.yml"
40
+ Then the output should contain exactly:
41
+ """
42
+ notify -> notified
43
+
44
+ """
45
+
46
+ Scenario: Healthy master
47
+ Given a file named "var/run/stonith/success" should not exist
48
+ Given a "healthy" master in "stonith.yml"
49
+ When I successfully run "stonith check -c stonith.yml"
50
+ Then the output should contain exactly ""
51
+ And a file named "var/run/stonith/success" should exist
52
+ When I successfully run "stonith status -c stonith.yml"
53
+ Then the output should contain exactly:
54
+ """
55
+ check
56
+
57
+ """
data/features/cron.feature ADDED
@@ -0,0 +1,67 @@
1
+ Feature: cron startup & stop
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ heartbeat: 1
10
+ endpoint_uri: http://example.com/stonith
11
+ endpoint_token: token-for-i-12345678
12
+ endpoint_id: i-12345678
13
+ monitor_host: localhost
14
+ monitor_path: /health
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+ Given a "healthy" endpoint in "stonith.yml"
18
+
19
+ Scenario: Monitor - Running normally
20
+ Given a "healthy" master in "stonith.yml"
21
+ When I run "stonith cron -c stonith.yml" interactively
22
+ When I wait 5 seconds
23
+ And I successfully run "stonith status -c stonith.yml"
24
+ Then the output should contain exactly:
25
+ """
26
+ check
27
+
28
+ """
29
+ When I successfully run "stonith stop -c stonith.yml"
30
+ When I run "stonith status -c stonith.yml"
31
+ Then it should pass with exactly:
32
+ """
33
+ check -> stop
34
+
35
+ """
36
+ When I successfully run "stonith resume -c stonith.yml"
37
+ When I successfully run "stonith status -c stonith.yml"
38
+ Then the output should contain exactly:
39
+ """
40
+ stop -> resume
41
+
42
+ """
43
+ And I wait 5 seconds
44
+ When I successfully run "stonith status -c stonith.yml"
45
+ Then the output should contain exactly:
46
+ """
47
+ resume -> check
48
+
49
+ """
50
+
51
+ Scenario: Monitor - Resuming without stopping
52
+ Given a "healthy" master in "stonith.yml"
53
+ When I run "stonith cron -c stonith.yml" interactively
54
+ And I wait 5 seconds
55
+ And I successfully run "stonith status -c stonith.yml"
56
+ Then the output should contain exactly:
57
+ """
58
+ check
59
+
60
+ """
61
+ When I successfully run "stonith resume -c stonith.yml"
62
+ When I successfully run "stonith status -c stonith.yml"
63
+ Then the output should contain exactly:
64
+ """
65
+ check
66
+
67
+ """
data/features/fixtures/dead.ru ADDED
@@ -0,0 +1,6 @@
1
+ EY::Stonith.callback_module = Class.new MockMapper do
2
+ def self.healthy?() false end
3
+ def self.notify(_) raise end
4
+ end
5
+
6
+ run EY::Stonith::Rackapp
data/features/fixtures/healthy.ru ADDED
@@ -0,0 +1 @@
1
+ run EY::Stonith::Rackapp
data/features/help.feature ADDED
@@ -0,0 +1,7 @@
1
+ Feature: help
2
+
3
+ Scenario: Printing the help file
4
+ When I successfully run "stonith help"
5
+ Then the output should contain "Usage: stonith [COMMAND]"
6
+ And the output should not contain "Command not found"
7
+ And the output should not contain "Config file not found"
data/features/not_found.feature ADDED
@@ -0,0 +1,24 @@
1
+ Feature: not found
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Running the stonith twitter notifier (command not found)
19
+ When I failingly run "stonith twitter -c stonith.yml"
20
+ Then the stderr should contain "Command not found"
21
+
22
+ Scenario: Config file not found
23
+ When I failingly run "stonith status"
24
+ Then the stderr should contain "Config file /etc/stonith.yml: File not found."
data/features/notify.feature ADDED
@@ -0,0 +1,35 @@
1
+ Feature: notify
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Endpoint is dead
19
+ Given a "dead" endpoint in "stonith.yml"
20
+ When I failingly run "stonith notify -c stonith.yml"
21
+ Then the stderr should contain "Unable to notify provisioning server that master is down."
22
+ When I successfully run "stonith status -c stonith.yml"
23
+ Then the stdout from "stonith status -c stonith.yml" should contain "notify"
24
+
25
+ Scenario: Endpoint is happy
26
+ Given a "healthy" endpoint in "stonith.yml"
27
+ When I successfully run "stonith notify -c stonith.yml"
28
+ Then the output should contain exactly ""
29
+ Then the file "stonith.log" should contain "Provisioning server notified: (notification_count: 1, takeover: false)"
30
+ When I successfully run "stonith status -c stonith.yml"
31
+ Then the output should contain exactly:
32
+ """
33
+ notify -> notified
34
+
35
+ """
data/features/resume.feature ADDED
@@ -0,0 +1,49 @@
1
+ Feature: not found
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ success_path: var/run/stonith/success
15
+ """
16
+ Given I successfully run "stonith reset -c stonith.yml"
17
+
18
+ Scenario: Not stopped
19
+ Given a file named "var/run/stonith/history" with:
20
+ """
21
+ check
22
+ """
23
+ When I successfully run "stonith resume -c stonith.yml"
24
+ Then the output should contain exactly ""
25
+ And the following files should not exist:
26
+ | var/run/stonith/stop |
27
+ When I successfully run "stonith status -c stonith.yml"
28
+ Then the output should contain exactly:
29
+ """
30
+ check
31
+
32
+ """
33
+
34
+ Scenario: Already stopped
35
+ Given a file named "var/run/stonith/history" with:
36
+ """
37
+ check -> stop
38
+ """
39
+ And an empty file named "var/run/stonith/stop"
40
+ When I successfully run "stonith resume -c stonith.yml"
41
+ Then the output should contain exactly ""
42
+ And the following files should not exist:
43
+ | var/run/stonith/stop |
44
+ When I successfully run "stonith status -c stonith.yml"
45
+ Then the output should contain exactly:
46
+ """
47
+ stop -> resume
48
+
49
+ """
data/features/steps/stonith_steps.rb ADDED
@@ -0,0 +1,40 @@
1
+ When /^I wait (\d+) seconds$/ do |time|
2
+ sleep time.to_i
3
+ end
4
+
5
+ When /^I failingly run "(.*)"$/ do |cmd|
6
+ And %|I run "#{cmd}"|
7
+ Then %|the exit status should not be 0|
8
+ end
9
+
10
+ When /^I run "(.*)" (\d) seconds? later$/ do |cmd, wait|
11
+ Given %|an executable named "bin/wait_then_run" with:|, <<-SCRIPT
12
+ #!/bin/bash
13
+ sleep #{wait}
14
+ #{cmd}
15
+ SCRIPT
16
+ And %|I run "bin/wait_then_run" interactively|
17
+ end
18
+
19
+ Given /^an executable named "([^\"]*)" with:$/ do |file_name, file_content|
20
+ Given %|a file named "#{file_name}" with:|, file_content
21
+ Given %|I successfully run "chmod a+x #{file_name}"|
22
+ end
23
+
24
+ Transform %r|(.*)__master_hostname__(.*)| do |before, after|
25
+ "#{before}#{master_hostname}#{after}"
26
+ end
27
+
28
+ def master_hostname
29
+ @master_hostname
30
+ end
31
+
32
+ Given /^a "([^\"]+)" master in "(.*)"$/ do |type, path|
33
+ @master_hostname = start_server type
34
+ update_config({"monitor_host" => @master_hostname}, path)
35
+ end
36
+
37
+ Given /^an? "(.*)" endpoint in "(.*)"$/ do |type, path|
38
+ @endpoint_hostname = start_server type
39
+ update_config({'endpoint_uri' => "http://#{@endpoint_hostname}"}, path)
40
+ end
data/features/stop.feature ADDED
@@ -0,0 +1,48 @@
1
+ Feature: stop
2
+
3
+ Background:
4
+ Given a file named "stonith.yml" with:
5
+ """
6
+ ---
7
+ log: stonith.log
8
+ state_dir: var/run/stonith
9
+ endpoint_uri: http://example.com/stonith
10
+ endpoint_token: token-for-i-12345678
11
+ endpoint_id: i-12345678
12
+ monitor_host: localhost
13
+ monitor_path: /health
14
+ """
15
+ Given I successfully run "stonith reset -c stonith.yml"
16
+
17
+ Scenario: Already stopped
18
+ Given a file named "var/run/stonith/history" with:
19
+ """
20
+ check -> stop
21
+ """
22
+ And an empty file named "var/run/stonith/stop"
23
+ When I successfully run "stonith stop -c stonith.yml"
24
+ Then the output should contain exactly ""
25
+ And the following files should exist:
26
+ | var/run/stonith/stop |
27
+ When I successfully run "stonith status -c stonith.yml"
28
+ Then the output should contain exactly:
29
+ """
30
+ check -> stop
31
+
32
+ """
33
+
34
+ Scenario: Stopping when not stopped
35
+ Given a file named "var/run/stonith/history" with:
36
+ """
37
+ check
38
+ """
39
+ When I run "echo 'check -> stop' > var/run/stonith/history" 3 seconds later
40
+ When I successfully run "stonith stop -c stonith.yml"
41
+ And the following files should exist:
42
+ | var/run/stonith/stop |
43
+ And I successfully run "stonith status -c stonith.yml"
44
+ Then the output should contain exactly:
45
+ """
46
+ check -> stop
47
+
48
+ """
data/features/support/env.rb ADDED
@@ -0,0 +1,77 @@
1
+ require 'aruba'
2
+ require 'open4'
3
+ require 'realweb'
4
+ require 'aruba/cucumber'
5
+ require File.expand_path('../../../lib/ey_stonith', __FILE__)
6
+
7
+
8
+ ENV['PATH'] = [File.expand_path('../../../bin', __FILE__), ENV['PATH']].join(":")
9
+ ENV['RACK_ENV'] = 'test'
10
+
11
+ class MockMapper
12
+ @@notifications = []
13
+
14
+ def self.log(msg)
15
+ File.open('tmp/aruba/server.log', 'w') { |f| f.puts msg }
16
+ end
17
+ def self.healthy?() true end
18
+ def self.takeover(_) true end
19
+ def self.notify(label, monitor_host)
20
+ log "notify #{label} #{monitor_host}"
21
+ true
22
+ end
23
+ def self.token_for(label) "token-for-#{label}" end
24
+ end
25
+
26
+ module Helpers
27
+ def safe_read(io)
28
+ io.read_nonblock(1000)
29
+ rescue EOFError
30
+ io.read
31
+ rescue Errno::EAGAIN
32
+ ""
33
+ end
34
+
35
+ def start_server(config)
36
+ config_ru = File.expand_path("../../fixtures/#{config}.ru", __FILE__)
37
+ @server = RealWeb.start_server_in_fork(config_ru)
38
+ at_exit { stop_server }
39
+ "127.0.0.1:#{@server.port}"
40
+ end
41
+
42
+ def stop_server
43
+ if @server
44
+ @server.stop
45
+ @server = nil
46
+ end
47
+ end
48
+
49
+ def update_config(options, path = "stonith.yml")
50
+ path = File.expand_path(path, 'tmp/aruba')
51
+ config = YAML.load_file(path).merge(options)
52
+ File.open(path,'w') { |file| YAML.dump(config, file) }
53
+ end
54
+
55
+ def kill_run_in_background
56
+ if @pid
57
+ Process.kill('TERM', @pid)
58
+ Process.wait @pid
59
+ end
60
+ rescue
61
+ # erm... noop
62
+ ensure
63
+ @pid = nil
64
+ end
65
+ end
66
+
67
+ World(Helpers)
68
+
69
+ After do
70
+ kill_run_in_background
71
+ stop_server
72
+ end
73
+
74
+ Before do
75
+ @aruba_timeout_seconds = 8
76
+ EY::Stonith.callback_module = MockMapper
77
+ end
data/lib/ey_stonith/awsm_notifier.rb CHANGED
@@ -6,20 +6,22 @@ require 'timeout'
6
6
  module EY
7
7
  module Stonith
8
8
  class AwsmNotifier
9
- def initialize(instance_id, uri, token)
10
- @instance_id, @uri, @token = instance_id, uri, token
9
+ def initialize(uri, instance_id, token, params)
10
+ @uri, @instance_id, @token, @params = uri, instance_id, token, params
11
11
  end
12
12
 
13
- def notify(success, unreachable, refused)
14
- Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
13
+ def notify(success, unreachable)
14
+ Stonith.logger.info "Attempting to notify #@uri that master is down."
15
15
 
16
16
  code, body = do_request
17
17
 
18
18
  case code
19
19
  when 200...300
20
- success.call
21
- when 410
22
- refused.call body
20
+ if body.empty?
21
+ success.call {}
22
+ else
23
+ success.call JSON.parse(body)
24
+ end
23
25
  else
24
26
  unreachable.call
25
27
  end
@@ -43,7 +45,10 @@ module EY
43
45
  end
44
46
 
45
47
  def payload
46
- {'label' => @instance_id, 'token' => @token}
48
+ @params.merge({
49
+ 'label' => @instance_id,
50
+ 'token' => @token,
51
+ })
47
52
  end
48
53
 
49
54
  # ripped from restclient
data/lib/ey_stonith/commands/abstract.rb CHANGED
@@ -36,10 +36,6 @@ module EY
36
36
  @history ||= History.new(config.history_path)
37
37
  end
38
38
 
39
- def database
40
- @database ||= Database.new(config)
41
- end
42
-
43
39
  def call
44
40
  invoke
45
41
  rescue Config::Error => e
data/lib/ey_stonith/commands/check.rb CHANGED
@@ -14,47 +14,24 @@ module EY
14
14
  end
15
15
 
16
16
  def invoke
17
- database.with_data do |data|
18
- abort_if_master(data.hostname)
17
+ history << :check
19
18
 
20
- history << :check
21
-
22
- begin
23
- timeout(config.monitor_timeout) do
24
- open("http://#{data.hostname}#{config.monitor_path}").read
25
- end
26
- rescue StandardError, Timeout::Error
27
- bad_check(data.key, data.instance_id)
19
+ begin
20
+ timeout(config.monitor_timeout) do
21
+ open("http://#{config.monitor_host}#{config.monitor_path}").read
22
+ end
23
+ rescue StandardError, Timeout::Error => e
24
+ Stonith.logger.error "Got error when checking master: #{e.class}: #{e.message}"
25
+ if config.success_path.exist?
26
+ execute :notify
28
27
  else
29
- good_check(data.key)
28
+ Stonith.logger.error "No successful check against master yet. Not notifying."
30
29
  end
31
-
32
- exit
30
+ else
31
+ config.success_path.open('w') {}
33
32
  end
34
33
 
35
- abort "No master to check!"
36
- end
37
-
38
- def abort_if_master(hostname)
39
- return if config.meta_data_hostname != hostname
40
- abort "This instance is master! Can't check self."
41
- end
42
-
43
- def good_check(key)
44
- check_recorder.good_check!(key)
45
- end
46
-
47
- def bad_check(key, instance_id)
48
- check_recorder.bad_check!(key)
49
-
50
- if check_recorder.limit_exceeded?
51
- Stonith.logger.info "Invoking takeover of instance #{instance_id}"
52
- execute :takeover, "--instance #{instance_id}"
53
- end
54
- end
55
-
56
- def check_recorder
57
- @check_recorder ||= CheckRecorder.new(config.checks_path)
34
+ exit
58
35
  end
59
36
  end
60
37
  end