ey_stonith 0.1.4 → 0.1.5.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/bin/ey-monitor +2 -5
  2. data/bin/{ey-monitor-reset → stonith} +1 -1
  3. data/bin/{ey-monitor-stop → stonith-check} +1 -1
  4. data/bin/stonith-claim +5 -0
  5. data/bin/{ey-monitor-resume → stonith-cron} +1 -1
  6. data/bin/stonith-notify +5 -0
  7. data/bin/stonith-reset +5 -0
  8. data/bin/stonith-resume +5 -0
  9. data/bin/stonith-status +5 -0
  10. data/bin/{ey-monitor-status → stonith-stop} +1 -1
  11. data/bin/stonith-takeover +5 -0
  12. data/lib/ey_stonith/address_stealer.rb +1 -6
  13. data/lib/ey_stonith/awsm_notifier.rb +28 -29
  14. data/lib/ey_stonith/check_recorder.rb +19 -17
  15. data/lib/ey_stonith/commands/abstract.rb +94 -0
  16. data/lib/ey_stonith/commands/check.rb +58 -0
  17. data/lib/ey_stonith/commands/claim.rb +113 -0
  18. data/lib/ey_stonith/commands/commands.rb +26 -0
  19. data/lib/ey_stonith/commands/cron.rb +40 -0
  20. data/lib/ey_stonith/commands/help.rb +16 -0
  21. data/lib/ey_stonith/commands/not_found.rb +11 -0
  22. data/lib/ey_stonith/commands/notify.rb +85 -0
  23. data/lib/ey_stonith/commands/reset.rb +21 -0
  24. data/lib/ey_stonith/commands/resume.rb +19 -0
  25. data/lib/ey_stonith/commands/status.rb +23 -0
  26. data/lib/ey_stonith/commands/stop.rb +21 -0
  27. data/lib/ey_stonith/commands/takeover.rb +106 -0
  28. data/lib/ey_stonith/commands.rb +40 -0
  29. data/lib/ey_stonith/config.rb +107 -14
  30. data/lib/ey_stonith/data.rb +5 -1
  31. data/lib/ey_stonith/database.rb +28 -6
  32. data/lib/ey_stonith/history.rb +1 -1
  33. data/lib/ey_stonith.rb +2 -8
  34. metadata +57 -61
  35. data/lib/ey_stonith/abstract_master.rb +0 -15
  36. data/lib/ey_stonith/box.rb +0 -61
  37. data/lib/ey_stonith/cli.rb +0 -138
  38. data/lib/ey_stonith/local_master.rb +0 -28
  39. data/lib/ey_stonith/master.rb +0 -37
  40. data/lib/ey_stonith/meta_data.rb +0 -11
  41. data/lib/ey_stonith/slave.rb +0 -41
data/bin/ey-monitor CHANGED
@@ -1,5 +1,2 @@
1
- #!/usr/bin/env ruby
2
- $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
- require 'ey_stonith'
4
-
5
- EY::Stonith::CLI.new(:start, ARGV)
1
+ #!/bin/bash
2
+ stonith-start
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:reset, ARGV)
5
+ EY::Stonith::Commands.invoke(ARGV)
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:stop, ARGV)
5
+ EY::Stonith::Commands::Check.new(ARGV).call
data/bin/stonith-claim ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Claim.new(ARGV).call
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:resume, ARGV)
5
+ EY::Stonith::Commands::Cron.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Notify.new(ARGV).call
data/bin/stonith-reset ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Reset.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Resume.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Status.new(ARGV).call
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:status, ARGV)
5
+ EY::Stonith::Commands::Stop.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Takeover.new(ARGV).call
@@ -3,12 +3,7 @@ require 'fog'
3
3
  module EY
4
4
  module Stonith
5
5
  class AddressStealer
6
- def self.fog(credentials)
7
- Fog::AWS::EC2.new(
8
- :aws_access_key_id => credentials[:aws_secret_id],
9
- :aws_secret_access_key => credentials[:aws_secret_key]
10
- )
11
- end
6
+ def self.fog(credentials) Fog::AWS::EC2.new(credentials) end
12
7
 
13
8
  def initialize(server_id, ip, credentials)
14
9
  @fog = self.class.fog(credentials)
@@ -1,49 +1,48 @@
1
- require 'em-http'
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'timeout'
2
4
 
3
5
  module EY
4
6
  module Stonith
5
7
  class AwsmNotifier
6
- def initialize(instance_id, notify_uri, opts, heartbeat = 5)
7
- @instance_id, @notify_uri, @opts, @heartbeat = instance_id, notify_uri, opts, heartbeat
8
- call_api
8
+ def initialize(instance_id, uri, credentials)
9
+ @instance_id, @uri, @credentials = instance_id, uri, credentials
9
10
  end
10
11
 
11
- private
12
+ def notify(success, unreachable, refused)
13
+ Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
12
14
 
13
- def try_again
14
- EM.add_timer(@heartbeat) { call_api }
15
- end
15
+ code, body = do_request
16
16
 
17
- def call_api
18
- Stonith.logger.info("Notifying awsm that I did a takeover")
19
- http = EM::HttpRequest.new(@notify_uri).post :body => body, :head => head, :timeout => 10
20
- http.callback {
21
- ok = (200...300).include?(http.response_header.status)
22
- if ok && JSON.parse(http.response)['status'] == 'ok'
23
- Stonith.logger.info("Notified awsm!")
17
+ case code
18
+ when 200...300
19
+ if JSON.parse(body)['status'] == 'ok'
20
+ success.call
24
21
  else
25
- try_again
22
+ refused.call body
26
23
  end
27
- }
28
- http.errback { try_again }
24
+ else
25
+ unreachable.call
26
+ end
27
+ rescue
28
+ unreachable.call
29
29
  end
30
30
 
31
- def head
32
- {"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
31
+ private
32
+
33
+ def do_request
34
+ response = Timeout.timeout(10) do
35
+ Net::HTTP.new(@uri.host, @uri.port).request_post(@uri.path, process_payload(payload), headers)
36
+ end
37
+ [response.code.to_i, response.body]
33
38
  end
34
39
 
35
- def body
36
- result = process_payload(payload)
37
- def result.bytesize() size end unless result.respond_to?(:bytesize)
38
- result
40
+ def headers
41
+ {"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
39
42
  end
40
43
 
41
44
  def payload
42
- {
43
- 'instance_id' => @instance_id,
44
- 'aws_secret_id' => @opts[:aws_secret_id],
45
- 'aws_secret_key' => @opts[:aws_secret_key],
46
- }
45
+ @credentials.merge('instance_id' => @instance_id)
47
46
  end
48
47
 
49
48
  # ripped from restclient so we can use eventmachine
@@ -3,50 +3,52 @@ module EY
3
3
  class CheckRecorder
4
4
  BAD_CHECK_MAX = 5
5
5
 
6
- def initialize
7
- reset
6
+ attr_reader :bad
7
+
8
+ def initialize(pathname)
9
+ @pathname = pathname
10
+ rehydrate
8
11
  end
9
12
 
10
13
  def bad_check!(key)
11
14
  reset_on_key_change(key)
12
15
  log_bad_check
13
16
  @bad += 1 if @seen_good
17
+ save
14
18
  end
15
19
 
16
20
  def good_check!(key)
17
- @key = key
18
- @bad = 0
19
- @seen_good = true
20
- end
21
-
22
- def seen_good?
23
- @seen_good
21
+ @key, @bad, @seen_good = key, 0, true
22
+ save
24
23
  end
25
24
 
26
25
  def limit_exceeded?
27
- seen_good? && @bad > BAD_CHECK_MAX
26
+ @seen_good && @bad > BAD_CHECK_MAX
28
27
  end
29
28
 
30
- def checking_key?(key)
31
- @key == key
29
+ protected
30
+
31
+ def save
32
+ @pathname.open('w') { |f| f << Marshal.dump([@key, @bad, @seen_good]) }
32
33
  end
33
34
 
34
- protected
35
+ def rehydrate
36
+ @key, @bad, @seen_good = Marshal.load(@pathname.read) if @pathname.readable?
37
+ end
35
38
 
36
39
  def reset
37
- @bad = 0
38
- @seen_good = false
40
+ @key, @bad, @seen_good = nil, 0, false
39
41
  end
40
42
 
41
43
  def reset_on_key_change(key)
42
- unless checking_key?(key)
44
+ unless @key == key
43
45
  reset
44
46
  @key = key
45
47
  end
46
48
  end
47
49
 
48
50
  def log_bad_check
49
- Stonith.logger.info("Bad check against #{@key}. Seen good? #{@seen_good}")
51
+ Stonith.logger.warn("Bad check against #{@key}. Seen good? #{@seen_good}")
50
52
  end
51
53
  end
52
54
  end
@@ -0,0 +1,94 @@
1
+ require 'optparse'
2
+ require 'pathname'
3
+
4
+ module EY
5
+ module Stonith
6
+ module Commands
7
+ class Abstract
8
+ DEFAULT_CONFIG_PATH = Pathname.new("/etc/stonith.yml")
9
+ SCRIPT_NAME = 'stonith'
10
+
11
+ def self.command
12
+ '[COMMAND]'
13
+ end
14
+
15
+ def initialize(argv)
16
+ @argv = argv
17
+ parse!
18
+ init_logger
19
+ end
20
+
21
+ # Load the logger without requiring the config file to be present.
22
+ def init_logger
23
+ Stonith.log_to begin
24
+ config.log_path
25
+ rescue Config::FileNotFound
26
+ $stderr
27
+ end
28
+ Stonith.logger.level = Logger::INFO
29
+ end
30
+
31
+ def config
32
+ @config ||= Config.new(@options[:config_path])
33
+ end
34
+
35
+ def history
36
+ @history ||= History.new(config.history_path)
37
+ end
38
+
39
+ def database
40
+ @database ||= Database.new(config)
41
+ end
42
+
43
+ def call
44
+ invoke
45
+ rescue Config::Error => e
46
+ Stonith.logger.error e.message
47
+ abort "#{e.message}\n\n#{parser}"
48
+ rescue => e
49
+ error = "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
50
+ Stonith.logger.error error
51
+ abort error
52
+ end
53
+
54
+ def invoke
55
+ puts parser
56
+ end
57
+
58
+ def command_options
59
+ if @options[:config_path] != DEFAULT_CONFIG_PATH
60
+ " --config #{@options[:config_path]}"
61
+ else
62
+ ""
63
+ end
64
+ end
65
+
66
+ def parse!
67
+ parser.parse!(@argv)
68
+ end
69
+
70
+ def parser
71
+ @parser ||= OptionParser.new do |parser|
72
+ @options = {:config_path => DEFAULT_CONFIG_PATH}
73
+
74
+ parser.banner = "Usage: #{SCRIPT_NAME} #{self.class.command} [FLAGS]"
75
+ parser.separator ""
76
+ parser.separator "COMMANDS"
77
+ parser.separator Stonith::Commands.formatted_command_list
78
+ parser.separator ""
79
+ parser.separator "FLAGS"
80
+
81
+ parser.on('-c', '--config [FILE]', "Location of Stonith YAML config file (default #{@options[:config_path]})") do |path|
82
+ @options[:config_path] = Pathname.new(path)
83
+ end
84
+
85
+ parser.on_tail("-h", "--help", "Show this message") do
86
+ puts parser
87
+ exit
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,58 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Check < Abstract
5
+
6
+ def self.command
7
+ 'check'
8
+ end
9
+
10
+ def self.banner
11
+ "Check the remote master haproxy"
12
+ end
13
+
14
+ def invoke
15
+ database.with_data do |data|
16
+ abort_if_master(data.hostname)
17
+
18
+ history << :check
19
+
20
+ begin
21
+ open("http://#{data.hostname}#{config.monitor_path}").read
22
+ rescue
23
+ bad_check(data.key, data.instance_id)
24
+ else
25
+ good_check(data.key)
26
+ end
27
+
28
+ exit
29
+ end
30
+
31
+ $stderr.puts "No master to check!"
32
+ end
33
+
34
+ def abort_if_master(hostname)
35
+ return if config.meta_data_hostname != hostname
36
+ abort "This instance is master! Can't check self."
37
+ end
38
+
39
+ def good_check(key)
40
+ check_recorder.good_check!(key)
41
+ end
42
+
43
+ def bad_check(key, instance_id)
44
+ check_recorder.bad_check!(key)
45
+
46
+ if check_recorder.limit_exceeded?
47
+ Stonith.logger.info "Invoking takeover of instance #{instance_id}"
48
+ exec "#{SCRIPT_NAME} takeover#{command_options} --instance #{instance_id}"
49
+ end
50
+ end
51
+
52
+ def check_recorder
53
+ @check_recorder ||= CheckRecorder.new(config.checks_path)
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,113 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Claim < Abstract
5
+
6
+ def self.command
7
+ 'claim'
8
+ end
9
+
10
+ def self.banner
11
+ 'Claim the master record in the database'
12
+ end
13
+
14
+ def claim_path
15
+ @claim_path ||= config.claim_path
16
+ end
17
+
18
+ def invoke
19
+ claim_path.exist?? invoke_with_claim_file : invoke_without_claim_file
20
+ end
21
+
22
+ def invoke_with_claim_file
23
+ abort_on_existing_data
24
+
25
+ attempts = (claim_path.read || 0).to_i.succ
26
+
27
+ if @force || attempts >= 6
28
+ reclaim!
29
+ else
30
+ persist_reclaim_attempt(attempts)
31
+ end
32
+ end
33
+
34
+ def abort_on_existing_data
35
+ return unless master_hostname
36
+
37
+ if config.meta_data_hostname == master_hostname
38
+ abort "Already claimed, not claiming."
39
+ else
40
+ claim_path.delete
41
+ abort "#{master_hostname} is master, not claiming.\nRemoving stale claim file."
42
+ end
43
+ end
44
+
45
+ def reclaim!
46
+ Stonith.logger.info @force ? "Reclaim forced." : "Reclaim after 6 consecutive checks."
47
+ claim_path.delete
48
+ invoke_without_claim_file
49
+ end
50
+
51
+ def persist_reclaim_attempt(attempts)
52
+ claim_path.open("w") { |f| f << attempts }
53
+
54
+ abort <<-ERROR
55
+ Master unknown and claim file exists, not claiming.
56
+ Reclaim at 6th attempt or use --force
57
+
58
+ Failed attempts: #{attempts}
59
+ ERROR
60
+ end
61
+
62
+ def invoke_without_claim_file
63
+ @force ? database.reset : confirm_master!
64
+
65
+ data = Data.new(config.meta_data_hostname, config.meta_data_id, config.meta_data_ip)
66
+ database.set data
67
+ claim_path.open('w') {}
68
+
69
+ Stonith.logger.info "Claimed with data: #{data}"
70
+ history << :claim
71
+ end
72
+
73
+ def confirm_master!
74
+ # TODO: Only claim if the master has an IP?
75
+ # if fog.servers.get(config.meta_data_id).addresses.empty?
76
+ # abort "No IP, not claiming." + (@force ? "\nIgnoring --force" : "")
77
+ # end
78
+
79
+ confirm_master_with_database
80
+ confirm_master_with_config
81
+ end
82
+
83
+ def confirm_master_with_database
84
+ return unless master_hostname
85
+
86
+ if config.meta_data_hostname == master_hostname
87
+ claim_path.open('w') {}
88
+ abort "Already claimed, not claiming. Touching claim file."
89
+ else
90
+ abort "#{master_hostname} is master, not claiming."
91
+ end
92
+ end
93
+
94
+ def confirm_master_with_config
95
+ if config.meta_data_hostname != config.monitor_host
96
+ abort "#{config.monitor_host} is master, not claiming."
97
+ end
98
+ end
99
+
100
+ def master_hostname
101
+ @master_hostname ||= database.with_data { |data| data.hostname }
102
+ end
103
+
104
+ def parser
105
+ super.on('-f', '--force', "Force the command (only applicable to claim currently)") do |f|
106
+ @force = f
107
+ end
108
+ super
109
+ end
110
+ end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,26 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Commands < Abstract
5
+ def self.command
6
+ 'commands'
7
+ end
8
+
9
+ def self.banner
10
+ "Print all the available commands"
11
+ end
12
+
13
+ def invoke
14
+ puts <<-MESSAGE
15
+ Available stonith commands:
16
+
17
+ #{Stonith::Commands.formatted_command_list}
18
+
19
+ Example:
20
+ $ stonith status
21
+ MESSAGE
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,40 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Cron < Abstract
5
+ def self.command
6
+ 'cron'
7
+ end
8
+
9
+ def self.banner
10
+ "Run the Stonith cron-like process that invokes other stonith commands periodically"
11
+ end
12
+
13
+ def invoke
14
+ heartbeat_loop do |beat|
15
+ unless_stopped { run_commands if beat.zero? }
16
+ sleep 1
17
+ end
18
+ end
19
+
20
+ def heartbeat_loop(&block)
21
+ loop { config.heartbeat.ceil.times(&block) }
22
+ end
23
+
24
+ def unless_stopped
25
+ if config.stop_path.exist?
26
+ history << :stop
27
+ else
28
+ yield
29
+ end
30
+ end
31
+
32
+ def run_commands
33
+ %w[claim notify check].each do |cmd|
34
+ system("stonith #{cmd}#{command_options}")
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,16 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Help < Abstract
5
+ def self.banner
6
+ "A helpful message :D"
7
+ end
8
+
9
+ def invoke
10
+ puts "Stonith is a set of takeover & monitoring scripts for Engine Yard clusters.\n\n"
11
+ super
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,11 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class NotFound < Abstract
5
+ def invoke
6
+ abort "Command not found.\n#{parser}"
7
+ end
8
+ end
9
+ end
10
+ end
11
+ end