ey_stonith 0.1.4 → 0.1.5.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/bin/ey-monitor +2 -5
  2. data/bin/{ey-monitor-reset → stonith} +1 -1
  3. data/bin/{ey-monitor-stop → stonith-check} +1 -1
  4. data/bin/stonith-claim +5 -0
  5. data/bin/{ey-monitor-resume → stonith-cron} +1 -1
  6. data/bin/stonith-notify +5 -0
  7. data/bin/stonith-reset +5 -0
  8. data/bin/stonith-resume +5 -0
  9. data/bin/stonith-status +5 -0
  10. data/bin/{ey-monitor-status → stonith-stop} +1 -1
  11. data/bin/stonith-takeover +5 -0
  12. data/lib/ey_stonith/address_stealer.rb +1 -6
  13. data/lib/ey_stonith/awsm_notifier.rb +28 -29
  14. data/lib/ey_stonith/check_recorder.rb +19 -17
  15. data/lib/ey_stonith/commands/abstract.rb +94 -0
  16. data/lib/ey_stonith/commands/check.rb +58 -0
  17. data/lib/ey_stonith/commands/claim.rb +113 -0
  18. data/lib/ey_stonith/commands/commands.rb +26 -0
  19. data/lib/ey_stonith/commands/cron.rb +40 -0
  20. data/lib/ey_stonith/commands/help.rb +16 -0
  21. data/lib/ey_stonith/commands/not_found.rb +11 -0
  22. data/lib/ey_stonith/commands/notify.rb +85 -0
  23. data/lib/ey_stonith/commands/reset.rb +21 -0
  24. data/lib/ey_stonith/commands/resume.rb +19 -0
  25. data/lib/ey_stonith/commands/status.rb +23 -0
  26. data/lib/ey_stonith/commands/stop.rb +21 -0
  27. data/lib/ey_stonith/commands/takeover.rb +106 -0
  28. data/lib/ey_stonith/commands.rb +40 -0
  29. data/lib/ey_stonith/config.rb +107 -14
  30. data/lib/ey_stonith/data.rb +5 -1
  31. data/lib/ey_stonith/database.rb +28 -6
  32. data/lib/ey_stonith/history.rb +1 -1
  33. data/lib/ey_stonith.rb +2 -8
  34. metadata +57 -61
  35. data/lib/ey_stonith/abstract_master.rb +0 -15
  36. data/lib/ey_stonith/box.rb +0 -61
  37. data/lib/ey_stonith/cli.rb +0 -138
  38. data/lib/ey_stonith/local_master.rb +0 -28
  39. data/lib/ey_stonith/master.rb +0 -37
  40. data/lib/ey_stonith/meta_data.rb +0 -11
  41. data/lib/ey_stonith/slave.rb +0 -41
data/bin/ey-monitor CHANGED
@@ -1,5 +1,2 @@
1
- #!/usr/bin/env ruby
2
- $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
- require 'ey_stonith'
4
-
5
- EY::Stonith::CLI.new(:start, ARGV)
1
+ #!/bin/bash
2
+ stonith-start
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:reset, ARGV)
5
+ EY::Stonith::Commands.invoke(ARGV)
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:stop, ARGV)
5
+ EY::Stonith::Commands::Check.new(ARGV).call
data/bin/stonith-claim ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Claim.new(ARGV).call
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:resume, ARGV)
5
+ EY::Stonith::Commands::Cron.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Notify.new(ARGV).call
data/bin/stonith-reset ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Reset.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Resume.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Status.new(ARGV).call
@@ -2,4 +2,4 @@
2
2
  $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
3
  require 'ey_stonith'
4
4
 
5
- EY::Stonith::CLI.new(:status, ARGV)
5
+ EY::Stonith::Commands::Stop.new(ARGV).call
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib/', __FILE__))
3
+ require 'ey_stonith'
4
+
5
+ EY::Stonith::Commands::Takeover.new(ARGV).call
@@ -3,12 +3,7 @@ require 'fog'
3
3
  module EY
4
4
  module Stonith
5
5
  class AddressStealer
6
- def self.fog(credentials)
7
- Fog::AWS::EC2.new(
8
- :aws_access_key_id => credentials[:aws_secret_id],
9
- :aws_secret_access_key => credentials[:aws_secret_key]
10
- )
11
- end
6
+ def self.fog(credentials) Fog::AWS::EC2.new(credentials) end
12
7
 
13
8
  def initialize(server_id, ip, credentials)
14
9
  @fog = self.class.fog(credentials)
@@ -1,49 +1,48 @@
1
- require 'em-http'
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'timeout'
2
4
 
3
5
  module EY
4
6
  module Stonith
5
7
  class AwsmNotifier
6
- def initialize(instance_id, notify_uri, opts, heartbeat = 5)
7
- @instance_id, @notify_uri, @opts, @heartbeat = instance_id, notify_uri, opts, heartbeat
8
- call_api
8
+ def initialize(instance_id, uri, credentials)
9
+ @instance_id, @uri, @credentials = instance_id, uri, credentials
9
10
  end
10
11
 
11
- private
12
+ def notify(success, unreachable, refused)
13
+ Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
12
14
 
13
- def try_again
14
- EM.add_timer(@heartbeat) { call_api }
15
- end
15
+ code, body = do_request
16
16
 
17
- def call_api
18
- Stonith.logger.info("Notifying awsm that I did a takeover")
19
- http = EM::HttpRequest.new(@notify_uri).post :body => body, :head => head, :timeout => 10
20
- http.callback {
21
- ok = (200...300).include?(http.response_header.status)
22
- if ok && JSON.parse(http.response)['status'] == 'ok'
23
- Stonith.logger.info("Notified awsm!")
17
+ case code
18
+ when 200...300
19
+ if JSON.parse(body)['status'] == 'ok'
20
+ success.call
24
21
  else
25
- try_again
22
+ refused.call body
26
23
  end
27
- }
28
- http.errback { try_again }
24
+ else
25
+ unreachable.call
26
+ end
27
+ rescue
28
+ unreachable.call
29
29
  end
30
30
 
31
- def head
32
- {"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
31
+ private
32
+
33
+ def do_request
34
+ response = Timeout.timeout(10) do
35
+ Net::HTTP.new(@uri.host, @uri.port).request_post(@uri.path, process_payload(payload), headers)
36
+ end
37
+ [response.code.to_i, response.body]
33
38
  end
34
39
 
35
- def body
36
- result = process_payload(payload)
37
- def result.bytesize() size end unless result.respond_to?(:bytesize)
38
- result
40
+ def headers
41
+ {"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
39
42
  end
40
43
 
41
44
  def payload
42
- {
43
- 'instance_id' => @instance_id,
44
- 'aws_secret_id' => @opts[:aws_secret_id],
45
- 'aws_secret_key' => @opts[:aws_secret_key],
46
- }
45
+ @credentials.merge('instance_id' => @instance_id)
47
46
  end
48
47
 
49
48
  # ripped from restclient so we can use eventmachine
@@ -3,50 +3,52 @@ module EY
3
3
  class CheckRecorder
4
4
  BAD_CHECK_MAX = 5
5
5
 
6
- def initialize
7
- reset
6
+ attr_reader :bad
7
+
8
+ def initialize(pathname)
9
+ @pathname = pathname
10
+ rehydrate
8
11
  end
9
12
 
10
13
  def bad_check!(key)
11
14
  reset_on_key_change(key)
12
15
  log_bad_check
13
16
  @bad += 1 if @seen_good
17
+ save
14
18
  end
15
19
 
16
20
  def good_check!(key)
17
- @key = key
18
- @bad = 0
19
- @seen_good = true
20
- end
21
-
22
- def seen_good?
23
- @seen_good
21
+ @key, @bad, @seen_good = key, 0, true
22
+ save
24
23
  end
25
24
 
26
25
  def limit_exceeded?
27
- seen_good? && @bad > BAD_CHECK_MAX
26
+ @seen_good && @bad > BAD_CHECK_MAX
28
27
  end
29
28
 
30
- def checking_key?(key)
31
- @key == key
29
+ protected
30
+
31
+ def save
32
+ @pathname.open('w') { |f| f << Marshal.dump([@key, @bad, @seen_good]) }
32
33
  end
33
34
 
34
- protected
35
+ def rehydrate
36
+ @key, @bad, @seen_good = Marshal.load(@pathname.read) if @pathname.readable?
37
+ end
35
38
 
36
39
  def reset
37
- @bad = 0
38
- @seen_good = false
40
+ @key, @bad, @seen_good = nil, 0, false
39
41
  end
40
42
 
41
43
  def reset_on_key_change(key)
42
- unless checking_key?(key)
44
+ unless @key == key
43
45
  reset
44
46
  @key = key
45
47
  end
46
48
  end
47
49
 
48
50
  def log_bad_check
49
- Stonith.logger.info("Bad check against #{@key}. Seen good? #{@seen_good}")
51
+ Stonith.logger.warn("Bad check against #{@key}. Seen good? #{@seen_good}")
50
52
  end
51
53
  end
52
54
  end
@@ -0,0 +1,94 @@
1
+ require 'optparse'
2
+ require 'pathname'
3
+
4
+ module EY
5
+ module Stonith
6
+ module Commands
7
+ class Abstract
8
+ DEFAULT_CONFIG_PATH = Pathname.new("/etc/stonith.yml")
9
+ SCRIPT_NAME = 'stonith'
10
+
11
+ def self.command
12
+ '[COMMAND]'
13
+ end
14
+
15
+ def initialize(argv)
16
+ @argv = argv
17
+ parse!
18
+ init_logger
19
+ end
20
+
21
+ # Load the logger without requiring the config file to be present.
22
+ def init_logger
23
+ Stonith.log_to begin
24
+ config.log_path
25
+ rescue Config::FileNotFound
26
+ $stderr
27
+ end
28
+ Stonith.logger.level = Logger::INFO
29
+ end
30
+
31
+ def config
32
+ @config ||= Config.new(@options[:config_path])
33
+ end
34
+
35
+ def history
36
+ @history ||= History.new(config.history_path)
37
+ end
38
+
39
+ def database
40
+ @database ||= Database.new(config)
41
+ end
42
+
43
+ def call
44
+ invoke
45
+ rescue Config::Error => e
46
+ Stonith.logger.error e.message
47
+ abort "#{e.message}\n\n#{parser}"
48
+ rescue => e
49
+ error = "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
50
+ Stonith.logger.error error
51
+ abort error
52
+ end
53
+
54
+ def invoke
55
+ puts parser
56
+ end
57
+
58
+ def command_options
59
+ if @options[:config_path] != DEFAULT_CONFIG_PATH
60
+ " --config #{@options[:config_path]}"
61
+ else
62
+ ""
63
+ end
64
+ end
65
+
66
+ def parse!
67
+ parser.parse!(@argv)
68
+ end
69
+
70
+ def parser
71
+ @parser ||= OptionParser.new do |parser|
72
+ @options = {:config_path => DEFAULT_CONFIG_PATH}
73
+
74
+ parser.banner = "Usage: #{SCRIPT_NAME} #{self.class.command} [FLAGS]"
75
+ parser.separator ""
76
+ parser.separator "COMMANDS"
77
+ parser.separator Stonith::Commands.formatted_command_list
78
+ parser.separator ""
79
+ parser.separator "FLAGS"
80
+
81
+ parser.on('-c', '--config [FILE]', "Location of Stonith YAML config file (default #{@options[:config_path]})") do |path|
82
+ @options[:config_path] = Pathname.new(path)
83
+ end
84
+
85
+ parser.on_tail("-h", "--help", "Show this message") do
86
+ puts parser
87
+ exit
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,58 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Check < Abstract
5
+
6
+ def self.command
7
+ 'check'
8
+ end
9
+
10
+ def self.banner
11
+ "Check the remote master haproxy"
12
+ end
13
+
14
+ def invoke
15
+ database.with_data do |data|
16
+ abort_if_master(data.hostname)
17
+
18
+ history << :check
19
+
20
+ begin
21
+ open("http://#{data.hostname}#{config.monitor_path}").read
22
+ rescue
23
+ bad_check(data.key, data.instance_id)
24
+ else
25
+ good_check(data.key)
26
+ end
27
+
28
+ exit
29
+ end
30
+
31
+ $stderr.puts "No master to check!"
32
+ end
33
+
34
+ def abort_if_master(hostname)
35
+ return if config.meta_data_hostname != hostname
36
+ abort "This instance is master! Can't check self."
37
+ end
38
+
39
+ def good_check(key)
40
+ check_recorder.good_check!(key)
41
+ end
42
+
43
+ def bad_check(key, instance_id)
44
+ check_recorder.bad_check!(key)
45
+
46
+ if check_recorder.limit_exceeded?
47
+ Stonith.logger.info "Invoking takeover of instance #{instance_id}"
48
+ exec "#{SCRIPT_NAME} takeover#{command_options} --instance #{instance_id}"
49
+ end
50
+ end
51
+
52
+ def check_recorder
53
+ @check_recorder ||= CheckRecorder.new(config.checks_path)
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,113 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Claim < Abstract
5
+
6
+ def self.command
7
+ 'claim'
8
+ end
9
+
10
+ def self.banner
11
+ 'Claim the master record in the database'
12
+ end
13
+
14
+ def claim_path
15
+ @claim_path ||= config.claim_path
16
+ end
17
+
18
+ def invoke
19
+ claim_path.exist?? invoke_with_claim_file : invoke_without_claim_file
20
+ end
21
+
22
+ def invoke_with_claim_file
23
+ abort_on_existing_data
24
+
25
+ attempts = (claim_path.read || 0).to_i.succ
26
+
27
+ if @force || attempts >= 6
28
+ reclaim!
29
+ else
30
+ persist_reclaim_attempt(attempts)
31
+ end
32
+ end
33
+
34
+ def abort_on_existing_data
35
+ return unless master_hostname
36
+
37
+ if config.meta_data_hostname == master_hostname
38
+ abort "Already claimed, not claiming."
39
+ else
40
+ claim_path.delete
41
+ abort "#{master_hostname} is master, not claiming.\nRemoving stale claim file."
42
+ end
43
+ end
44
+
45
+ def reclaim!
46
+ Stonith.logger.info @force ? "Reclaim forced." : "Reclaim after 6 consecutive checks."
47
+ claim_path.delete
48
+ invoke_without_claim_file
49
+ end
50
+
51
+ def persist_reclaim_attempt(attempts)
52
+ claim_path.open("w") { |f| f << attempts }
53
+
54
+ abort <<-ERROR
55
+ Master unknown and claim file exists, not claiming.
56
+ Reclaim at 6th attempt or use --force
57
+
58
+ Failed attempts: #{attempts}
59
+ ERROR
60
+ end
61
+
62
+ def invoke_without_claim_file
63
+ @force ? database.reset : confirm_master!
64
+
65
+ data = Data.new(config.meta_data_hostname, config.meta_data_id, config.meta_data_ip)
66
+ database.set data
67
+ claim_path.open('w') {}
68
+
69
+ Stonith.logger.info "Claimed with data: #{data}"
70
+ history << :claim
71
+ end
72
+
73
+ def confirm_master!
74
+ # TODO: Only claim if the master has an IP?
75
+ # if fog.servers.get(config.meta_data_id).addresses.empty?
76
+ # abort "No IP, not claiming." + (@force ? "\nIgnoring --force" : "")
77
+ # end
78
+
79
+ confirm_master_with_database
80
+ confirm_master_with_config
81
+ end
82
+
83
+ def confirm_master_with_database
84
+ return unless master_hostname
85
+
86
+ if config.meta_data_hostname == master_hostname
87
+ claim_path.open('w') {}
88
+ abort "Already claimed, not claiming. Touching claim file."
89
+ else
90
+ abort "#{master_hostname} is master, not claiming."
91
+ end
92
+ end
93
+
94
+ def confirm_master_with_config
95
+ if config.meta_data_hostname != config.monitor_host
96
+ abort "#{config.monitor_host} is master, not claiming."
97
+ end
98
+ end
99
+
100
+ def master_hostname
101
+ @master_hostname ||= database.with_data { |data| data.hostname }
102
+ end
103
+
104
+ def parser
105
+ super.on('-f', '--force', "Force the command (only applicable to claim currently)") do |f|
106
+ @force = f
107
+ end
108
+ super
109
+ end
110
+ end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,26 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Commands < Abstract
5
+ def self.command
6
+ 'commands'
7
+ end
8
+
9
+ def self.banner
10
+ "Print all the available commands"
11
+ end
12
+
13
+ def invoke
14
+ puts <<-MESSAGE
15
+ Available stonith commands:
16
+
17
+ #{Stonith::Commands.formatted_command_list}
18
+
19
+ Example:
20
+ $ stonith status
21
+ MESSAGE
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,40 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Cron < Abstract
5
+ def self.command
6
+ 'cron'
7
+ end
8
+
9
+ def self.banner
10
+ "Run the Stonith cron-like process that invokes other stonith commands periodically"
11
+ end
12
+
13
+ def invoke
14
+ heartbeat_loop do |beat|
15
+ unless_stopped { run_commands if beat.zero? }
16
+ sleep 1
17
+ end
18
+ end
19
+
20
+ def heartbeat_loop(&block)
21
+ loop { config.heartbeat.ceil.times(&block) }
22
+ end
23
+
24
+ def unless_stopped
25
+ if config.stop_path.exist?
26
+ history << :stop
27
+ else
28
+ yield
29
+ end
30
+ end
31
+
32
+ def run_commands
33
+ %w[claim notify check].each do |cmd|
34
+ system("stonith #{cmd}#{command_options}")
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,16 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class Help < Abstract
5
+ def self.banner
6
+ "A helpful message :D"
7
+ end
8
+
9
+ def invoke
10
+ puts "Stonith is a set of takeover & monitoring scripts for Engine Yard clusters.\n\n"
11
+ super
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,11 @@
1
+ module EY
2
+ module Stonith
3
+ module Commands
4
+ class NotFound < Abstract
5
+ def invoke
6
+ abort "Command not found.\n#{parser}"
7
+ end
8
+ end
9
+ end
10
+ end
11
+ end