ey_stonith 0.1.4 → 0.1.5.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ey-monitor +2 -5
- data/bin/{ey-monitor-reset → stonith} +1 -1
- data/bin/{ey-monitor-stop → stonith-check} +1 -1
- data/bin/stonith-claim +5 -0
- data/bin/{ey-monitor-resume → stonith-cron} +1 -1
- data/bin/stonith-notify +5 -0
- data/bin/stonith-reset +5 -0
- data/bin/stonith-resume +5 -0
- data/bin/stonith-status +5 -0
- data/bin/{ey-monitor-status → stonith-stop} +1 -1
- data/bin/stonith-takeover +5 -0
- data/lib/ey_stonith/address_stealer.rb +1 -6
- data/lib/ey_stonith/awsm_notifier.rb +28 -29
- data/lib/ey_stonith/check_recorder.rb +19 -17
- data/lib/ey_stonith/commands/abstract.rb +94 -0
- data/lib/ey_stonith/commands/check.rb +58 -0
- data/lib/ey_stonith/commands/claim.rb +113 -0
- data/lib/ey_stonith/commands/commands.rb +26 -0
- data/lib/ey_stonith/commands/cron.rb +40 -0
- data/lib/ey_stonith/commands/help.rb +16 -0
- data/lib/ey_stonith/commands/not_found.rb +11 -0
- data/lib/ey_stonith/commands/notify.rb +85 -0
- data/lib/ey_stonith/commands/reset.rb +21 -0
- data/lib/ey_stonith/commands/resume.rb +19 -0
- data/lib/ey_stonith/commands/status.rb +23 -0
- data/lib/ey_stonith/commands/stop.rb +21 -0
- data/lib/ey_stonith/commands/takeover.rb +106 -0
- data/lib/ey_stonith/commands.rb +40 -0
- data/lib/ey_stonith/config.rb +107 -14
- data/lib/ey_stonith/data.rb +5 -1
- data/lib/ey_stonith/database.rb +28 -6
- data/lib/ey_stonith/history.rb +1 -1
- data/lib/ey_stonith.rb +2 -8
- metadata +57 -61
- data/lib/ey_stonith/abstract_master.rb +0 -15
- data/lib/ey_stonith/box.rb +0 -61
- data/lib/ey_stonith/cli.rb +0 -138
- data/lib/ey_stonith/local_master.rb +0 -28
- data/lib/ey_stonith/master.rb +0 -37
- data/lib/ey_stonith/meta_data.rb +0 -11
- data/lib/ey_stonith/slave.rb +0 -41
data/bin/ey-monitor
CHANGED
data/bin/stonith-claim
ADDED
data/bin/stonith-notify
ADDED
data/bin/stonith-reset
ADDED
data/bin/stonith-resume
ADDED
data/bin/stonith-status
ADDED
@@ -3,12 +3,7 @@ require 'fog'
|
|
3
3
|
module EY
|
4
4
|
module Stonith
|
5
5
|
class AddressStealer
|
6
|
-
def self.fog(credentials)
|
7
|
-
Fog::AWS::EC2.new(
|
8
|
-
:aws_access_key_id => credentials[:aws_secret_id],
|
9
|
-
:aws_secret_access_key => credentials[:aws_secret_key]
|
10
|
-
)
|
11
|
-
end
|
6
|
+
def self.fog(credentials) Fog::AWS::EC2.new(credentials) end
|
12
7
|
|
13
8
|
def initialize(server_id, ip, credentials)
|
14
9
|
@fog = self.class.fog(credentials)
|
@@ -1,49 +1,48 @@
|
|
1
|
-
require '
|
1
|
+
require 'json'
|
2
|
+
require 'net/http'
|
3
|
+
require 'timeout'
|
2
4
|
|
3
5
|
module EY
|
4
6
|
module Stonith
|
5
7
|
class AwsmNotifier
|
6
|
-
def initialize(instance_id,
|
7
|
-
@instance_id, @
|
8
|
-
call_api
|
8
|
+
def initialize(instance_id, uri, credentials)
|
9
|
+
@instance_id, @uri, @credentials = instance_id, uri, credentials
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
+
def notify(success, unreachable, refused)
|
13
|
+
Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
|
12
14
|
|
13
|
-
|
14
|
-
EM.add_timer(@heartbeat) { call_api }
|
15
|
-
end
|
15
|
+
code, body = do_request
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
ok = (200...300).include?(http.response_header.status)
|
22
|
-
if ok && JSON.parse(http.response)['status'] == 'ok'
|
23
|
-
Stonith.logger.info("Notified awsm!")
|
17
|
+
case code
|
18
|
+
when 200...300
|
19
|
+
if JSON.parse(body)['status'] == 'ok'
|
20
|
+
success.call
|
24
21
|
else
|
25
|
-
|
22
|
+
refused.call body
|
26
23
|
end
|
27
|
-
|
28
|
-
|
24
|
+
else
|
25
|
+
unreachable.call
|
26
|
+
end
|
27
|
+
rescue
|
28
|
+
unreachable.call
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
|
-
|
31
|
+
private
|
32
|
+
|
33
|
+
def do_request
|
34
|
+
response = Timeout.timeout(10) do
|
35
|
+
Net::HTTP.new(@uri.host, @uri.port).request_post(@uri.path, process_payload(payload), headers)
|
36
|
+
end
|
37
|
+
[response.code.to_i, response.body]
|
33
38
|
end
|
34
39
|
|
35
|
-
def
|
36
|
-
|
37
|
-
def result.bytesize() size end unless result.respond_to?(:bytesize)
|
38
|
-
result
|
40
|
+
def headers
|
41
|
+
{"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
|
39
42
|
end
|
40
43
|
|
41
44
|
def payload
|
42
|
-
|
43
|
-
'instance_id' => @instance_id,
|
44
|
-
'aws_secret_id' => @opts[:aws_secret_id],
|
45
|
-
'aws_secret_key' => @opts[:aws_secret_key],
|
46
|
-
}
|
45
|
+
@credentials.merge('instance_id' => @instance_id)
|
47
46
|
end
|
48
47
|
|
49
48
|
# ripped from restclient so we can use eventmachine
|
@@ -3,50 +3,52 @@ module EY
|
|
3
3
|
class CheckRecorder
|
4
4
|
BAD_CHECK_MAX = 5
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_reader :bad
|
7
|
+
|
8
|
+
def initialize(pathname)
|
9
|
+
@pathname = pathname
|
10
|
+
rehydrate
|
8
11
|
end
|
9
12
|
|
10
13
|
def bad_check!(key)
|
11
14
|
reset_on_key_change(key)
|
12
15
|
log_bad_check
|
13
16
|
@bad += 1 if @seen_good
|
17
|
+
save
|
14
18
|
end
|
15
19
|
|
16
20
|
def good_check!(key)
|
17
|
-
@key = key
|
18
|
-
|
19
|
-
@seen_good = true
|
20
|
-
end
|
21
|
-
|
22
|
-
def seen_good?
|
23
|
-
@seen_good
|
21
|
+
@key, @bad, @seen_good = key, 0, true
|
22
|
+
save
|
24
23
|
end
|
25
24
|
|
26
25
|
def limit_exceeded?
|
27
|
-
seen_good
|
26
|
+
@seen_good && @bad > BAD_CHECK_MAX
|
28
27
|
end
|
29
28
|
|
30
|
-
|
31
|
-
|
29
|
+
protected
|
30
|
+
|
31
|
+
def save
|
32
|
+
@pathname.open('w') { |f| f << Marshal.dump([@key, @bad, @seen_good]) }
|
32
33
|
end
|
33
34
|
|
34
|
-
|
35
|
+
def rehydrate
|
36
|
+
@key, @bad, @seen_good = Marshal.load(@pathname.read) if @pathname.readable?
|
37
|
+
end
|
35
38
|
|
36
39
|
def reset
|
37
|
-
@bad = 0
|
38
|
-
@seen_good = false
|
40
|
+
@key, @bad, @seen_good = nil, 0, false
|
39
41
|
end
|
40
42
|
|
41
43
|
def reset_on_key_change(key)
|
42
|
-
unless
|
44
|
+
unless @key == key
|
43
45
|
reset
|
44
46
|
@key = key
|
45
47
|
end
|
46
48
|
end
|
47
49
|
|
48
50
|
def log_bad_check
|
49
|
-
Stonith.logger.
|
51
|
+
Stonith.logger.warn("Bad check against #{@key}. Seen good? #{@seen_good}")
|
50
52
|
end
|
51
53
|
end
|
52
54
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module EY
  module Stonith
    module Commands
      # Base class for the `stonith` sub-commands.
      #
      # Construction parses +argv+ and sets up the logger; #call runs the
      # command (#invoke) and turns any raised error into a logged abort.
      # Subclasses override .command, #invoke and, when they need extra
      # flags, #parser.
      class Abstract
        DEFAULT_CONFIG_PATH = Pathname.new("/etc/stonith.yml")
        SCRIPT_NAME = 'stonith'

        # Name of the sub-command as shown in the usage banner.
        def self.command
          '[COMMAND]'
        end

        # argv - Array of command line arguments; recognised flags are
        #        removed from it in place by OptionParser#parse!.
        def initialize(argv)
          @argv = argv
          parse!
          init_logger
        end

        # Load the logger without requiring the config file to be present.
        # Falls back to $stderr when the config file cannot be found.
        def init_logger
          Stonith.log_to begin
            config.log_path
          rescue Config::FileNotFound
            $stderr
          end
          Stonith.logger.level = Logger::INFO
        end

        # Lazily built Config for the selected config path.
        def config
          @config ||= Config.new(@options[:config_path])
        end

        # Lazily built History backed by config.history_path.
        def history
          @history ||= History.new(config.history_path)
        end

        # Lazily built Database for the current config.
        def database
          @database ||= Database.new(config)
        end

        # Run the command. Config errors abort with the message followed by
        # the usage text; any other StandardError aborts with class, message
        # and backtrace. Both are written to the logger first.
        def call
          invoke
        rescue Config::Error => e
          Stonith.logger.error e.message
          abort "#{e.message}\n\n#{parser}"
        rescue => e
          error = "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
          Stonith.logger.error error
          abort error
        end

        # Default action: print the usage text. Subclasses override this.
        def invoke
          puts parser
        end

        # Flags to append when re-invoking the script so that the child uses
        # the same config file. Returns "" when the default path is in use,
        # otherwise " --config PATH" (note the leading space).
        def command_options
          if @options[:config_path] != DEFAULT_CONFIG_PATH
            " --config #{@options[:config_path]}"
          else
            ""
          end
        end

        # Parse (and consume) the recognised flags from @argv.
        def parse!
          parser.parse!(@argv)
        end

        # Memoized OptionParser. Building it also initialises @options, so
        # it must run before anything reads @options (#initialize does this
        # through #parse!).
        def parser
          @parser ||= OptionParser.new do |parser|
            @options = {:config_path => DEFAULT_CONFIG_PATH}

            parser.banner = "Usage: #{SCRIPT_NAME} #{self.class.command} [FLAGS]"
            parser.separator ""
            parser.separator "COMMANDS"
            parser.separator Stonith::Commands.formatted_command_list
            parser.separator ""
            parser.separator "FLAGS"

            parser.on('-c', '--config [FILE]', "Location of Stonith YAML config file (default #{@options[:config_path]})") do |path|
              # FILE is optional, so a bare --config yields nil here. Keep
              # the default instead of crashing in Pathname.new(nil).
              @options[:config_path] = Pathname.new(path) if path
            end

            parser.on_tail("-h", "--help", "Show this message") do
              puts parser
              exit
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # `stonith check`: fetch the monitor URL on the master recorded in the
      # database and record the outcome. When the recorder reports that the
      # bad-check limit is exceeded, the process is replaced by
      # `stonith takeover`.
      class Check < Abstract

        def self.command
          'check'
        end

        def self.banner
          "Check the remote master haproxy"
        end

        # Runs one check against the master yielded by the database, then
        # exits. If the database never yields, a notice is written to stderr.
        def invoke
          database.with_data do |master|
            abort_if_master(master.hostname)

            history << :check

            # Any StandardError raised while fetching counts as a bad check.
            reachable = begin
              open("http://#{master.hostname}#{config.monitor_path}").read
              true
            rescue
              false
            end

            if reachable
              good_check(master.key)
            else
              bad_check(master.key, master.instance_id)
            end

            exit
          end

          $stderr.puts "No master to check!"
        end

        # Abort when the recorded master is this very instance.
        def abort_if_master(hostname)
          if config.meta_data_hostname == hostname
            abort "This instance is master! Can't check self."
          end
        end

        # Record a successful check for +key+.
        def good_check(key)
          check_recorder.good_check!(key)
        end

        # Record a failed check for +key+; once the recorder says the limit
        # is exceeded, exec the takeover command for +instance_id+ (this
        # replaces the current process).
        def bad_check(key, instance_id)
          check_recorder.bad_check!(key)
          return unless check_recorder.limit_exceeded?

          Stonith.logger.info "Invoking takeover of instance #{instance_id}"
          exec "#{SCRIPT_NAME} takeover#{command_options} --instance #{instance_id}"
        end

        # Lazily built CheckRecorder backed by config.checks_path.
        def check_recorder
          @check_recorder ||= CheckRecorder.new(config.checks_path)
        end
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # `stonith claim`: write this instance's metadata into the database as
      # the master record and touch a local claim file.
      #
      # The claim file's content is read as a count of consecutive attempts
      # made while the file existed but the database held no master. Once
      # the count reaches RECLAIM_ATTEMPTS, or --force is given, the file is
      # removed and the claim is redone.
      class Claim < Abstract
        # Attempt number at which an existing claim file is overridden.
        RECLAIM_ATTEMPTS = 6

        def self.command
          'claim'
        end

        def self.banner
          'Claim the master record in the database'
        end

        # Memoized config.claim_path.
        def claim_path
          @claim_path ||= config.claim_path
        end

        def invoke
          claim_path.exist?? invoke_with_claim_file : invoke_without_claim_file
        end

        # A claim file exists: abort if the database already names a master,
        # otherwise count this attempt and reclaim once allowed.
        def invoke_with_claim_file
          abort_on_existing_data

          attempts = (claim_path.read || 0).to_i.succ

          if @force || attempts >= RECLAIM_ATTEMPTS
            reclaim!
          else
            persist_reclaim_attempt(attempts)
          end
        end

        # Abort when the database names a master. If that master is another
        # host, the local claim file is deleted first.
        def abort_on_existing_data
          return unless master_hostname

          if config.meta_data_hostname == master_hostname
            abort "Already claimed, not claiming."
          else
            claim_path.delete
            abort "#{master_hostname} is master, not claiming.\nRemoving stale claim file."
          end
        end

        # Remove the claim file and claim again from scratch.
        def reclaim!
          Stonith.logger.info @force ? "Reclaim forced." : "Reclaim after #{RECLAIM_ATTEMPTS} consecutive checks."
          claim_path.delete
          invoke_without_claim_file
        end

        # Store the attempt count in the claim file and abort.
        # The heredoc body is flush-left on purpose: <<- keeps leading
        # whitespace in the emitted message.
        def persist_reclaim_attempt(attempts)
          claim_path.open("w") { |f| f << attempts }

          abort <<-ERROR
Master unknown and claim file exists, not claiming.
Reclaim at #{RECLAIM_ATTEMPTS}th attempt or use --force

Failed attempts: #{attempts}
          ERROR
        end

        # Perform the claim: reset the database when forced, otherwise make
        # sure claiming is legitimate, then store this instance's data,
        # touch the claim file and record the event.
        def invoke_without_claim_file
          @force ? database.reset : confirm_master!

          data = Data.new(config.meta_data_hostname, config.meta_data_id, config.meta_data_ip)
          database.set data
          claim_path.open('w') {}

          Stonith.logger.info "Claimed with data: #{data}"
          history << :claim
        end

        def confirm_master!
          # TODO: Only claim if the master has an IP?
          # if fog.servers.get(config.meta_data_id).addresses.empty?
          #   abort "No IP, not claiming." + (@force ? "\nIgnoring --force" : "")
          # end

          confirm_master_with_database
          confirm_master_with_config
        end

        # Abort if the database already names a master. If it names this
        # instance, the claim file is touched before aborting.
        def confirm_master_with_database
          return unless master_hostname

          if config.meta_data_hostname == master_hostname
            claim_path.open('w') {}
            abort "Already claimed, not claiming. Touching claim file."
          else
            abort "#{master_hostname} is master, not claiming."
          end
        end

        # Abort unless the configured monitor_host is this instance.
        def confirm_master_with_config
          if config.meta_data_hostname != config.monitor_host
            abort "#{config.monitor_host} is master, not claiming."
          end
        end

        # Hostname of the master recorded in the database; memoized only
        # when truthy.
        def master_hostname
          @master_hostname ||= database.with_data { |data| data.hostname }
        end

        # The base parser plus the --force switch. The switch is registered
        # only once: #parser is called repeatedly (parsing, usage output,
        # abort messages) and registering it on every call would stack
        # duplicate switches and duplicate lines in the help text.
        def parser
          base = super
          unless @force_switch_registered
            base.on('-f', '--force', "Force the command (only applicable to claim currently)") do |f|
              @force = f
            end
            @force_switch_registered = true
          end
          base
        end
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # `stonith commands`: print the list of available sub-commands
      # followed by a usage example.
      class Commands < Abstract
        def self.command
          'commands'
        end

        def self.banner
          "Print all the available commands"
        end

        # Writes the command overview to stdout.
        # The heredoc body is flush-left on purpose: <<- keeps leading
        # whitespace in the printed text.
        def invoke
          overview = <<-MESSAGE
Available stonith commands:

#{Stonith::Commands.formatted_command_list}

Example:
$ stonith status
          MESSAGE
          puts overview
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # `stonith cron`: a long-running loop that ticks once per second and,
      # on the first tick of every heartbeat period, shells out to the
      # claim, notify and check commands in that order.
      class Cron < Abstract
        def self.command
          'cron'
        end

        def self.banner
          "Run the Stonith cron-like process that invokes other stonith commands periodically"
        end

        # Never returns: loops forever, sleeping one second per beat.
        def invoke
          heartbeat_loop do |beat|
            unless_stopped { run_commands if beat.zero? }
            sleep 1
          end
        end

        # Yields beat numbers 0...config.heartbeat.ceil over and over.
        # config.heartbeat is re-read at the start of every period.
        def heartbeat_loop(&block)
          loop { config.heartbeat.ceil.times(&block) }
        end

        # Yields unless the stop file exists; while it exists a :stop event
        # is appended to the history instead.
        # NOTE(review): this is evaluated on every beat, so :stop is recorded
        # once per second while stopped, not once per heartbeat. Confirm
        # that is intended.
        def unless_stopped
          if config.stop_path.exist?
            history << :stop
          else
            yield
          end
        end

        # Run each periodic sub-command in a child process, passing along
        # the config flag this process was started with. Uses SCRIPT_NAME,
        # like the takeover exec in Check, instead of a hard-coded name.
        def run_commands
          %w[claim notify check].each do |cmd|
            system("#{SCRIPT_NAME} #{cmd}#{command_options}")
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # `stonith help`: print a one-line description of Stonith followed by
      # the usage text from the inherited #invoke.
      class Help < Abstract
        def self.banner
          "A helpful message :D"
        end

        # Prints the introduction, then defers to the inherited #invoke.
        def invoke
          intro = "Stonith is a set of takeover & monitoring scripts for Engine Yard clusters.\n\n"
          puts intro
          super
        end
      end
    end
  end
end
|