ey_stonith 0.1.4 → 0.1.5.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ey-monitor +2 -5
- data/bin/{ey-monitor-reset → stonith} +1 -1
- data/bin/{ey-monitor-stop → stonith-check} +1 -1
- data/bin/stonith-claim +5 -0
- data/bin/{ey-monitor-resume → stonith-cron} +1 -1
- data/bin/stonith-notify +5 -0
- data/bin/stonith-reset +5 -0
- data/bin/stonith-resume +5 -0
- data/bin/stonith-status +5 -0
- data/bin/{ey-monitor-status → stonith-stop} +1 -1
- data/bin/stonith-takeover +5 -0
- data/lib/ey_stonith/address_stealer.rb +1 -6
- data/lib/ey_stonith/awsm_notifier.rb +28 -29
- data/lib/ey_stonith/check_recorder.rb +19 -17
- data/lib/ey_stonith/commands/abstract.rb +94 -0
- data/lib/ey_stonith/commands/check.rb +58 -0
- data/lib/ey_stonith/commands/claim.rb +113 -0
- data/lib/ey_stonith/commands/commands.rb +26 -0
- data/lib/ey_stonith/commands/cron.rb +40 -0
- data/lib/ey_stonith/commands/help.rb +16 -0
- data/lib/ey_stonith/commands/not_found.rb +11 -0
- data/lib/ey_stonith/commands/notify.rb +85 -0
- data/lib/ey_stonith/commands/reset.rb +21 -0
- data/lib/ey_stonith/commands/resume.rb +19 -0
- data/lib/ey_stonith/commands/status.rb +23 -0
- data/lib/ey_stonith/commands/stop.rb +21 -0
- data/lib/ey_stonith/commands/takeover.rb +106 -0
- data/lib/ey_stonith/commands.rb +40 -0
- data/lib/ey_stonith/config.rb +107 -14
- data/lib/ey_stonith/data.rb +5 -1
- data/lib/ey_stonith/database.rb +28 -6
- data/lib/ey_stonith/history.rb +1 -1
- data/lib/ey_stonith.rb +2 -8
- metadata +57 -61
- data/lib/ey_stonith/abstract_master.rb +0 -15
- data/lib/ey_stonith/box.rb +0 -61
- data/lib/ey_stonith/cli.rb +0 -138
- data/lib/ey_stonith/local_master.rb +0 -28
- data/lib/ey_stonith/master.rb +0 -37
- data/lib/ey_stonith/meta_data.rb +0 -11
- data/lib/ey_stonith/slave.rb +0 -41
@@ -0,0 +1,85 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `notify` command: tells the provisioning server (AWSM) that this
      # instance has become master.
      #
      # The command refuses to run unless the notify state file exists or
      # --force was given. The state file is (re)created by #unreachable, so a
      # failed attempt can be retried later without --force.
      class Notify < Abstract
        def self.command
          'notify'
        end

        def self.banner
          'Notify the provisioning server about a takeover'
        end

        # Memoized location of the notify state file, taken from the config.
        def notify_path
          @notify_path ||= config.notify_path
        end

        # Entry point. Every callback handed to the notifier (#success,
        # #unreachable, #refused) terminates the process, so #abort_no_data is
        # only reached when the block passed to database.with_data never gets
        # to notify (presumably because the database holds no master record --
        # confirm against Database#with_data).
        def invoke
          abort_if_unintentional

          # Consume the marker; #unreachable puts it back if AWSM cannot be reached.
          notify_path.delete if notify_path.exist?

          database.with_data do |data|
            abort_if_master(data.hostname)
            notify!
          end

          abort_no_data
        end

        # Aborts unless --force was given or the notify state file exists.
        def abort_if_unintentional
          return if @force || notify_path.exist?

          abort <<-ERROR
Cannot notify, #{notify_path} does not exist.
If you want to (destructively) notify AWSM that this instance is master, call with --force.
          ERROR
        end

        # Aborts when this host is NOT the master recorded in the database.
        # NOTE: despite its name, this guard fires when the local hostname
        # differs from +hostname+; the name is kept for compatibility.
        #
        # hostname - hostname of the master stored in the database.
        def abort_if_master(hostname)
          if config.meta_data_hostname != hostname
            Stonith.logger.error "Cannot notify, I am not master. Master is #{hostname}. Cancelling notify."
            abort "Cannot notify, I am not master."
          end
        end

        # Logs and aborts because no master record was available.
        def abort_no_data
          msg = "Cannot notify, there is no master in the database. Giving up."
          Stonith.logger.error msg
          abort msg
        end

        # Builds the notifier and hands it the three outcome callbacks.
        def notify!
          notifier = AwsmNotifier.new(config.meta_data_id, config.notify_uri, config.awsm_credentials)
          notifier.notify(method(:success), method(:unreachable), method(:refused))
        end

        # Callback: the notification was accepted. Records it and exits.
        def success
          Stonith.logger.info "AWSM notified!"
          history << :notified
          exit
        end

        # Callback: AWSM could not be reached. Re-creates the (empty) notify
        # state file so the next run passes #abort_if_unintentional, then aborts.
        def unreachable
          notify_path.open('w') {}
          msg = "Unable to reach AWSM for promotion to master."
          Stonith.logger.warn msg
          # The executable was renamed from ey-monitor-notify to stonith-notify.
          abort "#{msg}\nIf you're running this from the command line, you should run stonith-notify again."
        end

        # Callback: the endpoint rejected the notification.
        #
        # response_body - body returned by the endpoint, included in the message.
        def refused(response_body)
          msg = "Notify refused by endpoint. Giving up.\nResponse: #{response_body}"
          Stonith.logger.error msg
          abort msg
        end

        # Extends the inherited option parser with -f/--force, which lets the
        # command run without the notify state file (see #abort_if_unintentional).
        def parser
          super.on('-f', '--force', "Force the notify even if the notify state file does not exist") do |f|
            @force = f
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `reset` command: clears the database and removes the whole state
      # directory, then logs that it did so.
      class Reset < Abstract
        def self.command; 'reset'; end

        def self.banner; "Reset the state of all stonith commands"; end

        # Resets the database first, then deletes the state directory tree.
        def invoke
          database.reset

          state_dir = config.state_path
          state_dir.rmtree

          Stonith.logger.info("All state and database reset!")
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `resume` command: removes the stop marker file, if there is one.
      class Resume < Abstract
        def self.command; 'resume'; end

        def self.banner; "Resume Stonith monitoring"; end

        # Deletes the stop file when present; does nothing otherwise.
        def invoke
          stop_file = config.stop_path
          return unless stop_file.exist?

          stop_file.delete
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `status` command: prints the recorded history, if any.
      class Status < Abstract
        def self.command; 'status'; end

        def self.banner; "List the last one or two actions performed"; end

        # Prints the status text; prints nothing when it is empty.
        def invoke
          report = status
          return if report.empty?

          puts report
        end

        # Memoized string form of the history.
        def status
          @status ||= history.to_s
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `stop` command: creates the stop marker file and waits until the
      # history reports "stop" as its latest entry.
      class Stop < Abstract
        def self.command; 'stop'; end

        def self.banner; "Stop Stonith monitoring"; end

        # Creates an empty stop file, polls the history every half second
        # until its last entry is "stop", then prints "takeover" when the
        # history contains a takeover.
        #
        # NOTE(review): the wait has no timeout, and the last entry is compared
        # with a String while membership is tested with a Symbol -- confirm
        # both against History.
        def invoke
          config.stop_path.open('w') {}

          loop do
            break if history.last == "stop"
            sleep(0.5)
          end

          took_over = history.include?(:takeover)
          puts "takeover" if took_over
        end
      end
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module EY
  module Stonith
    module Commands
      # The `takeover` command: takes over from the master identified by
      # --instance. It steals the master's IP address, records this instance
      # as the new master and then hands off to the `notify` command.
      class Takeover < Abstract
        def self.command
          'takeover'
        end

        def self.banner
          'Cause a takeover of the specified master'
        end

        # Amazon instance id of the failing master, as given via --instance.
        attr_reader :instance_id

        # Entry point: validates the arguments, takes the local takeover lock,
        # then proceeds only if the locked database record still names the
        # instance we were asked to take over.
        def invoke
          abort_if_no_instance_id
          abort_if_self

          abort_if_takeover_lock
          set_takeover_lock

          database.with_locked_data do |data|
            restore_data_on_fail(data)

            if instance_id == data.instance_id
              locked!(data.instance_id, data.ip)
            else
              relent!
            end
          end
        end

        # Aborts with usage help when no instance id was supplied.
        def abort_if_no_instance_id
          return if instance_id && instance_id != ""

          abort "Please call with the instance_id of the master to takeover.\n\n#{parser}"
        end

        # Aborts when asked to take over this very instance.
        def abort_if_self
          abort "Cannot takeover self!" if instance_id == config.meta_data_id
        end

        # Aborts when a takeover lock file already exists, echoing its contents.
        def abort_if_takeover_lock
          return unless takeover_path.exist?

          abort "Already attempting takeover!\n#{takeover_path.read}"
        end

        # Memoized location of the takeover lock file, taken from the config.
        def takeover_path
          @takeover_path ||= config.takeover_path
        end

        # Creates the takeover lock file and arranges for its removal at exit.
        #
        # The file is opened with File::EXCL so creation fails if another
        # process created the lock after #abort_if_takeover_lock ran. The
        # cleanup hook is registered only after we own the file, so a losing
        # process never deletes the winner's lock.
        def set_takeover_lock
          exclusive_create = File::WRONLY | File::CREAT | File::EXCL
          takeover_path.open(exclusive_create) { |f| f << "Takeover started at #{Time.now}" }
          at_exit { takeover_path.delete if takeover_path.exist? }
        rescue Errno::EEXIST
          abort "Already attempting takeover!"
        end

        # Registers an exit hook that writes +data+ back to the database.
        # Database#set only stores (and returns true) when no record is
        # present, so the hook logs an error only when it actually had to
        # restore the record.
        #
        # data - the master record obtained from database.with_locked_data.
        def restore_data_on_fail(data)
          at_exit do
            if database.set(data) # always replace the data if it wasn't set
              Stonith.logger.error("Emergency replacement of redis data #{data.inspect} occurred.")
            end
          end
        end

        # Performs the takeover: records it in the history, steals the IP,
        # stores this instance as master and replaces the process with
        # `notify --force`.
        #
        # NOTE(review): Kernel#exec replaces the current process and, as far as
        # I know, at_exit hooks do not run on exec -- so the takeover lock file
        # likely stays behind after a successful takeover. Confirm this is
        # intended.
        #
        # instance_id - instance id of the master being taken over.
        # master_ip   - IP address currently held by that master.
        def locked!(instance_id, master_ip)
          Stonith.logger.info "Locked! Taking over #{instance_id}."
          history << :takeover

          ip = steal_address(instance_id, master_ip)
          data = Data.new(config.meta_data_hostname, config.meta_data_id, ip)
          database.set data

          exec "#{SCRIPT_NAME} notify --force#{command_options}"
        end

        # Records that the locked record named a different instance, then aborts.
        def relent!
          history << :relent
          msg = "Failed to grab lock, relenting."
          Stonith.logger.info msg
          abort msg
        end

        # Associates the master's address with this instance.
        #
        # Returns the IP reported by the AddressStealer after association.
        def steal_address(instance_id, ip)
          Stonith.logger.info "Stealing IP #{ip} from #{instance_id}."
          address = AddressStealer.new(instance_id, ip, config.fog_credentials)
          address.associate(config.meta_data_id)
          address.ip
        end

        # Extends the inherited option parser with -i/--instance.
        def parser
          super.on('-i', '--instance INSTANCE_ID', "Amazon Instance ID of the failing master") do |instance|
            @instance_id = instance
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module EY
  module Stonith
    # Registry and dispatcher for the stonith sub-commands. Every command
    # class is autoloaded from ey_stonith/commands/<name>.
    module Commands

      dir = 'ey_stonith/commands/'
      autoload :Abstract, "#{dir}abstract"
      autoload :NotFound, "#{dir}not_found"

      # Maps a command name to the constant implementing it. Unknown (or
      # missing) names fall back to :NotFound through the hash default.
      COMMANDS = Hash.new(:NotFound).merge({
        'check'    => :Check,
        'claim'    => :Claim,
        'commands' => :Commands,
        'cron'     => :Cron,
        'help'     => :Help,
        'notify'   => :Notify,
        'reset'    => :Reset,
        'resume'   => :Resume,
        'status'   => :Status,
        'stop'     => :Stop,
        'takeover' => :Takeover,
      })

      COMMANDS.each_pair { |name, const_name| autoload const_name, "#{dir}#{name}" }

      # Runs the command named by the first element of +argv+ (which is
      # removed from the array) with the remaining arguments.
      def self.invoke(argv)
        command_name = argv.shift
        command_class = const_get(COMMANDS[command_name])
        command_class.new(argv).call
      end

      # One line per command, sorted by name: the padded name and its banner.
      def self.formatted_command_list
        lines = COMMANDS.sort_by { |cmd, _| cmd }.map do |cmd, klass_name|
          banner = const_get(klass_name).banner
          " #{cmd.ljust(8)} #{banner}"
        end
        lines.join("\n")
      end
    end
  end
end
|
data/lib/ey_stonith/config.rb
CHANGED
@@ -1,26 +1,119 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'json'
|
3
|
+
require 'open-uri'
|
3
4
|
|
4
5
|
module EY
|
5
6
|
module Stonith
|
6
|
-
class Config
|
7
|
-
|
8
|
-
|
7
|
+
class Config
|
8
|
+
class Error < StandardError
|
9
|
+
def initialize(path, message)
|
10
|
+
super("Config file #{path}: #{message}")
|
11
|
+
end
|
9
12
|
end
|
10
|
-
|
11
|
-
def cloud_credentials() YAML::load_file(ey_cloud) end
|
12
|
-
def notify_uri() "#{cloud_credentials[:api]}/api/promote_instance_to_master" end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
class FileNotFound < Error
|
15
|
+
def initialize(path)
|
16
|
+
super(path, "File not found.")
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class RequiredSetting < Error
|
21
|
+
def initialize(path, key)
|
22
|
+
super(path, "Missing required setting #{key}.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
DEFAULT = {
|
27
|
+
'log' => '/var/log/stonith.log',
|
28
|
+
'state_dir' => '/var/run/stonith',
|
29
|
+
'heartbeat' => 10,
|
30
|
+
'notify_uri' => nil,
|
31
|
+
|
32
|
+
'monitor_host' => nil,
|
33
|
+
'monitor_path' => '/haproxy/monitor',
|
34
|
+
|
35
|
+
'redis_host' => nil,
|
36
|
+
'redis_port' => 6379,
|
37
|
+
'redis_key' => 'stonith',
|
38
|
+
'redis_db' => 14,
|
39
|
+
'redis_timeout' => 60 * 5,
|
40
|
+
|
41
|
+
'aws_secret_id' => nil,
|
42
|
+
'aws_secret_key' => nil,
|
43
|
+
}
|
44
|
+
|
45
|
+
# Loads the YAML config at +config_path+ and layers it over DEFAULT.
#
# config_path - path to the YAML settings file.
#
# Raises FileNotFound when the file is missing or unreadable.
# Raises Error when the file does not contain a mapping of settings.
def initialize(config_path)
  path = Pathname.new(config_path)
  raise FileNotFound.new(path) unless path.readable?

  realpath = path.realpath

  # An empty file loads as nil/false; treat it as "no overrides" so required
  # settings are reported by RequiredSetting instead of a TypeError from merge.
  settings = YAML.load_file(realpath) || {}
  raise Error.new(realpath, "Expected a mapping of settings.") unless settings.is_a?(Hash)

  @data = DEFAULT.merge(settings)
  @path = realpath
end
|
53
|
+
|
54
|
+
attr_reader :path
|
55
|
+
|
56
|
+
def log_path() pathname(log) end
|
57
|
+
def state_path() ensure_exists(pathname(state_dir)) end
|
58
|
+
def stop_path() state_path + 'stop' end
|
59
|
+
def claim_path() state_path + 'claim' end
|
60
|
+
def checks_path() state_path + 'checks' end
|
61
|
+
def notify_path() state_path + 'notify' end
|
62
|
+
def history_path() state_path + 'history' end
|
63
|
+
def takeover_path() state_path + 'takeover' end
|
64
|
+
def notify_uri() URI.parse(method_missing('notify_uri')) end
|
65
|
+
|
66
|
+
def meta_data_hostname() @data['meta_data_hostname'] ||= meta_data('local-hostname') end
|
67
|
+
def meta_data_id() @data['meta_data_id'] ||= meta_data('instance-id') end
|
68
|
+
def meta_data_ip() @data['meta_data_ip'] || meta_data('public-ipv4') end # don't cache
|
69
|
+
|
70
|
+
# True when +meth+ names a loaded setting, or when the object responds to it
# in the ordinary way.
#
# Settings are stored under String keys, so the name is normalised with to_s;
# this makes respond_to?(:log) agree with what method_missing will serve.
#
# meth            - method name as a Symbol or String.
# include_private - forwarded to the default implementation (standard
#                   respond_to? signature).
def respond_to?(meth, include_private = false)
  @data.key?(meth.to_s) || super
end
|
73
|
+
|
74
|
+
def awsm_credentials
|
75
|
+
{
|
76
|
+
'aws_secret_id' => aws_secret_id,
|
77
|
+
'aws_secret_key' => aws_secret_key,
|
78
|
+
}
|
79
|
+
end
|
80
|
+
|
81
|
+
def fog_credentials
|
82
|
+
{
|
83
|
+
:aws_access_key_id => aws_secret_id,
|
84
|
+
:aws_secret_access_key => aws_secret_key,
|
85
|
+
}
|
86
|
+
end
|
20
87
|
|
21
88
|
private
|
22
|
-
|
23
|
-
def
|
89
|
+
|
90
|
+
def self.secret_hack
|
91
|
+
@secret_hack = true
|
92
|
+
end
|
93
|
+
|
94
|
+
def self.pwned?
|
95
|
+
@secret_hack
|
96
|
+
end
|
97
|
+
|
98
|
+
def meta_data(key)
|
99
|
+
open("http://169.254.169.254/latest/meta-data/#{key}").read
|
100
|
+
end
|
101
|
+
|
102
|
+
def pathname(path) path && Pathname.new(path) end
|
103
|
+
|
104
|
+
def ensure_exists(path)
|
105
|
+
path.mkpath unless self.class.pwned?
|
106
|
+
path
|
107
|
+
end
|
108
|
+
|
109
|
+
def method_missing(meth, *args)
|
110
|
+
meth_s = meth.to_s
|
111
|
+
if respond_to?(meth_s)
|
112
|
+
@data[meth_s] || raise(RequiredSetting.new(path, meth_s))
|
113
|
+
else
|
114
|
+
super
|
115
|
+
end
|
116
|
+
end
|
24
117
|
end
|
25
118
|
end
|
26
119
|
end
|
data/lib/ey_stonith/data.rb
CHANGED
data/lib/ey_stonith/database.rb
CHANGED
@@ -23,9 +23,10 @@ module EY
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def set(data)
|
26
|
-
unless get
|
26
|
+
unless get # very small race condition (very)
|
27
27
|
redis.lpush(master_key, Marshal.dump(data))
|
28
28
|
@locked = false
|
29
|
+
true
|
29
30
|
end
|
30
31
|
end
|
31
32
|
|
@@ -34,22 +35,43 @@ module EY
|
|
34
35
|
end
|
35
36
|
|
36
37
|
private
|
37
|
-
|
38
|
+
|
38
39
|
def get
|
39
40
|
result = redis.lindex(master_key, 0) # index 0
|
40
41
|
result && Marshal.load(result)
|
41
42
|
end
|
42
43
|
|
44
|
+
# popping & locking
|
43
45
|
def locked_get
|
44
|
-
Marshal.load redis.blpop(master_key,
|
46
|
+
Marshal.load redis.blpop(master_key, @config.redis_timeout).last
|
45
47
|
end
|
46
48
|
|
47
49
|
def master_key
|
48
|
-
@config.redis_key
|
50
|
+
"#{@config.redis_key}:master"
|
49
51
|
end
|
50
|
-
|
52
|
+
|
51
53
|
def redis
|
52
|
-
@redis ||=
|
54
|
+
@redis ||= load_redis
|
55
|
+
end
|
56
|
+
|
57
|
+
# Opens a Redis connection using the configured host, port, db and timeout,
# verifies the server version, and returns the connection.
# Aborts the process when the connection is refused.
def load_redis
  options = {
    :host    => @config.redis_host,
    :port    => @config.redis_port,
    :db      => @config.redis_db,
    :timeout => @config.redis_timeout
  }
  connection = Redis.new(options)

  server_version = connection.info[:redis_version]
  check_redis_version(server_version)

  connection
rescue Errno::ECONNREFUSED
  abort "Unable to connect to redis"
end
|
64
|
+
|
65
|
+
# Aborts the process unless +version+ is at least 1.3.1 (needed for blpop).
#
# version - dotted version string reported by the server, e.g. "1.3.12".
#           Missing components count as zero, so "1.3" is treated as 1.3.0
#           instead of raising on a nil patch level.
#
# Returns nil when the version is acceptable.
def check_redis_version(version)
  required = [1, 3, 1]

  actual = version.to_s.split('.').map { |num| num.to_i }.first(required.size)
  actual << 0 while actual.size < required.size

  # Array#<=> compares component by component: major, then minor, then patch.
  return if (actual <=> required) >= 0

  abort <<-ERROR
Redis server version [#{version}] is too old.
>= 1.3.1 required for blpop support.
  ERROR
end
|
54
76
|
end
|
55
77
|
end
|
data/lib/ey_stonith/history.rb
CHANGED
data/lib/ey_stonith.rb
CHANGED
@@ -11,6 +11,7 @@ module EY
|
|
11
11
|
autoload :AddressStealer, 'ey_stonith/address_stealer'
|
12
12
|
autoload :Box, 'ey_stonith/box'
|
13
13
|
autoload :CheckRecorder, 'ey_stonith/check_recorder'
|
14
|
+
autoload :Commands, 'ey_stonith/commands'
|
14
15
|
autoload :CLI, 'ey_stonith/cli'
|
15
16
|
autoload :Config, 'ey_stonith/config'
|
16
17
|
autoload :Data, 'ey_stonith/data'
|
@@ -21,15 +22,8 @@ module EY
|
|
21
22
|
autoload :MetaData, 'ey_stonith/meta_data'
|
22
23
|
autoload :Slave, 'ey_stonith/slave'
|
23
24
|
|
24
|
-
def self.
|
25
|
+
def self.log_to(io) @@logger = Logger.new(io) end
|
25
26
|
def self.logger() @@logger end
|
26
|
-
self.logger = Logger.new(STDOUT)
|
27
|
-
logger.level = Logger::INFO
|
28
|
-
|
29
|
-
def self.meta_data() @@meta_data end
|
30
|
-
def self.meta_data=(meta) @@meta_data = meta end
|
31
|
-
def self.reset_meta_data() @@meta_data = MetaData end
|
32
|
-
reset_meta_data
|
33
27
|
end
|
34
28
|
end
|
35
29
|
|