ey_stonith 0.1.4 → 0.1.5.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ey-monitor +2 -5
- data/bin/{ey-monitor-reset → stonith} +1 -1
- data/bin/{ey-monitor-stop → stonith-check} +1 -1
- data/bin/stonith-claim +5 -0
- data/bin/{ey-monitor-resume → stonith-cron} +1 -1
- data/bin/stonith-notify +5 -0
- data/bin/stonith-reset +5 -0
- data/bin/stonith-resume +5 -0
- data/bin/stonith-status +5 -0
- data/bin/{ey-monitor-status → stonith-stop} +1 -1
- data/bin/stonith-takeover +5 -0
- data/lib/ey_stonith/address_stealer.rb +1 -6
- data/lib/ey_stonith/awsm_notifier.rb +28 -29
- data/lib/ey_stonith/check_recorder.rb +19 -17
- data/lib/ey_stonith/commands/abstract.rb +94 -0
- data/lib/ey_stonith/commands/check.rb +58 -0
- data/lib/ey_stonith/commands/claim.rb +113 -0
- data/lib/ey_stonith/commands/commands.rb +26 -0
- data/lib/ey_stonith/commands/cron.rb +40 -0
- data/lib/ey_stonith/commands/help.rb +16 -0
- data/lib/ey_stonith/commands/not_found.rb +11 -0
- data/lib/ey_stonith/commands/notify.rb +85 -0
- data/lib/ey_stonith/commands/reset.rb +21 -0
- data/lib/ey_stonith/commands/resume.rb +19 -0
- data/lib/ey_stonith/commands/status.rb +23 -0
- data/lib/ey_stonith/commands/stop.rb +21 -0
- data/lib/ey_stonith/commands/takeover.rb +106 -0
- data/lib/ey_stonith/commands.rb +40 -0
- data/lib/ey_stonith/config.rb +107 -14
- data/lib/ey_stonith/data.rb +5 -1
- data/lib/ey_stonith/database.rb +28 -6
- data/lib/ey_stonith/history.rb +1 -1
- data/lib/ey_stonith.rb +2 -8
- metadata +57 -61
- data/lib/ey_stonith/abstract_master.rb +0 -15
- data/lib/ey_stonith/box.rb +0 -61
- data/lib/ey_stonith/cli.rb +0 -138
- data/lib/ey_stonith/local_master.rb +0 -28
- data/lib/ey_stonith/master.rb +0 -37
- data/lib/ey_stonith/meta_data.rb +0 -11
- data/lib/ey_stonith/slave.rb +0 -41
@@ -0,0 +1,85 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `notify` command: reports to the provisioning server (AWSM) that
      # this instance is now master.
      class Notify < Abstract
        def self.command
          'notify'
        end

        def self.banner
          'Notify the provisioning server about a takeover'
        end

        # Marker file (config.notify_path). #unreachable re-creates it so a
        # failed notification can be retried later without --force.
        def notify_path
          @notify_path ||= config.notify_path
        end

        # Entry point: refuses to run unless a notification is pending (or
        # --force was given), clears the marker, then notifies AWSM.
        def invoke
          abort_if_unintentional

          notify_path.delete if notify_path.exist?

          database.with_data do |data|
            abort_if_master(data.hostname)
            notify!
          end

          # Reached only when the block above did not end the process; all
          # three callbacks given to the notifier exit or abort.
          # NOTE(review): presumably with_data skips the block when no master
          # record exists -- confirm against Database#with_data.
          abort_no_data
        end

        # Aborts unless --force was given or the notify marker file exists.
        def abort_if_unintentional
          return if @force || notify_path.exist?

          abort "Cannot notify, #{notify_path} does not exist.\n" \
                "If you want to (destructively) notify AWSM that this instance is master, call with --force.\n"
        end

        # Despite the name, this aborts when this host is NOT the master
        # recorded in the database (hostname is the recorded master's).
        def abort_if_master(hostname)
          if config.meta_data_hostname != hostname
            Stonith.logger.error "Cannot notify, I am not master. Master is #{hostname}. Cancelling notify."
            abort "Cannot notify, I am not master."
          end
        end

        # Logs and aborts because no master record was available.
        def abort_no_data
          msg = "Cannot notify, there is no master in the database. Giving up."
          Stonith.logger.error msg
          abort msg
        end

        # Sends the notification; the outcome is reported through the
        # success / unreachable / refused callbacks below.
        def notify!
          notifier = AwsmNotifier.new(config.meta_data_id, config.notify_uri, config.awsm_credentials)
          notifier.notify(method(:success), method(:unreachable), method(:refused))
        end

        # Callback: AWSM accepted the notification.
        def success
          Stonith.logger.info "AWSM notified!"
          history << :notified
          exit
        end

        # Callback: AWSM could not be reached. The marker file is restored so
        # the next run is allowed to retry.
        def unreachable
          notify_path.open('w') {}
          msg = "Unable to reach AWSM for promotion to master."
          Stonith.logger.warn msg
          # The executable shipped with this version is stonith-notify.
          abort "#{msg}\nIf you're running this from the command line, you should run stonith-notify again."
        end

        # Callback: AWSM answered but rejected the request.
        def refused(response_body)
          msg = "Notify refused by endpoint. Giving up.\nResponse: #{response_body}"
          Stonith.logger.error msg
          abort msg
        end

        # Base option parser extended with --force. Memoized so the option is
        # registered once and `super` is evaluated a single time
        # (OptionParser#on returns the parser itself).
        def parser
          @notify_parser ||= super.on('-f', '--force', "Force the notification even when no notify is pending") { |f|
            @force = f
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `reset` command: wipes the database record and the local state
      # directory.
      class Reset < Abstract
        def self.command
          'reset'
        end

        def self.banner
          "Reset the state of all stonith commands"
        end

        # Resets the database first, then removes the state directory tree.
        def invoke
          database.reset
          state_dir = config.state_path
          state_dir.rmtree
          Stonith.logger.info "All state and database reset!"
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `resume` command: removes the stop marker file if it is present.
      class Resume < Abstract
        def self.command
          'resume'
        end

        def self.banner
          "Resume Stonith monitoring"
        end

        def invoke
          stop_file = config.stop_path
          return unless stop_file.exist?

          stop_file.delete
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `status` command: prints the recorded history, if there is any.
      class Status < Abstract
        def self.command
          'status'
        end

        def self.banner
          "List the last one or two actions performed"
        end

        # Prints nothing at all when the history renders as an empty string.
        def invoke
          return if status.empty?

          puts status
        end

        # History rendered as a string, computed once per command instance.
        def status
          @status ||= history.to_s
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module EY
  module Stonith
    module Commands
      # The `stop` command: creates the stop marker file and waits until the
      # history shows that the stop has been recorded.
      class Stop < Abstract
        def self.command
          'stop'
        end

        def self.banner
          "Stop Stonith monitoring"
        end

        def invoke
          config.stop_path.open('w') {}

          # Poll twice a second; there is no upper bound on the wait.
          # NOTE(review): the last entry is compared with a String here while
          # include? below is given a Symbol -- confirm History accepts both.
          until history.last == "stop"
            sleep(0.5)
          end

          puts "takeover" if history.include?(:takeover)
        end
      end
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module EY
  module Stonith
    module Commands
      # The `takeover` command: takes the master role away from the instance
      # given with --instance, steals its IP address and then hands over to
      # the `notify` command.
      class Takeover < Abstract
        def self.command
          'takeover'
        end

        def self.banner
          'Cause a takeover of the specified master'
        end

        # Instance id given on the command line (nil until parsed).
        attr_reader :instance_id

        # Entry point: validates arguments, takes the local takeover lock,
        # then pops the master record and either takes over or relents.
        def invoke
          abort_if_no_instance_id
          abort_if_self

          abort_if_takeover_lock
          set_takeover_lock

          database.with_locked_data do |data|
            restore_data_on_fail(data)

            if instance_id == data.instance_id
              locked!(data.instance_id, data.ip)
            else
              relent!
            end
          end
        end

        # Aborts (printing usage) when no instance id was supplied.
        def abort_if_no_instance_id
          return unless instance_id.to_s.empty?

          abort "Please call with the instance_id of the master to takeover.\n\n#{parser}"
        end

        # Aborts when asked to take over this very instance.
        def abort_if_self
          abort "Cannot takeover self!" if instance_id == config.meta_data_id
        end

        # Aborts when the takeover lock file already exists, echoing its
        # contents.
        def abort_if_takeover_lock
          return unless takeover_path.exist?

          abort "Already attempting takeover!\n#{takeover_path.read}"
        end

        # Lock file path (config.takeover_path), memoized.
        def takeover_path
          @takeover_path ||= config.takeover_path
        end

        # Writes the lock file and registers its removal at process exit.
        # NOTE(review): #locked! ends in exec, which replaces the process;
        # at_exit handlers are presumably not run in that case, leaving the
        # lock file behind after a successful takeover -- confirm intent.
        def set_takeover_lock
          at_exit { takeover_path.delete if takeover_path.exist? }
          takeover_path.open('w') { |f| f << "Takeover started at #{Time.now}" }
        end

        # Registers an exit hook that puts the popped record back when the
        # database has no master record at exit (Database#set is a no-op
        # otherwise).
        def restore_data_on_fail(data)
          at_exit do
            if database.set(data) # always replace the data if it wasn't set
              Stonith.logger.error("Emergency replacement of redis data #{data.inspect} occurred.")
            end
          end
        end

        # We hold the record of the instance we were asked to replace: steal
        # its address, record ourselves as master, then run `notify --force`.
        def locked!(instance_id, master_ip)
          Stonith.logger.info "Locked! Taking over #{instance_id}."
          history << :takeover

          ip = steal_address(instance_id, master_ip)
          data = Data.new(config.meta_data_hostname, config.meta_data_id, ip)
          database.set data

          exec "#{SCRIPT_NAME} notify --force#{command_options}"
        end

        # The popped record belongs to a different instance: give up.
        def relent!
          history << :relent
          msg = "Failed to grab lock, relenting."
          Stonith.logger.info msg
          abort msg
        end

        # Re-associates the master's address with this instance and returns
        # the address reported by the stealer.
        def steal_address(instance_id, ip)
          Stonith.logger.info "Stealing IP #{ip} from #{instance_id}."
          address = AddressStealer.new(instance_id, ip, config.fog_credentials)
          address.associate(config.meta_data_id)
          address.ip
        end

        # Base option parser extended with --instance. Memoized so the option
        # is registered once even though the parser is also interpolated into
        # the usage text (OptionParser#on returns the parser itself).
        def parser
          @takeover_parser ||= super.on('-i', '--instance INSTANCE_ID', "Amazon Instance ID of the failing master") { |instance|
            @instance_id = instance
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module EY
  module Stonith
    # Registry of sub-commands plus the dispatcher that runs one of them.
    module Commands

      dir = 'ey_stonith/commands/'
      autoload :Abstract, "#{dir}abstract"
      autoload :NotFound, "#{dir}not_found"

      # Command word => name of the implementing constant. Unknown words
      # (including nil) fall back to :NotFound through the hash default.
      COMMANDS = Hash.new(:NotFound).merge({
        'check'    => :Check,
        'claim'    => :Claim,
        'commands' => :Commands,
        'cron'     => :Cron,
        'help'     => :Help,
        'notify'   => :Notify,
        'reset'    => :Reset,
        'resume'   => :Resume,
        'status'   => :Status,
        'stop'     => :Stop,
        'takeover' => :Takeover,
      })

      # Each command lives in a file named after its command word.
      COMMANDS.each_pair { |command, klass| autoload klass, "#{dir}#{command}" }

      # Consumes the first element of argv as the command word and runs the
      # matching command with the remaining arguments.
      def self.invoke(argv)
        word = argv.shift
        const_get(COMMANDS[word]).new(argv).call
      end

      # One line per command, sorted by command word, with its banner.
      def self.formatted_command_list
        rows = COMMANDS.sort_by { |cmd, _| cmd }.map do |cmd, klass_name|
          klass = const_get(klass_name)
          " #{cmd.ljust(8)} #{klass.banner}"
        end
        rows.join("\n")
      end
    end
  end
end
|
data/lib/ey_stonith/config.rb
CHANGED
@@ -1,26 +1,119 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'json'
|
3
|
+
require 'open-uri'
|
3
4
|
|
4
5
|
module EY
|
5
6
|
module Stonith
|
6
|
-
class Config
|
7
|
-
|
8
|
-
|
7
|
+
class Config
|
8
|
+
# Base class for configuration problems; every message is prefixed with the
# path of the offending config file.
class Error < StandardError
  def initialize(path, message)
    description = "Config file #{path}: #{message}"
    super(description)
  end
end
|
10
|
-
|
11
|
-
def cloud_credentials() YAML::load_file(ey_cloud) end
|
12
|
-
def notify_uri() "#{cloud_credentials[:api]}/api/promote_instance_to_master" end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
# Raised by Config#initialize when the config file is not readable.
class FileNotFound < Error
  def initialize(path)
    reason = "File not found."
    super(path, reason)
  end
end
|
19
|
+
|
20
|
+
# Raised when a known setting is read but has no (truthy) value.
class RequiredSetting < Error
  def initialize(path, key)
    reason = "Missing required setting #{key}."
    super(path, reason)
  end
end
|
25
|
+
|
26
|
+
# Built-in settings, overridden by the YAML config file in #initialize.
# Keys whose default is nil must be supplied by the config file: reading
# them through method_missing raises RequiredSetting otherwise.
# Frozen so the shared defaults cannot be mutated; Hash#merge still returns
# a fresh, unfrozen hash.
DEFAULT = {
  'log'            => '/var/log/stonith.log',
  'state_dir'      => '/var/run/stonith',
  'heartbeat'      => 10,
  'notify_uri'     => nil,

  'monitor_host'   => nil,
  'monitor_path'   => '/haproxy/monitor',

  'redis_host'     => nil,
  'redis_port'     => 6379,
  'redis_key'      => 'stonith',
  'redis_db'       => 14,
  'redis_timeout'  => 60 * 5,

  'aws_secret_id'  => nil,
  'aws_secret_key' => nil,
}.freeze
|
44
|
+
|
45
|
+
# Loads settings from the YAML file at config_path on top of DEFAULT.
#
# Raises FileNotFound when the file is not readable.
def initialize(config_path)
  path = Pathname.new(config_path)
  raise FileNotFound.new(path) unless path.readable?

  realpath = path.realpath
  # An empty YAML file loads as a falsy value; treat it as "no overrides"
  # instead of letting Hash#merge raise a TypeError.
  @data = DEFAULT.merge(YAML.load_file(realpath) || {})
  @path = realpath
end
|
53
|
+
|
54
|
+
attr_reader :path
|
55
|
+
|
56
|
+
# Log file location as a Pathname.
def log_path
  pathname(log)
end

# State directory as a Pathname; goes through ensure_exists on every call.
def state_path
  ensure_exists(pathname(state_dir))
end

# Files kept inside the state directory.
def stop_path
  state_path + 'stop'
end

def claim_path
  state_path + 'claim'
end

def checks_path
  state_path + 'checks'
end

def notify_path
  state_path + 'notify'
end

def history_path
  state_path + 'history'
end

def takeover_path
  state_path + 'takeover'
end

# Parsed notify_uri setting. method_missing is called directly because this
# method shadows the setting of the same name.
def notify_uri
  URI.parse(method_missing('notify_uri'))
end

# Instance meta-data; a value present in the settings wins over a lookup.
def meta_data_hostname
  @data['meta_data_hostname'] ||= meta_data('local-hostname')
end

def meta_data_id
  @data['meta_data_id'] ||= meta_data('instance-id')
end

def meta_data_ip
  @data['meta_data_ip'] || meta_data('public-ipv4') # don't cache
end
|
69
|
+
|
70
|
+
# True for every known setting name (String or Symbol) in addition to the
# regular methods. Accepts the optional second argument of
# Object#respond_to? so two-argument callers keep working.
def respond_to?(meth, include_private = false)
  # Setting keys are Strings; normalise so Symbols match as well.
  @data.key?(meth.to_s) || super
end
|
73
|
+
|
74
|
+
# AWS credentials keyed by the setting names (String keys).
def awsm_credentials
  creds = {}
  creds['aws_secret_id']  = aws_secret_id
  creds['aws_secret_key'] = aws_secret_key
  creds
end

# The same two settings under the Symbol keys :aws_access_key_id and
# :aws_secret_access_key.
def fog_credentials
  creds = {}
  creds[:aws_access_key_id]     = aws_secret_id
  creds[:aws_secret_access_key] = aws_secret_key
  creds
end
|
20
87
|
|
21
88
|
private
|
22
|
-
|
23
|
-
def
|
89
|
+
|
90
|
+
# Class-level switch read by ensure_exists: once secret_hack has been
# called, pwned? is true and the state directory is no longer created.
# These are singleton methods, so the `private` keyword above them does not
# change their visibility.
class << self
  def secret_hack
    @secret_hack = true
  end

  def pwned?
    @secret_hack
  end
end
|
97
|
+
|
98
|
+
# Fetches one value from the instance meta-data service at 169.254.169.254.
#
# Uses URI#read (added by open-uri) rather than Kernel#open: it returns the
# body directly, leaves no handle open, and does not depend on Kernel#open
# accepting URLs.
def meta_data(key)
  URI.parse("http://169.254.169.254/latest/meta-data/#{key}").read
end
|
101
|
+
|
102
|
+
# Wraps path in a Pathname; a nil/false path is passed through unchanged.
def pathname(path)
  return path unless path

  Pathname.new(path)
end

# Creates the directory (and parents) unless the class-level pwned? switch
# is on, then returns the path either way.
def ensure_exists(path)
  unless self.class.pwned?
    path.mkpath
  end
  path
end
|
108
|
+
|
109
|
+
# Exposes every setting as a reader method. A known setting whose value is
# nil or false raises RequiredSetting; anything else falls through to the
# default method_missing.
def method_missing(meth, *args)
  setting = meth.to_s
  return super unless respond_to?(setting)

  value = @data[setting]
  value || raise(RequiredSetting.new(path, setting))
end
|
24
117
|
end
|
25
118
|
end
|
26
119
|
end
|
data/lib/ey_stonith/data.rb
CHANGED
data/lib/ey_stonith/database.rb
CHANGED
@@ -23,9 +23,10 @@ module EY
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Stores data as the master record when none is present. Returns true when
# it wrote the record and nil when a record already existed.
def set(data)
  return if get # very small race condition (very)

  redis.lpush(master_key, Marshal.dump(data))
  @locked = false
  true
end
|
31
32
|
|
@@ -34,22 +35,43 @@ module EY
|
|
34
35
|
end
|
35
36
|
|
36
37
|
private
|
37
|
-
|
38
|
+
|
38
39
|
# Reads (without removing) the record at index 0 of the master list and
# unmarshals it; a missing entry is returned as-is.
def get
  raw = redis.lindex(master_key, 0) # index 0
  return raw unless raw

  Marshal.load(raw)
end
|
42
43
|
|
44
|
+
# popping & locking
|
43
45
|
# Blocking pop of the master record, waiting up to redis_timeout seconds.
# Returns the unmarshalled record, or nil when the pop timed out (blpop then
# yields nothing), mirroring how #get reports a missing record.
def locked_get
  reply = redis.blpop(master_key, @config.redis_timeout)
  payload = reply && reply.last
  payload && Marshal.load(payload)
end
|
46
48
|
|
47
49
|
# Redis key of the master list: the configured redis_key plus ":master".
def master_key
  @config.redis_key.to_s + ':master'
end
|
50
|
-
|
52
|
+
|
51
53
|
# Lazily created Redis connection.
def redis
  @redis || (@redis = load_redis)
end

# Opens the connection described by the config and checks the server
# version before handing it out. Aborts when the connection is refused.
def load_redis
  options = {
    :host    => @config.redis_host,
    :port    => @config.redis_port,
    :db      => @config.redis_db,
    :timeout => @config.redis_timeout,
  }
  client = Redis.new(options)
  check_redis_version client.info[:redis_version]
  client
rescue Errno::ECONNREFUSED
  abort "Unable to connect to redis"
end
|
64
|
+
|
65
|
+
# Aborts unless the server version is at least 1.3.1 (needed for blpop).
# Returns nil when the version is acceptable.
def check_redis_version(version)
  required = [1, 3, 1]
  actual = version.to_s.split('.').map { |num| num.to_i }
  # Pad short versions such as "2" or "1.3" so every component compares as a
  # number instead of nil.
  actual << 0 while actual.size < required.size
  return if (actual.first(required.size) <=> required) >= 0

  abort "Redis server version [#{version}] is too old.\n" \
        ">= 1.3.1 required for blpop support.\n"
end
|
54
76
|
end
|
55
77
|
end
|
data/lib/ey_stonith/history.rb
CHANGED
data/lib/ey_stonith.rb
CHANGED
@@ -11,6 +11,7 @@ module EY
|
|
11
11
|
autoload :AddressStealer, 'ey_stonith/address_stealer'
|
12
12
|
autoload :Box, 'ey_stonith/box'
|
13
13
|
autoload :CheckRecorder, 'ey_stonith/check_recorder'
|
14
|
+
autoload :Commands, 'ey_stonith/commands'
|
14
15
|
autoload :CLI, 'ey_stonith/cli'
|
15
16
|
autoload :Config, 'ey_stonith/config'
|
16
17
|
autoload :Data, 'ey_stonith/data'
|
@@ -21,15 +22,8 @@ module EY
|
|
21
22
|
autoload :MetaData, 'ey_stonith/meta_data'
|
22
23
|
autoload :Slave, 'ey_stonith/slave'
|
23
24
|
|
24
|
-
def self.
|
25
|
+
# Points the shared logger at io.
def self.log_to(io)
  @@logger = Logger.new(io)
end

# The shared logger; log_to must have been called first, since nothing else
# visible here assigns @@logger.
def self.logger
  @@logger
end
|
26
|
-
self.logger = Logger.new(STDOUT)
|
27
|
-
logger.level = Logger::INFO
|
28
|
-
|
29
|
-
def self.meta_data() @@meta_data end
|
30
|
-
def self.meta_data=(meta) @@meta_data = meta end
|
31
|
-
def self.reset_meta_data() @@meta_data = MetaData end
|
32
|
-
reset_meta_data
|
33
27
|
end
|
34
28
|
end
|
35
29
|
|