ey_stonith 0.1.4 → 0.1.5.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ey-monitor +2 -5
- data/bin/{ey-monitor-reset → stonith} +1 -1
- data/bin/{ey-monitor-stop → stonith-check} +1 -1
- data/bin/stonith-claim +5 -0
- data/bin/{ey-monitor-resume → stonith-cron} +1 -1
- data/bin/stonith-notify +5 -0
- data/bin/stonith-reset +5 -0
- data/bin/stonith-resume +5 -0
- data/bin/stonith-status +5 -0
- data/bin/{ey-monitor-status → stonith-stop} +1 -1
- data/bin/stonith-takeover +5 -0
- data/lib/ey_stonith/address_stealer.rb +1 -6
- data/lib/ey_stonith/awsm_notifier.rb +28 -29
- data/lib/ey_stonith/check_recorder.rb +19 -17
- data/lib/ey_stonith/commands/abstract.rb +94 -0
- data/lib/ey_stonith/commands/check.rb +58 -0
- data/lib/ey_stonith/commands/claim.rb +113 -0
- data/lib/ey_stonith/commands/commands.rb +26 -0
- data/lib/ey_stonith/commands/cron.rb +40 -0
- data/lib/ey_stonith/commands/help.rb +16 -0
- data/lib/ey_stonith/commands/not_found.rb +11 -0
- data/lib/ey_stonith/commands/notify.rb +85 -0
- data/lib/ey_stonith/commands/reset.rb +21 -0
- data/lib/ey_stonith/commands/resume.rb +19 -0
- data/lib/ey_stonith/commands/status.rb +23 -0
- data/lib/ey_stonith/commands/stop.rb +21 -0
- data/lib/ey_stonith/commands/takeover.rb +106 -0
- data/lib/ey_stonith/commands.rb +40 -0
- data/lib/ey_stonith/config.rb +107 -14
- data/lib/ey_stonith/data.rb +5 -1
- data/lib/ey_stonith/database.rb +28 -6
- data/lib/ey_stonith/history.rb +1 -1
- data/lib/ey_stonith.rb +2 -8
- metadata +57 -61
- data/lib/ey_stonith/abstract_master.rb +0 -15
- data/lib/ey_stonith/box.rb +0 -61
- data/lib/ey_stonith/cli.rb +0 -138
- data/lib/ey_stonith/local_master.rb +0 -28
- data/lib/ey_stonith/master.rb +0 -37
- data/lib/ey_stonith/meta_data.rb +0 -11
- data/lib/ey_stonith/slave.rb +0 -41
data/bin/ey-monitor
CHANGED
data/bin/stonith-claim
ADDED
data/bin/stonith-notify
ADDED
data/bin/stonith-reset
ADDED
data/bin/stonith-resume
ADDED
data/bin/stonith-status
ADDED
@@ -3,12 +3,7 @@ require 'fog'
|
|
3
3
|
module EY
|
4
4
|
module Stonith
|
5
5
|
class AddressStealer
|
6
|
-
def self.fog(credentials)
|
7
|
-
Fog::AWS::EC2.new(
|
8
|
-
:aws_access_key_id => credentials[:aws_secret_id],
|
9
|
-
:aws_secret_access_key => credentials[:aws_secret_key]
|
10
|
-
)
|
11
|
-
end
|
6
|
+
def self.fog(credentials) Fog::AWS::EC2.new(credentials) end
|
12
7
|
|
13
8
|
def initialize(server_id, ip, credentials)
|
14
9
|
@fog = self.class.fog(credentials)
|
@@ -1,49 +1,48 @@
|
|
1
|
-
require '
|
1
|
+
require 'json'
|
2
|
+
require 'net/http'
|
3
|
+
require 'timeout'
|
2
4
|
|
3
5
|
module EY
|
4
6
|
module Stonith
|
5
7
|
class AwsmNotifier
|
6
|
-
def initialize(instance_id,
|
7
|
-
@instance_id, @
|
8
|
-
call_api
|
8
|
+
def initialize(instance_id, uri, credentials)
|
9
|
+
@instance_id, @uri, @credentials = instance_id, uri, credentials
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
+
def notify(success, unreachable, refused)
|
13
|
+
Stonith.logger.info "Attempting to notify #@uri that I did a takeover."
|
12
14
|
|
13
|
-
|
14
|
-
EM.add_timer(@heartbeat) { call_api }
|
15
|
-
end
|
15
|
+
code, body = do_request
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
ok = (200...300).include?(http.response_header.status)
|
22
|
-
if ok && JSON.parse(http.response)['status'] == 'ok'
|
23
|
-
Stonith.logger.info("Notified awsm!")
|
17
|
+
case code
|
18
|
+
when 200...300
|
19
|
+
if JSON.parse(body)['status'] == 'ok'
|
20
|
+
success.call
|
24
21
|
else
|
25
|
-
|
22
|
+
refused.call body
|
26
23
|
end
|
27
|
-
|
28
|
-
|
24
|
+
else
|
25
|
+
unreachable.call
|
26
|
+
end
|
27
|
+
rescue
|
28
|
+
unreachable.call
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
|
-
|
31
|
+
private
|
32
|
+
|
33
|
+
def do_request
|
34
|
+
response = Timeout.timeout(10) do
|
35
|
+
Net::HTTP.new(@uri.host, @uri.port).request_post(@uri.path, process_payload(payload), headers)
|
36
|
+
end
|
37
|
+
[response.code.to_i, response.body]
|
33
38
|
end
|
34
39
|
|
35
|
-
def
|
36
|
-
|
37
|
-
def result.bytesize() size end unless result.respond_to?(:bytesize)
|
38
|
-
result
|
40
|
+
def headers
|
41
|
+
{"Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/json"}
|
39
42
|
end
|
40
43
|
|
41
44
|
def payload
|
42
|
-
|
43
|
-
'instance_id' => @instance_id,
|
44
|
-
'aws_secret_id' => @opts[:aws_secret_id],
|
45
|
-
'aws_secret_key' => @opts[:aws_secret_key],
|
46
|
-
}
|
45
|
+
@credentials.merge('instance_id' => @instance_id)
|
47
46
|
end
|
48
47
|
|
49
48
|
# ripped from restclient so we can use eventmachine
|
@@ -3,50 +3,52 @@ module EY
|
|
3
3
|
class CheckRecorder
|
4
4
|
BAD_CHECK_MAX = 5
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_reader :bad
|
7
|
+
|
8
|
+
def initialize(pathname)
|
9
|
+
@pathname = pathname
|
10
|
+
rehydrate
|
8
11
|
end
|
9
12
|
|
10
13
|
def bad_check!(key)
|
11
14
|
reset_on_key_change(key)
|
12
15
|
log_bad_check
|
13
16
|
@bad += 1 if @seen_good
|
17
|
+
save
|
14
18
|
end
|
15
19
|
|
16
20
|
def good_check!(key)
|
17
|
-
@key = key
|
18
|
-
|
19
|
-
@seen_good = true
|
20
|
-
end
|
21
|
-
|
22
|
-
def seen_good?
|
23
|
-
@seen_good
|
21
|
+
@key, @bad, @seen_good = key, 0, true
|
22
|
+
save
|
24
23
|
end
|
25
24
|
|
26
25
|
def limit_exceeded?
|
27
|
-
seen_good
|
26
|
+
@seen_good && @bad > BAD_CHECK_MAX
|
28
27
|
end
|
29
28
|
|
30
|
-
|
31
|
-
|
29
|
+
protected
|
30
|
+
|
31
|
+
def save
|
32
|
+
@pathname.open('w') { |f| f << Marshal.dump([@key, @bad, @seen_good]) }
|
32
33
|
end
|
33
34
|
|
34
|
-
|
35
|
+
def rehydrate
|
36
|
+
@key, @bad, @seen_good = Marshal.load(@pathname.read) if @pathname.readable?
|
37
|
+
end
|
35
38
|
|
36
39
|
def reset
|
37
|
-
@bad = 0
|
38
|
-
@seen_good = false
|
40
|
+
@key, @bad, @seen_good = nil, 0, false
|
39
41
|
end
|
40
42
|
|
41
43
|
def reset_on_key_change(key)
|
42
|
-
unless
|
44
|
+
unless @key == key
|
43
45
|
reset
|
44
46
|
@key = key
|
45
47
|
end
|
46
48
|
end
|
47
49
|
|
48
50
|
def log_bad_check
|
49
|
-
Stonith.logger.
|
51
|
+
Stonith.logger.warn("Bad check against #{@key}. Seen good? #{@seen_good}")
|
50
52
|
end
|
51
53
|
end
|
52
54
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module EY
|
5
|
+
module Stonith
|
6
|
+
module Commands
|
7
|
+
class Abstract
|
8
|
+
DEFAULT_CONFIG_PATH = Pathname.new("/etc/stonith.yml")
|
9
|
+
SCRIPT_NAME = 'stonith'
|
10
|
+
|
11
|
+
def self.command
|
12
|
+
'[COMMAND]'
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(argv)
|
16
|
+
@argv = argv
|
17
|
+
parse!
|
18
|
+
init_logger
|
19
|
+
end
|
20
|
+
|
21
|
+
# Load the logger without requiring the config file to be present.
|
22
|
+
def init_logger
|
23
|
+
Stonith.log_to begin
|
24
|
+
config.log_path
|
25
|
+
rescue Config::FileNotFound
|
26
|
+
$stderr
|
27
|
+
end
|
28
|
+
Stonith.logger.level = Logger::INFO
|
29
|
+
end
|
30
|
+
|
31
|
+
def config
|
32
|
+
@config ||= Config.new(@options[:config_path])
|
33
|
+
end
|
34
|
+
|
35
|
+
def history
|
36
|
+
@history ||= History.new(config.history_path)
|
37
|
+
end
|
38
|
+
|
39
|
+
def database
|
40
|
+
@database ||= Database.new(config)
|
41
|
+
end
|
42
|
+
|
43
|
+
def call
|
44
|
+
invoke
|
45
|
+
rescue Config::Error => e
|
46
|
+
Stonith.logger.error e.message
|
47
|
+
abort "#{e.message}\n\n#{parser}"
|
48
|
+
rescue => e
|
49
|
+
error = "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
|
50
|
+
Stonith.logger.error error
|
51
|
+
abort error
|
52
|
+
end
|
53
|
+
|
54
|
+
def invoke
|
55
|
+
puts parser
|
56
|
+
end
|
57
|
+
|
58
|
+
def command_options
|
59
|
+
if @options[:config_path] != DEFAULT_CONFIG_PATH
|
60
|
+
" --config #{@options[:config_path]}"
|
61
|
+
else
|
62
|
+
""
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def parse!
|
67
|
+
parser.parse!(@argv)
|
68
|
+
end
|
69
|
+
|
70
|
+
def parser
|
71
|
+
@parser ||= OptionParser.new do |parser|
|
72
|
+
@options = {:config_path => DEFAULT_CONFIG_PATH}
|
73
|
+
|
74
|
+
parser.banner = "Usage: #{SCRIPT_NAME} #{self.class.command} [FLAGS]"
|
75
|
+
parser.separator ""
|
76
|
+
parser.separator "COMMANDS"
|
77
|
+
parser.separator Stonith::Commands.formatted_command_list
|
78
|
+
parser.separator ""
|
79
|
+
parser.separator "FLAGS"
|
80
|
+
|
81
|
+
parser.on('-c', '--config [FILE]', "Location of Stonith YAML config file (default #{@options[:config_path]})") do |path|
|
82
|
+
@options[:config_path] = Pathname.new(path)
|
83
|
+
end
|
84
|
+
|
85
|
+
parser.on_tail("-h", "--help", "Show this message") do
|
86
|
+
puts parser
|
87
|
+
exit
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module EY
|
2
|
+
module Stonith
|
3
|
+
module Commands
|
4
|
+
class Check < Abstract
|
5
|
+
|
6
|
+
def self.command
|
7
|
+
'check'
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.banner
|
11
|
+
"Check the remote master haproxy"
|
12
|
+
end
|
13
|
+
|
14
|
+
def invoke
|
15
|
+
database.with_data do |data|
|
16
|
+
abort_if_master(data.hostname)
|
17
|
+
|
18
|
+
history << :check
|
19
|
+
|
20
|
+
begin
|
21
|
+
open("http://#{data.hostname}#{config.monitor_path}").read
|
22
|
+
rescue
|
23
|
+
bad_check(data.key, data.instance_id)
|
24
|
+
else
|
25
|
+
good_check(data.key)
|
26
|
+
end
|
27
|
+
|
28
|
+
exit
|
29
|
+
end
|
30
|
+
|
31
|
+
$stderr.puts "No master to check!"
|
32
|
+
end
|
33
|
+
|
34
|
+
def abort_if_master(hostname)
|
35
|
+
return if config.meta_data_hostname != hostname
|
36
|
+
abort "This instance is master! Can't check self."
|
37
|
+
end
|
38
|
+
|
39
|
+
def good_check(key)
|
40
|
+
check_recorder.good_check!(key)
|
41
|
+
end
|
42
|
+
|
43
|
+
def bad_check(key, instance_id)
|
44
|
+
check_recorder.bad_check!(key)
|
45
|
+
|
46
|
+
if check_recorder.limit_exceeded?
|
47
|
+
Stonith.logger.info "Invoking takeover of instance #{instance_id}"
|
48
|
+
exec "#{SCRIPT_NAME} takeover#{command_options} --instance #{instance_id}"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def check_recorder
|
53
|
+
@check_recorder ||= CheckRecorder.new(config.checks_path)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module EY
|
2
|
+
module Stonith
|
3
|
+
module Commands
|
4
|
+
class Claim < Abstract
|
5
|
+
|
6
|
+
def self.command
|
7
|
+
'claim'
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.banner
|
11
|
+
'Claim the master record in the database'
|
12
|
+
end
|
13
|
+
|
14
|
+
def claim_path
|
15
|
+
@claim_path ||= config.claim_path
|
16
|
+
end
|
17
|
+
|
18
|
+
def invoke
|
19
|
+
claim_path.exist?? invoke_with_claim_file : invoke_without_claim_file
|
20
|
+
end
|
21
|
+
|
22
|
+
def invoke_with_claim_file
|
23
|
+
abort_on_existing_data
|
24
|
+
|
25
|
+
attempts = (claim_path.read || 0).to_i.succ
|
26
|
+
|
27
|
+
if @force || attempts >= 6
|
28
|
+
reclaim!
|
29
|
+
else
|
30
|
+
persist_reclaim_attempt(attempts)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def abort_on_existing_data
|
35
|
+
return unless master_hostname
|
36
|
+
|
37
|
+
if config.meta_data_hostname == master_hostname
|
38
|
+
abort "Already claimed, not claiming."
|
39
|
+
else
|
40
|
+
claim_path.delete
|
41
|
+
abort "#{master_hostname} is master, not claiming.\nRemoving stale claim file."
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def reclaim!
|
46
|
+
Stonith.logger.info @force ? "Reclaim forced." : "Reclaim after 6 consecutive checks."
|
47
|
+
claim_path.delete
|
48
|
+
invoke_without_claim_file
|
49
|
+
end
|
50
|
+
|
51
|
+
def persist_reclaim_attempt(attempts)
|
52
|
+
claim_path.open("w") { |f| f << attempts }
|
53
|
+
|
54
|
+
abort <<-ERROR
|
55
|
+
Master unknown and claim file exists, not claiming.
|
56
|
+
Reclaim at 6th attempt or use --force
|
57
|
+
|
58
|
+
Failed attempts: #{attempts}
|
59
|
+
ERROR
|
60
|
+
end
|
61
|
+
|
62
|
+
def invoke_without_claim_file
|
63
|
+
@force ? database.reset : confirm_master!
|
64
|
+
|
65
|
+
data = Data.new(config.meta_data_hostname, config.meta_data_id, config.meta_data_ip)
|
66
|
+
database.set data
|
67
|
+
claim_path.open('w') {}
|
68
|
+
|
69
|
+
Stonith.logger.info "Claimed with data: #{data}"
|
70
|
+
history << :claim
|
71
|
+
end
|
72
|
+
|
73
|
+
def confirm_master!
|
74
|
+
# TODO: Only claim if the master has an IP?
|
75
|
+
# if fog.servers.get(config.meta_data_id).addresses.empty?
|
76
|
+
# abort "No IP, not claiming." + (@force ? "\nIgnoring --force" : "")
|
77
|
+
# end
|
78
|
+
|
79
|
+
confirm_master_with_database
|
80
|
+
confirm_master_with_config
|
81
|
+
end
|
82
|
+
|
83
|
+
def confirm_master_with_database
|
84
|
+
return unless master_hostname
|
85
|
+
|
86
|
+
if config.meta_data_hostname == master_hostname
|
87
|
+
claim_path.open('w') {}
|
88
|
+
abort "Already claimed, not claiming. Touching claim file."
|
89
|
+
else
|
90
|
+
abort "#{master_hostname} is master, not claiming."
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def confirm_master_with_config
|
95
|
+
if config.meta_data_hostname != config.monitor_host
|
96
|
+
abort "#{config.monitor_host} is master, not claiming."
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def master_hostname
|
101
|
+
@master_hostname ||= database.with_data { |data| data.hostname }
|
102
|
+
end
|
103
|
+
|
104
|
+
def parser
|
105
|
+
super.on('-f', '--force', "Force the command (only applicable to claim currently)") do |f|
|
106
|
+
@force = f
|
107
|
+
end
|
108
|
+
super
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module EY
|
2
|
+
module Stonith
|
3
|
+
module Commands
|
4
|
+
class Commands < Abstract
|
5
|
+
def self.command
|
6
|
+
'commands'
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.banner
|
10
|
+
"Print all the available commands"
|
11
|
+
end
|
12
|
+
|
13
|
+
def invoke
|
14
|
+
puts <<-MESSAGE
|
15
|
+
Available stonith commands:
|
16
|
+
|
17
|
+
#{Stonith::Commands.formatted_command_list}
|
18
|
+
|
19
|
+
Example:
|
20
|
+
$ stonith status
|
21
|
+
MESSAGE
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module EY
|
2
|
+
module Stonith
|
3
|
+
module Commands
|
4
|
+
class Cron < Abstract
|
5
|
+
def self.command
|
6
|
+
'cron'
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.banner
|
10
|
+
"Run the Stonith cron-like process that invokes other stonith commands periodically"
|
11
|
+
end
|
12
|
+
|
13
|
+
def invoke
|
14
|
+
heartbeat_loop do |beat|
|
15
|
+
unless_stopped { run_commands if beat.zero? }
|
16
|
+
sleep 1
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def heartbeat_loop(&block)
|
21
|
+
loop { config.heartbeat.ceil.times(&block) }
|
22
|
+
end
|
23
|
+
|
24
|
+
def unless_stopped
|
25
|
+
if config.stop_path.exist?
|
26
|
+
history << :stop
|
27
|
+
else
|
28
|
+
yield
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def run_commands
|
33
|
+
%w[claim notify check].each do |cmd|
|
34
|
+
system("stonith #{cmd}#{command_options}")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module EY
|
2
|
+
module Stonith
|
3
|
+
module Commands
|
4
|
+
class Help < Abstract
|
5
|
+
def self.banner
|
6
|
+
"A helpful message :D"
|
7
|
+
end
|
8
|
+
|
9
|
+
def invoke
|
10
|
+
puts "Stonith is a set of takeover & monitoring scripts for Engine Yard clusters.\n\n"
|
11
|
+
super
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|