ey_stonith 0.1.4 → 0.1.5.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data/bin/ey-monitor +2 -5
  2. data/bin/{ey-monitor-reset → stonith} +1 -1
  3. data/bin/{ey-monitor-stop → stonith-check} +1 -1
  4. data/bin/stonith-claim +5 -0
  5. data/bin/{ey-monitor-resume → stonith-cron} +1 -1
  6. data/bin/stonith-notify +5 -0
  7. data/bin/stonith-reset +5 -0
  8. data/bin/stonith-resume +5 -0
  9. data/bin/stonith-status +5 -0
  10. data/bin/{ey-monitor-status → stonith-stop} +1 -1
  11. data/bin/stonith-takeover +5 -0
  12. data/lib/ey_stonith/address_stealer.rb +1 -6
  13. data/lib/ey_stonith/awsm_notifier.rb +28 -29
  14. data/lib/ey_stonith/check_recorder.rb +19 -17
  15. data/lib/ey_stonith/commands/abstract.rb +94 -0
  16. data/lib/ey_stonith/commands/check.rb +58 -0
  17. data/lib/ey_stonith/commands/claim.rb +113 -0
  18. data/lib/ey_stonith/commands/commands.rb +26 -0
  19. data/lib/ey_stonith/commands/cron.rb +40 -0
  20. data/lib/ey_stonith/commands/help.rb +16 -0
  21. data/lib/ey_stonith/commands/not_found.rb +11 -0
  22. data/lib/ey_stonith/commands/notify.rb +85 -0
  23. data/lib/ey_stonith/commands/reset.rb +21 -0
  24. data/lib/ey_stonith/commands/resume.rb +19 -0
  25. data/lib/ey_stonith/commands/status.rb +23 -0
  26. data/lib/ey_stonith/commands/stop.rb +21 -0
  27. data/lib/ey_stonith/commands/takeover.rb +106 -0
  28. data/lib/ey_stonith/commands.rb +40 -0
  29. data/lib/ey_stonith/config.rb +107 -14
  30. data/lib/ey_stonith/data.rb +5 -1
  31. data/lib/ey_stonith/database.rb +28 -6
  32. data/lib/ey_stonith/history.rb +1 -1
  33. data/lib/ey_stonith.rb +2 -8
  34. metadata +57 -61
  35. data/lib/ey_stonith/abstract_master.rb +0 -15
  36. data/lib/ey_stonith/box.rb +0 -61
  37. data/lib/ey_stonith/cli.rb +0 -138
  38. data/lib/ey_stonith/local_master.rb +0 -28
  39. data/lib/ey_stonith/master.rb +0 -37
  40. data/lib/ey_stonith/meta_data.rb +0 -11
  41. data/lib/ey_stonith/slave.rb +0 -41
@@ -0,0 +1,85 @@
1
module EY
  module Stonith
    module Commands
      # The `notify` command: tells the provisioning server (AWSM) that this
      # instance has taken over as master.
      #
      # The command refuses to run unless the notify marker file exists (it is
      # written by #unreachable when an earlier attempt could not reach AWSM)
      # or --force was given.
      class Notify < Abstract
        def self.command
          'notify'
        end

        def self.banner
          'Notify the provisioning server about a takeover'
        end

        # Marker file taken from the config, memoized for this command run.
        def notify_path
          @notify_path ||= config.notify_path
        end

        # Entry point. Every callback handed to the notifier in #notify! ends
        # the process, so #abort_no_data is only reached when the block below
        # did not end it.
        # NOTE(review): presumably with_data skips the block when the database
        # holds no master record -- confirm against Database#with_data.
        def invoke
          abort_if_unintentional

          notify_path.delete if notify_path.exist?

          database.with_data do |data|
            abort_if_master(data.hostname)
            notify!
          end

          abort_no_data
        end

        # Aborts unless --force was given or the notify marker file exists.
        def abort_if_unintentional
          return if @force || notify_path.exist?

          abort "Cannot notify, #{notify_path} does not exist.\n" \
                "If you want to (destructively) notify AWSM that this instance is master, call with --force.\n"
        end

        # Despite the name, this aborts when this host is NOT the master:
        # +hostname+ is the master recorded in the database and it must match
        # this instance's own meta-data hostname.
        def abort_if_master(hostname)
          return if config.meta_data_hostname == hostname

          Stonith.logger.error "Cannot notify, I am not master. Master is #{hostname}. Cancelling notify."
          abort "Cannot notify, I am not master."
        end

        # Logs and aborts because no master record was available.
        def abort_no_data
          msg = "Cannot notify, there is no master in the database. Giving up."
          Stonith.logger.error msg
          abort msg
        end

        # Hands the three outcome callbacks to the AWSM notifier.
        def notify!
          notifier = AwsmNotifier.new(config.meta_data_id, config.notify_uri, config.awsm_credentials)
          notifier.notify(method(:success), method(:unreachable), method(:refused))
        end

        # Callback: notification accepted. Records it in history and exits.
        def success
          Stonith.logger.info "AWSM notified!"
          history << :notified
          exit
        end

        # Callback: AWSM could not be reached. Re-creates the (empty) marker
        # file so that a later run passes #abort_if_unintentional without
        # --force, then aborts.
        def unreachable
          notify_path.open('w') {}
          msg = "Unable to reach AWSM for promotion to master."
          Stonith.logger.warn msg
          # The executable shipped for this command is stonith-notify.
          abort "#{msg}\nIf you're running this from the command line, you should run stonith-notify again."
        end

        # Callback: the endpoint answered but refused the notification.
        def refused(response_body)
          msg = "Notify refused by endpoint. Giving up.\nResponse: #{response_body}"
          Stonith.logger.error msg
          abort msg
        end

        # Adds -f/--force to the inherited option parser. `super` is called
        # once so the option is registered on the very object that is returned.
        # NOTE(review): assumes Abstract#parser returns an OptionParser-like
        # object responding to #on -- confirm.
        def parser
          opts = super
          opts.on('-f', '--force', "Force the notification even if no notify marker file exists") do |f|
            @force = f
          end
          opts
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module EY
  module Stonith
    module Commands
      # The `reset` command: resets the database and removes the state
      # directory tree.
      class Reset < Abstract
        class << self
          def command
            'reset'
          end

          def banner
            "Reset the state of all stonith commands"
          end
        end

        # Resets the database first, then deletes the state directory
        # recursively, and finally logs that everything was reset.
        def invoke
          database.reset

          state_dir = config.state_path
          state_dir.rmtree

          Stonith.logger.info "All state and database reset!"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module EY
  module Stonith
    module Commands
      # The `resume` command: removes the stop marker file if it is present.
      class Resume < Abstract
        class << self
          def command
            'resume'
          end

          def banner
            "Resume Stonith monitoring"
          end
        end

        # Deletes the stop file; does nothing when the file is absent.
        def invoke
          stop_file = config.stop_path
          return unless stop_file.exist?

          stop_file.delete
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module EY
  module Stonith
    module Commands
      # The `status` command: prints the recorded history, if there is any.
      class Status < Abstract
        class << self
          def command
            'status'
          end

          def banner
            "List the last one or two actions performed"
          end
        end

        # Prints the history text; stays silent when it is empty.
        def invoke
          return if status.empty?

          puts status
        end

        # String form of the history, computed once per command run.
        def status
          @status ||= history.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module EY
  module Stonith
    module Commands
      # The `stop` command: creates the stop marker file and waits until the
      # history reports "stop" as its most recent entry.
      class Stop < Abstract
        class << self
          def command
            'stop'
          end

          def banner
            "Stop Stonith monitoring"
          end
        end

        # Touches the stop file, then polls the history every half second.
        # Prints "takeover" afterwards when the history contains a takeover.
        #
        # NOTE(review): the wait has no upper bound -- it never returns if
        # nothing ever records "stop" in the history. Confirm this is intended.
        def invoke
          config.stop_path.open('w') {}

          until history.last == "stop"
            sleep(0.5)
          end

          puts "takeover" if history.include?(:takeover)
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ require 'pathname'
2
+
3
module EY
  module Stonith
    module Commands
      # The `takeover` command: takes over from the master whose instance id
      # was passed with -i/--instance.
      #
      # A takeover marker file guards against a second takeover attempt on
      # this machine, and the master record is taken from the database with
      # Database#with_locked_data before any address is moved.
      class Takeover < Abstract
        # Instance id given on the command line (nil until options are parsed).
        attr_reader :instance_id

        def self.command
          'takeover'
        end

        def self.banner
          'Cause a takeover of the specified master'
        end

        # Entry point: validates the arguments, takes the local marker file,
        # then takes over only if the locked database record still names the
        # instance we were asked to replace; otherwise relents.
        def invoke
          abort_if_no_instance_id
          abort_if_self

          abort_if_takeover_lock
          set_takeover_lock

          database.with_locked_data do |data|
            restore_data_on_fail(data)

            if instance_id == data.instance_id
              locked!(data.instance_id, data.ip)
            else
              relent!
            end
          end
        end

        # Aborts with the usage text when no (or an empty) instance id was given.
        def abort_if_no_instance_id
          return if instance_id && instance_id != ""

          abort "Please call with the instance_id of the master to takeover.\n\n#{parser}"
        end

        # Aborts when asked to take over this very instance.
        def abort_if_self
          abort "Cannot takeover self!" if instance_id == config.meta_data_id
        end

        # Aborts when the takeover marker file already exists, echoing its
        # contents (written by #set_takeover_lock).
        def abort_if_takeover_lock
          return unless takeover_path.exist?

          abort "Already attempting takeover!\n#{takeover_path.read}"
        end

        # Marker file taken from the config, memoized for this command run.
        def takeover_path
          @takeover_path ||= config.takeover_path
        end

        # Writes the marker file and registers its removal for process exit.
        def set_takeover_lock
          at_exit { takeover_path.delete if takeover_path.exist? }
          takeover_path.open('w') { |f| f << "Takeover started at #{Time.now}" }
        end

        # Registers an exit hook that tries to write +data+ back to the
        # database; an error is logged when that write actually took place.
        def restore_data_on_fail(data)
          at_exit do
            if database.set(data) # always replace the data if it wasn't set
              Stonith.logger.error("Emergency replacement of redis data #{data.inspect} occurred.")
            end
          end
        end

        # We hold the lock for the expected master: record the takeover, move
        # the address to this instance, store ourselves as the new master and
        # replace this process with a forced `notify`.
        def locked!(instance_id, master_ip)
          Stonith.logger.info "Locked! Taking over #{instance_id}."
          history << :takeover

          ip = steal_address(instance_id, master_ip)
          data = Data.new(config.meta_data_hostname, config.meta_data_id, ip)
          database.set data

          exec "#{SCRIPT_NAME} notify --force#{command_options}"
        end

        # The database names a different master than requested: record it,
        # log it and abort.
        def relent!
          history << :relent
          msg = "Failed to grab lock, relenting."
          Stonith.logger.info msg
          abort msg
        end

        # Associates +ip+ (currently held by +instance_id+) with this
        # instance and returns the address reported by the stealer.
        def steal_address(instance_id, ip)
          Stonith.logger.info "Stealing IP #{ip} from #{instance_id}."
          address = AddressStealer.new(instance_id, ip, config.fog_credentials)
          address.associate(config.meta_data_id)
          address.ip
        end

        # Adds -i/--instance to the inherited option parser. `super` is called
        # once so the option is registered on the very object that is returned.
        # NOTE(review): assumes Abstract#parser returns an OptionParser-like
        # object responding to #on -- confirm.
        def parser
          opts = super
          opts.on('-i', '--instance INSTANCE_ID', "Amazon Instance ID of the failing master") do |instance|
            @instance_id = instance
          end
          opts
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module EY
  module Stonith
    # Registry and dispatcher for the command-line sub-commands.
    module Commands

      dir = 'ey_stonith/commands/'
      autoload :Abstract, dir + 'abstract'
      autoload :NotFound, dir + 'not_found'

      # Maps a command word to the name of the class implementing it.
      # Unknown words (and nil) fall back to :NotFound through the hash default.
      COMMANDS = Hash.new(:NotFound).merge({
        'check'    => :Check,
        'claim'    => :Claim,
        'commands' => :Commands,
        'cron'     => :Cron,
        'help'     => :Help,
        'notify'   => :Notify,
        'reset'    => :Reset,
        'resume'   => :Resume,
        'status'   => :Status,
        'stop'     => :Stop,
        'takeover' => :Takeover,
      })

      # Each command class is loaded lazily from a file named after its
      # command word.
      COMMANDS.each do |command, klass|
        autoload klass, dir + command
      end

      # Runs the command named by the first element of +argv+.
      # The command word is removed from +argv+ (the array is mutated) and the
      # remaining arguments are handed to the command class.
      def self.invoke(argv)
        klass = const_get(COMMANDS[argv.shift])
        klass.new(argv).call
      end

      # Returns one line per command, sorted by command word:
      # the word padded to a common width, followed by the class banner.
      def self.formatted_command_list
        # Pad to the longest command word instead of a fixed width so the
        # banner column stays aligned when a longer command is added.
        width = COMMANDS.keys.map { |cmd| cmd.length }.max

        COMMANDS.sort_by { |cmd, _| cmd }.map { |cmd, klass_name|
          klass = const_get(klass_name)
          " #{cmd.ljust(width)} #{klass.banner}"
        }.join("\n")
      end
    end
  end
end
@@ -1,26 +1,119 @@
1
1
  require 'yaml'
2
2
  require 'json'
3
+ require 'open-uri'
3
4
 
4
5
  module EY
5
6
  module Stonith
6
- class Config < Struct.new(:ey_cloud, :redis_yml, :dna_json, :history_path, :pid_path, :log_path)
7
- def initialize(*args)
8
- super *args.map { |arg| arg && Pathname.new(arg) }
7
# Settings for the stonith commands: the contents of a YAML file layered over
# DEFAULT. Individual settings are read as methods named after their key
# (for example `config.redis_host`), resolved through #method_missing.
class Config
  # Base class for configuration errors; the message names the config file.
  class Error < StandardError
    def initialize(path, message)
      super("Config file #{path}: #{message}")
    end
  end

  # Raised by #initialize when the config file is not readable.
  class FileNotFound < Error
    def initialize(path)
      super(path, "File not found.")
    end
  end

  # Raised when a known setting is read but has no value.
  class RequiredSetting < Error
    def initialize(path, key)
      super(path, "Missing required setting #{key}.")
    end
  end

  # Known settings and their defaults. A nil default marks a setting that
  # must come from the config file before it can be read.
  DEFAULT = {
    'log'            => '/var/log/stonith.log',
    'state_dir'      => '/var/run/stonith',
    'heartbeat'      => 10,
    'notify_uri'     => nil,

    'monitor_host'   => nil,
    'monitor_path'   => '/haproxy/monitor',

    'redis_host'     => nil,
    'redis_port'     => 6379,
    'redis_key'      => 'stonith',
    'redis_db'       => 14,
    'redis_timeout'  => 60 * 5,

    'aws_secret_id'  => nil,
    'aws_secret_key' => nil,
  }

  # Class-level switch: after secret_hack has been called, #state_path no
  # longer creates the state directory (see #ensure_exists).
  # These are class methods, so a `private` section would not hide them;
  # they are kept out of it to make that visible.
  # NOTE(review): presumably a test hook -- confirm.
  def self.secret_hack
    @secret_hack = true
  end

  def self.pwned?
    @secret_hack
  end

  # Loads +config_path+ and merges it over DEFAULT.
  #
  # Raises FileNotFound when the file is not readable.
  def initialize(config_path)
    path = Pathname.new(config_path)
    raise FileNotFound.new(path) unless path.readable?

    realpath = path.realpath
    # An empty YAML file parses to nil/false rather than a Hash; treat it as
    # "no overrides" instead of failing inside merge.
    @data = DEFAULT.merge(YAML.load_file(realpath) || {})
    @path = realpath
  end

  # Resolved path of the loaded config file.
  attr_reader :path

  def log_path()      pathname(log)                        end
  def state_path()    ensure_exists(pathname(state_dir))   end
  def stop_path()     state_path + 'stop'                  end
  def claim_path()    state_path + 'claim'                 end
  def checks_path()   state_path + 'checks'                end
  def notify_path()   state_path + 'notify'                end
  def history_path()  state_path + 'history'               end
  def takeover_path() state_path + 'takeover'              end
  def notify_uri()    URI.parse(setting('notify_uri'))     end

  # Values from the config file win; otherwise they are fetched from the
  # instance meta-data service. Hostname and id are cached after the first
  # fetch, the IP is looked up on every call.
  def meta_data_hostname() @data['meta_data_hostname'] ||= meta_data('local-hostname') end
  def meta_data_id()       @data['meta_data_id']       ||= meta_data('instance-id')    end
  def meta_data_ip()       @data['meta_data_ip']       ||  meta_data('public-ipv4')    end # don't cache

  # True for every known setting key (given as String or Symbol) as well as
  # for regular methods. Accepts the optional second argument of
  # Object#respond_to?.
  def respond_to?(meth, include_private = false)
    @data.key?(meth.to_s) || super
  end

  def awsm_credentials
    {
      'aws_secret_id'  => aws_secret_id,
      'aws_secret_key' => aws_secret_key,
    }
  end

  def fog_credentials
    {
      :aws_access_key_id     => aws_secret_id,
      :aws_secret_access_key => aws_secret_key,
    }
  end

  private

  # Reads +key+ from the instance meta-data service and returns the body.
  def meta_data(key)
    url = "http://169.254.169.254/latest/meta-data/#{key}"
    # Kernel#open no longer opens URLs on Ruby 3; open-uri offers URI.open
    # on newer Rubies. The block form closes the handle after reading.
    if URI.respond_to?(:open)
      URI.open(url) { |io| io.read }
    else
      open(url) { |io| io.read }
    end
  end

  def pathname(path) path && Pathname.new(path) end

  # Creates +path+ (and parents) unless the class-level switch is set.
  def ensure_exists(path)
    path.mkpath unless self.class.pwned?
    path
  end

  # Value of a known setting. Only nil counts as missing, so an explicit
  # `false` in the config file is returned as-is.
  def setting(key)
    value = @data[key]
    raise RequiredSetting.new(path, key) if value.nil?
    value
  end

  # Serves `config.<setting>` for every key present in the merged data;
  # anything else goes to the default NoMethodError handling.
  def method_missing(meth, *args)
    key = meth.to_s
    if @data.key?(key)
      setting(key)
    else
      super
    end
  end
end
25
118
  end
26
119
  end
@@ -1,7 +1,11 @@
1
1
  module EY
2
2
  module Stonith
3
3
  class Data < Struct.new(:hostname, :instance_id, :ip)
4
- def key() instance_id end
4
+ alias_method :key, :instance_id
5
+
6
+ def to_s
7
+ "[#{hostname.inspect}, #{instance_id.inspect}, #{ip.inspect}]"
8
+ end
5
9
  end
6
10
  end
7
11
  end
@@ -23,9 +23,10 @@ module EY
23
23
  end
24
24
 
25
25
  def set(data)
26
- unless get
26
+ unless get # very small race condition (very)
27
27
  redis.lpush(master_key, Marshal.dump(data))
28
28
  @locked = false
29
+ true
29
30
  end
30
31
  end
31
32
 
@@ -34,22 +35,43 @@ module EY
34
35
  end
35
36
 
36
37
  private
37
-
38
+
38
39
  def get
39
40
  result = redis.lindex(master_key, 0) # index 0
40
41
  result && Marshal.load(result)
41
42
  end
42
43
 
44
+ # popping & locking
43
45
  def locked_get
44
- Marshal.load redis.blpop(master_key, 0).last # don't timeout (this number gets passed to the actual redis command)
46
+ Marshal.load redis.blpop(master_key, @config.redis_timeout).last
45
47
  end
46
48
 
47
49
  def master_key
48
- @config.redis_key
50
+ "#{@config.redis_key}:master"
49
51
  end
50
-
52
+
51
53
  def redis
52
- @redis ||= Redis.new(:host => @config.redis_host, :port => @config.redis_port, :db => @config.redis_db, :timeout => 0)
54
+ @redis ||= load_redis
55
+ end
56
+
57
# Opens a connection using the configured host, port, db and timeout,
# checks the server version and returns the connection.
# Aborts when the connection is refused.
def load_redis
  connection = Redis.new(
    :host    => @config.redis_host,
    :port    => @config.redis_port,
    :db      => @config.redis_db,
    :timeout => @config.redis_timeout
  )
  server_version = connection.info[:redis_version]
  check_redis_version server_version
  connection
rescue Errno::ECONNREFUSED
  abort "Unable to connect to redis"
end
64
+
65
# Aborts unless +version+ (a dotted version string) is at least 1.3.1,
# the minimum this code requires for blpop.
def check_redis_version(version)
  parts = version.split('.').map { |num| num.to_i }

  # Arrays compare element by element, so short versions work too:
  # "2" and "2.0" pass, while "1.3" counts as older than 1.3.1 instead of
  # failing on a missing patch number.
  return if (parts <=> [1, 3, 1]) >= 0

  abort "Redis server version [#{version}] is too old.\n" \
        ">= 1.3.1 required for blpop support.\n"
end
54
76
  end
55
77
  end
@@ -30,7 +30,7 @@ module EY
30
30
  end
31
31
 
32
32
  def last() read.last end
33
# History entries: the file contents with surrounding whitespace removed,
# split on SEPARATOR.
def read
  contents = @path.read.to_s
  contents.strip.split(SEPARATOR)
end
34
34
  end
35
35
  end
36
36
  end
data/lib/ey_stonith.rb CHANGED
@@ -11,6 +11,7 @@ module EY
11
11
  autoload :AddressStealer, 'ey_stonith/address_stealer'
12
12
  autoload :Box, 'ey_stonith/box'
13
13
  autoload :CheckRecorder, 'ey_stonith/check_recorder'
14
+ autoload :Commands, 'ey_stonith/commands'
14
15
  autoload :CLI, 'ey_stonith/cli'
15
16
  autoload :Config, 'ey_stonith/config'
16
17
  autoload :Data, 'ey_stonith/data'
@@ -21,15 +22,8 @@ module EY
21
22
  autoload :MetaData, 'ey_stonith/meta_data'
22
23
  autoload :Slave, 'ey_stonith/slave'
23
24
 
24
- def self.logger=(logger) @@logger = logger end
25
# Replaces the shared logger with a new Logger writing to +io+.
def self.log_to(io)
  @@logger = Logger.new(io)
end
25
26
  def self.logger() @@logger end
26
- self.logger = Logger.new(STDOUT)
27
- logger.level = Logger::INFO
28
-
29
- def self.meta_data() @@meta_data end
30
- def self.meta_data=(meta) @@meta_data = meta end
31
- def self.reset_meta_data() @@meta_data = MetaData end
32
- reset_meta_data
33
27
  end
34
28
  end
35
29