deckard 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README +13 -9
- data/config/deckard.yml +4 -0
- data/lib/deckard/config.rb +3 -0
- data/lib/deckard/monitoring.rb +11 -10
- data/lib/deckard/util.rb +28 -9
- data/lib/deckard.rb +2 -0
- metadata +17 -5
data/README
CHANGED
@@ -12,7 +12,7 @@ Features:
|
|
12
12
|
* Basic content check alerts
|
13
13
|
* Content check alerts with EC2 elastic IP failover
|
14
14
|
* All checks are defined in CouchDB (CRUD checks with ReST)
|
15
|
-
* Alert priorities (log, email, SMS)
|
15
|
+
* Alert priorities (log, email, SMS and notifo)
|
16
16
|
* Simple setup via cron
|
17
17
|
* Basic scheduling to silence alerts
|
18
18
|
|
@@ -36,9 +36,10 @@ On Call document format:
|
|
36
36
|
{
|
37
37
|
"_id": "on_call_person",
|
38
38
|
"sms_email": "8675309@jenny.net"
|
39
|
+
"notifo_username" : "jenny"
|
39
40
|
}
|
40
41
|
|
41
|
-
For sms_email you will need to put in the phone number and sms to email host for your phone provider.
|
42
|
+
For sms_email you will need to put in the phone number and the SMS-to-email host for your phone provider. If you provide both an sms_email and a notifo_username, the SMS will only be used as a backup if something goes wrong with notifo, which saves you money on your text message bill! Note: you need the notifo application on your phone to use the notifo support.
|
42
43
|
|
43
44
|
|
44
45
|
Failover check document format:
|
@@ -61,12 +62,15 @@ Replication check format:
|
|
61
62
|
|
62
63
|
{
|
63
64
|
"_id": "node01_node02",
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
65
|
+
"name": "test",
|
66
|
+
"master_url": "http://node01/db",
|
67
|
+
"slave_url": "http://node02/db",
|
68
|
+
"offset": 0,
|
69
|
+
"priority": 1,
|
70
|
+
"schedule": [
|
71
|
+
2,
|
72
|
+
3
|
73
|
+
]
|
70
74
|
}
|
71
75
|
|
72
76
|
This will test the doc counts between two databases; if they drift out of sync by more than the behind/ahead thresholds specified in the config, an alert is triggered.
|
@@ -82,4 +86,4 @@ HTTP content check format:
|
|
82
86
|
}
|
83
87
|
|
84
88
|
|
85
|
-
For all of these, priority and schedule are optional fields in the documents. Priority is 0, 1 or 2: 0 is log only, 1 is log and email, and 2 is log, email and sms. The schedule is an array containing integers for the hours during which the alert should be silent.
|
89
|
+
For all of these, priority and schedule are optional fields in the documents. Priority is 0, 1 or 2: 0 is log only, 1 is log and email, and 2 is log, email and sms. The schedule is an array containing integers for the hours during which the alert should be silent. Check out the replication check definition above for an example.
|
data/config/deckard.yml
CHANGED
data/lib/deckard/config.rb
CHANGED
@@ -25,6 +25,9 @@ class Deckard
|
|
25
25
|
c[:doc_behind_threshold] = monitor_config["rep_check"]["doc_behind_threshold"]
|
26
26
|
c[:doc_ahead_threshold] = monitor_config["rep_check"]["doc_ahead_threshold"]
|
27
27
|
c[:rep_check_db] = monitor_config["rep_check"]["db"]
|
28
|
+
|
29
|
+
c[:notifo_user] = monitor_config["notifo"]["user"]
|
30
|
+
c[:notifo_apikey] = monitor_config["notifo"]["apikey"]
|
28
31
|
end
|
29
32
|
end
|
30
33
|
end
|
data/lib/deckard/monitoring.rb
CHANGED
@@ -14,18 +14,18 @@ class Deckard
|
|
14
14
|
subject = "ALERT :: Check Content Failed on #{url}"
|
15
15
|
body = "Could not connect to #{url}"
|
16
16
|
log = subject + " -- " + body
|
17
|
-
Deckard::Util.alert(priority, subject, body, log, schedule)
|
17
|
+
Deckard::Util.alert(priority, subject, body, log, schedule, url)
|
18
18
|
check = false
|
19
19
|
end
|
20
20
|
else
|
21
21
|
retries = 1
|
22
|
-
if result.include?
|
22
|
+
if result.include?(content)
|
23
23
|
Deckard::Log.info("PASS :: Found text \"#{content}\" on #{url}")
|
24
24
|
else
|
25
25
|
subject = "ALERT :: Check Content Failed on #{url}"
|
26
26
|
body = "Could not find text \"#{content}\" at #{url}"
|
27
27
|
log = subject + " -- " + body
|
28
|
-
Deckard::Util.alert(priority, subject, body, log, schedule)
|
28
|
+
Deckard::Util.alert(priority, subject, body, log, schedule, url)
|
29
29
|
check = false
|
30
30
|
end
|
31
31
|
end
|
@@ -48,7 +48,7 @@ class Deckard
|
|
48
48
|
subject = "ALERT :: Replication for #{name}"
|
49
49
|
body = "Master: #{master_url} => Slave: #{slave_url} : off by #{doc_count_diff}"
|
50
50
|
log = subject + " -- " + body
|
51
|
-
Deckard::Util.alert(priority, subject, body, log, schedule)
|
51
|
+
Deckard::Util.alert(priority, subject, body, log, schedule, master_url)
|
52
52
|
else
|
53
53
|
Deckard::Log.info("PASS :: Replication for #{name} is OK (#{doc_count_diff})")
|
54
54
|
end
|
@@ -63,7 +63,7 @@ class Deckard
|
|
63
63
|
subject = "ALERT :: #{elastic_ip} attempting failover!"
|
64
64
|
body = "#{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id} attempting failover!"
|
65
65
|
log = subject + " " + body
|
66
|
-
Deckard::Util.alert(priority, subject, body, log, schedule)
|
66
|
+
Deckard::Util.alert(priority, subject, body, log, schedule, "http://#{elastic_ip}")
|
67
67
|
|
68
68
|
instance_id = Deckard::Ec2.get_association(elastic_ip)
|
69
69
|
Deckard::Ec2.disassociate_address(elastic_ip)
|
@@ -75,23 +75,24 @@ class Deckard
|
|
75
75
|
Deckard::Log.info("ALERT :: associated #{elastic_ip} to #{secondary_instance_id}")
|
76
76
|
subject = "ALERT :: Failover Complete for #{elastic_ip} #{secondary_instance_id}"
|
77
77
|
body = "VERIFY THINGS ARE WORKING! #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}"
|
78
|
-
Deckard::Util.alert(priority, subject, body, subject, schedule)
|
78
|
+
Deckard::Util.alert(priority, subject, body, subject, schedule, "http://#{elastic_ip}")
|
79
79
|
elsif instance_id == secondary_instance_id
|
80
80
|
Deckard::Ec2.associate_address(primary_instance_id, elastic_ip)
|
81
81
|
|
82
82
|
Deckard::Log.info("ALERT :: associated #{elastic_ip} to #{primary_instance_id}")
|
83
83
|
subject = "ALERT :: Failover Complete for #{elastic_ip} #{primary_instance_id}"
|
84
84
|
body = "VERIFY THINGS ARE WORKING! #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}"
|
85
|
-
Deckard::Util.alert(priority, subject, body, subject, schedule)
|
85
|
+
Deckard::Util.alert(priority, subject, body, subject, schedule, "http://#{elastic_ip}")
|
86
86
|
else
|
87
87
|
error = "ALERT :: Could not a failover #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}!!"
|
88
88
|
log = "ALERT :: Could not a failover #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}!! Due to instance_id != primary and secondary"
|
89
|
-
Deckard::Util.alert(priority, error, error, log, schedule)
|
89
|
+
Deckard::Util.alert(priority, error, error, log, schedule, "http://#{elastic_ip}")
|
90
90
|
end
|
91
91
|
rescue Exception => e
|
92
92
|
error = "ALERT :: Could not a failover #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}!!"
|
93
|
-
log = "ALERT :: Could not a failover #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}!!
|
94
|
-
Deckard::
|
93
|
+
log = "ALERT :: Could not a failover #{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}!!"
|
94
|
+
Deckard::Log.error(e)
|
95
|
+
Deckard::Util.alert(priority, error, error, log, schedule, "http://#{elastic_ip}")
|
95
96
|
end
|
96
97
|
else
|
97
98
|
# dont failover
|
data/lib/deckard/util.rb
CHANGED
@@ -32,9 +32,9 @@ class Deckard
|
|
32
32
|
node_list
|
33
33
|
end
|
34
34
|
|
35
|
-
def self.alert(priority, subject, body, log, schedule)
|
35
|
+
def self.alert(priority, subject, body, log, schedule, url)
|
36
36
|
email_to = Deckard::Config.email_to
|
37
|
-
|
37
|
+
on_call_contacts = on_call()
|
38
38
|
|
39
39
|
# if scheduled maintenance set to logging only
|
40
40
|
if schedule(schedule) == true
|
@@ -48,10 +48,23 @@ class Deckard
|
|
48
48
|
send_email(email_to, subject, body)
|
49
49
|
Deckard::Log.info(log)
|
50
50
|
elsif priority == 2
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
begin
|
52
|
+
if on_call_contacts.has_key?("notifo_username")
|
53
|
+
Deckard::Log.info("sending notifo alert to #{on_call_contacts["notifo_username"]}")
|
54
|
+
send_notifo(on_call_contacts["notifo_username"], subject, url)
|
55
|
+
Deckard::Log.info(log)
|
56
|
+
else
|
57
|
+
Deckard::Log.info("sending email alert to #{email_to} and sms to #{on_call_contacts["sms_email"]}")
|
58
|
+
send_email(email_to, subject, body)
|
59
|
+
Deckard::Log.info(log)
|
60
|
+
send_email("#{on_call_contacts["sms_email"]}", subject, body)
|
61
|
+
end
|
62
|
+
rescue
|
63
|
+
Deckard::Log.info("sending email alert to #{email_to} and sms to #{on_call_contacts["sms_email"]}")
|
64
|
+
send_email(email_to, subject, body)
|
65
|
+
Deckard::Log.info(log)
|
66
|
+
send_email("#{on_call_contacts["sms_email"]}", subject, body)
|
67
|
+
end
|
55
68
|
end
|
56
69
|
end
|
57
70
|
|
@@ -65,9 +78,9 @@ class Deckard
|
|
65
78
|
|
66
79
|
doc_url = "http://#{db_user}:#{db_password}@#{db_host}:#{db_port}/#{db_name}/#{doc_name}"
|
67
80
|
|
68
|
-
|
69
|
-
|
70
|
-
|
81
|
+
on_call_json = RestClient.get doc_url
|
82
|
+
on_call = JSON.parse(on_call_json)
|
83
|
+
on_call
|
71
84
|
end
|
72
85
|
|
73
86
|
def self.send_email(email_addr, subject, body)
|
@@ -91,6 +104,12 @@ class Deckard
|
|
91
104
|
end
|
92
105
|
end
|
93
106
|
|
107
|
+
def self.send_notifo(username, subject, url)
|
108
|
+
notifo = Notifo.new(Deckard::Config.notifo_user, Deckard::Config.notifo_apikey)
|
109
|
+
response = notifo.post(username, subject, "deckard alert", url)
|
110
|
+
Deckard::Log.info("Notifo response: #{response}")
|
111
|
+
end
|
112
|
+
|
94
113
|
def self.schedule(schedule)
|
95
114
|
if schedule.nil?
|
96
115
|
false
|
data/lib/deckard.rb
CHANGED
@@ -10,6 +10,7 @@ require 'right_aws'
|
|
10
10
|
require 'mixlib/log'
|
11
11
|
require 'mixlib/config'
|
12
12
|
require 'yaml'
|
13
|
+
require 'notifo'
|
13
14
|
|
14
15
|
__DIR__ = File.dirname(__FILE__)
|
15
16
|
|
@@ -25,6 +26,7 @@ require 'deckard/util'
|
|
25
26
|
|
26
27
|
|
27
28
|
class Deckard
|
29
|
+
|
28
30
|
def self.content_check
|
29
31
|
retry_count = Deckard::Config.content_check_retry
|
30
32
|
db_name = Deckard::Config.content_check_db
|
metadata
CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 4
|
8
|
-
version: "0.4"
|
7
|
+
- 5
|
8
|
+
version: "0.5"
|
9
9
|
platform: ruby
|
10
10
|
authors:
|
11
11
|
- joe williams
|
@@ -13,7 +13,7 @@ autorequire:
|
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
15
|
|
16
|
-
date: 2010-
|
16
|
+
date: 2010-07-12 00:00:00 -07:00
|
17
17
|
default_executable:
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
@@ -77,9 +77,21 @@ dependencies:
|
|
77
77
|
type: :runtime
|
78
78
|
version_requirements: *id005
|
79
79
|
- !ruby/object:Gem::Dependency
|
80
|
-
name: rest-client
|
80
|
+
name: notifo
|
81
81
|
prerelease: false
|
82
82
|
requirement: &id006 !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :runtime
|
90
|
+
version_requirements: *id006
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: rest-client
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
83
95
|
requirements:
|
84
96
|
- - "="
|
85
97
|
- !ruby/object:Gem::Version
|
@@ -89,7 +101,7 @@ dependencies:
|
|
89
101
|
- 0
|
90
102
|
version: 1.3.0
|
91
103
|
type: :runtime
|
92
|
-
version_requirements: *id006
|
104
|
+
version_requirements: *id007
|
93
105
|
description:
|
94
106
|
email: joe@joetify.com
|
95
107
|
executables:
|