deckard 0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,85 @@
1
+ deckard : http monitoring system
2
+
3
+ deckard is an HTTP check monitoring system built on top of CouchDB.
4
+
5
+ license: apache 2
6
+
7
+ Features:
8
+
9
+ * Email and SMS based alerts (through email)
10
+ * Designated on-call sms email address
11
+ * Basic CouchDB replication latency alerts
12
+ * Basic content check alerts
13
+ * Content check alerts with EC2 elastic IP failover
14
+ * All checks are defined in CouchDB (CRUD checks with ReST)
15
+ * Alert priorities (log, email, SMS)
16
+ * Simple setup via cron
17
+ * Basic scheduling to silence alerts
18
+
19
+ Usage:
20
+
21
+ $ deckard ./deckard.yml
22
+
23
+ Setup:
24
+
25
+ * Setup and configure all appropriate databases and alert documents.
26
+ * Create a crontab entry
27
+
28
+ $ crontab -e
29
+ */5 * * * * deckard /path/deckard.yml &> /dev/null
30
+
31
+
32
+ Example documents:
33
+
34
+ On Call document format:
35
+
36
+ {
37
+ "_id": "on_call_person",
38
+ "sms_email": "8675309@jenny.net"
39
+ }
40
+
41
+ For sms_email, use your phone number at your phone provider's SMS-to-email gateway host (for example 8675309@jenny.net).
42
+
43
+
44
+ Failover check document format:
45
+
46
+ {
47
+ "_id": "lb01",
48
+ "url": "http://somecheck.com/check.html",
49
+ "secondary_instance_id": "i-1234",
50
+ "priority": 2,
51
+ "elastic_ip": "127.0.0.1",
52
+ "content": "sometext",
53
+ "failover": true,
54
+ "primary_instance_id": "i-4321"
55
+ }
56
+
57
+ This document needs all the details to cause an elastic ip switch in the case the content is not found on the url.
58
+
59
+
60
+ Replication check format:
61
+
62
+ {
63
+ "_id": "node01_node02",
64
+ "name": "test",
65
+ "master_url": "http://node01:5984/db",
66
+ "slave_url": "http://node02:5984/db",
67
+ "offset": 0,
68
+ "priority": 2,
69
+ "schedule": []
70
+ }
71
+
72
+ This compares the doc counts of the two databases (master minus slave, plus offset) and triggers an alert when the difference reaches doc_behind_threshold or drops below doc_ahead_threshold from the config.
73
+
74
+
75
+ HTTP content check format:
76
+
77
+ {
78
+ "_id": "deckard.com:5984/",
79
+ "url": "http://deckard.com:5984/",
80
+ "content": "couchdb",
81
+ "priority": 2
82
+ }
83
+
84
+
85
+ In all of these documents, priority and schedule are optional fields. Priority is 0, 1 or 2: 0 is log only, 1 is log and email, and 2 is log, email and SMS. The schedule is an array of integers giving the hours of the day (0-23) during which the alert should be silent.
data/bin/deckard ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/ruby
# Command-line entry point. Usage: deckard /path/to/deckard.yml
#
# The config path has to be checked before the library is loaded, because
# lib/deckard/config.rb reads ARGV[0] while it is being required.
if ARGV.empty?
  warn "Usage: #{File.basename($0)} /path/to/deckard.yml"
  exit 1
end

require File.join(File.dirname(__FILE__), '..', 'lib', 'deckard')

Deckard.main
5
+
@@ -0,0 +1,26 @@
1
# Settings shared by every check.
defaults:
  email_to: "my@email.com"        # recipient of priority 1 and priority 2 alert emails
  email_from: "some@email.com"    # sender address used for every alert mail
  email_host: "localhost"         # SMTP host the alert mails are sent through
  db_user:                        # CouchDB credentials; leave both blank for unauthenticated access
  db_password:
  db_host: "localhost"
  db_port: "5984"
  on_call_db: "monitor_on_call"   # database holding the on-call document
  on_call_doc: "on_call_person"   # document whose "sms_email" receives priority 2 alerts
  log_file:                       # passed to the logger when set

# Plain HTTP content checks.
content_check:
  retry_count: 1
  db: "monitor_content_check"

# Content checks that trigger an EC2 elastic IP failover when they fail.
fo_check:
  retry_count: 1
  aws_key: "key"
  aws_secret: "seekrat"
  db: "monitor_fo_check"

# Replication checks; diff = master doc_count - slave doc_count + offset.
rep_check:
  doc_behind_threshold: 5000      # alert when diff >= this value
  doc_ahead_threshold: -100       # alert when diff < this value
  db: "monitor_rep_check"
data/lib/deckard.rb ADDED
@@ -0,0 +1,100 @@
1
+ require 'rubygems'
2
+
3
+ gem 'rest-client', '1.3.0'
4
+ require 'rest_client'
5
+ require 'tmail'
6
+ require 'json'
7
+ require 'cgi'
8
+ require 'net/smtp'
9
+ require 'right_aws'
10
+ require 'mixlib/log'
11
+ require 'mixlib/config'
12
+ require 'yaml'
13
+
14
+ __DIR__ = File.dirname(__FILE__)
15
+
16
+ $LOAD_PATH.unshift __DIR__ unless
17
+ $LOAD_PATH.include?(__DIR__) ||
18
+ $LOAD_PATH.include?(File.expand_path(__DIR__))
19
+
20
+ require 'deckard/config'
21
+ require 'deckard/log'
22
+ require 'deckard/ec2'
23
+ require 'deckard/monitoring'
24
+ require 'deckard/util'
25
+
26
+
27
+ class Deckard
28
# Runs every check document stored in the content-check database, one thread
# per document, and waits for all of them.
#
# Every worker is joined even when one of them raises, so a single failing
# check can no longer abandon the others; the first error is re-raised once
# all workers have finished.
#
# Returns the Array of finished worker threads.
def self.content_check
  retry_count = Deckard::Config.content_check_retry
  nodes = Deckard::Util.get_nodes(Deckard::Config.content_check_db)

  workers = nodes.map do |node|
    Thread.new do
      Deckard::Monitor.content_check(node["url"], node["content"], node["priority"], retry_count, node["schedule"])
    end
  end

  first_error = nil
  workers.each do |worker|
    begin
      # Thread#join re-raises an exception that terminated the worker.
      worker.join
    rescue StandardError => e
      first_error ||= e
    end
  end
  raise first_error if first_error

  workers
end
46
+
47
# Runs every replication check stored in the replication-check database, one
# thread per document, and waits for all of them.
#
# Every worker is joined even when one of them raises; the first error is
# re-raised only after all workers have finished.
#
# Returns the Array of finished worker threads.
def self.rep_check
  nodes = Deckard::Util.get_nodes(Deckard::Config.rep_check_db)

  workers = nodes.map do |node|
    Thread.new do
      Deckard::Monitor.rep_check(node["name"], node["master_url"], node["slave_url"], node["offset"], node["priority"], node["schedule"])
    end
  end

  first_error = nil
  workers.each do |worker|
    begin
      # Thread#join re-raises an exception that terminated the worker.
      worker.join
    rescue StandardError => e
      first_error ||= e
    end
  end
  raise first_error if first_error

  workers
end
64
+
65
# Runs every failover check stored in the failover-check database, one thread
# per document, and waits for all of them.
#
# When a content check fails, Deckard::Monitor.failover is invoked with the
# document's settings. The document's "failover" flag is then switched off,
# but only when it was actually on: a document that is already disabled is no
# longer rewritten on every run.
#
# Every worker is joined even when one of them raises; the first error is
# re-raised only after all workers have finished.
#
# Returns the Array of finished worker threads.
def self.fo_check
  retry_count = Deckard::Config.fo_check_retry
  nodes = Deckard::Util.get_nodes(Deckard::Config.fo_check_db)

  workers = nodes.map do |node|
    Thread.new do
      healthy = Deckard::Monitor.content_check(node["url"], node["content"], node["priority"], retry_count, node["schedule"])
      unless healthy
        Deckard::Monitor.failover(node["elastic_ip"], node["primary_instance_id"], node["secondary_instance_id"], node["priority"], node["schedule"], node["failover"])
        # Nothing to flip when the flag is already false or missing.
        Deckard::Util.flip_failover(node) if node["failover"]
      end
    end
  end

  first_error = nil
  workers.each do |worker|
    begin
      # Thread#join re-raises an exception that terminated the worker.
      worker.join
    rescue StandardError => e
      first_error ||= e
    end
  end
  raise first_error if first_error

  workers
end
87
+
88
# Program entry point: runs the content, replication and failover check
# groups concurrently and waits for all three.
#
# A failure in one group no longer cuts the others short: every group is
# joined first, and only then is the first error re-raised, so the process
# still terminates with that error.
#
# Returns the Array of finished group threads.
def self.main
  workers = [
    Thread.new { content_check },
    Thread.new { rep_check },
    Thread.new { fo_check }
  ]

  first_error = nil
  workers.each do |worker|
    begin
      # Thread#join re-raises an exception that terminated the worker.
      worker.join
    rescue StandardError => e
      first_error ||= e
    end
  end
  raise first_error if first_error

  workers
end
99
+
100
+ end
@@ -0,0 +1,30 @@
1
class Deckard
  # Global settings, populated once at load time from the YAML file whose path
  # is given as the first command-line argument (ARGV[0]).
  class Config
    # The block form of File.open closes the handle as soon as parsing is done;
    # the previous bare File.open left it open for the life of the process.
    monitor_config = File.open(ARGV[0]) { |file| YAML.load(file) }
    extend Mixlib::Config

    defaults = monitor_config["defaults"]
    content_check = monitor_config["content_check"]
    fo_check = monitor_config["fo_check"]
    rep_check = monitor_config["rep_check"]

    configure do |c|
      # Mail, CouchDB and logging settings shared by all checks.
      c[:email_to] = defaults["email_to"]
      c[:email_from] = defaults["email_from"]
      c[:email_host] = defaults["email_host"]
      c[:db_user] = defaults["db_user"]
      c[:db_password] = defaults["db_password"]
      c[:db_host] = defaults["db_host"]
      c[:db_port] = defaults["db_port"]
      c[:on_call_db] = defaults["on_call_db"]
      c[:on_call_doc] = defaults["on_call_doc"]
      c[:log_file] = defaults["log_file"]

      # Content checks.
      c[:content_check_retry] = content_check["retry_count"]
      c[:content_check_db] = content_check["db"]

      # Failover checks.
      c[:fo_check_retry] = fo_check["retry_count"]
      c[:aws_key] = fo_check["aws_key"]
      c[:aws_secret] = fo_check["aws_secret"]
      c[:fo_check_db] = fo_check["db"]

      # Replication checks.
      c[:doc_behind_threshold] = rep_check["doc_behind_threshold"]
      c[:doc_ahead_threshold] = rep_check["doc_ahead_threshold"]
      c[:rep_check_db] = rep_check["db"]
    end
  end
end
@@ -0,0 +1,19 @@
1
class Deckard
  # Thin wrapper around a single RightAws::Ec2 client that is created at load
  # time from the configured AWS key and secret.
  class Ec2
    Ec2 = RightAws::Ec2.new(Deckard::Config.aws_key, Deckard::Config.aws_secret)

    class << self
      # Returns the :instance_id of the first address record reported for
      # +elastic_ip+.
      def get_association(elastic_ip)
        addresses = Ec2.describe_addresses(elastic_ip)
        addresses[0][:instance_id]
      end

      # Associates +elastic_ip+ with +instance_id+.
      def associate_address(instance_id, elastic_ip)
        Ec2.associate_address(instance_id, elastic_ip)
      end

      # Removes the current association of +elastic_ip+.
      def disassociate_address(elastic_ip)
        Ec2.disassociate_address(elastic_ip)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
class Deckard
  # Application logger built on Mixlib::Log. It is initialised with the
  # configured log file only when one is set.
  class Log
    extend Mixlib::Log

    configured_path = Deckard::Config.log_file
    init(configured_path) if configured_path
  end
end
@@ -0,0 +1,103 @@
1
+ class Deckard
2
+ class Monitor
3
+
4
# Fetches +url+ and verifies that the response contains +content+.
#
# url         - address fetched with RestClient.get
# content     - text that must appear in the response
# priority    - alert priority handed to Deckard::Util.alert
# retry_count - maximum number of connection attempts; nil or values below 1
#               count as 1. The old counter started at 1 and was incremented
#               before the comparison, so a retry_count of N >= 2 only ever
#               made N - 1 attempts.
# schedule    - handed through to Deckard::Util.alert
#
# Returns true when the text was found, false when the request kept failing
# or the text was missing (an alert is raised in both failure cases).
def self.content_check(url, content, priority, retry_count, schedule)
  max_attempts = [retry_count.to_i, 1].max
  attempts = 0

  begin
    attempts += 1
    result = RestClient.get(url)
  rescue StandardError
    # Only wait and announce a retry when another attempt really follows.
    if attempts < max_attempts
      Deckard::Log.info("ALERT :: Could not connect to #{url}, retrying ...")
      sleep(3)
      retry
    end

    subject = "ALERT :: Check Content Failed on #{url}"
    body = "Could not connect to #{url}"
    Deckard::Util.alert(priority, subject, body, subject + " -- " + body, schedule)
    return false
  end

  if result.include? content
    Deckard::Log.info("PASS :: Found text \"#{content}\" on #{url}")
    true
  else
    subject = "ALERT :: Check Content Failed on #{url}"
    body = "Could not find text \"#{content}\" at #{url}"
    Deckard::Util.alert(priority, subject, body, subject + " -- " + body, schedule)
    false
  end
end
34
+
35
# Compares the "doc_count" reported by +master_url+ and +slave_url+.
#
# name       - label used in alert and log messages
# master_url - URL returning JSON with a "doc_count" field
# slave_url  - URL returning JSON with a "doc_count" field
# offset     - added to (master - slave); nil is treated as 0
# priority   - alert priority handed to Deckard::Util.alert
# schedule   - handed through to Deckard::Util.alert
#
# An alert is raised when the difference is >= doc_behind_threshold or
# < doc_ahead_threshold. Errors (unreachable URL, unexpected JSON, ...) used
# to be swallowed silently; they are now written to the log so a check that
# cannot run is visible.
def self.rep_check(name, master_url, slave_url, offset, priority, schedule)
  doc_behind_threshold = Deckard::Config.doc_behind_threshold
  doc_ahead_threshold = Deckard::Config.doc_ahead_threshold

  master_result = RestClient.get(master_url)
  slave_result = RestClient.get(slave_url)

  master_doc_count = JSON.parse(master_result)["doc_count"]
  slave_doc_count = JSON.parse(slave_result)["doc_count"]
  doc_count_diff = master_doc_count - slave_doc_count + offset.to_i

  if doc_count_diff >= doc_behind_threshold || doc_count_diff < doc_ahead_threshold
    subject = "ALERT :: Replication for #{name}"
    body = "Master: #{master_url} => Slave: #{slave_url} : off by #{doc_count_diff}"
    Deckard::Util.alert(priority, subject, body, subject + " -- " + body, schedule)
  else
    Deckard::Log.info("PASS :: Replication for #{name} is OK (#{doc_count_diff})")
  end
rescue StandardError => e
  # Best effort: the check must not crash its caller, but the reason it could
  # not complete is recorded.
  Deckard::Log.info("ERROR :: Replication check for #{name} could not be completed: #{e}")
end
59
+
60
# Moves +elastic_ip+ from whichever of the two instances currently holds it
# to the other one.
#
# elastic_ip            - address to move
# primary_instance_id   - first instance of the pair
# secondary_instance_id - second instance of the pair
# priority, schedule    - handed through to Deckard::Util.alert
# failover              - when false or nil nothing is changed and only a log
#                         line is written
#
# The target instance is now determined BEFORE the address is disassociated.
# Previously the address was detached first, so when it was held by neither
# instance it was left unassociated and only then reported as "could not
# failover". Errors are reported through an alert; only StandardError is
# rescued so signals and exit requests are no longer swallowed.
def self.failover(elastic_ip, primary_instance_id, secondary_instance_id, priority, schedule, failover)
  unless failover
    # dont failover
    Deckard::Log.info("ALERT :: not failing over due to failover=false")
    return
  end

  pair = "#{elastic_ip} => #{primary_instance_id} / #{secondary_instance_id}"

  begin
    subject = "ALERT :: #{elastic_ip} attempting failover!"
    body = "#{pair} attempting failover!"
    Deckard::Util.alert(priority, subject, body, subject + " " + body, schedule)

    instance_id = Deckard::Ec2.get_association(elastic_ip)
    target_instance_id =
      if instance_id == primary_instance_id
        secondary_instance_id
      elsif instance_id == secondary_instance_id
        primary_instance_id
      end

    if target_instance_id
      Deckard::Ec2.disassociate_address(elastic_ip)
      Deckard::Log.info("ALERT :: Disassociated #{elastic_ip}")

      Deckard::Ec2.associate_address(target_instance_id, elastic_ip)

      Deckard::Log.info("ALERT :: associated #{elastic_ip} to #{target_instance_id}")
      subject = "ALERT :: Failover Complete for #{elastic_ip} #{target_instance_id}"
      body = "VERIFY THINGS ARE WORKING! #{pair}"
      Deckard::Util.alert(priority, subject, body, subject, schedule)
    else
      # The address is held by neither instance: leave it untouched.
      error = "ALERT :: Could not a failover #{pair}!!"
      log = "ALERT :: Could not a failover #{pair}!! Due to instance_id != primary and secondary"
      Deckard::Util.alert(priority, error, error, log, schedule)
    end
  rescue StandardError => e
    error = "ALERT :: Could not a failover #{pair}!!"
    log = "ALERT :: Could not a failover #{pair}!! Due to #{e}"
    Deckard::Util.alert(priority, error, error, log, schedule)
  end
end
101
+
102
+ end
103
+ end
@@ -0,0 +1,123 @@
1
+ class Deckard
2
+ class Util
3
# Loads every check document stored in the CouchDB database +db_name+.
#
# The ids are listed through _all_docs and each document is then fetched
# individually. Design documents (ids starting with "_design/") are skipped:
# they also appear in _all_docs but are not check definitions. Credentials
# are only put into the URL when both user and password are non-empty.
#
# Returns an Array of Hashes, one per document.
def self.get_nodes(db_name)
  db_user = Deckard::Config.db_user
  db_password = Deckard::Config.db_password
  db_host = Deckard::Config.db_host
  db_port = Deckard::Config.db_port

  db_url =
    if db_user.to_s.empty? || db_password.to_s.empty?
      "http://#{db_host}:#{db_port}/#{db_name}"
    else
      "http://#{db_user}:#{db_password}@#{db_host}:#{db_port}/#{db_name}"
    end

  rows = JSON.parse(RestClient.get("#{db_url}/_all_docs"))["rows"]
  doc_ids = rows.map { |row| row["id"] }.reject { |id| id.to_s.start_with?("_design/") }

  doc_ids.map do |doc_id|
    # Ids may contain "/" or ":" (e.g. "deckard.com:5984/"), so escape them.
    JSON.parse(RestClient.get("#{db_url}/#{CGI.escape(doc_id)}"))
  end
end
34
+
35
# Dispatches an alert according to +priority+.
#
# priority - 1: log and email; 2: log, email and SMS (an email sent to the
#            on-call address); 0: log only. nil or any other value is also
#            logged now (it used to be dropped without a trace).
# subject  - mail subject
# body     - mail body
# log      - line written to the log
# schedule - when the schedule is currently active the alert is downgraded
#            to log only
#
# The on-call address is looked up only for priority 2, so a failing on-call
# lookup can no longer break log-only or email-only alerts.
def self.alert(priority, subject, body, log, schedule)
  email_to = Deckard::Config.email_to

  # if scheduled maintenance set to logging only
  priority = 0 if schedule(schedule)

  case priority
  when 1
    Deckard::Log.info("sending email alert to #{email_to}")
    send_email(email_to, subject, body)
    Deckard::Log.info(log)
  when 2
    sms_email = on_call
    Deckard::Log.info("sending email alert to #{email_to} and sms to #{sms_email}")
    send_email(email_to, subject, body)
    Deckard::Log.info(log)
    send_email("#{sms_email}", subject, body)
  else
    Deckard::Log.info(log)
  end
end
57
+
58
# Reads the on-call document from CouchDB.
#
# Credentials are only put into the URL when both user and password are
# non-empty. Previously they were always interpolated, which produced
# "http://:@host:port/..." for the default configuration without credentials.
#
# Returns the document's "sms_email" value.
def self.on_call
  db_user = Deckard::Config.db_user
  db_password = Deckard::Config.db_password
  db_host = Deckard::Config.db_host
  db_port = Deckard::Config.db_port
  db_name = Deckard::Config.on_call_db
  doc_name = Deckard::Config.on_call_doc

  server_url =
    if db_user.to_s.empty? || db_password.to_s.empty?
      "http://#{db_host}:#{db_port}"
    else
      "http://#{db_user}:#{db_password}@#{db_host}:#{db_port}"
    end

  on_call_json = JSON.parse(RestClient.get("#{server_url}/#{db_name}/#{doc_name}"))
  on_call_json["sms_email"]
end
72
+
73
# Sends a plain mail with +subject+ and +body+ to +email_addr+ through the
# configured SMTP host, using the configured sender address.
def self.send_email(email_addr, subject, body)
  sender = Deckard::Config.email_from
  smtp_host = Deckard::Config.email_host

  message = TMail::Mail.new
  message.to = email_addr
  message.from = sender
  message.subject = subject
  message.date = Time.now
  message.mime_version = '1.0'
  message.body = body

  Net::SMTP.start(smtp_host) do |smtp|
    smtp.send_message(message.to_s, sender, email_addr)
  end
end
93
+
94
# Tells whether alerts are currently silenced.
#
# schedule - Array of hours (as returned by Time#hour) during which alerts
#            are silent; nil means never silent. A single hour is accepted
#            as well because the value is coerced with Array().
#
# Returns true when the current local hour is listed, false otherwise.
def self.schedule(schedule)
  Array(schedule).include?(Time.now.hour)
end
101
+
102
# Switches the "failover" flag of a failover-check document to false.
#
# node - the check document as a Hash; only its "_id" is used
#
# Changes compared to the previous version:
# * credentials are omitted when user or password is nil OR empty (the old
#   == "" test missed nil, which is what an empty YAML value yields);
# * the document id is escaped, because ids may contain "/" or ":";
# * the PUT uses the _rev of the freshly fetched document instead of the
#   possibly stale _rev carried by +node+.
def self.flip_failover(node)
  db_user = Deckard::Config.db_user
  db_password = Deckard::Config.db_password
  db_host = Deckard::Config.db_host
  db_port = Deckard::Config.db_port
  db_name = Deckard::Config.fo_check_db

  db_url =
    if db_user.to_s.empty? || db_password.to_s.empty?
      "http://#{db_host}:#{db_port}/#{db_name}"
    else
      "http://#{db_user}:#{db_password}@#{db_host}:#{db_port}/#{db_name}"
    end

  doc_id = CGI.escape(node["_id"])
  doc_url = "#{db_url}/#{doc_id}"

  doc = JSON.parse(RestClient.get(doc_url))
  doc.store("failover", false)
  current_rev = doc["_rev"]

  RestClient.put("#{doc_url}?rev=#{current_rev}", doc.to_json)
end
121
+
122
+ end
123
+ end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deckard
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 4
8
+ version: "0.4"
9
+ platform: ruby
10
+ authors:
11
+ - joe williams
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-06-04 00:00:00 -07:00
17
+ default_executable:
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: mixlib-config
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ segments:
27
+ - 0
28
+ version: "0"
29
+ type: :runtime
30
+ version_requirements: *id001
31
+ - !ruby/object:Gem::Dependency
32
+ name: mixlib-log
33
+ prerelease: false
34
+ requirement: &id002 !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ segments:
39
+ - 0
40
+ version: "0"
41
+ type: :runtime
42
+ version_requirements: *id002
43
+ - !ruby/object:Gem::Dependency
44
+ name: tmail
45
+ prerelease: false
46
+ requirement: &id003 !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ segments:
51
+ - 0
52
+ version: "0"
53
+ type: :runtime
54
+ version_requirements: *id003
55
+ - !ruby/object:Gem::Dependency
56
+ name: json
57
+ prerelease: false
58
+ requirement: &id004 !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ type: :runtime
66
+ version_requirements: *id004
67
+ - !ruby/object:Gem::Dependency
68
+ name: right_aws
69
+ prerelease: false
70
+ requirement: &id005 !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ type: :runtime
78
+ version_requirements: *id005
79
+ - !ruby/object:Gem::Dependency
80
+ name: rest-client
81
+ prerelease: false
82
+ requirement: &id006 !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - "="
85
+ - !ruby/object:Gem::Version
86
+ segments:
87
+ - 1
88
+ - 3
89
+ - 0
90
+ version: 1.3.0
91
+ type: :runtime
92
+ version_requirements: *id006
93
+ description:
94
+ email: joe@joetify.com
95
+ executables:
96
+ - deckard
97
+ extensions: []
98
+
99
+ extra_rdoc_files:
100
+ - README
101
+ files:
102
+ - bin/deckard
103
+ - lib/deckard/config.rb
104
+ - lib/deckard/ec2.rb
105
+ - lib/deckard/log.rb
106
+ - lib/deckard/monitoring.rb
107
+ - lib/deckard/util.rb
108
+ - lib/deckard.rb
109
+ - config/deckard.yml
110
+ - README
111
+ has_rdoc: true
112
+ homepage: http://github.com/joewilliams/deckard
113
+ licenses: []
114
+
115
+ post_install_message:
116
+ rdoc_options: []
117
+
118
+ require_paths:
119
+ - lib
120
+ required_ruby_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ segments:
125
+ - 0
126
+ version: "0"
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ segments:
132
+ - 0
133
+ version: "0"
134
+ requirements: []
135
+
136
+ rubyforge_project:
137
+ rubygems_version: 1.3.6
138
+ signing_key:
139
+ specification_version: 3
140
+ summary: a monitoring system built on couchdb
141
+ test_files: []
142
+