continuent-monitors-nagios 0.0.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/tungsten_nagios_backups +140 -0
- data/bin/tungsten_nagios_connector +10 -3
- data/bin/tungsten_nagios_latency +159 -0
- data/bin/tungsten_nagios_monitor_threads +2 -0
- data/bin/tungsten_nagios_online +134 -0
- data/bin/tungsten_nagios_policy +61 -0
- data/bin/tungsten_nagios_progress +105 -0
- data/bin/tungsten_nagios_services +75 -0
- metadata +16 -18
- data/bin/check_tungsten.sh +0 -576
- data/bin/check_tungsten_backups +0 -70
- data/bin/check_tungsten_latency +0 -172
- data/bin/check_tungsten_online +0 -105
- data/bin/check_tungsten_policy +0 -61
- data/bin/check_tungsten_progress +0 -81
- data/bin/check_tungsten_services +0 -95
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check reporting whether the local cluster runs under the AUTOMATIC
# policy. The result helpers used below (ok, critical) and the option/run
# machinery are not defined in this file; presumably they come from the
# included TungstenScript / TungstenNagiosMonitor modules -- confirm there.
class ContinuentNagiosMonitorPolicy
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Entry point for the check: requires the host to be a manager with the
  # manager process running, then reports the cluster policy.
  def main
    critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
    critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")

    policy = TI.status.policy
    summary = "Cluster is in #{policy} mode"

    # Only the AUTOMATIC policy is reported as healthy.
    policy == "AUTOMATIC" ? ok(summary) : critical(summary)
  end

  # Registers the description shown for this script on top of the inherited
  # configuration.
  def configure
    super()

    description("Check that the local cluster is running in the AUTOMATIC policy")
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_policy"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that samples appliedLastSeqno twice, a configurable delay
# apart, and reports whether the replication service advanced in between.
# ok/critical, opt/opt_default/add_option and run are not defined in this
# file; presumably they come from the included modules -- confirm there.
class ContinuentNagiosMonitorProgress
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Entry point for the check.
  def main
    critical("The server is not a Tungsten Replicator") unless TI.is_replicator?
    critical("The Tungsten Replicator is not running") unless TI.is_running?("replicator")

    # Commercial installations additionally need a running manager.
    if TI.is_commercial?
      critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
      critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")
    end

    opt_default(:service, TI.default_dataservice)
    critical("The --service option was not given") if opt(:service).nil?

    unless TI.trepctl_value(opt(:service), "state") == "ONLINE"
      critical("The #{opt(:service)} replication service is not ONLINE")
    end

    # The value is stringified and converted with to_f, so a missing or
    # non-numeric value becomes 0.0 instead of raising.
    read_seqno = lambda do
      TI.trepctl_value(opt(:service), "appliedLastSeqno").to_s.to_f
    end

    seqno_before = read_seqno.call

    # On commercial installs a heartbeat is issued to force activity.
    TI.ensure_cctrl("cluster heartbeat") if TI.is_commercial?

    pause = opt(:delay)
    if pause.is_a?(Integer)
      TU.debug("Go to sleep for #{pause} seconds")
      sleep(pause)
    end

    seqno_after = read_seqno.call

    if seqno_after - seqno_before > 0
      ok("Tungsten Replicator #{opt(:service)} service is making progress")
    else
      critical("Tungsten Replicator #{opt(:service)} service did not show progress")
    end
  end

  # Registers the description and the --delay / --service options on top of
  # the inherited configuration.
  def configure
    super()

    description("Check that the replication service is making progress. For Continuent Tungsten installations, a heartbeat command will be run to force activity.")

    delay_option = {
      :on => "--delay String",
      :help => "The number of seconds to wait when monitoring progress",
      :parse => method(:parse_integer_option),
      :default => 1
    }
    add_option(:delay, delay_option)

    service_option = {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    }
    add_option(:service, service_option)
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_progress"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that every service enabled for this host (replicator,
# manager, connector) is running. ok/critical and run are not defined in this
# file; presumably they come from the included modules -- confirm there.
class ContinuentNagiosMonitorServices
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Entry point for the check.
  def main
    enabled = []
    stopped = []

    %w[replicator manager connector].each do |name|
      # Only services whose host_enable_<name> setting is the string "true"
      # are checked.
      next unless TI.setting(TI.setting_key(HOSTS, "host_enable_" + name)) == "true"

      enabled << name
      stopped << name unless TI.is_running?(name)
    end

    if stopped.size > 0
      critical("#{stopped.join(', ')} #{TU.pluralize(stopped, 'is', 'are')} not running")
    elsif enabled.size == 0
      # Nothing enabled is reported as a failure rather than a silent pass.
      critical("No services were checked")
    else
      ok("All services (#{enabled.join(', ')}) are running")
    end
  end

  # Registers the description shown for this script on top of the inherited
  # configuration.
  def configure
    super()

    description("Check that all configured services for the installation are running.")
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_services"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: continuent-monitors-nagios
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Continuent
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: continuent-tools-monitoring
|
@@ -16,38 +16,36 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.5.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.5.0
|
27
27
|
description:
|
28
28
|
email: info@continuent.com
|
29
29
|
executables:
|
30
|
-
- check_tungsten.sh
|
31
|
-
- check_tungsten_backups
|
32
|
-
- check_tungsten_latency
|
33
|
-
- check_tungsten_online
|
34
|
-
- check_tungsten_policy
|
35
|
-
- check_tungsten_progress
|
36
|
-
- check_tungsten_services
|
30
|
+
- tungsten_nagios_backups
|
37
31
|
- tungsten_nagios_connector
|
32
|
+
- tungsten_nagios_latency
|
38
33
|
- tungsten_nagios_monitor_threads
|
34
|
+
- tungsten_nagios_online
|
35
|
+
- tungsten_nagios_policy
|
36
|
+
- tungsten_nagios_progress
|
37
|
+
- tungsten_nagios_services
|
39
38
|
extensions: []
|
40
39
|
extra_rdoc_files: []
|
41
40
|
files:
|
42
|
-
- bin/check_tungsten.sh
|
43
|
-
- bin/check_tungsten_backups
|
44
|
-
- bin/check_tungsten_latency
|
45
|
-
- bin/check_tungsten_online
|
46
|
-
- bin/check_tungsten_policy
|
47
|
-
- bin/check_tungsten_progress
|
48
|
-
- bin/check_tungsten_services
|
41
|
+
- bin/tungsten_nagios_backups
|
49
42
|
- bin/tungsten_nagios_connector
|
43
|
+
- bin/tungsten_nagios_latency
|
50
44
|
- bin/tungsten_nagios_monitor_threads
|
45
|
+
- bin/tungsten_nagios_online
|
46
|
+
- bin/tungsten_nagios_policy
|
47
|
+
- bin/tungsten_nagios_progress
|
48
|
+
- bin/tungsten_nagios_services
|
51
49
|
- LICENSE
|
52
50
|
- README.md
|
53
51
|
homepage: https://github.com/continuent/continuent-monitors-nagios
|
data/bin/check_tungsten.sh
DELETED
@@ -1,576 +0,0 @@
|
|
1
|
-
#!/bin/bash

# TODO
#  - Work out the cluster names in a composite data service.
#  - Determine the individual services in a replicator so the status of each
#    one can be printed.
#  - Remove host logging - stop duplicate emails across multiple hosts?

HOST=$(hostname)

# ---- Start Configuration Options ------------------------------------------
# These can be overridden by command line options or from
# $CONTINUENT_ROOT/share/check_tungsten.cfg

# Set to 1 if this host is running a connector, otherwise 0
CONNECTOR=0
# Set to 1 if this host is running a cluster, otherwise 0
CLUSTER=0
# Set to 1 if this host is running a replicator, otherwise 0
REPLICATOR=0
# Replicator port
REPLICATOR_PORT=10000
# Home dir for the replicator
REPLICATOR_HOME=/opt/continuent/
# Names of the individual clusters in a composite data service
SERVICES=''
# Set the email address here or pass it via the email= command line option
EMAIL=''
# Set to 1 to check disk space
DISK=0
# Set to 1 to enable the ELB socket check
CHECK_ELB=0

SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
# Only send an email every x minutes for a specific problem, to stop the
# script from spamming
LOCK_TIMEOUT=180
# Slave lag to report on
LAG=60
# Number of seconds to wait for a connector response
CONNECTOR_TIMEOUT=10
# Percentage full at which a disk warning is sent
DISK_WARNING=80
SENDMAILBIN=/usr/sbin/sendmail
# ---- End Configuration Options --------------------------------------------

# Run state and working paths
SENDMAIL=0
DEBUG=0
LOG=/opt/continuent/share/check_tungsten.log
LOCK_DIR=/opt/continuent/share/tungsten_locks
|
34
|
-
|
35
|
-
function float_cond()
|
36
|
-
{
|
37
|
-
local cond=0
|
38
|
-
if [[ $# -gt 0 ]]; then
|
39
|
-
cond=$(echo "$*" | bc -q 2>&1)
|
40
|
-
if [[ $? -ne 0 ]]; then
|
41
|
-
echo "Error: $cond"
|
42
|
-
exit 1
|
43
|
-
fi
|
44
|
-
if [[ -z "$cond" ]]; then cond=0; fi
|
45
|
-
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
|
46
|
-
fi
|
47
|
-
local stat=$((cond == 0))
|
48
|
-
return $stat
|
49
|
-
}
|
50
|
-
|
51
|
-
# Print an informational message when debug output is enabled.
#   $1 - message text
# Prints "INFO : <message>" to stdout only when DEBUG is 1. DEBUG is quoted so
# that an empty or unset value simply disables output instead of making the
# test itself fail with a syntax error.
info ()
{
    if [ "$DEBUG" == 1 ]; then echo "INFO : $1"; fi
}
|
55
|
-
# Print an error message when debug output is enabled.
#   $1 - message text
# Prints "ERROR : <message>" to stdout only when DEBUG is 1; it does not stop
# the script. DEBUG is quoted so that an empty or unset value simply disables
# output instead of making the test itself fail with a syntax error.
error ()
{
    if [ "$DEBUG" == 1 ]; then echo "ERROR : $1"; fi
}
|
59
|
-
# Report a fatal problem and stop.
#   $1 - message text
# Always prints "SEVERE : <message>" to stdout (regardless of DEBUG) and then
# exits the current shell with status 1. When called inside a command
# substitution only that subshell exits.
severe ()
{
    printf 'SEVERE : %s\n' "$1"
    exit 1
}
|
64
|
-
# Print the value of a setting from $CONTINUENT_ROOT/conf/tungsten.cfg.
#   $1 - setting name; it is matched as a double-quoted token in the file
# The first matching line is used: the text between the first and second ":"
# is taken and all commas, double quotes and spaces are removed from it.
# Calls severe (exit 1) when the file or the setting cannot be found. The
# severe output is redirected to stderr because this function is used as
# $(getSetting name): on stdout the error text would be captured as the value.
# Working variables are local so the script-level CFG is not overwritten.
getSetting ()
{
    local cfg="$CONTINUENT_ROOT/conf/tungsten.cfg"
    local value

    if [ ! -f "$cfg" ]
    then
        severe "Unable to find $cfg" >&2
    fi

    value=$(grep "\"$1\"" "$cfg" | cut -d ':' -f2 | head -1 | sed 's/[," ]//g')
    if [ -z "$value" ]
    then
        severe "Unable to find $1 in $cfg" >&2
    fi

    echo "$value"
}
|
83
|
-
|
84
|
-
|
85
|
-
# Load any continuent variables.
# When CONTINUENT_ROOT is not already set, source the user's profile files
# (whichever exist), which may define it. The variable is quoted so a value
# containing spaces does not break the test.
if [ -z "$CONTINUENT_ROOT" ]
then
    [ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
    [ -f "$HOME/.profile" ] && . "$HOME/.profile"
fi
|
92
|
-
|
93
|
-
# Switch on a flag variable by name.
#   $1 - name of the variable to set to 1
# The previous body was "$1=1", which the shell runs as a command named e.g.
# "CLUSTER=1" instead of performing an assignment; printf -v assigns to the
# variable whose name is held in $1.
function sOpt()
{
    printf -v "$1" '%s' 1
    info "$1 switched on via command line"
}
|
98
|
-
|
99
|
-
# Assign the value part of a key=value argument to a variable by name.
#   $1 - name of the variable to set
#   $2 - the raw argument, e.g. "replicator_port=10001"
#   $3 - the option name (key), e.g. "replicator_port"
# The argument is split on "="; each non-empty piece that differs from the
# key is assigned in turn, so the last such piece wins. With no such piece
# the variable keeps its current value.
# Fixes two defects: the key was compared against the literal string '$3'
# (single quotes prevent expansion), and "$1=$i" was executed as a command
# rather than an assignment.
function mOpt()
{
    local piece

    while IFS= read -r piece
    do
        if [ -n "$piece" ] && [ "$piece" != "$3" ]
        then
            printf -v "$1" '%s' "$piece"
        fi
    done < <(printf '%s\n' "$2" | tr "=" "\n")
}
|
110
|
-
|
111
|
-
#Parse the command line options

# Assign the value part of a key=value argument to a variable by name.
#   $1 - name of the variable to set
#   $2 - option name (the key)
#   $3 - the raw argument as given on the command line
# The argument is split on "="; each non-empty piece that differs from the
# option name is assigned in turn, so the last such piece wins. With no such
# piece (e.g. a bare "replicator_port") the variable keeps its current value.
# Pieces are read line by line instead of via an unquoted $(...) expansion,
# so values are not word-split or glob-expanded.
setOptFromArg ()
{
    local piece

    while IFS= read -r piece
    do
        if [ -n "$piece" ] && [ "$piece" != "$2" ]
        then
            printf -v "$1" '%s' "$piece"
        fi
    done < <(printf '%s\n' "$3" | tr "=" "\n")
}

# Flags are bare words (cluster, connector, ...); valued options are given as
# name=value. Any unrecognised argument prints the usage and exits with 1.
for arg in "$@"
do
    case "$arg" in
        -v)
            DEBUG=1
            info "Debug mode set"
            ;;
        -vv)
            DEBUG=1
            info "INFO : Extended Debug mode set"
            # Trace every command from here on.
            set -x
            ;;
        cluster)
            CLUSTER=1
            info "CLUSTER switched on via command line"
            ;;
        connector)
            CONNECTOR=1
            info "CONNECTOR switched on via command line"
            ;;
        replicator)
            REPLICATOR=1
            info "REPLICATOR switched on via command line"
            ;;
        check_elb)
            CHECK_ELB=1
            info "CHECK_ELB switched on via command line"
            ;;
        replicator_port*)
            setOptFromArg REPLICATOR_PORT replicator_port "$arg"
            info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
            ;;
        replicator_home*)
            setOptFromArg REPLICATOR_HOME replicator_home "$arg"
            info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
            ;;
        services*)
            setOptFromArg SERVICES services "$arg"
            info "SERVICES $SERVICES passed via the command line"
            ;;
        email*)
            setOptFromArg EMAIL email "$arg"
            info "EMAIL $EMAIL passed via the command line"
            ;;
        config*)
            setOptFromArg FILE config "$arg"
            info "Config File $FILE passed via the command line"
            ;;
        disk)
            DISK=1
            info "DISK switched on via command line"
            ;;
        *)
            echo "Unknown command line option passed $arg"
            # Lists every option accepted above.
            echo "Valid options are -v,-vv,cluster,connector,replicator,check_elb,disk,replicator_port=??,replicator_home=??,services=??,email=??,config=??"
            exit 1
            ;;
    esac
done
|
193
|
-
|
194
|
-
|
195
|
-
# Environment needed by the cluster and connector checks: CONTINUENT_ROOT
# must be set and contain share/env.sh, which is sourced; the optional
# share/check_tungsten.cfg defaults file is sourced if present; and a mysql
# client must be available (MYSQL may be preset by either sourced file).
if [ "$CLUSTER" == 1 ] || [ "$CONNECTOR" == 1 ]
then
    if [ -z "$CONTINUENT_ROOT" ]
    then
        # The "$" is escaped so the variable NAME is printed; the variable
        # itself is empty on this path and would expand to nothing.
        severe "\$CONTINUENT_ROOT is not set - unable to continue"
    fi
    if [ ! -f "$CONTINUENT_ROOT/share/env.sh" ]
    then
        severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
    fi

    . "$CONTINUENT_ROOT/share/env.sh"

    #Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
    CFG=$CONTINUENT_ROOT/share/check_tungsten.cfg

    if [ -f "$CFG" ]
    then
        info "Loading settings from $CFG"
        . "$CFG"
    fi

    if [ -z "$MYSQL" ]
    then
        MYSQL=$(which mysql 2>/dev/null)

        if [ "$MYSQL" == "" ]
        then
            severe " Unable to find the mysql command line program"
        fi
    fi
fi
|
226
|
-
|
227
|
-
#If a file is passed from the command line load any variables from there
# FILE is set by the config= command line option. It is sourced after the
# built-in defaults, so assignments in it override them. The path is quoted
# throughout so that a path containing spaces is handled.
if [ -n "$FILE" ]
then
    if [ ! -f "$FILE" ]
    then
        severe "The file specified in the command line $FILE does not exist"
    fi

    info "Loading settings from $FILE"
    . "$FILE"
fi
|
238
|
-
|
239
|
-
#Parameter and host validation
# Each failed requirement below stops the script through severe.

# bc is required by float_cond for the latency comparison.
BC=$(which bc 2>/dev/null)

if [ "$BC" == "" ]
then
    severe " Unable to find the command bc - please install"
fi

if [ "$EMAIL" == "" ]
then
    severe " email must be specified"
fi

if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
then
    severe " No option specified, select either connector, cluster or replicator"
fi

# The lock directory holds one marker file per reported problem.
if [ -d "$LOCK_DIR" ]
then
    if [ ! -w "$LOCK_DIR" ]
    then
        severe " The locks dir $LOCK_DIR is not writable"
    fi
else
    info "Creating locks dir"
    # Stop here if the directory cannot be created: without it no lock file
    # can ever be written, so every run would send mail again.
    mkdir -p "$LOCK_DIR" || severe " Unable to create the locks dir $LOCK_DIR"
fi

if [ -z "$MAILPROG" ]
then
    MAILPROG=$(which mail 2>/dev/null)

    if [ "$MAILPROG" == "" ]
    then
        severe " Unable to find a mail program"
    fi
fi

if [ -z "$SENDMAILBIN" ]
then
    SENDMAILBIN=$(which sendmail 2>/dev/null)

    if [ "$SENDMAILBIN" == "" ]
    then
        severe " Unable to find a sendmail program"
    fi
fi

# Start every run with a fresh report file.
if [ -f "$LOG" ]
then
    rm "$LOG"
fi

#Expire old Locks
info "Deleting Locks older than $LOCK_TIMEOUT min"
find "$LOCK_DIR"/* -type f -mmin +"$LOCK_TIMEOUT" -delete 2> /dev/null
|
298
|
-
|
299
|
-
#Check the connector status
# Two stages: first that the connector process is reported as running, then
# (when the timeout command exists) that it answers a trivial query. Each
# failure appends a line to $LOG and requests an email unless the matching
# lock file already exists. With CHECK_ELB enabled, a connector that answered
# the query gets its xinetd check re-enabled.
if [ "$CONNECTOR" == 1 ]
then
    connector_ok_to_allow_elb=0
    info "Running Connector Tests"
    CONN=$("$CONTINUENT_ROOT"/tungsten/cluster-home/bin/check_tungsten_services -c | grep -v OK | wc -l)
    if [ "$CONN" -ne 0 ]
    then
        error " Connector is not running"
        echo "Connector is not running on $HOST - Investigate" >> "$LOG"
        if [ ! -f "$LOCK_DIR/con_running.lck" ]
        then
            SENDMAIL=1
            touch "$LOCK_DIR/con_running.lck"
        else
            info "Not sending Email lock file exists"
        fi
    else
        info "Connector is running OK"

        TIMEOUT=$(which timeout 2>/dev/null)

        if [ "$TIMEOUT" == "" ]
        then
            info "timeout command not found - unable to check if the connector is responding"
        else
            info "Checking Connector is responding to queries"
            # getSetting runs in a command substitution, so an exit inside it
            # only ends that subshell. Check the status here so that a missing
            # setting stops the script instead of being used as a credential.
            CON_USER=$(getSetting connector_user) || severe "Unable to read connector_user from $CONTINUENT_ROOT/conf/tungsten.cfg"
            CON_PW=$(getSetting connector_password) || severe "Unable to read connector_password from $CONTINUENT_ROOT/conf/tungsten.cfg"
            CON_PORT=$(getSetting connector_listen_port) || severe "Unable to read connector_listen_port from $CONTINUENT_ROOT/conf/tungsten.cfg"
            # NOTE(review): the password is passed on the command line and is
            # therefore visible in the process list while mysql runs.
            CHECK=$(timeout -s HUP "$CONNECTOR_TIMEOUT" $MYSQL -P"$CON_PORT" -u "$CON_USER" -p"$CON_PW" -h "$HOSTNAME" --skip-column-names -Be"select 'ALIVE'")
            if [ "$CHECK" != 'ALIVE' ]
            then
                error 'Unable to connect to connector'
                echo "Connector is not responding on $HOST - Investigate" >> "$LOG"
                connector_ok=0
                if [ ! -f "$LOCK_DIR/con_responding.lck" ]
                then
                    SENDMAIL=1
                    touch "$LOCK_DIR/con_responding.lck"
                else
                    info "Not sending Email lock file exists"
                fi
            else
                info 'Connector is alive'
                connector_ok_to_allow_elb=1
            fi
        fi
    fi

    if [ "$CHECK_ELB" == 1 ]
    then
        # Re-enable the xinetd connector check only after the connector has
        # answered a query in this run.
        if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ "$connector_ok_to_allow_elb" == 1 ]
        then
            sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
            sudo service xinetd reload
        fi
    fi
fi
|
359
|
-
|
360
|
-
#Check the cluster Status
# Three stages: processes running, services online (for the whole host, or
# per name when SERVICES holds a comma separated list) and replication
# latency against $LAG. Each stage counts the lines of the helper script's
# output that do not contain "OK"; a non-zero count is a failure. A failure
# appends a line to $LOG and requests an email unless its lock file exists.
if [ $CLUSTER == 1 ]; then
    #Check the processes are running
    info "Running Cluster Tests"
    REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r | grep -vc OK)
    if [ $REPL -eq 0 ]; then
        info "Replicator and Manager in cluster are running OK"
    else
        error " Replicator or Manager in cluster is not running"
        echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
        if [ -f $LOCK_DIR/rep_running.lck ]; then
            info "Not sending Email lock file exists"
        else
            SENDMAIL=1
            touch $LOCK_DIR/rep_running.lck
        fi
    fi

    #Check the processes are online
    if [ "$SERVICES" != "" ]; then
        # One online check per named service.
        services=$(echo "$SERVICES" | tr ',' ' ')
        for s in $services; do
            ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -vc OK)
            if [ $ONLINE -eq 0 ]; then
                info "Services are online @ $s"
            else
                error "Services are not online @ $s"
                echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
                if [ -f $LOCK_DIR/rep_online.lck ]; then
                    info "Not sending Email lock file exists"
                else
                    SENDMAIL=1
                    touch $LOCK_DIR/rep_online.lck
                fi
            fi
        done
    else
        ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -vc OK)
        if [ $ONLINE -eq 0 ]; then
            info "Services are online"
        else
            error "Services are not online"
            echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
            if [ -f $LOCK_DIR/rep_online.lck ]; then
                info "Not sending Email lock file exists"
            else
                SENDMAIL=1
                touch $LOCK_DIR/rep_online.lck
            fi
        fi
    fi

    #Check for replicator latency
    # $LAG is used as both the warning and the critical threshold.
    ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -vc OK)
    if [ $ONLINE -eq 0 ]; then
        info "Cluster Replicator is keeping up"
    else
        error "Services are Lagging"
        echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
        if [ -f $LOCK_DIR/rep_lag.lck ]; then
            info "Not sending Email lock file exists"
        else
            SENDMAIL=1
            touch $LOCK_DIR/rep_lag.lck
        fi
    fi
fi
|
442
|
-
|
443
|
-
#Check the Replicator
# Uses "trepctl -port $REPLICATOR_PORT services" three times: to detect a
# failed connection, to find any service whose state line is not ONLINE, and
# to compare every appliedLatency value against $LAG. Each failure appends a
# line to $LOG and requests an email unless its lock file exists.
if [ $REPLICATOR == 1 ]; then
    if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]; then
        severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
    fi

    # Despite its name, AVAILABLE counts "Connection failed" lines.
    AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep -c "Connection failed")
    if [ $AVAILABLE -eq 0 ]; then
        info "TR Replicator is running"
    else
        error "Replicator process is not running on $REPLICATOR_PORT"
        echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
        if [ -f $LOCK_DIR/tr_rep_running.lck ]; then
            info "Not sending Email lock file exists"
        else
            SENDMAIL=1
            touch $LOCK_DIR/tr_rep_running.lck
        fi
    fi

    # Count the state lines that are not ONLINE.
    ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep state | grep -vc ONLINE)
    if [ $ONLINE -eq 0 ]; then
        info "TR Replicator is online"
    else
        error "Replicator is down"
        echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
        if [ -f $LOCK_DIR/tr_rep_online.lck ]; then
            info "Not sending Email lock file exists"
        else
            SENDMAIL=1
            touch $LOCK_DIR/tr_rep_online.lck
        fi
    fi

    #Check for latency
    LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep appliedLatency | cut -d ':' -f2)

    for LATENCY in $LATENCY_LIST; do
        # float_cond compares through bc because the values may be fractional.
        if ! float_cond "$LATENCY > $LAG"; then
            info "Replicator latency ok"
            continue
        fi

        error "Replicator is lagging"
        echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
        if [ -f $LOCK_DIR/tr_rep_lag.lck ]; then
            info "Not sending Email lock file exists"
        else
            SENDMAIL=1
            touch $LOCK_DIR/tr_rep_lag.lck
        fi
    done
fi
|
508
|
-
|
509
|
-
#Check the disk space
# Reports every filesystem listed by "df -HP" (header, tmpfs and cdrom rows
# excluded) whose usage percentage is at or above $DISK_WARNING. A hit appends
# a line to $LOG and requests an email unless the disk lock file exists.
if [ "$DISK" == 1 ]
then
    # The loop reads from a process substitution rather than a pipe: the body
    # of a piped "while" runs in a subshell, so SENDMAIL=1 set inside it would
    # be discarded and disk problems would never be mailed.
    while read -r usep partition
    do
        # Keep only the number in front of the "%" sign.
        usep=${usep%%%*}

        # Skip rows whose usage column is not a plain number (e.g. "-"),
        # which would make the numeric comparison fail.
        case "$usep" in
            ''|*[!0-9]*) continue ;;
        esac

        if [ "$usep" -ge "$DISK_WARNING" ]; then
            error "Running out of disk space on $partition"
            echo "Running out for disk space on $HOST $partition - Investigate" >> "$LOG"
            if [ ! -f "$LOCK_DIR/disk.lck" ]
            then
                SENDMAIL=1
                touch "$LOCK_DIR/disk.lck"
            else
                info "Not sending Email lock file exists"
            fi
        fi
    done < <(df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }')
fi
|
531
|
-
|
532
|
-
# Send the collected report when any check requested an email. Before
# mailing, diagnostic output is appended to $LOG: "cctrl ls" for cluster or
# connector hosts (only when the manager status output shows exactly one PID
# line) and "trepctl services" for replicator hosts.
if [ $SENDMAIL == 1 ]; then
    if [ $DEBUG == 1 ]; then
        info "Sending Email to $EMAIL"
        info "Subject $SUBJECT"
        cat $LOG
    fi

    if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]; then
        manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep -c "PID")
        if [ $manager_running -ne 1 ]; then
            info 'Manager not running skipping cctrl output'
            echo "Manager not running unable to gather cctrl output" >> $LOG
        else
            info "Adding cctrl output to email"
            {
                echo
                echo "OUTPUT FROM cctrl ls on $HOST"
                echo '--------------------------------------------------'
                echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert
                echo '--------------------------------------------------'
            } >> $LOG
        fi
    fi

    if [ $REPLICATOR == 1 ]; then
        if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]; then
            info 'trepctl not found'
            echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
        else
            info "Adding trepctl output to email"
            {
                echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST"
                echo '--------------------------------------------------'
                $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services
                echo '--------------------------------------------------'
            } >> $LOG
        fi
    fi

    # The report file is the message body.
    $MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
fi
|
575
|
-
|
576
|
-
|