continuent-monitors-nagios 0.0.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tungsten_nagios_backups +140 -0
- data/bin/tungsten_nagios_connector +10 -3
- data/bin/tungsten_nagios_latency +159 -0
- data/bin/tungsten_nagios_monitor_threads +2 -0
- data/bin/tungsten_nagios_online +134 -0
- data/bin/tungsten_nagios_policy +61 -0
- data/bin/tungsten_nagios_progress +105 -0
- data/bin/tungsten_nagios_services +75 -0
- metadata +16 -18
- data/bin/check_tungsten.sh +0 -576
- data/bin/check_tungsten_backups +0 -70
- data/bin/check_tungsten_latency +0 -172
- data/bin/check_tungsten_online +0 -105
- data/bin/check_tungsten_policy +0 -61
- data/bin/check_tungsten_progress +0 -81
- data/bin/check_tungsten_services +0 -95
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check for the cluster policy mode.
#
# Reports OK when the policy returned by TI.status is "AUTOMATIC" and
# CRITICAL for any other policy. It also reports CRITICAL when this host is
# not a manager or the manager process is not running.
#
# NOTE(review): ok, critical, description and run are not defined in this
# class; presumably they come from the TungstenScript and
# TungstenNagiosMonitor mixins - confirm.
class ContinuentNagiosMonitorPolicy
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Run the manager sanity checks, then report on the current policy.
  def main
    critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
    critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")

    cluster = TI.status
    if cluster.policy != "AUTOMATIC"
      critical("Cluster is in #{cluster.policy} mode")
    else
      ok("Cluster is in #{cluster.policy} mode")
    end
  end

  # Register the script description on top of the inherited configuration.
  def configure
    super()

    description("Check that the local cluster is running in the AUTOMATIC policy")
  end

  # Name this script is known by.
  def script_name
    "tungsten_nagios_policy"
  end

  # Execute the check as soon as the class body has been evaluated.
  new.run
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that a replication service is applying new events.
#
# The check samples the service's appliedLastSeqno twice, sleeping for
# --delay seconds in between, and reports OK only when the second sample is
# greater than the first. On commercial installations a "cluster heartbeat"
# is issued through cctrl between the two samples.
#
# NOTE(review): ok, critical, opt, opt_default, add_option, description and
# run are not defined in this class; presumably they come from the
# TungstenScript and TungstenNagiosMonitor mixins - confirm.
class ContinuentNagiosMonitorProgress
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Validate the host and service, then compare two sequence-number samples.
  def main
    critical("The server is not a Tungsten Replicator") unless TI.is_replicator?
    critical("The Tungsten Replicator is not running") unless TI.is_running?("replicator")

    if TI.is_commercial?
      critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
      critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")
    end

    opt_default(:service, TI.default_dataservice)
    critical("The --service option was not given") if opt(:service).nil?

    if TI.trepctl_value(opt(:service), "state") != "ONLINE"
      critical("The #{opt(:service)} replication service is not ONLINE")
    end

    # Both samples are read the same way; to_s.to_f turns a missing or
    # non-numeric value into 0.0 instead of raising.
    read_seqno = lambda do
      TI.trepctl_value(opt(:service), "appliedLastSeqno").to_s.to_f
    end

    seqno_before = read_seqno.call

    TI.ensure_cctrl("cluster heartbeat") if TI.is_commercial?

    # Only sleep when the parsed delay is an Integer.
    if opt(:delay).is_a?(Integer)
      TU.debug("Go to sleep for #{opt(:delay)} seconds")
      sleep(opt(:delay))
    end

    seqno_after = read_seqno.call

    if (seqno_after - seqno_before) > 0
      ok("Tungsten Replicator #{opt(:service)} service is making progress")
    else
      critical("Tungsten Replicator #{opt(:service)} service did not show progress")
    end
  end

  # Register the description and the --delay / --service options.
  def configure
    super()

    description("Check that the replication service is making progress. For Continuent Tungsten installations, a heartbeat command will be run to force activity.")

    delay_settings = {
      :on => "--delay String",
      :help => "The number of seconds to wait when monitoring progress",
      :parse => method(:parse_integer_option),
      :default => 1
    }
    add_option(:delay, delay_settings)

    service_settings = {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    }
    add_option(:service, service_settings)
  end

  # Name this script is known by.
  def script_name
    "tungsten_nagios_progress"
  end

  # Execute the check as soon as the class body has been evaluated.
  new.run
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that every enabled Tungsten service is running.
#
# For each of replicator, manager and connector, the host_enable_<service>
# setting decides whether the service is checked. The result is CRITICAL
# when any checked service is not running or when nothing was checked, and
# OK otherwise.
#
# NOTE(review): ok, critical, description and run are not defined in this
# class; presumably they come from the TungstenScript and
# TungstenNagiosMonitor mixins - confirm.
class ContinuentNagiosMonitorServices
  include TungstenScript
  include TungstenNagiosMonitor

  private

  # Collect the enabled services and the subset that is not running.
  def main
    enabled = []
    stopped = []

    %w(replicator manager connector).each do |svc|
      # Services whose enable flag is not the string "true" are skipped.
      next unless TI.setting(TI.setting_key(HOSTS, "host_enable_#{svc}")) == "true"

      enabled << svc
      stopped << svc unless TI.is_running?(svc)
    end

    if !stopped.empty?
      critical("#{stopped.join(', ')} #{TU.pluralize(stopped, 'is', 'are')} not running")
    elsif enabled.empty?
      critical("No services were checked")
    else
      ok("All services (#{enabled.join(', ')}) are running")
    end
  end

  # Register the script description on top of the inherited configuration.
  def configure
    super()

    description("Check that all configured services for the installation are running.")
  end

  # Name this script is known by.
  def script_name
    "tungsten_nagios_services"
  end

  # Execute the check as soon as the class body has been evaluated.
  new.run
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: continuent-monitors-nagios
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Continuent
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: continuent-tools-monitoring
|
@@ -16,38 +16,36 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.5.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.5.0
|
27
27
|
description:
|
28
28
|
email: info@continuent.com
|
29
29
|
executables:
|
30
|
-
-
|
31
|
-
- check_tungsten_backups
|
32
|
-
- check_tungsten_latency
|
33
|
-
- check_tungsten_online
|
34
|
-
- check_tungsten_policy
|
35
|
-
- check_tungsten_progress
|
36
|
-
- check_tungsten_services
|
30
|
+
- tungsten_nagios_backups
|
37
31
|
- tungsten_nagios_connector
|
32
|
+
- tungsten_nagios_latency
|
38
33
|
- tungsten_nagios_monitor_threads
|
34
|
+
- tungsten_nagios_online
|
35
|
+
- tungsten_nagios_policy
|
36
|
+
- tungsten_nagios_progress
|
37
|
+
- tungsten_nagios_services
|
39
38
|
extensions: []
|
40
39
|
extra_rdoc_files: []
|
41
40
|
files:
|
42
|
-
- bin/
|
43
|
-
- bin/check_tungsten_backups
|
44
|
-
- bin/check_tungsten_latency
|
45
|
-
- bin/check_tungsten_online
|
46
|
-
- bin/check_tungsten_policy
|
47
|
-
- bin/check_tungsten_progress
|
48
|
-
- bin/check_tungsten_services
|
41
|
+
- bin/tungsten_nagios_backups
|
49
42
|
- bin/tungsten_nagios_connector
|
43
|
+
- bin/tungsten_nagios_latency
|
50
44
|
- bin/tungsten_nagios_monitor_threads
|
45
|
+
- bin/tungsten_nagios_online
|
46
|
+
- bin/tungsten_nagios_policy
|
47
|
+
- bin/tungsten_nagios_progress
|
48
|
+
- bin/tungsten_nagios_services
|
51
49
|
- LICENSE
|
52
50
|
- README.md
|
53
51
|
homepage: https://github.com/continuent/continuent-monitors-nagios
|
data/bin/check_tungsten.sh
DELETED
@@ -1,576 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
|
3
|
-
#TODO
|
4
|
-
#Work out the cluster names in a Composite DS
|
5
|
-
#determine the individual services in a replicator so we can print out better output (status on each)
|
6
|
-
#Remove host logging - stop duplicate emails across multi hosts?
|
7
|
-
|
8
|
-
HOST=`hostname`
|
9
|
-
|
10
|
-
#Start Configuration Options. - These can be overridden by command line options or from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
11
|
-
CONNECTOR=0 #If this host is running a connector set to 1 otherwise 0
|
12
|
-
CLUSTER=0 #If this host is running a cluster set to 1 otherwise 0
|
13
|
-
REPLICATOR=0 #If this host is running a replicator set to 1 otherwise 0
|
14
|
-
REPLICATOR_PORT=10000 #Replicator Port
|
15
|
-
REPLICATOR_HOME=/opt/continuent/ #Home dir for Replicator
|
16
|
-
SERVICES='' #Name of the individual clusters in a composite DS
|
17
|
-
EMAIL='' #Set email address here or pass via the email= command line option
|
18
|
-
DISK=0 #Check Disk space
|
19
|
-
CHECK_ELB=0 #Enable check for ELB socket check
|
20
|
-
|
21
|
-
SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
|
22
|
-
LOCK_TIMEOUT=180 # Only send a Email every x minutes for a specific
|
23
|
-
# problem, stop spamming from the script
|
24
|
-
LAG=60 # Slave lag to report on
|
25
|
-
CONNECTOR_TIMEOUT=10 # No of seconds to wait for a connector response
|
26
|
-
DISK_WARNING=80 # % full to send a warning
|
27
|
-
SENDMAILBIN=/usr/sbin/sendmail
|
28
|
-
#End Configuration Options
|
29
|
-
|
30
|
-
SENDMAIL=0
|
31
|
-
DEBUG=0
|
32
|
-
LOG=/opt/continuent/share/check_tungsten.log
|
33
|
-
LOCK_DIR=/opt/continuent/share/tungsten_locks
|
34
|
-
|
35
|
-
function float_cond()
|
36
|
-
{
|
37
|
-
local cond=0
|
38
|
-
if [[ $# -gt 0 ]]; then
|
39
|
-
cond=$(echo "$*" | bc -q 2>&1)
|
40
|
-
if [[ $? -ne 0 ]]; then
|
41
|
-
echo "Error: $cond"
|
42
|
-
exit 1
|
43
|
-
fi
|
44
|
-
if [[ -z "$cond" ]]; then cond=0; fi
|
45
|
-
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
|
46
|
-
fi
|
47
|
-
local stat=$((cond == 0))
|
48
|
-
return $stat
|
49
|
-
}
|
50
|
-
|
51
|
-
# Print "INFO : <message>" on stdout when DEBUG is 1; otherwise print nothing.
# $1 - message text.
# DEBUG is quoted and defaulted so an unset or empty value is treated as
# "debug off" instead of making the test builtin fail.
info ()
{
    if [ "${DEBUG:-0}" = 1 ]; then echo "INFO : $1"; fi
}
|
55
|
-
# Print "ERROR : <message>" on stdout when DEBUG is 1; otherwise print nothing.
# $1 - message text.
# DEBUG is quoted and defaulted so an unset or empty value is treated as
# "debug off" instead of making the test builtin fail.
error ()
{
    if [ "${DEBUG:-0}" = 1 ]; then echo "ERROR : $1"; fi
}
|
59
|
-
# Print "SEVERE : <message>" on stdout and terminate the current shell
# with status 1.
# $1 - message text.
severe ()
{
    printf '%s\n' "SEVERE : $1"
    exit 1
}
|
64
|
-
# Look up a setting in $CONTINUENT_ROOT/conf/tungsten.cfg and echo its value.
# $1 - setting name; it is matched as a double-quoted token in the file.
# The first matching line is used. The value is the second ':'-separated
# field with commas, double quotes and spaces removed.
# Calls severe (which exits) when the file is missing or no value is found.
# Paths and values are quoted so a CONTINUENT_ROOT containing spaces works,
# and the working variables are local so a caller's CFG is not overwritten.
getSetting ()
{
    local cfg_file="$CONTINUENT_ROOT/conf/tungsten.cfg"
    local value=""

    if [ ! -f "$cfg_file" ]
    then
        severe "Unable to find $cfg_file"
    fi

    value=$(grep "\"$1\"" "$cfg_file" | cut -d ':' -f2 | head -1 | sed -e 's/,//g' -e 's/"//g' -e 's/ //g')

    if [ -z "$value" ]
    then
        severe "Unable to find $1 in $cfg_file"
    fi

    echo "$value"
}
|
83
|
-
|
84
|
-
|
85
|
-
# Load any continuent variables
|
86
|
-
|
87
|
-
if [ -z $CONTINUENT_ROOT ]
|
88
|
-
then
|
89
|
-
[ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
|
90
|
-
[ -f "$HOME/.profile" ] && . "$HOME/.profile"
|
91
|
-
fi
|
92
|
-
|
93
|
-
# Switch on the option variable whose NAME is passed in $1 by setting it to 1,
# then log the change through info.
# $1 - name of the variable to set (for example CLUSTER).
# A bare `$1=1` is parsed as a command called "NAME=1" rather than as an
# assignment, so printf -v is used to assign through the name.
function sOpt()
{
    printf -v "$1" '%s' 1
    info "$1 switched on via command line"
}
|
98
|
-
|
99
|
-
# Parse a "key=value" command line argument and store the value in a variable.
# $1 - name of the variable to assign.
# $2 - the raw argument, for example "replicator_port=10001".
# $3 - the key to skip, for example "replicator_port".
# Each '='-separated piece of $2 that differs from $3 is assigned to the
# variable, so the last such piece wins.
# $3 is compared in double quotes so it expands, and printf -v performs the
# assignment, because `$1=$i` is parsed as a command, not an assignment.
function mOpt()
{
    local piece
    for piece in $(echo "$2" | tr "=" "\n")
    do
        if [ "$piece" != "$3" ]
        then
            printf -v "$1" '%s' "$piece"
        fi
    done
}
|
110
|
-
|
111
|
-
#Parse the command line options
|
112
|
-
|
113
|
-
for arg in "$@"
|
114
|
-
do
|
115
|
-
case "$arg" in
|
116
|
-
-v) DEBUG=1
|
117
|
-
info "Debug mode set"
|
118
|
-
;;
|
119
|
-
-vv) DEBUG=1
|
120
|
-
info "INFO : Extended Debug mode set"
|
121
|
-
set -x
|
122
|
-
;;
|
123
|
-
cluster) CLUSTER=1
|
124
|
-
info "CLUSTER switched on via command line"
|
125
|
-
;;
|
126
|
-
connector) CONNECTOR=1
|
127
|
-
info "CONNECTOR switched on via command line"
|
128
|
-
;;
|
129
|
-
replicator) REPLICATOR=1
|
130
|
-
info "REPLICATOR switched on via command line"
|
131
|
-
;;
|
132
|
-
check_elb) CHECK_ELB=1
|
133
|
-
info "CHECK_ELB switched on via command line"
|
134
|
-
;;
|
135
|
-
replicator_port*) for i in $(echo $arg | tr "=" "\n")
|
136
|
-
do
|
137
|
-
if [ $i != 'replicator_port' ]
|
138
|
-
then
|
139
|
-
REPLICATOR_PORT=$i
|
140
|
-
fi
|
141
|
-
done
|
142
|
-
|
143
|
-
info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
|
144
|
-
;;
|
145
|
-
replicator_home*) for i in $(echo $arg | tr "=" "\n")
|
146
|
-
do
|
147
|
-
if [ $i != 'replicator_home' ]
|
148
|
-
then
|
149
|
-
REPLICATOR_HOME=$i
|
150
|
-
fi
|
151
|
-
done
|
152
|
-
|
153
|
-
info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
|
154
|
-
;;
|
155
|
-
services*) for i in $(echo $arg | tr "=" "\n")
|
156
|
-
do
|
157
|
-
if [ $i != 'services' ]
|
158
|
-
then
|
159
|
-
SERVICES=$i
|
160
|
-
fi
|
161
|
-
done
|
162
|
-
info "SERVICES $SERVICES passed via the command line"
|
163
|
-
;;
|
164
|
-
email*) for i in $(echo $arg | tr "=" "\n")
|
165
|
-
do
|
166
|
-
if [ $i != 'email' ]
|
167
|
-
then
|
168
|
-
EMAIL=$i
|
169
|
-
fi
|
170
|
-
done
|
171
|
-
info "EMAIL $EMAIL passed via the command line"
|
172
|
-
;;
|
173
|
-
config*) for i in $(echo $arg | tr "=" "\n")
|
174
|
-
do
|
175
|
-
if [ $i != 'config' ]
|
176
|
-
then
|
177
|
-
FILE=$i
|
178
|
-
fi
|
179
|
-
done
|
180
|
-
info "Config File $FILE passed via the command line"
|
181
|
-
;;
|
182
|
-
disk) DISK=1
|
183
|
-
info "DISK switched on via command line"
|
184
|
-
;;
|
185
|
-
*)
|
186
|
-
echo "Unknown command line option passed $arg"
|
187
|
-
echo "Valid options are -v,cluster,connector,replicator,replicator_port=??,services=??,email=??,config=??"
|
188
|
-
exit 1
|
189
|
-
esac
|
190
|
-
|
191
|
-
|
192
|
-
done
|
193
|
-
|
194
|
-
|
195
|
-
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
196
|
-
then
|
197
|
-
if [ -z $CONTINUENT_ROOT ]
|
198
|
-
then
|
199
|
-
severe "$CONTINUENT_ROOT is not set - unable to continue"
|
200
|
-
fi
|
201
|
-
if [ ! -f $CONTINUENT_ROOT/share/env.sh ]
|
202
|
-
then
|
203
|
-
severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
|
204
|
-
fi
|
205
|
-
|
206
|
-
. "$CONTINUENT_ROOT/share/env.sh"
|
207
|
-
|
208
|
-
#Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
209
|
-
CFG=$CONTINUENT_ROOT/share/check_tungsten.cfg
|
210
|
-
|
211
|
-
if [ -f $CFG ]
|
212
|
-
then
|
213
|
-
info "Loading settings from $CFG"
|
214
|
-
. "$CFG"
|
215
|
-
fi
|
216
|
-
if [ -z "$MYSQL" ]
|
217
|
-
then
|
218
|
-
MYSQL=`which mysql 2>/dev/null`
|
219
|
-
|
220
|
-
if [ "$MYSQL" == "" ]
|
221
|
-
then
|
222
|
-
severe " Unable to the mysql command line program"
|
223
|
-
fi
|
224
|
-
fi
|
225
|
-
fi
|
226
|
-
|
227
|
-
#If a file is passed from the command line load any variables from there
|
228
|
-
if [ ! -z $FILE ]
|
229
|
-
then
|
230
|
-
if [ ! -f $FILE ]
|
231
|
-
then
|
232
|
-
severe "The file specified in the command line $FILE does not exist"
|
233
|
-
fi
|
234
|
-
|
235
|
-
info "Loading settings from $FILE"
|
236
|
-
. "$FILE"
|
237
|
-
fi
|
238
|
-
|
239
|
-
#Parameter and host validation
|
240
|
-
|
241
|
-
BC=`which bc 2>/dev/null`
|
242
|
-
|
243
|
-
if [ "$BC" == "" ]
|
244
|
-
then
|
245
|
-
severe " Unable to find the command bc - please install"
|
246
|
-
fi
|
247
|
-
|
248
|
-
|
249
|
-
if [ "$EMAIL" == "" ]
|
250
|
-
then
|
251
|
-
severe " email must be specified"
|
252
|
-
fi
|
253
|
-
|
254
|
-
if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
|
255
|
-
then
|
256
|
-
severe " No option specified, select either connector, cluster or replicator"
|
257
|
-
fi
|
258
|
-
|
259
|
-
if [ -d $LOCK_DIR ]
|
260
|
-
then
|
261
|
-
if [ ! -w $LOCK_DIR ]
|
262
|
-
then
|
263
|
-
severe " The locks dir $LOCK_DIR is not writable"
|
264
|
-
fi
|
265
|
-
else
|
266
|
-
info "Creating locks dir"
|
267
|
-
mkdir $LOCK_DIR
|
268
|
-
fi
|
269
|
-
|
270
|
-
if [ -z "$MAILPROG" ]
|
271
|
-
then
|
272
|
-
MAILPROG=`which mail 2>/dev/null`
|
273
|
-
|
274
|
-
if [ "$MAILPROG" == "" ]
|
275
|
-
then
|
276
|
-
severe " Unable to find a mail program"
|
277
|
-
fi
|
278
|
-
fi
|
279
|
-
|
280
|
-
if [ -z "$SENDMAILBIN" ]
|
281
|
-
then
|
282
|
-
SENDMAILBIN=`which sendmail 2>/dev/null`
|
283
|
-
|
284
|
-
if [ "$SENDMAILBIN" == "" ]
|
285
|
-
then
|
286
|
-
severe " Unable to find a sendmail program"
|
287
|
-
fi
|
288
|
-
fi
|
289
|
-
|
290
|
-
if [ -f $LOG ]
|
291
|
-
then
|
292
|
-
rm $LOG
|
293
|
-
fi
|
294
|
-
|
295
|
-
#Expire old Locks
|
296
|
-
info "Deleting Locks older than $LOCK_TIMEOUT min"
|
297
|
-
find $LOCK_DIR/* -type f -mmin +$LOCK_TIMEOUT -delete 2> /dev/null
|
298
|
-
|
299
|
-
#Check the connector status
|
300
|
-
if [ $CONNECTOR == 1 ]
|
301
|
-
then
|
302
|
-
connector_ok_to_allow_elb=0
|
303
|
-
info "Running Connector Tests"
|
304
|
-
CONN=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -c| grep -v OK | wc -l)
|
305
|
-
if [ $CONN -ne 0 ]
|
306
|
-
then
|
307
|
-
error " Connector is not running"
|
308
|
-
echo "Connector is not running on $HOST - Investigate" >> $LOG
|
309
|
-
if [ ! -f $LOCK_DIR/con_running.lck ]
|
310
|
-
then
|
311
|
-
SENDMAIL=1
|
312
|
-
touch $LOCK_DIR/con_running.lck
|
313
|
-
else
|
314
|
-
info "Not sending Email lock file exists"
|
315
|
-
fi
|
316
|
-
else
|
317
|
-
info "Connector is running OK"
|
318
|
-
|
319
|
-
TIMEOUT=`which timeout 2>/dev/null`
|
320
|
-
|
321
|
-
if [ "$TIMEOUT" == "" ]
|
322
|
-
then
|
323
|
-
info "timeout command not found - unable to check if the connector is responding"
|
324
|
-
else
|
325
|
-
info "Checking Connector is responding to queries"
|
326
|
-
CON_USER=$(getSetting connector_user)
|
327
|
-
CON_PW=$(getSetting connector_password)
|
328
|
-
CON_PORT=$(getSetting connector_listen_port)
|
329
|
-
CHECK=$(timeout -s HUP $CONNECTOR_TIMEOUT $MYSQL -P$CON_PORT -u $CON_USER -p$CON_PW -h $HOSTNAME --skip-column-names -Be"select 'ALIVE'")
|
330
|
-
if [ "$CHECK" != 'ALIVE' ]
|
331
|
-
then
|
332
|
-
error 'Unable to connect to connector'
|
333
|
-
echo "Connector is not responding on $HOST - Investigate" >> $LOG
|
334
|
-
connector_ok=0
|
335
|
-
if [ ! -f $LOCK_DIR/con_responding.lck ]
|
336
|
-
then
|
337
|
-
SENDMAIL=1
|
338
|
-
touch $LOCK_DIR/con_responding.lck
|
339
|
-
else
|
340
|
-
info "Not sending Email lock file exists"
|
341
|
-
fi
|
342
|
-
else
|
343
|
-
info 'Connector is alive'
|
344
|
-
connector_ok_to_allow_elb=1
|
345
|
-
fi
|
346
|
-
fi
|
347
|
-
fi
|
348
|
-
|
349
|
-
if [ $CHECK_ELB == 1 ]
|
350
|
-
then
|
351
|
-
if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ $connector_ok_to_allow_elb == 1 ]
|
352
|
-
then
|
353
|
-
|
354
|
-
sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
|
355
|
-
sudo service xinetd reload
|
356
|
-
fi
|
357
|
-
fi
|
358
|
-
fi
|
359
|
-
|
360
|
-
#Check the cluster Status
|
361
|
-
if [ $CLUSTER == 1 ]
|
362
|
-
then
|
363
|
-
#Check the processes are running
|
364
|
-
info "Running Cluster Tests"
|
365
|
-
REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r| grep -v OK | wc -l)
|
366
|
-
if [ $REPL -ne 0 ]
|
367
|
-
then
|
368
|
-
error " Replicator or Manager in cluster is not running"
|
369
|
-
echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
|
370
|
-
if [ ! -f $LOCK_DIR/rep_running.lck ]
|
371
|
-
then
|
372
|
-
SENDMAIL=1
|
373
|
-
touch $LOCK_DIR/rep_running.lck
|
374
|
-
else
|
375
|
-
info "Not sending Email lock file exists"
|
376
|
-
fi
|
377
|
-
|
378
|
-
else
|
379
|
-
info "Replicator and Manager in cluster are running OK"
|
380
|
-
fi
|
381
|
-
|
382
|
-
#Check the processes are online
|
383
|
-
if [ "$SERVICES" == "" ]
|
384
|
-
then
|
385
|
-
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -v OK | wc -l)
|
386
|
-
if [ $ONLINE -ne 0 ]
|
387
|
-
then
|
388
|
-
error "Services are not online"
|
389
|
-
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
390
|
-
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
391
|
-
then
|
392
|
-
SENDMAIL=1
|
393
|
-
touch $LOCK_DIR/rep_online.lck
|
394
|
-
else
|
395
|
-
info "Not sending Email lock file exists"
|
396
|
-
fi
|
397
|
-
|
398
|
-
else
|
399
|
-
info "Services are online"
|
400
|
-
fi
|
401
|
-
else
|
402
|
-
services=$(echo "$SERVICES" | sed 's/,/ /g')
|
403
|
-
for s in $services
|
404
|
-
do
|
405
|
-
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -v OK | wc -l)
|
406
|
-
if [ $ONLINE -ne 0 ]
|
407
|
-
then
|
408
|
-
error "Services are not online @ $s"
|
409
|
-
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
410
|
-
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
411
|
-
then
|
412
|
-
SENDMAIL=1
|
413
|
-
touch $LOCK_DIR/rep_online.lck
|
414
|
-
else
|
415
|
-
info "Not sending Email lock file exists"
|
416
|
-
fi
|
417
|
-
|
418
|
-
else
|
419
|
-
info "Services are online @ $s"
|
420
|
-
fi
|
421
|
-
done
|
422
|
-
fi
|
423
|
-
|
424
|
-
#Check for replicator latency
|
425
|
-
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -v OK | wc -l)
|
426
|
-
if [ $ONLINE -ne 0 ]
|
427
|
-
then
|
428
|
-
error "Services are Lagging"
|
429
|
-
echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
|
430
|
-
if [ ! -f $LOCK_DIR/rep_lag.lck ]
|
431
|
-
then
|
432
|
-
SENDMAIL=1
|
433
|
-
touch $LOCK_DIR/rep_lag.lck
|
434
|
-
else
|
435
|
-
info "Not sending Email lock file exists"
|
436
|
-
fi
|
437
|
-
|
438
|
-
else
|
439
|
-
info "Cluster Replicator is keeping up"
|
440
|
-
fi
|
441
|
-
fi
|
442
|
-
|
443
|
-
#Check the Replicator
|
444
|
-
if [ $REPLICATOR == 1 ]
|
445
|
-
then
|
446
|
-
if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
447
|
-
then
|
448
|
-
severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
|
449
|
-
fi
|
450
|
-
|
451
|
-
AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep "Connection failed" | wc -l)
|
452
|
-
if [ $AVAILABLE -gt 0 ]
|
453
|
-
then
|
454
|
-
error "Replicator process is not running on $REPLICATOR_PORT"
|
455
|
-
echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
|
456
|
-
if [ ! -f $LOCK_DIR/tr_rep_running.lck ]
|
457
|
-
then
|
458
|
-
SENDMAIL=1
|
459
|
-
touch $LOCK_DIR/tr_rep_running.lck
|
460
|
-
else
|
461
|
-
info "Not sending Email lock file exists"
|
462
|
-
fi
|
463
|
-
|
464
|
-
else
|
465
|
-
info "TR Replicator is running"
|
466
|
-
fi
|
467
|
-
|
468
|
-
|
469
|
-
ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services| grep state | grep -v ONLINE | wc -l)
|
470
|
-
if [ $ONLINE -gt 0 ]
|
471
|
-
then
|
472
|
-
error "Replicator is down"
|
473
|
-
echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
|
474
|
-
if [ ! -f $LOCK_DIR/tr_rep_online.lck ]
|
475
|
-
then
|
476
|
-
SENDMAIL=1
|
477
|
-
touch $LOCK_DIR/tr_rep_online.lck
|
478
|
-
else
|
479
|
-
info "Not sending Email lock file exists"
|
480
|
-
fi
|
481
|
-
|
482
|
-
else
|
483
|
-
info "TR Replicator is online"
|
484
|
-
fi
|
485
|
-
|
486
|
-
#Check for latency
|
487
|
-
LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services|grep appliedLatency|cut -d ':' -f2)
|
488
|
-
|
489
|
-
for LATENCY in $LATENCY_LIST
|
490
|
-
do
|
491
|
-
if float_cond "$LATENCY > $LAG"; then
|
492
|
-
error "Replicator is lagging"
|
493
|
-
echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
|
494
|
-
if [ ! -f $LOCK_DIR/tr_rep_lag.lck ]
|
495
|
-
then
|
496
|
-
SENDMAIL=1
|
497
|
-
touch $LOCK_DIR/tr_rep_lag.lck
|
498
|
-
else
|
499
|
-
info "Not sending Email lock file exists"
|
500
|
-
fi
|
501
|
-
else
|
502
|
-
info "Replicator latency ok"
|
503
|
-
fi
|
504
|
-
done
|
505
|
-
|
506
|
-
|
507
|
-
fi
|
508
|
-
|
509
|
-
#Check the disk space
|
510
|
-
if [ $DISK == 1 ]
|
511
|
-
then
|
512
|
-
|
513
|
-
df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }' | while read output;
|
514
|
-
do
|
515
|
-
usep=$(echo $output | awk '{ print $1}' | cut -d'%' -f1 )
|
516
|
-
partition=$(echo $output | awk '{ print $2 }' )
|
517
|
-
if [ $usep -ge $DISK_WARNING ]; then
|
518
|
-
error "Running out of disk space on $partition"
|
519
|
-
echo "Running out for disk space on $HOST $partition - Investigate" >> $LOG
|
520
|
-
if [ ! -f $LOCK_DIR/disk.lck ]
|
521
|
-
then
|
522
|
-
SENDMAIL=1
|
523
|
-
touch $LOCK_DIR/disk.lck
|
524
|
-
else
|
525
|
-
info "Not sending Email lock file exists"
|
526
|
-
fi
|
527
|
-
fi
|
528
|
-
done
|
529
|
-
|
530
|
-
fi
|
531
|
-
|
532
|
-
if [ $SENDMAIL == 1 ]
|
533
|
-
then
|
534
|
-
if [ $DEBUG == 1 ]
|
535
|
-
then
|
536
|
-
info "Sending Email to $EMAIL"
|
537
|
-
info "Subject $SUBJECT"
|
538
|
-
cat $LOG
|
539
|
-
fi
|
540
|
-
|
541
|
-
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
542
|
-
then
|
543
|
-
manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep "PID" | wc -l)
|
544
|
-
if [ $manager_running -eq 1 ]; then
|
545
|
-
info "Adding cctrl output to email"
|
546
|
-
echo >> $LOG
|
547
|
-
echo "OUTPUT FROM cctrl ls on $HOST" >> $LOG
|
548
|
-
echo '--------------------------------------------------' >> $LOG
|
549
|
-
echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert >> $LOG
|
550
|
-
echo '--------------------------------------------------' >> $LOG
|
551
|
-
else
|
552
|
-
info 'Manager not running skipping cctrl output'
|
553
|
-
echo "Manager not running unable to gather cctrl output" >> $LOG
|
554
|
-
fi
|
555
|
-
|
556
|
-
fi
|
557
|
-
if [ $REPLICATOR == 1 ]
|
558
|
-
then
|
559
|
-
if [ -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
560
|
-
then
|
561
|
-
info "Adding trepctl output to email"
|
562
|
-
echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST" >> $LOG
|
563
|
-
echo '--------------------------------------------------' >> $LOG
|
564
|
-
|
565
|
-
|
566
|
-
$REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services >> $LOG
|
567
|
-
echo '--------------------------------------------------' >> $LOG
|
568
|
-
else
|
569
|
-
info 'trepctl not found'
|
570
|
-
echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
|
571
|
-
fi
|
572
|
-
fi
|
573
|
-
$MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
|
574
|
-
fi
|
575
|
-
|
576
|
-
|