continuent-monitors-nagios 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/check_tungsten.sh +576 -0
- data/bin/check_tungsten_backups +70 -0
- data/bin/check_tungsten_latency +172 -0
- data/bin/check_tungsten_online +105 -0
- data/bin/check_tungsten_policy +61 -0
- data/bin/check_tungsten_progress +81 -0
- data/bin/check_tungsten_services +95 -0
- data/bin/tungsten_nagios_connector +91 -0
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a06f684ddc84caf2d0b1c859a33b5d34b5d2d63
|
4
|
+
data.tar.gz: a0bebc5b0902b9bbe71460e6335e757529b79c90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44f04b5c74c7409d300fdbed92c1e2bc010727549625801d859092693e9f7c49d75d7e927cc35ddfbef5ae17a7f0746c7dad7671a146ecec486ef4a1c760ce19
|
7
|
+
data.tar.gz: 9f713e08e75e95625284cf086e41ddf49469b8fd3b62cfa4bb89c7b17b1d9505db15ac110b55cb46b3a7df218cbf4c693a8029c1dadde1a40240d42f7ec55645
|
@@ -0,0 +1,576 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
#TODO
|
4
|
+
#Work out the cluster names in a Composite DS
|
5
|
+
#determine the individulal services in a replicator so we can print out better output (status on each)
|
6
|
+
#Remove host logging - stop duplicate emails across multi hosts?
|
7
|
+
|
8
|
+
HOST=`hostname`
|
9
|
+
|
10
|
+
#Start Configuration Options. - These can be overridden by command line options or from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
11
|
+
CONNECTOR=0 #If this host is running a connector set to 1 otherwise 0
|
12
|
+
CLUSTER=0 #If this host is running a cluster set to 1 otherwise 0
|
13
|
+
REPLICATOR=0 #If this host is running a replicator set to 1 otherwise 0
|
14
|
+
REPLICATOR_PORT=10000 #Replicator Port
|
15
|
+
REPLICATOR_HOME=/opt/continuent/ #Home dir for Replicator
|
16
|
+
SERVICES='' #Name of the individual clusters in a composite DS
|
17
|
+
EMAIL='' #Set email address here or pass via the email= command line option
|
18
|
+
DISK=0 #Check Disk space
|
19
|
+
CHECK_ELB=0 #Enable check for ELB socket check
|
20
|
+
|
21
|
+
SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
|
22
|
+
LOCK_TIMEOUT=180 # Only send a Email every x minutes for a specific
|
23
|
+
# problem, stop spamming from the script
|
24
|
+
LAG=60 # Slave lag to report on
|
25
|
+
CONNECTOR_TIMEOUT=10 # No of seconds to wait for a connector response
|
26
|
+
DISK_WARNING=80 # % full to send a warning
|
27
|
+
SENDMAILBIN=/usr/sbin/sendmail
|
28
|
+
#End Configuration Options
|
29
|
+
|
30
|
+
SENDMAIL=0
|
31
|
+
DEBUG=0
|
32
|
+
LOG=/opt/continuent/share/check_tungsten.log
|
33
|
+
LOCK_DIR=/opt/continuent/share/tungsten_locks
|
34
|
+
|
35
|
+
function float_cond()
|
36
|
+
{
|
37
|
+
local cond=0
|
38
|
+
if [[ $# -gt 0 ]]; then
|
39
|
+
cond=$(echo "$*" | bc -q 2>&1)
|
40
|
+
if [[ $? -ne 0 ]]; then
|
41
|
+
echo "Error: $cond"
|
42
|
+
exit 1
|
43
|
+
fi
|
44
|
+
if [[ -z "$cond" ]]; then cond=0; fi
|
45
|
+
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
|
46
|
+
fi
|
47
|
+
local stat=$((cond == 0))
|
48
|
+
return $stat
|
49
|
+
}
|
50
|
+
|
51
|
+
info ()
|
52
|
+
{
|
53
|
+
if [ $DEBUG == 1 ]; then echo "INFO : $1"; fi
|
54
|
+
}
|
55
|
+
error ()
|
56
|
+
{
|
57
|
+
if [ $DEBUG == 1 ]; then echo "ERROR : $1"; fi
|
58
|
+
}
|
59
|
+
severe ()
|
60
|
+
{
|
61
|
+
echo "SEVERE : $1"
|
62
|
+
exit 1
|
63
|
+
}
|
64
|
+
getSetting ()
|
65
|
+
{
|
66
|
+
CFG=$CONTINUENT_ROOT/conf/tungsten.cfg
|
67
|
+
if [ ! -f $CFG ]
|
68
|
+
then
|
69
|
+
severe "Unable to find $CFG"
|
70
|
+
fi
|
71
|
+
getSettingValue=""
|
72
|
+
getSettingValue=$(grep "\"$1\"" $CFG| cut -d ':' -f2 | head -1|sed 's/,//g'|sed 's/"//g'|sed 's/ //g')
|
73
|
+
if [ -z $getSettingValue ]
|
74
|
+
then
|
75
|
+
severe "Unable to find $1 in $CFG"
|
76
|
+
fi
|
77
|
+
if [ "$getSettingValue" == '' ]
|
78
|
+
then
|
79
|
+
severe "Unable to find $1 in $CFG"
|
80
|
+
fi
|
81
|
+
echo "$getSettingValue"
|
82
|
+
}
|
83
|
+
|
84
|
+
|
85
|
+
# Load any continuent variables
|
86
|
+
|
87
|
+
if [ -z $CONTINUENT_ROOT ]
|
88
|
+
then
|
89
|
+
[ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
|
90
|
+
[ -f "$HOME/.profile" ] && . "$HOME/.profile"
|
91
|
+
fi
|
92
|
+
|
93
|
+
function sOpt()
|
94
|
+
{
|
95
|
+
$1=1
|
96
|
+
info "$1 switched on via command line"
|
97
|
+
}
|
98
|
+
|
99
|
+
function mOpt()
|
100
|
+
{
|
101
|
+
|
102
|
+
for i in $(echo $2 | tr "=" "\n")
|
103
|
+
do
|
104
|
+
if [ $i != '$3' ]
|
105
|
+
then
|
106
|
+
$1=$i
|
107
|
+
fi
|
108
|
+
done
|
109
|
+
}
|
110
|
+
|
111
|
+
#Parse the command line options
|
112
|
+
|
113
|
+
for arg in "$@"
|
114
|
+
do
|
115
|
+
case "$arg" in
|
116
|
+
-v) DEBUG=1
|
117
|
+
info "Debug mode set"
|
118
|
+
;;
|
119
|
+
-vv) DEBUG=1
|
120
|
+
info "INFO : Extended Debug mode set"
|
121
|
+
set -x
|
122
|
+
;;
|
123
|
+
cluster) CLUSTER=1
|
124
|
+
info "CLUSTER switched on via command line"
|
125
|
+
;;
|
126
|
+
connector) CONNECTOR=1
|
127
|
+
info "CONNECTOR switched on via command line"
|
128
|
+
;;
|
129
|
+
replicator) REPLICATOR=1
|
130
|
+
info "REPLICATOR switched on via command line"
|
131
|
+
;;
|
132
|
+
check_elb) CHECK_ELB=1
|
133
|
+
info "CHECK_ELB switched on via command line"
|
134
|
+
;;
|
135
|
+
replicator_port*) for i in $(echo $arg | tr "=" "\n")
|
136
|
+
do
|
137
|
+
if [ $i != 'replicator_port' ]
|
138
|
+
then
|
139
|
+
REPLICATOR_PORT=$i
|
140
|
+
fi
|
141
|
+
done
|
142
|
+
|
143
|
+
info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
|
144
|
+
;;
|
145
|
+
replicator_home*) for i in $(echo $arg | tr "=" "\n")
|
146
|
+
do
|
147
|
+
if [ $i != 'replicator_home' ]
|
148
|
+
then
|
149
|
+
REPLICATOR_HOME=$i
|
150
|
+
fi
|
151
|
+
done
|
152
|
+
|
153
|
+
info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
|
154
|
+
;;
|
155
|
+
services*) for i in $(echo $arg | tr "=" "\n")
|
156
|
+
do
|
157
|
+
if [ $i != 'services' ]
|
158
|
+
then
|
159
|
+
SERVICES=$i
|
160
|
+
fi
|
161
|
+
done
|
162
|
+
info "SERVICES $SERVICES passed via the command line"
|
163
|
+
;;
|
164
|
+
email*) for i in $(echo $arg | tr "=" "\n")
|
165
|
+
do
|
166
|
+
if [ $i != 'email' ]
|
167
|
+
then
|
168
|
+
EMAIL=$i
|
169
|
+
fi
|
170
|
+
done
|
171
|
+
info "EMAIL $EMAIL passed via the command line"
|
172
|
+
;;
|
173
|
+
config*) for i in $(echo $arg | tr "=" "\n")
|
174
|
+
do
|
175
|
+
if [ $i != 'config' ]
|
176
|
+
then
|
177
|
+
FILE=$i
|
178
|
+
fi
|
179
|
+
done
|
180
|
+
info "Config File $FILE passed via the command line"
|
181
|
+
;;
|
182
|
+
disk) DISK=1
|
183
|
+
info "DISK switched on via command line"
|
184
|
+
;;
|
185
|
+
*)
|
186
|
+
echo "Unknown command line option passed $arg"
|
187
|
+
echo "Valid options are -v,cluster,connector,replicator,replicator_port=??,services=??,email=??,config=??"
|
188
|
+
exit 1
|
189
|
+
esac
|
190
|
+
|
191
|
+
|
192
|
+
done
|
193
|
+
|
194
|
+
|
195
|
+
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
196
|
+
then
|
197
|
+
if [ -z $CONTINUENT_ROOT ]
|
198
|
+
then
|
199
|
+
severe "$CONTINUENT_ROOT is not set - unable to continue"
|
200
|
+
fi
|
201
|
+
if [ ! -f $CONTINUENT_ROOT/share/env.sh ]
|
202
|
+
then
|
203
|
+
severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
|
204
|
+
fi
|
205
|
+
|
206
|
+
. "$CONTINUENT_ROOT/share/env.sh"
|
207
|
+
|
208
|
+
#Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
209
|
+
CFG=$CONTINUENT_ROOT/share/check_tungsten.cfg
|
210
|
+
|
211
|
+
if [ -f $CFG ]
|
212
|
+
then
|
213
|
+
info "Loading settings from $CFG"
|
214
|
+
. "$CFG"
|
215
|
+
fi
|
216
|
+
if [ -z "$MYSQL" ]
|
217
|
+
then
|
218
|
+
MYSQL=`which mysql 2>/dev/null`
|
219
|
+
|
220
|
+
if [ "$MYSQL" == "" ]
|
221
|
+
then
|
222
|
+
severe " Unable to the mysql command line program"
|
223
|
+
fi
|
224
|
+
fi
|
225
|
+
fi
|
226
|
+
|
227
|
+
#If a file is passed from the command line load any variables from there
|
228
|
+
if [ ! -z $FILE ]
|
229
|
+
then
|
230
|
+
if [ ! -f $FILE ]
|
231
|
+
then
|
232
|
+
severe "The file specified in the command line $FILE does not exist"
|
233
|
+
fi
|
234
|
+
|
235
|
+
info "Loading settings from $FILE"
|
236
|
+
. "$FILE"
|
237
|
+
fi
|
238
|
+
|
239
|
+
#Parameter and host validation
|
240
|
+
|
241
|
+
BC=`which bc 2>/dev/null`
|
242
|
+
|
243
|
+
if [ "$BC" == "" ]
|
244
|
+
then
|
245
|
+
severe " Unable to find the command bc - please install"
|
246
|
+
fi
|
247
|
+
|
248
|
+
|
249
|
+
if [ "$EMAIL" == "" ]
|
250
|
+
then
|
251
|
+
severe " email must be specified"
|
252
|
+
fi
|
253
|
+
|
254
|
+
if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
|
255
|
+
then
|
256
|
+
severe " No option specified, select either connector, cluster or replicator"
|
257
|
+
fi
|
258
|
+
|
259
|
+
if [ -d $LOCK_DIR ]
|
260
|
+
then
|
261
|
+
if [ ! -w $LOCK_DIR ]
|
262
|
+
then
|
263
|
+
severe " The locks dir $LOCK_DIR is not writable"
|
264
|
+
fi
|
265
|
+
else
|
266
|
+
info "Creating locks dir"
|
267
|
+
mkdir $LOCK_DIR
|
268
|
+
fi
|
269
|
+
|
270
|
+
if [ -z "$MAILPROG" ]
|
271
|
+
then
|
272
|
+
MAILPROG=`which mail 2>/dev/null`
|
273
|
+
|
274
|
+
if [ "$MAILPROG" == "" ]
|
275
|
+
then
|
276
|
+
severe " Unable to find a mail program"
|
277
|
+
fi
|
278
|
+
fi
|
279
|
+
|
280
|
+
if [ -z "$SENDMAILBIN" ]
|
281
|
+
then
|
282
|
+
SENDMAILBIN=`which sendmail 2>/dev/null`
|
283
|
+
|
284
|
+
if [ "$SENDMAILBIN" == "" ]
|
285
|
+
then
|
286
|
+
severe " Unable to find a sendmail program"
|
287
|
+
fi
|
288
|
+
fi
|
289
|
+
|
290
|
+
if [ -f $LOG ]
|
291
|
+
then
|
292
|
+
rm $LOG
|
293
|
+
fi
|
294
|
+
|
295
|
+
#Expire old Locks
|
296
|
+
info "Deleting Locks older than $LOCK_TIMEOUT min"
|
297
|
+
find $LOCK_DIR/* -type f -mmin +$LOCK_TIMEOUT -delete 2> /dev/null
|
298
|
+
|
299
|
+
#Check the connector status
|
300
|
+
if [ $CONNECTOR == 1 ]
|
301
|
+
then
|
302
|
+
connector_ok_to_allow_elb=0
|
303
|
+
info "Running Connector Tests"
|
304
|
+
CONN=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -c| grep -v OK | wc -l)
|
305
|
+
if [ $CONN -ne 0 ]
|
306
|
+
then
|
307
|
+
error " Connector is not running"
|
308
|
+
echo "Connector is not running on $HOST - Investigate" >> $LOG
|
309
|
+
if [ ! -f $LOCK_DIR/con_running.lck ]
|
310
|
+
then
|
311
|
+
SENDMAIL=1
|
312
|
+
touch $LOCK_DIR/con_running.lck
|
313
|
+
else
|
314
|
+
info "Not sending Email lock file exists"
|
315
|
+
fi
|
316
|
+
else
|
317
|
+
info "Connector is running OK"
|
318
|
+
|
319
|
+
TIMEOUT=`which timeout 2>/dev/null`
|
320
|
+
|
321
|
+
if [ "$TIMEOUT" == "" ]
|
322
|
+
then
|
323
|
+
info "timeout command not found - unable to check if the connector is responding"
|
324
|
+
else
|
325
|
+
info "Checking Connector is responding to queries"
|
326
|
+
CON_USER=$(getSetting connector_user)
|
327
|
+
CON_PW=$(getSetting connector_password)
|
328
|
+
CON_PORT=$(getSetting connector_listen_port)
|
329
|
+
CHECK=$(timeout -s HUP $CONNECTOR_TIMEOUT $MYSQL -P$CON_PORT -u $CON_USER -p$CON_PW -h $HOSTNAME --skip-column-names -Be"select 'ALIVE'")
|
330
|
+
if [ "$CHECK" != 'ALIVE' ]
|
331
|
+
then
|
332
|
+
error 'Unable to connect to connector'
|
333
|
+
echo "Connector is not responding on $HOST - Investigate" >> $LOG
|
334
|
+
connector_ok=0
|
335
|
+
if [ ! -f $LOCK_DIR/con_responding.lck ]
|
336
|
+
then
|
337
|
+
SENDMAIL=1
|
338
|
+
touch $LOCK_DIR/con_responding.lck
|
339
|
+
else
|
340
|
+
info "Not sending Email lock file exists"
|
341
|
+
fi
|
342
|
+
else
|
343
|
+
info 'Connector is alive'
|
344
|
+
connector_ok_to_allow_elb=1
|
345
|
+
fi
|
346
|
+
fi
|
347
|
+
fi
|
348
|
+
|
349
|
+
if [ $CHECK_ELB == 1 ]
|
350
|
+
then
|
351
|
+
if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ $connector_ok_to_allow_elb == 1 ]
|
352
|
+
then
|
353
|
+
|
354
|
+
sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
|
355
|
+
sudo service xinetd reload
|
356
|
+
fi
|
357
|
+
fi
|
358
|
+
fi
|
359
|
+
|
360
|
+
#Check the cluster Status
|
361
|
+
if [ $CLUSTER == 1 ]
|
362
|
+
then
|
363
|
+
#Check the processes are running
|
364
|
+
info "Running Cluster Tests"
|
365
|
+
REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r| grep -v OK | wc -l)
|
366
|
+
if [ $REPL -ne 0 ]
|
367
|
+
then
|
368
|
+
error " Replicator or Manager in cluster is not running"
|
369
|
+
echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
|
370
|
+
if [ ! -f $LOCK_DIR/rep_running.lck ]
|
371
|
+
then
|
372
|
+
SENDMAIL=1
|
373
|
+
touch $LOCK_DIR/rep_running.lck
|
374
|
+
else
|
375
|
+
info "Not sending Email lock file exists"
|
376
|
+
fi
|
377
|
+
|
378
|
+
else
|
379
|
+
info "Replicator and Manager in cluster are running OK"
|
380
|
+
fi
|
381
|
+
|
382
|
+
#Check the processes are online
|
383
|
+
if [ "$SERVICES" == "" ]
|
384
|
+
then
|
385
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -v OK | wc -l)
|
386
|
+
if [ $ONLINE -ne 0 ]
|
387
|
+
then
|
388
|
+
error "Services are not online"
|
389
|
+
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
390
|
+
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
391
|
+
then
|
392
|
+
SENDMAIL=1
|
393
|
+
touch $LOCK_DIR/rep_online.lck
|
394
|
+
else
|
395
|
+
info "Not sending Email lock file exists"
|
396
|
+
fi
|
397
|
+
|
398
|
+
else
|
399
|
+
info "Services are online"
|
400
|
+
fi
|
401
|
+
else
|
402
|
+
services=$(echo "$SERVICES" | sed 's/,/ /g')
|
403
|
+
for s in $services
|
404
|
+
do
|
405
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -v OK | wc -l)
|
406
|
+
if [ $ONLINE -ne 0 ]
|
407
|
+
then
|
408
|
+
error "Services are not online @ $s"
|
409
|
+
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
410
|
+
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
411
|
+
then
|
412
|
+
SENDMAIL=1
|
413
|
+
touch $LOCK_DIR/rep_online.lck
|
414
|
+
else
|
415
|
+
info "Not sending Email lock file exists"
|
416
|
+
fi
|
417
|
+
|
418
|
+
else
|
419
|
+
info "Services are online @ $s"
|
420
|
+
fi
|
421
|
+
done
|
422
|
+
fi
|
423
|
+
|
424
|
+
#Check for replicator latency
|
425
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -v OK | wc -l)
|
426
|
+
if [ $ONLINE -ne 0 ]
|
427
|
+
then
|
428
|
+
error "Services are Lagging"
|
429
|
+
echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
|
430
|
+
if [ ! -f $LOCK_DIR/rep_lag.lck ]
|
431
|
+
then
|
432
|
+
SENDMAIL=1
|
433
|
+
touch $LOCK_DIR/rep_lag.lck
|
434
|
+
else
|
435
|
+
info "Not sending Email lock file exists"
|
436
|
+
fi
|
437
|
+
|
438
|
+
else
|
439
|
+
info "Cluster Replicator is keeping up"
|
440
|
+
fi
|
441
|
+
fi
|
442
|
+
|
443
|
+
#Check the Replicator
|
444
|
+
if [ $REPLICATOR == 1 ]
|
445
|
+
then
|
446
|
+
if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
447
|
+
then
|
448
|
+
severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
|
449
|
+
fi
|
450
|
+
|
451
|
+
AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep "Connection failed" | wc -l)
|
452
|
+
if [ $AVAILABLE -gt 0 ]
|
453
|
+
then
|
454
|
+
error "Replicator process is not running on $REPLICATOR_PORT"
|
455
|
+
echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
|
456
|
+
if [ ! -f $LOCK_DIR/tr_rep_running.lck ]
|
457
|
+
then
|
458
|
+
SENDMAIL=1
|
459
|
+
touch $LOCK_DIR/tr_rep_running.lck
|
460
|
+
else
|
461
|
+
info "Not sending Email lock file exists"
|
462
|
+
fi
|
463
|
+
|
464
|
+
else
|
465
|
+
info "TR Replicator is running"
|
466
|
+
fi
|
467
|
+
|
468
|
+
|
469
|
+
ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services| grep state | grep -v ONLINE | wc -l)
|
470
|
+
if [ $ONLINE -gt 0 ]
|
471
|
+
then
|
472
|
+
error "Replicator is down"
|
473
|
+
echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
|
474
|
+
if [ ! -f $LOCK_DIR/tr_rep_online.lck ]
|
475
|
+
then
|
476
|
+
SENDMAIL=1
|
477
|
+
touch $LOCK_DIR/tr_rep_online.lck
|
478
|
+
else
|
479
|
+
info "Not sending Email lock file exists"
|
480
|
+
fi
|
481
|
+
|
482
|
+
else
|
483
|
+
info "TR Replicator is online"
|
484
|
+
fi
|
485
|
+
|
486
|
+
#Check for latency
|
487
|
+
LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services|grep appliedLatency|cut -d ':' -f2)
|
488
|
+
|
489
|
+
for LATENCY in $LATENCY_LIST
|
490
|
+
do
|
491
|
+
if float_cond "$LATENCY > $LAG"; then
|
492
|
+
error "Replicator is lagging"
|
493
|
+
echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
|
494
|
+
if [ ! -f $LOCK_DIR/tr_rep_lag.lck ]
|
495
|
+
then
|
496
|
+
SENDMAIL=1
|
497
|
+
touch $LOCK_DIR/tr_rep_lag.lck
|
498
|
+
else
|
499
|
+
info "Not sending Email lock file exists"
|
500
|
+
fi
|
501
|
+
else
|
502
|
+
info "Replicator latency ok"
|
503
|
+
fi
|
504
|
+
done
|
505
|
+
|
506
|
+
|
507
|
+
fi
|
508
|
+
|
509
|
+
#Check the disk space
|
510
|
+
if [ $DISK == 1 ]
|
511
|
+
then
|
512
|
+
|
513
|
+
df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }' | while read output;
|
514
|
+
do
|
515
|
+
usep=$(echo $output | awk '{ print $1}' | cut -d'%' -f1 )
|
516
|
+
partition=$(echo $output | awk '{ print $2 }' )
|
517
|
+
if [ $usep -ge $DISK_WARNING ]; then
|
518
|
+
error "Running out of disk space on $partition"
|
519
|
+
echo "Running out for disk space on $HOST $partition - Investigate" >> $LOG
|
520
|
+
if [ ! -f $LOCK_DIR/disk.lck ]
|
521
|
+
then
|
522
|
+
SENDMAIL=1
|
523
|
+
touch $LOCK_DIR/disk.lck
|
524
|
+
else
|
525
|
+
info "Not sending Email lock file exists"
|
526
|
+
fi
|
527
|
+
fi
|
528
|
+
done
|
529
|
+
|
530
|
+
fi
|
531
|
+
|
532
|
+
if [ $SENDMAIL == 1 ]
|
533
|
+
then
|
534
|
+
if [ $DEBUG == 1 ]
|
535
|
+
then
|
536
|
+
info "Sending Email to $EMAIL"
|
537
|
+
info "Subject $SUBJECT"
|
538
|
+
cat $LOG
|
539
|
+
fi
|
540
|
+
|
541
|
+
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
542
|
+
then
|
543
|
+
manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep "PID" | wc -l)
|
544
|
+
if [ $manager_running -eq 1 ]; then
|
545
|
+
info "Adding cctrl output to email"
|
546
|
+
echo >> $LOG
|
547
|
+
echo "OUTPUT FROM cctrl ls on $HOST" >> $LOG
|
548
|
+
echo '--------------------------------------------------' >> $LOG
|
549
|
+
echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert >> $LOG
|
550
|
+
echo '--------------------------------------------------' >> $LOG
|
551
|
+
else
|
552
|
+
info 'Manager not running skipping cctrl output'
|
553
|
+
echo "Manager not running unable to gather cctrl output" >> $LOG
|
554
|
+
fi
|
555
|
+
|
556
|
+
fi
|
557
|
+
if [ $REPLICATOR == 1 ]
|
558
|
+
then
|
559
|
+
if [ -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
560
|
+
then
|
561
|
+
info "Adding trepctl output to email"
|
562
|
+
echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST" >> $LOG
|
563
|
+
echo '--------------------------------------------------' >> $LOG
|
564
|
+
|
565
|
+
|
566
|
+
$REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services >> $LOG
|
567
|
+
echo '--------------------------------------------------' >> $LOG
|
568
|
+
else
|
569
|
+
info 'trepctl not found'
|
570
|
+
echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
|
571
|
+
fi
|
572
|
+
fi
|
573
|
+
$MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
|
574
|
+
fi
|
575
|
+
|
576
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# The script should be run as the tungsten user to ensure the
|
4
|
+
# environment is set correctly. Export the environment variables before
|
5
|
+
# calling the script to modify behavior
|
6
|
+
#
|
7
|
+
|
8
|
+
require "#{File.dirname(__FILE__)}/../lib/ruby/tungsten"
|
9
|
+
|
10
|
+
class CheckTungstenBackups
|
11
|
+
include TungstenScript
|
12
|
+
|
13
|
+
def main
|
14
|
+
status = TI.status()
|
15
|
+
unless status.coordinator() == TI.hostname()
|
16
|
+
nagios_ok("Not running check because this node is not the coordinator")
|
17
|
+
end
|
18
|
+
seconds_since_epoch = TU.cmd_result("date +%s").to_i()
|
19
|
+
most_recent_backup = nil
|
20
|
+
|
21
|
+
status.datasources().each{
|
22
|
+
|ds|
|
23
|
+
begin
|
24
|
+
TU.ssh_result("stat -c\"%n %Y\" #{TI.trepctl_property(status.name(), 'replicator.storage.agent.fs.directory')}/store*.properties 2>/dev/null", ds, TI.user()).split("\n").each{
|
25
|
+
|line|
|
26
|
+
stored_backup=line.split(" ")
|
27
|
+
stored_backup[1] = stored_backup[1].to_i()
|
28
|
+
|
29
|
+
if most_recent_backup == nil || stored_backup[1] > most_recent_backup[:seconds]
|
30
|
+
most_recent_backup = {
|
31
|
+
:hostname => ds,
|
32
|
+
:filename => stored_backup[0],
|
33
|
+
:seconds => stored_backup[1]
|
34
|
+
}
|
35
|
+
end
|
36
|
+
}
|
37
|
+
rescue CommandError
|
38
|
+
end
|
39
|
+
}
|
40
|
+
|
41
|
+
if most_recent_backup == nil
|
42
|
+
nagios_critical("Unable to find a backup on any datasource")
|
43
|
+
end
|
44
|
+
|
45
|
+
age = seconds_since_epoch-most_recent_backup[:seconds]
|
46
|
+
if age > @options[:max_backup_age]
|
47
|
+
nagios_critical("#{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
|
48
|
+
else
|
49
|
+
nagios_ok("The most recent backup is #{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s]")
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def configure
|
54
|
+
super()
|
55
|
+
|
56
|
+
description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")
|
57
|
+
add_option(:max_backup_age, {
|
58
|
+
:on => "--max-backup-age String",
|
59
|
+
:help => "Maximum allowed age in seconds of a backup on any machine",
|
60
|
+
:parse => method(:parse_integer_option),
|
61
|
+
:default => 86400
|
62
|
+
})
|
63
|
+
end
|
64
|
+
|
65
|
+
def script_name
|
66
|
+
"check_tungsten_backups"
|
67
|
+
end
|
68
|
+
|
69
|
+
self.new().run()
|
70
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Simple Bash Script To Check Tungsten Latency
|
4
|
+
# Nagios Plugin For NRPE
|
5
|
+
#
|
6
|
+
# This script accepts two arguments, {{-w}} and {{-c}}. The {{-w}} flag is
|
7
|
+
# the level at which a warning should be returned. {{-c}} sets the level for
|
8
|
+
# a critical return value. The script uses the maximum latency of any slave
|
9
|
+
# to determine the return value.
|
10
|
+
#
|
11
|
+
OK_STATE=0
|
12
|
+
WARNING_STATE=1
|
13
|
+
CRITICAL_STATE=2
|
14
|
+
THOME=`dirname $0`
|
15
|
+
|
16
|
+
function display_help()
|
17
|
+
{
|
18
|
+
echo "Usage: ./check_tungsten_latency -w warning_level -c critical_level [-h]"
|
19
|
+
echo " -w Throw a warning alert if the maximum latency"
|
20
|
+
echo " is above this level"
|
21
|
+
echo " -c Throw a critical alert if the maximum latency"
|
22
|
+
echo " is above this level"
|
23
|
+
echo " --perfdata Display performance data of the latency"
|
24
|
+
echo " --perslave-perfdata Show performance latency values of each slave."
|
25
|
+
echo " If this is not set the maximum latency will be"
|
26
|
+
echo " displayed in the performace data"
|
27
|
+
echo " -h Display this message"
|
28
|
+
exit 0
|
29
|
+
}
|
30
|
+
|
31
|
+
# We will use this to make some floating point comparisons
|
32
|
+
function float_cond()
|
33
|
+
{
|
34
|
+
local cond=0
|
35
|
+
if [[ $# -gt 0 ]]; then
|
36
|
+
cond=$(echo "$*" | bc -q 2>&1)
|
37
|
+
if [[ $? -ne 0 ]]; then
|
38
|
+
echo "Error: $cond"
|
39
|
+
exit 1
|
40
|
+
fi
|
41
|
+
if [[ -z "$cond" ]]; then cond=0; fi
|
42
|
+
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
|
43
|
+
fi
|
44
|
+
local stat=$((cond == 0))
|
45
|
+
return $stat
|
46
|
+
}
|
47
|
+
|
48
|
+
warning_level=0
|
49
|
+
critical_level=0
|
50
|
+
perfdata="false"
|
51
|
+
performance_data_default_glue=""
|
52
|
+
performance_data_suffix=""
|
53
|
+
perfdata_allslaves=""
|
54
|
+
|
55
|
+
for arg
|
56
|
+
do
|
57
|
+
delim=""
|
58
|
+
case "$arg" in
|
59
|
+
#translate --gnu-long-options to -g (short options)
|
60
|
+
--perfdata) args="${args}-p ";;
|
61
|
+
--perslave-perfdata) args="${args}-s ";;
|
62
|
+
#pass through anything else
|
63
|
+
*) [[ "${arg:0:1}" == "-" ]] || delim="\""
|
64
|
+
args="${args}${delim}${arg}${delim} ";;
|
65
|
+
esac
|
66
|
+
done
|
67
|
+
|
68
|
+
#Reset the positional parameters to the short options
|
69
|
+
eval set -- $args
|
70
|
+
|
71
|
+
while getopts "w:c:h:nps" Option
|
72
|
+
do
|
73
|
+
case $Option in
|
74
|
+
w )
|
75
|
+
warning_level=$OPTARG
|
76
|
+
;;
|
77
|
+
c )
|
78
|
+
critical_level=$OPTARG
|
79
|
+
;;
|
80
|
+
h )
|
81
|
+
display_help
|
82
|
+
;;
|
83
|
+
p )
|
84
|
+
perfdata="true"
|
85
|
+
;;
|
86
|
+
s )
|
87
|
+
perfdata_allslaves="true"
|
88
|
+
;;
|
89
|
+
esac
|
90
|
+
done
|
91
|
+
if float_cond "$warning_level == 0"; then
|
92
|
+
echo "Error: warning_level has not been set"
|
93
|
+
echo ""
|
94
|
+
display_help
|
95
|
+
fi
|
96
|
+
|
97
|
+
if float_cond "$critical_level == 0"; then
|
98
|
+
echo "Error: critical_level has not been set"
|
99
|
+
echo ""
|
100
|
+
display_help
|
101
|
+
fi
|
102
|
+
|
103
|
+
if [ "$perfdata" == "true" ]; then
|
104
|
+
performance_data_default_glue=" "
|
105
|
+
performance_data_suffix=";$warning_level;$critical_level;;"
|
106
|
+
fi
|
107
|
+
|
108
|
+
error_message=""
|
109
|
+
error_messaage_glue=""
|
110
|
+
performance_data_glue=""
|
111
|
+
performance_data="| "
|
112
|
+
max_latency=0
|
113
|
+
|
114
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
115
|
+
# Check the manager status
|
116
|
+
if [ $manager_running -eq 0 ]; then
|
117
|
+
echo "CRITICAL: Manager is not running"
|
118
|
+
exit $CRITICAL_STATE
|
119
|
+
fi
|
120
|
+
|
121
|
+
latency_values=`echo "ls -l" | ${THOME}/../../tungsten-manager/bin/cctrl | grep -E "REPLICATOR\(role=[relay|slave]|appliedLatency" | tr -d "| "`
|
122
|
+
|
123
|
+
current_slave=""
|
124
|
+
for line in $latency_values
|
125
|
+
do
|
126
|
+
if [[ $current_slave == "" ]]
|
127
|
+
then
|
128
|
+
current_slave=`echo $line | grep "REPLICATOR" | cut -f 1 -d ":"`
|
129
|
+
else
|
130
|
+
applied_latency=`echo $line | grep "appliedLatency" | cut -f 2 -d ":"`
|
131
|
+
|
132
|
+
if float_cond "$applied_latency > $max_latency"; then
|
133
|
+
max_latency=$applied_latency
|
134
|
+
fi
|
135
|
+
|
136
|
+
if float_cond "$applied_latency > $warning_level"; then
|
137
|
+
error_message="$error_message$error_message_glue$current_slave=$applied_latency""s"
|
138
|
+
error_message_glue=", "
|
139
|
+
fi
|
140
|
+
|
141
|
+
if float_cond "$applied_latency == -1"; then
|
142
|
+
error_message="$error_message$error_message_glue$current_slave is missing latency information"
|
143
|
+
error_message_glue=", "
|
144
|
+
fi
|
145
|
+
|
146
|
+
performance_data="$performance_data$performance_data_glue$current_slave=$applied_latency$performance_data_suffix"
|
147
|
+
performance_data_glue="$performance_data_default_glue"
|
148
|
+
current_slave=""
|
149
|
+
fi
|
150
|
+
done
|
151
|
+
|
152
|
+
if [ "$perfdata_allslaves" != "true" ]; then
|
153
|
+
performance_data="| max_latency=${max_latency}$performance_data_suffix"
|
154
|
+
fi
|
155
|
+
|
156
|
+
if [ "$perfdata" == "false" ]; then
|
157
|
+
performance_data=""
|
158
|
+
fi
|
159
|
+
|
160
|
+
if float_cond "$max_latency > $critical_level"; then
|
161
|
+
echo "CRITICAL: $error_message $performance_data"
|
162
|
+
exit $CRITICAL_STATE
|
163
|
+
fi
|
164
|
+
|
165
|
+
if [[ $error_message != "" ]]; then
|
166
|
+
echo "WARNING: $error_message $performance_data"
|
167
|
+
exit $WARNING_STATE
|
168
|
+
fi
|
169
|
+
|
170
|
+
echo "OK: All slaves are running normally (max_latency=${max_latency}) $performance_data "
|
171
|
+
|
172
|
+
exit $OK_STATE
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
#
|
19
|
+
# Simple Bash Script To Check Tungsten Services
|
20
|
+
# Nagios Plugin For NRPE
|
21
|
+
#
|
22
|
+
# This script does not accept any arguments. It will return a warning if any
|
23
|
+
# of Tungsten resources is not in an {{ONLINE}} state. It uses the output of
|
24
|
+
# the {{ls resources}} command to determine the current state.
|
25
|
+
#
|
26
|
+
OK_STATE=0
|
27
|
+
WARNING_STATE=1
|
28
|
+
CRITICAL_STATE=2
|
29
|
+
THOME=`dirname $0`
|
30
|
+
|
31
|
+
error_message=""
|
32
|
+
error_messaage_glue=""
|
33
|
+
offline_count=0
|
34
|
+
dataservice=""
|
35
|
+
skip_shun=0
|
36
|
+
|
37
|
+
function display_help()
|
38
|
+
{
|
39
|
+
echo "Usage: ./check_tungsten_online -s dataservice [-h]"
|
40
|
+
echo " -s The data service you would like to check"
|
41
|
+
echo " -h Display this message"
|
42
|
+
echo " -n Skip Shunned Services"
|
43
|
+
exit 0
|
44
|
+
}
|
45
|
+
|
46
|
+
while getopts "s:h:n" Option
|
47
|
+
do
|
48
|
+
case $Option in
|
49
|
+
h )
|
50
|
+
display_help
|
51
|
+
;;
|
52
|
+
s )
|
53
|
+
dataservice=$OPTARG
|
54
|
+
;;
|
55
|
+
n )
|
56
|
+
skip_shun=1
|
57
|
+
;;
|
58
|
+
esac
|
59
|
+
done
|
60
|
+
|
61
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
62
|
+
# Check the manager status
|
63
|
+
if [ $manager_running -eq 0 ]; then
|
64
|
+
echo "CRITICAL: Manager is not running"
|
65
|
+
exit $CRITICAL_STATE
|
66
|
+
fi
|
67
|
+
|
68
|
+
if [ "$dataservice" == "" ]; then
|
69
|
+
offline_services=`echo "ls resources" | ${THOME}/../../tungsten-manager/bin/cctrl | grep \| | grep : | grep -v ONLINE | tr -d "| " | cut -f 1,2 -d ":"`
|
70
|
+
else
|
71
|
+
offline_services=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep "(\(composite \)\?master\|(\(composite \)\?slave\|(relay" | grep -v ONLINE | tr -d "|" | cut -f 1 -d "("`
|
72
|
+
fi
|
73
|
+
|
74
|
+
for offline_service in $offline_services
|
75
|
+
do
|
76
|
+
offline_count=$(($offline_count+1))
|
77
|
+
error_message="$error_message$error_message_glue$offline_service"
|
78
|
+
error_message_glue=", "
|
79
|
+
done
|
80
|
+
|
81
|
+
if [ $offline_count -gt 0 ]
|
82
|
+
then
|
83
|
+
echo "CRITICAL: $error_message are not ONLINE"
|
84
|
+
exit $CRITICAL_STATE
|
85
|
+
fi
|
86
|
+
|
87
|
+
|
88
|
+
if [ $skip_shun -eq 0 ]
|
89
|
+
then
|
90
|
+
if [ "$dataservice" == "" ]; then
|
91
|
+
shunned=`echo "ls" | ${THOME}/../../tungsten-manager/bin/cctrl | grep 'SHUNNED' | wc -l`
|
92
|
+
else
|
93
|
+
shunned=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep 'SHUNNED' | wc -l`
|
94
|
+
fi
|
95
|
+
|
96
|
+
if [ $shunned -gt 0 ]
|
97
|
+
then
|
98
|
+
echo "CRITICAL: Dataservices are shunned"
|
99
|
+
exit $CRITICAL_STATE
|
100
|
+
fi
|
101
|
+
fi
|
102
|
+
|
103
|
+
|
104
|
+
echo "OK: All services are online"
|
105
|
+
exit $OK_STATE
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/bin/bash
#
# Simple Bash Script To Check Tungsten Policy
# Nagios Plugin For NRPE
#
# Returns CRITICAL when the cluster is in MAINTENANCE mode (or when the
# manager process is not running), OK when the cluster is in AUTOMATIC
# mode. An optional -s argument restricts the check to one dataservice.
#
# Nagios exit codes
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; used to locate the Tungsten tools
THOME=`dirname $0`

# FIX: removed unused leftovers copied from check_tungsten_online
# (error_message, misspelled error_messaage_glue, offline_count) —
# none of them were referenced anywhere in this script.
dataservice=""

# Print usage information and exit.
function display_help()
{
echo "Usage: ./check_tungsten_policy -s dataservice [-h]"
echo " -s The data service you would like to check"
echo " -h Display this message"
exit 0
}

while getopts "s:h" Option
do
case $Option in
h )
display_help
;;
s )
dataservice=$OPTARG
;;
esac
done

# Confirm the manager process is running before talking to cctrl.
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
if [ $manager_running -eq 0 ]; then
echo "CRITICAL: Manager is not running"
exit $CRITICAL_STATE
fi

# Count MAINTENANCE occurrences in the cctrl listing.
# NOTE(review): the other scripts in this package pass -multi to cctrl
# when issuing "use $dataservice" — confirm plain cctrl accepts "use" here.
if [ "$dataservice" == "" ]; then
maint_mode=`echo "ls " | ${THOME}/../../tungsten-manager/bin/cctrl | grep MAINTENANCE | wc -l`
else
maint_mode=`echo "use $dataservice; ls " | ${THOME}/../../tungsten-manager/bin/cctrl | grep MAINTENANCE | wc -l`
fi

if [ $maint_mode -gt 0 ]
then
echo "CRITICAL: Cluster is in Maintenance mode"
exit $CRITICAL_STATE
fi

echo "OK: Cluster is in Automatic Mode"
exit $OK_STATE
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/bin/bash
#
# Simple Bash Script To Check Tungsten Progress
# Nagios Plugin For NRPE
#
# This script accepts one argument, {{-t}}: the number of seconds to wait
# between two samples of the replicator's appliedLastSeqno. A cluster
# heartbeat is issued after the first sample; if the sequence number has
# not advanced by at least 1 after the wait, a WARNING is returned.
# NOTE(review): an earlier header here described -w/-c threshold options,
# but they are not implemented — getopts below only accepts -t and -h.
#
# Nagios exit codes
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; used to locate the Tungsten tools
THOME=`dirname $0`

# Print usage information and exit.
function display_help()
{
echo "Usage: ./check_tungsten_progress -t time [-h]"
echo " -t The number of seconds to wait when monitoring progress"
echo " -h Display this message"
exit 0
}

# We will use this to make some floating point comparisons.
# Evaluates "$*" with bc; returns success (0) when the expression is
# true (bc printed 1), failure otherwise. Exits the script on a bc error.
function float_cond()
{
local cond=0
if [[ $# -gt 0 ]]; then
cond=$(echo "$*" | bc -q 2>&1)
if [[ $? -ne 0 ]]; then
echo "Error: $cond"
exit 1
fi
if [[ -z "$cond" ]]; then cond=0; fi
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
fi
local stat=$((cond == 0))
return $stat
}

# Default sampling window of 1 second; overridden by -t.
time_period=1
while getopts "t:h" Option
do
case $Option in
t)
time_period=$OPTARG
;;
h )
display_help
;;
esac
done

# A zero time period would make the progress comparison meaningless.
if float_cond "$time_period == 0"; then
echo "Error: time_period has not been set"
echo ""
display_help
fi

# The replicator must be ONLINE for a progress check to be valid.
is_online=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "state" | grep "ONLINE" | wc -l`
if float_cond "$is_online == 0"; then
echo "CRITICAL: Replicator is not ONLINE"
exit $CRITICAL_STATE
fi

# Sample appliedLastSeqno, force a heartbeat event through the cluster,
# wait, then sample again.
pre_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`
echo "cluster heartbeat" | ${THOME}/../../tungsten-manager/bin/cctrl > /dev/null
sleep $time_period
post_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`

progress_number=`echo "$post_progress_number - $pre_progress_number" | bc -q 2>/dev/null`

# The heartbeat alone should advance the sequence number by at least 1.
if float_cond "$progress_number < 1"; then
echo "WARNING: Replicator did not show progress"
exit $WARNING_STATE
fi

echo "OK: Replicator is making progress"
exit $OK_STATE
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/bin/bash
#
# Simple Bash Script To Check Tungsten Services
# Nagios Plugin For NRPE
#
# This script accepts two arguments, {{-r}} and {{-c}}. The {{-r}} argument
# will tell the script to check that the replicator and manager services are
# running. The {{-c}} argument will check the connector service, the
# arguments may be given individually or together. If any service is not
# running, the script will return a critical error.
#
# Nagios exit codes
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; used to locate the Tungsten tools
THOME=`dirname $0`

# Print usage information and exit.
# FIX: the usage line previously named the wrong script
# ("check_tungsten_latency") — copy-paste error from a sibling plugin.
function display_help
{
echo "Usage: ./check_tungsten_services [-r] [-c] [-h]"
echo " -r Check the replicator and manager services"
echo " -c Check the connector service"
echo " -h Display this message"
exit 0
}

# Accumulators for the summary messages
services_checked=""
services_checked_glue=""
services_stopped=""
services_stopped_glue=""
stopped_count=0

while getopts "rch" Option
do
case $Option in
r )
# Mark that the replicator and manager were checked
services_checked="${services_checked}${services_checked_glue}Replicator, Manager"
services_checked_glue=", "

replicator_running=`${THOME}/../../tungsten-replicator/bin/replicator status | grep "PID" | wc -l`
# Check the replicator status
if [ $replicator_running -eq 0 ]; then
services_stopped="${services_stopped}${services_stopped_glue}Replicator"
services_stopped_glue=", "
stopped_count=$(($stopped_count+1))
fi

manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
# Check the manager status
if [ $manager_running -eq 0 ]; then
services_stopped="${services_stopped}${services_stopped_glue}Manager"
services_stopped_glue=", "
stopped_count=$(($stopped_count+1))
fi
;;
c )
# Mark that the connector was checked
services_checked="${services_checked}${services_checked_glue}Connector"
services_checked_glue=", "

connector_running=`${THOME}/../../tungsten-connector/bin/connector status | grep "PID" | wc -l`
# Check the connector status
if [ $connector_running -eq 0 ]; then
services_stopped="${services_stopped}${services_stopped_glue}Connector"
services_stopped_glue=", "
stopped_count=$(($stopped_count+1))
fi
;;
h )
display_help
;;
esac
done

# One of the services isn't running
if [ $stopped_count -gt 0 ]
then
if [ $stopped_count -gt 1 ]; then
echo "CRITICAL: $services_stopped are not running"
else
echo "CRITICAL: $services_stopped is not running"
fi

exit $CRITICAL_STATE
fi

# Ensure that something was checked
if [[ $services_checked == "" ]]; then
echo "CRITICAL: No services were checked"
exit $CRITICAL_STATE
fi

# Everything is running
echo "OK: All services (${services_checked}) are running"
exit $OK_STATE
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Copyright (C) 2014 Continuent, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Initial developer(s): Jeff Mace
# Contributor(s):

# Nagios check that verifies a MySQL client can actually connect through
# the local Tungsten Connector. Relies on the continuent-tools-core gem
# for the TungstenScript framework (TI = Tungsten install, TU = utilities).
begin
require 'rubygems'
gem 'continuent-tools-core'
rescue LoadError
end

require 'continuent-tools-core'
require 'continuent-tools-nagios-monitor'

class ContinuentNagiosMonitorConnector
include TungstenScript
include TungstenNagiosMonitor
private

# Emits CRITICAL if the connector process is down or a test statement
# cannot be executed through it; OK otherwise.
def main
unless TI.is_running?("connector")
critical("The Tungsten Connector is not running")
end

begin
# Pipe the configured statement into the mysql client, connecting to
# the connector on this host using the prepared defaults file.
# NOTE(review): opt(:statement) is interpolated into a shell command —
# safe for the built-in default, but a crafted --statement could inject
# shell text; confirm callers are trusted.
TU.cmd_result("echo '#{opt(:statement)}' | mysql --defaults-file=#{opt(:defaults_file)} -h#{TI.hostname()}")
rescue CommandError => ce
TU.debug(ce)
critical("A connection to the Tungsten Connector could not be created")
end

ok("The connection was successfully created")
end

# Declares the script's command-line options on top of the framework's.
def configure
super()

# Optional path to a MySQL defaults file; auto-generated in validate()
# when not supplied.
add_option(:defaults_file, {
:on => "--defaults-file String",
:help => "The defaults file to use when connecting to MySQL"
})

# SQL text sent through the connector to prove the connection works.
add_option(:statement, {
:on => "--statement String",
:help => "The command to run against the Tungsten Connector",
:default => "tungsten connection status",
})
end

# Validates the environment and, when no defaults file was given, writes
# a temporary one from the connector's configured credentials.
def validate
super()

unless TU.is_valid?()
return TU.is_valid?()
end

unless TI.is_connector?()
unknown("This server is not a Tungsten Connector")
end

if opt(:defaults_file).to_s() == ""
# Tempfile must stay referenced so the file survives until main() runs;
# it is removed automatically when the object is finalized.
defaults_file = Tempfile.new("tungsten_nagios_connector")
opt(:defaults_file, defaults_file.path())

defaults_file.puts("[client]")
defaults_file.puts("user=#{TI.setting(TI.setting_key(CONNECTORS, "connector_user"))}")
defaults_file.puts("password=#{TI.setting(TI.setting_key(CONNECTORS, "connector_password"))}")
defaults_file.puts("port=#{TI.setting(TI.setting_key(CONNECTORS, "connector_listen_port"))}")
defaults_file.flush()
end
end

# Name reported by the TungstenScript framework (usage text, logging).
def script_name
"tungsten_nagios_connector"
end

self.new().run()
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: continuent-monitors-nagios
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Continuent
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: continuent-tools-monitoring
|
@@ -27,10 +27,26 @@ dependencies:
|
|
27
27
|
description:
|
28
28
|
email: info@continuent.com
|
29
29
|
executables:
|
30
|
+
- check_tungsten.sh
|
31
|
+
- check_tungsten_backups
|
32
|
+
- check_tungsten_latency
|
33
|
+
- check_tungsten_online
|
34
|
+
- check_tungsten_policy
|
35
|
+
- check_tungsten_progress
|
36
|
+
- check_tungsten_services
|
37
|
+
- tungsten_nagios_connector
|
30
38
|
- tungsten_nagios_monitor_threads
|
31
39
|
extensions: []
|
32
40
|
extra_rdoc_files: []
|
33
41
|
files:
|
42
|
+
- bin/check_tungsten.sh
|
43
|
+
- bin/check_tungsten_backups
|
44
|
+
- bin/check_tungsten_latency
|
45
|
+
- bin/check_tungsten_online
|
46
|
+
- bin/check_tungsten_policy
|
47
|
+
- bin/check_tungsten_progress
|
48
|
+
- bin/check_tungsten_services
|
49
|
+
- bin/tungsten_nagios_connector
|
34
50
|
- bin/tungsten_nagios_monitor_threads
|
35
51
|
- LICENSE
|
36
52
|
- README.md
|
@@ -54,7 +70,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
70
|
version: '0'
|
55
71
|
requirements: []
|
56
72
|
rubyforge_project:
|
57
|
-
rubygems_version: 2.0.
|
73
|
+
rubygems_version: 2.0.14
|
58
74
|
signing_key:
|
59
75
|
specification_version: 4
|
60
76
|
summary: Continuent Tungsten monitoring scripts for Nagios
|