continuent-monitors-nagios 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/check_tungsten.sh +576 -0
- data/bin/check_tungsten_backups +70 -0
- data/bin/check_tungsten_latency +172 -0
- data/bin/check_tungsten_online +105 -0
- data/bin/check_tungsten_policy +61 -0
- data/bin/check_tungsten_progress +81 -0
- data/bin/check_tungsten_services +95 -0
- data/bin/tungsten_nagios_connector +91 -0
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a06f684ddc84caf2d0b1c859a33b5d34b5d2d63
|
4
|
+
data.tar.gz: a0bebc5b0902b9bbe71460e6335e757529b79c90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44f04b5c74c7409d300fdbed92c1e2bc010727549625801d859092693e9f7c49d75d7e927cc35ddfbef5ae17a7f0746c7dad7671a146ecec486ef4a1c760ce19
|
7
|
+
data.tar.gz: 9f713e08e75e95625284cf086e41ddf49469b8fd3b62cfa4bb89c7b17b1d9505db15ac110b55cb46b3a7df218cbf4c693a8029c1dadde1a40240d42f7ec55645
|
@@ -0,0 +1,576 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
#TODO
|
4
|
+
#Work out the cluster names in a Composite DS
|
5
|
+
#determine the individual services in a replicator so we can print out better output (status on each)
|
6
|
+
#Remove host logging - stop duplicate emails across multi hosts?
|
7
|
+
|
8
|
+
HOST=`hostname`
|
9
|
+
|
10
|
+
#Start Configuration Options. - These can be overridden by command line options or from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
11
|
+
CONNECTOR=0 #If this host is running a connector set to 1 otherwise 0
|
12
|
+
CLUSTER=0 #If this host is running a cluster set to 1 otherwise 0
|
13
|
+
REPLICATOR=0 #If this host is running a replicator set to 1 otherwise 0
|
14
|
+
REPLICATOR_PORT=10000 #Replicator Port
|
15
|
+
REPLICATOR_HOME=/opt/continuent/ #Home dir for Replicator
|
16
|
+
SERVICES='' #Name of the individual clusters in a composite DS
|
17
|
+
EMAIL='' #Set email address here or pass via the email= command line option
|
18
|
+
DISK=0 #Check Disk space
|
19
|
+
CHECK_ELB=0 #Enable check for ELB socket check
|
20
|
+
|
21
|
+
SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
|
22
|
+
LOCK_TIMEOUT=180 # Only send a Email every x minutes for a specific
|
23
|
+
# problem, stop spamming from the script
|
24
|
+
LAG=60 # Slave lag to report on
|
25
|
+
CONNECTOR_TIMEOUT=10 # No of seconds to wait for a connector response
|
26
|
+
DISK_WARNING=80 # % full to send a warning
|
27
|
+
SENDMAILBIN=/usr/sbin/sendmail
|
28
|
+
#End Configuration Options
|
29
|
+
|
30
|
+
SENDMAIL=0
|
31
|
+
DEBUG=0
|
32
|
+
LOG=/opt/continuent/share/check_tungsten.log
|
33
|
+
LOCK_DIR=/opt/continuent/share/tungsten_locks
|
34
|
+
|
35
|
+
# Evaluate a bc(1) comparison and expose the verdict as a shell status.
#   $* - the expression handed to bc, e.g. "$LATENCY > $LAG"
# Returns 0 when bc prints exactly 1, and 1 for anything else (0, empty or
# unexpected output, or no expression at all).
# Terminates the script with status 1 if the bc pipeline itself fails.
function float_cond()
{
    # Nothing to evaluate counts as "false".
    if (( $# == 0 )); then
        return 1
    fi

    local verdict
    if ! verdict=$(echo "$*" | bc -q 2>&1); then
        echo "Error: $verdict"
        exit 1
    fi

    # Only a literal "1" is a true comparison; every other output is false.
    case "$verdict" in
        1) return 0 ;;
        *) return 1 ;;
    esac
}
|
50
|
+
|
51
|
+
# Print an informational message, but only when verbose mode is on (DEBUG=1).
#   $1 - message text
info ()
{
    # DEBUG is quoted so an empty or unset value simply means "quiet"
    # instead of making [ fail with "unary operator expected".
    if [ "$DEBUG" == 1 ]; then echo "INFO : $1"; fi
}
|
55
|
+
# Print an error message, but only when verbose mode is on (DEBUG=1).
# The function only prints; it does not stop the script.
#   $1 - message text
error ()
{
    # DEBUG is quoted so an empty or unset value simply means "quiet"
    # instead of making [ fail with "unary operator expected".
    if [ "$DEBUG" == 1 ]; then echo "ERROR : $1"; fi
}
|
59
|
+
# Report a fatal problem on stdout and stop with exit status 1.
# Unlike info/error this is printed regardless of DEBUG.
#   $1 - message text
severe ()
{
    printf 'SEVERE : %s\n' "$1"
    exit 1
}
|
64
|
+
# Look up one setting in $CONTINUENT_ROOT/conf/tungsten.cfg and print its value.
#   $1 - setting name; the first line containing "<name>" (in double quotes) is used
# Output: the text after the first ':' on that line with commas, double quotes
#         and spaces removed.
# On failure (file missing or setting not found/empty) the SEVERE message is
# sent to stderr and the (sub)shell exits 1. The function is used as
# $(getSetting name), so writing the message to stdout would silently turn the
# error text into the "value" of the setting.
# Side effects: sets the globals CFG and getSettingValue.
getSetting ()
{
    CFG="$CONTINUENT_ROOT/conf/tungsten.cfg"
    if [ ! -f "$CFG" ]
    then
        severe "Unable to find $CFG" >&2
    fi

    getSettingValue=$(grep "\"$1\"" "$CFG" | cut -d ':' -f2 | head -1 | sed -e 's/,//g' -e 's/"//g' -e 's/ //g')
    if [ -z "$getSettingValue" ]
    then
        severe "Unable to find $1 in $CFG" >&2
    fi

    echo "$getSettingValue"
}
|
83
|
+
|
84
|
+
|
85
|
+
# Load any continuent variables
|
86
|
+
|
87
|
+
if [ -z $CONTINUENT_ROOT ]
|
88
|
+
then
|
89
|
+
[ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
|
90
|
+
[ -f "$HOME/.profile" ] && . "$HOME/.profile"
|
91
|
+
fi
|
92
|
+
|
93
|
+
# Switch on a boolean option: set the variable whose NAME is given to 1.
#   $1 - name of the variable to set (e.g. CLUSTER)
function sOpt()
{
    # A bare `$1=1` is executed as a command called "CLUSTER=1" rather than an
    # assignment; printf -v assigns to the variable named by $1.
    printf -v "$1" '%s' 1
    info "$1 switched on via command line"
}
|
98
|
+
|
99
|
+
# Store the value part of a "key=value" argument in a named variable.
#   $1 - name of the variable to assign
#   $2 - raw argument, e.g. "email=ops@example.com"
#   $3 - the option key to skip, e.g. "email"
# Every non-empty '='-separated piece of $2 that differs from the key is
# assigned in turn, so the last such piece wins; if there is none the variable
# is left untouched.
function mOpt()
{
    local -a _mopt_parts
    local _mopt_piece

    # Split on '=' without word-splitting or glob-expanding the value.
    IFS='=' read -r -a _mopt_parts <<< "$2"

    for _mopt_piece in "${_mopt_parts[@]}"
    do
        # The key must be compared as "$3"; single quotes would compare against
        # the literal text $3 and never filter the key out.
        if [ -n "$_mopt_piece" ] && [ "$_mopt_piece" != "$3" ]
        then
            # `$1=value` would be run as a command; printf -v really assigns.
            printf -v "$1" '%s' "$_mopt_piece"
        fi
    done
}
|
110
|
+
|
111
|
+
#Parse the command line options
|
112
|
+
|
113
|
+
# Print the value of a "key=value" argument, or the current value when the
# argument carries no "=value" part.
#   $1 - raw argument   $2 - option key   $3 - current value (fallback)
# Everything after the first '=' is the value, so values may contain '='.
_argValue ()
{
    case "$1" in
        "$2"=?*) printf '%s\n' "${1#*=}" ;;
        *)       printf '%s\n' "$3" ;;
    esac
}

# Walk over every command line argument. Bare words switch a check on,
# key=value words override a configuration default, and anything unknown
# prints the list of valid options and exits 1.
for arg in "$@"
do
    case "$arg" in
        -v)
            DEBUG=1
            info "Debug mode set"
            ;;
        -vv)
            DEBUG=1
            # info() adds the "INFO : " prefix itself.
            info "Extended Debug mode set"
            set -x
            ;;
        cluster)
            CLUSTER=1
            info "CLUSTER switched on via command line"
            ;;
        connector)
            CONNECTOR=1
            info "CONNECTOR switched on via command line"
            ;;
        replicator)
            REPLICATOR=1
            info "REPLICATOR switched on via command line"
            ;;
        check_elb)
            CHECK_ELB=1
            info "CHECK_ELB switched on via command line"
            ;;
        replicator_port*)
            REPLICATOR_PORT=$(_argValue "$arg" replicator_port "$REPLICATOR_PORT")
            info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
            ;;
        replicator_home*)
            REPLICATOR_HOME=$(_argValue "$arg" replicator_home "$REPLICATOR_HOME")
            info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
            ;;
        services*)
            SERVICES=$(_argValue "$arg" services "$SERVICES")
            info "SERVICES $SERVICES passed via the command line"
            ;;
        email*)
            EMAIL=$(_argValue "$arg" email "$EMAIL")
            info "EMAIL $EMAIL passed via the command line"
            ;;
        config*)
            FILE=$(_argValue "$arg" config "$FILE")
            info "Config File $FILE passed via the command line"
            ;;
        disk)
            DISK=1
            info "DISK switched on via command line"
            ;;
        *)
            echo "Unknown command line option passed $arg"
            echo "Valid options are -v,-vv,cluster,connector,replicator,check_elb,disk,replicator_port=??,replicator_home=??,services=??,email=??,config=??"
            exit 1
            ;;
    esac
done
|
193
|
+
|
194
|
+
|
195
|
+
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
196
|
+
then
|
197
|
+
if [ -z $CONTINUENT_ROOT ]
|
198
|
+
then
|
199
|
+
# The $ is escaped so the message names the variable; it is empty in this
# branch, so expanding it would print just " is not set - unable to continue".
severe "\$CONTINUENT_ROOT is not set - unable to continue"
|
200
|
+
fi
|
201
|
+
if [ ! -f $CONTINUENT_ROOT/share/env.sh ]
|
202
|
+
then
|
203
|
+
severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
|
204
|
+
fi
|
205
|
+
|
206
|
+
. "$CONTINUENT_ROOT/share/env.sh"
|
207
|
+
|
208
|
+
#Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
|
209
|
+
CFG=$CONTINUENT_ROOT/share/check_tungsten.cfg
|
210
|
+
|
211
|
+
if [ -f $CFG ]
|
212
|
+
then
|
213
|
+
info "Loading settings from $CFG"
|
214
|
+
. "$CFG"
|
215
|
+
fi
|
216
|
+
if [ -z "$MYSQL" ]
|
217
|
+
then
|
218
|
+
MYSQL=`which mysql 2>/dev/null`
|
219
|
+
|
220
|
+
if [ "$MYSQL" == "" ]
|
221
|
+
then
|
222
|
+
severe " Unable to find the mysql command line program"
|
223
|
+
fi
|
224
|
+
fi
|
225
|
+
fi
|
226
|
+
|
227
|
+
#If a file is passed from the command line load any variables from there
|
228
|
+
if [ ! -z $FILE ]
|
229
|
+
then
|
230
|
+
if [ ! -f $FILE ]
|
231
|
+
then
|
232
|
+
severe "The file specified in the command line $FILE does not exist"
|
233
|
+
fi
|
234
|
+
|
235
|
+
info "Loading settings from $FILE"
|
236
|
+
. "$FILE"
|
237
|
+
fi
|
238
|
+
|
239
|
+
#Parameter and host validation
|
240
|
+
|
241
|
+
BC=`which bc 2>/dev/null`
|
242
|
+
|
243
|
+
if [ "$BC" == "" ]
|
244
|
+
then
|
245
|
+
severe " Unable to find the command bc - please install"
|
246
|
+
fi
|
247
|
+
|
248
|
+
|
249
|
+
if [ "$EMAIL" == "" ]
|
250
|
+
then
|
251
|
+
severe " email must be specified"
|
252
|
+
fi
|
253
|
+
|
254
|
+
if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
|
255
|
+
then
|
256
|
+
severe " No option specified, select either connector, cluster or replicator"
|
257
|
+
fi
|
258
|
+
|
259
|
+
if [ -d $LOCK_DIR ]
|
260
|
+
then
|
261
|
+
if [ ! -w $LOCK_DIR ]
|
262
|
+
then
|
263
|
+
severe " The locks dir $LOCK_DIR is not writable"
|
264
|
+
fi
|
265
|
+
else
|
266
|
+
info "Creating locks dir"
|
267
|
+
mkdir $LOCK_DIR
|
268
|
+
fi
|
269
|
+
|
270
|
+
if [ -z "$MAILPROG" ]
|
271
|
+
then
|
272
|
+
MAILPROG=`which mail 2>/dev/null`
|
273
|
+
|
274
|
+
if [ "$MAILPROG" == "" ]
|
275
|
+
then
|
276
|
+
severe " Unable to find a mail program"
|
277
|
+
fi
|
278
|
+
fi
|
279
|
+
|
280
|
+
if [ -z "$SENDMAILBIN" ]
|
281
|
+
then
|
282
|
+
SENDMAILBIN=`which sendmail 2>/dev/null`
|
283
|
+
|
284
|
+
if [ "$SENDMAILBIN" == "" ]
|
285
|
+
then
|
286
|
+
severe " Unable to find a sendmail program"
|
287
|
+
fi
|
288
|
+
fi
|
289
|
+
|
290
|
+
if [ -f $LOG ]
|
291
|
+
then
|
292
|
+
rm $LOG
|
293
|
+
fi
|
294
|
+
|
295
|
+
#Expire old Locks
|
296
|
+
info "Deleting Locks older than $LOCK_TIMEOUT min"
|
297
|
+
find $LOCK_DIR/* -type f -mmin +$LOCK_TIMEOUT -delete 2> /dev/null
|
298
|
+
|
299
|
+
#Check the connector status
|
300
|
+
if [ $CONNECTOR == 1 ]
|
301
|
+
then
|
302
|
+
connector_ok_to_allow_elb=0
|
303
|
+
info "Running Connector Tests"
|
304
|
+
CONN=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -c| grep -v OK | wc -l)
|
305
|
+
if [ $CONN -ne 0 ]
|
306
|
+
then
|
307
|
+
error " Connector is not running"
|
308
|
+
echo "Connector is not running on $HOST - Investigate" >> $LOG
|
309
|
+
if [ ! -f $LOCK_DIR/con_running.lck ]
|
310
|
+
then
|
311
|
+
SENDMAIL=1
|
312
|
+
touch $LOCK_DIR/con_running.lck
|
313
|
+
else
|
314
|
+
info "Not sending Email lock file exists"
|
315
|
+
fi
|
316
|
+
else
|
317
|
+
info "Connector is running OK"
|
318
|
+
|
319
|
+
TIMEOUT=`which timeout 2>/dev/null`
|
320
|
+
|
321
|
+
if [ "$TIMEOUT" == "" ]
|
322
|
+
then
|
323
|
+
info "timeout command not found - unable to check if the connector is responding"
|
324
|
+
else
|
325
|
+
info "Checking Connector is responding to queries"
|
326
|
+
CON_USER=$(getSetting connector_user)
|
327
|
+
CON_PW=$(getSetting connector_password)
|
328
|
+
CON_PORT=$(getSetting connector_listen_port)
|
329
|
+
CHECK=$(timeout -s HUP $CONNECTOR_TIMEOUT $MYSQL -P$CON_PORT -u $CON_USER -p$CON_PW -h $HOSTNAME --skip-column-names -Be"select 'ALIVE'")
|
330
|
+
if [ "$CHECK" != 'ALIVE' ]
|
331
|
+
then
|
332
|
+
error 'Unable to connect to connector'
|
333
|
+
echo "Connector is not responding on $HOST - Investigate" >> $LOG
|
334
|
+
connector_ok=0
|
335
|
+
if [ ! -f $LOCK_DIR/con_responding.lck ]
|
336
|
+
then
|
337
|
+
SENDMAIL=1
|
338
|
+
touch $LOCK_DIR/con_responding.lck
|
339
|
+
else
|
340
|
+
info "Not sending Email lock file exists"
|
341
|
+
fi
|
342
|
+
else
|
343
|
+
info 'Connector is alive'
|
344
|
+
connector_ok_to_allow_elb=1
|
345
|
+
fi
|
346
|
+
fi
|
347
|
+
fi
|
348
|
+
|
349
|
+
if [ $CHECK_ELB == 1 ]
|
350
|
+
then
|
351
|
+
if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ $connector_ok_to_allow_elb == 1 ]
|
352
|
+
then
|
353
|
+
|
354
|
+
sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
|
355
|
+
sudo service xinetd reload
|
356
|
+
fi
|
357
|
+
fi
|
358
|
+
fi
|
359
|
+
|
360
|
+
#Check the cluster Status
|
361
|
+
if [ $CLUSTER == 1 ]
|
362
|
+
then
|
363
|
+
#Check the processes are running
|
364
|
+
info "Running Cluster Tests"
|
365
|
+
REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r| grep -v OK | wc -l)
|
366
|
+
if [ $REPL -ne 0 ]
|
367
|
+
then
|
368
|
+
error " Replicator or Manager in cluster is not running"
|
369
|
+
echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
|
370
|
+
if [ ! -f $LOCK_DIR/rep_running.lck ]
|
371
|
+
then
|
372
|
+
SENDMAIL=1
|
373
|
+
touch $LOCK_DIR/rep_running.lck
|
374
|
+
else
|
375
|
+
info "Not sending Email lock file exists"
|
376
|
+
fi
|
377
|
+
|
378
|
+
else
|
379
|
+
info "Replicator and Manager in cluster are running OK"
|
380
|
+
fi
|
381
|
+
|
382
|
+
#Check the processes are online
|
383
|
+
if [ "$SERVICES" == "" ]
|
384
|
+
then
|
385
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -v OK | wc -l)
|
386
|
+
if [ $ONLINE -ne 0 ]
|
387
|
+
then
|
388
|
+
error "Services are not online"
|
389
|
+
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
390
|
+
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
391
|
+
then
|
392
|
+
SENDMAIL=1
|
393
|
+
touch $LOCK_DIR/rep_online.lck
|
394
|
+
else
|
395
|
+
info "Not sending Email lock file exists"
|
396
|
+
fi
|
397
|
+
|
398
|
+
else
|
399
|
+
info "Services are online"
|
400
|
+
fi
|
401
|
+
else
|
402
|
+
services=$(echo "$SERVICES" | sed 's/,/ /g')
|
403
|
+
for s in $services
|
404
|
+
do
|
405
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -v OK | wc -l)
|
406
|
+
if [ $ONLINE -ne 0 ]
|
407
|
+
then
|
408
|
+
error "Services are not online @ $s"
|
409
|
+
echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
|
410
|
+
if [ ! -f $LOCK_DIR/rep_online.lck ]
|
411
|
+
then
|
412
|
+
SENDMAIL=1
|
413
|
+
touch $LOCK_DIR/rep_online.lck
|
414
|
+
else
|
415
|
+
info "Not sending Email lock file exists"
|
416
|
+
fi
|
417
|
+
|
418
|
+
else
|
419
|
+
info "Services are online @ $s"
|
420
|
+
fi
|
421
|
+
done
|
422
|
+
fi
|
423
|
+
|
424
|
+
#Check for replicator latency
|
425
|
+
ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -v OK | wc -l)
|
426
|
+
if [ $ONLINE -ne 0 ]
|
427
|
+
then
|
428
|
+
error "Services are Lagging"
|
429
|
+
echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
|
430
|
+
if [ ! -f $LOCK_DIR/rep_lag.lck ]
|
431
|
+
then
|
432
|
+
SENDMAIL=1
|
433
|
+
touch $LOCK_DIR/rep_lag.lck
|
434
|
+
else
|
435
|
+
info "Not sending Email lock file exists"
|
436
|
+
fi
|
437
|
+
|
438
|
+
else
|
439
|
+
info "Cluster Replicator is keeping up"
|
440
|
+
fi
|
441
|
+
fi
|
442
|
+
|
443
|
+
#Check the Replicator
|
444
|
+
if [ $REPLICATOR == 1 ]
|
445
|
+
then
|
446
|
+
if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
447
|
+
then
|
448
|
+
severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
|
449
|
+
fi
|
450
|
+
|
451
|
+
AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep "Connection failed" | wc -l)
|
452
|
+
if [ $AVAILABLE -gt 0 ]
|
453
|
+
then
|
454
|
+
error "Replicator process is not running on $REPLICATOR_PORT"
|
455
|
+
echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
|
456
|
+
if [ ! -f $LOCK_DIR/tr_rep_running.lck ]
|
457
|
+
then
|
458
|
+
SENDMAIL=1
|
459
|
+
touch $LOCK_DIR/tr_rep_running.lck
|
460
|
+
else
|
461
|
+
info "Not sending Email lock file exists"
|
462
|
+
fi
|
463
|
+
|
464
|
+
else
|
465
|
+
info "TR Replicator is running"
|
466
|
+
fi
|
467
|
+
|
468
|
+
|
469
|
+
ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services| grep state | grep -v ONLINE | wc -l)
|
470
|
+
if [ $ONLINE -gt 0 ]
|
471
|
+
then
|
472
|
+
error "Replicator is down"
|
473
|
+
echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
|
474
|
+
if [ ! -f $LOCK_DIR/tr_rep_online.lck ]
|
475
|
+
then
|
476
|
+
SENDMAIL=1
|
477
|
+
touch $LOCK_DIR/tr_rep_online.lck
|
478
|
+
else
|
479
|
+
info "Not sending Email lock file exists"
|
480
|
+
fi
|
481
|
+
|
482
|
+
else
|
483
|
+
info "TR Replicator is online"
|
484
|
+
fi
|
485
|
+
|
486
|
+
#Check for latency
|
487
|
+
LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services|grep appliedLatency|cut -d ':' -f2)
|
488
|
+
|
489
|
+
for LATENCY in $LATENCY_LIST
|
490
|
+
do
|
491
|
+
if float_cond "$LATENCY > $LAG"; then
|
492
|
+
error "Replicator is lagging"
|
493
|
+
echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
|
494
|
+
if [ ! -f $LOCK_DIR/tr_rep_lag.lck ]
|
495
|
+
then
|
496
|
+
SENDMAIL=1
|
497
|
+
touch $LOCK_DIR/tr_rep_lag.lck
|
498
|
+
else
|
499
|
+
info "Not sending Email lock file exists"
|
500
|
+
fi
|
501
|
+
else
|
502
|
+
info "Replicator latency ok"
|
503
|
+
fi
|
504
|
+
done
|
505
|
+
|
506
|
+
|
507
|
+
fi
|
508
|
+
|
509
|
+
#Check the disk space
|
510
|
+
# Disk space check (enabled with DISK=1).
# Each line of `df -HP` (header, tmpfs and cdrom lines removed) is reduced to
# "<use%> <filesystem>". Any filesystem at or above DISK_WARNING percent is
# written to $LOG and, unless the disk.lck lock file already exists, flags an
# email via SENDMAIL=1.
if [ "$DISK" == 1 ]
then
    # The loop reads from a process substitution rather than from a pipe.
    # A `df | while read` loop runs in a subshell, so SENDMAIL=1 set inside it
    # would be lost and a full disk would never trigger the email.
    while read -r usep partition _
    do
        # "85%" -> "85"
        usep=${usep%%\%*}

        # Skip rows whose usage column is not a plain number (e.g. "-").
        case "$usep" in
            ''|*[!0-9]*) continue ;;
        esac

        if [ "$usep" -ge "$DISK_WARNING" ]; then
            error "Running out of disk space on $partition"
            echo "Running out for disk space on $HOST $partition - Investigate" >> "$LOG"
            if [ ! -f "$LOCK_DIR/disk.lck" ]
            then
                SENDMAIL=1
                touch "$LOCK_DIR/disk.lck"
            else
                info "Not sending Email lock file exists"
            fi
        fi
    done < <(df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }')
fi
|
531
|
+
|
532
|
+
if [ $SENDMAIL == 1 ]
|
533
|
+
then
|
534
|
+
if [ $DEBUG == 1 ]
|
535
|
+
then
|
536
|
+
info "Sending Email to $EMAIL"
|
537
|
+
info "Subject $SUBJECT"
|
538
|
+
cat $LOG
|
539
|
+
fi
|
540
|
+
|
541
|
+
if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
|
542
|
+
then
|
543
|
+
manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep "PID" | wc -l)
|
544
|
+
if [ $manager_running -eq 1 ]; then
|
545
|
+
info "Adding cctrl output to email"
|
546
|
+
echo >> $LOG
|
547
|
+
echo "OUTPUT FROM cctrl ls on $HOST" >> $LOG
|
548
|
+
echo '--------------------------------------------------' >> $LOG
|
549
|
+
echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert >> $LOG
|
550
|
+
echo '--------------------------------------------------' >> $LOG
|
551
|
+
else
|
552
|
+
info 'Manager not running skipping cctrl output'
|
553
|
+
echo "Manager not running unable to gather cctrl output" >> $LOG
|
554
|
+
fi
|
555
|
+
|
556
|
+
fi
|
557
|
+
if [ $REPLICATOR == 1 ]
|
558
|
+
then
|
559
|
+
if [ -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
|
560
|
+
then
|
561
|
+
info "Adding trepctl output to email"
|
562
|
+
echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST" >> $LOG
|
563
|
+
echo '--------------------------------------------------' >> $LOG
|
564
|
+
|
565
|
+
|
566
|
+
$REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services >> $LOG
|
567
|
+
echo '--------------------------------------------------' >> $LOG
|
568
|
+
else
|
569
|
+
info 'trepctl not found'
|
570
|
+
echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
|
571
|
+
fi
|
572
|
+
fi
|
573
|
+
$MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
|
574
|
+
fi
|
575
|
+
|
576
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# The script should be run as the tungsten user to ensure the
|
4
|
+
# environment is set correctly. Export the environment variables before
|
5
|
+
# calling the script to modify behavior
|
6
|
+
#
|
7
|
+
|
8
|
+
require "#{File.dirname(__FILE__)}/../lib/ruby/tungsten"
|
9
|
+
|
10
|
+
# Nagios check that reports the age of the newest Tungsten backup.
#
# On the node that TI.status() names as coordinator, main() stats the
# store*.properties files in the replicator's backup storage directory on every
# datasource (over ssh), keeps the entry with the largest modification time and
# compares its age with the --max-backup-age option (default 86400 seconds).
class CheckTungstenBackups
  include TungstenScript

  def main
    cluster = TI.status()
    if cluster.coordinator() != TI.hostname()
      # NOTE(review): presumably nagios_ok ends the script here - confirm in TungstenScript.
      nagios_ok("Not running check because this node is not the coordinator")
    end

    now = TU.cmd_result("date +%s").to_i()
    newest = nil

    cluster.datasources().each do |datasource|
      begin
        listing = TU.ssh_result("stat -c\"%n %Y\" #{TI.trepctl_property(cluster.name(), 'replicator.storage.agent.fs.directory')}/store*.properties 2>/dev/null", datasource, TI.user())
        listing.split("\n").each do |entry|
          # Each line is "<file name> <mtime in seconds since the epoch>".
          fields = entry.split(" ")
          filename = fields[0]
          modified = fields[1].to_i()

          next unless newest.nil? || modified > newest[:seconds]

          newest = {
            :hostname => datasource,
            :filename => filename,
            :seconds => modified
          }
        end
      rescue CommandError
        # A failing command on one datasource is ignored; the remaining
        # datasources are still inspected.
      end
    end

    if newest.nil?
      nagios_critical("Unable to find a backup on any datasource")
    end

    age = now - newest[:seconds]
    if age > @options[:max_backup_age]
      nagios_critical("#{newest[:hostname]}:#{newest[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
    else
      nagios_ok("The most recent backup is #{newest[:hostname]}:#{newest[:filename]} [#{age}s]")
    end
  end

  # Register the script description and the --max-backup-age option.
  def configure
    super()

    description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")
    add_option(:max_backup_age, {
      :on => "--max-backup-age String",
      :help => "Maximum allowed age in seconds of a backup on any machine",
      :parse => method(:parse_integer_option),
      :default => 86400
    })
  end

  # Name of this script.
  def script_name
    "check_tungsten_backups"
  end

  # Run the check as soon as the class body has been evaluated.
  self.new().run()
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Simple Bash Script To Check Tungsten Latency
|
4
|
+
# Nagios Plugin For NRPE
|
5
|
+
#
|
6
|
+
# This script accepts two arguments, {{-w}} and {{-c}}. The {{-w}} flag is
|
7
|
+
# the level at which a warning should be returned. {{-c}} sets the level for
|
8
|
+
# a critical return value. The script uses the maximum latency of any slave
|
9
|
+
# to determine the return value.
|
10
|
+
#
|
11
|
+
OK_STATE=0
|
12
|
+
WARNING_STATE=1
|
13
|
+
CRITICAL_STATE=2
|
14
|
+
THOME=`dirname $0`
|
15
|
+
|
16
|
+
# Print the usage text for check_tungsten_latency, then exit with status 0.
function display_help()
{
    printf '%s\n' \
        "Usage: ./check_tungsten_latency -w warning_level -c critical_level [-h]" \
        " -w Throw a warning alert if the maximum latency" \
        " is above this level" \
        " -c Throw a critical alert if the maximum latency" \
        " is above this level" \
        " --perfdata Display performance data of the latency" \
        " --perslave-perfdata Show performance latency values of each slave." \
        " If this is not set the maximum latency will be" \
        " displayed in the performace data" \
        " -h Display this message"
    exit 0
}
|
30
|
+
|
31
|
+
# We will use this to make some floating point comparisons
|
32
|
+
# Evaluate a bc(1) comparison and expose the verdict as a shell status.
#   $* - the expression handed to bc, e.g. "$applied_latency > $max_latency"
# Returns 0 when bc prints exactly 1, and 1 for anything else (0, empty or
# unexpected output, or no expression at all).
# Terminates the script with status 1 if the bc pipeline itself fails.
function float_cond()
{
    # Nothing to evaluate counts as "false".
    if (( $# == 0 )); then
        return 1
    fi

    local verdict
    if ! verdict=$(echo "$*" | bc -q 2>&1); then
        echo "Error: $verdict"
        exit 1
    fi

    # Only a literal "1" is a true comparison; every other output is false.
    case "$verdict" in
        1) return 0 ;;
        *) return 1 ;;
    esac
}
|
47
|
+
|
48
|
+
warning_level=0
|
49
|
+
critical_level=0
|
50
|
+
perfdata="false"
|
51
|
+
performance_data_default_glue=""
|
52
|
+
performance_data_suffix=""
|
53
|
+
perfdata_allslaves=""
|
54
|
+
|
55
|
+
for arg
|
56
|
+
do
|
57
|
+
delim=""
|
58
|
+
case "$arg" in
|
59
|
+
#translate --gnu-long-options to -g (short options)
|
60
|
+
--perfdata) args="${args}-p ";;
|
61
|
+
--perslave-perfdata) args="${args}-s ";;
|
62
|
+
#pass through anything else
|
63
|
+
*) [[ "${arg:0:1}" == "-" ]] || delim="\""
|
64
|
+
args="${args}${delim}${arg}${delim} ";;
|
65
|
+
esac
|
66
|
+
done
|
67
|
+
|
68
|
+
#Reset the positional parameters to the short options
|
69
|
+
eval set -- $args
|
70
|
+
|
71
|
+
# Parse the short options produced by the long-option translation.
#   -w <level>  warning threshold      -c <level>  critical threshold
#   -h          show help and exit     -p          emit performance data
#   -s          per-slave performance data
#   -n          accepted but has no effect in this script
# "h" carries no ':' because -h is a plain flag; declaring it as "h:" makes
# getopts reject a bare -h with "option requires an argument".
while getopts "w:c:hnps" Option
do
    case $Option in
        w )
            warning_level=$OPTARG
            ;;
        c )
            critical_level=$OPTARG
            ;;
        h )
            display_help
            ;;
        p )
            perfdata="true"
            ;;
        s )
            perfdata_allslaves="true"
            ;;
    esac
done
|
91
|
+
# Both thresholds are mandatory. A check started without them must not report
# success: display_help ends with "exit 0", which Nagios reads as OK, so the
# help text is printed from a subshell and the script then exits with the
# Nagios UNKNOWN status (3).
UNKNOWN_STATE=3

if float_cond "$warning_level == 0"; then
    echo "Error: warning_level has not been set"
    echo ""
    ( display_help )
    exit $UNKNOWN_STATE
fi

if float_cond "$critical_level == 0"; then
    echo "Error: critical_level has not been set"
    echo ""
    ( display_help )
    exit $UNKNOWN_STATE
fi
|
102
|
+
|
103
|
+
if [ "$perfdata" == "true" ]; then
|
104
|
+
performance_data_default_glue=" "
|
105
|
+
performance_data_suffix=";$warning_level;$critical_level;;"
|
106
|
+
fi
|
107
|
+
|
108
|
+
error_message=""
|
109
|
+
# Separator placed between entries of error_message; empty before the first entry.
error_message_glue=""
|
110
|
+
performance_data_glue=""
|
111
|
+
performance_data="| "
|
112
|
+
max_latency=0
|
113
|
+
|
114
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
115
|
+
# Check the manager status
|
116
|
+
if [ $manager_running -eq 0 ]; then
|
117
|
+
echo "CRITICAL: Manager is not running"
|
118
|
+
exit $CRITICAL_STATE
|
119
|
+
fi
|
120
|
+
|
121
|
+
# Keep the "REPLICATOR(role=relay" / "REPLICATOR(role=slave" header lines and
# the appliedLatency lines of `cctrl ls -l`, with '|' and spaces stripped.
# The roles are a (relay|slave) group: [relay|slave] would be a bracket
# expression matching any ONE of the characters r,e,l,a,y,|,s,v after "role=".
latency_values=`echo "ls -l" | ${THOME}/../../tungsten-manager/bin/cctrl | grep -E "REPLICATOR\(role=(relay|slave)|appliedLatency" | tr -d "| "`
|
122
|
+
|
123
|
+
current_slave=""
|
124
|
+
for line in $latency_values
|
125
|
+
do
|
126
|
+
if [[ $current_slave == "" ]]
|
127
|
+
then
|
128
|
+
current_slave=`echo $line | grep "REPLICATOR" | cut -f 1 -d ":"`
|
129
|
+
else
|
130
|
+
applied_latency=`echo $line | grep "appliedLatency" | cut -f 2 -d ":"`
|
131
|
+
|
132
|
+
if float_cond "$applied_latency > $max_latency"; then
|
133
|
+
max_latency=$applied_latency
|
134
|
+
fi
|
135
|
+
|
136
|
+
if float_cond "$applied_latency > $warning_level"; then
|
137
|
+
error_message="$error_message$error_message_glue$current_slave=$applied_latency""s"
|
138
|
+
error_message_glue=", "
|
139
|
+
fi
|
140
|
+
|
141
|
+
if float_cond "$applied_latency == -1"; then
|
142
|
+
error_message="$error_message$error_message_glue$current_slave is missing latency information"
|
143
|
+
error_message_glue=", "
|
144
|
+
fi
|
145
|
+
|
146
|
+
performance_data="$performance_data$performance_data_glue$current_slave=$applied_latency$performance_data_suffix"
|
147
|
+
performance_data_glue="$performance_data_default_glue"
|
148
|
+
current_slave=""
|
149
|
+
fi
|
150
|
+
done
|
151
|
+
|
152
|
+
if [ "$perfdata_allslaves" != "true" ]; then
|
153
|
+
performance_data="| max_latency=${max_latency}$performance_data_suffix"
|
154
|
+
fi
|
155
|
+
|
156
|
+
if [ "$perfdata" == "false" ]; then
|
157
|
+
performance_data=""
|
158
|
+
fi
|
159
|
+
|
160
|
+
if float_cond "$max_latency > $critical_level"; then
|
161
|
+
echo "CRITICAL: $error_message $performance_data"
|
162
|
+
exit $CRITICAL_STATE
|
163
|
+
fi
|
164
|
+
|
165
|
+
if [[ $error_message != "" ]]; then
|
166
|
+
echo "WARNING: $error_message $performance_data"
|
167
|
+
exit $WARNING_STATE
|
168
|
+
fi
|
169
|
+
|
170
|
+
echo "OK: All slaves are running normally (max_latency=${max_latency}) $performance_data "
|
171
|
+
|
172
|
+
exit $OK_STATE
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
#
|
19
|
+
# Simple Bash Script To Check Tungsten Services
|
20
|
+
# Nagios Plugin For NRPE
|
21
|
+
#
|
22
|
+
# This script accepts optional -s, -n and -h flags. It returns CRITICAL if any
|
23
|
+
# of Tungsten resources is not in an {{ONLINE}} state. It uses the output of
|
24
|
+
# the {{ls resources}} command to determine the current state.
|
25
|
+
#
|
26
|
+
OK_STATE=0
|
27
|
+
WARNING_STATE=1
|
28
|
+
CRITICAL_STATE=2
|
29
|
+
THOME=`dirname $0`
|
30
|
+
|
31
|
+
error_message=""
|
32
|
+
# Separator placed between entries of error_message; empty before the first entry.
error_message_glue=""
|
33
|
+
offline_count=0
|
34
|
+
dataservice=""
|
35
|
+
skip_shun=0
|
36
|
+
|
37
|
+
# Print the usage text for check_tungsten_online, then exit with status 0.
function display_help()
{
    printf '%s\n' \
        "Usage: ./check_tungsten_online -s dataservice [-h]" \
        " -s The data service you would like to check" \
        " -h Display this message" \
        " -n Skip Shunned Services"
    exit 0
}
|
45
|
+
|
46
|
+
# Parse the command line.
#   -s <dataservice>  restrict the check to one data service
#   -h                show help and exit
#   -n                do not treat SHUNNED services as a failure
# "h" carries no ':' because -h is a plain flag; declaring it as "h:" makes
# getopts reject a bare -h with "option requires an argument" and the checks
# run instead of the help being shown.
while getopts "s:hn" Option
do
    case $Option in
        h )
            display_help
            ;;
        s )
            dataservice=$OPTARG
            ;;
        n )
            skip_shun=1
            ;;
    esac
done
|
60
|
+
|
61
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
62
|
+
# Check the manager status
|
63
|
+
if [ $manager_running -eq 0 ]; then
|
64
|
+
echo "CRITICAL: Manager is not running"
|
65
|
+
exit $CRITICAL_STATE
|
66
|
+
fi
|
67
|
+
|
68
|
+
if [ "$dataservice" == "" ]; then
|
69
|
+
offline_services=`echo "ls resources" | ${THOME}/../../tungsten-manager/bin/cctrl | grep \| | grep : | grep -v ONLINE | tr -d "| " | cut -f 1,2 -d ":"`
|
70
|
+
else
|
71
|
+
offline_services=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep "(\(composite \)\?master\|(\(composite \)\?slave\|(relay" | grep -v ONLINE | tr -d "|" | cut -f 1 -d "("`
|
72
|
+
fi
|
73
|
+
|
74
|
+
for offline_service in $offline_services
|
75
|
+
do
|
76
|
+
offline_count=$(($offline_count+1))
|
77
|
+
error_message="$error_message$error_message_glue$offline_service"
|
78
|
+
error_message_glue=", "
|
79
|
+
done
|
80
|
+
|
81
|
+
if [ $offline_count -gt 0 ]
|
82
|
+
then
|
83
|
+
echo "CRITICAL: $error_message are not ONLINE"
|
84
|
+
exit $CRITICAL_STATE
|
85
|
+
fi
|
86
|
+
|
87
|
+
|
88
|
+
if [ $skip_shun -eq 0 ]
|
89
|
+
then
|
90
|
+
if [ "$dataservice" == "" ]; then
|
91
|
+
shunned=`echo "ls" | ${THOME}/../../tungsten-manager/bin/cctrl | grep 'SHUNNED' | wc -l`
|
92
|
+
else
|
93
|
+
shunned=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep 'SHUNNED' | wc -l`
|
94
|
+
fi
|
95
|
+
|
96
|
+
if [ $shunned -gt 0 ]
|
97
|
+
then
|
98
|
+
echo "CRITICAL: Dataservices are shunned"
|
99
|
+
exit $CRITICAL_STATE
|
100
|
+
fi
|
101
|
+
fi
|
102
|
+
|
103
|
+
|
104
|
+
echo "OK: All services are online"
|
105
|
+
exit $OK_STATE
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Simple Bash Script To Check Tungsten Policy
|
4
|
+
# Nagios Plugin For NRPE
|
5
|
+
#
|
6
|
+
# This script accepts an optional {{-s}} argument naming the data service to
|
7
|
+
# check. It will return a critical error if the cluster is in maintenance mode
|
8
|
+
#
|
9
|
+
OK_STATE=0
|
10
|
+
WARNING_STATE=1
|
11
|
+
CRITICAL_STATE=2
|
12
|
+
THOME=`dirname $0`
|
13
|
+
|
14
|
+
error_message=""
|
15
|
+
error_message_glue=""
|
16
|
+
offline_count=0
|
17
|
+
dataservice=""
|
18
|
+
|
19
|
+
function display_help()
|
20
|
+
{
|
21
|
+
echo "Usage: ./check_tungsten_policy -s dataservice [-h]"
|
22
|
+
echo " -s The data service you would like to check"
|
23
|
+
echo " -h Display this message"
|
24
|
+
exit 0
|
25
|
+
}
|
26
|
+
|
27
|
+
while getopts "s:h" Option
|
28
|
+
do
|
29
|
+
case $Option in
|
30
|
+
h )
|
31
|
+
display_help
|
32
|
+
;;
|
33
|
+
s )
|
34
|
+
dataservice=$OPTARG
|
35
|
+
;;
|
36
|
+
esac
|
37
|
+
done
|
38
|
+
|
39
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
40
|
+
# Check the manager status
|
41
|
+
if [ $manager_running -eq 0 ]; then
|
42
|
+
echo "CRITICAL: Manager is not running"
|
43
|
+
exit $CRITICAL_STATE
|
44
|
+
fi
|
45
|
+
|
46
|
+
if [ "$dataservice" == "" ]; then
|
47
|
+
maint_mode=`echo "ls " | ${THOME}/../../tungsten-manager/bin/cctrl | grep MAINTENANCE | wc -l`
|
48
|
+
else
|
49
|
+
maint_mode=`echo "use $dataservice; ls " | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep MAINTENANCE | wc -l` # -multi lets cctrl run both ';'-separated commands
|
50
|
+
fi
|
51
|
+
|
52
|
+
|
53
|
+
if [ $maint_mode -gt 0 ]
|
54
|
+
then
|
55
|
+
echo "CRITICAL: Cluster is in Maintenance mode"
|
56
|
+
exit $CRITICAL_STATE
|
57
|
+
fi
|
58
|
+
|
59
|
+
|
60
|
+
echo "OK: Cluster is in Automatic Mode"
|
61
|
+
exit $OK_STATE
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Simple Bash Script To Check Tungsten Progress
|
4
|
+
# Nagios Plugin For NRPE
|
5
|
+
#
|
6
|
+
# This script accepts two arguments, {{-t}} and {{-h}}. The {{-t}} flag sets the
|
7
|
+
# number of seconds to wait between two readings of appliedLastSeqno (default
|
8
|
+
# 1); {{-h}} displays the usage message. A critical alert is returned if the
|
9
|
+
# replicator is not ONLINE. Otherwise the script compares the two readings and
|
10
|
+
# returns a warning if the sequence number did not advance during the wait
|
11
|
+
# period
|
12
|
+
#
|
13
|
+
OK_STATE=0
|
14
|
+
WARNING_STATE=1
|
15
|
+
CRITICAL_STATE=2
|
16
|
+
THOME=`dirname $0`
|
17
|
+
|
18
|
+
function display_help()
|
19
|
+
{
|
20
|
+
echo "Usage: ./check_tungsten_progress -t time [-h]"
|
21
|
+
echo " -t The number of seconds to wait when monitoring progress"
|
22
|
+
echo " -h Display this message"
|
23
|
+
exit 0
|
24
|
+
}
|
25
|
+
|
26
|
+
# We will use this to make some floating point comparisons
|
27
|
+
function float_cond()
|
28
|
+
{
|
29
|
+
local cond=0
|
30
|
+
if [[ $# -gt 0 ]]; then
|
31
|
+
cond=$(echo "$*" | bc -q 2>&1)
|
32
|
+
if [[ $? -ne 0 ]]; then
|
33
|
+
echo "Error: $cond"
|
34
|
+
exit 1
|
35
|
+
fi
|
36
|
+
if [[ -z "$cond" ]]; then cond=0; fi
|
37
|
+
if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
|
38
|
+
fi
|
39
|
+
local stat=$((cond == 0))
|
40
|
+
return $stat
|
41
|
+
}
|
42
|
+
|
43
|
+
time_period=1
|
44
|
+
while getopts "t:h" Option
|
45
|
+
do
|
46
|
+
case $Option in
|
47
|
+
t)
|
48
|
+
time_period=$OPTARG
|
49
|
+
;;
|
50
|
+
h )
|
51
|
+
display_help
|
52
|
+
;;
|
53
|
+
esac
|
54
|
+
done
|
55
|
+
|
56
|
+
if float_cond "$time_period == 0"; then
|
57
|
+
echo "Error: time_period has not been set"
|
58
|
+
echo ""
|
59
|
+
display_help
|
60
|
+
fi
|
61
|
+
|
62
|
+
is_online=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "state" | grep "ONLINE" | wc -l`
|
63
|
+
if float_cond "$is_online == 0"; then
|
64
|
+
echo "CRITICAL: Replicator is not ONLINE"
|
65
|
+
exit $CRITICAL_STATE
|
66
|
+
fi
|
67
|
+
|
68
|
+
pre_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`
|
69
|
+
echo "cluster heartbeat" | ${THOME}/../../tungsten-manager/bin/cctrl > /dev/null
|
70
|
+
sleep $time_period
|
71
|
+
post_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`
|
72
|
+
|
73
|
+
progress_number=`echo "$post_progress_number - $pre_progress_number" | bc -q 2>/dev/null`
|
74
|
+
|
75
|
+
if float_cond "${progress_number:-0} < 1"; then  # an unreadable seqno counts as no progress
|
76
|
+
echo "WARNING: Replicator did not show progress"
|
77
|
+
exit $WARNING_STATE
|
78
|
+
fi
|
79
|
+
|
80
|
+
echo "OK: Replicator is making progress"
|
81
|
+
exit $OK_STATE
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Simple Bash Script To Check Tungsten Services
|
4
|
+
# Nagios Plugin For NRPE
|
5
|
+
#
|
6
|
+
# This script accepts two arguments, {{-r}} and {{-c}}. The {{-r}} argument
|
7
|
+
# will tell the script to check that the replicator and manager services are
|
8
|
+
# running. The {{-c}} argument will check the connector service, the
|
9
|
+
# arguments may be given individually or together. If any service is not
|
10
|
+
# running, the script will return a critical error.
|
11
|
+
#
|
12
|
+
OK_STATE=0
|
13
|
+
WARNING_STATE=1
|
14
|
+
CRITICAL_STATE=2
|
15
|
+
THOME=`dirname $0`
|
16
|
+
|
17
|
+
function display_help
|
18
|
+
{
|
19
|
+
echo "Usage: ./check_tungsten_services [-r] [-c] [-h]"
|
20
|
+
echo " -r Check the replicator and manager services"
|
21
|
+
echo " -c Check the connector service"
|
22
|
+
echo " -h Display this message"
|
23
|
+
exit 0
|
24
|
+
}
|
25
|
+
|
26
|
+
services_checked=""
|
27
|
+
services_checked_glue=""
|
28
|
+
services_stopped=""
|
29
|
+
services_stopped_glue=""
|
30
|
+
stopped_count=0
|
31
|
+
|
32
|
+
while getopts "rch" Option
|
33
|
+
do
|
34
|
+
case $Option in
|
35
|
+
r )
|
36
|
+
# Mark that the replicator and manager were checked
|
37
|
+
services_checked="${services_checked}${services_checked_glue}Replicator, Manager"
|
38
|
+
services_checked_glue=", "
|
39
|
+
|
40
|
+
replicator_running=`${THOME}/../../tungsten-replicator/bin/replicator status | grep "PID" | wc -l`
|
41
|
+
# Check the replicator status
|
42
|
+
if [ $replicator_running -eq 0 ]; then
|
43
|
+
services_stopped="${services_stopped}${services_stopped_glue}Replicator"
|
44
|
+
services_stopped_glue=", "
|
45
|
+
stopped_count=$(($stopped_count+1))
|
46
|
+
fi
|
47
|
+
|
48
|
+
manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
|
49
|
+
# Check the manager status
|
50
|
+
if [ $manager_running -eq 0 ]; then
|
51
|
+
services_stopped="${services_stopped}${services_stopped_glue}Manager"
|
52
|
+
services_stopped_glue=", "
|
53
|
+
stopped_count=$(($stopped_count+1))
|
54
|
+
fi
|
55
|
+
;;
|
56
|
+
c )
|
57
|
+
# Mark that the connector was checked
|
58
|
+
services_checked="${services_checked}${services_checked_glue}Connector"
|
59
|
+
services_checked_glue=", "
|
60
|
+
|
61
|
+
connector_running=`${THOME}/../../tungsten-connector/bin/connector status | grep "PID" | wc -l`
|
62
|
+
# Check the connector status
|
63
|
+
if [ $connector_running -eq 0 ]; then
|
64
|
+
services_stopped="${services_stopped}${services_stopped_glue}Connector"
|
65
|
+
services_stopped_glue=", "
|
66
|
+
stopped_count=$(($stopped_count+1))
|
67
|
+
fi
|
68
|
+
;;
|
69
|
+
h )
|
70
|
+
display_help
|
71
|
+
;;
|
72
|
+
esac
|
73
|
+
done
|
74
|
+
|
75
|
+
# One of the services isn't running
|
76
|
+
if [ $stopped_count -gt 0 ]
|
77
|
+
then
|
78
|
+
if [ $stopped_count -gt 1 ]; then
|
79
|
+
echo "CRITICAL: $services_stopped are not running"
|
80
|
+
else
|
81
|
+
echo "CRITICAL: $services_stopped is not running"
|
82
|
+
fi
|
83
|
+
|
84
|
+
exit $CRITICAL_STATE
|
85
|
+
fi
|
86
|
+
|
87
|
+
# Ensure that something was checked
|
88
|
+
if [[ $services_checked == "" ]]; then
|
89
|
+
echo "CRITICAL: No services were checked"
|
90
|
+
exit $CRITICAL_STATE
|
91
|
+
fi
|
92
|
+
|
93
|
+
# Everything is running
|
94
|
+
echo "OK: All services (${services_checked}) are running"
|
95
|
+
exit $OK_STATE
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
class ContinuentNagiosMonitorConnector
|
29
|
+
include TungstenScript
|
30
|
+
include TungstenNagiosMonitor
|
31
|
+
private
|
32
|
+
|
33
|
+
def main
|
34
|
+
unless TI.is_running?("connector")
|
35
|
+
critical("The Tungsten Connector is not running")
|
36
|
+
end
|
37
|
+
|
38
|
+
begin
|
39
|
+
TU.cmd_result("echo '#{opt(:statement)}' | mysql --defaults-file=#{opt(:defaults_file)} -h#{TI.hostname()}")
|
40
|
+
rescue CommandError => ce
|
41
|
+
TU.debug(ce)
|
42
|
+
critical("A connection to the Tungsten Connector could not be created")
|
43
|
+
end
|
44
|
+
|
45
|
+
ok("The connection was successfully created")
|
46
|
+
end
|
47
|
+
|
48
|
+
def configure
|
49
|
+
super()
|
50
|
+
|
51
|
+
add_option(:defaults_file, {
|
52
|
+
:on => "--defaults-file String",
|
53
|
+
:help => "The defaults file to use when connecting to MySQL"
|
54
|
+
})
|
55
|
+
|
56
|
+
add_option(:statement, {
|
57
|
+
:on => "--statement String",
|
58
|
+
:help => "The command to run against the Tungsten Connector",
|
59
|
+
:default => "tungsten connection status",
|
60
|
+
})
|
61
|
+
end
|
62
|
+
|
63
|
+
def validate
|
64
|
+
super()
|
65
|
+
|
66
|
+
unless TU.is_valid?()
|
67
|
+
return TU.is_valid?()
|
68
|
+
end
|
69
|
+
|
70
|
+
unless TI.is_connector?()
|
71
|
+
unknown("This server is not a Tungsten Connector")
|
72
|
+
end
|
73
|
+
|
74
|
+
if opt(:defaults_file).to_s() == ""
|
75
|
+
defaults_file = @defaults_file = Tempfile.new("tungsten_nagios_connector") # hold a reference so GC cannot unlink the file before main uses it
|
76
|
+
opt(:defaults_file, defaults_file.path())
|
77
|
+
|
78
|
+
defaults_file.puts("[client]")
|
79
|
+
defaults_file.puts("user=#{TI.setting(TI.setting_key(CONNECTORS, "connector_user"))}")
|
80
|
+
defaults_file.puts("password=#{TI.setting(TI.setting_key(CONNECTORS, "connector_password"))}")
|
81
|
+
defaults_file.puts("port=#{TI.setting(TI.setting_key(CONNECTORS, "connector_listen_port"))}")
|
82
|
+
defaults_file.flush()
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def script_name
|
87
|
+
"tungsten_nagios_connector"
|
88
|
+
end
|
89
|
+
|
90
|
+
self.new().run()
|
91
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: continuent-monitors-nagios
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Continuent
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: continuent-tools-monitoring
|
@@ -27,10 +27,26 @@ dependencies:
|
|
27
27
|
description:
|
28
28
|
email: info@continuent.com
|
29
29
|
executables:
|
30
|
+
- check_tungsten.sh
|
31
|
+
- check_tungsten_backups
|
32
|
+
- check_tungsten_latency
|
33
|
+
- check_tungsten_online
|
34
|
+
- check_tungsten_policy
|
35
|
+
- check_tungsten_progress
|
36
|
+
- check_tungsten_services
|
37
|
+
- tungsten_nagios_connector
|
30
38
|
- tungsten_nagios_monitor_threads
|
31
39
|
extensions: []
|
32
40
|
extra_rdoc_files: []
|
33
41
|
files:
|
42
|
+
- bin/check_tungsten.sh
|
43
|
+
- bin/check_tungsten_backups
|
44
|
+
- bin/check_tungsten_latency
|
45
|
+
- bin/check_tungsten_online
|
46
|
+
- bin/check_tungsten_policy
|
47
|
+
- bin/check_tungsten_progress
|
48
|
+
- bin/check_tungsten_services
|
49
|
+
- bin/tungsten_nagios_connector
|
34
50
|
- bin/tungsten_nagios_monitor_threads
|
35
51
|
- LICENSE
|
36
52
|
- README.md
|
@@ -54,7 +70,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
70
|
version: '0'
|
55
71
|
requirements: []
|
56
72
|
rubyforge_project:
|
57
|
-
rubygems_version: 2.0.
|
73
|
+
rubygems_version: 2.0.14
|
58
74
|
signing_key:
|
59
75
|
specification_version: 4
|
60
76
|
summary: Continuent Tungsten monitoring scripts for Nagios
|