continuent-monitors-nagios 0.0.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check reporting whether the local cluster policy is AUTOMATIC.
#
# The class mixes in TungstenScript and TungstenNagiosMonitor (loaded by the
# requires earlier in this script) and instantiates and runs itself as the
# last statement of the class body.
class ContinuentNagiosMonitorPolicy
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Perform the check: the host must be a manager, the manager must be
  # running, and the cluster policy must read "AUTOMATIC".
  #
  # NOTE(review): the guards below presumably rely on critical() ending the
  # script; that behaviour lives in TungstenNagiosMonitor - confirm there.
  def main
    critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?()
    critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")

    cluster = TI.status()
    automatic = (cluster.policy() == "AUTOMATIC")
    # The same text is reported for both outcomes; only the severity differs.
    message = "Cluster is in #{cluster.policy()} mode"

    if automatic
      ok(message)
    else
      critical(message)
    end
  end

  # Apply the inherited configuration, then set this script's description.
  def configure
    super()

    description("Check that the local cluster is running in the AUTOMATIC policy")
  end

  # Name under which this script identifies itself.
  def script_name
    return "tungsten_nagios_policy"
  end

  # Run the check as soon as the class has been defined.
  self.new().run()
end
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check verifying that a replication service applies new events.
#
# It samples appliedLastSeqno twice, separated by an optional delay, and
# reports ok only when the second sample is larger than the first. The class
# instantiates and runs itself as the last statement of the class body.
class ContinuentNagiosMonitorProgress
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Perform the check.
  #
  # NOTE(review): the guards below presumably rely on critical() ending the
  # script; that behaviour lives in TungstenNagiosMonitor - confirm there.
  def main
    critical("The server is not a Tungsten Replicator") unless TI.is_replicator?()
    critical("The Tungsten Replicator is not running") unless TI.is_running?("replicator")

    # Commercial installations additionally require a running manager.
    if TI.is_commercial?()
      critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?()
      critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")
    end

    # Fall back to the installation's default dataservice when --service
    # was not supplied.
    opt_default(:service, TI.default_dataservice())
    critical("The --service option was not given") if opt(:service).nil?

    if TI.trepctl_value(opt(:service), "state") != "ONLINE"
      critical("The #{opt(:service)} replication service is not ONLINE")
    end

    # Reads the current applied sequence number as a Float; to_f yields 0.0
    # for anything that does not start with a number.
    read_seqno = lambda {
      TI.trepctl_value(opt(:service), "appliedLastSeqno").to_s().to_f()
    }

    seqno_before = read_seqno.call()

    # Trigger a heartbeat on commercial installations so there is activity
    # to observe (see the description set in configure).
    TI.ensure_cctrl("cluster heartbeat") if TI.is_commercial?()

    if opt(:delay).is_a?(Integer)
      TU.debug("Go to sleep for #{opt(:delay)} seconds")
      sleep(opt(:delay))
    end

    seqno_after = read_seqno.call()
    advanced_by = seqno_after - seqno_before

    if advanced_by > 0
      ok("Tungsten Replicator #{opt(:service)} service is making progress")
    else
      critical("Tungsten Replicator #{opt(:service)} service did not show progress")
    end
  end

  # Apply the inherited configuration, then register the description and
  # the --delay (integer, default 1) and --service options.
  def configure
    super()

    description("Check that the replication service is making progress. For Continuent Tungsten installations, a heartbeat command will be run to force activity.")

    delay_option = {
      :on => "--delay String",
      :help => "The number of seconds to wait when monitoring progress",
      :parse => method(:parse_integer_option),
      :default => 1
    }
    add_option(:delay, delay_option)

    service_option = {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    }
    add_option(:service, service_option)
  end

  # Name under which this script identifies itself.
  def script_name
    return "tungsten_nagios_progress"
  end

  # Run the check as soon as the class has been defined.
  self.new().run()
end
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check verifying that every service enabled for this host is running.
#
# The candidate services are replicator, manager and connector. The class
# instantiates and runs itself as the last statement of the class body.
class ContinuentNagiosMonitorServices
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Inspect each candidate service: skip those whose host_enable_<service>
  # setting is not the string "true", and record the enabled ones that are
  # not running. Reports critical when any enabled service is down or when
  # nothing was enabled at all, ok otherwise.
  def main
    stopped = []
    inspected = []

    %w(replicator manager connector).each do |name|
      enabled = TI.setting(TI.setting_key(HOSTS, "host_enable_#{name}")) == "true"
      next unless enabled

      inspected.push(name)
      stopped.push(name) unless TI.is_running?(name)
    end

    if !stopped.empty?
      critical("#{stopped.join(', ')} #{TU.pluralize(stopped, 'is', 'are')} not running")
    elsif inspected.empty?
      critical("No services were checked")
    else
      ok("All services (#{inspected.join(', ')}) are running")
    end
  end

  # Apply the inherited configuration, then set this script's description.
  def configure
    super()

    description("Check that all configured services for the installation are running.")
  end

  # Name under which this script identifies itself.
  def script_name
    return "tungsten_nagios_services"
  end

  # Run the check as soon as the class has been defined.
  self.new().run()
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: continuent-monitors-nagios
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Continuent
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-04 00:00:00.000000000 Z
11
+ date: 2014-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: continuent-tools-monitoring
@@ -16,38 +16,36 @@ dependencies:
16
16
  requirements:
17
17
  - - '>='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.0
19
+ version: 0.5.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.0
26
+ version: 0.5.0
27
27
  description:
28
28
  email: info@continuent.com
29
29
  executables:
30
- - check_tungsten.sh
31
- - check_tungsten_backups
32
- - check_tungsten_latency
33
- - check_tungsten_online
34
- - check_tungsten_policy
35
- - check_tungsten_progress
36
- - check_tungsten_services
30
+ - tungsten_nagios_backups
37
31
  - tungsten_nagios_connector
32
+ - tungsten_nagios_latency
38
33
  - tungsten_nagios_monitor_threads
34
+ - tungsten_nagios_online
35
+ - tungsten_nagios_policy
36
+ - tungsten_nagios_progress
37
+ - tungsten_nagios_services
39
38
  extensions: []
40
39
  extra_rdoc_files: []
41
40
  files:
42
- - bin/check_tungsten.sh
43
- - bin/check_tungsten_backups
44
- - bin/check_tungsten_latency
45
- - bin/check_tungsten_online
46
- - bin/check_tungsten_policy
47
- - bin/check_tungsten_progress
48
- - bin/check_tungsten_services
41
+ - bin/tungsten_nagios_backups
49
42
  - bin/tungsten_nagios_connector
43
+ - bin/tungsten_nagios_latency
50
44
  - bin/tungsten_nagios_monitor_threads
45
+ - bin/tungsten_nagios_online
46
+ - bin/tungsten_nagios_policy
47
+ - bin/tungsten_nagios_progress
48
+ - bin/tungsten_nagios_services
51
49
  - LICENSE
52
50
  - README.md
53
51
  homepage: https://github.com/continuent/continuent-monitors-nagios
@@ -1,576 +0,0 @@
1
- #!/bin/bash
2
-
3
- #TODO
4
- #Work out the cluster names in a Composite DS
5
- #Determine the individual services in a replicator so we can print better output (status on each)
6
- #Remove host logging - stop duplicate emails across multi hosts?
7
-
8
- HOST=`hostname`
9
-
10
- #Start Configuration Options. - These can be overridden by command line options or from $CONTINUENT_ROOT/share/check_tungsten.cfg
11
- CONNECTOR=0 #If this host is running a connector set to 1 otherwise 0
12
- CLUSTER=0 #If this host is running a cluster set to 1 otherwise 0
13
- REPLICATOR=0 #If this host is running a replicator set to 1 otherwise 0
14
- REPLICATOR_PORT=10000 #Replicator Port
15
- REPLICATOR_HOME=/opt/continuent/ #Home dir for Replicator
16
- SERVICES='' #Name of the individual clusters in a composite DS
17
- EMAIL='' #Set email address here or pass via the email= command line option
18
- DISK=0 #Check Disk space
19
- CHECK_ELB=0 #Enable check for ELB socket check
20
-
21
- SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
22
- LOCK_TIMEOUT=180 # Only send a Email every x minutes for a specific
23
- # problem, stop spamming from the script
24
- LAG=60 # Slave lag to report on
25
- CONNECTOR_TIMEOUT=10 # No of seconds to wait for a connector response
26
- DISK_WARNING=80 # % full to send a warning
27
- SENDMAILBIN=/usr/sbin/sendmail
28
- #End Configuration Options
29
-
30
- SENDMAIL=0
31
- DEBUG=0
32
- LOG=/opt/continuent/share/check_tungsten.log
33
- LOCK_DIR=/opt/continuent/share/tungsten_locks
34
-
35
- function float_cond()
36
- {
37
- local cond=0
38
- if [[ $# -gt 0 ]]; then
39
- cond=$(echo "$*" | bc -q 2>&1)
40
- if [[ $? -ne 0 ]]; then
41
- echo "Error: $cond"
42
- exit 1
43
- fi
44
- if [[ -z "$cond" ]]; then cond=0; fi
45
- if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
46
- fi
47
- local stat=$((cond == 0))
48
- return $stat
49
- }
50
-
51
# Print an informational message, but only when debug output is enabled
# (DEBUG is 1).
#   $1 - message text
# DEBUG is quoted and given a default so that an empty or unset value means
# "off" instead of making the test fail with a "unary operator expected"
# error.
info ()
{
    if [ "${DEBUG:-0}" == 1 ]; then echo "INFO : $1"; fi
}
55
# Print an error message, but only when debug output is enabled (DEBUG is 1).
# The message goes to standard output; the function does not exit.
#   $1 - message text
# DEBUG is quoted and given a default so that an empty or unset value means
# "off" instead of making the test fail with a "unary operator expected"
# error.
error ()
{
    if [ "${DEBUG:-0}" == 1 ]; then echo "ERROR : $1"; fi
}
59
# Report an unrecoverable problem and stop: prints the message prefixed with
# "SEVERE : " on standard output, then exits the current shell with status 1.
#   $1 - message text
severe ()
{
    printf 'SEVERE : %s\n' "$1"
    exit 1
}
64
# Print the value stored for a key in $CONTINUENT_ROOT/conf/tungsten.cfg.
#   $1 - setting name, matched literally as "name" (including the quotes)
# The first matching line is used. Everything after the first ':' is taken as
# the value; surrounding whitespace, one trailing comma and one pair of
# enclosing double quotes are removed, so values containing ':', ',' or
# spaces survive intact.
# Calls severe (which exits) when the file is missing or no value is found.
# NOTE(review): callers invoke this inside $(...), so severe only ends that
# subshell and its message becomes the captured value - confirm that callers
# cope with this.
getSetting ()
{
    local cfg="$CONTINUENT_ROOT/conf/tungsten.cfg"
    if [ ! -f "$cfg" ]
    then
        severe "Unable to find $cfg"
    fi

    local value
    # grep -F: the key is a literal string, not a regular expression.
    value=$(grep -F "\"$1\"" "$cfg" | head -1 | cut -d ':' -f2- \
        | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
              -e 's/,$//' -e 's/^"//' -e 's/"$//')

    if [ -z "$value" ]
    then
        severe "Unable to find $1 in $cfg"
    fi
    echo "$value"
}
83
-
84
-
85
- # Load any continuent variables
86
-
87
- if [ -z $CONTINUENT_ROOT ]
88
- then
89
- [ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
90
- [ -f "$HOME/.profile" ] && . "$HOME/.profile"
91
- fi
92
-
93
# Switch a boolean option on: sets the variable whose NAME is given to 1 and
# logs the change through info.
#   $1 - name of the variable to set (for example CLUSTER)
# A bare `$1=1` would be run as a command called "CLUSTER=1"; printf -v
# performs a real assignment to the named variable.
function sOpt()
{
    printf -v "$1" '%s' 1
    info "$1 switched on via command line"
}
98
-
99
# Store the value part of a key=value command line argument in a variable.
#   $1 - name of the variable to set (for example SERVICES)
#   $2 - the raw argument, e.g. "services=east,west"
#   $3 - the option name; accepted for compatibility with the original
#        signature but no longer needed to locate the value
# Everything after the first '=' is the value, so values that themselves
# contain '=' are kept whole. When the argument has no '=' the variable is
# left untouched. printf -v is used because `$1=value` is not an assignment.
function mOpt()
{
    # Prefixed local name avoids clashing with the caller's target variable.
    local _mopt_arg="$2"

    [[ "$_mopt_arg" == *=* ]] || return 0
    printf -v "$1" '%s' "${_mopt_arg#*=}"
}
110
-
111
- #Parse the command line options
112
-
113
- for arg in "$@"
114
- do
115
- case "$arg" in
116
- -v) DEBUG=1
117
- info "Debug mode set"
118
- ;;
119
- -vv) DEBUG=1
120
- info "INFO : Extended Debug mode set"
121
- set -x
122
- ;;
123
- cluster) CLUSTER=1
124
- info "CLUSTER switched on via command line"
125
- ;;
126
- connector) CONNECTOR=1
127
- info "CONNECTOR switched on via command line"
128
- ;;
129
- replicator) REPLICATOR=1
130
- info "REPLICATOR switched on via command line"
131
- ;;
132
- check_elb) CHECK_ELB=1
133
- info "CHECK_ELB switched on via command line"
134
- ;;
135
- replicator_port*) for i in $(echo $arg | tr "=" "\n")
136
- do
137
- if [ $i != 'replicator_port' ]
138
- then
139
- REPLICATOR_PORT=$i
140
- fi
141
- done
142
-
143
- info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
144
- ;;
145
- replicator_home*) for i in $(echo $arg | tr "=" "\n")
146
- do
147
- if [ $i != 'replicator_home' ]
148
- then
149
- REPLICATOR_HOME=$i
150
- fi
151
- done
152
-
153
- info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
154
- ;;
155
- services*) for i in $(echo $arg | tr "=" "\n")
156
- do
157
- if [ $i != 'services' ]
158
- then
159
- SERVICES=$i
160
- fi
161
- done
162
- info "SERVICES $SERVICES passed via the command line"
163
- ;;
164
- email*) for i in $(echo $arg | tr "=" "\n")
165
- do
166
- if [ $i != 'email' ]
167
- then
168
- EMAIL=$i
169
- fi
170
- done
171
- info "EMAIL $EMAIL passed via the command line"
172
- ;;
173
- config*) for i in $(echo $arg | tr "=" "\n")
174
- do
175
- if [ $i != 'config' ]
176
- then
177
- FILE=$i
178
- fi
179
- done
180
- info "Config File $FILE passed via the command line"
181
- ;;
182
- disk) DISK=1
183
- info "DISK switched on via command line"
184
- ;;
185
- *)
186
- echo "Unknown command line option passed $arg"
187
- echo "Valid options are -v,cluster,connector,replicator,replicator_port=??,services=??,email=??,config=??"
188
- exit 1
189
- esac
190
-
191
-
192
- done
193
-
194
-
195
# The cluster and connector checks need the Continuent environment: make sure
# CONTINUENT_ROOT is set, source its env.sh, load optional defaults from
# check_tungsten.cfg and locate the mysql client.
if [ "$CLUSTER" == 1 ] || [ "$CONNECTOR" == 1 ]
then
    if [ -z "$CONTINUENT_ROOT" ]
    then
        # The dollar sign is escaped on purpose: the variable is empty on this
        # path, so expanding it would print " is not set" without a name.
        severe "\$CONTINUENT_ROOT is not set - unable to continue"
    fi
    if [ ! -f "$CONTINUENT_ROOT/share/env.sh" ]
    then
        severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
    fi

    . "$CONTINUENT_ROOT/share/env.sh"

    #Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
    # NOTE(review): this file is sourced after the command line was parsed, so
    # its values win over command line options - confirm that is intended.
    CFG="$CONTINUENT_ROOT/share/check_tungsten.cfg"

    if [ -f "$CFG" ]
    then
        info "Loading settings from $CFG"
        . "$CFG"
    fi

    # Honour a MYSQL path that is already set; otherwise search the PATH.
    if [ -z "$MYSQL" ]
    then
        MYSQL=$(which mysql 2>/dev/null)

        if [ "$MYSQL" == "" ]
        then
            severe " Unable to find the mysql command line program"
        fi
    fi
fi
226
-
227
- #If a file is passed from the command line load any variables from there
228
- if [ ! -z $FILE ]
229
- then
230
- if [ ! -f $FILE ]
231
- then
232
- severe "The file specified in the command line $FILE does not exist"
233
- fi
234
-
235
- info "Loading settings from $FILE"
236
- . "$FILE"
237
- fi
238
-
239
- #Parameter and host validation
240
-
241
- BC=`which bc 2>/dev/null`
242
-
243
- if [ "$BC" == "" ]
244
- then
245
- severe " Unable to find the command bc - please install"
246
- fi
247
-
248
-
249
- if [ "$EMAIL" == "" ]
250
- then
251
- severe " email must be specified"
252
- fi
253
-
254
- if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
255
- then
256
- severe " No option specified, select either connector, cluster or replicator"
257
- fi
258
-
259
- if [ -d $LOCK_DIR ]
260
- then
261
- if [ ! -w $LOCK_DIR ]
262
- then
263
- severe " The locks dir $LOCK_DIR is not writable"
264
- fi
265
- else
266
- info "Creating locks dir"
267
- mkdir $LOCK_DIR
268
- fi
269
-
270
- if [ -z "$MAILPROG" ]
271
- then
272
- MAILPROG=`which mail 2>/dev/null`
273
-
274
- if [ "$MAILPROG" == "" ]
275
- then
276
- severe " Unable to find a mail program"
277
- fi
278
- fi
279
-
280
- if [ -z "$SENDMAILBIN" ]
281
- then
282
- SENDMAILBIN=`which sendmail 2>/dev/null`
283
-
284
- if [ "$SENDMAILBIN" == "" ]
285
- then
286
- severe " Unable to find a sendmail program"
287
- fi
288
- fi
289
-
290
- if [ -f $LOG ]
291
- then
292
- rm $LOG
293
- fi
294
-
295
- #Expire old Locks
296
- info "Deleting Locks older than $LOCK_TIMEOUT min"
297
- find $LOCK_DIR/* -type f -mmin +$LOCK_TIMEOUT -delete 2> /dev/null
298
-
299
- #Check the connector status
300
- if [ $CONNECTOR == 1 ]
301
- then
302
- connector_ok_to_allow_elb=0
303
- info "Running Connector Tests"
304
- CONN=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -c| grep -v OK | wc -l)
305
- if [ $CONN -ne 0 ]
306
- then
307
- error " Connector is not running"
308
- echo "Connector is not running on $HOST - Investigate" >> $LOG
309
- if [ ! -f $LOCK_DIR/con_running.lck ]
310
- then
311
- SENDMAIL=1
312
- touch $LOCK_DIR/con_running.lck
313
- else
314
- info "Not sending Email lock file exists"
315
- fi
316
- else
317
- info "Connector is running OK"
318
-
319
- TIMEOUT=`which timeout 2>/dev/null`
320
-
321
- if [ "$TIMEOUT" == "" ]
322
- then
323
- info "timeout command not found - unable to check if the connector is responding"
324
- else
325
- info "Checking Connector is responding to queries"
326
- CON_USER=$(getSetting connector_user)
327
- CON_PW=$(getSetting connector_password)
328
- CON_PORT=$(getSetting connector_listen_port)
329
- CHECK=$(timeout -s HUP $CONNECTOR_TIMEOUT $MYSQL -P$CON_PORT -u $CON_USER -p$CON_PW -h $HOSTNAME --skip-column-names -Be"select 'ALIVE'")
330
- if [ "$CHECK" != 'ALIVE' ]
331
- then
332
- error 'Unable to connect to connector'
333
- echo "Connector is not responding on $HOST - Investigate" >> $LOG
334
- connector_ok=0
335
- if [ ! -f $LOCK_DIR/con_responding.lck ]
336
- then
337
- SENDMAIL=1
338
- touch $LOCK_DIR/con_responding.lck
339
- else
340
- info "Not sending Email lock file exists"
341
- fi
342
- else
343
- info 'Connector is alive'
344
- connector_ok_to_allow_elb=1
345
- fi
346
- fi
347
- fi
348
-
349
- if [ $CHECK_ELB == 1 ]
350
- then
351
- if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ $connector_ok_to_allow_elb == 1 ]
352
- then
353
-
354
- sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
355
- sudo service xinetd reload
356
- fi
357
- fi
358
- fi
359
-
360
- #Check the cluster Status
361
- if [ $CLUSTER == 1 ]
362
- then
363
- #Check the processes are running
364
- info "Running Cluster Tests"
365
- REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r| grep -v OK | wc -l)
366
- if [ $REPL -ne 0 ]
367
- then
368
- error " Replicator or Manager in cluster is not running"
369
- echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
370
- if [ ! -f $LOCK_DIR/rep_running.lck ]
371
- then
372
- SENDMAIL=1
373
- touch $LOCK_DIR/rep_running.lck
374
- else
375
- info "Not sending Email lock file exists"
376
- fi
377
-
378
- else
379
- info "Replicator and Manager in cluster are running OK"
380
- fi
381
-
382
- #Check the processes are online
383
- if [ "$SERVICES" == "" ]
384
- then
385
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -v OK | wc -l)
386
- if [ $ONLINE -ne 0 ]
387
- then
388
- error "Services are not online"
389
- echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
390
- if [ ! -f $LOCK_DIR/rep_online.lck ]
391
- then
392
- SENDMAIL=1
393
- touch $LOCK_DIR/rep_online.lck
394
- else
395
- info "Not sending Email lock file exists"
396
- fi
397
-
398
- else
399
- info "Services are online"
400
- fi
401
- else
402
- services=$(echo "$SERVICES" | sed 's/,/ /g')
403
- for s in $services
404
- do
405
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -v OK | wc -l)
406
- if [ $ONLINE -ne 0 ]
407
- then
408
- error "Services are not online @ $s"
409
- echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
410
- if [ ! -f $LOCK_DIR/rep_online.lck ]
411
- then
412
- SENDMAIL=1
413
- touch $LOCK_DIR/rep_online.lck
414
- else
415
- info "Not sending Email lock file exists"
416
- fi
417
-
418
- else
419
- info "Services are online @ $s"
420
- fi
421
- done
422
- fi
423
-
424
- #Check for replicator latency
425
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -v OK | wc -l)
426
- if [ $ONLINE -ne 0 ]
427
- then
428
- error "Services are Lagging"
429
- echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
430
- if [ ! -f $LOCK_DIR/rep_lag.lck ]
431
- then
432
- SENDMAIL=1
433
- touch $LOCK_DIR/rep_lag.lck
434
- else
435
- info "Not sending Email lock file exists"
436
- fi
437
-
438
- else
439
- info "Cluster Replicator is keeping up"
440
- fi
441
- fi
442
-
443
- #Check the Replicator
444
- if [ $REPLICATOR == 1 ]
445
- then
446
- if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
447
- then
448
- severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
449
- fi
450
-
451
- AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep "Connection failed" | wc -l)
452
- if [ $AVAILABLE -gt 0 ]
453
- then
454
- error "Replicator process is not running on $REPLICATOR_PORT"
455
- echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
456
- if [ ! -f $LOCK_DIR/tr_rep_running.lck ]
457
- then
458
- SENDMAIL=1
459
- touch $LOCK_DIR/tr_rep_running.lck
460
- else
461
- info "Not sending Email lock file exists"
462
- fi
463
-
464
- else
465
- info "TR Replicator is running"
466
- fi
467
-
468
-
469
- ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services| grep state | grep -v ONLINE | wc -l)
470
- if [ $ONLINE -gt 0 ]
471
- then
472
- error "Replicator is down"
473
- echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
474
- if [ ! -f $LOCK_DIR/tr_rep_online.lck ]
475
- then
476
- SENDMAIL=1
477
- touch $LOCK_DIR/tr_rep_online.lck
478
- else
479
- info "Not sending Email lock file exists"
480
- fi
481
-
482
- else
483
- info "TR Replicator is online"
484
- fi
485
-
486
- #Check for latency
487
- LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services|grep appliedLatency|cut -d ':' -f2)
488
-
489
- for LATENCY in $LATENCY_LIST
490
- do
491
- if float_cond "$LATENCY > $LAG"; then
492
- error "Replicator is lagging"
493
- echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
494
- if [ ! -f $LOCK_DIR/tr_rep_lag.lck ]
495
- then
496
- SENDMAIL=1
497
- touch $LOCK_DIR/tr_rep_lag.lck
498
- else
499
- info "Not sending Email lock file exists"
500
- fi
501
- else
502
- info "Replicator latency ok"
503
- fi
504
- done
505
-
506
-
507
- fi
508
-
509
#Check the disk space
# For every filesystem reported by df (tmpfs and cdrom entries excluded),
# log a warning when usage reaches DISK_WARNING percent and request an email
# unless the disk lock file already exists.
if [ "$DISK" == 1 ]
then
    # The loop reads from process substitution instead of sitting on the
    # right-hand side of a pipe: a piped loop runs in a subshell, so the
    # SENDMAIL=1 set below would be lost and no mail would ever be sent.
    while read -r usep partition
    do
        usep=${usep%\%}

        # Skip entries whose Use% column is not a plain number, which would
        # otherwise break the integer comparison.
        [[ "$usep" =~ ^[0-9]+$ ]] || continue

        if [ "$usep" -ge "$DISK_WARNING" ]; then
            error "Running out of disk space on $partition"
            echo "Running out for disk space on $HOST $partition - Investigate" >> "$LOG"
            if [ ! -f "$LOCK_DIR/disk.lck" ]
            then
                SENDMAIL=1
                touch "$LOCK_DIR/disk.lck"
            else
                info "Not sending Email lock file exists"
            fi
        fi
    done < <(df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }')

fi
531
-
532
- if [ $SENDMAIL == 1 ]
533
- then
534
- if [ $DEBUG == 1 ]
535
- then
536
- info "Sending Email to $EMAIL"
537
- info "Subject $SUBJECT"
538
- cat $LOG
539
- fi
540
-
541
- if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
542
- then
543
- manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep "PID" | wc -l)
544
- if [ $manager_running -eq 1 ]; then
545
- info "Adding cctrl output to email"
546
- echo >> $LOG
547
- echo "OUTPUT FROM cctrl ls on $HOST" >> $LOG
548
- echo '--------------------------------------------------' >> $LOG
549
- echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert >> $LOG
550
- echo '--------------------------------------------------' >> $LOG
551
- else
552
- info 'Manager not running skipping cctrl output'
553
- echo "Manager not running unable to gather cctrl output" >> $LOG
554
- fi
555
-
556
- fi
557
- if [ $REPLICATOR == 1 ]
558
- then
559
- if [ -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
560
- then
561
- info "Adding trepctl output to email"
562
- echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST" >> $LOG
563
- echo '--------------------------------------------------' >> $LOG
564
-
565
-
566
- $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services >> $LOG
567
- echo '--------------------------------------------------' >> $LOG
568
- else
569
- info 'trepctl not found'
570
- echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
571
- fi
572
- fi
573
- $MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
574
- fi
575
-
576
-