continuent-monitors-nagios 0.0.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check that reports whether the local cluster policy is AUTOMATIC.
class ContinuentNagiosMonitorPolicy
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Runs the check in three steps: the host must be a manager, the manager
  # must be running, and the policy returned by TI.status must be "AUTOMATIC".
  # NOTE(review): the sequence reads as if critical() ends the script --
  # confirm in TungstenNagiosMonitor.
  def main
    critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
    critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")

    cluster_status = TI.status
    if cluster_status.policy != "AUTOMATIC"
      critical("Cluster is in #{cluster_status.policy} mode")
    else
      ok("Cluster is in #{cluster_status.policy} mode")
    end
  end

  # Extends the inherited configuration with this script's description.
  def configure
    super()

    description("Check that the local cluster is running in the AUTOMATIC policy")
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_policy"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check that samples the replication service's appliedLastSeqno twice
# and reports whether the value increased between the two samples.
class ContinuentNagiosMonitorProgress
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Check sequence:
  #   1. the host is a replicator and the replicator is running;
  #   2. on commercial installations, the host is a manager and the manager
  #      is running;
  #   3. a service name is available (--service, defaulting to
  #      TI.default_dataservice) and its state is ONLINE;
  #   4. appliedLastSeqno is read, a "cluster heartbeat" is issued on
  #      commercial installations, the script sleeps for --delay seconds when
  #      the option is an Integer, and appliedLastSeqno is read again.
  # Reports ok when the second reading is greater than the first.
  # NOTE(review): the sequence reads as if critical() ends the script --
  # confirm in TungstenNagiosMonitor.
  def main
    critical("The server is not a Tungsten Replicator") unless TI.is_replicator?
    critical("The Tungsten Replicator is not running") unless TI.is_running?("replicator")

    if TI.is_commercial?
      critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?
      critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")
    end

    opt_default(:service, TI.default_dataservice)
    critical("The --service option was not given") if opt(:service).nil?

    if TI.trepctl_value(opt(:service), "state") != "ONLINE"
      critical("The #{opt(:service)} replication service is not ONLINE")
    end

    # Both samples are taken the same way: stringify the reported value and
    # convert it to a Float.
    read_seqno = lambda do
      TI.trepctl_value(opt(:service), "appliedLastSeqno").to_s.to_f
    end

    seqno_before = read_seqno.call

    TI.ensure_cctrl("cluster heartbeat") if TI.is_commercial?

    if opt(:delay).is_a?(Integer)
      TU.debug("Go to sleep for #{opt(:delay)} seconds")
      sleep(opt(:delay))
    end

    seqno_after = read_seqno.call
    advanced = (seqno_after - seqno_before) > 0

    if advanced
      ok("Tungsten Replicator #{opt(:service)} service is making progress")
    else
      critical("Tungsten Replicator #{opt(:service)} service did not show progress")
    end
  end

  # Extends the inherited configuration with the description and the
  # --delay and --service options.
  def configure
    super()

    description("Check that the replication service is making progress. For Continuent Tungsten installations, a heartbeat command will be run to force activity.")

    delay_option = {
      :on => "--delay String",
      :help => "The number of seconds to wait when monitoring progress",
      :parse => method(:parse_integer_option),
      :default => 1
    }
    add_option(:delay, delay_option)

    service_option = {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    }
    add_option(:service, service_option)
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_progress"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright (C) 2014 Continuent, Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
+ # not use this file except in compliance with the License. You may obtain
6
+ # a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ # License for the specific language governing permissions and limitations
14
+ # under the License.
15
+ #
16
+ # Initial developer(s): Jeff Mace
17
+ # Contributor(s):
18
+
19
+ begin
20
+ require 'rubygems'
21
+ gem 'continuent-tools-core'
22
+ rescue LoadError
23
+ end
24
+
25
+ require 'continuent-tools-core'
26
+ require 'continuent-tools-nagios-monitor'
27
+
28
# Nagios check that verifies every service enabled for this host is running.
class ContinuentNagiosMonitorServices
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Walks the replicator, manager and connector services in that order.
  # A service is examined only when its "host_enable_<service>" setting is
  # the string "true". Reports critical when any examined service is not
  # running or when nothing was examined, otherwise ok.
  def main
    stopped = []
    examined = []

    %w[replicator manager connector].each do |svc|
      next unless TI.setting(TI.setting_key(HOSTS, "host_enable_" + svc)) == "true"

      examined << svc
      stopped << svc unless TI.is_running?(svc)
    end

    if !stopped.empty?
      critical("#{stopped.join(', ')} #{TU.pluralize(stopped, 'is', 'are')} not running")
    elsif examined.empty?
      critical("No services were checked")
    else
      ok("All services (#{examined.join(', ')}) are running")
    end
  end

  # Extends the inherited configuration with this script's description.
  def configure
    super()

    description("Check that all configured services for the installation are running.")
  end

  # Name under which this script identifies itself.
  def script_name
    "tungsten_nagios_services"
  end

  # Instantiate and run as soon as the class body has been evaluated.
  new.run
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: continuent-monitors-nagios
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Continuent
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-04 00:00:00.000000000 Z
11
+ date: 2014-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: continuent-tools-monitoring
@@ -16,38 +16,36 @@ dependencies:
16
16
  requirements:
17
17
  - - '>='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.0
19
+ version: 0.5.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.0
26
+ version: 0.5.0
27
27
  description:
28
28
  email: info@continuent.com
29
29
  executables:
30
- - check_tungsten.sh
31
- - check_tungsten_backups
32
- - check_tungsten_latency
33
- - check_tungsten_online
34
- - check_tungsten_policy
35
- - check_tungsten_progress
36
- - check_tungsten_services
30
+ - tungsten_nagios_backups
37
31
  - tungsten_nagios_connector
32
+ - tungsten_nagios_latency
38
33
  - tungsten_nagios_monitor_threads
34
+ - tungsten_nagios_online
35
+ - tungsten_nagios_policy
36
+ - tungsten_nagios_progress
37
+ - tungsten_nagios_services
39
38
  extensions: []
40
39
  extra_rdoc_files: []
41
40
  files:
42
- - bin/check_tungsten.sh
43
- - bin/check_tungsten_backups
44
- - bin/check_tungsten_latency
45
- - bin/check_tungsten_online
46
- - bin/check_tungsten_policy
47
- - bin/check_tungsten_progress
48
- - bin/check_tungsten_services
41
+ - bin/tungsten_nagios_backups
49
42
  - bin/tungsten_nagios_connector
43
+ - bin/tungsten_nagios_latency
50
44
  - bin/tungsten_nagios_monitor_threads
45
+ - bin/tungsten_nagios_online
46
+ - bin/tungsten_nagios_policy
47
+ - bin/tungsten_nagios_progress
48
+ - bin/tungsten_nagios_services
51
49
  - LICENSE
52
50
  - README.md
53
51
  homepage: https://github.com/continuent/continuent-monitors-nagios
@@ -1,576 +0,0 @@
1
- #!/bin/bash
2
-
3
- #TODO
4
- #Work out the cluster names in a Composite DS
5
- #determine the individulal services in a replicator so we can print out better output (status on each)
6
- #Remove host logging - stop duplicate emails across multi hosts?
7
-
8
- HOST=`hostname`
9
-
10
- #Start Configuration Options. - These can be overridden by command line options or from $CONTINUENT_ROOT/share/check_tungsten.cfg
11
- CONNECTOR=0 #If this host is running a connector set to 1 otherwise 0
12
- CLUSTER=0 #If this host is running a cluster set to 1 otherwise 0
13
- REPLICATOR=0 #If this host is running a replicator set to 1 otherwise 0
14
- REPLICATOR_PORT=10000 #Replicator Port
15
- REPLICATOR_HOME=/opt/continuent/ #Home dir for Replicator
16
- SERVICES='' #Name of the individual clusters in a composite DS
17
- EMAIL='' #Set email address here or pass via the email= command line option
18
- DISK=0 #Check Disk space
19
- CHECK_ELB=0 #Enable check for ELB socket check
20
-
21
- SUBJECT="Error : Problems exist with the Tungsten Services on $HOST"
22
- LOCK_TIMEOUT=180 # Only send a Email every x minutes for a specific
23
- # problem, stop spamming from the script
24
- LAG=60 # Slave lag to report on
25
- CONNECTOR_TIMEOUT=10 # No of seconds to wait for a connector response
26
- DISK_WARNING=80 # % full to send a warning
27
- SENDMAILBIN=/usr/sbin/sendmail
28
- #End Configuration Options
29
-
30
- SENDMAIL=0
31
- DEBUG=0
32
- LOG=/opt/continuent/share/check_tungsten.log
33
- LOCK_DIR=/opt/continuent/share/tungsten_locks
34
-
35
# Evaluate a bc expression as a shell condition.
#   $* - expression handed to `bc -q` (for example "$LATENCY > $LAG")
# Returns 0 when bc prints exactly 1 and returns 1 otherwise. This includes
# the case of no arguments and any bc output other than 0 or 1.
# Prints "Error: <output>" and exits the shell with status 1 when the bc
# pipeline itself reports a non-zero status.
float_cond() {
    local verdict=0

    if (( $# > 0 )); then
        if ! verdict=$(echo "$*" | bc -q 2>&1); then
            echo "Error: $verdict"
            exit 1
        fi

        # Anything that is not a plain 0/1 answer (empty output, bc error
        # text, other numbers) counts as false.
        case "$verdict" in
            0|1) ;;
            *) verdict=0 ;;
        esac
    fi

    [[ "$verdict" == 1 ]]
}
50
-
51
# Print an informational message when debug output is enabled.
#   $1 - message text
# Writes "INFO : <message>" to stdout only when DEBUG is exactly 1.
info ()
{
    # DEBUG is quoted so an empty or unset value is compared as a string
    # instead of making `[` fail with "unary operator expected".
    if [ "$DEBUG" = 1 ]; then echo "INFO : $1"; fi
}
55
# Print an error message when debug output is enabled.
#   $1 - message text
# Writes "ERROR : <message>" to stdout only when DEBUG is exactly 1.
error ()
{
    # DEBUG is quoted so an empty or unset value is compared as a string
    # instead of making `[` fail with "unary operator expected".
    if [ "$DEBUG" = 1 ]; then echo "ERROR : $1"; fi
}
59
# Report a fatal problem and stop.
#   $1 - message text
# Writes "SEVERE : <message>" to stdout regardless of DEBUG, then exits the
# current shell with status 1.
severe() {
    printf '%s\n' "SEVERE : $1"
    exit 1
}
64
# Look up a single setting in $CONTINUENT_ROOT/conf/tungsten.cfg.
#   $1 - setting key as it appears, double-quoted, in the file
# Prints the value of the first matching line with commas, double quotes and
# spaces removed. Calls severe when the file is missing or no value is found.
# As a side effect it sets the globals CFG and getSettingValue.
# NOTE(review): callers in this script use $(getSetting ...), so the exit
# performed by severe only ends the command-substitution subshell and the
# SEVERE text becomes the captured value -- callers may need to check for it.
getSetting ()
{
    CFG=$CONTINUENT_ROOT/conf/tungsten.cfg
    # Quoted so a CONTINUENT_ROOT containing spaces does not break the test.
    if [ ! -f "$CFG" ]
    then
        severe "Unable to find $CFG"
    fi

    # -F matches the quoted key literally rather than as a regular expression.
    getSettingValue=$(grep -F "\"$1\"" "$CFG" | cut -d ':' -f2 | head -1 | sed 's/[," ]//g')

    # Quoted so the emptiness test is well-formed for any value; one check
    # covers both the "unset" and "empty string" cases.
    if [ -z "$getSettingValue" ]
    then
        severe "Unable to find $1 in $CFG"
    fi
    echo "$getSettingValue"
}
83
-
84
-
85
- # Load any continuent variables
86
-
87
- if [ -z $CONTINUENT_ROOT ]
88
- then
89
- [ -f "$HOME/.bash_profile" ] && . "$HOME/.bash_profile"
90
- [ -f "$HOME/.profile" ] && . "$HOME/.profile"
91
- fi
92
-
93
# Switch on a flag variable by name.
#   $1 - name of the shell variable to set to 1 (for example CONNECTOR)
# Also logs the change through info.
function sOpt()
{
    # A bare `$1=1` is parsed as a command named "<name>=1", not as an
    # assignment; printf -v assigns to the variable whose name is in $1.
    printf -v "$1" '%s' 1
    info "$1 switched on via command line"
}
98
-
99
# Store the value part of a "name=value" command-line argument.
#   $1 - name of the shell variable that receives the value
#   $2 - the raw argument, e.g. "replicator_port=10001"
#   $3 - the option name to skip, e.g. "replicator_port"
# The argument is split on "=" and every piece that differs from $3 is
# assigned in turn, so the last such piece wins. This mirrors the inline
# option loops used elsewhere in this script.
function mOpt()
{
    local _mopt_piece

    for _mopt_piece in $(echo "$2" | tr "=" "\n")
    do
        # "$3" must be double-quoted: in single quotes it is the literal
        # text $3 and never matches the option name.
        if [ "$_mopt_piece" != "$3" ]
        then
            # A bare `$1=...` would be run as a command; printf -v assigns
            # to the variable whose name is in $1.
            printf -v "$1" '%s' "$_mopt_piece"
        fi
    done
}
110
-
111
- #Parse the command line options
112
-
113
- for arg in "$@"
114
- do
115
- case "$arg" in
116
- -v) DEBUG=1
117
- info "Debug mode set"
118
- ;;
119
- -vv) DEBUG=1
120
- info "INFO : Extended Debug mode set"
121
- set -x
122
- ;;
123
- cluster) CLUSTER=1
124
- info "CLUSTER switched on via command line"
125
- ;;
126
- connector) CONNECTOR=1
127
- info "CONNECTOR switched on via command line"
128
- ;;
129
- replicator) REPLICATOR=1
130
- info "REPLICATOR switched on via command line"
131
- ;;
132
- check_elb) CHECK_ELB=1
133
- info "CHECK_ELB switched on via command line"
134
- ;;
135
- replicator_port*) for i in $(echo $arg | tr "=" "\n")
136
- do
137
- if [ $i != 'replicator_port' ]
138
- then
139
- REPLICATOR_PORT=$i
140
- fi
141
- done
142
-
143
- info "REPLICATOR_PORT - $REPLICATOR_PORT - switched on via command line"
144
- ;;
145
- replicator_home*) for i in $(echo $arg | tr "=" "\n")
146
- do
147
- if [ $i != 'replicator_home' ]
148
- then
149
- REPLICATOR_HOME=$i
150
- fi
151
- done
152
-
153
- info "REPLICATOR_HOME - $REPLICATOR_HOME - switched on via command line"
154
- ;;
155
- services*) for i in $(echo $arg | tr "=" "\n")
156
- do
157
- if [ $i != 'services' ]
158
- then
159
- SERVICES=$i
160
- fi
161
- done
162
- info "SERVICES $SERVICES passed via the command line"
163
- ;;
164
- email*) for i in $(echo $arg | tr "=" "\n")
165
- do
166
- if [ $i != 'email' ]
167
- then
168
- EMAIL=$i
169
- fi
170
- done
171
- info "EMAIL $EMAIL passed via the command line"
172
- ;;
173
- config*) for i in $(echo $arg | tr "=" "\n")
174
- do
175
- if [ $i != 'config' ]
176
- then
177
- FILE=$i
178
- fi
179
- done
180
- info "Config File $FILE passed via the command line"
181
- ;;
182
- disk) DISK=1
183
- info "DISK switched on via command line"
184
- ;;
185
- *)
186
- echo "Unknown command line option passed $arg"
187
- echo "Valid options are -v,cluster,connector,replicator,replicator_port=??,services=??,email=??,config=??"
188
- exit 1
189
- esac
190
-
191
-
192
- done
193
-
194
-
195
- if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
196
- then
197
- if [ -z $CONTINUENT_ROOT ]
198
- then
199
- severe "$CONTINUENT_ROOT is not set - unable to continue"
200
- fi
201
- if [ ! -f $CONTINUENT_ROOT/share/env.sh ]
202
- then
203
- severe "Unable to find env.sh in $CONTINUENT_ROOT/share"
204
- fi
205
-
206
- . "$CONTINUENT_ROOT/share/env.sh"
207
-
208
- #Load any default settings from $CONTINUENT_ROOT/share/check_tungsten.cfg
209
- CFG=$CONTINUENT_ROOT/share/check_tungsten.cfg
210
-
211
- if [ -f $CFG ]
212
- then
213
- info "Loading settings from $CFG"
214
- . "$CFG"
215
- fi
216
- if [ -z "$MYSQL" ]
217
- then
218
- MYSQL=`which mysql 2>/dev/null`
219
-
220
- if [ "$MYSQL" == "" ]
221
- then
222
- severe " Unable to the mysql command line program"
223
- fi
224
- fi
225
- fi
226
-
227
- #If a file is passed from the command line load any variables from there
228
- if [ ! -z $FILE ]
229
- then
230
- if [ ! -f $FILE ]
231
- then
232
- severe "The file specified in the command line $FILE does not exist"
233
- fi
234
-
235
- info "Loading settings from $FILE"
236
- . "$FILE"
237
- fi
238
-
239
- #Parameter and host validation
240
-
241
- BC=`which bc 2>/dev/null`
242
-
243
- if [ "$BC" == "" ]
244
- then
245
- severe " Unable to find the command bc - please install"
246
- fi
247
-
248
-
249
- if [ "$EMAIL" == "" ]
250
- then
251
- severe " email must be specified"
252
- fi
253
-
254
- if [[ "$CONNECTOR" == 0 && "$CLUSTER" == 0 && "$REPLICATOR" == 0 ]]
255
- then
256
- severe " No option specified, select either connector, cluster or replicator"
257
- fi
258
-
259
- if [ -d $LOCK_DIR ]
260
- then
261
- if [ ! -w $LOCK_DIR ]
262
- then
263
- severe " The locks dir $LOCK_DIR is not writable"
264
- fi
265
- else
266
- info "Creating locks dir"
267
- mkdir $LOCK_DIR
268
- fi
269
-
270
- if [ -z "$MAILPROG" ]
271
- then
272
- MAILPROG=`which mail 2>/dev/null`
273
-
274
- if [ "$MAILPROG" == "" ]
275
- then
276
- severe " Unable to find a mail program"
277
- fi
278
- fi
279
-
280
- if [ -z "$SENDMAILBIN" ]
281
- then
282
- SENDMAILBIN=`which sendmail 2>/dev/null`
283
-
284
- if [ "$SENDMAILBIN" == "" ]
285
- then
286
- severe " Unable to find a sendmail program"
287
- fi
288
- fi
289
-
290
- if [ -f $LOG ]
291
- then
292
- rm $LOG
293
- fi
294
-
295
- #Expire old Locks
296
- info "Deleting Locks older than $LOCK_TIMEOUT min"
297
- find $LOCK_DIR/* -type f -mmin +$LOCK_TIMEOUT -delete 2> /dev/null
298
-
299
- #Check the connector status
300
- if [ $CONNECTOR == 1 ]
301
- then
302
- connector_ok_to_allow_elb=0
303
- info "Running Connector Tests"
304
- CONN=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -c| grep -v OK | wc -l)
305
- if [ $CONN -ne 0 ]
306
- then
307
- error " Connector is not running"
308
- echo "Connector is not running on $HOST - Investigate" >> $LOG
309
- if [ ! -f $LOCK_DIR/con_running.lck ]
310
- then
311
- SENDMAIL=1
312
- touch $LOCK_DIR/con_running.lck
313
- else
314
- info "Not sending Email lock file exists"
315
- fi
316
- else
317
- info "Connector is running OK"
318
-
319
- TIMEOUT=`which timeout 2>/dev/null`
320
-
321
- if [ "$TIMEOUT" == "" ]
322
- then
323
- info "timeout command not found - unable to check if the connector is responding"
324
- else
325
- info "Checking Connector is responding to queries"
326
- CON_USER=$(getSetting connector_user)
327
- CON_PW=$(getSetting connector_password)
328
- CON_PORT=$(getSetting connector_listen_port)
329
- CHECK=$(timeout -s HUP $CONNECTOR_TIMEOUT $MYSQL -P$CON_PORT -u $CON_USER -p$CON_PW -h $HOSTNAME --skip-column-names -Be"select 'ALIVE'")
330
- if [ "$CHECK" != 'ALIVE' ]
331
- then
332
- error 'Unable to connect to connector'
333
- echo "Connector is not responding on $HOST - Investigate" >> $LOG
334
- connector_ok=0
335
- if [ ! -f $LOCK_DIR/con_responding.lck ]
336
- then
337
- SENDMAIL=1
338
- touch $LOCK_DIR/con_responding.lck
339
- else
340
- info "Not sending Email lock file exists"
341
- fi
342
- else
343
- info 'Connector is alive'
344
- connector_ok_to_allow_elb=1
345
- fi
346
- fi
347
- fi
348
-
349
- if [ $CHECK_ELB == 1 ]
350
- then
351
- if [ -f /etc/xinetd.d/disabled/connectorchk ] && [ $connector_ok_to_allow_elb == 1 ]
352
- then
353
-
354
- sudo mv /etc/xinetd.d/disabled/connectorchk /etc/xinetd.d/
355
- sudo service xinetd reload
356
- fi
357
- fi
358
- fi
359
-
360
- #Check the cluster Status
361
- if [ $CLUSTER == 1 ]
362
- then
363
- #Check the processes are running
364
- info "Running Cluster Tests"
365
- REPL=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_services -r| grep -v OK | wc -l)
366
- if [ $REPL -ne 0 ]
367
- then
368
- error " Replicator or Manager in cluster is not running"
369
- echo "Replicator or Manager in cluster is not running on $HOST - Investigate" >> $LOG
370
- if [ ! -f $LOCK_DIR/rep_running.lck ]
371
- then
372
- SENDMAIL=1
373
- touch $LOCK_DIR/rep_running.lck
374
- else
375
- info "Not sending Email lock file exists"
376
- fi
377
-
378
- else
379
- info "Replicator and Manager in cluster are running OK"
380
- fi
381
-
382
- #Check the processes are online
383
- if [ "$SERVICES" == "" ]
384
- then
385
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online | grep -v OK | wc -l)
386
- if [ $ONLINE -ne 0 ]
387
- then
388
- error "Services are not online"
389
- echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
390
- if [ ! -f $LOCK_DIR/rep_online.lck ]
391
- then
392
- SENDMAIL=1
393
- touch $LOCK_DIR/rep_online.lck
394
- else
395
- info "Not sending Email lock file exists"
396
- fi
397
-
398
- else
399
- info "Services are online"
400
- fi
401
- else
402
- services=$(echo "$SERVICES" | sed 's/,/ /g')
403
- for s in $services
404
- do
405
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_online -s $s | grep -v OK | wc -l)
406
- if [ $ONLINE -ne 0 ]
407
- then
408
- error "Services are not online @ $s"
409
- echo "Cluster Replicator processes are not online on $HOST - Investigate" >> $LOG
410
- if [ ! -f $LOCK_DIR/rep_online.lck ]
411
- then
412
- SENDMAIL=1
413
- touch $LOCK_DIR/rep_online.lck
414
- else
415
- info "Not sending Email lock file exists"
416
- fi
417
-
418
- else
419
- info "Services are online @ $s"
420
- fi
421
- done
422
- fi
423
-
424
- #Check for replicator latency
425
- ONLINE=$($CONTINUENT_ROOT/tungsten/cluster-home/bin/check_tungsten_latency -w $LAG -c $LAG | grep -v OK | wc -l)
426
- if [ $ONLINE -ne 0 ]
427
- then
428
- error "Services are Lagging"
429
- echo "Cluster Replicator processes are lagging on $HOST - Investigate" >> $LOG
430
- if [ ! -f $LOCK_DIR/rep_lag.lck ]
431
- then
432
- SENDMAIL=1
433
- touch $LOCK_DIR/rep_lag.lck
434
- else
435
- info "Not sending Email lock file exists"
436
- fi
437
-
438
- else
439
- info "Cluster Replicator is keeping up"
440
- fi
441
- fi
442
-
443
- #Check the Replicator
444
- if [ $REPLICATOR == 1 ]
445
- then
446
- if [ ! -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
447
- then
448
- severe "trepctl not found in $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/ "
449
- fi
450
-
451
- AVAILABLE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services | grep "Connection failed" | wc -l)
452
- if [ $AVAILABLE -gt 0 ]
453
- then
454
- error "Replicator process is not running on $REPLICATOR_PORT"
455
- echo "Replicator processes is not running on $HOST:$REPLICATOR_PORT - Investigate" >> $LOG
456
- if [ ! -f $LOCK_DIR/tr_rep_running.lck ]
457
- then
458
- SENDMAIL=1
459
- touch $LOCK_DIR/tr_rep_running.lck
460
- else
461
- info "Not sending Email lock file exists"
462
- fi
463
-
464
- else
465
- info "TR Replicator is running"
466
- fi
467
-
468
-
469
- ONLINE=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services| grep state | grep -v ONLINE | wc -l)
470
- if [ $ONLINE -gt 0 ]
471
- then
472
- error "Replicator is down"
473
- echo "Replicator processes is not ONLINE on $HOST - Investigate" >> $LOG
474
- if [ ! -f $LOCK_DIR/tr_rep_online.lck ]
475
- then
476
- SENDMAIL=1
477
- touch $LOCK_DIR/tr_rep_online.lck
478
- else
479
- info "Not sending Email lock file exists"
480
- fi
481
-
482
- else
483
- info "TR Replicator is online"
484
- fi
485
-
486
- #Check for latency
487
- LATENCY_LIST=$($REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services|grep appliedLatency|cut -d ':' -f2)
488
-
489
- for LATENCY in $LATENCY_LIST
490
- do
491
- if float_cond "$LATENCY > $LAG"; then
492
- error "Replicator is lagging"
493
- echo "Replicator processes is behind on $HOST - Investigate" >> $LOG
494
- if [ ! -f $LOCK_DIR/tr_rep_lag.lck ]
495
- then
496
- SENDMAIL=1
497
- touch $LOCK_DIR/tr_rep_lag.lck
498
- else
499
- info "Not sending Email lock file exists"
500
- fi
501
- else
502
- info "Replicator latency ok"
503
- fi
504
- done
505
-
506
-
507
- fi
508
-
509
- #Check the disk space
510
- if [ $DISK == 1 ]
511
- then
512
-
513
- df -HP | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }' | while read output;
514
- do
515
- usep=$(echo $output | awk '{ print $1}' | cut -d'%' -f1 )
516
- partition=$(echo $output | awk '{ print $2 }' )
517
- if [ $usep -ge $DISK_WARNING ]; then
518
- error "Running out of disk space on $partition"
519
- echo "Running out for disk space on $HOST $partition - Investigate" >> $LOG
520
- if [ ! -f $LOCK_DIR/disk.lck ]
521
- then
522
- SENDMAIL=1
523
- touch $LOCK_DIR/disk.lck
524
- else
525
- info "Not sending Email lock file exists"
526
- fi
527
- fi
528
- done
529
-
530
- fi
531
-
532
- if [ $SENDMAIL == 1 ]
533
- then
534
- if [ $DEBUG == 1 ]
535
- then
536
- info "Sending Email to $EMAIL"
537
- info "Subject $SUBJECT"
538
- cat $LOG
539
- fi
540
-
541
- if [ $CLUSTER == 1 ] || [ $CONNECTOR == 1 ]
542
- then
543
- manager_running=$($CONTINUENT_ROOT/tungsten/tungsten-manager/bin/manager status | grep "PID" | wc -l)
544
- if [ $manager_running -eq 1 ]; then
545
- info "Adding cctrl output to email"
546
- echo >> $LOG
547
- echo "OUTPUT FROM cctrl ls on $HOST" >> $LOG
548
- echo '--------------------------------------------------' >> $LOG
549
- echo 'ls' | $CONTINUENT_ROOT/tungsten/tungsten-manager/bin/cctrl -expert >> $LOG
550
- echo '--------------------------------------------------' >> $LOG
551
- else
552
- info 'Manager not running skipping cctrl output'
553
- echo "Manager not running unable to gather cctrl output" >> $LOG
554
- fi
555
-
556
- fi
557
- if [ $REPLICATOR == 1 ]
558
- then
559
- if [ -f $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl ]
560
- then
561
- info "Adding trepctl output to email"
562
- echo "OUTPUT FROM trepctl -port $REPLICATOR_PORT status on $HOST" >> $LOG
563
- echo '--------------------------------------------------' >> $LOG
564
-
565
-
566
- $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl -port $REPLICATOR_PORT services >> $LOG
567
- echo '--------------------------------------------------' >> $LOG
568
- else
569
- info 'trepctl not found'
570
- echo "trepctl not found at $REPLICATOR_HOME/tungsten/tungsten-replicator/bin/trepctl unable to query for output" >> $LOG
571
- fi
572
- fi
573
- $MAILPROG -s "$SUBJECT" "$EMAIL" < $LOG
574
- fi
575
-
576
-