continuent-monitors-nagios 0.0.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # The script should be run as the tungsten user to ensure the
4
- # environment is set correctly. Export the environment variables before
5
- # calling the script to modify behavior
6
- #
7
-
8
- require "#{File.dirname(__FILE__)}/../lib/ruby/tungsten"
9
-
10
- class CheckTungstenBackups
11
- include TungstenScript
12
-
13
- def main
14
- status = TI.status()
15
- unless status.coordinator() == TI.hostname()
16
- nagios_ok("Not running check because this node is not the coordinator")
17
- end
18
- seconds_since_epoch = TU.cmd_result("date +%s").to_i()
19
- most_recent_backup = nil
20
-
21
- status.datasources().each{
22
- |ds|
23
- begin
24
- TU.ssh_result("stat -c\"%n %Y\" #{TI.trepctl_property(status.name(), 'replicator.storage.agent.fs.directory')}/store*.properties 2>/dev/null", ds, TI.user()).split("\n").each{
25
- |line|
26
- stored_backup=line.split(" ")
27
- stored_backup[1] = stored_backup[1].to_i()
28
-
29
- if most_recent_backup == nil || stored_backup[1] > most_recent_backup[:seconds]
30
- most_recent_backup = {
31
- :hostname => ds,
32
- :filename => stored_backup[0],
33
- :seconds => stored_backup[1]
34
- }
35
- end
36
- }
37
- rescue CommandError
38
- end
39
- }
40
-
41
- if most_recent_backup == nil
42
- nagios_critical("Unable to find a backup on any datasource")
43
- end
44
-
45
- age = seconds_since_epoch-most_recent_backup[:seconds]
46
- if age > @options[:max_backup_age]
47
- nagios_critical("#{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
48
- else
49
- nagios_ok("The most recent backup is #{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s]")
50
- end
51
- end
52
-
53
- def configure
54
- super()
55
-
56
- description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")
57
- add_option(:max_backup_age, {
58
- :on => "--max-backup-age String",
59
- :help => "Maximum allowed age in seconds of a backup on any machine",
60
- :parse => method(:parse_integer_option),
61
- :default => 86400
62
- })
63
- end
64
-
65
- def script_name
66
- "check_tungsten_backups"
67
- end
68
-
69
- self.new().run()
70
- end
@@ -1,172 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Latency
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script accepts two arguments, {{-w}} and {{-c}}. The {{-w}} flag is
7
- # the level at which a warning should be returned. {{-c}} sets the level for
8
- # a critical return value. The script uses the maximum latency of any slave
9
- # to determine the return value.
10
- #
11
- OK_STATE=0
12
- WARNING_STATE=1
13
- CRITICAL_STATE=2
14
- THOME=`dirname $0`
15
-
16
- function display_help()
17
- {
18
- echo "Usage: ./check_tungsten_latency -w warning_level -c critical_level [-h]"
19
- echo " -w Throw a warning alert if the maximum latency"
20
- echo " is above this level"
21
- echo " -c Throw a critical alert if the maximum latency"
22
- echo " is above this level"
23
- echo " --perfdata Display performance data of the latency"
24
- echo " --perslave-perfdata Show performance latency values of each slave."
25
- echo " If this is not set the maximum latency will be"
26
- echo " displayed in the performace data"
27
- echo " -h Display this message"
28
- exit 0
29
- }
30
-
31
- # We will use this to make some floating point comparisons
32
- function float_cond()
33
- {
34
- local cond=0
35
- if [[ $# -gt 0 ]]; then
36
- cond=$(echo "$*" | bc -q 2>&1)
37
- if [[ $? -ne 0 ]]; then
38
- echo "Error: $cond"
39
- exit 1
40
- fi
41
- if [[ -z "$cond" ]]; then cond=0; fi
42
- if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
43
- fi
44
- local stat=$((cond == 0))
45
- return $stat
46
- }
47
-
48
- warning_level=0
49
- critical_level=0
50
- perfdata="false"
51
- performance_data_default_glue=""
52
- performance_data_suffix=""
53
- perfdata_allslaves=""
54
-
55
- for arg
56
- do
57
- delim=""
58
- case "$arg" in
59
- #translate --gnu-long-options to -g (short options)
60
- --perfdata) args="${args}-p ";;
61
- --perslave-perfdata) args="${args}-s ";;
62
- #pass through anything else
63
- *) [[ "${arg:0:1}" == "-" ]] || delim="\""
64
- args="${args}${delim}${arg}${delim} ";;
65
- esac
66
- done
67
-
68
- #Reset the positional parameters to the short options
69
- eval set -- $args
70
-
71
- while getopts "w:c:hnps" Option # -h is a plain flag; with "h:" a bare -h was rejected and never showed help
72
- do
73
- case $Option in
74
- w ) # warning threshold
75
- warning_level=$OPTARG
76
- ;;
77
- c ) # critical threshold
78
- critical_level=$OPTARG
79
- ;;
80
- h ) # print usage and exit
81
- display_help
82
- ;;
83
- p ) # translated from --perfdata
84
- perfdata="true"
85
- ;;
86
- s ) # translated from --perslave-perfdata
87
- perfdata_allslaves="true"
88
- ;;
89
- esac
90
- done
91
- if float_cond "$warning_level == 0"; then
92
- echo "Error: warning_level has not been set"
93
- echo ""
94
- display_help
95
- fi
96
-
97
- if float_cond "$critical_level == 0"; then
98
- echo "Error: critical_level has not been set"
99
- echo ""
100
- display_help
101
- fi
102
-
103
- if [ "$perfdata" == "true" ]; then
104
- performance_data_default_glue=" "
105
- performance_data_suffix=";$warning_level;$critical_level;;"
106
- fi
107
-
108
- error_message=""
109
- error_message_glue="" # was misspelled, leaving the separator read in the loop below uninitialised
110
- performance_data_glue=""
111
- performance_data="| "
112
- max_latency=0
113
-
114
- manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
115
- # Check the manager status
116
- if [ $manager_running -eq 0 ]; then
117
- echo "CRITICAL: Manager is not running"
118
- exit $CRITICAL_STATE
119
- fi
120
-
121
- latency_values=`echo "ls -l" | ${THOME}/../../tungsten-manager/bin/cctrl | grep -E "REPLICATOR\(role=[relay|slave]|appliedLatency" | tr -d "| "`
122
-
123
- current_slave=""
124
- for line in $latency_values
125
- do
126
- if [[ $current_slave == "" ]]
127
- then
128
- current_slave=`echo $line | grep "REPLICATOR" | cut -f 1 -d ":"`
129
- else
130
- applied_latency=`echo $line | grep "appliedLatency" | cut -f 2 -d ":"`
131
-
132
- if float_cond "$applied_latency > $max_latency"; then
133
- max_latency=$applied_latency
134
- fi
135
-
136
- if float_cond "$applied_latency > $warning_level"; then
137
- error_message="$error_message$error_message_glue$current_slave=$applied_latency""s"
138
- error_message_glue=", "
139
- fi
140
-
141
- if float_cond "$applied_latency == -1"; then
142
- error_message="$error_message$error_message_glue$current_slave is missing latency information"
143
- error_message_glue=", "
144
- fi
145
-
146
- performance_data="$performance_data$performance_data_glue$current_slave=$applied_latency$performance_data_suffix"
147
- performance_data_glue="$performance_data_default_glue"
148
- current_slave=""
149
- fi
150
- done
151
-
152
- if [ "$perfdata_allslaves" != "true" ]; then
153
- performance_data="| max_latency=${max_latency}$performance_data_suffix"
154
- fi
155
-
156
- if [ "$perfdata" == "false" ]; then
157
- performance_data=""
158
- fi
159
-
160
- if float_cond "$max_latency > $critical_level"; then
161
- echo "CRITICAL: $error_message $performance_data"
162
- exit $CRITICAL_STATE
163
- fi
164
-
165
- if [[ $error_message != "" ]]; then
166
- echo "WARNING: $error_message $performance_data"
167
- exit $WARNING_STATE
168
- fi
169
-
170
- echo "OK: All slaves are running normally (max_latency=${max_latency}) $performance_data "
171
-
172
- exit $OK_STATE
@@ -1,105 +0,0 @@
1
- #!/bin/bash
2
- # Copyright (C) 2014 Continuent, Inc.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
- # not use this file except in compliance with the License. You may obtain
6
- # a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
- # License for the specific language governing permissions and limitations
14
- # under the License.
15
- #
16
- # Initial developer(s): Jeff Mace
17
- # Contributor(s):
18
- #
19
- # Simple Bash Script To Check Tungsten Services
20
- # Nagios Plugin For NRPE
21
- #
22
- # This script accepts an optional {{-s}} data service and an {{-n}} flag that
23
- # skips the shunned check. It returns CRITICAL if any Tungsten resource is not
24
- # in an {{ONLINE}} state, based on the {{ls resources}} (or {{ls}}) output of cctrl.
25
- #
26
- OK_STATE=0
27
- WARNING_STATE=1
28
- CRITICAL_STATE=2
29
- THOME=`dirname $0`
30
-
31
- error_message=""
32
- error_message_glue="" # was misspelled, leaving the separator used when listing offline services uninitialised
33
- offline_count=0
34
- dataservice=""
35
- skip_shun=0
36
-
37
- function display_help()
38
- {
39
- echo "Usage: ./check_tungsten_online -s dataservice [-h]"
40
- echo " -s The data service you would like to check"
41
- echo " -h Display this message"
42
- echo " -n Skip Shunned Services"
43
- exit 0
44
- }
45
-
46
- while getopts "s:hn" Option # -h is a plain flag; with "h:" a bare -h was rejected and the check ran anyway
47
- do
48
- case $Option in
49
- h ) # print usage and exit
50
- display_help
51
- ;;
52
- s ) # data service to inspect
53
- dataservice=$OPTARG
54
- ;;
55
- n ) # skip the SHUNNED check
56
- skip_shun=1
57
- ;;
58
- esac
59
- done
60
-
61
- manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
62
- # Check the manager status
63
- if [ $manager_running -eq 0 ]; then
64
- echo "CRITICAL: Manager is not running"
65
- exit $CRITICAL_STATE
66
- fi
67
-
68
- if [ "$dataservice" == "" ]; then
69
- offline_services=`echo "ls resources" | ${THOME}/../../tungsten-manager/bin/cctrl | grep \| | grep : | grep -v ONLINE | tr -d "| " | cut -f 1,2 -d ":"`
70
- else
71
- offline_services=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep "(\(composite \)\?master\|(\(composite \)\?slave\|(relay" | grep -v ONLINE | tr -d "|" | cut -f 1 -d "("`
72
- fi
73
-
74
- for offline_service in $offline_services
75
- do
76
- offline_count=$(($offline_count+1))
77
- error_message="$error_message$error_message_glue$offline_service"
78
- error_message_glue=", "
79
- done
80
-
81
- if [ $offline_count -gt 0 ]
82
- then
83
- echo "CRITICAL: $error_message are not ONLINE"
84
- exit $CRITICAL_STATE
85
- fi
86
-
87
-
88
- if [ $skip_shun -eq 0 ]
89
- then
90
- if [ "$dataservice" == "" ]; then
91
- shunned=`echo "ls" | ${THOME}/../../tungsten-manager/bin/cctrl | grep 'SHUNNED' | wc -l`
92
- else
93
- shunned=`echo "use $dataservice; ls" | ${THOME}/../../tungsten-manager/bin/cctrl -multi | grep 'SHUNNED' | wc -l`
94
- fi
95
-
96
- if [ $shunned -gt 0 ]
97
- then
98
- echo "CRITICAL: Dataservices are shunned"
99
- exit $CRITICAL_STATE
100
- fi
101
- fi
102
-
103
-
104
- echo "OK: All services are online"
105
- exit $OK_STATE
@@ -1,61 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Policy
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script accepts an optional {{-s}} data service. It returns CRITICAL if
7
- # the cluster is in maintenance mode.
8
- #
9
- OK_STATE=0
10
- WARNING_STATE=1
11
- CRITICAL_STATE=2
12
- THOME=`dirname $0`
13
-
14
- error_message=""
15
- error_messaage_glue=""
16
- offline_count=0
17
- dataservice=""
18
-
19
- function display_help()
20
- {
21
- echo "Usage: ./check_tungsten_policy -s dataservice [-h]"
22
- echo " -s The data service you would like to check"
23
- echo " -h Display this message"
24
- exit 0
25
- }
26
-
27
- while getopts "s:h" Option
28
- do
29
- case $Option in
30
- h )
31
- display_help
32
- ;;
33
- s )
34
- dataservice=$OPTARG
35
- ;;
36
- esac
37
- done
38
-
39
- manager_running=`${THOME}/../../tungsten-manager/bin/manager status | grep "PID" | wc -l`
40
- # Check the manager status
41
- if [ $manager_running -eq 0 ]; then
42
- echo "CRITICAL: Manager is not running"
43
- exit $CRITICAL_STATE
44
- fi
45
-
46
- if [ "$dataservice" == "" ]; then
47
- maint_mode=`echo "ls " | ${THOME}/../../tungsten-manager/bin/cctrl | grep MAINTENANCE | wc -l`
48
- else
49
- maint_mode=`echo "use $dataservice; ls " | ${THOME}/../../tungsten-manager/bin/cctrl | grep MAINTENANCE | wc -l`
50
- fi
51
-
52
-
53
- if [ $maint_mode -gt 0 ]
54
- then
55
- echo "CRITICAL: Cluster is in Maintenance mode"
56
- exit $CRITICAL_STATE
57
- fi
58
-
59
-
60
- echo "OK: Cluster is in Automatic Mode"
61
- exit $OK_STATE
@@ -1,81 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Progress
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script accepts one argument, {{-t}}, the number of seconds to wait
7
- # between two readings of the replicator's appliedLastSeqno value. A cluster
8
- # heartbeat is sent before the wait. A warning is returned if the sequence
9
- # number did not advance by at least 1, and a critical alert is returned if
10
- # the replicator is not ONLINE. No {{-w}} or {{-c}} thresholds are
11
- # implemented.
12
- #
13
- OK_STATE=0
14
- WARNING_STATE=1
15
- CRITICAL_STATE=2
16
- THOME=`dirname $0`
17
-
18
- function display_help()
19
- {
20
- echo "Usage: ./check_tungsten_progress -t time [-h]"
21
- echo " -t The number of seconds to wait when monitoring progress"
22
- echo " -h Display this message"
23
- exit 0
24
- }
25
-
26
- # We will use this to make some floating point comparisons
27
- function float_cond()
28
- {
29
- local cond=0
30
- if [[ $# -gt 0 ]]; then
31
- cond=$(echo "$*" | bc -q 2>&1)
32
- if [[ $? -ne 0 ]]; then
33
- echo "Error: $cond"
34
- exit 1
35
- fi
36
- if [[ -z "$cond" ]]; then cond=0; fi
37
- if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
38
- fi
39
- local stat=$((cond == 0))
40
- return $stat
41
- }
42
-
43
- time_period=1
44
- while getopts "t:h" Option
45
- do
46
- case $Option in
47
- t)
48
- time_period=$OPTARG
49
- ;;
50
- h )
51
- display_help
52
- ;;
53
- esac
54
- done
55
-
56
- if float_cond "$time_period == 0"; then
57
- echo "Error: time_period has not been set"
58
- echo ""
59
- display_help
60
- fi
61
-
62
- is_online=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "state" | grep "ONLINE" | wc -l`
63
- if float_cond "$is_online == 0"; then
64
- echo "CRITICAL: Replicator is not ONLINE"
65
- exit $CRITICAL_STATE
66
- fi
67
-
68
- pre_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`
69
- echo "cluster heartbeat" | ${THOME}/../../tungsten-manager/bin/cctrl > /dev/null
70
- sleep $time_period
71
- post_progress_number=`${THOME}/../../tungsten-replicator/bin/trepctl status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'`
72
-
73
- progress_number=`echo "$post_progress_number - $pre_progress_number" | bc -q 2>/dev/null`
74
-
75
- if float_cond "$progress_number < 1"; then
76
- echo "WARNING: Replicator did not show progress"
77
- exit $WARNING_STATE
78
- fi
79
-
80
- echo "OK: Replicator is making progress"
81
- exit $OK_STATE