continuent-monitors-nagios 0.0.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # The script should be run as the tungsten user to ensure the
4
- # environment is set correctly. Export the environment variables before
5
- # calling the script to modify behavior
6
- #
7
-
8
- require "#{File.dirname(__FILE__)}/../lib/ruby/tungsten"
9
-
10
# Nagios check that looks at every datasource reported by TI.status(), finds
# the most recently modified store*.properties file in the replicator's
# backup storage directory, and raises a critical alert when that file is
# older than the --max-backup-age option (in seconds).
class CheckTungstenBackups
  include TungstenScript

  # Run the check. Reports OK without checking anything when this host is not
  # the coordinator, critical when no backup can be found on any datasource or
  # when the newest backup is too old, and OK otherwise.
  def main
    status = TI.status()
    unless status.coordinator() == TI.hostname()
      nagios_ok("Not running check because this node is not the coordinator")
      # NOTE(review): nagios_ok presumably terminates the script; the explicit
      # return keeps the check from continuing if it does not - confirm.
      return
    end

    # Ruby's own clock replaces spawning `date +%s` in a subshell.
    seconds_since_epoch = Time.now.to_i
    most_recent_backup = nil
    backup_directory = nil

    status.datasources().each do |ds|
      begin
        # Looked up inside the begin block so a CommandError is still handled
        # per datasource, but memoised so trepctl is only queried until the
        # first successful answer.
        backup_directory ||= TI.trepctl_property(status.name(), 'replicator.storage.agent.fs.directory')
        listing = TU.ssh_result("stat -c\"%n %Y\" #{backup_directory}/store*.properties 2>/dev/null", ds, TI.user())

        listing.split("\n").each do |line|
          stored_backup = parse_stat_line(line)
          next if stored_backup.nil?

          if most_recent_backup.nil? || stored_backup[:seconds] > most_recent_backup[:seconds]
            most_recent_backup = stored_backup.merge(:hostname => ds)
          end
        end
      rescue CommandError
        # Deliberate best effort: a datasource whose command fails is skipped
        # (presumably this includes hosts with no matching backup files).
      end
    end

    if most_recent_backup.nil?
      nagios_critical("Unable to find a backup on any datasource")
      # Guards the hash access below in case nagios_critical returns.
      return
    end

    age = seconds_since_epoch - most_recent_backup[:seconds]
    if age > @options[:max_backup_age]
      nagios_critical("#{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
    else
      nagios_ok("The most recent backup is #{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s]")
    end
  end

  # Register the script description and the --max-backup-age option
  # (integer number of seconds, default 86400).
  def configure
    super()

    description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")
    add_option(:max_backup_age, {
      :on => "--max-backup-age String",
      :help => "Maximum allowed age in seconds of a backup on any machine",
      :parse => method(:parse_integer_option),
      :default => 86400
    })
  end

  # Name this script reports for itself.
  def script_name
    "check_tungsten_backups"
  end

  # Parse one line of `stat -c"%n %Y"` output ("<file name> <mtime>").
  # The line is split on its LAST space so file names containing spaces stay
  # intact. Returns a hash with :filename and :seconds, or nil when the line
  # does not end in an integer timestamp.
  def parse_stat_line(line)
    filename, _separator, mtime = line.strip.rpartition(" ")
    return nil if filename.empty? || mtime !~ /\A\d+\z/

    { :filename => filename, :seconds => mtime.to_i }
  end
  private :parse_stat_line

  self.new().run()
end
@@ -1,172 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Latency
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script accepts two arguments, {{-w}} and {{-c}}. The {{-w}} flag is
7
- # the level at which a warning should be returned. {{-c}} sets the level for
8
- # a critical return value. The script uses the maximum latency of any slave
9
- # to determine the return value.
10
- #
11
# Nagios plugin exit codes used by this check.
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; the Tungsten binaries are located
# relative to it. "$0" is quoted so a path containing spaces is not split
# into several arguments for dirname.
THOME=$(dirname "$0")
15
-
16
# Print the usage message for check_tungsten_latency on stdout and terminate
# the script with exit status 0.
function display_help()
{
  local usage_lines=(
    "Usage: ./check_tungsten_latency -w warning_level -c critical_level [-h]"
    " -w Throw a warning alert if the maximum latency"
    " is above this level"
    " -c Throw a critical alert if the maximum latency"
    " is above this level"
    " --perfdata Display performance data of the latency"
    " --perslave-perfdata Show performance latency values of each slave."
    " If this is not set the maximum latency will be"
    " displayed in the performace data"
    " -h Display this message"
  )

  # One array element per output line, printed verbatim.
  printf '%s\n' "${usage_lines[@]}"
  exit 0
}
30
-
31
- # We will use this to make some floating point comparisons
32
- function float_cond()
33
- {
34
- local cond=0
35
- if [[ $# -gt 0 ]]; then
36
- cond=$(echo "$*" | bc -q 2>&1)
37
- if [[ $? -ne 0 ]]; then
38
- echo "Error: $cond"
39
- exit 1
40
- fi
41
- if [[ -z "$cond" ]]; then cond=0; fi
42
- if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
43
- fi
44
- local stat=$((cond == 0))
45
- return $stat
46
- }
47
-
48
# Option defaults. A threshold of 0 means "not set" and is rejected below.
warning_level=0
critical_level=0
perfdata="false"
performance_data_default_glue=""
performance_data_suffix=""
perfdata_allslaves=""

# Translate the GNU-style long options into the short options getopts
# understands. The arguments are collected in an array, so every argument is
# passed through verbatim and nothing supplied by the caller is re-parsed by
# the shell (no eval).
short_args=()
for arg
do
  case "$arg" in
    --perfdata) short_args+=("-p");;
    --perslave-perfdata) short_args+=("-s");;
    # pass through anything else
    *) short_args+=("$arg");;
  esac
done

# Reset the positional parameters to the short options
set -- "${short_args[@]}"

# -w and -c take a value; -h, -p and -s are plain flags. -n is still accepted
# (and ignored) so existing invocations that pass it keep working.
while getopts "w:c:hnps" Option
do
  case $Option in
    w )
      warning_level=$OPTARG
      ;;
    c )
      critical_level=$OPTARG
      ;;
    h )
      display_help
      ;;
    p )
      perfdata="true"
      ;;
    s )
      perfdata_allslaves="true"
      ;;
  esac
done

# NOTE(review): display_help ends the script with exit status 0, so a missing
# threshold is reported to Nagios with the OK exit code - confirm intended.
if float_cond "$warning_level == 0"; then
  echo "Error: warning_level has not been set"
  echo ""
  display_help
fi

if float_cond "$critical_level == 0"; then
  echo "Error: critical_level has not been set"
  echo ""
  display_help
fi

if [ "$perfdata" == "true" ]; then
  performance_data_default_glue=" "
  performance_data_suffix=";$warning_level;$critical_level;;"
fi

error_message=""
error_message_glue=""
performance_data_glue=""
performance_data="| "
max_latency=0

# Check the manager status: the status output must mention a PID.
manager_running=$("${THOME}/../../tungsten-manager/bin/manager" status | grep -c "PID")
if [ "$manager_running" -eq 0 ]; then
  echo "CRITICAL: Manager is not running"
  exit $CRITICAL_STATE
fi

# Keep the REPLICATOR header lines of relay/slave replicators and every
# appliedLatency line; pipes and spaces are stripped so that each line is a
# single word for the loop below.
latency_values=$(echo "ls -l" | "${THOME}/../../tungsten-manager/bin/cctrl" | grep -E "REPLICATOR\(role=(relay|slave)|appliedLatency" | tr -d "| ")

# The words are consumed in pairs: a REPLICATOR line names the slave, the
# next word carries its appliedLatency. A word that is not a REPLICATOR line
# while no slave is pending leaves current_slave empty and is skipped.
current_slave=""
for line in $latency_values
do
  if [[ $current_slave == "" ]]
  then
    current_slave=$(echo "$line" | grep "REPLICATOR" | cut -f 1 -d ":")
  else
    applied_latency=$(echo "$line" | grep "appliedLatency" | cut -f 2 -d ":")

    if float_cond "$applied_latency > $max_latency"; then
      max_latency=$applied_latency
    fi

    if float_cond "$applied_latency > $warning_level"; then
      error_message="${error_message}${error_message_glue}${current_slave}=${applied_latency}s"
      error_message_glue=", "
    fi

    if float_cond "$applied_latency == -1"; then
      error_message="${error_message}${error_message_glue}${current_slave} is missing latency information"
      error_message_glue=", "
    fi

    performance_data="${performance_data}${performance_data_glue}${current_slave}=${applied_latency}${performance_data_suffix}"
    performance_data_glue="$performance_data_default_glue"
    current_slave=""
  fi
done

# Without --perslave-perfdata only the maximum latency is reported.
if [ "$perfdata_allslaves" != "true" ]; then
  performance_data="| max_latency=${max_latency}$performance_data_suffix"
fi

if [ "$perfdata" == "false" ]; then
  performance_data=""
fi

if float_cond "$max_latency > $critical_level"; then
  echo "CRITICAL: $error_message $performance_data"
  exit $CRITICAL_STATE
fi

if [[ $error_message != "" ]]; then
  echo "WARNING: $error_message $performance_data"
  exit $WARNING_STATE
fi

echo "OK: All slaves are running normally (max_latency=${max_latency}) $performance_data "

exit $OK_STATE
@@ -1,105 +0,0 @@
1
- #!/bin/bash
2
- # Copyright (C) 2014 Continuent, Inc.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License"); you may
5
- # not use this file except in compliance with the License. You may obtain
6
- # a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
- # License for the specific language governing permissions and limitations
14
- # under the License.
15
- #
16
- # Initial developer(s): Jeff Mace
17
- # Contributor(s):
18
- #
19
- # Simple Bash Script To Check Tungsten Services
20
- # Nagios Plugin For NRPE
21
- #
22
- # This script does not accept any arguments. It will return a warning if any
23
- # of Tungsten resources is not in an {{ONLINE}} state. It uses the output of
24
- # the {{ls resources}} command to determine the current state.
25
- #
26
# Nagios plugin exit codes used by this check.
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; "$0" is quoted so a path containing
# spaces is not split into several arguments for dirname.
THOME=$(dirname "$0")

# Accumulators for the list of services that are not ONLINE.
error_message=""
# Separator placed between entries of error_message (previously initialised
# under the misspelled name error_messaage_glue).
error_message_glue=""
offline_count=0
# Data service selected with -s; empty means no specific service.
dataservice=""
# Set to 1 by -n to skip the SHUNNED check.
skip_shun=0
36
-
37
# Print the usage message for check_tungsten_online on stdout and terminate
# the script with exit status 0.
function display_help()
{
  local usage_lines=(
    "Usage: ./check_tungsten_online -s dataservice [-h]"
    " -s The data service you would like to check"
    " -h Display this message"
    " -n Skip Shunned Services"
  )

  # One array element per output line, printed verbatim.
  printf '%s\n' "${usage_lines[@]}"
  exit 0
}
45
-
46
# -s takes the data service name; -h and -n are plain flags. (-h used to be
# declared as "h:", which made getopts demand an argument for it and report
# an error instead of showing the help.)
while getopts "s:hn" Option
do
  case $Option in
    h )
      display_help
      ;;
    s )
      dataservice=$OPTARG
      ;;
    n )
      skip_shun=1
      ;;
  esac
done

# Check the manager status: the status output must mention a PID.
manager_running=$("${THOME}/../../tungsten-manager/bin/manager" status | grep -c "PID")
if [ "$manager_running" -eq 0 ]; then
  echo "CRITICAL: Manager is not running"
  exit $CRITICAL_STATE
fi

# Collect every resource line that does not contain ONLINE. Without -s the
# "ls resources" listing is used; with -s the master/slave/relay lines of the
# selected data service are inspected.
if [ "$dataservice" == "" ]; then
  offline_services=$(echo "ls resources" | "${THOME}/../../tungsten-manager/bin/cctrl" | grep \| | grep : | grep -v ONLINE | tr -d "| " | cut -f 1,2 -d ":")
else
  offline_services=$(echo "use $dataservice; ls" | "${THOME}/../../tungsten-manager/bin/cctrl" -multi | grep "(\(composite \)\?master\|(\(composite \)\?slave\|(relay" | grep -v ONLINE | tr -d "|" | cut -f 1 -d "(")
fi

# Build a comma separated list of the offline services.
for offline_service in $offline_services
do
  offline_count=$((offline_count + 1))
  error_message="${error_message}${error_message_glue}${offline_service}"
  error_message_glue=", "
done

if [ "$offline_count" -gt 0 ]
then
  echo "CRITICAL: $error_message are not ONLINE"
  exit $CRITICAL_STATE
fi

# Unless -n was given, any SHUNNED entry in the listing is critical as well.
if [ "$skip_shun" -eq 0 ]
then
  if [ "$dataservice" == "" ]; then
    shunned=$(echo "ls" | "${THOME}/../../tungsten-manager/bin/cctrl" | grep -c 'SHUNNED')
  else
    shunned=$(echo "use $dataservice; ls" | "${THOME}/../../tungsten-manager/bin/cctrl" -multi | grep -c 'SHUNNED')
  fi

  if [ "$shunned" -gt 0 ]
  then
    echo "CRITICAL: Dataservices are shunned"
    exit $CRITICAL_STATE
  fi
fi

echo "OK: All services are online"
exit $OK_STATE
@@ -1,61 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Policy
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script does not accept any arguments. It will return error if the
7
- # cluster is in maintenance mode
8
- #
9
# Nagios plugin exit codes used by this check.
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; "$0" is quoted so a path containing
# spaces is not split into several arguments for dirname.
THOME=$(dirname "$0")

error_message=""
# Previously initialised under the misspelled name error_messaage_glue.
error_message_glue=""
offline_count=0
# Data service selected with -s; empty means no specific service.
dataservice=""
18
-
19
# Print the usage message for check_tungsten_policy on stdout and terminate
# the script with exit status 0.
function display_help()
{
  local usage_lines=(
    "Usage: ./check_tungsten_policy -s dataservice [-h]"
    " -s The data service you would like to check"
    " -h Display this message"
  )

  # One array element per output line, printed verbatim.
  printf '%s\n' "${usage_lines[@]}"
  exit 0
}
26
-
27
# -s takes the data service name; -h is a plain flag.
while getopts "s:h" Option
do
  case $Option in
    h )
      display_help
      ;;
    s )
      dataservice=$OPTARG
      ;;
  esac
done

# Check the manager status: the status output must mention a PID.
manager_running=$("${THOME}/../../tungsten-manager/bin/manager" status | grep -c "PID")
if [ "$manager_running" -eq 0 ]; then
  echo "CRITICAL: Manager is not running"
  exit $CRITICAL_STATE
fi

# Count the lines of the cluster listing that mention MAINTENANCE.
if [ "$dataservice" == "" ]; then
  maint_mode=$(echo "ls " | "${THOME}/../../tungsten-manager/bin/cctrl" | grep -c MAINTENANCE)
else
  # Two ';'-separated commands are sent on one line, so cctrl is started with
  # -multi here.
  # NOTE(review): -multi presumably is what makes cctrl accept several
  # commands per line - confirm against the cctrl documentation.
  maint_mode=$(echo "use $dataservice; ls " | "${THOME}/../../tungsten-manager/bin/cctrl" -multi | grep -c MAINTENANCE)
fi

if [ "$maint_mode" -gt 0 ]
then
  echo "CRITICAL: Cluster is in Maintenance mode"
  exit $CRITICAL_STATE
fi

echo "OK: Cluster is in Automatic Mode"
exit $OK_STATE
@@ -1,81 +0,0 @@
1
- #!/bin/bash
2
- #
3
- # Simple Bash Script To Check Tungsten Progress
4
- # Nagios Plugin For NRPE
5
- #
6
- # This script accepts two arguments, {{-t}} and {{-h}}. The {{-t}} flag sets
7
- # the number of seconds to wait between two readings of the replicator's
8
- # appliedLastSeqno value (default 1); {{-h}} displays the usage message. A
9
- # cluster heartbeat is issued between the two readings. A critical alert is
10
- # returned if the replicator is not ONLINE, and a warning is returned if the
11
- # sequence number did not advance by at least 1 during the wait.
12
- #
13
# Nagios plugin exit codes used by this check.
OK_STATE=0
WARNING_STATE=1
CRITICAL_STATE=2
# Directory containing this script; the Tungsten binaries are located
# relative to it. "$0" is quoted so a path containing spaces is not split
# into several arguments for dirname.
THOME=$(dirname "$0")
17
-
18
# Print the usage message for check_tungsten_progress on stdout and terminate
# the script with exit status 0.
function display_help()
{
  local usage_lines=(
    "Usage: ./check_tungsten_progress -t time [-h]"
    " -t The number of seconds to wait when monitoring progress"
    " -h Display this message"
  )

  # One array element per output line, printed verbatim.
  printf '%s\n' "${usage_lines[@]}"
  exit 0
}
25
-
26
- # We will use this to make some floating point comparisons
27
- function float_cond()
28
- {
29
- local cond=0
30
- if [[ $# -gt 0 ]]; then
31
- cond=$(echo "$*" | bc -q 2>&1)
32
- if [[ $? -ne 0 ]]; then
33
- echo "Error: $cond"
34
- exit 1
35
- fi
36
- if [[ -z "$cond" ]]; then cond=0; fi
37
- if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi
38
- fi
39
- local stat=$((cond == 0))
40
- return $stat
41
- }
42
-
43
# Print the appliedLastSeqno value from the local replicator's status output.
# Prints nothing when the status output has no such line.
function applied_last_seqno()
{
  "${THOME}/../../tungsten-replicator/bin/trepctl" status | grep "appliedLastSeqno" | tr -d "| " | awk -F":" '{print $2}'
}

# Seconds to wait between the two sequence number readings.
time_period=1
while getopts "t:h" Option
do
  case $Option in
    t )
      time_period=$OPTARG
      ;;
    h )
      display_help
      ;;
  esac
done

if float_cond "$time_period == 0"; then
  echo "Error: time_period has not been set"
  echo ""
  display_help
fi

# The replicator must report an ONLINE state before progress is measured.
is_online=$("${THOME}/../../tungsten-replicator/bin/trepctl" status | grep "state" | grep -c "ONLINE")
if float_cond "$is_online == 0"; then
  echo "CRITICAL: Replicator is not ONLINE"
  exit $CRITICAL_STATE
fi

# Read the sequence number, issue a cluster heartbeat, wait, and read again.
pre_progress_number=$(applied_last_seqno)
echo "cluster heartbeat" | "${THOME}/../../tungsten-manager/bin/cctrl" > /dev/null
sleep "$time_period"
post_progress_number=$(applied_last_seqno)

# Both readings must be integers. An empty or malformed reading would make
# the comparison below evaluate to false and the check would report OK
# without having measured anything.
integer_pattern='^-?[0-9]+$'
if ! [[ "$pre_progress_number" =~ $integer_pattern && "$post_progress_number" =~ $integer_pattern ]]; then
  echo "WARNING: Unable to determine replicator progress"
  exit $WARNING_STATE
fi

progress_number=$(echo "$post_progress_number - $pre_progress_number" | bc -q 2>/dev/null)

if float_cond "$progress_number < 1"; then
  echo "WARNING: Replicator did not show progress"
  exit $WARNING_STATE
fi

echo "OK: Replicator is making progress"
exit $OK_STATE