continuent-monitors-nagios 0.0.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/tungsten_nagios_backups +140 -0
- data/bin/tungsten_nagios_connector +10 -3
- data/bin/tungsten_nagios_latency +159 -0
- data/bin/tungsten_nagios_monitor_threads +2 -0
- data/bin/tungsten_nagios_online +134 -0
- data/bin/tungsten_nagios_policy +61 -0
- data/bin/tungsten_nagios_progress +105 -0
- data/bin/tungsten_nagios_services +75 -0
- metadata +16 -18
- data/bin/check_tungsten.sh +0 -576
- data/bin/check_tungsten_backups +0 -70
- data/bin/check_tungsten_latency +0 -172
- data/bin/check_tungsten_online +0 -105
- data/bin/check_tungsten_policy +0 -61
- data/bin/check_tungsten_progress +0 -81
- data/bin/check_tungsten_services +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48e8e6f3568e9f4bca84c85a1063becd316337ab
|
4
|
+
data.tar.gz: 432c3e2421a56bd22b0e62d3e5679a7863aa9145
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e2738db134bf76f15f61399bbadd7c64d754dfbd6135f9ab07c794efae1292c7b8e93144cb312283fa2367680c502670f23c2a75ae5c3572be332fff5e14338
|
7
|
+
data.tar.gz: 0b1ac0ce88616fef6f9175bf328bdf18bf508ed67c82850ed2862205f32f96f8a2f799b4b52708a46b087fa4f1ecb07c9f5639e0e379e5a7f719b45f775be0db
|
@@ -0,0 +1,140 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that reports whether a sufficiently recent Tungsten backup
# exists.
#
# On a commercial (Continuent Tungsten) install every replicator host listed
# in the service status is inspected over SSH; otherwise only the local host
# is inspected. The newest "store*.properties" file found in each host's
# configured backup directory is compared against --max-backup-age.
#
# NOTE(review): critical() and ok() come from the included modules and are
# assumed to terminate the script; the code after each call relies on that
# (for example, the nil check before dereferencing the most recent backup).
class CheckTungstenBackups
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Run the check: validate the local installation, resolve the service,
  # locate the newest backup and report its age.
  def main
    if TI.is_commercial?()
      unless TI.is_manager?()
        critical("The server is not a Continuent Tungsten Manager")
      end

      unless TI.is_running?("manager")
        critical("The Continuent Tungsten Manager is not running")
      end
    else
      unless TI.is_replicator?()
        critical("The server is not a Tungsten Replicator")
      end

      unless TI.is_running?("replicator")
        critical("The Tungsten Replicator is not running")
      end
    end

    opt_default(:service, TI.default_dataservice())
    if opt(:service) == nil
      critical("The --service option was not given")
    end

    status = TI.status(opt(:service))

    # When running Continuent Tungsten, this will only run on the coordinator
    if TI.is_commercial?()
      unless status.coordinator() == TI.hostname()
        ok("Not running check because this node is not the coordinator")
      end
    end

    # Read the clock in-process instead of forking `date +%s`.
    seconds_since_epoch = Time.now.to_i

    if TI.is_commercial?()
      hosts = status.replicators()
    else
      hosts = [TI.hostname()]
    end

    most_recent_backup = find_most_recent_backup(hosts)

    if most_recent_backup == nil
      if TI.is_commercial?()
        critical("Unable to find a backup on any datasource")
      else
        critical("Unable to find a backup")
      end
    end

    age = seconds_since_epoch - most_recent_backup[:seconds]
    if age > @options[:max_backup_age]
      critical("#{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
    else
      ok("The most recent backup is #{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s]")
    end
  end

  # Scan every host in +hosts+ for backup property files and return the
  # newest one as a Hash with :hostname, :filename and :seconds (mtime in
  # epoch seconds), or nil when no backup file was found anywhere.
  #
  # Hosts that cannot be queried are skipped so that one unreachable host
  # does not hide backups stored on the others.
  def find_most_recent_backup(hosts)
    newest = nil

    hosts.each do |host|
      begin
        # Find the replication storage directory on the host
        key = "#{HOSTS}.#{TU.to_identifier(host)}.repl_backup_directory"
        raw = TU.ssh_result("#{TI.base_path()}/tools/tpm query values #{key}", host, TI.user())
        dir = JSON.parse(raw)[key]

        # Without a directory the glob below would become
        # "/store*.properties" and search the filesystem root.
        next if dir.to_s.empty?

        # Look for backup files in that directory
        listing = TU.ssh_result("stat -c\"%n %Y\" #{dir}/store*.properties 2>/dev/null", host, TI.user())
        listing.split("\n").each do |line|
          # stat prints "<name> <mtime>"; split on the LAST space so a path
          # containing spaces keeps its full name and its real mtime.
          filename, _separator, mtime = line.rpartition(" ")
          next if filename.empty?

          seconds = mtime.to_i()
          if newest == nil || seconds > newest[:seconds]
            newest = {
              :hostname => host,
              :filename => filename,
              :seconds => seconds
            }
          end
        end
      rescue CommandError, JSON::ParserError
        # Deliberate best effort: ignore this host and keep scanning.
      end
    end

    newest
  end

  # Register the description and the command line options of this check.
  def configure
    super()

    description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")

    add_option(:service, {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    })

    # Parsed as an integer number of seconds; defaults to one day.
    add_option(:max_backup_age, {
      :on => "--max-backup-age String",
      :help => "Maximum allowed age in seconds of a backup on any machine",
      :parse => method(:parse_integer_option),
      :default => 86400
    })
  end

  # Name of this script.
  def script_name
    "tungsten_nagios_backups"
  end

  # Instantiate and run the check as soon as the class body is evaluated.
  self.new().run()
end
|
@@ -31,8 +31,12 @@ class ContinuentNagiosMonitorConnector
|
|
31
31
|
private
|
32
32
|
|
33
33
|
def main
|
34
|
+
unless TI.is_connector?()
|
35
|
+
critical("The server is not a Continuent Tungsten Connector")
|
36
|
+
end
|
37
|
+
|
34
38
|
unless TI.is_running?("connector")
|
35
|
-
critical("The Tungsten Connector is not running")
|
39
|
+
critical("The Continuent Tungsten Connector is not running")
|
36
40
|
end
|
37
41
|
|
38
42
|
begin
|
@@ -48,9 +52,12 @@ class ContinuentNagiosMonitorConnector
|
|
48
52
|
def configure
|
49
53
|
super()
|
50
54
|
|
55
|
+
description("Test the Tungsten Connector using credentials from --defaults-file.")
|
56
|
+
|
51
57
|
add_option(:defaults_file, {
|
52
58
|
:on => "--defaults-file String",
|
53
|
-
:help => "The defaults file to use when connecting to MySQL"
|
59
|
+
:help => "The defaults file to use when connecting to MySQL",
|
60
|
+
:required => true
|
54
61
|
})
|
55
62
|
|
56
63
|
add_option(:statement, {
|
@@ -88,4 +95,4 @@ class ContinuentNagiosMonitorConnector
|
|
88
95
|
end
|
89
96
|
|
90
97
|
self.new().run()
|
91
|
-
end
|
98
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that compares the replication latency of every replicator in
# a service against the warning/critical thresholds.
#
# NOTE(review): critical(), warning() and ok() come from the included modules
# and are assumed to terminate the script — confirm against
# TungstenNagiosMonitor.
class ContinuentNagiosMonitorLatency
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Run the check: validate the local installation, resolve the service,
  # collect the latency of each replicator and report the worst case.
  def main
    if TI.is_commercial?()
      unless TI.is_manager?()
        critical("The server is not a Continuent Tungsten Manager")
      end

      unless TI.is_running?("manager")
        critical("The Continuent Tungsten Manager is not running")
      end
    else
      unless TI.is_replicator?()
        critical("The server is not a Tungsten Replicator")
      end

      unless TI.is_running?("replicator")
        critical("The Tungsten Replicator is not running")
      end
    end

    opt_default(:service, TI.default_dataservice())
    if opt(:service) == nil
      critical("The --service option was not given")
    end

    max_latency = 0
    errors = []
    perslave_performance_data = []

    status = TI.status(opt(:service))
    if status.is_composite?()
      # Composite Dataservice
      critical("Unable to check latency on #{opt(:service)} because it is a composite dataservice")
    else
      status.replicators().each do |hostname|
        # Ignore this host since the datasource is shunned
        if opt(:skip_shunned) == true && status.is_physical?()
          next if status.datasource_status(hostname) == "SHUNNED"
        end

        latency = status.replicator_latency(hostname).to_s()

        # Anything that is not a plain non-negative number, including the
        # "-1" placeholder, means the replicator did not report a latency.
        unless latency =~ /^[0-9\.]+$/
          errors << "#{hostname} is missing latency information"
          next
        end

        latency = latency.to_f()
        if is_critical?(latency) || is_warning?(latency)
          errors << "#{hostname}=#{latency}s"
        end

        if latency > max_latency
          max_latency = latency
        end

        perslave_performance_data << "#{hostname}=#{latency};#{opt(:warning_level)};#{opt(:critical_level)};;"
      end
    end

    # Per-replicator perfdata takes precedence over the single max_latency
    # value when both options are enabled.
    if opt(:perslave_perfdata) == true
      perslave_performance_data.each do |entry|
        @perfdata << entry
      end
    elsif opt(:perfdata) == true
      @perfdata << "max_latency=#{max_latency};#{opt(:warning_level)};#{opt(:critical_level)};;"
    end

    if is_critical?(max_latency)
      critical(errors.join(', '))
    elsif is_warning?(max_latency)
      warning(errors.join(', '))
    elsif errors.size() > 0
      # Every measured latency is within limits, but at least one replicator
      # reported none at all. That must not be presented as a healthy state.
      critical(errors.join(', '))
    else
      ok("All slaves are running normally (max_latency=#{max_latency})")
    end
  end

  # Register the description and the command line options of this check.
  def configure
    super()

    description("Check the latency for all datasources in the specified --service. If it is a Tungsten Replicator, only the local host will be checked.")

    add_option(:service, {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    })

    add_option(:skip_shunned, {
      :on => "--skip-shunned String",
      :help => "Ignore Continuent Tungsten datasources that have been shunned.",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })

    add_option(:perfdata, {
      :on => "--perfdata String",
      :help => "Display max_latency performance data",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })

    add_option(:perslave_perfdata, {
      :on => "--perslave-perfdata String",
      :help => "Display latency performance data for every replicator",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })
  end

  # Name of this script.
  def script_name
    "tungsten_nagios_latency"
  end

  # Declares that this check works with warning/critical thresholds.
  # NOTE(review): presumably consulted by TungstenNagiosMonitor to provide
  # the :warning_level / :critical_level options read in main — confirm.
  def uses_thresholds?
    true
  end

  # Instantiate and run the check as soon as the class body is evaluated.
  self.new().run()
end
|
@@ -52,6 +52,8 @@ class ContinuentNagiosMonitorThreads
|
|
52
52
|
def configure
|
53
53
|
super()
|
54
54
|
|
55
|
+
description("Check the number of JVM threads for the specified component")
|
56
|
+
|
55
57
|
add_option(:component, {
|
56
58
|
:on => "--component String",
|
57
59
|
:help => "The Tungsten component to return a Java PID for"
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that verifies every replicator and/or datasource of a service
# is ONLINE.
#
# NOTE(review): critical() and ok() come from the included modules and are
# assumed to terminate the script; the final ok() is only meant to be reached
# when nothing was found offline — confirm against TungstenNagiosMonitor.
class ContinuentNagiosMonitorOnline
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Run the check: validate the local installation, resolve the service and
  # collect the names of all members that are not ONLINE.
  def main
    if TI.is_commercial?()
      unless TI.is_manager?()
        critical("The server is not a Continuent Tungsten Manager")
      end

      unless TI.is_running?("manager")
        critical("The Continuent Tungsten Manager is not running")
      end
    else
      unless TI.is_replicator?()
        critical("The server is not a Tungsten Replicator")
      end

      unless TI.is_running?("replicator")
        critical("The Tungsten Replicator is not running")
      end
    end

    opt_default(:service, TI.default_dataservice())
    if opt(:service) == nil
      critical("The --service option was not given")
    end

    not_online = []

    status = TI.status(opt(:service))
    if status.is_replication?()
      # Replication Only: there is no datasource state, only the replicator
      status.replicators().each do |name|
        not_online << name if status.replicator_status(name) != "ONLINE"
      end
    elsif status.is_physical?()
      # Physical Dataservice: both the datasource and its replicator count
      status.replicators().each do |name|
        ds_status = status.datasource_status(name)
        rep_status = status.replicator_status(name)

        # A shunned datasource is ignored entirely when --skip-shunned is on
        next if opt(:skip_shunned) == true && ds_status == "SHUNNED"

        if ds_status != "ONLINE" || rep_status != "ONLINE"
          not_online << name
        end
      end
    else
      # Composite Dataservices: only the member datasources are inspected
      status.datasources().each do |name|
        ds_status = status.datasource_status(name)

        next if opt(:skip_shunned) == true && ds_status == "SHUNNED"

        not_online << name if ds_status != "ONLINE"
      end
    end

    if not_online.size() > 0
      critical("#{not_online.join(', ')} #{TU.pluralize(not_online, "is", "are")} not ONLINE")
    end

    ok("All services are ONLINE")
  end

  # Register the description and the command line options of this check.
  def configure
    super()

    description("Check that all datasources or the local replication service are ONLINE.")

    add_option(:service, {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    })

    add_option(:skip_shunned, {
      :on => "--skip-shunned String",
      :help => "Ignore Continuent Tungsten datasources that have been shunned.",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })
  end

  # Name of this script.
  def script_name
    "tungsten_nagios_online"
  end

  # Instantiate and run the check as soon as the class body is evaluated.
  self.new().run()
end
|