continuent-monitors-nagios 0.0.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tungsten_nagios_backups +140 -0
- data/bin/tungsten_nagios_connector +10 -3
- data/bin/tungsten_nagios_latency +159 -0
- data/bin/tungsten_nagios_monitor_threads +2 -0
- data/bin/tungsten_nagios_online +134 -0
- data/bin/tungsten_nagios_policy +61 -0
- data/bin/tungsten_nagios_progress +105 -0
- data/bin/tungsten_nagios_services +75 -0
- metadata +16 -18
- data/bin/check_tungsten.sh +0 -576
- data/bin/check_tungsten_backups +0 -70
- data/bin/check_tungsten_latency +0 -172
- data/bin/check_tungsten_online +0 -105
- data/bin/check_tungsten_policy +0 -61
- data/bin/check_tungsten_progress +0 -81
- data/bin/check_tungsten_services +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48e8e6f3568e9f4bca84c85a1063becd316337ab
|
4
|
+
data.tar.gz: 432c3e2421a56bd22b0e62d3e5679a7863aa9145
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e2738db134bf76f15f61399bbadd7c64d754dfbd6135f9ab07c794efae1292c7b8e93144cb312283fa2367680c502670f23c2a75ae5c3572be332fff5e14338
|
7
|
+
data.tar.gz: 0b1ac0ce88616fef6f9175bf328bdf18bf508ed67c82850ed2862205f32f96f8a2f799b4b52708a46b087fa4f1ecb07c9f5639e0e379e5a7f719b45f775be0db
|
@@ -0,0 +1,140 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that looks for the most recent Tungsten backup of a service
# and compares its age against --max-backup-age.
#
# NOTE(review): the flow below relies on critical() and ok() (provided by
# TungstenNagiosMonitor, not visible in this file) ending the script when
# they are called -- confirm against that module.
class CheckTungstenBackups
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Validate the local installation, locate the newest backup on the
  # relevant hosts and report OK or CRITICAL depending on its age.
  def main
    if TI.is_commercial?()
      unless TI.is_manager?()
        critical("The server is not a Continuent Tungsten Manager")
      end

      unless TI.is_running?("manager")
        critical("The Continuent Tungsten Manager is not running")
      end
    else
      unless TI.is_replicator?()
        critical("The server is not a Tungsten Replicator")
      end

      unless TI.is_running?("replicator")
        critical("The Tungsten Replicator is not running")
      end
    end

    opt_default(:service, TI.default_dataservice())
    if opt(:service) == nil
      critical("The --service option was not given")
    end

    status = TI.status(opt(:service))

    # When running Continuent Tungsten, this will only run on the coordinator
    if TI.is_commercial?()
      unless status.coordinator() == TI.hostname()
        ok("Not running check because this node is not the coordinator")
      end
    end

    # Read the local clock directly instead of spawning `date +%s`.
    seconds_since_epoch = Time.now().to_i()

    if TI.is_commercial?()
      hosts = status.replicators()
    else
      hosts = [TI.hostname()]
    end

    # Keep the first host seen when two backups share the same timestamp.
    most_recent_backup = nil
    hosts.each{
      |ds|
      candidate = latest_backup_on(ds)
      next if candidate == nil

      if most_recent_backup == nil || candidate[:seconds] > most_recent_backup[:seconds]
        most_recent_backup = candidate
      end
    }

    if most_recent_backup == nil
      if TI.is_commercial?()
        critical("Unable to find a backup on any datasource")
      else
        critical("Unable to find a backup")
      end
    end

    age = seconds_since_epoch-most_recent_backup[:seconds]
    if age > @options[:max_backup_age]
      critical("#{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s] is older than #{@options[:max_backup_age]}s")
    else
      ok("The most recent backup is #{most_recent_backup[:hostname]}:#{most_recent_backup[:filename]} [#{age}s]")
    end
  end

  # Find the newest store*.properties file in the backup directory of +ds+.
  #
  # Returns a Hash with :hostname, :filename and :seconds (the %Y value
  # printed by stat), or nil when the host has no backup directory
  # configured, no readable backup files, or the remote commands fail.
  def latest_backup_on(ds)
    # Find the replication storage directory on the host
    key = "#{HOSTS}.#{TU.to_identifier(ds)}.repl_backup_directory"
    raw = TU.ssh_result("#{TI.base_path()}/tools/tpm query values #{key}", ds, TI.user())
    dir = JSON.parse(raw)[key]

    # Without a directory the glob below would probe the filesystem root.
    return nil if dir.to_s().empty?

    # Look for backup files in that directory
    newest = nil
    TU.ssh_result("stat -c\"%n %Y\" #{dir}/store*.properties 2>/dev/null", ds, TI.user()).split("\n").each{
      |line|
      # Each line is "<path> <seconds>". Anchor on the trailing number so a
      # path containing spaces is kept whole, and skip anything malformed
      # rather than recording it with a timestamp of 0.
      parsed = /\A(.+) (\d+)\s*\z/.match(line)
      next if parsed == nil

      seconds = parsed[2].to_i()
      if newest == nil || seconds > newest[:seconds]
        newest = {
          :hostname => ds,
          :filename => parsed[1],
          :seconds => seconds
        }
      end
    }

    newest
  rescue CommandError, JSON::ParserError
    # Best effort: a host that cannot be queried simply contributes no backup.
    nil
  end

  # Register the script description and the --service and
  # --max-backup-age options (the latter defaults to 86400 seconds).
  def configure
    super()

    description("Check all local datasources to make sure one of them has a backup younger than the max allowed age")

    add_option(:service, {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    })

    add_option(:max_backup_age, {
      :on => "--max-backup-age String",
      :help => "Maximum allowed age in seconds of a backup on any machine",
      :parse => method(:parse_integer_option),
      :default => 86400
    })
  end

  # Name reported for this script.
  def script_name
    "tungsten_nagios_backups"
  end

  # Executed while the class body is loaded: build the script and run it.
  self.new().run()
end
|
@@ -31,8 +31,12 @@ class ContinuentNagiosMonitorConnector
|
|
31
31
|
private
|
32
32
|
|
33
33
|
def main
|
34
|
+
unless TI.is_connector?()
|
35
|
+
critical("The server is not a Continuent Tungsten Connector")
|
36
|
+
end
|
37
|
+
|
34
38
|
unless TI.is_running?("connector")
|
35
|
-
critical("The Tungsten Connector is not running")
|
39
|
+
critical("The Continuent Tungsten Connector is not running")
|
36
40
|
end
|
37
41
|
|
38
42
|
begin
|
@@ -48,9 +52,12 @@ class ContinuentNagiosMonitorConnector
|
|
48
52
|
def configure
|
49
53
|
super()
|
50
54
|
|
55
|
+
description("Test the Tungsten Connector using credentials from --defaults-file.")
|
56
|
+
|
51
57
|
add_option(:defaults_file, {
|
52
58
|
:on => "--defaults-file String",
|
53
|
-
:help => "The defaults file to use when connecting to MySQL"
|
59
|
+
:help => "The defaults file to use when connecting to MySQL",
|
60
|
+
:required => true
|
54
61
|
})
|
55
62
|
|
56
63
|
add_option(:statement, {
|
@@ -88,4 +95,4 @@ class ContinuentNagiosMonitorConnector
|
|
88
95
|
end
|
89
96
|
|
90
97
|
self.new().run()
|
91
|
-
end
|
98
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that compares the replication latency of every replicator in
# a service against the warning and critical thresholds.
#
# NOTE(review): the flow below relies on critical(), warning() and ok()
# (provided by TungstenNagiosMonitor, not visible in this file) ending the
# script when they are called -- confirm against that module.
class ContinuentNagiosMonitorLatency
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Validate the local installation, collect the latency of each replicator
  # and report the worst state found.
  def main
    if TI.is_commercial?()
      unless TI.is_manager?()
        critical("The server is not a Continuent Tungsten Manager")
      end

      unless TI.is_running?("manager")
        critical("The Continuent Tungsten Manager is not running")
      end
    else
      unless TI.is_replicator?()
        critical("The server is not a Tungsten Replicator")
      end

      unless TI.is_running?("replicator")
        critical("The Tungsten Replicator is not running")
      end
    end

    opt_default(:service, TI.default_dataservice())
    if opt(:service) == nil
      critical("The --service option was not given")
    end

    max_latency = 0
    errors = []
    perslave_performance_data = []

    status = TI.status(opt(:service))
    if status.is_composite?()
      # Composite Dataservice
      critical("Unable to check latency on #{opt(:service)} because it is a composite dataservice")
    else
      status.replicators().each{
        |hostname|
        # Ignore this host since the datasource is shunned
        if opt(:skip_shunned) == true && status.is_physical?()
          if status.datasource_status(hostname) == "SHUNNED"
            next
          end
        end

        latency = status.replicator_latency(hostname)

        # Anything that is not a plain non-negative number (including the
        # "-1" placeholder) counts as missing. \A and \Z anchor the whole
        # value, so a multi-line string cannot slip through on one line.
        unless latency.to_s() =~ /\A[0-9\.]+\Z/
          errors << "#{hostname} is missing latency information"
          next
        end

        latency = latency.to_s().to_f()
        if is_critical?(latency) || is_warning?(latency)
          errors << "#{hostname}=#{latency}s"
        end

        if latency > max_latency
          max_latency = latency
        end

        perslave_performance_data << "#{hostname}=#{latency};#{opt(:warning_level)};#{opt(:critical_level)};;"
      }
    end

    # Per-replicator perfdata takes precedence over the single max value.
    if opt(:perslave_perfdata) == true
      perslave_performance_data.each{
        |p|
        @perfdata << p
      }
    elsif opt(:perfdata) == true
      @perfdata << "max_latency=#{max_latency};#{opt(:warning_level)};#{opt(:critical_level)};;"
    end

    if is_critical?(max_latency)
      critical(errors.join(', '))
    elsif is_warning?(max_latency)
      warning(errors.join(', '))
    elsif errors.size() > 0
      # Only "missing latency information" entries can remain here. They
      # must not be dropped in favour of an OK result, because a replicator
      # that reports no latency was never actually measured.
      critical(errors.join(', '))
    else
      ok("All slaves are running normally (max_latency=#{max_latency})")
    end
  end

  # Register the script description and the --service, --skip-shunned,
  # --perfdata and --perslave-perfdata options.
  def configure
    super()

    description("Check the latency for all datasources in the specified --service. If it is a Tungsten Replicator, only the local host will be checked.")

    add_option(:service, {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    })

    add_option(:skip_shunned, {
      :on => "--skip-shunned String",
      :help => "Ignore Continuent Tungsten datasources that have been shunned.",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })

    add_option(:perfdata, {
      :on => "--perfdata String",
      :help => "Display max_latency performance data",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })

    add_option(:perslave_perfdata, {
      :on => "--perslave-perfdata String",
      :help => "Display latency performance data for every replicator",
      :parse => method(:parse_boolean_option),
      :default => "false",
    })
  end

  # Name reported for this script.
  def script_name
    "tungsten_nagios_latency"
  end

  # This check is driven by the warning and critical threshold options.
  def uses_thresholds?
    true
  end

  # Executed while the class body is loaded: build the script and run it.
  self.new().run()
end
|
@@ -52,6 +52,8 @@ class ContinuentNagiosMonitorThreads
|
|
52
52
|
def configure
|
53
53
|
super()
|
54
54
|
|
55
|
+
description("Check the number of JVM threads for the specified component")
|
56
|
+
|
55
57
|
add_option(:component, {
|
56
58
|
:on => "--component String",
|
57
59
|
:help => "The Tungsten component to return a Java PID for"
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (C) 2014 Continuent, Inc.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
5
|
+
# not use this file except in compliance with the License. You may obtain
|
6
|
+
# a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
12
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
13
|
+
# License for the specific language governing permissions and limitations
|
14
|
+
# under the License.
|
15
|
+
#
|
16
|
+
# Initial developer(s): Jeff Mace
|
17
|
+
# Contributor(s):
|
18
|
+
|
19
|
+
begin
|
20
|
+
require 'rubygems'
|
21
|
+
gem 'continuent-tools-core'
|
22
|
+
rescue LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'continuent-tools-core'
|
26
|
+
require 'continuent-tools-nagios-monitor'
|
27
|
+
|
28
|
+
# Nagios check that reports CRITICAL when any replicator or datasource of a
# service is not in the ONLINE state.
#
# NOTE(review): the flow below relies on critical() and ok() (provided by
# TungstenNagiosMonitor, not visible in this file) ending the script when
# they are called -- confirm against that module.
class ContinuentNagiosMonitorOnline
  include TungstenScript
  include TungstenNagiosMonitor
  private

  # Validate the local installation, gather every member that is not ONLINE
  # and report the result.
  def main
    if TI.is_commercial?()
      critical("The server is not a Continuent Tungsten Manager") unless TI.is_manager?()
      critical("The Continuent Tungsten Manager is not running") unless TI.is_running?("manager")
    else
      critical("The server is not a Tungsten Replicator") unless TI.is_replicator?()
      critical("The Tungsten Replicator is not running") unless TI.is_running?("replicator")
    end

    opt_default(:service, TI.default_dataservice())
    critical("The --service option was not given") if opt(:service) == nil

    offline = []
    # Shunned members are collected here but the list is never reported.
    skipped = []

    cluster = TI.status(opt(:service))
    if cluster.is_replication?()
      # Replication only: the replicator state is all there is to check.
      cluster.replicators().each do |member|
        offline << member unless cluster.replicator_status(member) == "ONLINE"
      end
    elsif cluster.is_physical?()
      # Physical dataservice: both the datasource and its replicator count.
      cluster.replicators().each do |member|
        datasource_state = cluster.datasource_status(member)
        replicator_state = cluster.replicator_status(member)

        if opt(:skip_shunned) == true && datasource_state == "SHUNNED"
          skipped << member
          next
        end

        unless datasource_state == "ONLINE" && replicator_state == "ONLINE"
          offline << member
        end
      end
    else
      # Composite dataservice: only datasource states are inspected.
      cluster.datasources().each do |member|
        datasource_state = cluster.datasource_status(member)

        if opt(:skip_shunned) == true && datasource_state == "SHUNNED"
          skipped << member
          next
        end

        offline << member unless datasource_state == "ONLINE"
      end
    end

    unless offline.empty?
      critical("#{offline.join(', ')} #{TU.pluralize(offline, "is", "are")} not ONLINE")
    end

    ok("All services are ONLINE")
  end

  # Register the script description and the --service and --skip-shunned
  # options.
  def configure
    super()

    description("Check that all datasources or the local replication service are ONLINE.")

    service_option = {
      :on => "--service String",
      :help => "The replication service or cluster to check"
    }
    add_option(:service, service_option)

    skip_shunned_option = {
      :on => "--skip-shunned String",
      :help => "Ignore Continuent Tungsten datasources that have been shunned.",
      :parse => method(:parse_boolean_option),
      :default => "false"
    }
    add_option(:skip_shunned, skip_shunned_option)
  end

  # Name reported for this script.
  def script_name
    "tungsten_nagios_online"
  end

  # Executed while the class body is loaded: build the script and run it.
  self.new().run()
end
|