sensu-plugins-mongodb-wt 2.2.1 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/bin/check-mongodb.py +447 -241
- data/lib/sensu-plugins-mongodb/metrics.rb +0 -12
- data/lib/sensu-plugins-mongodb/version.rb +1 -1
- metadata +8 -8
data/bin/check-mongodb.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
#
|
4
4
|
# A MongoDB Nagios check script
|
@@ -16,37 +16,50 @@
|
|
16
16
|
# - @jbraeuer on github
|
17
17
|
# - Dag Stockstad <dag.stockstad@gmail.com>
|
18
18
|
# - @Andor on github
|
19
|
-
# - Steven Richards - Captainkrtek on
|
19
|
+
# - Steven Richards - Captainkrtek on github
|
20
|
+
# - Max Vernimmen - @mvernimmen-CG / @mvernimmen on github
|
21
|
+
# - Kris Nova - @kris@nivenly.com github.com/kris-nova
|
22
|
+
# - Jan Kantert - firstname@lastname.net
|
23
|
+
#
|
24
|
+
# LICENCE
|
20
25
|
#
|
21
|
-
|
22
|
-
# License: BSD
|
23
26
|
# Copyright (c) 2012, Mike Zupan <mike@zcentric.com>
|
24
27
|
# All rights reserved.
|
25
|
-
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
26
28
|
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
29
|
+
# Redistribution and use in source and binary forms, with or without
|
30
|
+
# modification, are permitted provided that the following conditions are met:
|
31
|
+
#
|
32
|
+
# Redistributions of source code must retain the above copyright notice, this
|
33
|
+
# list of conditions and the following disclaimer.
|
34
|
+
#
|
35
|
+
# Redistributions in binary form must reproduce the above copyright notice, this
|
36
|
+
# list of conditions and the following disclaimer in the documentation and/or
|
37
|
+
# other materials provided with the distribution. #THIS SOFTWARE IS PROVIDED BY
|
38
|
+
# THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
|
39
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
40
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
41
|
+
# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
42
|
+
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
43
|
+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
44
|
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
45
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
46
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
47
|
+
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
35
48
|
#
|
36
|
-
# README: https://github.com/mzupan/nagios-plugin-mongodb/blob/master/LICENSE
|
37
49
|
|
38
|
-
|
50
|
+
from __future__ import print_function
|
51
|
+
from __future__ import division
|
39
52
|
import sys
|
40
53
|
import time
|
41
54
|
import optparse
|
42
|
-
import textwrap
|
43
55
|
import re
|
44
56
|
import os
|
57
|
+
import numbers
|
45
58
|
|
46
59
|
try:
|
47
60
|
import pymongo
|
48
|
-
except ImportError
|
49
|
-
print
|
61
|
+
except ImportError as e:
|
62
|
+
print(e)
|
50
63
|
sys.exit(2)
|
51
64
|
|
52
65
|
# As of pymongo v 1.9 the SON API is part of the BSON package, therefore attempt
|
@@ -90,37 +103,35 @@ def performance_data(perf_data, params):
|
|
90
103
|
|
91
104
|
|
92
105
|
def numeric_type(param):
|
93
|
-
|
94
|
-
return True
|
95
|
-
return False
|
106
|
+
return param is None or isinstance(param, numbers.Real)
|
96
107
|
|
97
108
|
|
98
109
|
def check_levels(param, warning, critical, message, ok=[]):
|
99
110
|
if (numeric_type(critical) and numeric_type(warning)):
|
100
111
|
if param >= critical:
|
101
|
-
print
|
112
|
+
print("CRITICAL - " + message)
|
102
113
|
sys.exit(2)
|
103
114
|
elif param >= warning:
|
104
|
-
print
|
115
|
+
print("WARNING - " + message)
|
105
116
|
sys.exit(1)
|
106
117
|
else:
|
107
|
-
print
|
118
|
+
print("OK - " + message)
|
108
119
|
sys.exit(0)
|
109
120
|
else:
|
110
121
|
if param in critical:
|
111
|
-
print
|
122
|
+
print("CRITICAL - " + message)
|
112
123
|
sys.exit(2)
|
113
124
|
|
114
125
|
if param in warning:
|
115
|
-
print
|
126
|
+
print("WARNING - " + message)
|
116
127
|
sys.exit(1)
|
117
128
|
|
118
129
|
if param in ok:
|
119
|
-
print
|
130
|
+
print("OK - " + message)
|
120
131
|
sys.exit(0)
|
121
132
|
|
122
133
|
# unexpected param value
|
123
|
-
print
|
134
|
+
print("CRITICAL - Unexpected value : %d" % param + "; " + message)
|
124
135
|
return 2
|
125
136
|
|
126
137
|
|
@@ -137,35 +148,47 @@ def main(argv):
|
|
137
148
|
p = optparse.OptionParser(conflict_handler="resolve", description="This Nagios plugin checks the health of mongodb.")
|
138
149
|
|
139
150
|
p.add_option('-H', '--host', action='store', type='string', dest='host', default='127.0.0.1', help='The hostname you want to connect to')
|
140
|
-
p.add_option('-
|
151
|
+
p.add_option('-h', '--host-to-check', action='store', type='string', dest='host_to_check', default=None, help='The hostname you want to check (if this is different from the host you are connecting)')
|
152
|
+
p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is running on')
|
153
|
+
p.add_option('--port-to-check', action='store', type='int', dest='port_to_check', default=None, help='The port you want to check (if this is different from the port you are connecting)')
|
141
154
|
p.add_option('-u', '--user', action='store', type='string', dest='user', default=None, help='The username you want to login as')
|
142
155
|
p.add_option('-p', '--pass', action='store', type='string', dest='passwd', default=None, help='The password you want to use for that user')
|
143
|
-
p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold
|
144
|
-
p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold
|
156
|
+
p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold you want to set')
|
157
|
+
p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold you want to set')
|
145
158
|
p.add_option('-A', '--action', action='store', type='choice', dest='action', default='connect', help='The action you want to take',
|
146
159
|
choices=['connect', 'connections', 'replication_lag', 'replication_lag_percent', 'replset_state', 'memory', 'memory_mapped', 'lock',
|
147
|
-
'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_indexes', 'collection_size',
|
148
|
-
'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary',
|
149
|
-
'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
|
160
|
+
'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_documents', 'collection_indexes', 'collection_size',
|
161
|
+
'collection_storageSize', 'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary',
|
162
|
+
'page_faults', 'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
|
150
163
|
p.add_option('--max-lag', action='store_true', dest='max_lag', default=False, help='Get max replication lag (for replication_lag action only)')
|
151
164
|
p.add_option('--mapped-memory', action='store_true', dest='mapped_memory', default=False, help='Get mapped memory instead of resident (if resident memory can not be read)')
|
152
165
|
p.add_option('-D', '--perf-data', action='store_true', dest='perf_data', default=False, help='Enable output of Nagios performance data')
|
153
166
|
p.add_option('-d', '--database', action='store', dest='database', default='admin', help='Specify the database to check')
|
154
167
|
p.add_option('--all-databases', action='store_true', dest='all_databases', default=False, help='Check all databases (action database_size)')
|
155
|
-
p.add_option('-s', '--ssl
|
156
|
-
p.add_option('-e', '--ssl-certfile', dest='ssl_certfile', default=None, action='store', help='The certificate file used to identify the local connection against mongod')
|
157
|
-
p.add_option('-k', '--ssl-keyfile', dest='ssl_keyfile', default=None, action='store', help='The private key used to identify the local connection against mongod')
|
158
|
-
p.add_option('-a', '--ssl-ca-certs', dest='ssl_ca_certs', default=None, action='store', help='The set of concatenated CA certificates, which are used to validate certificates passed from the other end of the connection')
|
168
|
+
p.add_option('-s', '--ssl', dest='ssl', default=False, action='callback', callback=optional_arg(True), help='Connect using SSL')
|
159
169
|
p.add_option('-r', '--replicaset', dest='replicaset', default=None, action='callback', callback=optional_arg(True), help='Connect to replicaset')
|
160
170
|
p.add_option('-q', '--querytype', action='store', dest='query_type', default='query', help='The query type to check [query|insert|update|delete|getmore|command] from queries_per_second')
|
161
171
|
p.add_option('-c', '--collection', action='store', dest='collection', default='admin', help='Specify the collection to check')
|
162
172
|
p.add_option('-T', '--time', action='store', type='int', dest='sample_time', default=1, help='Time used to sample number of pages faults')
|
173
|
+
p.add_option('-M', '--mongoversion', action='store', type='choice', dest='mongo_version', default='2', help='The MongoDB version you are talking with, either 2 or 3',
|
174
|
+
choices=['2','3'])
|
175
|
+
p.add_option('-a', '--authdb', action='store', type='string', dest='authdb', default='admin', help='The database you want to authenticate against')
|
176
|
+
p.add_option('--insecure', action='store_true', dest='insecure', default=False, help="Don't verify SSL/TLS certificates")
|
177
|
+
p.add_option('--ssl-ca-cert-file', action='store', type='string', dest='ssl_ca_cert_file', default=None, help='Path to Certificate Authority file for SSL')
|
178
|
+
p.add_option('-f', '--ssl-cert-file', action='store', type='string', dest='cert_file', default=None, help='Path to PEM encoded key and cert for client authentication')
|
179
|
+
p.add_option('-m','--auth-mechanism', action='store', type='choice', dest='auth_mechanism', default=None, help='Auth mechanism used for auth with mongodb',
|
180
|
+
choices=['MONGODB-X509','SCRAM-SHA-256','SCRAM-SHA-1'])
|
181
|
+
p.add_option('--disable_retry_writes', dest='retry_writes_disabled', default=False, action='callback', callback=optional_arg(True), help='Disable retryWrites feature')
|
163
182
|
|
164
183
|
options, arguments = p.parse_args()
|
165
184
|
host = options.host
|
185
|
+
host_to_check = options.host_to_check if options.host_to_check else options.host
|
166
186
|
port = options.port
|
187
|
+
port_to_check = options.port_to_check if options.port_to_check else options.port
|
167
188
|
user = options.user
|
168
189
|
passwd = options.passwd
|
190
|
+
authdb = options.authdb
|
191
|
+
|
169
192
|
query_type = options.query_type
|
170
193
|
collection = options.collection
|
171
194
|
sample_time = options.sample_time
|
@@ -179,12 +202,15 @@ def main(argv):
|
|
179
202
|
action = options.action
|
180
203
|
perf_data = options.perf_data
|
181
204
|
max_lag = options.max_lag
|
205
|
+
mongo_version = options.mongo_version
|
182
206
|
database = options.database
|
183
|
-
|
184
|
-
ssl_certfile = options.ssl_certfile
|
185
|
-
ssl_keyfile = options.ssl_keyfile
|
186
|
-
ssl_ca_certs = options.ssl_ca_certs
|
207
|
+
ssl = options.ssl
|
187
208
|
replicaset = options.replicaset
|
209
|
+
insecure = options.insecure
|
210
|
+
ssl_ca_cert_file = options.ssl_ca_cert_file
|
211
|
+
cert_file = options.cert_file
|
212
|
+
auth_mechanism = options.auth_mechanism
|
213
|
+
retry_writes_disabled = options.retry_writes_disabled
|
188
214
|
|
189
215
|
if action == 'replica_primary' and replicaset is None:
|
190
216
|
return "replicaset must be passed in when using replica_primary check"
|
@@ -195,31 +221,36 @@ def main(argv):
|
|
195
221
|
# moving the login up here and passing in the connection
|
196
222
|
#
|
197
223
|
start = time.time()
|
198
|
-
err, con = mongo_connect(host, port,
|
224
|
+
err, con = mongo_connect(host, port, ssl, user, passwd, replicaset, authdb, insecure, ssl_ca_cert_file, cert_file, retry_writes_disabled=retry_writes_disabled)
|
225
|
+
|
226
|
+
if err != 0:
|
227
|
+
return err
|
228
|
+
|
229
|
+
# Autodetect mongo-version and force pymongo to let us know if it can connect or not.
|
230
|
+
err, mongo_version = check_version(con)
|
199
231
|
if err != 0:
|
200
232
|
return err
|
201
233
|
|
202
234
|
conn_time = time.time() - start
|
203
|
-
conn_time = round(conn_time, 0)
|
204
235
|
|
205
236
|
if action == "connections":
|
206
237
|
return check_connections(con, warning, critical, perf_data)
|
207
238
|
elif action == "replication_lag":
|
208
|
-
return check_rep_lag(con,
|
239
|
+
return check_rep_lag(con, host_to_check, port_to_check, warning, critical, False, perf_data, max_lag, user, passwd)
|
209
240
|
elif action == "replication_lag_percent":
|
210
|
-
return check_rep_lag(con,
|
241
|
+
return check_rep_lag(con, host_to_check, port_to_check, warning, critical, True, perf_data, max_lag, user, passwd, ssl, insecure, ssl_ca_cert_file, cert_file)
|
211
242
|
elif action == "replset_state":
|
212
243
|
return check_replset_state(con, perf_data, warning, critical)
|
213
244
|
elif action == "memory":
|
214
|
-
return check_memory(con, warning, critical, perf_data, options.mapped_memory)
|
245
|
+
return check_memory(con, warning, critical, perf_data, options.mapped_memory, host)
|
215
246
|
elif action == "memory_mapped":
|
216
247
|
return check_memory_mapped(con, warning, critical, perf_data)
|
217
248
|
elif action == "queues":
|
218
249
|
return check_queues(con, warning, critical, perf_data)
|
219
250
|
elif action == "lock":
|
220
|
-
return check_lock(con, warning, critical, perf_data)
|
251
|
+
return check_lock(con, warning, critical, perf_data, mongo_version)
|
221
252
|
elif action == "current_lock":
|
222
|
-
return check_current_lock(con, host, warning, critical, perf_data)
|
253
|
+
return check_current_lock(con, host, port, warning, critical, perf_data)
|
223
254
|
elif action == "flushing":
|
224
255
|
return check_flushing(con, warning, critical, True, perf_data)
|
225
256
|
elif action == "last_flush_time":
|
@@ -241,22 +272,26 @@ def main(argv):
|
|
241
272
|
return check_database_size(con, database, warning, critical, perf_data)
|
242
273
|
elif action == "database_indexes":
|
243
274
|
return check_database_indexes(con, database, warning, critical, perf_data)
|
275
|
+
elif action == "collection_documents":
|
276
|
+
return check_collection_documents(con, database, collection, warning, critical, perf_data)
|
244
277
|
elif action == "collection_indexes":
|
245
278
|
return check_collection_indexes(con, database, collection, warning, critical, perf_data)
|
246
279
|
elif action == "collection_size":
|
247
280
|
return check_collection_size(con, database, collection, warning, critical, perf_data)
|
281
|
+
elif action == "collection_storageSize":
|
282
|
+
return check_collection_storageSize(con, database, collection, warning, critical, perf_data)
|
248
283
|
elif action == "journaled":
|
249
284
|
return check_journaled(con, warning, critical, perf_data)
|
250
285
|
elif action == "write_data_files":
|
251
286
|
return check_write_to_datafiles(con, warning, critical, perf_data)
|
252
287
|
elif action == "opcounters":
|
253
|
-
return check_opcounters(con, host, warning, critical, perf_data)
|
288
|
+
return check_opcounters(con, host, port, warning, critical, perf_data)
|
254
289
|
elif action == "asserts":
|
255
|
-
return check_asserts(con, host, warning, critical, perf_data)
|
290
|
+
return check_asserts(con, host, port, warning, critical, perf_data)
|
256
291
|
elif action == "replica_primary":
|
257
|
-
return check_replica_primary(con, host, warning, critical, perf_data, replicaset)
|
292
|
+
return check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version)
|
258
293
|
elif action == "queries_per_second":
|
259
|
-
return check_queries_per_second(con, query_type, warning, critical, perf_data)
|
294
|
+
return check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version)
|
260
295
|
elif action == "page_faults":
|
261
296
|
check_page_faults(con, sample_time, warning, critical, perf_data)
|
262
297
|
elif action == "chunks_balance":
|
@@ -273,42 +308,73 @@ def main(argv):
|
|
273
308
|
return check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time)
|
274
309
|
|
275
310
|
|
276
|
-
def mongo_connect(host=None, port=None,
|
311
|
+
def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, replica=None, authdb="admin", insecure=False, ssl_ca_cert_file=None, ssl_cert=None, auth_mechanism=None, retry_writes_disabled=False):
|
312
|
+
from pymongo.errors import ConnectionFailure
|
313
|
+
from pymongo.errors import PyMongoError
|
314
|
+
import ssl as SSL
|
315
|
+
|
316
|
+
con_args = dict()
|
317
|
+
|
318
|
+
if ssl:
|
319
|
+
if insecure:
|
320
|
+
con_args['ssl_cert_reqs'] = SSL.CERT_NONE
|
321
|
+
else:
|
322
|
+
con_args['ssl_cert_reqs'] = SSL.CERT_REQUIRED
|
323
|
+
con_args['ssl'] = ssl
|
324
|
+
if ssl_ca_cert_file:
|
325
|
+
con_args['ssl_ca_certs'] = ssl_ca_cert_file
|
326
|
+
if ssl_cert:
|
327
|
+
con_args['ssl_certfile'] = ssl_cert
|
328
|
+
|
329
|
+
if retry_writes_disabled:
|
330
|
+
con_args['retryWrites'] = False
|
331
|
+
|
277
332
|
try:
|
278
333
|
# ssl connection for pymongo > 2.3
|
279
334
|
if pymongo.version >= "2.3":
|
280
335
|
if replica is None:
|
281
|
-
|
282
|
-
con = pymongo.MongoClient(host, port, ssl=ssl_enabled, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, ssl_ca_certs=ssl_ca_certs)
|
283
|
-
else:
|
284
|
-
con = pymongo.MongoClient(host, port)
|
336
|
+
con = pymongo.MongoClient(host, port, **con_args)
|
285
337
|
else:
|
286
|
-
|
287
|
-
con = pymongo.Connection(host, port, read_preference=pymongo.ReadPreference.SECONDARY, ssl=ssl_enabled, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, ssl_ca_certs=ssl_ca_certs, replicaSet=replica, network_timeout=10)
|
288
|
-
else:
|
289
|
-
con = pymongo.Connection(host, port, read_preference=pymongo.ReadPreference.SECONDARY, replicaSet=replica, network_timeout=10)
|
290
|
-
try:
|
291
|
-
# https://api.mongodb.com/python/current/api/pymongo/mongo_client.html
|
292
|
-
# The ismaster command is cheap and does not require auth.
|
293
|
-
con.admin.command('ismaster', connectTimeoutMS=10000)
|
294
|
-
except Exception, e:
|
295
|
-
return exit_with_general_critical(e), None
|
338
|
+
con = pymongo.MongoClient(host, port, read_preference=pymongo.ReadPreference.SECONDARY, replicaSet=replica, **con_args)
|
296
339
|
else:
|
297
340
|
if replica is None:
|
298
341
|
con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)
|
299
342
|
else:
|
300
343
|
con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)
|
301
|
-
|
344
|
+
|
345
|
+
# we must authenticate the connection, otherwise we won't be able to perform certain operations
|
346
|
+
if ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-256':
|
347
|
+
con.the_database.authenticate(user, mechanism='SCRAM-SHA-256')
|
348
|
+
elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-1':
|
349
|
+
con.the_database.authenticate(user, mechanism='SCRAM-SHA-1')
|
350
|
+
elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'MONGODB-X509':
|
351
|
+
con.the_database.authenticate(user, mechanism='MONGODB-X509')
|
352
|
+
|
353
|
+
try:
|
354
|
+
result = con.admin.command("ismaster")
|
355
|
+
except ConnectionFailure:
|
356
|
+
print("CRITICAL - Connection to Mongo server on %s:%s has failed" % (host, port) )
|
357
|
+
sys.exit(2)
|
358
|
+
|
359
|
+
if 'arbiterOnly' in result and result['arbiterOnly'] == True:
|
360
|
+
print("OK - State: 7 (Arbiter on port %s)" % (port))
|
361
|
+
sys.exit(0)
|
302
362
|
|
303
363
|
if user and passwd:
|
304
|
-
db = con[
|
305
|
-
|
364
|
+
db = con[authdb]
|
365
|
+
try:
|
366
|
+
db.authenticate(user, password=passwd)
|
367
|
+
except PyMongoError:
|
306
368
|
sys.exit("Username/Password incorrect")
|
307
|
-
|
369
|
+
|
370
|
+
# Ping to check that the server is responding.
|
371
|
+
con.admin.command("ping")
|
372
|
+
|
373
|
+
except Exception as e:
|
308
374
|
if isinstance(e, pymongo.errors.AutoReconnect) and str(e).find(" is an arbiter") != -1:
|
309
375
|
# We got a pymongo AutoReconnect exception that tells us we connected to an Arbiter Server
|
310
376
|
# This means: Arbiter is reachable and can answer requests/votes - this is all we need to know from an arbiter
|
311
|
-
print
|
377
|
+
print("OK - State: 7 (Arbiter)")
|
312
378
|
sys.exit(0)
|
313
379
|
return exit_with_general_critical(e), None
|
314
380
|
return 0, con
|
@@ -318,7 +384,7 @@ def exit_with_general_warning(e):
|
|
318
384
|
if isinstance(e, SystemExit):
|
319
385
|
return e
|
320
386
|
else:
|
321
|
-
print
|
387
|
+
print("WARNING - General MongoDB warning:", e)
|
322
388
|
return 1
|
323
389
|
|
324
390
|
|
@@ -326,21 +392,27 @@ def exit_with_general_critical(e):
|
|
326
392
|
if isinstance(e, SystemExit):
|
327
393
|
return e
|
328
394
|
else:
|
329
|
-
print
|
395
|
+
print("CRITICAL - General MongoDB Error:", e)
|
330
396
|
return 2
|
331
397
|
|
332
398
|
|
333
399
|
def set_read_preference(db):
|
334
|
-
if pymongo.version >= "2.2"
|
400
|
+
if pymongo.version >= "2.2":
|
335
401
|
pymongo.read_preferences.Secondary
|
336
402
|
else:
|
337
403
|
db.read_preference = pymongo.ReadPreference.SECONDARY
|
338
404
|
|
405
|
+
def check_version(con):
|
406
|
+
try:
|
407
|
+
server_info = con.server_info()
|
408
|
+
except Exception as e:
|
409
|
+
return exit_with_general_critical(e), None
|
410
|
+
return 0, int(server_info['version'].split('.')[0].strip())
|
339
411
|
|
340
412
|
def check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time):
|
341
413
|
warning = warning or 3
|
342
414
|
critical = critical or 6
|
343
|
-
message = "Connection took
|
415
|
+
message = "Connection took %.3f seconds" % conn_time
|
344
416
|
message += performance_data(perf_data, [(conn_time, "connection_time", warning, critical)])
|
345
417
|
|
346
418
|
return check_levels(conn_time, warning, critical, message)
|
@@ -362,13 +434,17 @@ def check_connections(con, warning, critical, perf_data):
|
|
362
434
|
(available, "available_connections")])
|
363
435
|
return check_levels(used_percent, warning, critical, message)
|
364
436
|
|
365
|
-
except Exception
|
437
|
+
except Exception as e:
|
366
438
|
return exit_with_general_critical(e)
|
367
439
|
|
368
440
|
|
369
|
-
def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd):
|
441
|
+
def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd, ssl=None, insecure=None, ssl_ca_cert_file=None, cert_file=None):
|
370
442
|
# Get mongo to tell us replica set member name when connecting locally
|
371
443
|
if "127.0.0.1" == host:
|
444
|
+
if not "me" in list(con.admin.command("ismaster","1").keys()):
|
445
|
+
print("UNKNOWN - This is not replicated MongoDB")
|
446
|
+
return 3
|
447
|
+
|
372
448
|
host = con.admin.command("ismaster","1")["me"].split(':')[0]
|
373
449
|
|
374
450
|
if percent:
|
@@ -380,15 +456,15 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
380
456
|
rs_status = {}
|
381
457
|
slaveDelays = {}
|
382
458
|
try:
|
383
|
-
set_read_preference(con.admin)
|
459
|
+
#set_read_preference(con.admin)
|
384
460
|
|
385
461
|
# Get replica set status
|
386
462
|
try:
|
387
463
|
rs_status = con.admin.command("replSetGetStatus")
|
388
|
-
except pymongo.errors.OperationFailure
|
389
|
-
if e.code == None and str(e).find('failed: not running with --replSet"'):
|
390
|
-
print
|
391
|
-
return
|
464
|
+
except pymongo.errors.OperationFailure as e:
|
465
|
+
if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
|
466
|
+
print("UNKNOWN - Not running with replSet")
|
467
|
+
return 3
|
392
468
|
|
393
469
|
serverVersion = tuple(con.server_info()['version'].split('.'))
|
394
470
|
if serverVersion >= tuple("2.0.0".split(".")):
|
@@ -409,24 +485,24 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
409
485
|
for member in rs_status["members"]:
|
410
486
|
if member["stateStr"] == "PRIMARY":
|
411
487
|
primary_node = member
|
412
|
-
if member
|
488
|
+
if member.get('name') == "{0}:{1}".format(host, port):
|
413
489
|
host_node = member
|
414
490
|
|
415
491
|
# Check if we're in the middle of an election and don't have a primary
|
416
492
|
if primary_node is None:
|
417
|
-
print
|
493
|
+
print("WARNING - No primary defined. In an election?")
|
418
494
|
return 1
|
419
495
|
|
420
496
|
# Check if we failed to find the current host
|
421
497
|
# below should never happen
|
422
498
|
if host_node is None:
|
423
|
-
print
|
499
|
+
print("CRITICAL - Unable to find host '" + host + "' in replica set.")
|
424
500
|
return 2
|
425
501
|
|
426
502
|
# Is the specified host the primary?
|
427
503
|
if host_node["stateStr"] == "PRIMARY":
|
428
504
|
if max_lag == False:
|
429
|
-
print
|
505
|
+
print("OK - This is the primary.")
|
430
506
|
return 0
|
431
507
|
else:
|
432
508
|
#get the maximal replication lag
|
@@ -439,7 +515,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
439
515
|
data = data + member['name'] + " lag=%d;" % replicationLag
|
440
516
|
maximal_lag = max(maximal_lag, replicationLag)
|
441
517
|
if percent:
|
442
|
-
err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user
|
518
|
+
err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user, passwd)
|
443
519
|
if err != 0:
|
444
520
|
return err
|
445
521
|
primary_timediff = replication_get_time_diff(con)
|
@@ -451,8 +527,8 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
451
527
|
message += performance_data(perf_data, [(maximal_lag, "replication_lag", warning, critical)])
|
452
528
|
return check_levels(maximal_lag, warning, critical, message)
|
453
529
|
elif host_node["stateStr"] == "ARBITER":
|
454
|
-
print
|
455
|
-
return
|
530
|
+
print("UNKNOWN - This is an arbiter")
|
531
|
+
return 3
|
456
532
|
|
457
533
|
# Find the difference in optime between current node and PRIMARY
|
458
534
|
|
@@ -471,7 +547,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
471
547
|
lag = float(optime_lag.seconds + optime_lag.days * 24 * 3600)
|
472
548
|
|
473
549
|
if percent:
|
474
|
-
err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]),
|
550
|
+
err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), ssl, user, passwd, None, None, insecure, ssl_ca_cert_file, cert_file)
|
475
551
|
if err != 0:
|
476
552
|
return err
|
477
553
|
primary_timediff = replication_get_time_diff(con)
|
@@ -503,12 +579,12 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
503
579
|
|
504
580
|
# Check if we're in the middle of an election and don't have a primary
|
505
581
|
if primary_node is None:
|
506
|
-
print
|
582
|
+
print("WARNING - No primary defined. In an election?")
|
507
583
|
sys.exit(1)
|
508
584
|
|
509
585
|
# Is the specified host the primary?
|
510
586
|
if host_node["stateStr"] == "PRIMARY":
|
511
|
-
print
|
587
|
+
print("OK - This is the primary.")
|
512
588
|
sys.exit(0)
|
513
589
|
|
514
590
|
# Find the difference in optime between current node and PRIMARY
|
@@ -527,20 +603,42 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
|
|
527
603
|
message += performance_data(perf_data, [(lag, "replication_lag", warning, critical)])
|
528
604
|
return check_levels(lag, warning, critical, message)
|
529
605
|
|
530
|
-
except Exception
|
606
|
+
except Exception as e:
|
531
607
|
return exit_with_general_critical(e)
|
532
608
|
|
609
|
+
#
|
610
|
+
# Check the memory usage of mongo. Alerting on this may be hard to get right
|
611
|
+
# because it'll try to get as much memory as it can. And that's probably
|
612
|
+
# a good thing.
|
613
|
+
#
|
614
|
+
def check_memory(con, warning, critical, perf_data, mapped_memory, host):
|
615
|
+
# Get the total system memory of this system (This is totally bogus if you
|
616
|
+
# are running this command remotely) and calculate based on that how much
|
617
|
+
# memory used by Mongodb is ok or not.
|
618
|
+
meminfo = open('/proc/meminfo').read()
|
619
|
+
matched = re.search(r'^MemTotal:\s+(\d+)', meminfo)
|
620
|
+
if matched:
|
621
|
+
mem_total_kB = int(matched.groups()[0])
|
622
|
+
|
623
|
+
if host != "127.0.0.1" and not warning:
|
624
|
+
# Running remotely and value was not set by user, use hardcoded value
|
625
|
+
warning = 12
|
626
|
+
else:
|
627
|
+
# running locally or user provided value
|
628
|
+
warning = warning or (mem_total_kB * 0.8) / 1024.0 / 1024.0
|
629
|
+
|
630
|
+
if host != "127.0.0.1" and not critical:
|
631
|
+
critical = 16
|
632
|
+
else:
|
633
|
+
critical = critical or (mem_total_kB * 0.9) / 1024.0 / 1024.0
|
634
|
+
|
635
|
+
# debugging
|
636
|
+
#print "mem total: {0}kb, warn: {1}GB, crit: {2}GB".format(mem_total_kB,warning, critical)
|
533
637
|
|
534
|
-
def check_memory(con, warning, critical, perf_data, mapped_memory):
|
535
|
-
#
|
536
|
-
# These thresholds are basically meaningless, and must be customized to your system's ram
|
537
|
-
#
|
538
|
-
warning = warning or 8
|
539
|
-
critical = critical or 16
|
540
638
|
try:
|
541
639
|
data = get_server_status(con)
|
542
640
|
if not data['mem']['supported'] and not mapped_memory:
|
543
|
-
print
|
641
|
+
print("OK - Platform not supported for memory info")
|
544
642
|
return 0
|
545
643
|
#
|
546
644
|
# convert to gigs
|
@@ -577,7 +675,7 @@ def check_memory(con, warning, critical, perf_data, mapped_memory):
|
|
577
675
|
else:
|
578
676
|
return check_levels(mem_resident, warning, critical, message)
|
579
677
|
|
580
|
-
except Exception
|
678
|
+
except Exception as e:
|
581
679
|
return exit_with_general_critical(e)
|
582
680
|
|
583
681
|
|
@@ -590,7 +688,7 @@ def check_memory_mapped(con, warning, critical, perf_data):
|
|
590
688
|
try:
|
591
689
|
data = get_server_status(con)
|
592
690
|
if not data['mem']['supported']:
|
593
|
-
print
|
691
|
+
print("OK - Platform not supported for memory info")
|
594
692
|
return 0
|
595
693
|
#
|
596
694
|
# convert to gigs
|
@@ -607,33 +705,45 @@ def check_memory_mapped(con, warning, critical, perf_data):
|
|
607
705
|
message += " %.2fGB mappedWithJournal" % mem_mapped_journal
|
608
706
|
except:
|
609
707
|
mem_mapped_journal = 0
|
610
|
-
message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped"), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
|
708
|
+
message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped", warning, critical), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
|
611
709
|
|
612
710
|
if not mem_mapped == -1:
|
613
711
|
return check_levels(mem_mapped, warning, critical, message)
|
614
712
|
else:
|
615
|
-
print
|
713
|
+
print("OK - Server does not provide mem.mapped info")
|
616
714
|
return 0
|
617
715
|
|
618
|
-
except Exception
|
716
|
+
except Exception as e:
|
619
717
|
return exit_with_general_critical(e)
|
620
718
|
|
621
719
|
|
622
|
-
|
720
|
+
#
|
721
|
+
# Return the percentage of the time there was a global Lock
|
722
|
+
#
|
723
|
+
def check_lock(con, warning, critical, perf_data, mongo_version):
|
623
724
|
warning = warning or 10
|
624
725
|
critical = critical or 30
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
726
|
+
if mongo_version == 2:
|
727
|
+
try:
|
728
|
+
data = get_server_status(con)
|
729
|
+
lockTime = data['globalLock']['lockTime']
|
730
|
+
totalTime = data['globalLock']['totalTime']
|
731
|
+
#
|
732
|
+
# calculate percentage
|
733
|
+
#
|
734
|
+
if lockTime > totalTime:
|
735
|
+
lock_percentage = 0.00
|
736
|
+
else:
|
737
|
+
lock_percentage = float(lockTime) / float(totalTime) * 100
|
738
|
+
message = "Lock Percentage: %.2f%%" % lock_percentage
|
739
|
+
message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
|
740
|
+
return check_levels(lock_percentage, warning, critical, message)
|
741
|
+
except Exception as e:
|
742
|
+
print("Couldn't get globalLock lockTime info from mongo, are you sure you're not using version 3? See the -M option.")
|
743
|
+
return exit_with_general_critical(e)
|
744
|
+
else:
|
745
|
+
print("OK - MongoDB version 3 doesn't report on global locks")
|
746
|
+
return 0
|
637
747
|
|
638
748
|
|
639
749
|
def check_flushing(con, warning, critical, avg, perf_data):
|
@@ -645,19 +755,24 @@ def check_flushing(con, warning, critical, avg, perf_data):
|
|
645
755
|
critical = critical or 15000
|
646
756
|
try:
|
647
757
|
data = get_server_status(con)
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
758
|
+
try:
|
759
|
+
data['backgroundFlushing']
|
760
|
+
if avg:
|
761
|
+
flush_time = float(data['backgroundFlushing']['average_ms'])
|
762
|
+
stat_type = "Average"
|
763
|
+
else:
|
764
|
+
flush_time = float(data['backgroundFlushing']['last_ms'])
|
765
|
+
stat_type = "Last"
|
654
766
|
|
655
|
-
|
656
|
-
|
767
|
+
message = "%s Flush Time: %.2fms" % (stat_type, flush_time)
|
768
|
+
message += performance_data(perf_data, [("%.2fms" % flush_time, "%s_flush_time" % stat_type.lower(), warning, critical)])
|
657
769
|
|
658
|
-
|
770
|
+
return check_levels(flush_time, warning, critical, message)
|
771
|
+
except Exception:
|
772
|
+
print("OK - flushing stats not available for this storage engine")
|
773
|
+
return 0
|
659
774
|
|
660
|
-
except Exception
|
775
|
+
except Exception as e:
|
661
776
|
return exit_with_general_critical(e)
|
662
777
|
|
663
778
|
|
@@ -668,6 +783,7 @@ def index_miss_ratio(con, warning, critical, perf_data):
|
|
668
783
|
data = get_server_status(con)
|
669
784
|
|
670
785
|
try:
|
786
|
+
data['indexCounters']
|
671
787
|
serverVersion = tuple(con.server_info()['version'].split('.'))
|
672
788
|
if serverVersion >= tuple("2.4.0".split(".")):
|
673
789
|
miss_ratio = float(data['indexCounters']['missRatio'])
|
@@ -675,19 +791,24 @@ def index_miss_ratio(con, warning, critical, perf_data):
|
|
675
791
|
miss_ratio = float(data['indexCounters']['btree']['missRatio'])
|
676
792
|
except KeyError:
|
677
793
|
not_supported_msg = "not supported on this platform"
|
678
|
-
|
679
|
-
|
794
|
+
try:
|
795
|
+
data['indexCounters']
|
796
|
+
if 'note' in data['indexCounters']:
|
797
|
+
print("OK - MongoDB says: " + not_supported_msg)
|
798
|
+
return 0
|
799
|
+
else:
|
800
|
+
print("WARNING - Can't get counter from MongoDB")
|
801
|
+
return 1
|
802
|
+
except Exception:
|
803
|
+
print("OK - MongoDB says: " + not_supported_msg)
|
680
804
|
return 0
|
681
|
-
else:
|
682
|
-
print "WARNING - Can't get counter from MongoDB"
|
683
|
-
return 1
|
684
805
|
|
685
806
|
message = "Miss Ratio: %.2f" % miss_ratio
|
686
807
|
message += performance_data(perf_data, [("%.2f" % miss_ratio, "index_miss_ratio", warning, critical)])
|
687
808
|
|
688
809
|
return check_levels(miss_ratio, warning, critical, message)
|
689
810
|
|
690
|
-
except Exception
|
811
|
+
except Exception as e:
|
691
812
|
return exit_with_general_critical(e)
|
692
813
|
|
693
814
|
def check_replset_quorum(con, perf_data):
|
@@ -711,7 +832,7 @@ def check_replset_quorum(con, perf_data):
|
|
711
832
|
message = "Cluster is not quorate and cannot operate"
|
712
833
|
|
713
834
|
return check_levels(state, warning, critical, message)
|
714
|
-
except Exception
|
835
|
+
except Exception as e:
|
715
836
|
return exit_with_general_critical(e)
|
716
837
|
|
717
838
|
|
@@ -720,52 +841,69 @@ def check_replset_state(con, perf_data, warning="", critical=""):
|
|
720
841
|
try:
|
721
842
|
warning = [int(x) for x in warning.split(",")]
|
722
843
|
except:
|
723
|
-
warning = [0, 3, 5
|
844
|
+
warning = [0, 3, 5]
|
724
845
|
try:
|
725
846
|
critical = [int(x) for x in critical.split(",")]
|
726
847
|
except:
|
727
848
|
critical = [8, 4, -1]
|
728
849
|
|
729
|
-
ok = range(-1, 8) # should include the range of all possible values
|
850
|
+
ok = list(range(-1, 8)) # should include the range of all possible values
|
730
851
|
try:
|
852
|
+
worst_state = -2
|
853
|
+
message = ""
|
731
854
|
try:
|
732
855
|
try:
|
733
856
|
set_read_preference(con.admin)
|
734
857
|
data = con.admin.command(pymongo.son_manipulator.SON([('replSetGetStatus', 1)]))
|
735
858
|
except:
|
736
859
|
data = con.admin.command(son.SON([('replSetGetStatus', 1)]))
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
message
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
message = "State: %i (Primary)" % state
|
754
|
-
elif state == 2:
|
755
|
-
message = "State: %i (Secondary)" % state
|
756
|
-
elif state == 7:
|
757
|
-
message = "State: %i (Arbiter)" % state
|
758
|
-
elif state == 9:
|
759
|
-
message = "State: %i (Rollback)" % state
|
760
|
-
elif state == -1:
|
761
|
-
message = "Not running with replSet"
|
762
|
-
else:
|
763
|
-
message = "State: %i (Unknown state)" % state
|
764
|
-
message += performance_data(perf_data, [(state, "state")])
|
765
|
-
return check_levels(state, warning, critical, message, ok)
|
766
|
-
except Exception, e:
|
860
|
+
members = data['members']
|
861
|
+
my_state = int(data['myState'])
|
862
|
+
worst_state = my_state
|
863
|
+
for member in members:
|
864
|
+
their_state = int(member['state'])
|
865
|
+
message += " %s: %i (%s)" % (member['name'], their_state, state_text(their_state))
|
866
|
+
if state_is_worse(their_state, worst_state, warning, critical):
|
867
|
+
worst_state = their_state
|
868
|
+
message += performance_data(perf_data, [(my_state, "state")])
|
869
|
+
|
870
|
+
except pymongo.errors.OperationFailure as e:
|
871
|
+
if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
|
872
|
+
worst_state = -1
|
873
|
+
|
874
|
+
return check_levels(worst_state, warning, critical, message, ok)
|
875
|
+
except Exception as e:
|
767
876
|
return exit_with_general_critical(e)
|
768
877
|
|
878
|
+
def state_is_worse(state, worst_state, warning, critical):
|
879
|
+
if worst_state in critical:
|
880
|
+
return False
|
881
|
+
if worst_state in warning:
|
882
|
+
return state in critical
|
883
|
+
return (state in warning) or (state in critical)
|
884
|
+
|
885
|
+
def state_text(state):
|
886
|
+
if state == 8:
|
887
|
+
return "Down"
|
888
|
+
elif state == 4:
|
889
|
+
return "Fatal error"
|
890
|
+
elif state == 0:
|
891
|
+
return "Starting up, phase1"
|
892
|
+
elif state == 3:
|
893
|
+
return "Recovering"
|
894
|
+
elif state == 5:
|
895
|
+
return "Starting up, phase2"
|
896
|
+
elif state == 1:
|
897
|
+
return "Primary"
|
898
|
+
elif state == 2:
|
899
|
+
return "Secondary"
|
900
|
+
elif state == 7:
|
901
|
+
return "Arbiter"
|
902
|
+
elif state == -1:
|
903
|
+
return "Not running with replSet"
|
904
|
+
else:
|
905
|
+
return "Unknown state"
|
906
|
+
|
769
907
|
|
770
908
|
def check_databases(con, warning, critical, perf_data=None):
|
771
909
|
try:
|
@@ -779,7 +917,7 @@ def check_databases(con, warning, critical, perf_data=None):
|
|
779
917
|
message = "Number of DBs: %.0f" % count
|
780
918
|
message += performance_data(perf_data, [(count, "databases", warning, critical, message)])
|
781
919
|
return check_levels(count, warning, critical, message)
|
782
|
-
except Exception
|
920
|
+
except Exception as e:
|
783
921
|
return exit_with_general_critical(e)
|
784
922
|
|
785
923
|
|
@@ -801,7 +939,7 @@ def check_collections(con, warning, critical, perf_data=None):
|
|
801
939
|
message += performance_data(perf_data, [(count, "collections", warning, critical, message)])
|
802
940
|
return check_levels(count, warning, critical, message)
|
803
941
|
|
804
|
-
except Exception
|
942
|
+
except Exception as e:
|
805
943
|
return exit_with_general_critical(e)
|
806
944
|
|
807
945
|
|
@@ -838,21 +976,21 @@ def check_database_size(con, database, warning, critical, perf_data):
|
|
838
976
|
try:
|
839
977
|
set_read_preference(con.admin)
|
840
978
|
data = con[database].command('dbstats')
|
841
|
-
storage_size = data['storageSize']
|
979
|
+
storage_size = data['storageSize'] // 1024 // 1024
|
842
980
|
if perf_data:
|
843
981
|
perfdata += " | database_size=%i;%i;%i" % (storage_size, warning, critical)
|
844
982
|
#perfdata += " database=%s" %(database)
|
845
983
|
|
846
984
|
if storage_size >= critical:
|
847
|
-
print
|
985
|
+
print("CRITICAL - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
|
848
986
|
return 2
|
849
987
|
elif storage_size >= warning:
|
850
|
-
print
|
988
|
+
print("WARNING - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
|
851
989
|
return 1
|
852
990
|
else:
|
853
|
-
print
|
991
|
+
print("OK - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
|
854
992
|
return 0
|
855
|
-
except Exception
|
993
|
+
except Exception as e:
|
856
994
|
return exit_with_general_critical(e)
|
857
995
|
|
858
996
|
|
@@ -866,20 +1004,42 @@ def check_database_indexes(con, database, warning, critical, perf_data):
|
|
866
1004
|
try:
|
867
1005
|
set_read_preference(con.admin)
|
868
1006
|
data = con[database].command('dbstats')
|
869
|
-
index_size = data['indexSize'] / 1024
|
1007
|
+
index_size = data['indexSize'] / 1024 // 1024
|
870
1008
|
if perf_data:
|
871
1009
|
perfdata += " | database_indexes=%i;%i;%i" % (index_size, warning, critical)
|
872
1010
|
|
873
1011
|
if index_size >= critical:
|
874
|
-
print
|
1012
|
+
print("CRITICAL - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
|
875
1013
|
return 2
|
876
1014
|
elif index_size >= warning:
|
877
|
-
print
|
1015
|
+
print("WARNING - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
|
1016
|
+
return 1
|
1017
|
+
else:
|
1018
|
+
print("OK - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
|
1019
|
+
return 0
|
1020
|
+
except Exception as e:
|
1021
|
+
return exit_with_general_critical(e)
|
1022
|
+
|
1023
|
+
|
1024
|
+
def check_collection_documents(con, database, collection, warning, critical, perf_data):
|
1025
|
+
perfdata = ""
|
1026
|
+
try:
|
1027
|
+
set_read_preference(con.admin)
|
1028
|
+
data = con[database].command('collstats', collection)
|
1029
|
+
documents = data['count']
|
1030
|
+
if perf_data:
|
1031
|
+
perfdata += " | collection_documents=%i;%i;%i" % (documents, warning, critical)
|
1032
|
+
|
1033
|
+
if documents >= critical:
|
1034
|
+
print("CRITICAL - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
|
1035
|
+
return 2
|
1036
|
+
elif documents >= warning:
|
1037
|
+
print("WARNING - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
|
878
1038
|
return 1
|
879
1039
|
else:
|
880
|
-
print
|
1040
|
+
print("OK - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
|
881
1041
|
return 0
|
882
|
-
except Exception
|
1042
|
+
except Exception as e:
|
883
1043
|
return exit_with_general_critical(e)
|
884
1044
|
|
885
1045
|
|
@@ -898,15 +1058,15 @@ def check_collection_indexes(con, database, collection, warning, critical, perf_
|
|
898
1058
|
perfdata += " | collection_indexes=%i;%i;%i" % (total_index_size, warning, critical)
|
899
1059
|
|
900
1060
|
if total_index_size >= critical:
|
901
|
-
print
|
1061
|
+
print("CRITICAL - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
|
902
1062
|
return 2
|
903
1063
|
elif total_index_size >= warning:
|
904
|
-
print
|
1064
|
+
print("WARNING - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
|
905
1065
|
return 1
|
906
1066
|
else:
|
907
|
-
print
|
1067
|
+
print("OK - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
|
908
1068
|
return 0
|
909
|
-
except Exception
|
1069
|
+
except Exception as e:
|
910
1070
|
return exit_with_general_critical(e)
|
911
1071
|
|
912
1072
|
|
@@ -923,7 +1083,7 @@ def check_queues(con, warning, critical, perf_data):
|
|
923
1083
|
message += performance_data(perf_data, [(total_queues, "total_queues", warning, critical), (readers_queues, "readers_queues"), (writers_queues, "writers_queues")])
|
924
1084
|
return check_levels(total_queues, warning, critical, message)
|
925
1085
|
|
926
|
-
except Exception
|
1086
|
+
except Exception as e:
|
927
1087
|
return exit_with_general_critical(e)
|
928
1088
|
|
929
1089
|
def check_collection_size(con, database, collection, warning, critical, perf_data):
|
@@ -938,18 +1098,43 @@ def check_collection_size(con, database, collection, warning, critical, perf_dat
|
|
938
1098
|
perfdata += " | collection_size=%i;%i;%i" % (size, warning, critical)
|
939
1099
|
|
940
1100
|
if size >= critical:
|
941
|
-
print
|
1101
|
+
print("CRITICAL - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
|
942
1102
|
return 2
|
943
1103
|
elif size >= warning:
|
944
|
-
print
|
1104
|
+
print("WARNING - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
|
1105
|
+
return 1
|
1106
|
+
else:
|
1107
|
+
print("OK - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
|
1108
|
+
return 0
|
1109
|
+
except Exception as e:
|
1110
|
+
return exit_with_general_critical(e)
|
1111
|
+
|
1112
|
+
|
1113
|
+
def check_collection_storageSize(con, database, collection, warning, critical, perf_data):
|
1114
|
+
warning = warning or 100
|
1115
|
+
critical = critical or 1000
|
1116
|
+
perfdata = ""
|
1117
|
+
try:
|
1118
|
+
set_read_preference(con.admin)
|
1119
|
+
data = con[database].command('collstats', collection)
|
1120
|
+
storageSize = data['storageSize'] / 1024 / 1024
|
1121
|
+
if perf_data:
|
1122
|
+
perfdata += " | collection_storageSize=%i;%i;%i" % (storageSize, warning, critical)
|
1123
|
+
|
1124
|
+
if storageSize >= critical:
|
1125
|
+
print("CRITICAL - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
|
1126
|
+
return 2
|
1127
|
+
elif storageSize >= warning:
|
1128
|
+
print("WARNING - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
|
945
1129
|
return 1
|
946
1130
|
else:
|
947
|
-
print
|
1131
|
+
print("OK - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
|
948
1132
|
return 0
|
949
|
-
except Exception
|
1133
|
+
except Exception as e:
|
950
1134
|
return exit_with_general_critical(e)
|
951
1135
|
|
952
|
-
|
1136
|
+
|
1137
|
+
def check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version):
|
953
1138
|
warning = warning or 250
|
954
1139
|
critical = critical or 500
|
955
1140
|
|
@@ -970,10 +1155,17 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
|
|
970
1155
|
diff_query = num - last_count['data'][query_type]['count']
|
971
1156
|
diff_ts = ts - last_count['data'][query_type]['ts']
|
972
1157
|
|
1158
|
+
if diff_ts == 0:
|
1159
|
+
message = "diff_query = " + str(diff_query) + " diff_ts = " + str(diff_ts)
|
1160
|
+
return check_levels(0, warning, critical, message)
|
1161
|
+
|
973
1162
|
query_per_sec = float(diff_query) / float(diff_ts)
|
974
1163
|
|
975
1164
|
# update the count now
|
976
|
-
|
1165
|
+
if mongo_version == 2:
|
1166
|
+
db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
|
1167
|
+
else:
|
1168
|
+
db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
|
977
1169
|
|
978
1170
|
message = "Queries / Sec: %f" % query_per_sec
|
979
1171
|
message += performance_data(perf_data, [(query_per_sec, "%s_per_sec" % query_type, warning, critical, message)])
|
@@ -982,17 +1174,24 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
|
|
982
1174
|
# since it is the first run insert it
|
983
1175
|
query_per_sec = 0
|
984
1176
|
message = "First run of check.. no data"
|
985
|
-
|
1177
|
+
if mongo_version == 2:
|
1178
|
+
db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
|
1179
|
+
else:
|
1180
|
+
db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
|
1181
|
+
|
986
1182
|
except TypeError:
|
987
1183
|
#
|
988
1184
|
# since it is the first run insert it
|
989
1185
|
query_per_sec = 0
|
990
1186
|
message = "First run of check.. no data"
|
991
|
-
|
1187
|
+
if mongo_version == 2:
|
1188
|
+
db.nagios_check.insert({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
|
1189
|
+
else:
|
1190
|
+
db.nagios_check.insert_one({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
|
992
1191
|
|
993
1192
|
return check_levels(query_per_sec, warning, critical, message)
|
994
1193
|
|
995
|
-
except Exception
|
1194
|
+
except Exception as e:
|
996
1195
|
return exit_with_general_critical(e)
|
997
1196
|
|
998
1197
|
|
@@ -1039,7 +1238,7 @@ def check_oplog(con, warning, critical, perf_data):
|
|
1039
1238
|
message += performance_data(perf_data, [("%.2f" % hours_in_oplog, 'oplog_time', warning, critical), ("%.2f " % approx_level, 'oplog_time_100_percent_used')])
|
1040
1239
|
return check_levels(-approx_level, -warning, -critical, message)
|
1041
1240
|
|
1042
|
-
except Exception
|
1241
|
+
except Exception as e:
|
1043
1242
|
return exit_with_general_critical(e)
|
1044
1243
|
|
1045
1244
|
|
@@ -1057,7 +1256,7 @@ Under very high write situations it is normal for this value to be nonzero. """
|
|
1057
1256
|
message += performance_data(perf_data, [(j_commits_in_wl, "j_commits_in_wl", warning, critical)])
|
1058
1257
|
return check_levels(j_commits_in_wl, warning, critical, message)
|
1059
1258
|
|
1060
|
-
except Exception
|
1259
|
+
except Exception as e:
|
1061
1260
|
return exit_with_general_critical(e)
|
1062
1261
|
|
1063
1262
|
|
@@ -1073,7 +1272,7 @@ def check_journaled(con, warning, critical, perf_data):
|
|
1073
1272
|
message += performance_data(perf_data, [("%.2f" % journaled, "journaled", warning, critical)])
|
1074
1273
|
return check_levels(journaled, warning, critical, message)
|
1075
1274
|
|
1076
|
-
except Exception
|
1275
|
+
except Exception as e:
|
1077
1276
|
return exit_with_general_critical(e)
|
1078
1277
|
|
1079
1278
|
|
@@ -1090,11 +1289,11 @@ than the amount physically written to disk."""
|
|
1090
1289
|
message += performance_data(perf_data, [("%.2f" % writes, "write_to_data_files", warning, critical)])
|
1091
1290
|
return check_levels(writes, warning, critical, message)
|
1092
1291
|
|
1093
|
-
except Exception
|
1292
|
+
except Exception as e:
|
1094
1293
|
return exit_with_general_critical(e)
|
1095
1294
|
|
1096
1295
|
|
1097
|
-
def get_opcounters(data, opcounters_name, host):
|
1296
|
+
def get_opcounters(data, opcounters_name, host, port):
|
1098
1297
|
try:
|
1099
1298
|
insert = data[opcounters_name]['insert']
|
1100
1299
|
query = data[opcounters_name]['query']
|
@@ -1102,21 +1301,21 @@ def get_opcounters(data, opcounters_name, host):
|
|
1102
1301
|
delete = data[opcounters_name]['delete']
|
1103
1302
|
getmore = data[opcounters_name]['getmore']
|
1104
1303
|
command = data[opcounters_name]['command']
|
1105
|
-
except KeyError
|
1304
|
+
except KeyError as e:
|
1106
1305
|
return 0, [0] * 100
|
1107
1306
|
total_commands = insert + query + update + delete + getmore + command
|
1108
1307
|
new_vals = [total_commands, insert, query, update, delete, getmore, command]
|
1109
|
-
return maintain_delta(new_vals, host, opcounters_name)
|
1308
|
+
return maintain_delta(new_vals, host, port, opcounters_name)
|
1110
1309
|
|
1111
1310
|
|
1112
|
-
def check_opcounters(con, host, warning, critical, perf_data):
|
1311
|
+
def check_opcounters(con, host, port, warning, critical, perf_data):
|
1113
1312
|
""" A function to get all opcounters delta per minute. In case of a replication - gets the opcounters+opcountersRepl"""
|
1114
1313
|
warning = warning or 10000
|
1115
1314
|
critical = critical or 15000
|
1116
1315
|
|
1117
1316
|
data = get_server_status(con)
|
1118
|
-
err1, delta_opcounters = get_opcounters(data, 'opcounters', host)
|
1119
|
-
err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host)
|
1317
|
+
err1, delta_opcounters = get_opcounters(data, 'opcounters', host, port)
|
1318
|
+
err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host, port)
|
1120
1319
|
if err1 == 0 and err2 == 0:
|
1121
1320
|
delta = [(x + y) for x, y in zip(delta_opcounters, delta_opcounters_repl)]
|
1122
1321
|
delta[0] = delta_opcounters[0] # only the time delta shouldn't be summarized
|
@@ -1124,14 +1323,14 @@ def check_opcounters(con, host, warning, critical, perf_data):
|
|
1124
1323
|
message = "Test succeeded , old values missing"
|
1125
1324
|
message = "Opcounters: total=%d,insert=%d,query=%d,update=%d,delete=%d,getmore=%d,command=%d" % tuple(per_minute_delta)
|
1126
1325
|
message += performance_data(perf_data, ([(per_minute_delta[0], "total", warning, critical), (per_minute_delta[1], "insert"),
|
1127
|
-
(per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[
|
1326
|
+
(per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[4], "delete"),
|
1128
1327
|
(per_minute_delta[5], "getmore"), (per_minute_delta[6], "command")]))
|
1129
1328
|
return check_levels(per_minute_delta[0], warning, critical, message)
|
1130
1329
|
else:
|
1131
1330
|
return exit_with_general_critical("problem reading data from temp file")
|
1132
1331
|
|
1133
1332
|
|
1134
|
-
def check_current_lock(con, host, warning, critical, perf_data):
|
1333
|
+
def check_current_lock(con, host, port, warning, critical, perf_data):
|
1135
1334
|
""" A function to get current lock percentage and not a global one, as check_lock function does"""
|
1136
1335
|
warning = warning or 10
|
1137
1336
|
critical = critical or 30
|
@@ -1140,7 +1339,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
|
|
1140
1339
|
lockTime = float(data['globalLock']['lockTime'])
|
1141
1340
|
totalTime = float(data['globalLock']['totalTime'])
|
1142
1341
|
|
1143
|
-
err, delta = maintain_delta([totalTime, lockTime], host, "locktime")
|
1342
|
+
err, delta = maintain_delta([totalTime, lockTime], host, port, "locktime")
|
1144
1343
|
if err == 0:
|
1145
1344
|
lock_percentage = delta[2] / delta[1] * 100 # lockTime/totalTime*100
|
1146
1345
|
message = "Current Lock Percentage: %.2f%%" % lock_percentage
|
@@ -1150,7 +1349,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
|
|
1150
1349
|
return exit_with_general_warning("problem reading data from temp file")
|
1151
1350
|
|
1152
1351
|
|
1153
|
-
def check_page_faults(con, host, warning, critical, perf_data):
|
1352
|
+
def check_page_faults(con, host, port, warning, critical, perf_data):
|
1154
1353
|
""" A function to get page_faults per second from the system"""
|
1155
1354
|
warning = warning or 10
|
1156
1355
|
critical = critical or 30
|
@@ -1162,7 +1361,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
|
|
1162
1361
|
# page_faults unsupported on the underlying system
|
1163
1362
|
return exit_with_general_critical("page_faults unsupported on the underlaying system")
|
1164
1363
|
|
1165
|
-
err, delta = maintain_delta([page_faults], host, "page_faults")
|
1364
|
+
err, delta = maintain_delta([page_faults], host, port, "page_faults")
|
1166
1365
|
if err == 0:
|
1167
1366
|
page_faults_ps = delta[1] / delta[0]
|
1168
1367
|
message = "Page faults : %.2f ps" % page_faults_ps
|
@@ -1172,7 +1371,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
|
|
1172
1371
|
return exit_with_general_warning("problem reading data from temp file")
|
1173
1372
|
|
1174
1373
|
|
1175
|
-
def check_asserts(con, host, warning, critical, perf_data):
|
1374
|
+
def check_asserts(con, host, port, warning, critical, perf_data):
|
1176
1375
|
""" A function to get asserts from the system"""
|
1177
1376
|
warning = warning or 1
|
1178
1377
|
critical = critical or 10
|
@@ -1187,7 +1386,7 @@ def check_asserts(con, host, warning, critical, perf_data):
|
|
1187
1386
|
user = asserts['user']
|
1188
1387
|
rollovers = asserts['rollovers']
|
1189
1388
|
|
1190
|
-
err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, "asserts")
|
1389
|
+
err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, port, "asserts")
|
1191
1390
|
|
1192
1391
|
if err == 0:
|
1193
1392
|
if delta[5] != 0:
|
@@ -1221,7 +1420,7 @@ def get_stored_primary_server_name(db):
|
|
1221
1420
|
return stored_primary_server
|
1222
1421
|
|
1223
1422
|
|
1224
|
-
def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
|
1423
|
+
def check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version):
|
1225
1424
|
""" A function to check if the primary server of a replica set has changed """
|
1226
1425
|
if warning is None and critical is None:
|
1227
1426
|
warning = 1
|
@@ -1244,7 +1443,10 @@ def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
|
|
1244
1443
|
saved_primary = "None"
|
1245
1444
|
if current_primary != saved_primary:
|
1246
1445
|
last_primary_server_record = {"server": current_primary}
|
1247
|
-
|
1446
|
+
if mongo_version == 2:
|
1447
|
+
db.last_primary_server.update({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
|
1448
|
+
else:
|
1449
|
+
db.last_primary_server.update_one({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
|
1248
1450
|
message = "Primary server has changed from %s to %s" % (saved_primary, current_primary)
|
1249
1451
|
primary_status = 1
|
1250
1452
|
return check_levels(primary_status, warning, critical, message)
|
@@ -1266,9 +1468,9 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
|
|
1266
1468
|
|
1267
1469
|
try:
|
1268
1470
|
#on linux servers only
|
1269
|
-
page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults']))
|
1471
|
+
page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults'])) // sample_time
|
1270
1472
|
except KeyError:
|
1271
|
-
print
|
1473
|
+
print("WARNING - Can't get extra_info.page_faults counter from MongoDB")
|
1272
1474
|
sys.exit(1)
|
1273
1475
|
|
1274
1476
|
message = "Page Faults: %i" % (page_faults)
|
@@ -1276,7 +1478,7 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
|
|
1276
1478
|
message += performance_data(perf_data, [(page_faults, "page_faults", warning, critical)])
|
1277
1479
|
check_levels(page_faults, warning, critical, message)
|
1278
1480
|
|
1279
|
-
except Exception
|
1481
|
+
except Exception as e:
|
1280
1482
|
exit_with_general_critical(e)
|
1281
1483
|
|
1282
1484
|
|
@@ -1292,35 +1494,35 @@ def chunks_balance(con, database, collection, warning, critical):
|
|
1292
1494
|
shards = col.distinct("shard")
|
1293
1495
|
|
1294
1496
|
except:
|
1295
|
-
print
|
1497
|
+
print("WARNING - Can't get chunks infos from MongoDB")
|
1296
1498
|
sys.exit(1)
|
1297
1499
|
|
1298
1500
|
if nscount == 0:
|
1299
|
-
print
|
1501
|
+
print("WARNING - Namespace %s is not sharded" % (nsfilter))
|
1300
1502
|
sys.exit(1)
|
1301
1503
|
|
1302
|
-
avgchunksnb = nscount
|
1303
|
-
warningnb = avgchunksnb * warning
|
1304
|
-
criticalnb = avgchunksnb * critical
|
1504
|
+
avgchunksnb = nscount // len(shards)
|
1505
|
+
warningnb = avgchunksnb * warning // 100
|
1506
|
+
criticalnb = avgchunksnb * critical // 100
|
1305
1507
|
|
1306
1508
|
for shard in shards:
|
1307
1509
|
delta = abs(avgchunksnb - col.find({"ns": nsfilter, "shard": shard}).count())
|
1308
1510
|
message = "Namespace: %s, Shard name: %s, Chunk delta: %i" % (nsfilter, shard, delta)
|
1309
1511
|
|
1310
1512
|
if delta >= criticalnb and delta > 0:
|
1311
|
-
print
|
1513
|
+
print("CRITICAL - Chunks not well balanced " + message)
|
1312
1514
|
sys.exit(2)
|
1313
1515
|
elif delta >= warningnb and delta > 0:
|
1314
|
-
print
|
1516
|
+
print("WARNING - Chunks not well balanced " + message)
|
1315
1517
|
sys.exit(1)
|
1316
1518
|
|
1317
|
-
print
|
1519
|
+
print("OK - Chunks well balanced across shards")
|
1318
1520
|
sys.exit(0)
|
1319
1521
|
|
1320
|
-
except Exception
|
1522
|
+
except Exception as e:
|
1321
1523
|
exit_with_general_critical(e)
|
1322
1524
|
|
1323
|
-
print
|
1525
|
+
print("OK - Chunks well balanced across shards")
|
1324
1526
|
sys.exit(0)
|
1325
1527
|
|
1326
1528
|
|
@@ -1336,7 +1538,7 @@ def check_connect_primary(con, warning, critical, perf_data):
|
|
1336
1538
|
data = con.admin.command(son.SON([('isMaster', 1)]))
|
1337
1539
|
|
1338
1540
|
if data['ismaster'] == True:
|
1339
|
-
print
|
1541
|
+
print("OK - This server is primary")
|
1340
1542
|
return 0
|
1341
1543
|
|
1342
1544
|
phost = data['primary'].split(':')[0]
|
@@ -1354,17 +1556,17 @@ def check_connect_primary(con, warning, critical, perf_data):
|
|
1354
1556
|
|
1355
1557
|
return check_levels(pconn_time, warning, critical, message)
|
1356
1558
|
|
1357
|
-
except Exception
|
1559
|
+
except Exception as e:
|
1358
1560
|
return exit_with_general_critical(e)
|
1359
1561
|
|
1360
1562
|
|
1361
1563
|
def check_collection_state(con, database, collection):
|
1362
1564
|
try:
|
1363
1565
|
con[database][collection].find_one()
|
1364
|
-
print
|
1566
|
+
print("OK - Collection %s.%s is reachable " % (database, collection))
|
1365
1567
|
return 0
|
1366
1568
|
|
1367
|
-
except Exception
|
1569
|
+
except Exception as e:
|
1368
1570
|
return exit_with_general_critical(e)
|
1369
1571
|
|
1370
1572
|
|
@@ -1376,14 +1578,18 @@ def check_row_count(con, database, collection, warning, critical, perf_data):
|
|
1376
1578
|
|
1377
1579
|
return check_levels(count, warning, critical, message)
|
1378
1580
|
|
1379
|
-
except Exception
|
1581
|
+
except Exception as e:
|
1380
1582
|
return exit_with_general_critical(e)
|
1381
1583
|
|
1382
1584
|
|
1383
|
-
def build_file_name(host, action):
|
1585
|
+
def build_file_name(host, port, action):
|
1384
1586
|
#done this way so it will work when run independently and from shell
|
1385
1587
|
module_name = re.match('(.*//*)*(.*)\..*', __file__).group(2)
|
1386
|
-
|
1588
|
+
|
1589
|
+
if (port == 27017):
|
1590
|
+
return "/tmp/" + module_name + "_data/" + host + "-" + action + ".data"
|
1591
|
+
else:
|
1592
|
+
return "/tmp/" + module_name + "_data/" + host + "-" + str(port) + "-" + action + ".data"
|
1387
1593
|
|
1388
1594
|
|
1389
1595
|
def ensure_dir(f):
|
@@ -1396,7 +1602,7 @@ def write_values(file_name, string):
|
|
1396
1602
|
f = None
|
1397
1603
|
try:
|
1398
1604
|
f = open(file_name, 'w')
|
1399
|
-
except IOError
|
1605
|
+
except IOError as e:
|
1400
1606
|
#try creating
|
1401
1607
|
if (e.errno == 2):
|
1402
1608
|
ensure_dir(file_name)
|
@@ -1415,11 +1621,11 @@ def read_values(file_name):
|
|
1415
1621
|
data = f.read()
|
1416
1622
|
f.close()
|
1417
1623
|
return 0, data
|
1418
|
-
except IOError
|
1624
|
+
except IOError as e:
|
1419
1625
|
if (e.errno == 2):
|
1420
1626
|
#no previous data
|
1421
1627
|
return 1, ''
|
1422
|
-
except Exception
|
1628
|
+
except Exception as e:
|
1423
1629
|
return 2, None
|
1424
1630
|
|
1425
1631
|
|
@@ -1435,8 +1641,8 @@ def calc_delta(old, new):
|
|
1435
1641
|
return 0, delta
|
1436
1642
|
|
1437
1643
|
|
1438
|
-
def maintain_delta(new_vals, host, action):
|
1439
|
-
file_name = build_file_name(host, action)
|
1644
|
+
def maintain_delta(new_vals, host, port, action):
|
1645
|
+
file_name = build_file_name(host, port, action)
|
1440
1646
|
err, data = read_values(file_name)
|
1441
1647
|
old_vals = data.split(';')
|
1442
1648
|
new_vals = [str(int(time.time()))] + new_vals
|
@@ -1457,8 +1663,8 @@ def replication_get_time_diff(con):
|
|
1457
1663
|
col = 'oplog.$main'
|
1458
1664
|
firstc = local[col].find().sort("$natural", 1).limit(1)
|
1459
1665
|
lastc = local[col].find().sort("$natural", -1).limit(1)
|
1460
|
-
first =
|
1461
|
-
last =
|
1666
|
+
first = next(firstc)
|
1667
|
+
last = next(lastc)
|
1462
1668
|
tfirst = first["ts"]
|
1463
1669
|
tlast = last["ts"]
|
1464
1670
|
delta = tlast.time - tfirst.time
|