sensu-plugins-mongodb-boutetnico 1.0.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a03df4caef7dc049d55bbfc903b5115a70f715a6
4
- data.tar.gz: 432c4515549e0a71e04e11ea69b889874f1b87ec
3
+ metadata.gz: 20bd487d838baf2695377d4f9427abb2ab216b42
4
+ data.tar.gz: ba4b4e0e4d895b9f1df4c20dd598e1cf3df67ac9
5
5
  SHA512:
6
- metadata.gz: 8fd39f365379139289e0b8933ec87af4331920516e5ce86bf2306c5e11b3712196b68adc7f08882234d67c61120fa6e1d88489e8b8383e16e7af56bc9c81722a
7
- data.tar.gz: 1dad2a62055b2c11690878b72608de258aee2ba82547a2bb9ccd4dd95ff9b506a4494646d8c8c3eeb103b0df37418c200ca09ac7f3753ad5b8dc0dd8aa203ae8
6
+ metadata.gz: 6489d8a793494b7cccd8b141f6d1b0414764692149289a4bfb201af868fdf81106d77c0e0b48b8841524dd8d2d6d6b0147df3cd8c0f6dfcc3d9437951243f598
7
+ data.tar.gz: f1b84acd7d5a6afc1e0fac77d6612fb241a139b01b5448d61588a54a5cab8e69f8df34e5a0ac73dd50aaf6711bf532fb4cb210dc2515097e62a2bb0b507c7502
data/README.md CHANGED
@@ -12,6 +12,7 @@ This fork is automatically tested, built and published to [RubyGems](https://rub
12
12
  * bin/check-mongodb.py
13
13
  * bin/check-mongodb.rb - wrapper for check-mongodb.py
14
14
  * bin/check-mongodb-metric.rb
15
+ * bin/check-mongodb-query-count.rb
15
16
  * bin/metrics-mongodb.rb
16
17
  * bin/metrics-mongodb-replication.rb
17
18
 
@@ -0,0 +1,267 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-mongodb-query-count.rb
4
+ #
5
+ # DESCRIPTION:
6
+ # Check how many documents are returned by a MongoDB query.
7
+ #
8
+ # OUTPUT:
9
+ # Plain text
10
+ #
11
+ # PLATFORMS:
12
+ # Linux
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: sensu-plugin
16
+ # gem: mongo
17
+ # gem: bson
18
+ # gem: bson_ext
19
+ # gem: json
20
+ #
21
+ # USAGE:
22
+ # # Check MongoDB collection "logs" for critical events
23
+ # ./check-mongodb-query-count.rb --user sensu --pass sensu --database test --collection logs
24
+ # --query '{"level":"CRITICAL"}'
25
+ # --minutes-previous 5
26
+ # -w 0 -c 10 --include-results
27
+ #
28
+ # NOTES:
29
+ # Results can be restricted to a recent time window via the --*-previous options.
30
+ #
31
+ # LICENSE:
32
+ # Copyright 2019 github.com/boutetnico
33
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
34
+ # for details.
35
+ #
36
+
37
+ require 'sensu-plugin/check/cli'
38
+ require 'mongo'
39
+ require 'json'
40
+ include Mongo
41
+
42
+ #
43
+ # Mongodb
44
+ #
45
+
46
# Sensu check: run a user-supplied MongoDB query, optionally bounded to a
# recent time window, and compare the matching document count against
# warning/critical thresholds.
class MongoDBQueryCount < Sensu::Plugin::Check::CLI
  option :host,
         description: 'MongoDB host',
         long: '--host HOST',
         default: 'localhost'

  option :port,
         description: 'MongoDB port',
         long: '--port PORT',
         default: 27_017

  option :user,
         description: 'MongoDB user',
         long: '--user USER',
         default: nil

  option :password,
         description: 'MongoDB password',
         long: '--password PASSWORD',
         default: nil

  option :ssl,
         description: 'Connect using SSL',
         long: '--ssl',
         default: false

  option :ssl_cert,
         description: 'The certificate file used to identify the local connection against mongod',
         long: '--ssl-cert SSL_CERT',
         default: ''

  option :ssl_key,
         description: 'The private key used to identify the local connection against mongod',
         long: '--ssl-key SSL_KEY',
         default: ''

  option :ssl_ca_cert,
         description: 'The set of concatenated CA certificates, which are used to validate certificates passed from the other end of the connection',
         long: '--ssl-ca-cert SSL_CA_CERT',
         default: ''

  option :ssl_verify,
         description: 'Whether or not to do peer certification validation',
         long: '--ssl-verify',
         default: false

  option :debug,
         description: 'Enable debug',
         long: '--debug',
         default: false

  option :database,
         description: 'Database to perform query on',
         short: '-d DATABASE',
         long: '--database DATABASE',
         required: true

  option :collection,
         description: 'Collection to perform query on',
         short: '-C COLLECTION',
         long: '--collection COLLECTION',
         required: true

  option :query,
         description: 'Query to perform',
         short: '-q QUERY',
         long: '--query QUERY',
         required: true

  option :warn,
         short: '-w N',
         long: '--warn N',
         description: 'Result count WARNING threshold',
         proc: proc(&:to_i),
         default: 0

  option :crit,
         short: '-c N',
         long: '--crit N',
         description: 'Result count CRITICAL threshold',
         proc: proc(&:to_i),
         default: 0

  option :invert,
         long: '--invert',
         description: 'Invert thresholds',
         boolean: true

  option :date_field,
         description: 'Field to use instead of "date" for query.',
         long: '--date-field FIELD_NAME',
         default: 'date'

  option :minutes_previous,
         description: 'Minutes before offset to check date field against query.',
         long: '--minutes-previous MINUTES_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :hours_previous,
         description: 'Hours before offset to check date field against query.',
         long: '--hours-previous HOURS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :days_previous,
         description: 'Days before offset to check date field against query.',
         long: '--days-previous DAYS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :weeks_previous,
         description: 'Weeks before offset to check date field against query.',
         long: '--weeks-previous WEEKS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :months_previous,
         description: 'Months before offset to check date field against query.',
         long: '--months-previous MONTHS_PREVIOUS',
         proc: proc(&:to_i),
         default: 0

  option :include_results,
         long: '--include-results',
         description: 'Include results in response',
         boolean: false

  # Build a Mongo::Client from the CLI options (credentials and SSL options
  # only when supplied) and keep a handle to the target database in @db.
  def connect_mongo_db
    address_str = "#{config[:host]}:#{config[:port]}"
    client_opts = {}
    client_opts[:database] = config[:database]
    unless config[:user].nil?
      client_opts[:user] = config[:user]
      client_opts[:password] = config[:password]
    end
    if config[:ssl]
      client_opts[:ssl] = true
      client_opts[:ssl_cert] = config[:ssl_cert]
      client_opts[:ssl_key] = config[:ssl_key]
      client_opts[:ssl_ca_cert] = config[:ssl_ca_cert]
      client_opts[:ssl_verify] = config[:ssl_verify]
    end
    mongo_client = Mongo::Client.new([address_str], client_opts)
    @db = mongo_client.database
  end

  # Parse the user-supplied JSON query, constrain it to the requested time
  # window on the configured date field, and return the matching documents.
  # Exits UNKNOWN when the query string is not valid JSON.
  def query_mongo
    collection = @db[config[:collection]]
    begin
      query = JSON.parse(config[:query])
    rescue JSON::ParserError
      # FIX: the query must be a JSON object (a MongoDB filter document),
      # not an array — the previous message asked for an array.
      unknown 'Failed to parse query. Provide a valid JSON object.'
    end

    # Accumulate all --*-previous offsets into a single lower time bound.
    start_time = Time.now.utc.to_i
    if config[:minutes_previous] != 0
      start_time -= (config[:minutes_previous] * 60)
    end
    if config[:hours_previous] != 0
      start_time -= (config[:hours_previous] * 60 * 60)
    end
    if config[:days_previous] != 0
      start_time -= (config[:days_previous] * 60 * 60 * 24)
    end
    if config[:weeks_previous] != 0
      start_time -= (config[:weeks_previous] * 60 * 60 * 24 * 7)
    end
    if config[:months_previous] != 0
      # NOTE: a month is approximated as 31 days.
      start_time -= (config[:months_previous] * 60 * 60 * 24 * 31)
    end

    query[config[:date_field]] = { '$gte' => Time.at(start_time).to_datetime }

    if config[:debug]
      puts 'Query: ' + query.inspect
    end

    collection.find(query)
  end

  # Compare the result count against the thresholds and emit the check
  # status. With --invert, counts BELOW the thresholds alert instead.
  def print_results(results)
    count = results.count

    if config[:include_results]
      results.each { |document| puts document.inspect }
    end

    if config[:invert]
      if count < config[:crit]
        critical "Query count (#{count}) was below critical threshold."
      elsif count < config[:warn]
        warning "Query count (#{count}) was below warning threshold."
      else
        ok "Query count (#{count}) was ok"
      end
    elsif count > config[:crit]
      critical "Query count (#{count}) was above critical threshold."
    elsif count > config[:warn]
      warning "Query count (#{count}) was above warning threshold."
    else
      ok "Query count (#{count}) was ok"
    end
  end

  # Entry point: configure logging, connect, query, report.
  def run
    Mongo::Logger.logger.level = Logger::FATAL
    @debug = config[:debug]
    if @debug
      Mongo::Logger.logger.level = Logger::DEBUG
      # Never echo the real password in debug output.
      config_debug = config.clone
      config_debug[:password] = '***'
      puts 'Arguments: ' + config_debug.inspect
    end

    connect_mongo_db

    results = query_mongo

    print_results(results)
  end
end
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env python
1
+ #!/usr/bin/env python3
2
2
 
3
3
  #
4
4
  # A MongoDB Nagios check script
@@ -16,37 +16,29 @@
16
16
  # - @jbraeuer on github
17
17
  # - Dag Stockstad <dag.stockstad@gmail.com>
18
18
  # - @Andor on github
19
- # - Steven Richards - Captainkrtek on Github <sbrichards@mit.edu>
19
+ # - Steven Richards - Captainkrtek on github
20
+ # - Max Vernimmen - @mvernimmen-CG / @mvernimmen on github
21
+ # - Kris Nova - @kris@nivenly.com github.com/kris-nova
22
+ # - Jan Kantert - firstname@lastname.net
20
23
  #
21
-
22
- # License: BSD
23
- # Copyright (c) 2012, Mike Zupan <mike@zcentric.com>
24
- # All rights reserved.
25
- # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
24
+ # USAGE
26
25
  #
27
- # Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
28
- # Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
29
- # documentation and/or other materials provided with the distribution.
30
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
31
- # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
32
- # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
33
- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34
- # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ # See the README.md
35
27
  #
36
- # README: https://github.com/mzupan/nagios-plugin-mongodb/blob/master/LICENSE
37
28
 
38
- # #RED
29
+ from __future__ import print_function
30
+ from __future__ import division
39
31
  import sys
40
32
  import time
41
33
  import optparse
42
- import textwrap
43
34
  import re
44
35
  import os
36
+ import numbers
45
37
 
46
38
  try:
47
39
  import pymongo
48
- except ImportError, e:
49
- print e
40
+ except ImportError as e:
41
+ print(e)
50
42
  sys.exit(2)
51
43
 
52
44
  # As of pymongo v 1.9 the SON API is part of the BSON package, therefore attempt
@@ -90,37 +82,35 @@ def performance_data(perf_data, params):
90
82
 
91
83
 
92
84
def numeric_type(param):
    """Return True when *param* is None or a real number (int/float)."""
    if param is None:
        return True
    return isinstance(param, numbers.Real)
96
86
 
97
87
 
98
88
def check_levels(param, warning, critical, message, ok=()):
    """Compare *param* against the thresholds and report a Nagios status.

    When both *warning* and *critical* are numeric (or None) they are ordered
    levels: param >= critical is CRITICAL, >= warning is WARNING, else OK.
    Otherwise they are treated as containers of acceptable values, with *ok*
    as a third container of known-good values.

    Exits the process with the matching Nagios code on a verdict; returns 2
    only for an unexpected value that matched no container.

    Note: the default for *ok* was a mutable list ([]); changed to an
    immutable tuple — membership semantics are unchanged.
    """
    if (numeric_type(critical) and numeric_type(warning)):
        if param >= critical:
            print("CRITICAL - " + message)
            sys.exit(2)
        elif param >= warning:
            print("WARNING - " + message)
            sys.exit(1)
        else:
            print("OK - " + message)
            sys.exit(0)
    else:
        if param in critical:
            print("CRITICAL - " + message)
            sys.exit(2)

        if param in warning:
            print("WARNING - " + message)
            sys.exit(1)

        if param in ok:
            print("OK - " + message)
            sys.exit(0)

        # unexpected param value
        print("CRITICAL - Unexpected value : %d" % param + "; " + message)
        return 2
125
115
 
126
116
 
@@ -137,35 +127,45 @@ def main(argv):
137
127
  p = optparse.OptionParser(conflict_handler="resolve", description="This Nagios plugin checks the health of mongodb.")
138
128
 
139
129
  p.add_option('-H', '--host', action='store', type='string', dest='host', default='127.0.0.1', help='The hostname you want to connect to')
140
- p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is runnung on')
130
+ p.add_option('-h', '--host-to-check', action='store', type='string', dest='host_to_check', default=None, help='The hostname you want to check (if this is different from the host you are connecting)')
131
+ p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is running on')
132
+ p.add_option('--port-to-check', action='store', type='int', dest='port_to_check', default=None, help='The port you want to check (if this is different from the port you are connecting)')
141
133
  p.add_option('-u', '--user', action='store', type='string', dest='user', default=None, help='The username you want to login as')
142
134
  p.add_option('-p', '--pass', action='store', type='string', dest='passwd', default=None, help='The password you want to use for that user')
143
- p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold we want to set')
144
- p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold we want to set')
135
+ p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold you want to set')
136
+ p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold you want to set')
145
137
  p.add_option('-A', '--action', action='store', type='choice', dest='action', default='connect', help='The action you want to take',
146
138
  choices=['connect', 'connections', 'replication_lag', 'replication_lag_percent', 'replset_state', 'memory', 'memory_mapped', 'lock',
147
- 'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_indexes', 'collection_size',
148
- 'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary', 'page_faults',
149
- 'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
139
+ 'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_documents', 'collection_indexes', 'collection_size',
140
+ 'collection_storageSize', 'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary',
141
+ 'page_faults', 'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
150
142
  p.add_option('--max-lag', action='store_true', dest='max_lag', default=False, help='Get max replication lag (for replication_lag action only)')
151
143
  p.add_option('--mapped-memory', action='store_true', dest='mapped_memory', default=False, help='Get mapped memory instead of resident (if resident memory can not be read)')
152
144
  p.add_option('-D', '--perf-data', action='store_true', dest='perf_data', default=False, help='Enable output of Nagios performance data')
153
145
  p.add_option('-d', '--database', action='store', dest='database', default='admin', help='Specify the database to check')
154
146
  p.add_option('--all-databases', action='store_true', dest='all_databases', default=False, help='Check all databases (action database_size)')
155
- p.add_option('-s', '--ssl-enabled', dest='ssl_enabled', default=False, action='callback', callback=optional_arg(True), help='Connect using SSL')
156
- p.add_option('-e', '--ssl-certfile', dest='ssl_certfile', default=None, action='store', help='The certificate file used to identify the local connection against mongod')
157
- p.add_option('-k', '--ssl-keyfile', dest='ssl_keyfile', default=None, action='store', help='The private key used to identify the local connection against mongod')
158
- p.add_option('-a', '--ssl-ca-certs', dest='ssl_ca_certs', default=None, action='store', help='The set of concatenated CA certificates, which are used to validate certificates passed from the other end of the connection')
147
+ p.add_option('-s', '--ssl', dest='ssl', default=False, action='callback', callback=optional_arg(True), help='Connect using SSL')
159
148
  p.add_option('-r', '--replicaset', dest='replicaset', default=None, action='callback', callback=optional_arg(True), help='Connect to replicaset')
160
149
  p.add_option('-q', '--querytype', action='store', dest='query_type', default='query', help='The query type to check [query|insert|update|delete|getmore|command] from queries_per_second')
161
150
  p.add_option('-c', '--collection', action='store', dest='collection', default='admin', help='Specify the collection to check')
162
151
  p.add_option('-T', '--time', action='store', type='int', dest='sample_time', default=1, help='Time used to sample number of pages faults')
152
+ p.add_option('-M', '--mongoversion', action='store', type='choice', dest='mongo_version', default='2', help='The MongoDB version you are talking with, either 2 or 3',
153
+ choices=['2','3'])
154
+ p.add_option('-a', '--authdb', action='store', type='string', dest='authdb', default='admin', help='The database you want to authenticate against')
155
+ p.add_option('--ssl-ca-cert-file', action='store', type='string', dest='ssl_ca_cert_file', default=None, help='Path to Certificate Authority file for SSL')
156
+ p.add_option('-f', '--ssl-cert-file', action='store', type='string', dest='cert_file', default=None, help='Path to PEM encoded key and cert for client authentication')
157
+ p.add_option('-m','--auth-mechanism', action='store', type='choice', dest='auth_mechanism', default=None, help='Auth mechanism used for auth with mongodb',
158
+ choices=['MONGODB-X509','SCRAM-SHA-256','SCRAM-SHA-1'])
163
159
 
164
160
  options, arguments = p.parse_args()
165
161
  host = options.host
162
+ host_to_check = options.host_to_check if options.host_to_check else options.host
166
163
  port = options.port
164
+ port_to_check = options.port_to_check if options.port_to_check else options.port
167
165
  user = options.user
168
166
  passwd = options.passwd
167
+ authdb = options.authdb
168
+
169
169
  query_type = options.query_type
170
170
  collection = options.collection
171
171
  sample_time = options.sample_time
@@ -179,12 +179,13 @@ def main(argv):
179
179
  action = options.action
180
180
  perf_data = options.perf_data
181
181
  max_lag = options.max_lag
182
+ mongo_version = options.mongo_version
182
183
  database = options.database
183
- ssl_enabled = options.ssl_enabled
184
- ssl_certfile = options.ssl_certfile
185
- ssl_keyfile = options.ssl_keyfile
186
- ssl_ca_certs = options.ssl_ca_certs
184
+ ssl = options.ssl
187
185
  replicaset = options.replicaset
186
+ ssl_ca_cert_file = options.ssl_ca_cert_file
187
+ cert_file = options.cert_file
188
+ auth_mechanism = options.auth_mechanism
188
189
 
189
190
  if action == 'replica_primary' and replicaset is None:
190
191
  return "replicaset must be passed in when using replica_primary check"
@@ -195,31 +196,36 @@ def main(argv):
195
196
  # moving the login up here and passing in the connection
196
197
  #
197
198
  start = time.time()
198
- err, con = mongo_connect(host, port, ssl_enabled, ssl_certfile, ssl_keyfile, ssl_ca_certs, user, passwd, replicaset)
199
+ err, con = mongo_connect(host, port, ssl, user, passwd, replicaset, authdb, ssl_ca_cert_file, cert_file)
200
+
201
+ if err != 0:
202
+ return err
203
+
204
+ # Autodetect mongo-version and force pymongo to let us know if it can connect or not.
205
+ err, mongo_version = check_version(con)
199
206
  if err != 0:
200
207
  return err
201
208
 
202
209
  conn_time = time.time() - start
203
- conn_time = round(conn_time, 0)
204
210
 
205
211
  if action == "connections":
206
212
  return check_connections(con, warning, critical, perf_data)
207
213
  elif action == "replication_lag":
208
- return check_rep_lag(con, host, port, warning, critical, False, perf_data, max_lag, user, passwd)
214
+ return check_rep_lag(con, host_to_check, port_to_check, warning, critical, False, perf_data, max_lag, user, passwd)
209
215
  elif action == "replication_lag_percent":
210
- return check_rep_lag(con, host, port, warning, critical, True, perf_data, max_lag, user, passwd)
216
+ return check_rep_lag(con, host_to_check, port_to_check, warning, critical, True, perf_data, max_lag, user, passwd, ssl, ssl_ca_cert_file, cert_file)
211
217
  elif action == "replset_state":
212
218
  return check_replset_state(con, perf_data, warning, critical)
213
219
  elif action == "memory":
214
- return check_memory(con, warning, critical, perf_data, options.mapped_memory)
220
+ return check_memory(con, warning, critical, perf_data, options.mapped_memory, host)
215
221
  elif action == "memory_mapped":
216
222
  return check_memory_mapped(con, warning, critical, perf_data)
217
223
  elif action == "queues":
218
224
  return check_queues(con, warning, critical, perf_data)
219
225
  elif action == "lock":
220
- return check_lock(con, warning, critical, perf_data)
226
+ return check_lock(con, warning, critical, perf_data, mongo_version)
221
227
  elif action == "current_lock":
222
- return check_current_lock(con, host, warning, critical, perf_data)
228
+ return check_current_lock(con, host, port, warning, critical, perf_data)
223
229
  elif action == "flushing":
224
230
  return check_flushing(con, warning, critical, True, perf_data)
225
231
  elif action == "last_flush_time":
@@ -241,22 +247,26 @@ def main(argv):
241
247
  return check_database_size(con, database, warning, critical, perf_data)
242
248
  elif action == "database_indexes":
243
249
  return check_database_indexes(con, database, warning, critical, perf_data)
250
+ elif action == "collection_documents":
251
+ return check_collection_documents(con, database, collection, warning, critical, perf_data)
244
252
  elif action == "collection_indexes":
245
253
  return check_collection_indexes(con, database, collection, warning, critical, perf_data)
246
254
  elif action == "collection_size":
247
255
  return check_collection_size(con, database, collection, warning, critical, perf_data)
256
+ elif action == "collection_storageSize":
257
+ return check_collection_storageSize(con, database, collection, warning, critical, perf_data)
248
258
  elif action == "journaled":
249
259
  return check_journaled(con, warning, critical, perf_data)
250
260
  elif action == "write_data_files":
251
261
  return check_write_to_datafiles(con, warning, critical, perf_data)
252
262
  elif action == "opcounters":
253
- return check_opcounters(con, host, warning, critical, perf_data)
263
+ return check_opcounters(con, host, port, warning, critical, perf_data)
254
264
  elif action == "asserts":
255
- return check_asserts(con, host, warning, critical, perf_data)
265
+ return check_asserts(con, host, port, warning, critical, perf_data)
256
266
  elif action == "replica_primary":
257
- return check_replica_primary(con, host, warning, critical, perf_data, replicaset)
267
+ return check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version)
258
268
  elif action == "queries_per_second":
259
- return check_queries_per_second(con, query_type, warning, critical, perf_data)
269
+ return check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version)
260
270
  elif action == "page_faults":
261
271
  check_page_faults(con, sample_time, warning, critical, perf_data)
262
272
  elif action == "chunks_balance":
@@ -273,42 +283,65 @@ def main(argv):
273
283
  return check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time)
274
284
 
275
285
 
276
def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, replica=None, authdb="admin", ssl_ca_cert_file=None, ssl_cert=None, auth_mechanism=None):
    """Open a pymongo connection and return (0, connection) on success.

    On failure, prints a Nagios-style message and either exits the process
    directly (connection failure, bad credentials, arbiter detected) or
    returns (status, None) via exit_with_general_critical.
    """
    from pymongo.errors import ConnectionFailure
    from pymongo.errors import PyMongoError

    con_args = dict()

    # SSL-related kwargs are only passed when SSL is requested.
    if ssl:
        con_args['ssl'] = ssl
        if ssl_ca_cert_file:
            con_args['ssl_ca_certs'] = ssl_ca_cert_file
        if ssl_cert:
            con_args['ssl_certfile'] = ssl_cert

    try:
        # ssl connection for pymongo > 2.3
        if pymongo.version >= "2.3":
            if replica is None:
                con = pymongo.MongoClient(host, port, **con_args)
            else:
                con = pymongo.MongoClient(host, port, read_preference=pymongo.ReadPreference.SECONDARY, replicaSet=replica, **con_args)
        else:
            # NOTE(review): both branches are identical for pymongo < 2.3 —
            # the replica set name is effectively ignored; confirm intended.
            if replica is None:
                con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)
            else:
                con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)

        # we must authenticate the connection, otherwise we won't be able to perform certain operations
        if ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-256':
            con.the_database.authenticate(user, mechanism='SCRAM-SHA-256')
        elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-1':
            con.the_database.authenticate(user, mechanism='SCRAM-SHA-1')
        elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'MONGODB-X509':
            con.the_database.authenticate(user, mechanism='MONGODB-X509')

        # ismaster is cheap, needs no auth, and confirms the server answers.
        try:
            result = con.admin.command("ismaster")
        except ConnectionFailure:
            print("CRITICAL - Connection to Mongo server on %s:%s has failed" % (host, port) )
            sys.exit(2)

        # Arbiters answer ismaster but hold no data: report replica state 7.
        if 'arbiterOnly' in result and result['arbiterOnly'] == True:
            print("OK - State: 7 (Arbiter on port %s)" % (port))
            sys.exit(0)

        if user and passwd:
            db = con[authdb]
            try:
                db.authenticate(user, password=passwd)
            except PyMongoError:
                sys.exit("Username/Password incorrect")

        # Ping to check that the server is responding.
        con.admin.command("ping")

    except Exception as e:
        if isinstance(e, pymongo.errors.AutoReconnect) and str(e).find(" is an arbiter") != -1:
            # We got a pymongo AutoReconnect exception that tells us we connected to an Arbiter Server
            # This means: Arbiter is reachable and can answer requests/votes - this is all we need to know from an arbiter
            print("OK - State: 7 (Arbiter)")
            sys.exit(0)
        return exit_with_general_critical(e), None
    return 0, con
@@ -318,7 +351,7 @@ def exit_with_general_warning(e):
318
351
  if isinstance(e, SystemExit):
319
352
  return e
320
353
  else:
321
- print "WARNING - General MongoDB warning:", e
354
+ print("WARNING - General MongoDB warning:", e)
322
355
  return 1
323
356
 
324
357
 
@@ -326,21 +359,27 @@ def exit_with_general_critical(e):
326
359
  if isinstance(e, SystemExit):
327
360
  return e
328
361
  else:
329
- print "CRITICAL - General MongoDB Error:", e
362
+ print("CRITICAL - General MongoDB Error:", e)
330
363
  return 2
331
364
 
332
365
 
333
366
  def set_read_preference(db):
334
- if pymongo.version >= "2.2" and pymongo.version < "2.8":
367
+ if pymongo.version >= "2.2":
335
368
  pymongo.read_preferences.Secondary
336
369
  else:
337
370
  db.read_preference = pymongo.ReadPreference.SECONDARY
338
371
 
372
def check_version(con):
    """Return (0, major_server_version) or (error_status, None) on failure."""
    try:
        info = con.server_info()
    except Exception as e:
        return exit_with_general_critical(e), None
    major = info['version'].split('.')[0].strip()
    return 0, int(major)
339
378
 
340
379
def check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time):
    """Evaluate connection time against thresholds (defaults: warn 3s, crit 6s)."""
    warn_at = warning or 3
    crit_at = critical or 6

    msg = "Connection took %.3f seconds" % conn_time
    msg += performance_data(perf_data, [(conn_time, "connection_time", warn_at, crit_at)])

    return check_levels(conn_time, warn_at, crit_at, msg)
@@ -362,13 +401,17 @@ def check_connections(con, warning, critical, perf_data):
362
401
  (available, "available_connections")])
363
402
  return check_levels(used_percent, warning, critical, message)
364
403
 
365
- except Exception, e:
404
+ except Exception as e:
366
405
  return exit_with_general_critical(e)
367
406
 
368
407
 
369
- def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd):
408
+ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd, ssl=None, ssl_ca_cert_file=None, cert_file=None):
370
409
  # Get mongo to tell us replica set member name when connecting locally
371
410
  if "127.0.0.1" == host:
411
+ if not "me" in list(con.admin.command("ismaster","1").keys()):
412
+ print("UNKNOWN - This is not replicated MongoDB")
413
+ return 3
414
+
372
415
  host = con.admin.command("ismaster","1")["me"].split(':')[0]
373
416
 
374
417
  if percent:
@@ -380,15 +423,15 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
380
423
  rs_status = {}
381
424
  slaveDelays = {}
382
425
  try:
383
- set_read_preference(con.admin)
426
+ #set_read_preference(con.admin)
384
427
 
385
428
  # Get replica set status
386
429
  try:
387
430
  rs_status = con.admin.command("replSetGetStatus")
388
- except pymongo.errors.OperationFailure, e:
389
- if e.code == None and str(e).find('failed: not running with --replSet"'):
390
- print "OK - Not running with replSet"
391
- return 0
431
+ except pymongo.errors.OperationFailure as e:
432
+ if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
433
+ print("UNKNOWN - Not running with replSet")
434
+ return 3
392
435
 
393
436
  serverVersion = tuple(con.server_info()['version'].split('.'))
394
437
  if serverVersion >= tuple("2.0.0".split(".")):
@@ -409,24 +452,24 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
409
452
  for member in rs_status["members"]:
410
453
  if member["stateStr"] == "PRIMARY":
411
454
  primary_node = member
412
- if member["name"].split(':')[0] == host and int(member["name"].split(':')[1]) == port:
455
+ if member.get('name') == "{0}:{1}".format(host, port):
413
456
  host_node = member
414
457
 
415
458
  # Check if we're in the middle of an election and don't have a primary
416
459
  if primary_node is None:
417
- print "WARNING - No primary defined. In an election?"
460
+ print("WARNING - No primary defined. In an election?")
418
461
  return 1
419
462
 
420
463
  # Check if we failed to find the current host
421
464
  # below should never happen
422
465
  if host_node is None:
423
- print "CRITICAL - Unable to find host '" + host + "' in replica set."
466
+ print("CRITICAL - Unable to find host '" + host + "' in replica set.")
424
467
  return 2
425
468
 
426
469
  # Is the specified host the primary?
427
470
  if host_node["stateStr"] == "PRIMARY":
428
471
  if max_lag == False:
429
- print "OK - This is the primary."
472
+ print("OK - This is the primary.")
430
473
  return 0
431
474
  else:
432
475
  #get the maximal replication lag
@@ -439,7 +482,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
439
482
  data = data + member['name'] + " lag=%d;" % replicationLag
440
483
  maximal_lag = max(maximal_lag, replicationLag)
441
484
  if percent:
442
- err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user=user, passwd=passwd)
485
+ err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user, passwd)
443
486
  if err != 0:
444
487
  return err
445
488
  primary_timediff = replication_get_time_diff(con)
@@ -451,8 +494,8 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
451
494
  message += performance_data(perf_data, [(maximal_lag, "replication_lag", warning, critical)])
452
495
  return check_levels(maximal_lag, warning, critical, message)
453
496
  elif host_node["stateStr"] == "ARBITER":
454
- print "OK - This is an arbiter"
455
- return 0
497
+ print("UNKNOWN - This is an arbiter")
498
+ return 3
456
499
 
457
500
  # Find the difference in optime between current node and PRIMARY
458
501
 
@@ -471,7 +514,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
471
514
  lag = float(optime_lag.seconds + optime_lag.days * 24 * 3600)
472
515
 
473
516
  if percent:
474
- err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user=user, passwd=passwd)
517
+ err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), ssl, user, passwd, None, None, ssl_ca_cert_file, cert_file)
475
518
  if err != 0:
476
519
  return err
477
520
  primary_timediff = replication_get_time_diff(con)
@@ -503,12 +546,12 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
503
546
 
504
547
  # Check if we're in the middle of an election and don't have a primary
505
548
  if primary_node is None:
506
- print "WARNING - No primary defined. In an election?"
549
+ print("WARNING - No primary defined. In an election?")
507
550
  sys.exit(1)
508
551
 
509
552
  # Is the specified host the primary?
510
553
  if host_node["stateStr"] == "PRIMARY":
511
- print "OK - This is the primary."
554
+ print("OK - This is the primary.")
512
555
  sys.exit(0)
513
556
 
514
557
  # Find the difference in optime between current node and PRIMARY
@@ -527,20 +570,42 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
527
570
  message += performance_data(perf_data, [(lag, "replication_lag", warning, critical)])
528
571
  return check_levels(lag, warning, critical, message)
529
572
 
530
- except Exception, e:
573
+ except Exception as e:
531
574
  return exit_with_general_critical(e)
532
575
 
576
+ #
577
+ # Check the memory usage of mongo. Alerting on this may be hard to get right
578
+ # because it'll try to get as much memory as it can. And that's probably
579
+ # a good thing.
580
+ #
581
+ def check_memory(con, warning, critical, perf_data, mapped_memory, host):
582
+ # Get the total system memory of this system (This is totally bogus if you
583
+ # are running this command remotely) and calculate based on that how much
584
+ # memory used by Mongodb is ok or not.
585
+ meminfo = open('/proc/meminfo').read()
586
+ matched = re.search(r'^MemTotal:\s+(\d+)', meminfo)
587
+ if matched:
588
+ mem_total_kB = int(matched.groups()[0])
589
+
590
+ if host != "127.0.0.1" and not warning:
591
+ # Running remotely and value was not set by user, use hardcoded value
592
+ warning = 12
593
+ else:
594
+ # running locally or user provided value
595
+ warning = warning or (mem_total_kB * 0.8) / 1024.0 / 1024.0
596
+
597
+ if host != "127.0.0.1" and not critical:
598
+ critical = 16
599
+ else:
600
+ critical = critical or (mem_total_kB * 0.9) / 1024.0 / 1024.0
601
+
602
+ # debugging
603
+ #print "mem total: {0}kb, warn: {1}GB, crit: {2}GB".format(mem_total_kB,warning, critical)
533
604
 
534
- def check_memory(con, warning, critical, perf_data, mapped_memory):
535
- #
536
- # These thresholds are basically meaningless, and must be customized to your system's ram
537
- #
538
- warning = warning or 8
539
- critical = critical or 16
540
605
  try:
541
606
  data = get_server_status(con)
542
607
  if not data['mem']['supported'] and not mapped_memory:
543
- print "OK - Platform not supported for memory info"
608
+ print("OK - Platform not supported for memory info")
544
609
  return 0
545
610
  #
546
611
  # convert to gigs
@@ -577,7 +642,7 @@ def check_memory(con, warning, critical, perf_data, mapped_memory):
577
642
  else:
578
643
  return check_levels(mem_resident, warning, critical, message)
579
644
 
580
- except Exception, e:
645
+ except Exception as e:
581
646
  return exit_with_general_critical(e)
582
647
 
583
648
 
@@ -590,7 +655,7 @@ def check_memory_mapped(con, warning, critical, perf_data):
590
655
  try:
591
656
  data = get_server_status(con)
592
657
  if not data['mem']['supported']:
593
- print "OK - Platform not supported for memory info"
658
+ print("OK - Platform not supported for memory info")
594
659
  return 0
595
660
  #
596
661
  # convert to gigs
@@ -607,33 +672,45 @@ def check_memory_mapped(con, warning, critical, perf_data):
607
672
  message += " %.2fGB mappedWithJournal" % mem_mapped_journal
608
673
  except:
609
674
  mem_mapped_journal = 0
610
- message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped"), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
675
+ message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped", warning, critical), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
611
676
 
612
677
  if not mem_mapped == -1:
613
678
  return check_levels(mem_mapped, warning, critical, message)
614
679
  else:
615
- print "OK - Server does not provide mem.mapped info"
680
+ print("OK - Server does not provide mem.mapped info")
616
681
  return 0
617
682
 
618
- except Exception, e:
683
+ except Exception as e:
619
684
  return exit_with_general_critical(e)
620
685
 
621
686
 
622
- def check_lock(con, warning, critical, perf_data):
687
+ #
688
+ # Return the percentage of the time there was a global Lock
689
+ #
690
+ def check_lock(con, warning, critical, perf_data, mongo_version):
623
691
  warning = warning or 10
624
692
  critical = critical or 30
625
- try:
626
- data = get_server_status(con)
627
- #
628
- # calculate percentage
629
- #
630
- lock_percentage = float(data['globalLock']['lockTime']) / float(data['globalLock']['totalTime']) * 100
631
- message = "Lock Percentage: %.2f%%" % lock_percentage
632
- message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
633
- return check_levels(lock_percentage, warning, critical, message)
634
-
635
- except Exception, e:
636
- return exit_with_general_critical(e)
693
+ if mongo_version == 2:
694
+ try:
695
+ data = get_server_status(con)
696
+ lockTime = data['globalLock']['lockTime']
697
+ totalTime = data['globalLock']['totalTime']
698
+ #
699
+ # calculate percentage
700
+ #
701
+ if lockTime > totalTime:
702
+ lock_percentage = 0.00
703
+ else:
704
+ lock_percentage = float(lockTime) / float(totalTime) * 100
705
+ message = "Lock Percentage: %.2f%%" % lock_percentage
706
+ message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
707
+ return check_levels(lock_percentage, warning, critical, message)
708
+ except Exception as e:
709
+ print("Couldn't get globalLock lockTime info from mongo, are you sure you're not using version 3? See the -M option.")
710
+ return exit_with_general_critical(e)
711
+ else:
712
+ print("OK - MongoDB version 3 doesn't report on global locks")
713
+ return 0
637
714
 
638
715
 
639
716
  def check_flushing(con, warning, critical, avg, perf_data):
@@ -645,19 +722,24 @@ def check_flushing(con, warning, critical, avg, perf_data):
645
722
  critical = critical or 15000
646
723
  try:
647
724
  data = get_server_status(con)
648
- if avg:
649
- flush_time = float(data['backgroundFlushing']['average_ms'])
650
- stat_type = "Average"
651
- else:
652
- flush_time = float(data['backgroundFlushing']['last_ms'])
653
- stat_type = "Last"
725
+ try:
726
+ data['backgroundFlushing']
727
+ if avg:
728
+ flush_time = float(data['backgroundFlushing']['average_ms'])
729
+ stat_type = "Average"
730
+ else:
731
+ flush_time = float(data['backgroundFlushing']['last_ms'])
732
+ stat_type = "Last"
654
733
 
655
- message = "%s Flush Time: %.2fms" % (stat_type, flush_time)
656
- message += performance_data(perf_data, [("%.2fms" % flush_time, "%s_flush_time" % stat_type.lower(), warning, critical)])
734
+ message = "%s Flush Time: %.2fms" % (stat_type, flush_time)
735
+ message += performance_data(perf_data, [("%.2fms" % flush_time, "%s_flush_time" % stat_type.lower(), warning, critical)])
657
736
 
658
- return check_levels(flush_time, warning, critical, message)
737
+ return check_levels(flush_time, warning, critical, message)
738
+ except Exception:
739
+ print("OK - flushing stats not available for this storage engine")
740
+ return 0
659
741
 
660
- except Exception, e:
742
+ except Exception as e:
661
743
  return exit_with_general_critical(e)
662
744
 
663
745
 
@@ -668,6 +750,7 @@ def index_miss_ratio(con, warning, critical, perf_data):
668
750
  data = get_server_status(con)
669
751
 
670
752
  try:
753
+ data['indexCounters']
671
754
  serverVersion = tuple(con.server_info()['version'].split('.'))
672
755
  if serverVersion >= tuple("2.4.0".split(".")):
673
756
  miss_ratio = float(data['indexCounters']['missRatio'])
@@ -675,19 +758,24 @@ def index_miss_ratio(con, warning, critical, perf_data):
675
758
  miss_ratio = float(data['indexCounters']['btree']['missRatio'])
676
759
  except KeyError:
677
760
  not_supported_msg = "not supported on this platform"
678
- if data['indexCounters'].has_key('note'):
679
- print "OK - MongoDB says: " + not_supported_msg
761
+ try:
762
+ data['indexCounters']
763
+ if 'note' in data['indexCounters']:
764
+ print("OK - MongoDB says: " + not_supported_msg)
765
+ return 0
766
+ else:
767
+ print("WARNING - Can't get counter from MongoDB")
768
+ return 1
769
+ except Exception:
770
+ print("OK - MongoDB says: " + not_supported_msg)
680
771
  return 0
681
- else:
682
- print "WARNING - Can't get counter from MongoDB"
683
- return 1
684
772
 
685
773
  message = "Miss Ratio: %.2f" % miss_ratio
686
774
  message += performance_data(perf_data, [("%.2f" % miss_ratio, "index_miss_ratio", warning, critical)])
687
775
 
688
776
  return check_levels(miss_ratio, warning, critical, message)
689
777
 
690
- except Exception, e:
778
+ except Exception as e:
691
779
  return exit_with_general_critical(e)
692
780
 
693
781
  def check_replset_quorum(con, perf_data):
@@ -711,7 +799,7 @@ def check_replset_quorum(con, perf_data):
711
799
  message = "Cluster is not quorate and cannot operate"
712
800
 
713
801
  return check_levels(state, warning, critical, message)
714
- except Exception, e:
802
+ except Exception as e:
715
803
  return exit_with_general_critical(e)
716
804
 
717
805
 
@@ -720,52 +808,69 @@ def check_replset_state(con, perf_data, warning="", critical=""):
720
808
  try:
721
809
  warning = [int(x) for x in warning.split(",")]
722
810
  except:
723
- warning = [0, 3, 5, 9]
811
+ warning = [0, 3, 5]
724
812
  try:
725
813
  critical = [int(x) for x in critical.split(",")]
726
814
  except:
727
815
  critical = [8, 4, -1]
728
816
 
729
- ok = range(-1, 8) # should include the range of all posiible values
817
+ ok = list(range(-1, 8)) # should include the range of all posiible values
730
818
  try:
819
+ worst_state = -2
820
+ message = ""
731
821
  try:
732
822
  try:
733
823
  set_read_preference(con.admin)
734
824
  data = con.admin.command(pymongo.son_manipulator.SON([('replSetGetStatus', 1)]))
735
825
  except:
736
826
  data = con.admin.command(son.SON([('replSetGetStatus', 1)]))
737
- state = int(data['myState'])
738
- except pymongo.errors.OperationFailure, e:
739
- if e.code == None and str(e).find('failed: not running with --replSet"'):
740
- state = -1
741
-
742
- if state == 8:
743
- message = "State: %i (Down)" % state
744
- elif state == 4:
745
- message = "State: %i (Fatal error)" % state
746
- elif state == 0:
747
- message = "State: %i (Starting up, phase1)" % state
748
- elif state == 3:
749
- message = "State: %i (Recovering)" % state
750
- elif state == 5:
751
- message = "State: %i (Starting up, phase2)" % state
752
- elif state == 1:
753
- message = "State: %i (Primary)" % state
754
- elif state == 2:
755
- message = "State: %i (Secondary)" % state
756
- elif state == 7:
757
- message = "State: %i (Arbiter)" % state
758
- elif state == 9:
759
- message = "State: %i (Rollback)" % state
760
- elif state == -1:
761
- message = "Not running with replSet"
762
- else:
763
- message = "State: %i (Unknown state)" % state
764
- message += performance_data(perf_data, [(state, "state")])
765
- return check_levels(state, warning, critical, message, ok)
766
- except Exception, e:
827
+ members = data['members']
828
+ my_state = int(data['myState'])
829
+ worst_state = my_state
830
+ for member in members:
831
+ their_state = int(member['state'])
832
+ message += " %s: %i (%s)" % (member['name'], their_state, state_text(their_state))
833
+ if state_is_worse(their_state, worst_state, warning, critical):
834
+ worst_state = their_state
835
+ message += performance_data(perf_data, [(my_state, "state")])
836
+
837
+ except pymongo.errors.OperationFailure as e:
838
+ if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
839
+ worst_state = -1
840
+
841
+ return check_levels(worst_state, warning, critical, message, ok)
842
+ except Exception as e:
767
843
  return exit_with_general_critical(e)
768
844
 
845
+ def state_is_worse(state, worst_state, warning, critical):
846
+ if worst_state in critical:
847
+ return False
848
+ if worst_state in warning:
849
+ return state in critical
850
+ return (state in warning) or (state in critical)
851
+
852
+ def state_text(state):
853
+ if state == 8:
854
+ return "Down"
855
+ elif state == 4:
856
+ return "Fatal error"
857
+ elif state == 0:
858
+ return "Starting up, phase1"
859
+ elif state == 3:
860
+ return "Recovering"
861
+ elif state == 5:
862
+ return "Starting up, phase2"
863
+ elif state == 1:
864
+ return "Primary"
865
+ elif state == 2:
866
+ return "Secondary"
867
+ elif state == 7:
868
+ return "Arbiter"
869
+ elif state == -1:
870
+ return "Not running with replSet"
871
+ else:
872
+ return "Unknown state"
873
+
769
874
 
770
875
  def check_databases(con, warning, critical, perf_data=None):
771
876
  try:
@@ -779,7 +884,7 @@ def check_databases(con, warning, critical, perf_data=None):
779
884
  message = "Number of DBs: %.0f" % count
780
885
  message += performance_data(perf_data, [(count, "databases", warning, critical, message)])
781
886
  return check_levels(count, warning, critical, message)
782
- except Exception, e:
887
+ except Exception as e:
783
888
  return exit_with_general_critical(e)
784
889
 
785
890
 
@@ -801,7 +906,7 @@ def check_collections(con, warning, critical, perf_data=None):
801
906
  message += performance_data(perf_data, [(count, "collections", warning, critical, message)])
802
907
  return check_levels(count, warning, critical, message)
803
908
 
804
- except Exception, e:
909
+ except Exception as e:
805
910
  return exit_with_general_critical(e)
806
911
 
807
912
 
@@ -838,21 +943,21 @@ def check_database_size(con, database, warning, critical, perf_data):
838
943
  try:
839
944
  set_read_preference(con.admin)
840
945
  data = con[database].command('dbstats')
841
- storage_size = data['storageSize'] / 1024 / 1024
946
+ storage_size = data['storageSize'] // 1024 // 1024
842
947
  if perf_data:
843
948
  perfdata += " | database_size=%i;%i;%i" % (storage_size, warning, critical)
844
949
  #perfdata += " database=%s" %(database)
845
950
 
846
951
  if storage_size >= critical:
847
- print "CRITICAL - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
952
+ print("CRITICAL - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
848
953
  return 2
849
954
  elif storage_size >= warning:
850
- print "WARNING - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
955
+ print("WARNING - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
851
956
  return 1
852
957
  else:
853
- print "OK - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
958
+ print("OK - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
854
959
  return 0
855
- except Exception, e:
960
+ except Exception as e:
856
961
  return exit_with_general_critical(e)
857
962
 
858
963
 
@@ -866,20 +971,42 @@ def check_database_indexes(con, database, warning, critical, perf_data):
866
971
  try:
867
972
  set_read_preference(con.admin)
868
973
  data = con[database].command('dbstats')
869
- index_size = data['indexSize'] / 1024 / 1024
974
+ index_size = data['indexSize'] / 1024 // 1024
870
975
  if perf_data:
871
976
  perfdata += " | database_indexes=%i;%i;%i" % (index_size, warning, critical)
872
977
 
873
978
  if index_size >= critical:
874
- print "CRITICAL - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
979
+ print("CRITICAL - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
875
980
  return 2
876
981
  elif index_size >= warning:
877
- print "WARNING - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
982
+ print("WARNING - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
983
+ return 1
984
+ else:
985
+ print("OK - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
986
+ return 0
987
+ except Exception as e:
988
+ return exit_with_general_critical(e)
989
+
990
+
991
+ def check_collection_documents(con, database, collection, warning, critical, perf_data):
992
+ perfdata = ""
993
+ try:
994
+ set_read_preference(con.admin)
995
+ data = con[database].command('collstats', collection)
996
+ documents = data['count']
997
+ if perf_data:
998
+ perfdata += " | collection_documents=%i;%i;%i" % (documents, warning, critical)
999
+
1000
+ if documents >= critical:
1001
+ print("CRITICAL - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
1002
+ return 2
1003
+ elif documents >= warning:
1004
+ print("WARNING - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
878
1005
  return 1
879
1006
  else:
880
- print "OK - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
1007
+ print("OK - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
881
1008
  return 0
882
- except Exception, e:
1009
+ except Exception as e:
883
1010
  return exit_with_general_critical(e)
884
1011
 
885
1012
 
@@ -898,15 +1025,15 @@ def check_collection_indexes(con, database, collection, warning, critical, perf_
898
1025
  perfdata += " | collection_indexes=%i;%i;%i" % (total_index_size, warning, critical)
899
1026
 
900
1027
  if total_index_size >= critical:
901
- print "CRITICAL - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1028
+ print("CRITICAL - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
902
1029
  return 2
903
1030
  elif total_index_size >= warning:
904
- print "WARNING - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1031
+ print("WARNING - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
905
1032
  return 1
906
1033
  else:
907
- print "OK - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1034
+ print("OK - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
908
1035
  return 0
909
- except Exception, e:
1036
+ except Exception as e:
910
1037
  return exit_with_general_critical(e)
911
1038
 
912
1039
 
@@ -923,7 +1050,7 @@ def check_queues(con, warning, critical, perf_data):
923
1050
  message += performance_data(perf_data, [(total_queues, "total_queues", warning, critical), (readers_queues, "readers_queues"), (writers_queues, "writers_queues")])
924
1051
  return check_levels(total_queues, warning, critical, message)
925
1052
 
926
- except Exception, e:
1053
+ except Exception as e:
927
1054
  return exit_with_general_critical(e)
928
1055
 
929
1056
  def check_collection_size(con, database, collection, warning, critical, perf_data):
@@ -938,18 +1065,43 @@ def check_collection_size(con, database, collection, warning, critical, perf_dat
938
1065
  perfdata += " | collection_size=%i;%i;%i" % (size, warning, critical)
939
1066
 
940
1067
  if size >= critical:
941
- print "CRITICAL - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1068
+ print("CRITICAL - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
942
1069
  return 2
943
1070
  elif size >= warning:
944
- print "WARNING - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1071
+ print("WARNING - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
945
1072
  return 1
946
1073
  else:
947
- print "OK - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1074
+ print("OK - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
948
1075
  return 0
949
- except Exception, e:
1076
+ except Exception as e:
950
1077
  return exit_with_general_critical(e)
951
1078
 
952
- def check_queries_per_second(con, query_type, warning, critical, perf_data):
1079
+
1080
+ def check_collection_storageSize(con, database, collection, warning, critical, perf_data):
1081
+ warning = warning or 100
1082
+ critical = critical or 1000
1083
+ perfdata = ""
1084
+ try:
1085
+ set_read_preference(con.admin)
1086
+ data = con[database].command('collstats', collection)
1087
+ storageSize = data['storageSize'] / 1024 / 1024
1088
+ if perf_data:
1089
+ perfdata += " | collection_storageSize=%i;%i;%i" % (storageSize, warning, critical)
1090
+
1091
+ if storageSize >= critical:
1092
+ print("CRITICAL - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1093
+ return 2
1094
+ elif storageSize >= warning:
1095
+ print("WARNING - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1096
+ return 1
1097
+ else:
1098
+ print("OK - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1099
+ return 0
1100
+ except Exception as e:
1101
+ return exit_with_general_critical(e)
1102
+
1103
+
1104
+ def check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version):
953
1105
  warning = warning or 250
954
1106
  critical = critical or 500
955
1107
 
@@ -970,10 +1122,17 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
970
1122
  diff_query = num - last_count['data'][query_type]['count']
971
1123
  diff_ts = ts - last_count['data'][query_type]['ts']
972
1124
 
1125
+ if diff_ts == 0:
1126
+ message = "diff_query = " + str(diff_query) + " diff_ts = " + str(diff_ts)
1127
+ return check_levels(0, warning, critical, message)
1128
+
973
1129
  query_per_sec = float(diff_query) / float(diff_ts)
974
1130
 
975
1131
  # update the count now
976
- db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1132
+ if mongo_version == 2:
1133
+ db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1134
+ else:
1135
+ db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
977
1136
 
978
1137
  message = "Queries / Sec: %f" % query_per_sec
979
1138
  message += performance_data(perf_data, [(query_per_sec, "%s_per_sec" % query_type, warning, critical, message)])
@@ -982,17 +1141,24 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
982
1141
  # since it is the first run insert it
983
1142
  query_per_sec = 0
984
1143
  message = "First run of check.. no data"
985
- db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1144
+ if mongo_version == 2:
1145
+ db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1146
+ else:
1147
+ db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1148
+
986
1149
  except TypeError:
987
1150
  #
988
1151
  # since it is the first run insert it
989
1152
  query_per_sec = 0
990
1153
  message = "First run of check.. no data"
991
- db.nagios_check.insert({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
1154
+ if mongo_version == 2:
1155
+ db.nagios_check.insert({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
1156
+ else:
1157
+ db.nagios_check.insert_one({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
992
1158
 
993
1159
  return check_levels(query_per_sec, warning, critical, message)
994
1160
 
995
- except Exception, e:
1161
+ except Exception as e:
996
1162
  return exit_with_general_critical(e)
997
1163
 
998
1164
 
@@ -1039,7 +1205,7 @@ def check_oplog(con, warning, critical, perf_data):
1039
1205
  message += performance_data(perf_data, [("%.2f" % hours_in_oplog, 'oplog_time', warning, critical), ("%.2f " % approx_level, 'oplog_time_100_percent_used')])
1040
1206
  return check_levels(-approx_level, -warning, -critical, message)
1041
1207
 
1042
- except Exception, e:
1208
+ except Exception as e:
1043
1209
  return exit_with_general_critical(e)
1044
1210
 
1045
1211
 
@@ -1057,7 +1223,7 @@ Under very high write situations it is normal for this value to be nonzero. """
1057
1223
  message += performance_data(perf_data, [(j_commits_in_wl, "j_commits_in_wl", warning, critical)])
1058
1224
  return check_levels(j_commits_in_wl, warning, critical, message)
1059
1225
 
1060
- except Exception, e:
1226
+ except Exception as e:
1061
1227
  return exit_with_general_critical(e)
1062
1228
 
1063
1229
 
@@ -1073,7 +1239,7 @@ def check_journaled(con, warning, critical, perf_data):
1073
1239
  message += performance_data(perf_data, [("%.2f" % journaled, "journaled", warning, critical)])
1074
1240
  return check_levels(journaled, warning, critical, message)
1075
1241
 
1076
- except Exception, e:
1242
+ except Exception as e:
1077
1243
  return exit_with_general_critical(e)
1078
1244
 
1079
1245
 
@@ -1090,11 +1256,11 @@ than the amount physically written to disk."""
1090
1256
  message += performance_data(perf_data, [("%.2f" % writes, "write_to_data_files", warning, critical)])
1091
1257
  return check_levels(writes, warning, critical, message)
1092
1258
 
1093
- except Exception, e:
1259
+ except Exception as e:
1094
1260
  return exit_with_general_critical(e)
1095
1261
 
1096
1262
 
1097
- def get_opcounters(data, opcounters_name, host):
1263
+ def get_opcounters(data, opcounters_name, host, port):
1098
1264
  try:
1099
1265
  insert = data[opcounters_name]['insert']
1100
1266
  query = data[opcounters_name]['query']
@@ -1102,21 +1268,21 @@ def get_opcounters(data, opcounters_name, host):
1102
1268
  delete = data[opcounters_name]['delete']
1103
1269
  getmore = data[opcounters_name]['getmore']
1104
1270
  command = data[opcounters_name]['command']
1105
- except KeyError, e:
1271
+ except KeyError as e:
1106
1272
  return 0, [0] * 100
1107
1273
  total_commands = insert + query + update + delete + getmore + command
1108
1274
  new_vals = [total_commands, insert, query, update, delete, getmore, command]
1109
- return maintain_delta(new_vals, host, opcounters_name)
1275
+ return maintain_delta(new_vals, host, port, opcounters_name)
1110
1276
 
1111
1277
 
1112
- def check_opcounters(con, host, warning, critical, perf_data):
1278
+ def check_opcounters(con, host, port, warning, critical, perf_data):
1113
1279
  """ A function to get all opcounters delta per minute. In case of a replication - gets the opcounters+opcountersRepl"""
1114
1280
  warning = warning or 10000
1115
1281
  critical = critical or 15000
1116
1282
 
1117
1283
  data = get_server_status(con)
1118
- err1, delta_opcounters = get_opcounters(data, 'opcounters', host)
1119
- err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host)
1284
+ err1, delta_opcounters = get_opcounters(data, 'opcounters', host, port)
1285
+ err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host, port)
1120
1286
  if err1 == 0 and err2 == 0:
1121
1287
  delta = [(x + y) for x, y in zip(delta_opcounters, delta_opcounters_repl)]
1122
1288
  delta[0] = delta_opcounters[0] # only the time delta shouldn't be summarized
@@ -1124,14 +1290,14 @@ def check_opcounters(con, host, warning, critical, perf_data):
1124
1290
  message = "Test succeeded , old values missing"
1125
1291
  message = "Opcounters: total=%d,insert=%d,query=%d,update=%d,delete=%d,getmore=%d,command=%d" % tuple(per_minute_delta)
1126
1292
  message += performance_data(perf_data, ([(per_minute_delta[0], "total", warning, critical), (per_minute_delta[1], "insert"),
1127
- (per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[5], "delete"),
1293
+ (per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[4], "delete"),
1128
1294
  (per_minute_delta[5], "getmore"), (per_minute_delta[6], "command")]))
1129
1295
  return check_levels(per_minute_delta[0], warning, critical, message)
1130
1296
  else:
1131
1297
  return exit_with_general_critical("problem reading data from temp file")
1132
1298
 
1133
1299
 
1134
- def check_current_lock(con, host, warning, critical, perf_data):
1300
+ def check_current_lock(con, host, port, warning, critical, perf_data):
1135
1301
  """ A function to get current lock percentage and not a global one, as check_lock function does"""
1136
1302
  warning = warning or 10
1137
1303
  critical = critical or 30
@@ -1140,7 +1306,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
1140
1306
  lockTime = float(data['globalLock']['lockTime'])
1141
1307
  totalTime = float(data['globalLock']['totalTime'])
1142
1308
 
1143
- err, delta = maintain_delta([totalTime, lockTime], host, "locktime")
1309
+ err, delta = maintain_delta([totalTime, lockTime], host, port, "locktime")
1144
1310
  if err == 0:
1145
1311
  lock_percentage = delta[2] / delta[1] * 100 # lockTime/totalTime*100
1146
1312
  message = "Current Lock Percentage: %.2f%%" % lock_percentage
@@ -1150,7 +1316,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
1150
1316
  return exit_with_general_warning("problem reading data from temp file")
1151
1317
 
1152
1318
 
1153
- def check_page_faults(con, host, warning, critical, perf_data):
1319
+ def check_page_faults(con, host, port, warning, critical, perf_data):
1154
1320
  """ A function to get page_faults per second from the system"""
1155
1321
  warning = warning or 10
1156
1322
  critical = critical or 30
@@ -1162,7 +1328,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
1162
1328
  # page_faults unsupported on the underlaying system
1163
1329
  return exit_with_general_critical("page_faults unsupported on the underlaying system")
1164
1330
 
1165
- err, delta = maintain_delta([page_faults], host, "page_faults")
1331
+ err, delta = maintain_delta([page_faults], host, port, "page_faults")
1166
1332
  if err == 0:
1167
1333
  page_faults_ps = delta[1] / delta[0]
1168
1334
  message = "Page faults : %.2f ps" % page_faults_ps
@@ -1172,7 +1338,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
1172
1338
  return exit_with_general_warning("problem reading data from temp file")
1173
1339
 
1174
1340
 
1175
- def check_asserts(con, host, warning, critical, perf_data):
1341
+ def check_asserts(con, host, port, warning, critical, perf_data):
1176
1342
  """ A function to get asserts from the system"""
1177
1343
  warning = warning or 1
1178
1344
  critical = critical or 10
@@ -1187,7 +1353,7 @@ def check_asserts(con, host, warning, critical, perf_data):
1187
1353
  user = asserts['user']
1188
1354
  rollovers = asserts['rollovers']
1189
1355
 
1190
- err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, "asserts")
1356
+ err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, port, "asserts")
1191
1357
 
1192
1358
  if err == 0:
1193
1359
  if delta[5] != 0:
@@ -1221,7 +1387,7 @@ def get_stored_primary_server_name(db):
1221
1387
  return stored_primary_server
1222
1388
 
1223
1389
 
1224
- def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
1390
+ def check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version):
1225
1391
  """ A function to check if the primary server of a replica set has changed """
1226
1392
  if warning is None and critical is None:
1227
1393
  warning = 1
@@ -1244,7 +1410,10 @@ def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
1244
1410
  saved_primary = "None"
1245
1411
  if current_primary != saved_primary:
1246
1412
  last_primary_server_record = {"server": current_primary}
1247
- db.last_primary_server.update({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True, safe=True)
1413
+ if mongo_version == 2:
1414
+ db.last_primary_server.update({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
1415
+ else:
1416
+ db.last_primary_server.update_one({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
1248
1417
  message = "Primary server has changed from %s to %s" % (saved_primary, current_primary)
1249
1418
  primary_status = 1
1250
1419
  return check_levels(primary_status, warning, critical, message)
@@ -1266,9 +1435,9 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
1266
1435
 
1267
1436
  try:
1268
1437
  #on linux servers only
1269
- page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults'])) / sample_time
1438
+ page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults'])) // sample_time
1270
1439
  except KeyError:
1271
- print "WARNING - Can't get extra_info.page_faults counter from MongoDB"
1440
+ print("WARNING - Can't get extra_info.page_faults counter from MongoDB")
1272
1441
  sys.exit(1)
1273
1442
 
1274
1443
  message = "Page Faults: %i" % (page_faults)
@@ -1276,7 +1445,7 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
1276
1445
  message += performance_data(perf_data, [(page_faults, "page_faults", warning, critical)])
1277
1446
  check_levels(page_faults, warning, critical, message)
1278
1447
 
1279
- except Exception, e:
1448
+ except Exception as e:
1280
1449
  exit_with_general_critical(e)
1281
1450
 
1282
1451
 
@@ -1292,35 +1461,35 @@ def chunks_balance(con, database, collection, warning, critical):
1292
1461
  shards = col.distinct("shard")
1293
1462
 
1294
1463
  except:
1295
- print "WARNING - Can't get chunks infos from MongoDB"
1464
+ print("WARNING - Can't get chunks infos from MongoDB")
1296
1465
  sys.exit(1)
1297
1466
 
1298
1467
  if nscount == 0:
1299
- print "WARNING - Namespace %s is not sharded" % (nsfilter)
1468
+ print("WARNING - Namespace %s is not sharded" % (nsfilter))
1300
1469
  sys.exit(1)
1301
1470
 
1302
- avgchunksnb = nscount / len(shards)
1303
- warningnb = avgchunksnb * warning / 100
1304
- criticalnb = avgchunksnb * critical / 100
1471
+ avgchunksnb = nscount // len(shards)
1472
+ warningnb = avgchunksnb * warning // 100
1473
+ criticalnb = avgchunksnb * critical // 100
1305
1474
 
1306
1475
  for shard in shards:
1307
1476
  delta = abs(avgchunksnb - col.find({"ns": nsfilter, "shard": shard}).count())
1308
1477
  message = "Namespace: %s, Shard name: %s, Chunk delta: %i" % (nsfilter, shard, delta)
1309
1478
 
1310
1479
  if delta >= criticalnb and delta > 0:
1311
- print "CRITICAL - Chunks not well balanced " + message
1480
+ print("CRITICAL - Chunks not well balanced " + message)
1312
1481
  sys.exit(2)
1313
1482
  elif delta >= warningnb and delta > 0:
1314
- print "WARNING - Chunks not well balanced " + message
1483
+ print("WARNING - Chunks not well balanced " + message)
1315
1484
  sys.exit(1)
1316
1485
 
1317
- print "OK - Chunks well balanced across shards"
1486
+ print("OK - Chunks well balanced across shards")
1318
1487
  sys.exit(0)
1319
1488
 
1320
- except Exception, e:
1489
+ except Exception as e:
1321
1490
  exit_with_general_critical(e)
1322
1491
 
1323
- print "OK - Chunks well balanced across shards"
1492
+ print("OK - Chunks well balanced across shards")
1324
1493
  sys.exit(0)
1325
1494
 
1326
1495
 
@@ -1336,7 +1505,7 @@ def check_connect_primary(con, warning, critical, perf_data):
1336
1505
  data = con.admin.command(son.SON([('isMaster', 1)]))
1337
1506
 
1338
1507
  if data['ismaster'] == True:
1339
- print "OK - This server is primary"
1508
+ print("OK - This server is primary")
1340
1509
  return 0
1341
1510
 
1342
1511
  phost = data['primary'].split(':')[0]
@@ -1354,17 +1523,17 @@ def check_connect_primary(con, warning, critical, perf_data):
1354
1523
 
1355
1524
  return check_levels(pconn_time, warning, critical, message)
1356
1525
 
1357
- except Exception, e:
1526
+ except Exception as e:
1358
1527
  return exit_with_general_critical(e)
1359
1528
 
1360
1529
 
1361
1530
  def check_collection_state(con, database, collection):
1362
1531
  try:
1363
1532
  con[database][collection].find_one()
1364
- print "OK - Collection %s.%s is reachable " % (database, collection)
1533
+ print("OK - Collection %s.%s is reachable " % (database, collection))
1365
1534
  return 0
1366
1535
 
1367
- except Exception, e:
1536
+ except Exception as e:
1368
1537
  return exit_with_general_critical(e)
1369
1538
 
1370
1539
 
@@ -1376,14 +1545,18 @@ def check_row_count(con, database, collection, warning, critical, perf_data):
1376
1545
 
1377
1546
  return check_levels(count, warning, critical, message)
1378
1547
 
1379
- except Exception, e:
1548
+ except Exception as e:
1380
1549
  return exit_with_general_critical(e)
1381
1550
 
1382
1551
 
1383
- def build_file_name(host, action):
1552
+ def build_file_name(host, port, action):
1384
1553
  #done this way so it will work when run independently and from shell
1385
1554
  module_name = re.match('(.*//*)*(.*)\..*', __file__).group(2)
1386
- return "/tmp/" + module_name + "_data/" + host + "-" + action + ".data"
1555
+
1556
+ if (port == 27017):
1557
+ return "/tmp/" + module_name + "_data/" + host + "-" + action + ".data"
1558
+ else:
1559
+ return "/tmp/" + module_name + "_data/" + host + "-" + str(port) + "-" + action + ".data"
1387
1560
 
1388
1561
 
1389
1562
  def ensure_dir(f):
@@ -1396,7 +1569,7 @@ def write_values(file_name, string):
1396
1569
  f = None
1397
1570
  try:
1398
1571
  f = open(file_name, 'w')
1399
- except IOError, e:
1572
+ except IOError as e:
1400
1573
  #try creating
1401
1574
  if (e.errno == 2):
1402
1575
  ensure_dir(file_name)
@@ -1415,11 +1588,11 @@ def read_values(file_name):
1415
1588
  data = f.read()
1416
1589
  f.close()
1417
1590
  return 0, data
1418
- except IOError, e:
1591
+ except IOError as e:
1419
1592
  if (e.errno == 2):
1420
1593
  #no previous data
1421
1594
  return 1, ''
1422
- except Exception, e:
1595
+ except Exception as e:
1423
1596
  return 2, None
1424
1597
 
1425
1598
 
@@ -1435,8 +1608,8 @@ def calc_delta(old, new):
1435
1608
  return 0, delta
1436
1609
 
1437
1610
 
1438
- def maintain_delta(new_vals, host, action):
1439
- file_name = build_file_name(host, action)
1611
+ def maintain_delta(new_vals, host, port, action):
1612
+ file_name = build_file_name(host, port, action)
1440
1613
  err, data = read_values(file_name)
1441
1614
  old_vals = data.split(';')
1442
1615
  new_vals = [str(int(time.time()))] + new_vals
@@ -1457,8 +1630,8 @@ def replication_get_time_diff(con):
1457
1630
  col = 'oplog.$main'
1458
1631
  firstc = local[col].find().sort("$natural", 1).limit(1)
1459
1632
  lastc = local[col].find().sort("$natural", -1).limit(1)
1460
- first = firstc.next()
1461
- last = lastc.next()
1633
+ first = next(firstc)
1634
+ last = next(lastc)
1462
1635
  tfirst = first["ts"]
1463
1636
  tlast = last["ts"]
1464
1637
  delta = tlast.time - tfirst.time