sensu-plugins-mongodb-boutetnico 1.0.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a03df4caef7dc049d55bbfc903b5115a70f715a6
4
- data.tar.gz: 432c4515549e0a71e04e11ea69b889874f1b87ec
3
+ metadata.gz: 20bd487d838baf2695377d4f9427abb2ab216b42
4
+ data.tar.gz: ba4b4e0e4d895b9f1df4c20dd598e1cf3df67ac9
5
5
  SHA512:
6
- metadata.gz: 8fd39f365379139289e0b8933ec87af4331920516e5ce86bf2306c5e11b3712196b68adc7f08882234d67c61120fa6e1d88489e8b8383e16e7af56bc9c81722a
7
- data.tar.gz: 1dad2a62055b2c11690878b72608de258aee2ba82547a2bb9ccd4dd95ff9b506a4494646d8c8c3eeb103b0df37418c200ca09ac7f3753ad5b8dc0dd8aa203ae8
6
+ metadata.gz: 6489d8a793494b7cccd8b141f6d1b0414764692149289a4bfb201af868fdf81106d77c0e0b48b8841524dd8d2d6d6b0147df3cd8c0f6dfcc3d9437951243f598
7
+ data.tar.gz: f1b84acd7d5a6afc1e0fac77d6612fb241a139b01b5448d61588a54a5cab8e69f8df34e5a0ac73dd50aaf6711bf532fb4cb210dc2515097e62a2bb0b507c7502
data/README.md CHANGED
@@ -12,6 +12,7 @@ This fork is automatically tested, built and published to [RubyGems](https://rub
12
12
  * bin/check-mongodb.py
13
13
  * bin/check-mongodb.rb - wrapper for check-mongodb.py
14
14
  * bin/check-mongodb-metric.rb
15
+ * bin/check-mongodb-query-count.rb
15
16
  * bin/metrics-mongodb.rb
16
17
  * bin/metrics-mongodb-replication.rb
17
18
 
@@ -0,0 +1,267 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-mongodb-query-count.rb
4
+ #
5
+ # DESCRIPTION:
6
+ # Check how many documents are returned by a MongoDB query.
7
+ #
8
+ # OUTPUT:
9
+ # Plain text
10
+ #
11
+ # PLATFORMS:
12
+ # Linux
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: sensu-plugin
16
+ # gem: mongo
17
+ # gem: bson
18
+ # gem: bson_ext
19
+ # gem: json
20
+ #
21
+ # USAGE:
22
+ # # Check MongoDB collection "logs" for critical events
23
+ # ./check-mongodb-query-count.rb --user sensu --pass sensu --database test --collection logs
24
+ # --query '{"level":"CRITICAL"}'
25
+ # --minutes-previous 5
26
+ # -w 0 -c 10 --include-results
27
+ #
28
+ # NOTES:
29
+ # Ruby is shit.
30
+ #
31
+ # LICENSE:
32
+ # Copyright 2019 github.com/boutetnico
33
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
34
+ # for details.
35
+ #
36
+
37
+ require 'sensu-plugin/check/cli'
38
+ require 'mongo'
39
+ require 'json'
40
+ include Mongo
41
+
42
+ #
43
+ # Mongodb
44
+ #
45
+
46
+ class MongoDBQueryCount < Sensu::Plugin::Check::CLI
47
+ option :host,
48
+ description: 'MongoDB host',
49
+ long: '--host HOST',
50
+ default: 'localhost'
51
+
52
+ option :port,
53
+ description: 'MongoDB port',
54
+ long: '--port PORT',
55
+ default: 27_017
56
+
57
+ option :user,
58
+ description: 'MongoDB user',
59
+ long: '--user USER',
60
+ default: nil
61
+
62
+ option :password,
63
+ description: 'MongoDB password',
64
+ long: '--password PASSWORD',
65
+ default: nil
66
+
67
+ option :ssl,
68
+ description: 'Connect using SSL',
69
+ long: '--ssl',
70
+ default: false
71
+
72
+ option :ssl_cert,
73
+ description: 'The certificate file used to identify the local connection against mongod',
74
+ long: '--ssl-cert SSL_CERT',
75
+ default: ''
76
+
77
+ option :ssl_key,
78
+ description: 'The private key used to identify the local connection against mongod',
79
+ long: '--ssl-key SSL_KEY',
80
+ default: ''
81
+
82
+ option :ssl_ca_cert,
83
+ description: 'The set of concatenated CA certificates, which are used to validate certificates passed from the other end of the connection',
84
+ long: '--ssl-ca-cert SSL_CA_CERT',
85
+ default: ''
86
+
87
+ option :ssl_verify,
88
+ description: 'Whether or not to do peer certification validation',
89
+ long: '--ssl-verify',
90
+ default: false
91
+
92
+ option :debug,
93
+ description: 'Enable debug',
94
+ long: '--debug',
95
+ default: false
96
+
97
+ option :database,
98
+ description: 'Database to perform query on',
99
+ short: '-d DATABASE',
100
+ long: '--database DATABASE',
101
+ required: true
102
+
103
+ option :collection,
104
+ description: 'Collection to perform query on',
105
+ short: '-C COLLECTION',
106
+ long: '--collection COLLECTION',
107
+ required: true
108
+
109
+ option :query,
110
+ description: 'Query to perform',
111
+ short: '-q QUERY',
112
+ long: '--query QUERY',
113
+ required: true
114
+
115
+ option :warn,
116
+ short: '-w N',
117
+ long: '--warn N',
118
+ description: 'Result count WARNING threshold',
119
+ proc: proc(&:to_i),
120
+ default: 0
121
+
122
+ option :crit,
123
+ short: '-c N',
124
+ long: '--crit N',
125
+ description: 'Result count CRITICAL threshold',
126
+ proc: proc(&:to_i),
127
+ default: 0
128
+
129
+ option :invert,
130
+ long: '--invert',
131
+ description: 'Invert thresholds',
132
+ boolean: true
133
+
134
+ option :date_field,
135
+ description: 'Field to use instead of "date" for query.',
136
+ long: '--date-field FIELD_NAME',
137
+ default: 'date'
138
+
139
+ option :minutes_previous,
140
+ description: 'Minutes before offset to check date field against query.',
141
+ long: '--minutes-previous MINUTES_PREVIOUS',
142
+ proc: proc(&:to_i),
143
+ default: 0
144
+
145
+ option :hours_previous,
146
+ description: 'Hours before offset to check date field against query.',
147
+ long: '--hours-previous HOURS_PREVIOUS',
148
+ proc: proc(&:to_i),
149
+ default: 0
150
+
151
+ option :days_previous,
152
+ description: 'Days before offset to check date field against query.',
153
+ long: '--days-previous DAYS_PREVIOUS',
154
+ proc: proc(&:to_i),
155
+ default: 0
156
+
157
+ option :weeks_previous,
158
+ description: 'Weeks before offset to check date field against query.',
159
+ long: '--weeks-previous WEEKS_PREVIOUS',
160
+ proc: proc(&:to_i),
161
+ default: 0
162
+
163
+ option :months_previous,
164
+ description: 'Months before offset to check date field against query.',
165
+ long: '--months-previous MONTHS_PREVIOUS',
166
+ proc: proc(&:to_i),
167
+ default: 0
168
+
169
+ option :include_results,
170
+ long: '--include-results',
171
+ description: 'Include results in response',
172
+ boolean: false
173
+
174
+ def connect_mongo_db
175
+ address_str = "#{config[:host]}:#{config[:port]}"
176
+ client_opts = {}
177
+ client_opts[:database] = config[:database]
178
+ unless config[:user].nil?
179
+ client_opts[:user] = config[:user]
180
+ client_opts[:password] = config[:password]
181
+ end
182
+ if config[:ssl]
183
+ client_opts[:ssl] = true
184
+ client_opts[:ssl_cert] = config[:ssl_cert]
185
+ client_opts[:ssl_key] = config[:ssl_key]
186
+ client_opts[:ssl_ca_cert] = config[:ssl_ca_cert]
187
+ client_opts[:ssl_verify] = config[:ssl_verify]
188
+ end
189
+ mongo_client = Mongo::Client.new([address_str], client_opts)
190
+ @db = mongo_client.database
191
+ end
192
+
193
+ def query_mongo
194
+ collection = @db[config[:collection]]
195
+ begin
196
+ query = JSON.parse(config[:query])
197
+ rescue JSON::ParserError
198
+ unknown 'Failed to parse query. Provide a valid JSON array.'
199
+ end
200
+
201
+ start_time = Time.now.utc.to_i
202
+ if config[:minutes_previous] != 0
203
+ start_time -= (config[:minutes_previous] * 60)
204
+ end
205
+ if config[:hours_previous] != 0
206
+ start_time -= (config[:hours_previous] * 60 * 60)
207
+ end
208
+ if config[:days_previous] != 0
209
+ start_time -= (config[:days_previous] * 60 * 60 * 24)
210
+ end
211
+ if config[:weeks_previous] != 0
212
+ start_time -= (config[:weeks_previous] * 60 * 60 * 24 * 7)
213
+ end
214
+ if config[:months_previous] != 0
215
+ start_time -= (config[:months_previous] * 60 * 60 * 24 * 31)
216
+ end
217
+
218
+ query[config[:date_field]] = { '$gte' => Time.at(start_time).to_datetime }
219
+
220
+ if config[:debug]
221
+ puts 'Query: ' + query.inspect
222
+ end
223
+
224
+ collection.find(query)
225
+ end
226
+
227
+ def print_results(results)
228
+ count = results.count
229
+
230
+ if config[:include_results]
231
+ results.each { |document| puts document.inspect }
232
+ end
233
+
234
+ if config[:invert]
235
+ if count < config[:crit]
236
+ critical "Query count (#{count}) was below critical threshold."
237
+ elsif count < config[:warn]
238
+ warning "Query count (#{count}) was below warning threshold."
239
+ else
240
+ ok "Query count (#{count}) was ok"
241
+ end
242
+ elsif count > config[:crit]
243
+ critical "Query count (#{count}) was above critical threshold."
244
+ elsif count > config[:warn]
245
+ warning "Query count (#{count}) was above warning threshold."
246
+ else
247
+ ok "Query count (#{count}) was ok"
248
+ end
249
+ end
250
+
251
+ def run
252
+ Mongo::Logger.logger.level = Logger::FATAL
253
+ @debug = config[:debug]
254
+ if @debug
255
+ Mongo::Logger.logger.level = Logger::DEBUG
256
+ config_debug = config.clone
257
+ config_debug[:password] = '***'
258
+ puts 'Arguments: ' + config_debug.inspect
259
+ end
260
+
261
+ connect_mongo_db
262
+
263
+ results = query_mongo
264
+
265
+ print_results(results)
266
+ end
267
+ end
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env python
1
+ #!/usr/bin/env python3
2
2
 
3
3
  #
4
4
  # A MongoDB Nagios check script
@@ -16,37 +16,29 @@
16
16
  # - @jbraeuer on github
17
17
  # - Dag Stockstad <dag.stockstad@gmail.com>
18
18
  # - @Andor on github
19
- # - Steven Richards - Captainkrtek on Github <sbrichards@mit.edu>
19
+ # - Steven Richards - Captainkrtek on github
20
+ # - Max Vernimmen - @mvernimmen-CG / @mvernimmen on github
21
+ # - Kris Nova - @kris@nivenly.com github.com/kris-nova
22
+ # - Jan Kantert - firstname@lastname.net
20
23
  #
21
-
22
- # License: BSD
23
- # Copyright (c) 2012, Mike Zupan <mike@zcentric.com>
24
- # All rights reserved.
25
- # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
24
+ # USAGE
26
25
  #
27
- # Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
28
- # Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
29
- # documentation and/or other materials provided with the distribution.
30
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
31
- # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
32
- # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
33
- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34
- # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ # See the README.md
35
27
  #
36
- # README: https://github.com/mzupan/nagios-plugin-mongodb/blob/master/LICENSE
37
28
 
38
- # #RED
29
+ from __future__ import print_function
30
+ from __future__ import division
39
31
  import sys
40
32
  import time
41
33
  import optparse
42
- import textwrap
43
34
  import re
44
35
  import os
36
+ import numbers
45
37
 
46
38
  try:
47
39
  import pymongo
48
- except ImportError, e:
49
- print e
40
+ except ImportError as e:
41
+ print(e)
50
42
  sys.exit(2)
51
43
 
52
44
  # As of pymongo v 1.9 the SON API is part of the BSON package, therefore attempt
@@ -90,37 +82,35 @@ def performance_data(perf_data, params):
90
82
 
91
83
 
92
84
  def numeric_type(param):
93
- if ((type(param) == float or type(param) == int or type(param) == long or param == None)):
94
- return True
95
- return False
85
+ return param is None or isinstance(param, numbers.Real)
96
86
 
97
87
 
98
88
  def check_levels(param, warning, critical, message, ok=[]):
99
89
  if (numeric_type(critical) and numeric_type(warning)):
100
90
  if param >= critical:
101
- print "CRITICAL - " + message
91
+ print("CRITICAL - " + message)
102
92
  sys.exit(2)
103
93
  elif param >= warning:
104
- print "WARNING - " + message
94
+ print("WARNING - " + message)
105
95
  sys.exit(1)
106
96
  else:
107
- print "OK - " + message
97
+ print("OK - " + message)
108
98
  sys.exit(0)
109
99
  else:
110
100
  if param in critical:
111
- print "CRITICAL - " + message
101
+ print("CRITICAL - " + message)
112
102
  sys.exit(2)
113
103
 
114
104
  if param in warning:
115
- print "WARNING - " + message
105
+ print("WARNING - " + message)
116
106
  sys.exit(1)
117
107
 
118
108
  if param in ok:
119
- print "OK - " + message
109
+ print("OK - " + message)
120
110
  sys.exit(0)
121
111
 
122
112
  # unexpected param value
123
- print "CRITICAL - Unexpected value : %d" % param + "; " + message
113
+ print("CRITICAL - Unexpected value : %d" % param + "; " + message)
124
114
  return 2
125
115
 
126
116
 
@@ -137,35 +127,45 @@ def main(argv):
137
127
  p = optparse.OptionParser(conflict_handler="resolve", description="This Nagios plugin checks the health of mongodb.")
138
128
 
139
129
  p.add_option('-H', '--host', action='store', type='string', dest='host', default='127.0.0.1', help='The hostname you want to connect to')
140
- p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is runnung on')
130
+ p.add_option('-h', '--host-to-check', action='store', type='string', dest='host_to_check', default=None, help='The hostname you want to check (if this is different from the host you are connecting)')
131
+ p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is running on')
132
+ p.add_option('--port-to-check', action='store', type='int', dest='port_to_check', default=None, help='The port you want to check (if this is different from the port you are connecting)')
141
133
  p.add_option('-u', '--user', action='store', type='string', dest='user', default=None, help='The username you want to login as')
142
134
  p.add_option('-p', '--pass', action='store', type='string', dest='passwd', default=None, help='The password you want to use for that user')
143
- p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold we want to set')
144
- p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold we want to set')
135
+ p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold you want to set')
136
+ p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold you want to set')
145
137
  p.add_option('-A', '--action', action='store', type='choice', dest='action', default='connect', help='The action you want to take',
146
138
  choices=['connect', 'connections', 'replication_lag', 'replication_lag_percent', 'replset_state', 'memory', 'memory_mapped', 'lock',
147
- 'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_indexes', 'collection_size',
148
- 'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary', 'page_faults',
149
- 'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
139
+ 'flushing', 'last_flush_time', 'index_miss_ratio', 'databases', 'collections', 'database_size', 'database_indexes', 'collection_documents', 'collection_indexes', 'collection_size',
140
+ 'collection_storageSize', 'queues', 'oplog', 'journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters', 'current_lock', 'replica_primary',
141
+ 'page_faults', 'asserts', 'queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary', 'collection_state', 'row_count', 'replset_quorum'])
150
142
  p.add_option('--max-lag', action='store_true', dest='max_lag', default=False, help='Get max replication lag (for replication_lag action only)')
151
143
  p.add_option('--mapped-memory', action='store_true', dest='mapped_memory', default=False, help='Get mapped memory instead of resident (if resident memory can not be read)')
152
144
  p.add_option('-D', '--perf-data', action='store_true', dest='perf_data', default=False, help='Enable output of Nagios performance data')
153
145
  p.add_option('-d', '--database', action='store', dest='database', default='admin', help='Specify the database to check')
154
146
  p.add_option('--all-databases', action='store_true', dest='all_databases', default=False, help='Check all databases (action database_size)')
155
- p.add_option('-s', '--ssl-enabled', dest='ssl_enabled', default=False, action='callback', callback=optional_arg(True), help='Connect using SSL')
156
- p.add_option('-e', '--ssl-certfile', dest='ssl_certfile', default=None, action='store', help='The certificate file used to identify the local connection against mongod')
157
- p.add_option('-k', '--ssl-keyfile', dest='ssl_keyfile', default=None, action='store', help='The private key used to identify the local connection against mongod')
158
- p.add_option('-a', '--ssl-ca-certs', dest='ssl_ca_certs', default=None, action='store', help='The set of concatenated CA certificates, which are used to validate certificates passed from the other end of the connection')
147
+ p.add_option('-s', '--ssl', dest='ssl', default=False, action='callback', callback=optional_arg(True), help='Connect using SSL')
159
148
  p.add_option('-r', '--replicaset', dest='replicaset', default=None, action='callback', callback=optional_arg(True), help='Connect to replicaset')
160
149
  p.add_option('-q', '--querytype', action='store', dest='query_type', default='query', help='The query type to check [query|insert|update|delete|getmore|command] from queries_per_second')
161
150
  p.add_option('-c', '--collection', action='store', dest='collection', default='admin', help='Specify the collection to check')
162
151
  p.add_option('-T', '--time', action='store', type='int', dest='sample_time', default=1, help='Time used to sample number of pages faults')
152
+ p.add_option('-M', '--mongoversion', action='store', type='choice', dest='mongo_version', default='2', help='The MongoDB version you are talking with, either 2 or 3',
153
+ choices=['2','3'])
154
+ p.add_option('-a', '--authdb', action='store', type='string', dest='authdb', default='admin', help='The database you want to authenticate against')
155
+ p.add_option('--ssl-ca-cert-file', action='store', type='string', dest='ssl_ca_cert_file', default=None, help='Path to Certificate Authority file for SSL')
156
+ p.add_option('-f', '--ssl-cert-file', action='store', type='string', dest='cert_file', default=None, help='Path to PEM encoded key and cert for client authentication')
157
+ p.add_option('-m','--auth-mechanism', action='store', type='choice', dest='auth_mechanism', default=None, help='Auth mechanism used for auth with mongodb',
158
+ choices=['MONGODB-X509','SCRAM-SHA-256','SCRAM-SHA-1'])
163
159
 
164
160
  options, arguments = p.parse_args()
165
161
  host = options.host
162
+ host_to_check = options.host_to_check if options.host_to_check else options.host
166
163
  port = options.port
164
+ port_to_check = options.port_to_check if options.port_to_check else options.port
167
165
  user = options.user
168
166
  passwd = options.passwd
167
+ authdb = options.authdb
168
+
169
169
  query_type = options.query_type
170
170
  collection = options.collection
171
171
  sample_time = options.sample_time
@@ -179,12 +179,13 @@ def main(argv):
179
179
  action = options.action
180
180
  perf_data = options.perf_data
181
181
  max_lag = options.max_lag
182
+ mongo_version = options.mongo_version
182
183
  database = options.database
183
- ssl_enabled = options.ssl_enabled
184
- ssl_certfile = options.ssl_certfile
185
- ssl_keyfile = options.ssl_keyfile
186
- ssl_ca_certs = options.ssl_ca_certs
184
+ ssl = options.ssl
187
185
  replicaset = options.replicaset
186
+ ssl_ca_cert_file = options.ssl_ca_cert_file
187
+ cert_file = options.cert_file
188
+ auth_mechanism = options.auth_mechanism
188
189
 
189
190
  if action == 'replica_primary' and replicaset is None:
190
191
  return "replicaset must be passed in when using replica_primary check"
@@ -195,31 +196,36 @@ def main(argv):
195
196
  # moving the login up here and passing in the connection
196
197
  #
197
198
  start = time.time()
198
- err, con = mongo_connect(host, port, ssl_enabled, ssl_certfile, ssl_keyfile, ssl_ca_certs, user, passwd, replicaset)
199
+ err, con = mongo_connect(host, port, ssl, user, passwd, replicaset, authdb, ssl_ca_cert_file, cert_file)
200
+
201
+ if err != 0:
202
+ return err
203
+
204
+ # Autodetect mongo-version and force pymongo to let us know if it can connect or not.
205
+ err, mongo_version = check_version(con)
199
206
  if err != 0:
200
207
  return err
201
208
 
202
209
  conn_time = time.time() - start
203
- conn_time = round(conn_time, 0)
204
210
 
205
211
  if action == "connections":
206
212
  return check_connections(con, warning, critical, perf_data)
207
213
  elif action == "replication_lag":
208
- return check_rep_lag(con, host, port, warning, critical, False, perf_data, max_lag, user, passwd)
214
+ return check_rep_lag(con, host_to_check, port_to_check, warning, critical, False, perf_data, max_lag, user, passwd)
209
215
  elif action == "replication_lag_percent":
210
- return check_rep_lag(con, host, port, warning, critical, True, perf_data, max_lag, user, passwd)
216
+ return check_rep_lag(con, host_to_check, port_to_check, warning, critical, True, perf_data, max_lag, user, passwd, ssl, ssl_ca_cert_file, cert_file)
211
217
  elif action == "replset_state":
212
218
  return check_replset_state(con, perf_data, warning, critical)
213
219
  elif action == "memory":
214
- return check_memory(con, warning, critical, perf_data, options.mapped_memory)
220
+ return check_memory(con, warning, critical, perf_data, options.mapped_memory, host)
215
221
  elif action == "memory_mapped":
216
222
  return check_memory_mapped(con, warning, critical, perf_data)
217
223
  elif action == "queues":
218
224
  return check_queues(con, warning, critical, perf_data)
219
225
  elif action == "lock":
220
- return check_lock(con, warning, critical, perf_data)
226
+ return check_lock(con, warning, critical, perf_data, mongo_version)
221
227
  elif action == "current_lock":
222
- return check_current_lock(con, host, warning, critical, perf_data)
228
+ return check_current_lock(con, host, port, warning, critical, perf_data)
223
229
  elif action == "flushing":
224
230
  return check_flushing(con, warning, critical, True, perf_data)
225
231
  elif action == "last_flush_time":
@@ -241,22 +247,26 @@ def main(argv):
241
247
  return check_database_size(con, database, warning, critical, perf_data)
242
248
  elif action == "database_indexes":
243
249
  return check_database_indexes(con, database, warning, critical, perf_data)
250
+ elif action == "collection_documents":
251
+ return check_collection_documents(con, database, collection, warning, critical, perf_data)
244
252
  elif action == "collection_indexes":
245
253
  return check_collection_indexes(con, database, collection, warning, critical, perf_data)
246
254
  elif action == "collection_size":
247
255
  return check_collection_size(con, database, collection, warning, critical, perf_data)
256
+ elif action == "collection_storageSize":
257
+ return check_collection_storageSize(con, database, collection, warning, critical, perf_data)
248
258
  elif action == "journaled":
249
259
  return check_journaled(con, warning, critical, perf_data)
250
260
  elif action == "write_data_files":
251
261
  return check_write_to_datafiles(con, warning, critical, perf_data)
252
262
  elif action == "opcounters":
253
- return check_opcounters(con, host, warning, critical, perf_data)
263
+ return check_opcounters(con, host, port, warning, critical, perf_data)
254
264
  elif action == "asserts":
255
- return check_asserts(con, host, warning, critical, perf_data)
265
+ return check_asserts(con, host, port, warning, critical, perf_data)
256
266
  elif action == "replica_primary":
257
- return check_replica_primary(con, host, warning, critical, perf_data, replicaset)
267
+ return check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version)
258
268
  elif action == "queries_per_second":
259
- return check_queries_per_second(con, query_type, warning, critical, perf_data)
269
+ return check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version)
260
270
  elif action == "page_faults":
261
271
  check_page_faults(con, sample_time, warning, critical, perf_data)
262
272
  elif action == "chunks_balance":
@@ -273,42 +283,65 @@ def main(argv):
273
283
  return check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time)
274
284
 
275
285
 
276
- def mongo_connect(host=None, port=None, ssl_enabled=False, ssl_certfile=None, ssl_keyfile=None, ssl_ca_certs=None, user=None, passwd=None, replica=None):
286
+ def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, replica=None, authdb="admin", ssl_ca_cert_file=None, ssl_cert=None, auth_mechanism=None):
287
+ from pymongo.errors import ConnectionFailure
288
+ from pymongo.errors import PyMongoError
289
+
290
+ con_args = dict()
291
+
292
+ if ssl:
293
+ con_args['ssl'] = ssl
294
+ if ssl_ca_cert_file:
295
+ con_args['ssl_ca_certs'] = ssl_ca_cert_file
296
+ if ssl_cert:
297
+ con_args['ssl_certfile'] = ssl_cert
298
+
277
299
  try:
278
300
  # ssl connection for pymongo > 2.3
279
301
  if pymongo.version >= "2.3":
280
302
  if replica is None:
281
- if ssl_enabled:
282
- con = pymongo.MongoClient(host, port, ssl=ssl_enabled, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, ssl_ca_certs=ssl_ca_certs)
283
- else:
284
- con = pymongo.MongoClient(host, port)
303
+ con = pymongo.MongoClient(host, port, **con_args)
285
304
  else:
286
- if ssl_enabled:
287
- con = pymongo.Connection(host, port, read_preference=pymongo.ReadPreference.SECONDARY, ssl=ssl_enabled, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, ssl_ca_certs=ssl_ca_certs, replicaSet=replica, network_timeout=10)
288
- else:
289
- con = pymongo.Connection(host, port, read_preference=pymongo.ReadPreference.SECONDARY, replicaSet=replica, network_timeout=10)
290
- try:
291
- # https://api.mongodb.com/python/current/api/pymongo/mongo_client.html
292
- # The ismaster command is cheap and does not require auth.
293
- con.admin.command('ismaster', connectTimeoutMS=10000)
294
- except Exception, e:
295
- return exit_with_general_critical(e), None
305
+ con = pymongo.MongoClient(host, port, read_preference=pymongo.ReadPreference.SECONDARY, replicaSet=replica, **con_args)
296
306
  else:
297
307
  if replica is None:
298
308
  con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)
299
309
  else:
300
310
  con = pymongo.Connection(host, port, slave_okay=True, network_timeout=10)
301
- #con = pymongo.Connection(host, port, slave_okay=True, replicaSet=replica, network_timeout=10)
311
+
312
+ # we must authenticate the connection, otherwise we won't be able to perform certain operations
313
+ if ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-256':
314
+ con.the_database.authenticate(user, mechanism='SCRAM-SHA-256')
315
+ elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'SCRAM-SHA-1':
316
+ con.the_database.authenticate(user, mechanism='SCRAM-SHA-1')
317
+ elif ssl_cert and ssl_ca_cert_file and user and auth_mechanism == 'MONGODB-X509':
318
+ con.the_database.authenticate(user, mechanism='MONGODB-X509')
319
+
320
+ try:
321
+ result = con.admin.command("ismaster")
322
+ except ConnectionFailure:
323
+ print("CRITICAL - Connection to Mongo server on %s:%s has failed" % (host, port) )
324
+ sys.exit(2)
325
+
326
+ if 'arbiterOnly' in result and result['arbiterOnly'] == True:
327
+ print("OK - State: 7 (Arbiter on port %s)" % (port))
328
+ sys.exit(0)
302
329
 
303
330
  if user and passwd:
304
- db = con["admin"]
305
- if not db.authenticate(user, passwd):
331
+ db = con[authdb]
332
+ try:
333
+ db.authenticate(user, password=passwd)
334
+ except PyMongoError:
306
335
  sys.exit("Username/Password incorrect")
307
- except Exception, e:
336
+
337
+ # Ping to check that the server is responding.
338
+ con.admin.command("ping")
339
+
340
+ except Exception as e:
308
341
  if isinstance(e, pymongo.errors.AutoReconnect) and str(e).find(" is an arbiter") != -1:
309
342
  # We got a pymongo AutoReconnect exception that tells us we connected to an Arbiter Server
310
343
  # This means: Arbiter is reachable and can answer requests/votes - this is all we need to know from an arbiter
311
- print "OK - State: 7 (Arbiter)"
344
+ print("OK - State: 7 (Arbiter)")
312
345
  sys.exit(0)
313
346
  return exit_with_general_critical(e), None
314
347
  return 0, con
@@ -318,7 +351,7 @@ def exit_with_general_warning(e):
318
351
  if isinstance(e, SystemExit):
319
352
  return e
320
353
  else:
321
- print "WARNING - General MongoDB warning:", e
354
+ print("WARNING - General MongoDB warning:", e)
322
355
  return 1
323
356
 
324
357
 
@@ -326,21 +359,27 @@ def exit_with_general_critical(e):
326
359
  if isinstance(e, SystemExit):
327
360
  return e
328
361
  else:
329
- print "CRITICAL - General MongoDB Error:", e
362
+ print("CRITICAL - General MongoDB Error:", e)
330
363
  return 2
331
364
 
332
365
 
333
366
  def set_read_preference(db):
334
- if pymongo.version >= "2.2" and pymongo.version < "2.8":
367
+ if pymongo.version >= "2.2":
335
368
  pymongo.read_preferences.Secondary
336
369
  else:
337
370
  db.read_preference = pymongo.ReadPreference.SECONDARY
338
371
 
372
+ def check_version(con):
373
+ try:
374
+ server_info = con.server_info()
375
+ except Exception as e:
376
+ return exit_with_general_critical(e), None
377
+ return 0, int(server_info['version'].split('.')[0].strip())
339
378
 
340
379
  def check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time):
341
380
  warning = warning or 3
342
381
  critical = critical or 6
343
- message = "Connection took %i seconds" % conn_time
382
+ message = "Connection took %.3f seconds" % conn_time
344
383
  message += performance_data(perf_data, [(conn_time, "connection_time", warning, critical)])
345
384
 
346
385
  return check_levels(conn_time, warning, critical, message)
@@ -362,13 +401,17 @@ def check_connections(con, warning, critical, perf_data):
362
401
  (available, "available_connections")])
363
402
  return check_levels(used_percent, warning, critical, message)
364
403
 
365
- except Exception, e:
404
+ except Exception as e:
366
405
  return exit_with_general_critical(e)
367
406
 
368
407
 
369
- def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd):
408
+ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_lag, user, passwd, ssl=None, ssl_ca_cert_file=None, cert_file=None):
370
409
  # Get mongo to tell us replica set member name when connecting locally
371
410
  if "127.0.0.1" == host:
411
+ if not "me" in list(con.admin.command("ismaster","1").keys()):
412
+ print("UNKNOWN - This is not replicated MongoDB")
413
+ return 3
414
+
372
415
  host = con.admin.command("ismaster","1")["me"].split(':')[0]
373
416
 
374
417
  if percent:
@@ -380,15 +423,15 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
380
423
  rs_status = {}
381
424
  slaveDelays = {}
382
425
  try:
383
- set_read_preference(con.admin)
426
+ #set_read_preference(con.admin)
384
427
 
385
428
  # Get replica set status
386
429
  try:
387
430
  rs_status = con.admin.command("replSetGetStatus")
388
- except pymongo.errors.OperationFailure, e:
389
- if e.code == None and str(e).find('failed: not running with --replSet"'):
390
- print "OK - Not running with replSet"
391
- return 0
431
+ except pymongo.errors.OperationFailure as e:
432
+ if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
433
+ print("UNKNOWN - Not running with replSet")
434
+ return 3
392
435
 
393
436
  serverVersion = tuple(con.server_info()['version'].split('.'))
394
437
  if serverVersion >= tuple("2.0.0".split(".")):
@@ -409,24 +452,24 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
409
452
  for member in rs_status["members"]:
410
453
  if member["stateStr"] == "PRIMARY":
411
454
  primary_node = member
412
- if member["name"].split(':')[0] == host and int(member["name"].split(':')[1]) == port:
455
+ if member.get('name') == "{0}:{1}".format(host, port):
413
456
  host_node = member
414
457
 
415
458
  # Check if we're in the middle of an election and don't have a primary
416
459
  if primary_node is None:
417
- print "WARNING - No primary defined. In an election?"
460
+ print("WARNING - No primary defined. In an election?")
418
461
  return 1
419
462
 
420
463
  # Check if we failed to find the current host
421
464
  # below should never happen
422
465
  if host_node is None:
423
- print "CRITICAL - Unable to find host '" + host + "' in replica set."
466
+ print("CRITICAL - Unable to find host '" + host + "' in replica set.")
424
467
  return 2
425
468
 
426
469
  # Is the specified host the primary?
427
470
  if host_node["stateStr"] == "PRIMARY":
428
471
  if max_lag == False:
429
- print "OK - This is the primary."
472
+ print("OK - This is the primary.")
430
473
  return 0
431
474
  else:
432
475
  #get the maximal replication lag
@@ -439,7 +482,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
439
482
  data = data + member['name'] + " lag=%d;" % replicationLag
440
483
  maximal_lag = max(maximal_lag, replicationLag)
441
484
  if percent:
442
- err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user=user, passwd=passwd)
485
+ err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user, passwd)
443
486
  if err != 0:
444
487
  return err
445
488
  primary_timediff = replication_get_time_diff(con)
@@ -451,8 +494,8 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
451
494
  message += performance_data(perf_data, [(maximal_lag, "replication_lag", warning, critical)])
452
495
  return check_levels(maximal_lag, warning, critical, message)
453
496
  elif host_node["stateStr"] == "ARBITER":
454
- print "OK - This is an arbiter"
455
- return 0
497
+ print("UNKNOWN - This is an arbiter")
498
+ return 3
456
499
 
457
500
  # Find the difference in optime between current node and PRIMARY
458
501
 
@@ -471,7 +514,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
471
514
  lag = float(optime_lag.seconds + optime_lag.days * 24 * 3600)
472
515
 
473
516
  if percent:
474
- err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user=user, passwd=passwd)
517
+ err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), ssl, user, passwd, None, None, ssl_ca_cert_file, cert_file)
475
518
  if err != 0:
476
519
  return err
477
520
  primary_timediff = replication_get_time_diff(con)
@@ -503,12 +546,12 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
503
546
 
504
547
  # Check if we're in the middle of an election and don't have a primary
505
548
  if primary_node is None:
506
- print "WARNING - No primary defined. In an election?"
549
+ print("WARNING - No primary defined. In an election?")
507
550
  sys.exit(1)
508
551
 
509
552
  # Is the specified host the primary?
510
553
  if host_node["stateStr"] == "PRIMARY":
511
- print "OK - This is the primary."
554
+ print("OK - This is the primary.")
512
555
  sys.exit(0)
513
556
 
514
557
  # Find the difference in optime between current node and PRIMARY
@@ -527,20 +570,42 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
527
570
  message += performance_data(perf_data, [(lag, "replication_lag", warning, critical)])
528
571
  return check_levels(lag, warning, critical, message)
529
572
 
530
- except Exception, e:
573
+ except Exception as e:
531
574
  return exit_with_general_critical(e)
532
575
 
576
+ #
577
+ # Check the memory usage of mongo. Alerting on this may be hard to get right
578
+ # because it'll try to get as much memory as it can. And that's probably
579
+ # a good thing.
580
+ #
581
+ def check_memory(con, warning, critical, perf_data, mapped_memory, host):
582
+ # Get the total system memory of this system (This is totally bogus if you
583
+ # are running this command remotely) and calculate based on that how much
584
+ # memory used by Mongodb is ok or not.
585
+ meminfo = open('/proc/meminfo').read()
586
+ matched = re.search(r'^MemTotal:\s+(\d+)', meminfo)
587
+ if matched:
588
+ mem_total_kB = int(matched.groups()[0])
589
+
590
+ if host != "127.0.0.1" and not warning:
591
+ # Running remotely and value was not set by user, use hardcoded value
592
+ warning = 12
593
+ else:
594
+ # running locally or user provided value
595
+ warning = warning or (mem_total_kB * 0.8) / 1024.0 / 1024.0
596
+
597
+ if host != "127.0.0.1" and not critical:
598
+ critical = 16
599
+ else:
600
+ critical = critical or (mem_total_kB * 0.9) / 1024.0 / 1024.0
601
+
602
+ # debugging
603
+ #print "mem total: {0}kb, warn: {1}GB, crit: {2}GB".format(mem_total_kB,warning, critical)
533
604
 
534
- def check_memory(con, warning, critical, perf_data, mapped_memory):
535
- #
536
- # These thresholds are basically meaningless, and must be customized to your system's ram
537
- #
538
- warning = warning or 8
539
- critical = critical or 16
540
605
  try:
541
606
  data = get_server_status(con)
542
607
  if not data['mem']['supported'] and not mapped_memory:
543
- print "OK - Platform not supported for memory info"
608
+ print("OK - Platform not supported for memory info")
544
609
  return 0
545
610
  #
546
611
  # convert to gigs
@@ -577,7 +642,7 @@ def check_memory(con, warning, critical, perf_data, mapped_memory):
577
642
  else:
578
643
  return check_levels(mem_resident, warning, critical, message)
579
644
 
580
- except Exception, e:
645
+ except Exception as e:
581
646
  return exit_with_general_critical(e)
582
647
 
583
648
 
@@ -590,7 +655,7 @@ def check_memory_mapped(con, warning, critical, perf_data):
590
655
  try:
591
656
  data = get_server_status(con)
592
657
  if not data['mem']['supported']:
593
- print "OK - Platform not supported for memory info"
658
+ print("OK - Platform not supported for memory info")
594
659
  return 0
595
660
  #
596
661
  # convert to gigs
@@ -607,33 +672,45 @@ def check_memory_mapped(con, warning, critical, perf_data):
607
672
  message += " %.2fGB mappedWithJournal" % mem_mapped_journal
608
673
  except:
609
674
  mem_mapped_journal = 0
610
- message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped"), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
675
+ message += performance_data(perf_data, [("%.2f" % mem_mapped, "memory_mapped", warning, critical), ("%.2f" % mem_mapped_journal, "mappedWithJournal")])
611
676
 
612
677
  if not mem_mapped == -1:
613
678
  return check_levels(mem_mapped, warning, critical, message)
614
679
  else:
615
- print "OK - Server does not provide mem.mapped info"
680
+ print("OK - Server does not provide mem.mapped info")
616
681
  return 0
617
682
 
618
- except Exception, e:
683
+ except Exception as e:
619
684
  return exit_with_general_critical(e)
620
685
 
621
686
 
622
- def check_lock(con, warning, critical, perf_data):
687
+ #
688
+ # Return the percentage of the time there was a global Lock
689
+ #
690
+ def check_lock(con, warning, critical, perf_data, mongo_version):
623
691
  warning = warning or 10
624
692
  critical = critical or 30
625
- try:
626
- data = get_server_status(con)
627
- #
628
- # calculate percentage
629
- #
630
- lock_percentage = float(data['globalLock']['lockTime']) / float(data['globalLock']['totalTime']) * 100
631
- message = "Lock Percentage: %.2f%%" % lock_percentage
632
- message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
633
- return check_levels(lock_percentage, warning, critical, message)
634
-
635
- except Exception, e:
636
- return exit_with_general_critical(e)
693
+ if mongo_version == 2:
694
+ try:
695
+ data = get_server_status(con)
696
+ lockTime = data['globalLock']['lockTime']
697
+ totalTime = data['globalLock']['totalTime']
698
+ #
699
+ # calculate percentage
700
+ #
701
+ if lockTime > totalTime:
702
+ lock_percentage = 0.00
703
+ else:
704
+ lock_percentage = float(lockTime) / float(totalTime) * 100
705
+ message = "Lock Percentage: %.2f%%" % lock_percentage
706
+ message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
707
+ return check_levels(lock_percentage, warning, critical, message)
708
+ except Exception as e:
709
+ print("Couldn't get globalLock lockTime info from mongo, are you sure you're not using version 3? See the -M option.")
710
+ return exit_with_general_critical(e)
711
+ else:
712
+ print("OK - MongoDB version 3 doesn't report on global locks")
713
+ return 0
637
714
 
638
715
 
639
716
  def check_flushing(con, warning, critical, avg, perf_data):
@@ -645,19 +722,24 @@ def check_flushing(con, warning, critical, avg, perf_data):
645
722
  critical = critical or 15000
646
723
  try:
647
724
  data = get_server_status(con)
648
- if avg:
649
- flush_time = float(data['backgroundFlushing']['average_ms'])
650
- stat_type = "Average"
651
- else:
652
- flush_time = float(data['backgroundFlushing']['last_ms'])
653
- stat_type = "Last"
725
+ try:
726
+ data['backgroundFlushing']
727
+ if avg:
728
+ flush_time = float(data['backgroundFlushing']['average_ms'])
729
+ stat_type = "Average"
730
+ else:
731
+ flush_time = float(data['backgroundFlushing']['last_ms'])
732
+ stat_type = "Last"
654
733
 
655
- message = "%s Flush Time: %.2fms" % (stat_type, flush_time)
656
- message += performance_data(perf_data, [("%.2fms" % flush_time, "%s_flush_time" % stat_type.lower(), warning, critical)])
734
+ message = "%s Flush Time: %.2fms" % (stat_type, flush_time)
735
+ message += performance_data(perf_data, [("%.2fms" % flush_time, "%s_flush_time" % stat_type.lower(), warning, critical)])
657
736
 
658
- return check_levels(flush_time, warning, critical, message)
737
+ return check_levels(flush_time, warning, critical, message)
738
+ except Exception:
739
+ print("OK - flushing stats not available for this storage engine")
740
+ return 0
659
741
 
660
- except Exception, e:
742
+ except Exception as e:
661
743
  return exit_with_general_critical(e)
662
744
 
663
745
 
@@ -668,6 +750,7 @@ def index_miss_ratio(con, warning, critical, perf_data):
668
750
  data = get_server_status(con)
669
751
 
670
752
  try:
753
+ data['indexCounters']
671
754
  serverVersion = tuple(con.server_info()['version'].split('.'))
672
755
  if serverVersion >= tuple("2.4.0".split(".")):
673
756
  miss_ratio = float(data['indexCounters']['missRatio'])
@@ -675,19 +758,24 @@ def index_miss_ratio(con, warning, critical, perf_data):
675
758
  miss_ratio = float(data['indexCounters']['btree']['missRatio'])
676
759
  except KeyError:
677
760
  not_supported_msg = "not supported on this platform"
678
- if data['indexCounters'].has_key('note'):
679
- print "OK - MongoDB says: " + not_supported_msg
761
+ try:
762
+ data['indexCounters']
763
+ if 'note' in data['indexCounters']:
764
+ print("OK - MongoDB says: " + not_supported_msg)
765
+ return 0
766
+ else:
767
+ print("WARNING - Can't get counter from MongoDB")
768
+ return 1
769
+ except Exception:
770
+ print("OK - MongoDB says: " + not_supported_msg)
680
771
  return 0
681
- else:
682
- print "WARNING - Can't get counter from MongoDB"
683
- return 1
684
772
 
685
773
  message = "Miss Ratio: %.2f" % miss_ratio
686
774
  message += performance_data(perf_data, [("%.2f" % miss_ratio, "index_miss_ratio", warning, critical)])
687
775
 
688
776
  return check_levels(miss_ratio, warning, critical, message)
689
777
 
690
- except Exception, e:
778
+ except Exception as e:
691
779
  return exit_with_general_critical(e)
692
780
 
693
781
  def check_replset_quorum(con, perf_data):
@@ -711,7 +799,7 @@ def check_replset_quorum(con, perf_data):
711
799
  message = "Cluster is not quorate and cannot operate"
712
800
 
713
801
  return check_levels(state, warning, critical, message)
714
- except Exception, e:
802
+ except Exception as e:
715
803
  return exit_with_general_critical(e)
716
804
 
717
805
 
@@ -720,52 +808,69 @@ def check_replset_state(con, perf_data, warning="", critical=""):
720
808
  try:
721
809
  warning = [int(x) for x in warning.split(",")]
722
810
  except:
723
- warning = [0, 3, 5, 9]
811
+ warning = [0, 3, 5]
724
812
  try:
725
813
  critical = [int(x) for x in critical.split(",")]
726
814
  except:
727
815
  critical = [8, 4, -1]
728
816
 
729
- ok = range(-1, 8) # should include the range of all posiible values
817
+ ok = list(range(-1, 8)) # should include the range of all posiible values
730
818
  try:
819
+ worst_state = -2
820
+ message = ""
731
821
  try:
732
822
  try:
733
823
  set_read_preference(con.admin)
734
824
  data = con.admin.command(pymongo.son_manipulator.SON([('replSetGetStatus', 1)]))
735
825
  except:
736
826
  data = con.admin.command(son.SON([('replSetGetStatus', 1)]))
737
- state = int(data['myState'])
738
- except pymongo.errors.OperationFailure, e:
739
- if e.code == None and str(e).find('failed: not running with --replSet"'):
740
- state = -1
741
-
742
- if state == 8:
743
- message = "State: %i (Down)" % state
744
- elif state == 4:
745
- message = "State: %i (Fatal error)" % state
746
- elif state == 0:
747
- message = "State: %i (Starting up, phase1)" % state
748
- elif state == 3:
749
- message = "State: %i (Recovering)" % state
750
- elif state == 5:
751
- message = "State: %i (Starting up, phase2)" % state
752
- elif state == 1:
753
- message = "State: %i (Primary)" % state
754
- elif state == 2:
755
- message = "State: %i (Secondary)" % state
756
- elif state == 7:
757
- message = "State: %i (Arbiter)" % state
758
- elif state == 9:
759
- message = "State: %i (Rollback)" % state
760
- elif state == -1:
761
- message = "Not running with replSet"
762
- else:
763
- message = "State: %i (Unknown state)" % state
764
- message += performance_data(perf_data, [(state, "state")])
765
- return check_levels(state, warning, critical, message, ok)
766
- except Exception, e:
827
+ members = data['members']
828
+ my_state = int(data['myState'])
829
+ worst_state = my_state
830
+ for member in members:
831
+ their_state = int(member['state'])
832
+ message += " %s: %i (%s)" % (member['name'], their_state, state_text(their_state))
833
+ if state_is_worse(their_state, worst_state, warning, critical):
834
+ worst_state = their_state
835
+ message += performance_data(perf_data, [(my_state, "state")])
836
+
837
+ except pymongo.errors.OperationFailure as e:
838
+ if ((e.code == None and str(e).find('failed: not running with --replSet"')) or (e.code == 76 and str(e).find('not running with --replSet"'))):
839
+ worst_state = -1
840
+
841
+ return check_levels(worst_state, warning, critical, message, ok)
842
+ except Exception as e:
767
843
  return exit_with_general_critical(e)
768
844
 
845
+ def state_is_worse(state, worst_state, warning, critical):
846
+ if worst_state in critical:
847
+ return False
848
+ if worst_state in warning:
849
+ return state in critical
850
+ return (state in warning) or (state in critical)
851
+
852
+ def state_text(state):
853
+ if state == 8:
854
+ return "Down"
855
+ elif state == 4:
856
+ return "Fatal error"
857
+ elif state == 0:
858
+ return "Starting up, phase1"
859
+ elif state == 3:
860
+ return "Recovering"
861
+ elif state == 5:
862
+ return "Starting up, phase2"
863
+ elif state == 1:
864
+ return "Primary"
865
+ elif state == 2:
866
+ return "Secondary"
867
+ elif state == 7:
868
+ return "Arbiter"
869
+ elif state == -1:
870
+ return "Not running with replSet"
871
+ else:
872
+ return "Unknown state"
873
+
769
874
 
770
875
  def check_databases(con, warning, critical, perf_data=None):
771
876
  try:
@@ -779,7 +884,7 @@ def check_databases(con, warning, critical, perf_data=None):
779
884
  message = "Number of DBs: %.0f" % count
780
885
  message += performance_data(perf_data, [(count, "databases", warning, critical, message)])
781
886
  return check_levels(count, warning, critical, message)
782
- except Exception, e:
887
+ except Exception as e:
783
888
  return exit_with_general_critical(e)
784
889
 
785
890
 
@@ -801,7 +906,7 @@ def check_collections(con, warning, critical, perf_data=None):
801
906
  message += performance_data(perf_data, [(count, "collections", warning, critical, message)])
802
907
  return check_levels(count, warning, critical, message)
803
908
 
804
- except Exception, e:
909
+ except Exception as e:
805
910
  return exit_with_general_critical(e)
806
911
 
807
912
 
@@ -838,21 +943,21 @@ def check_database_size(con, database, warning, critical, perf_data):
838
943
  try:
839
944
  set_read_preference(con.admin)
840
945
  data = con[database].command('dbstats')
841
- storage_size = data['storageSize'] / 1024 / 1024
946
+ storage_size = data['storageSize'] // 1024 // 1024
842
947
  if perf_data:
843
948
  perfdata += " | database_size=%i;%i;%i" % (storage_size, warning, critical)
844
949
  #perfdata += " database=%s" %(database)
845
950
 
846
951
  if storage_size >= critical:
847
- print "CRITICAL - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
952
+ print("CRITICAL - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
848
953
  return 2
849
954
  elif storage_size >= warning:
850
- print "WARNING - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
955
+ print("WARNING - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
851
956
  return 1
852
957
  else:
853
- print "OK - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata)
958
+ print("OK - Database size: %.0f MB, Database: %s%s" % (storage_size, database, perfdata))
854
959
  return 0
855
- except Exception, e:
960
+ except Exception as e:
856
961
  return exit_with_general_critical(e)
857
962
 
858
963
 
@@ -866,20 +971,42 @@ def check_database_indexes(con, database, warning, critical, perf_data):
866
971
  try:
867
972
  set_read_preference(con.admin)
868
973
  data = con[database].command('dbstats')
869
- index_size = data['indexSize'] / 1024 / 1024
974
+ index_size = data['indexSize'] / 1024 // 1024
870
975
  if perf_data:
871
976
  perfdata += " | database_indexes=%i;%i;%i" % (index_size, warning, critical)
872
977
 
873
978
  if index_size >= critical:
874
- print "CRITICAL - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
979
+ print("CRITICAL - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
875
980
  return 2
876
981
  elif index_size >= warning:
877
- print "WARNING - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
982
+ print("WARNING - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
983
+ return 1
984
+ else:
985
+ print("OK - %s indexSize: %.0f MB %s" % (database, index_size, perfdata))
986
+ return 0
987
+ except Exception as e:
988
+ return exit_with_general_critical(e)
989
+
990
+
991
+ def check_collection_documents(con, database, collection, warning, critical, perf_data):
992
+ perfdata = ""
993
+ try:
994
+ set_read_preference(con.admin)
995
+ data = con[database].command('collstats', collection)
996
+ documents = data['count']
997
+ if perf_data:
998
+ perfdata += " | collection_documents=%i;%i;%i" % (documents, warning, critical)
999
+
1000
+ if documents >= critical:
1001
+ print("CRITICAL - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
1002
+ return 2
1003
+ elif documents >= warning:
1004
+ print("WARNING - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
878
1005
  return 1
879
1006
  else:
880
- print "OK - %s indexSize: %.0f MB %s" % (database, index_size, perfdata)
1007
+ print("OK - %s.%s documents: %s %s" % (database, collection, documents, perfdata))
881
1008
  return 0
882
- except Exception, e:
1009
+ except Exception as e:
883
1010
  return exit_with_general_critical(e)
884
1011
 
885
1012
 
@@ -898,15 +1025,15 @@ def check_collection_indexes(con, database, collection, warning, critical, perf_
898
1025
  perfdata += " | collection_indexes=%i;%i;%i" % (total_index_size, warning, critical)
899
1026
 
900
1027
  if total_index_size >= critical:
901
- print "CRITICAL - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1028
+ print("CRITICAL - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
902
1029
  return 2
903
1030
  elif total_index_size >= warning:
904
- print "WARNING - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1031
+ print("WARNING - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
905
1032
  return 1
906
1033
  else:
907
- print "OK - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata)
1034
+ print("OK - %s.%s totalIndexSize: %.0f MB %s" % (database, collection, total_index_size, perfdata))
908
1035
  return 0
909
- except Exception, e:
1036
+ except Exception as e:
910
1037
  return exit_with_general_critical(e)
911
1038
 
912
1039
 
@@ -923,7 +1050,7 @@ def check_queues(con, warning, critical, perf_data):
923
1050
  message += performance_data(perf_data, [(total_queues, "total_queues", warning, critical), (readers_queues, "readers_queues"), (writers_queues, "writers_queues")])
924
1051
  return check_levels(total_queues, warning, critical, message)
925
1052
 
926
- except Exception, e:
1053
+ except Exception as e:
927
1054
  return exit_with_general_critical(e)
928
1055
 
929
1056
  def check_collection_size(con, database, collection, warning, critical, perf_data):
@@ -938,18 +1065,43 @@ def check_collection_size(con, database, collection, warning, critical, perf_dat
938
1065
  perfdata += " | collection_size=%i;%i;%i" % (size, warning, critical)
939
1066
 
940
1067
  if size >= critical:
941
- print "CRITICAL - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1068
+ print("CRITICAL - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
942
1069
  return 2
943
1070
  elif size >= warning:
944
- print "WARNING - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1071
+ print("WARNING - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
945
1072
  return 1
946
1073
  else:
947
- print "OK - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata)
1074
+ print("OK - %s.%s size: %.0f MB %s" % (database, collection, size, perfdata))
948
1075
  return 0
949
- except Exception, e:
1076
+ except Exception as e:
950
1077
  return exit_with_general_critical(e)
951
1078
 
952
- def check_queries_per_second(con, query_type, warning, critical, perf_data):
1079
+
1080
+ def check_collection_storageSize(con, database, collection, warning, critical, perf_data):
1081
+ warning = warning or 100
1082
+ critical = critical or 1000
1083
+ perfdata = ""
1084
+ try:
1085
+ set_read_preference(con.admin)
1086
+ data = con[database].command('collstats', collection)
1087
+ storageSize = data['storageSize'] / 1024 / 1024
1088
+ if perf_data:
1089
+ perfdata += " | collection_storageSize=%i;%i;%i" % (storageSize, warning, critical)
1090
+
1091
+ if storageSize >= critical:
1092
+ print("CRITICAL - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1093
+ return 2
1094
+ elif storageSize >= warning:
1095
+ print("WARNING - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1096
+ return 1
1097
+ else:
1098
+ print("OK - %s.%s storageSize: %.0f MB %s" % (database, collection, storageSize, perfdata))
1099
+ return 0
1100
+ except Exception as e:
1101
+ return exit_with_general_critical(e)
1102
+
1103
+
1104
+ def check_queries_per_second(con, query_type, warning, critical, perf_data, mongo_version):
953
1105
  warning = warning or 250
954
1106
  critical = critical or 500
955
1107
 
@@ -970,10 +1122,17 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
970
1122
  diff_query = num - last_count['data'][query_type]['count']
971
1123
  diff_ts = ts - last_count['data'][query_type]['ts']
972
1124
 
1125
+ if diff_ts == 0:
1126
+ message = "diff_query = " + str(diff_query) + " diff_ts = " + str(diff_ts)
1127
+ return check_levels(0, warning, critical, message)
1128
+
973
1129
  query_per_sec = float(diff_query) / float(diff_ts)
974
1130
 
975
1131
  # update the count now
976
- db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1132
+ if mongo_version == 2:
1133
+ db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1134
+ else:
1135
+ db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
977
1136
 
978
1137
  message = "Queries / Sec: %f" % query_per_sec
979
1138
  message += performance_data(perf_data, [(query_per_sec, "%s_per_sec" % query_type, warning, critical, message)])
@@ -982,17 +1141,24 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data):
982
1141
  # since it is the first run insert it
983
1142
  query_per_sec = 0
984
1143
  message = "First run of check.. no data"
985
- db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1144
+ if mongo_version == 2:
1145
+ db.nagios_check.update({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1146
+ else:
1147
+ db.nagios_check.update_one({u'_id': last_count['_id']}, {'$set': {"data.%s" % query_type: {'count': num, 'ts': int(time.time())}}})
1148
+
986
1149
  except TypeError:
987
1150
  #
988
1151
  # since it is the first run insert it
989
1152
  query_per_sec = 0
990
1153
  message = "First run of check.. no data"
991
- db.nagios_check.insert({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
1154
+ if mongo_version == 2:
1155
+ db.nagios_check.insert({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
1156
+ else:
1157
+ db.nagios_check.insert_one({'check': 'query_counts', 'data': {query_type: {'count': num, 'ts': int(time.time())}}})
992
1158
 
993
1159
  return check_levels(query_per_sec, warning, critical, message)
994
1160
 
995
- except Exception, e:
1161
+ except Exception as e:
996
1162
  return exit_with_general_critical(e)
997
1163
 
998
1164
 
@@ -1039,7 +1205,7 @@ def check_oplog(con, warning, critical, perf_data):
1039
1205
  message += performance_data(perf_data, [("%.2f" % hours_in_oplog, 'oplog_time', warning, critical), ("%.2f " % approx_level, 'oplog_time_100_percent_used')])
1040
1206
  return check_levels(-approx_level, -warning, -critical, message)
1041
1207
 
1042
- except Exception, e:
1208
+ except Exception as e:
1043
1209
  return exit_with_general_critical(e)
1044
1210
 
1045
1211
 
@@ -1057,7 +1223,7 @@ Under very high write situations it is normal for this value to be nonzero. """
1057
1223
  message += performance_data(perf_data, [(j_commits_in_wl, "j_commits_in_wl", warning, critical)])
1058
1224
  return check_levels(j_commits_in_wl, warning, critical, message)
1059
1225
 
1060
- except Exception, e:
1226
+ except Exception as e:
1061
1227
  return exit_with_general_critical(e)
1062
1228
 
1063
1229
 
@@ -1073,7 +1239,7 @@ def check_journaled(con, warning, critical, perf_data):
1073
1239
  message += performance_data(perf_data, [("%.2f" % journaled, "journaled", warning, critical)])
1074
1240
  return check_levels(journaled, warning, critical, message)
1075
1241
 
1076
- except Exception, e:
1242
+ except Exception as e:
1077
1243
  return exit_with_general_critical(e)
1078
1244
 
1079
1245
 
@@ -1090,11 +1256,11 @@ than the amount physically written to disk."""
1090
1256
  message += performance_data(perf_data, [("%.2f" % writes, "write_to_data_files", warning, critical)])
1091
1257
  return check_levels(writes, warning, critical, message)
1092
1258
 
1093
- except Exception, e:
1259
+ except Exception as e:
1094
1260
  return exit_with_general_critical(e)
1095
1261
 
1096
1262
 
1097
- def get_opcounters(data, opcounters_name, host):
1263
+ def get_opcounters(data, opcounters_name, host, port):
1098
1264
  try:
1099
1265
  insert = data[opcounters_name]['insert']
1100
1266
  query = data[opcounters_name]['query']
@@ -1102,21 +1268,21 @@ def get_opcounters(data, opcounters_name, host):
1102
1268
  delete = data[opcounters_name]['delete']
1103
1269
  getmore = data[opcounters_name]['getmore']
1104
1270
  command = data[opcounters_name]['command']
1105
- except KeyError, e:
1271
+ except KeyError as e:
1106
1272
  return 0, [0] * 100
1107
1273
  total_commands = insert + query + update + delete + getmore + command
1108
1274
  new_vals = [total_commands, insert, query, update, delete, getmore, command]
1109
- return maintain_delta(new_vals, host, opcounters_name)
1275
+ return maintain_delta(new_vals, host, port, opcounters_name)
1110
1276
 
1111
1277
 
1112
- def check_opcounters(con, host, warning, critical, perf_data):
1278
+ def check_opcounters(con, host, port, warning, critical, perf_data):
1113
1279
  """ A function to get all opcounters delta per minute. In case of a replication - gets the opcounters+opcountersRepl"""
1114
1280
  warning = warning or 10000
1115
1281
  critical = critical or 15000
1116
1282
 
1117
1283
  data = get_server_status(con)
1118
- err1, delta_opcounters = get_opcounters(data, 'opcounters', host)
1119
- err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host)
1284
+ err1, delta_opcounters = get_opcounters(data, 'opcounters', host, port)
1285
+ err2, delta_opcounters_repl = get_opcounters(data, 'opcountersRepl', host, port)
1120
1286
  if err1 == 0 and err2 == 0:
1121
1287
  delta = [(x + y) for x, y in zip(delta_opcounters, delta_opcounters_repl)]
1122
1288
  delta[0] = delta_opcounters[0] # only the time delta shouldn't be summarized
@@ -1124,14 +1290,14 @@ def check_opcounters(con, host, warning, critical, perf_data):
1124
1290
  message = "Test succeeded , old values missing"
1125
1291
  message = "Opcounters: total=%d,insert=%d,query=%d,update=%d,delete=%d,getmore=%d,command=%d" % tuple(per_minute_delta)
1126
1292
  message += performance_data(perf_data, ([(per_minute_delta[0], "total", warning, critical), (per_minute_delta[1], "insert"),
1127
- (per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[5], "delete"),
1293
+ (per_minute_delta[2], "query"), (per_minute_delta[3], "update"), (per_minute_delta[4], "delete"),
1128
1294
  (per_minute_delta[5], "getmore"), (per_minute_delta[6], "command")]))
1129
1295
  return check_levels(per_minute_delta[0], warning, critical, message)
1130
1296
  else:
1131
1297
  return exit_with_general_critical("problem reading data from temp file")
1132
1298
 
1133
1299
 
1134
- def check_current_lock(con, host, warning, critical, perf_data):
1300
+ def check_current_lock(con, host, port, warning, critical, perf_data):
1135
1301
  """ A function to get current lock percentage and not a global one, as check_lock function does"""
1136
1302
  warning = warning or 10
1137
1303
  critical = critical or 30
@@ -1140,7 +1306,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
1140
1306
  lockTime = float(data['globalLock']['lockTime'])
1141
1307
  totalTime = float(data['globalLock']['totalTime'])
1142
1308
 
1143
- err, delta = maintain_delta([totalTime, lockTime], host, "locktime")
1309
+ err, delta = maintain_delta([totalTime, lockTime], host, port, "locktime")
1144
1310
  if err == 0:
1145
1311
  lock_percentage = delta[2] / delta[1] * 100 # lockTime/totalTime*100
1146
1312
  message = "Current Lock Percentage: %.2f%%" % lock_percentage
@@ -1150,7 +1316,7 @@ def check_current_lock(con, host, warning, critical, perf_data):
1150
1316
  return exit_with_general_warning("problem reading data from temp file")
1151
1317
 
1152
1318
 
1153
- def check_page_faults(con, host, warning, critical, perf_data):
1319
+ def check_page_faults(con, host, port, warning, critical, perf_data):
1154
1320
  """ A function to get page_faults per second from the system"""
1155
1321
  warning = warning or 10
1156
1322
  critical = critical or 30
@@ -1162,7 +1328,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
1162
1328
  # page_faults unsupported on the underlaying system
1163
1329
  return exit_with_general_critical("page_faults unsupported on the underlaying system")
1164
1330
 
1165
- err, delta = maintain_delta([page_faults], host, "page_faults")
1331
+ err, delta = maintain_delta([page_faults], host, port, "page_faults")
1166
1332
  if err == 0:
1167
1333
  page_faults_ps = delta[1] / delta[0]
1168
1334
  message = "Page faults : %.2f ps" % page_faults_ps
@@ -1172,7 +1338,7 @@ def check_page_faults(con, host, warning, critical, perf_data):
1172
1338
  return exit_with_general_warning("problem reading data from temp file")
1173
1339
 
1174
1340
 
1175
- def check_asserts(con, host, warning, critical, perf_data):
1341
+ def check_asserts(con, host, port, warning, critical, perf_data):
1176
1342
  """ A function to get asserts from the system"""
1177
1343
  warning = warning or 1
1178
1344
  critical = critical or 10
@@ -1187,7 +1353,7 @@ def check_asserts(con, host, warning, critical, perf_data):
1187
1353
  user = asserts['user']
1188
1354
  rollovers = asserts['rollovers']
1189
1355
 
1190
- err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, "asserts")
1356
+ err, delta = maintain_delta([regular, warning_asserts, msg, user, rollovers], host, port, "asserts")
1191
1357
 
1192
1358
  if err == 0:
1193
1359
  if delta[5] != 0:
@@ -1221,7 +1387,7 @@ def get_stored_primary_server_name(db):
1221
1387
  return stored_primary_server
1222
1388
 
1223
1389
 
1224
- def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
1390
+ def check_replica_primary(con, host, warning, critical, perf_data, replicaset, mongo_version):
1225
1391
  """ A function to check if the primary server of a replica set has changed """
1226
1392
  if warning is None and critical is None:
1227
1393
  warning = 1
@@ -1244,7 +1410,10 @@ def check_replica_primary(con, host, warning, critical, perf_data, replicaset):
1244
1410
  saved_primary = "None"
1245
1411
  if current_primary != saved_primary:
1246
1412
  last_primary_server_record = {"server": current_primary}
1247
- db.last_primary_server.update({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True, safe=True)
1413
+ if mongo_version == 2:
1414
+ db.last_primary_server.update({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
1415
+ else:
1416
+ db.last_primary_server.update_one({"_id": "last_primary"}, {"$set": last_primary_server_record}, upsert=True)
1248
1417
  message = "Primary server has changed from %s to %s" % (saved_primary, current_primary)
1249
1418
  primary_status = 1
1250
1419
  return check_levels(primary_status, warning, critical, message)
@@ -1266,9 +1435,9 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
1266
1435
 
1267
1436
  try:
1268
1437
  #on linux servers only
1269
- page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults'])) / sample_time
1438
+ page_faults = (int(data2['extra_info']['page_faults']) - int(data1['extra_info']['page_faults'])) // sample_time
1270
1439
  except KeyError:
1271
- print "WARNING - Can't get extra_info.page_faults counter from MongoDB"
1440
+ print("WARNING - Can't get extra_info.page_faults counter from MongoDB")
1272
1441
  sys.exit(1)
1273
1442
 
1274
1443
  message = "Page Faults: %i" % (page_faults)
@@ -1276,7 +1445,7 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
1276
1445
  message += performance_data(perf_data, [(page_faults, "page_faults", warning, critical)])
1277
1446
  check_levels(page_faults, warning, critical, message)
1278
1447
 
1279
- except Exception, e:
1448
+ except Exception as e:
1280
1449
  exit_with_general_critical(e)
1281
1450
 
1282
1451
 
@@ -1292,35 +1461,35 @@ def chunks_balance(con, database, collection, warning, critical):
1292
1461
  shards = col.distinct("shard")
1293
1462
 
1294
1463
  except:
1295
- print "WARNING - Can't get chunks infos from MongoDB"
1464
+ print("WARNING - Can't get chunks infos from MongoDB")
1296
1465
  sys.exit(1)
1297
1466
 
1298
1467
  if nscount == 0:
1299
- print "WARNING - Namespace %s is not sharded" % (nsfilter)
1468
+ print("WARNING - Namespace %s is not sharded" % (nsfilter))
1300
1469
  sys.exit(1)
1301
1470
 
1302
- avgchunksnb = nscount / len(shards)
1303
- warningnb = avgchunksnb * warning / 100
1304
- criticalnb = avgchunksnb * critical / 100
1471
+ avgchunksnb = nscount // len(shards)
1472
+ warningnb = avgchunksnb * warning // 100
1473
+ criticalnb = avgchunksnb * critical // 100
1305
1474
 
1306
1475
  for shard in shards:
1307
1476
  delta = abs(avgchunksnb - col.find({"ns": nsfilter, "shard": shard}).count())
1308
1477
  message = "Namespace: %s, Shard name: %s, Chunk delta: %i" % (nsfilter, shard, delta)
1309
1478
 
1310
1479
  if delta >= criticalnb and delta > 0:
1311
- print "CRITICAL - Chunks not well balanced " + message
1480
+ print("CRITICAL - Chunks not well balanced " + message)
1312
1481
  sys.exit(2)
1313
1482
  elif delta >= warningnb and delta > 0:
1314
- print "WARNING - Chunks not well balanced " + message
1483
+ print("WARNING - Chunks not well balanced " + message)
1315
1484
  sys.exit(1)
1316
1485
 
1317
- print "OK - Chunks well balanced across shards"
1486
+ print("OK - Chunks well balanced across shards")
1318
1487
  sys.exit(0)
1319
1488
 
1320
- except Exception, e:
1489
+ except Exception as e:
1321
1490
  exit_with_general_critical(e)
1322
1491
 
1323
- print "OK - Chunks well balanced across shards"
1492
+ print("OK - Chunks well balanced across shards")
1324
1493
  sys.exit(0)
1325
1494
 
1326
1495
 
@@ -1336,7 +1505,7 @@ def check_connect_primary(con, warning, critical, perf_data):
1336
1505
  data = con.admin.command(son.SON([('isMaster', 1)]))
1337
1506
 
1338
1507
  if data['ismaster'] == True:
1339
- print "OK - This server is primary"
1508
+ print("OK - This server is primary")
1340
1509
  return 0
1341
1510
 
1342
1511
  phost = data['primary'].split(':')[0]
@@ -1354,17 +1523,17 @@ def check_connect_primary(con, warning, critical, perf_data):
1354
1523
 
1355
1524
  return check_levels(pconn_time, warning, critical, message)
1356
1525
 
1357
- except Exception, e:
1526
+ except Exception as e:
1358
1527
  return exit_with_general_critical(e)
1359
1528
 
1360
1529
 
1361
1530
def check_collection_state(con, database, collection):
    """Check that a collection answers a trivial read.

    Runs find_one() on con[database][collection]; on success reports OK
    and returns 0, otherwise hands the exception to the generic
    critical-exit handler.
    """
    try:
        target = con[database][collection]
        target.find_one()
    except Exception as e:
        return exit_with_general_critical(e)
    print("OK - Collection %s.%s is reachable " % (database, collection))
    return 0
1369
1538
 
1370
1539
 
@@ -1376,14 +1545,18 @@ def check_row_count(con, database, collection, warning, critical, perf_data):
1376
1545
 
1377
1546
  return check_levels(count, warning, critical, message)
1378
1547
 
1379
- except Exception, e:
1548
+ except Exception as e:
1380
1549
  return exit_with_general_critical(e)
1381
1550
 
1382
1551
 
1383
def build_file_name(host, port, action):
    """Return the path of the temp file that stores delta state.

    The name encodes the host, the checked metric (*action*) and — for
    non-default ports — the port, so that checks against several mongod
    instances on one machine do not clobber each other's saved values.
    """
    # Derive the module name from __file__ so the path is right both when
    # the script runs standalone and when it is invoked through a wrapper.
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    module_name = re.match(r'(.*//*)*(.*)\..*', __file__).group(2)

    if port == 27017:
        # Default port keeps the legacy file name for backward compatibility
        # with state files written by older versions of this plugin.
        return "/tmp/" + module_name + "_data/" + host + "-" + action + ".data"
    return "/tmp/" + module_name + "_data/" + host + "-" + str(port) + "-" + action + ".data"
1387
1560
 
1388
1561
 
1389
1562
  def ensure_dir(f):
@@ -1396,7 +1569,7 @@ def write_values(file_name, string):
1396
1569
  f = None
1397
1570
  try:
1398
1571
  f = open(file_name, 'w')
1399
- except IOError, e:
1572
+ except IOError as e:
1400
1573
  #try creating
1401
1574
  if (e.errno == 2):
1402
1575
  ensure_dir(file_name)
@@ -1415,11 +1588,11 @@ def read_values(file_name):
1415
1588
  data = f.read()
1416
1589
  f.close()
1417
1590
  return 0, data
1418
- except IOError, e:
1591
+ except IOError as e:
1419
1592
  if (e.errno == 2):
1420
1593
  #no previous data
1421
1594
  return 1, ''
1422
- except Exception, e:
1595
+ except Exception as e:
1423
1596
  return 2, None
1424
1597
 
1425
1598
 
@@ -1435,8 +1608,8 @@ def calc_delta(old, new):
1435
1608
  return 0, delta
1436
1609
 
1437
1610
 
1438
- def maintain_delta(new_vals, host, action):
1439
- file_name = build_file_name(host, action)
1611
+ def maintain_delta(new_vals, host, port, action):
1612
+ file_name = build_file_name(host, port, action)
1440
1613
  err, data = read_values(file_name)
1441
1614
  old_vals = data.split(';')
1442
1615
  new_vals = [str(int(time.time()))] + new_vals
@@ -1457,8 +1630,8 @@ def replication_get_time_diff(con):
1457
1630
  col = 'oplog.$main'
1458
1631
  firstc = local[col].find().sort("$natural", 1).limit(1)
1459
1632
  lastc = local[col].find().sort("$natural", -1).limit(1)
1460
- first = firstc.next()
1461
- last = lastc.next()
1633
+ first = next(firstc)
1634
+ last = next(lastc)
1462
1635
  tfirst = first["ts"]
1463
1636
  tlast = last["ts"]
1464
1637
  delta = tlast.time - tfirst.time