opensipscli 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opensipscli/__init__.py +20 -0
- opensipscli/args.py +56 -0
- opensipscli/cli.py +472 -0
- opensipscli/comm.py +57 -0
- opensipscli/config.py +162 -0
- opensipscli/db.py +989 -0
- opensipscli/defaults.py +91 -0
- opensipscli/libs/__init__.py +20 -0
- opensipscli/libs/sqlalchemy_utils.py +244 -0
- opensipscli/logger.py +85 -0
- opensipscli/main.py +86 -0
- opensipscli/module.py +69 -0
- opensipscli/modules/__init__.py +24 -0
- opensipscli/modules/database.py +1062 -0
- opensipscli/modules/diagnose.py +1089 -0
- opensipscli/modules/instance.py +53 -0
- opensipscli/modules/mi.py +200 -0
- opensipscli/modules/tls.py +354 -0
- opensipscli/modules/trace.py +292 -0
- opensipscli/modules/trap.py +138 -0
- opensipscli/modules/user.py +281 -0
- opensipscli/version.py +22 -0
- opensipscli-0.3.1.data/scripts/opensips-cli +9 -0
- opensipscli-0.3.1.dist-info/LICENSE +674 -0
- opensipscli-0.3.1.dist-info/METADATA +225 -0
- opensipscli-0.3.1.dist-info/RECORD +28 -0
- opensipscli-0.3.1.dist-info/WHEEL +5 -0
- opensipscli-0.3.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1089 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
##
|
|
3
|
+
## This file is part of OpenSIPS CLI
|
|
4
|
+
## (see https://github.com/OpenSIPS/opensips-cli).
|
|
5
|
+
##
|
|
6
|
+
## This program is free software: you can redistribute it and/or modify
|
|
7
|
+
## it under the terms of the GNU General Public License as published by
|
|
8
|
+
## the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
## (at your option) any later version.
|
|
10
|
+
##
|
|
11
|
+
## This program is distributed in the hope that it will be useful,
|
|
12
|
+
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
## GNU General Public License for more details.
|
|
15
|
+
##
|
|
16
|
+
## You should have received a copy of the GNU General Public License
|
|
17
|
+
## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18
|
+
##
|
|
19
|
+
|
|
20
|
+
from opensipscli.module import Module
|
|
21
|
+
from opensipscli.logger import logger
|
|
22
|
+
from opensipscli.config import cfg
|
|
23
|
+
from opensipscli import comm
|
|
24
|
+
from threading import Thread
|
|
25
|
+
import socket
|
|
26
|
+
import subprocess
|
|
27
|
+
import shutil
|
|
28
|
+
import time
|
|
29
|
+
import os
|
|
30
|
+
import re
|
|
31
|
+
import time
|
|
32
|
+
import threading
|
|
33
|
+
import bisect
|
|
34
|
+
import random
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
import psutil
|
|
38
|
+
have_psutil = True
|
|
39
|
+
except:
|
|
40
|
+
have_psutil = False
|
|
41
|
+
|
|
42
|
+
import json
|
|
43
|
+
from json.decoder import WHITESPACE
|
|
44
|
+
|
|
45
|
+
# "source" prefixes of the E_CORE_THRESHOLD events kept by each diagnosis
DNS_THR_EVENTS = ["dns"]
SQL_THR_EVENTS = ["mysql", "pgsql"]
NOSQL_THR_EVENTS = [
    "Cassandra",
    "cachedb_local",
    "MongoDB",
    "cachedb_memcached",
    "cachedb_couchbase",
]
SIP_THR_EVENTS = ["msg processing"]

# State shared between the collector thread (writer) and the screens (readers)
thr_summary = {}  # (extra, source) -> number of times the threshold was exceeded
thr_slowest = []  # at most three (-time, extra, source) tuples, slowest first
|
|
53
|
+
|
|
54
|
+
# cheers to Philippe: https://stackoverflow.com/a/325528/2054305
class StoppableThread(threading.Thread):
    """Thread that carries a cooperative stop flag.

    The flag is purely advisory: the code running in the thread has to
    poll stopped() and return on its own.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to threading.Thread and clear the flag."""
        super().__init__(*args, **kwargs)
        self._stop_event = threading.Event()

    def stop(self):
        """Raise the stop flag."""
        flag = self._stop_event
        flag.set()

    def stopped(self):
        """Return True once stop() has been called."""
        flag = self._stop_event
        return flag.is_set()
|
|
65
|
+
|
|
66
|
+
class ThresholdCollector(StoppableThread):
    """Background thread collecting OpenSIPS E_CORE_THRESHOLD events.

    The thread listens on a TCP socket, keeps an "event_subscribe" MI
    subscription pointing at that socket alive, and parses the JSON objects
    received on the accepted connection.  Results are published through
    the module-level globals ``thr_summary`` (hit count per (extra, source)
    pair) and ``thr_slowest`` (the three slowest events seen so far).

    Keyword arguments consumed here (anything else is passed on to the
    Thread constructor):
        events: list of "source" prefixes to keep, or None to keep all.
        skip_summ: when true, only thr_slowest is maintained.
        rcv_proto, rcv_ip, rcv_port: parts of the "proto:ip:port" socket
            given to event_subscribe; ip and port are also used to bind().
    """

    def __init__(self, *args, **kwargs):
        kwargs['target'] = self.collect_events

        # pop() with defaults instead of a bare try/except: a missing key
        # no longer leaves the receiver attributes undefined, and no custom
        # key can leak into Thread.__init__().
        if 'events' in kwargs:
            kwargs['args'] = (kwargs.pop('events'),)
        self.skip_summ = kwargs.pop('skip_summ', False)
        self.__rcv_proto = kwargs.pop('rcv_proto', None)
        self.__rcv_ip = kwargs.pop('rcv_ip', None)
        self.__rcv_port = kwargs.pop('rcv_port', None)

        super().__init__(*args, **kwargs)
        # time of the last successful subscription, 0 when not subscribed
        self.last_subscribe_ts = 0

    def _subscription(self, expire):
        """Build the event_subscribe parameters for the given expiry."""
        return {
            'event': 'E_CORE_THRESHOLD',
            'socket': '{}:{}:{}'.format(
                self.__rcv_proto, self.__rcv_ip, self.__rcv_port),
            'expire': expire,
        }

    def mi_refresh_sub(self):
        """(Re)subscribe for threshold events, at most once every 5 seconds.

        last_subscribe_ts is set to the current time when the MI command
        answers "OK" and reset to 0 otherwise, so a failed attempt is
        retried on the next call.
        """
        now = int(time.time())
        if now <= self.last_subscribe_ts + 5:
            return

        ans = comm.execute("event_subscribe", self._subscription(10),
                           silent=True)

        self.last_subscribe_ts = now if ans == "OK" else 0

    def mi_unsub(self):
        """Drop the subscription."""
        # there is no "event_unsubscribe", an expiry of 0 is good enough
        comm.execute("event_subscribe", self._subscription(0), silent=True)

    def _accept(self, listener):
        """Wait for an incoming connection on the listening socket.

        Returns the accepted connection (with a 0.1s timeout set), or None
        when the thread was stopped first; the subscription is dropped in
        that case.
        """
        while True:
            self.mi_refresh_sub()

            try:
                conn, _ = listener.accept()
            except socket.timeout:
                pass
            else:
                conn.settimeout(0.1)
                return conn

            if self.stopped():
                self.mi_unsub()
                return None

    def collect_events(self, events=None):
        """Thread entry point: reset the shared results, then collect events
        until the thread is stopped.

        Args:
            events: list of "source" prefixes to keep, None keeps everything.
        """
        global thr_summary, thr_slowest

        thr_summary = {}
        thr_slowest = []

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind((self.__rcv_ip, self.__rcv_port))
            s.settimeout(0.1)
            s.listen()

            # Keep accepting: when the peer closes its connection we go
            # back to waiting for a new one instead of giving up.
            while True:
                conn = self._accept(s)
                if conn is None:
                    return

                with conn:
                    if self.collect_loop(conn, events):
                        return

    def collect_loop(self, conn, events):
        """Read and process events from one connection.

        Returns:
            True when the loop ended because the thread was stopped (the
            subscription has been dropped), False when the connection was
            closed or reset by the peer.
        """
        import codecs

        # Incremental decoding: a multi-byte UTF-8 character may be split
        # across two recv() calls.
        utf8 = codecs.getincrementaldecoder('utf-8')(errors='replace')
        pending = ""
        while True:
            self.mi_refresh_sub()

            try:
                data = conn.recv(1024)
            except socket.timeout:
                data = None
            except OSError:
                # connection reset & co. -- handled like a close
                data = b""

            if self.stopped():
                self.mi_unsub()
                return True

            if data is None:
                continue

            if not data:
                # recv() returning no bytes means the peer is gone; looping
                # on it would spin without ever blocking
                return False

            pending += utf8.decode(data)
            pending = self._consume_events(pending, events)

    def _consume_events(self, text, events):
        """Process every complete JSON object at the head of ``text``.

        Matching threshold events are recorded in thr_summary/thr_slowest.
        Returns the part of ``text`` that could not be decoded yet.
        """
        global thr_summary, thr_slowest

        decoder = json.JSONDecoder()
        idx = WHITESPACE.match(text, 0).end()
        while idx < len(text):
            try:
                obj, end = decoder.raw_decode(text, idx)
            except json.decoder.JSONDecodeError:
                # partial JSON -- just let it accumulate
                break

            # Consume the object and re-anchor the index right away, so
            # that every path below continues from a consistent position.
            text = text[end:]
            idx = WHITESPACE.match(text, 0).end()

            params = obj.get('params') if isinstance(obj, dict) else None
            if not isinstance(params, dict):
                continue

            source = params.get('source')
            if not isinstance(source, str) or 'time' not in params:
                # malformed event, nothing usable in it
                continue

            # only process threshold events we're interested in
            if events is not None and \
                    not any(source.startswith(e) for e in events):
                continue

            extra = params.get('extra', "<unknown>")

            if not self.skip_summ:
                key = (extra, source)
                thr_summary[key] = thr_summary.get(key, 0) + 1

            # negated time: ascending order puts the slowest event first
            bisect.insort(thr_slowest, (-params['time'], extra, source))
            thr_slowest = thr_slowest[:3]

        return text
|
|
194
|
+
|
|
195
|
+
class diagnose(Module):
|
|
196
|
+
def __init__(self, *args, **kwargs):
    """Initialise the module: no collector thread yet, and the event
    receiver endpoint read from the "diagnose_listen_ip" and
    "diagnose_listen_port" configuration options."""
    super().__init__(*args, **kwargs)
    self.t = None

    # socket handed to "event_subscribe" as "<proto>:<ip>:<port>"
    listen_ip = cfg.get("diagnose_listen_ip")
    listen_port = int(cfg.get("diagnose_listen_port"))
    self.__rcv_proto = 'tcp'
    self.__rcv_ip = listen_ip
    self.__rcv_port = listen_port
|
|
202
|
+
|
|
203
|
+
def getOpenSIPSVersion(self):
    """Ask the running OpenSIPS for its version.

    Returns:
        A dict with the 'major' and 'minor' version components (as
        strings), or None when the "version" MI command gave no answer or
        its 'Server' field could not be parsed.
    """
    ans = comm.execute('version')
    if not ans:
        return None

    try:
        banner = ans['Server']
    except (KeyError, TypeError):
        return None
    if not isinstance(banner, str):
        return None

    # \d+ rather than \d, so multi-digit components are accepted as well
    ver = re.match(r'OpenSIPS \((?P<major>\d+)\.(?P<minor>\d+)\.\d.*', banner)
    # re.match() yields None for an unexpected banner; report that as
    # "version unknown" instead of failing on None.groupdict()
    return ver.groupdict() if ver else None
|
|
210
|
+
|
|
211
|
+
def startThresholdCollector(self, events, skip_summ=False):
    """Start the collector thread and wait for its first subscription.

    Args:
        events: "source" prefixes of the threshold events to collect.
        skip_summ: passed to the collector; skips the per-query summary.

    Returns:
        True once the collector reports a subscription, False when the
        OpenSIPS version is unknown or older than 3.0, or when no
        subscription happened within 15 polls of 50 ms (the collector is
        stopped again in that case).
    """
    version = self.getOpenSIPSVersion()
    if not version:
        logger.error("Can't detect OpenSIPS version")
        return False

    major = int(version['major'])
    if major < 3:
        logger.error("OpenSIPS-CLI works with OpenSIPS starting from version 3.0")
        return False

    # x.0 releases get the 'jsonrpc' socket type, later ones keep the
    # configured one
    if int(version['minor']) == 0:
        self.__rcv_proto = 'jsonrpc'
        moduleName = 'event_jsonrpc.so'
    else:
        moduleName = 'event_stream.so'

    # subscribe for, then collect "query threshold exceeded" events
    collector = ThresholdCollector(
        events=events,
        skip_summ=skip_summ,
        rcv_proto=self.__rcv_proto,
        rcv_ip=self.__rcv_ip,
        rcv_port=self.__rcv_port,
    )
    self.t = collector
    collector.daemon = True
    collector.start()

    attempts_left = 15
    while attempts_left > 0:
        if self.t.last_subscribe_ts != 0:
            return True
        time.sleep(0.05)
        attempts_left -= 1

    logger.error("Failed to subscribe for JSON-RPC events")
    logger.error("Is the {} OpenSIPS module loaded?".format(moduleName))
    self.stopThresholdCollector()

    return False
|
|
238
|
+
|
|
239
|
+
def stopThresholdCollector(self):
    """Stop the collector thread, if any, and wait for it to finish."""
    collector = self.t
    if not collector:
        return

    collector.stop()
    collector.join()
    self.t = None
|
|
244
|
+
|
|
245
|
+
def restartThresholdCollector(self, events, skip_summ=False):
    """Stop the current collector (if any) and start a fresh one.

    Returns whatever startThresholdCollector() returns.
    """
    self.stopThresholdCollector()
    started = self.startThresholdCollector(events, skip_summ)
    return started
|
|
248
|
+
|
|
249
|
+
def print_diag_footer(self):
    """Print the "how to exit" hint shown at the bottom of every screen."""
    footer = "\n{}(press Ctrl-c to exit)".format(5 * '\t')
    print(footer)
|
|
251
|
+
|
|
252
|
+
def diagnose_dns(self):
    """Run the DNS diagnosis screen, refreshed once per second, until the
    loop asks to stop or Ctrl-C is pressed."""
    # quickly ensure opensips is running
    ans = comm.execute('get_statistics', {
        'statistics': ['dns_total_queries', 'dns_slow_queries'],
    })
    if ans is None:
        return

    # counters at start-up: the baseline all later figures are relative to
    initial_total = int(ans['dns:dns_total_queries'])
    initial_slow = int(ans['dns:dns_slow_queries'])
    stats = {
        'ini_total': initial_total,
        'ini_slow': initial_slow,
        'total': initial_total,
        'slow': initial_slow,
    }

    if not self.startThresholdCollector(DNS_THR_EVENTS):
        return

    sec = 0
    try:
        while self.diagnose_dns_loop(sec, stats):
            time.sleep(1)
            sec += 1
    except KeyboardInterrupt:
        print('^C')
    finally:
        # the collector thread must not outlive the screen
        self.stopThresholdCollector()
|
|
281
|
+
|
|
282
|
+
def diagnose_dns_loop(self, sec, stats):
    """Render one refresh of the DNS diagnosis screen.

    Args:
        sec: seconds elapsed since the diagnosis started (display only; the
            counter belongs to the caller and keeps running across an
            OpenSIPS restart).
        stats: dict holding the 'ini_total'/'ini_slow' baselines and the
            'total'/'slow' figures relative to them; updated in place.
            The last absolute counter seen is kept under 'last_total'.

    Returns:
        True to keep refreshing, False when OpenSIPS stopped answering or
        the threshold collector could not be restarted.
    """
    global thr_summary, thr_slowest

    os.system("clear")
    print("In the last {} seconds...".format(sec))
    if not thr_summary:
        print(" DNS Queries [OK]")
    else:
        print(" DNS Queries [WARNING]")
        print(" * Slowest queries:")
        for q in thr_slowest:
            print(" {} ({} us)".format(q[1], -q[0]))
        print(" * Constantly slow queries")
        for q in sorted([(v, k) for k, v in thr_summary.items()], reverse=True)[:3]:
            print(" {} ({} times exceeded threshold)".format(
                q[1][0], q[0]))

    ans = comm.execute('get_statistics', {
        'statistics': ['dns_total_queries', 'dns_slow_queries']
    })
    if not ans:
        return False

    cur_total = int(ans['dns:dns_total_queries'])
    cur_slow = int(ans['dns:dns_slow_queries'])

    # was opensips restarted in the meantime? if yes, resubscribe!
    # The counter only goes backwards after a restart, so compare it with
    # the last absolute value seen; stats['total'] is relative to the
    # baseline and cannot be used for this.
    last_total = stats.get('last_total', stats['ini_total'])
    if cur_total < last_total:
        stats['ini_total'] = cur_total
        stats['ini_slow'] = cur_slow
        thr_summary = {}
        thr_slowest = []
        if not self.restartThresholdCollector(DNS_THR_EVENTS):
            return False
    stats['last_total'] = cur_total

    stats['total'] = cur_total - stats['ini_total']
    stats['slow'] = cur_slow - stats['ini_slow']

    print(" * {} / {} queries ({}%) exceeded threshold".format(
        stats['slow'], stats['total'],
        int((stats['slow'] / stats['total']) * 100)
        if stats['total'] > 0 else 0))
    self.print_diag_footer()

    return True
|
|
325
|
+
|
|
326
|
+
def diagnose_sql(self):
    """Run the database diagnosis for SQL queries ('sql' statistics,
    SQL_THR_EVENTS threshold events)."""
    sql_type = ('sql', 'SQL')  # (statistics prefix, label shown on screen)
    return self.diagnose_db(sql_type, SQL_THR_EVENTS)
|
|
328
|
+
|
|
329
|
+
def diagnose_nosql(self):
    """Run the database diagnosis for NoSQL queries ('cdb' statistics,
    NOSQL_THR_EVENTS threshold events)."""
    nosql_type = ('cdb', 'NoSQL (CacheDB)')  # (statistics prefix, label)
    return self.diagnose_db(nosql_type, NOSQL_THR_EVENTS)
|
|
331
|
+
|
|
332
|
+
def diagnose_db(self, dbtype, events):
    """Run a database diagnosis screen, refreshed once per second, until
    the loop asks to stop or Ctrl-C is pressed.

    Args:
        dbtype: (statistics prefix, label) pair, e.g. ('sql', 'SQL').
        events: "source" prefixes of the threshold events to collect.
    """
    prefix = dbtype[0]

    # quickly ensure opensips is running
    ans = comm.execute('get_statistics', {
        'statistics': [
            '{}_total_queries'.format(prefix),
            '{}_slow_queries'.format(prefix),
        ],
    })
    if ans is None:
        return

    # counters at start-up: the baseline all later figures are relative to
    initial_total = int(ans['{}:{}_total_queries'.format(prefix, prefix)])
    initial_slow = int(ans['{}:{}_slow_queries'.format(prefix, prefix)])
    stats = {
        'ini_total': initial_total,
        'ini_slow': initial_slow,
        'total': initial_total,
        'slow': initial_slow,
    }

    if not self.startThresholdCollector(events):
        return

    sec = 0
    try:
        while self.diagnose_db_loop(sec, stats, dbtype, events):
            time.sleep(1)
            sec += 1
    except KeyboardInterrupt:
        print('^C')
    finally:
        # the collector thread must not outlive the screen
        self.stopThresholdCollector()
|
|
362
|
+
|
|
363
|
+
def diagnose_db_loop(self, sec, stats, dbtype, events):
    """Render one refresh of a database diagnosis screen.

    Args:
        sec: seconds elapsed since the diagnosis started (display only; the
            counter belongs to the caller and keeps running across an
            OpenSIPS restart).
        stats: dict holding the 'ini_total'/'ini_slow' baselines and the
            'total'/'slow' figures relative to them; updated in place.
            The last absolute counter seen is kept under 'last_total'.
        dbtype: (statistics prefix, label) pair, e.g. ('sql', 'SQL').
        events: "source" prefixes used when the collector is restarted.

    Returns:
        True to keep refreshing, False when OpenSIPS stopped answering or
        the threshold collector could not be restarted.
    """
    global thr_summary, thr_slowest

    total_stat = '{}_total_queries'.format(dbtype[0])
    slow_stat = '{}_slow_queries'.format(dbtype[0])
    # keys of the same statistics in the answer: "<prefix>:<name>"
    total_key = "{}:{}".format(dbtype[0], total_stat)
    slow_key = "{}:{}".format(dbtype[0], slow_stat)

    os.system("clear")
    print("In the last {} seconds...".format(sec))
    if not thr_summary:
        print(" {} Queries [OK]".format(dbtype[1]))
    else:
        print(" {} Queries [WARNING]".format(dbtype[1]))
        print(" * Slowest queries:")
        for q in thr_slowest:
            print(" {}: {} ({} us)".format(q[2], q[1], -q[0]))
        print(" * Constantly slow queries")
        for q in sorted([(v, k) for k, v in thr_summary.items()], reverse=True)[:3]:
            print(" {}: {} ({} times exceeded threshold)".format(
                q[1][1], q[1][0], q[0]))

    ans = comm.execute('get_statistics',
                       {'statistics': [total_stat, slow_stat]})
    if not ans:
        return False

    cur_total = int(ans[total_key])
    cur_slow = int(ans[slow_key])

    # was opensips restarted in the meantime? if yes, resubscribe!
    # The counter only goes backwards after a restart, so compare it with
    # the last absolute value seen; stats['total'] is relative to the
    # baseline and cannot be used for this.
    last_total = stats.get('last_total', stats['ini_total'])
    if cur_total < last_total:
        stats['ini_total'] = cur_total
        stats['ini_slow'] = cur_slow
        thr_summary = {}
        thr_slowest = []
        if not self.restartThresholdCollector(events):
            return False
    stats['last_total'] = cur_total

    stats['total'] = cur_total - stats['ini_total']
    stats['slow'] = cur_slow - stats['ini_slow']

    print(" * {} / {} queries ({}%) exceeded threshold".format(
        stats['slow'], stats['total'],
        int((stats['slow'] / stats['total']) * 100)
        if stats['total'] > 0 else 0))
    self.print_diag_footer()

    return True
|
|
411
|
+
|
|
412
|
+
def diagnose_sip(self):
    """Run the SIP processing diagnosis screen, refreshed once per second,
    until the loop asks to stop or Ctrl-C is pressed."""
    # quickly ensure opensips is running
    ans = comm.execute('get_statistics', {
        'statistics': ['rcv_requests', 'rcv_replies', 'slow_messages'],
    })
    if ans is None:
        return

    # counters at start-up: the baseline all later figures are relative to
    initial_total = int(ans['core:rcv_requests']) + int(ans['core:rcv_replies'])
    initial_slow = int(ans['core:slow_messages'])
    stats = {
        'ini_total': initial_total,
        'ini_slow': initial_slow,
        'total': initial_total,
        'slow': initial_slow,
    }

    if not self.startThresholdCollector(SIP_THR_EVENTS, skip_summ=True):
        return

    sec = 0
    try:
        while self.diagnose_sip_loop(sec, stats):
            time.sleep(1)
            sec += 1
    except KeyboardInterrupt:
        print('^C')
    finally:
        # the collector thread must not outlive the screen
        self.stopThresholdCollector()
|
|
441
|
+
|
|
442
|
+
def diagnose_sip_loop(self, sec, stats):
    """Render one refresh of the SIP processing diagnosis screen.

    Args:
        sec: seconds elapsed since the diagnosis started (display only; the
            counter belongs to the caller and keeps running across an
            OpenSIPS restart).
        stats: dict holding the 'ini_total'/'ini_slow' baselines and the
            'total'/'slow' figures relative to them; updated in place.
            The last absolute counter seen is kept under 'last_total'.

    Returns:
        True to keep refreshing, False when OpenSIPS stopped answering or
        the threshold collector could not be restarted.
    """
    global thr_slowest

    os.system("clear")
    print("In the last {} seconds...".format(sec))
    if not thr_slowest:
        print(" SIP Processing [OK]")
    else:
        print(" SIP Processing [WARNING]")
        print(" * Slowest SIP messages:")
        for q in thr_slowest:
            print(" {} ({} us)".format(desc_sip_msg(q[1]), -q[0]))

    ans = comm.execute('get_statistics', {'statistics':
                       ['rcv_requests', 'rcv_replies', 'slow_messages']})
    if not ans:
        return False

    rcv_req = int(ans["core:rcv_requests"])
    rcv_rpl = int(ans["core:rcv_replies"])
    slow_msgs = int(ans["core:slow_messages"])
    cur_total = rcv_req + rcv_rpl

    # was opensips restarted in the meantime? if yes, resubscribe!
    # The counters only go backwards after a restart, so compare with the
    # last absolute value seen; stats['total'] is relative to the baseline
    # and cannot be used for this.
    last_total = stats.get('last_total', stats['ini_total'])
    if cur_total < last_total:
        stats['ini_total'] = cur_total
        stats['ini_slow'] = slow_msgs
        thr_slowest = []
        if not self.restartThresholdCollector(SIP_THR_EVENTS, skip_summ=True):
            return False
    stats['last_total'] = cur_total

    stats['total'] = cur_total - stats['ini_total']
    stats['slow'] = slow_msgs - stats['ini_slow']

    print(" * {} / {} SIP messages ({}%) exceeded threshold".format(
        stats['slow'], stats['total'],
        int((stats['slow'] / stats['total']) * 100)
        if stats['total'] > 0 else 0))
    self.print_diag_footer()

    return True
|
|
483
|
+
|
|
484
|
+
def diagnose_mem(self):
    """Refresh the memory diagnosis screen once per second until the loop
    asks to stop or Ctrl-C is pressed."""
    try:
        while self.diagnose_mem_loop():
            time.sleep(1)
    except KeyboardInterrupt:
        print('^C')
|
|
492
|
+
|
|
493
|
+
def diagnose_mem_loop(self):
    """Render one refresh of the memory diagnosis screen.

    Returns:
        True to keep refreshing, False when OpenSIPS did not answer or the
        statistics could not be processed.
    """
    os.system("clear")
    ans = comm.execute('get_statistics', {
        'statistics': ['shmem:', 'pkmem:']})
    ps = comm.execute('ps')
    if ans is None or ps is None:
        return False

    try:
        self.diagnose_shm_stats(ans)
        print()
        self.diagnose_pkg_stats(ans, ps)
    except Exception as exc:
        # Still best effort (the screen just ends), but say why, and let
        # KeyboardInterrupt through so Ctrl-C is handled by the caller.
        logger.error("Failed to process the memory statistics: {!r}".format(exc))
        return False

    self.print_diag_footer()
    return True
|
|
510
|
+
|
|
511
|
+
def diagnose_shm_stats(self, stats):
    """Print the shared memory report.

    Current and peak usage are rated together: OK up to 70% / 80%,
    WARNING up to 85% / 90%, CRITICAL above.

    Args:
        stats: statistics holding the 'shmem:total_size',
            'shmem:real_used_size' and 'shmem:max_used_size' values.
    """
    total = int(stats['shmem:total_size'])
    used = int(stats['shmem:real_used_size'])
    peak = int(stats['shmem:max_used_size'])

    usage_perc = int(used / total * 100)
    max_usage_perc = int(peak / total * 100)

    print("Shared Memory Status")
    print("--------------------")
    print(" Current Usage: {} / {} ({}%)".format(
        human_size(used), human_size(total), usage_perc))
    print(" Peak Usage: {} / {} ({}%)".format(
        human_size(peak), human_size(total), max_usage_perc))
    print()

    if usage_perc <= 70 and max_usage_perc <= 80:
        print(" {}: no issues detected.".format("OK"))
        return

    if usage_perc <= 85 and max_usage_perc <= 90:
        # name the figure that crossed its limit, current usage first
        culprit, limit = ("Current", 70) if usage_perc > 70 else ("Peak", 80)
        template = (" {}: {} shared memory usage > {}%, please\n"
                    "increase the \"-m\" command line parameter!")
        print(template.format("WARNING", culprit, limit))
        return

    culprit, limit = ("Current", 85) if usage_perc > 85 else ("Peak", 90)
    template = (" {}: {} shared memory usage > {}%, increase\n"
                "the \"-m\" command line parameter as soon as possible!!")
    print(template.format("CRITICAL", culprit, limit))
|
|
546
|
+
|
|
547
|
+
def diagnose_pkg_stats(self, stats, ps):
    """Print the private (per-process) memory report.

    Each process is rated like shared memory: OK up to 70% current / 80%
    peak usage, WARNING up to 85% / 90%, CRITICAL above.

    Args:
        stats: statistics holding the 'pkmem:<ID>-*' values.
        ps: process listing; ps['Processes'] is a list of dicts with the
            'ID' and 'Type' keys.
    """
    def stat_names(proc_id):
        # names of the three pkmem statistics of one process
        return ("pkmem:{}-real_used_size".format(proc_id),
                "pkmem:{}-free_size".format(proc_id),
                "pkmem:{}-max_used_size".format(proc_id))

    print("Private Memory Status")
    print("---------------------")

    processes = ps['Processes']

    # The pool size is taken from the first process (numbers starting at
    # 1) that exposes a complete, non-empty set of statistics.
    pk_total = None
    for pno in range(1, len(processes)):
        names = stat_names(pno)
        if not all(name in stats for name in names):
            continue

        pk_total = int(stats[names[0]]) + int(stats[names[1]])
        if pk_total != 0:
            break

    if not pk_total:
        return

    print("Each process has {} of private (packaged) memory.\n".format(
        human_size(pk_total)))

    issues_found = False

    for proc in processes:
        st_used, st_free, st_max_used = stat_names(proc['ID'])
        if not all(name in stats for name in (st_used, st_free, st_max_used)):
            continue

        pk_used = int(stats[st_used])
        pk_total = pk_used + int(stats[st_free])
        pk_max_used = int(stats[st_max_used])
        if pk_total == 0:
            print(" Process {:>2}: no pkg memory stats found ({})".format(
                proc['ID'], proc['Type']))
            continue

        usage_perc = int(pk_used / pk_total * 100)
        max_usage_perc = int(pk_max_used / pk_total * 100)

        print(" Process {:>2}: {:>2}% usage, {:>2}% peak usage ({})".format(
            proc['ID'], usage_perc, max_usage_perc, proc['Type']))

        if usage_perc <= 70 and max_usage_perc <= 80:
            continue

        issues_found = True
        if usage_perc <= 85 and max_usage_perc <= 90:
            culprit, limit = ("Current", 70) if usage_perc > 70 else ("Peak", 80)
            template = (" {}: {} private memory usage > {}%, please\n"
                        "increase the \"-M\" command line parameter!")
            print(template.format("WARNING", culprit, limit))
        else:
            culprit, limit = ("Current", 85) if usage_perc > 85 else ("Peak", 90)
            template = (" {}: {} private memory usage > {}%, increase\n"
                        "the \"-M\" command line parameter as soon as possible!!")
            print(template.format("CRITICAL", culprit, limit))

    if not issues_found:
        print("\n OK: no issues detected.")
|
|
619
|
+
|
|
620
|
+
def diagnose_load(self, transports):
    """Refresh the processing load screen once per second until the loop
    asks to stop or Ctrl-C is pressed.

    Args:
        transports: transports to report on ('udp', 'tcp', 'hep').

    Returns:
        False when the OpenSIPS process list is unavailable, None otherwise.
    """
    # first, we group processes by scope/interface!
    pgroups = self.get_opensips_pgroups()
    if pgroups is None:
        return False

    # one-element list: lets the loop swap in refreshed process groups
    ppgroups = [pgroups]

    try:
        while self.diagnose_load_loop(ppgroups, transports):
            time.sleep(1)
    except KeyboardInterrupt:
        print('^C')
|
|
634
|
+
|
|
635
|
+
def diagnose_load_loop(self, ppgroups, transports):
    """Render one refresh of the processing load screen.

    Args:
        ppgroups: one-element list holding the process groups; the element
            is replaced when the groups are refreshed after a restart.
        transports: transports to report on ('udp', 'tcp', 'hep').

    Returns:
        True to keep refreshing, False when OpenSIPS did not answer.
    """
    def read_udp_table():
        # Rows of /proc/net/udp without the header line.  Best effort: an
        # unreadable file yields no rows, which makes the receive queue
        # show up as unknown instead of aborting the whole screen.
        try:
            with open('/proc/net/udp') as f:
                return [line.split() for line in f.readlines()[1:]]
        except OSError:
            return []

    pgroups = ppgroups[0]
    os.system("clear")

    print("{}OpenSIPS Processing Status".format(25 * " "))
    print()

    load = comm.execute('get_statistics', {
        'statistics': ['load:', 'timestamp']})
    if not load:
        return False

    now_ts = int(load['core:timestamp'])

    # if opensips restarted in the meantime -> refresh the proc groups
    if 'ts' in pgroups and now_ts < pgroups['ts']:
        pgroups = self.get_opensips_pgroups()
        if pgroups is None:
            # the process list could not be fetched: nothing to show
            return False
        ppgroups[0] = pgroups
    pgroups['ts'] = now_ts

    # fetch the network waiting queues
    if 'udp' in transports and pgroups['udp']:
        self.diagnose_transport_load('udp', pgroups, load, read_udp_table())

    if 'tcp' in transports and pgroups['tcp']:
        self.diagnose_transport_load('tcp', pgroups, load, None)

    if 'hep' in transports and pgroups['hep']:
        self.diagnose_transport_load('hep', pgroups, load, read_udp_table())

    print()
    print("Info: the load percentages represent the amount of time spent by an")
    print(" OpenSIPS worker processing SIP messages, as opposed to waiting")
    print(" for new ones. The three numbers represent the 'busy' percentage")
    print(" over the last 1 sec, last 1 min and last 10 min, respectively.")
    self.print_diag_footer()

    return True
|
|
677
|
+
|
|
678
|
+
def diagnose_transport_load(self, transport, pgroups, load, net_wait):
    """Print the load report of every interface served over one transport.

    Args:
        transport: 'udp', 'tcp' or 'hep'; selects pgroups[transport].
        pgroups: process groups, i.e. a dict mapping each transport to a
            dict of interface -> list of process dicts.
        load: statistics holding the 'load:load-proc-<ID>',
            'load:load1m-proc-<ID>' and 'load:load10m-proc-<ID>' values.
        net_wait: whitespace-split rows of /proc/net/udp, or None when
            there is no table to look the receive queue up in.
    """
    # name of the "<x>_workers" setting suggested to the user
    workers_param = "tcp" if transport == "tcp" else "udp"

    for i, (iface, procs) in enumerate(pgroups[transport].items()):
        # TODO: add SCTP support
        if iface != 'TCP' and not iface.startswith('{}'.format(transport)):
            continue

        recvq = None

        if iface == 'TCP':
            print("TCP Processing")
        else:
            print("{} UDP Interface #{} ({})".format(
                'HEP' if transport == 'hep' else 'SIP',
                i + 1, iface))
            if iface.startswith("hep_"):
                iface = iface[4:]

            try:
                # 127.0.0.1:5060 -> 0100007F, 13C4
                ip = "{:02X}{:02X}{:02X}{:02X}".format(*reversed(list(
                    map(int, iface[4:].split(':')[0].split('.')))))
                # /proc/net/udp shows the port as four zero-padded hex
                # digits, so pad: port 53 has to become "0035", not "35"
                port = "{:04X}".format(int(iface[4:].split(':')[1]))
                for line in net_wait:
                    if line[1] == "{}:{}".format(ip, port):
                        # column 4 is "tx_queue:rx_queue", both in hex
                        recvq = int("0x" + line[4].split(':')[1], 0)
                        break
            except Exception:
                # best effort: unparsable interface, missing table or a
                # short row simply leave the queue size unknown
                pass

            print(" Receive Queue: {}".format(
                "???" if recvq is None else human_size(recvq)))

        tot_cpu = 0.0
        tot_l1 = 0
        tot_l2 = 0
        tot_l3 = 0
        proc_lines = []
        for proc in procs:
            # a statistic that is missing or not a number is shown as "??"
            # and left out of the totals
            try:
                l1 = int(load['load:load-proc-{}'.format(proc['ID'])])
                tot_l1 += l1
            except Exception:
                l1 = "??"

            try:
                l2 = int(load['load:load1m-proc-{}'.format(proc['ID'])])
                tot_l2 += l2
            except Exception:
                l2 = "??"

            try:
                l3 = int(load['load:load10m-proc-{}'.format(proc['ID'])])
                tot_l3 += l3
            except Exception:
                l3 = "??"

            proc_lines.append(
                " Process {:>2} load: {:>2}%, {:>2}%, {:>2}% ({})".format(
                    proc['ID'], l1, l2, l3, proc['Type']))

            if have_psutil:
                try:
                    tot_cpu += proc['cpumon'].cpu_percent(interval=None)
                except psutil.NoSuchProcess:
                    # opensips may be restarted in the meantime!
                    pass

        avg_cpu = round(tot_cpu / len(procs))
        print(" Avg. CPU usage: {}% (last 1 sec)".format(avg_cpu))
        print()

        for proc_line in proc_lines:
            print(proc_line)
        print()

        if recvq:
            print(" WARNING: the receive queue is NOT empty, SIP signaling may be slower!")

        # from here on the totals hold the per-process averages
        tot_l1 = round(tot_l1 / len(procs))
        tot_l2 = round(tot_l2 / len(procs))
        tot_l3 = round(tot_l3 / len(procs))

        severity = "WARNING"

        if tot_l1 > 50:
            if tot_l1 > 80:
                severity = "CRITICAL"
            print(" {}: {}% avg. currently used worker capacity!!".format(
                severity, tot_l1))
        elif tot_l2 > 50:
            if tot_l2 > 80:
                severity = "CRITICAL"
            print(" {}: {}% avg. used worker capacity over the last 1 minute!".format(
                severity, tot_l2))
        elif tot_l3 > 50:
            if tot_l3 > 80:
                severity = "CRITICAL"
            print(" {}: {}% avg. used worker capacity over the last 10 minutes!".format(
                severity, tot_l3))
        else:
            if not recvq:
                print(" OK: no issues detected.")
            print("-" * 70)
            continue

        if not have_psutil:
            # no CPU figures: the kind of workload cannot be told apart
            print(("\n Suggestion: see the DNS/SQL/NoSQL diagnosis for any slow query\n"
                   "reports, otherwise increase 'use_workers' or '{}_workers'!").format(
                       workers_param))
            print("-" * 70)
            continue

        if avg_cpu > 25:
            if avg_cpu > 50:
                severity = "CRITICAL"
            else:
                severity = "WARNING"
            print(" {}: CPU intensive workload detected!".format(severity))
            print(("\n Suggestion: increase the 'use_workers' or '{}_workers'\n"
                   "OpenSIPS settings or add more servers!").format(
                       workers_param))
        else:
            print(" {}: I/O intensive (blocking) workload detected!".format(severity))
            print(("\n Suggestion: see the DNS/SQL/NoSQL diagnosis for any slow query\n"
                   "reports, otherwise increase 'use_workers' or '{}_workers'!").format(
                       workers_param))

        print("-" * 70)
|
|
805
|
+
|
|
806
|
+
def get_opensips_pgroups(self):
    """Group the OpenSIPS processes reported by the 'ps' MI command.

    Each process entry is filed under one of three transport buckets
    according to its 'Type' string:

    * 'tcp': types starting with "TCP "; all of them share the single
      key 'TCP'.
    * 'hep': types containing "hep_"; a leading "SIP" is rewritten to
      "HEP" in the entry itself before grouping.
    * 'udp': any other type starting with "SIP receiver ".

    For 'hep' and 'udp' the group key is the type string with its first
    13 characters (the length of "SIP receiver ") removed.  Processes
    matching none of the rules are left out.

    When psutil is available, every process entry additionally gets a
    'cpumon' psutil.Process handle with its CPU counter primed.

    Returns:
        A dict {'udp': {...}, 'tcp': {...}, 'hep': {...}} mapping group
        keys to lists of process entries, or None when the 'ps' command
        returned None.
    """
    ps = comm.execute('ps')
    if ps is None:
        return None

    pgroups = {
        'udp': {},
        'tcp': {},
        'hep': {},
    }
    for proc in ps['Processes']:
        if have_psutil:
            proc['cpumon'] = psutil.Process(proc['PID'])
            # begin cycle count: the first non-blocking call only records
            # a reference point for later cpu_percent() readings
            proc['cpumon'].cpu_percent(interval=None)

        ptype = proc['Type']
        if ptype.startswith("TCP "):
            # OpenSIPS TCP is simplified, but normalize the format
            pgroups['tcp'].setdefault('TCP', []).append(proc)
        elif "hep_" in ptype:
            if ptype.startswith("SIP"):
                ptype = "HEP" + ptype[3:]
                proc['Type'] = ptype

            pgroups['hep'].setdefault(ptype[13:], []).append(proc)
        elif ptype.startswith("SIP receiver "):
            pgroups['udp'].setdefault(ptype[13:], []).append(proc)

    return pgroups
|
|
842
|
+
|
|
843
|
+
def diagnosis_summary(self):
    """Print the overview repeatedly until a refresh fails or ^C is hit.

    A refresh is one call to diagnosis_summary_loop(); as long as it
    returns a truthy value, another one is issued a second later.  A
    KeyboardInterrupt ends the loop and is acknowledged by printing '^C'.
    """
    try:
        # the loop condition performs the refresh itself
        while self.diagnosis_summary_loop():
            time.sleep(1)
    except KeyboardInterrupt:
        print('^C')
|
|
851
|
+
|
|
852
|
+
def diagnosis_summary_loop(self):
    """Fetch the current statistics and print a one-screen health overview.

    Runs the 'get_statistics' MI command, clears the terminal and prints
    one severity row for each subsystem whose statistics are present in
    the reply: worker capacity, shared memory, private memory, SIP
    processing, DNS, SQL and NoSQL queries.  The diagnosis footer is
    printed last.

    Returns:
        False when the statistics reply is empty (nothing is printed),
        True once the overview has been printed.
    """

    def percent(part, total):
        # A zero total means nothing was counted yet: report 0% instead
        # of raising ZeroDivisionError.
        return round(part / total * 100) if total else 0

    def memory_severity(used_perc, max_used_perc):
        # Grade current and peak memory usage percentages.
        if used_perc > 85 or max_used_perc > 90:
            return "CRITICAL"
        if used_perc > 70 or max_used_perc > 80:
            return "WARNING"
        return "OK"

    def slow_severity(slow_perc):
        # Grade the share of slow operations (a rounded percentage).
        if 0 <= slow_perc <= 1:
            return "OK"
        if 2 <= slow_perc <= 5:
            return "NOTICE"
        if 6 <= slow_perc <= 50:
            return "WARNING"
        return "CRITICAL"

    def report(label, severity, command):
        # Print one overview row; anything but "OK" points the user to
        # the matching detailed sub-command.
        hint = "" if severity == "OK" else \
            " (run 'diagnose {}' for more info)".format(command)
        print("{:<16} {}{}".format(label, severity, hint))

    stats = comm.execute('get_statistics', {
        'statistics': [
            'load', 'load1m', 'load10m', 'total_size', 'real_used_size',
            'max_used_size', 'rcv_requests', 'rcv_replies', 'processes_number',
            'slow_messages', 'pkmem:', 'dns:', 'sql:', 'cdb:'
        ]})
    if not stats:
        return False

    os.system("clear")
    print("{}OpenSIPS Overview".format(" " * 25))
    print("{}-----------------".format(" " * 25))

    if 'load:load' in stats:
        # The worst of the three load readings decides the severity.
        peak = max(int(stats['load:load']),
                   int(stats['load:load1m']),
                   int(stats['load:load10m']))
        if peak > 66:
            severity = "CRITICAL"
        elif peak > 40:
            severity = "WARNING"
        elif peak > 20:
            severity = "NOTICE"
        else:
            severity = "OK"

        report("Worker Capacity:", severity, 'load')

    if 'shmem:total_size' in stats:
        used = int(stats['shmem:real_used_size'])
        max_used = int(stats['shmem:max_used_size'])
        total = int(stats['shmem:total_size'])

        report("Shared Memory:",
               memory_severity(percent(used, total), percent(max_used, total)),
               'memory')

    if 'load:processes_number' in stats:
        procs = int(stats['load:processes_number'])

        severity = "OK"

        for proc in range(1, procs):
            try:
                used = int(stats['pkmem:{}-real_used_size'.format(proc)])
                total = used + int(stats['pkmem:{}-free_size'.format(proc)])
                max_used = int(stats['pkmem:{}-max_used_size'.format(proc)])
            except (KeyError, TypeError, ValueError):
                # statistic missing or not numeric for this process
                continue

            if total == 0:
                continue

            level = memory_severity(percent(used, total),
                                    percent(max_used, total))
            if level == "CRITICAL":
                # cannot get any worse, stop scanning
                severity = level
                break
            if level == "WARNING":
                # keep scanning: a later process may still be CRITICAL
                severity = level

        report("Private Memory:", severity, 'memory')

    # (row label, slow counter, counters summed into the total, sub-command)
    slow_checks = (
        ("SIP Processing:", 'core:slow_messages',
            ('core:rcv_requests', 'core:rcv_replies'), 'sip'),
        ("DNS Queries:", 'dns:dns_slow_queries',
            ('dns:dns_total_queries',), 'dns'),
        ("SQL queries:", 'sql:sql_slow_queries',
            ('sql:sql_total_queries',), 'sql'),
        ("NoSQL Queries:", 'cdb:cdb_slow_queries',
            ('cdb:cdb_total_queries',), 'nosql'),
    )
    for label, slow_key, total_keys, command in slow_checks:
        if slow_key not in stats:
            continue

        slow = int(stats[slow_key])
        total = sum(int(stats[key]) for key in total_keys)
        report(label, slow_severity(percent(slow, total)), command)

    self.print_diag_footer()
    return True
|
|
1023
|
+
|
|
1024
|
+
def __invoke__(self, cmd, params=None, modifiers=None):
    """Run the diagnosis selected by `cmd` and return its result.

    No command at all starts the live summary.  'load' forwards `params`
    to diagnose_load(), falling back to all three transports when none
    are given.  The remaining known commands take no arguments.  Any
    other command falls through and yields None.  `modifiers` is unused.
    """
    if cmd is None:
        return self.diagnosis_summary()

    # sub-command -> name of the argument-less method implementing it
    simple_commands = (
        ('dns', 'diagnose_dns'),
        ('sql', 'diagnose_sql'),
        ('nosql', 'diagnose_nosql'),
        ('sip', 'diagnose_sip'),
        ('memory', 'diagnose_mem'),
    )
    for keyword, method_name in simple_commands:
        if cmd == keyword:
            return getattr(self, method_name)()

    if cmd == 'load':
        transports = params if params else ['udp', 'tcp', 'hep']
        return self.diagnose_load(transports)

    return None
|
|
1041
|
+
|
|
1042
|
+
def __complete__(self, command, text, line, begidx, endidx):
    """Return the completion candidates for the argument being typed.

    Only 'load' takes arguments: its candidates are the transport names,
    narrowed down to those starting with `text` when some text was
    typed.  Whenever there is nothing to offer, [''] is returned.
    `line`, `begidx` and `endidx` are not used.
    """
    if command == 'load':
        candidates = ['udp', 'tcp', 'hep']
        if text:
            candidates = list(
                filter(lambda name: name.startswith(text), candidates))
        if candidates:
            return candidates

    return ['']
|
|
1052
|
+
|
|
1053
|
+
def __get_methods__(self):
    """Return the list of sub-command names of this module.

    The empty string stands for invoking the module without a
    sub-command.
    """
    # NOTE(review): 'brief' and 'full' are listed here, but the visible
    # __invoke__ has no branch for them - confirm whether they are
    # handled elsewhere.
    methods = [
        '',
        'sip',
        'dns',
        'sql',
        'nosql',
        'memory',
        'load',
        'brief',
        'full',
    ]
    return methods
|
|
1055
|
+
|
|
1056
|
+
def __exclude__(self):
    """Report whether this module should be excluded.

    Returns:
        A 2-tuple built from comm.valid(): the negation of its first
        element, followed by its second element unchanged (presumably a
        human-readable reason - verify against comm.valid()).
    """
    status = comm.valid()
    excluded = not status[0]
    return (excluded, status[1])
|
|
1059
|
+
|
|
1060
|
+
def desc_sip_msg(sip_msg):
    """Summarize a SIP message into a useful one-liner.

    The summary is made of up to two parts, joined by ", ":

    * for a reply (text starting with "SIP/2.0"): the rest of the first
      line, e.g. "200 OK"; for a request: everything preceding the
      "SIP/2.0" that closes the request line, e.g. "INVITE sip:bob@host";
    * "Call-ID: <value>" when a Call-ID header is found (matched
      case-insensitively).

    When neither part can be extracted, the first 20 characters of the
    text are returned instead, or "??? (unknown)" if `sip_msg` is empty
    or not a string.

    Args:
        sip_msg: the raw SIP message text; other types are tolerated.

    Returns:
        The one-line description.
    """
    desc = ""
    callid = ""

    if isinstance(sip_msg, str):
        if sip_msg.startswith("SIP/2.0"):
            # a SIP reply: keep the status line without the version token
            desc = sip_msg.partition("\r\n")[0][7:].strip()
        else:
            # a SIP request: keep the method and the request URI.  find()
            # returns -1 when the marker is absent, which must not be
            # used as a slice end (it would just chop the last character).
            version_pos = sip_msg.find("SIP/2.0\r\n")
            if version_pos != -1:
                desc = sip_msg[:version_pos].strip()

        match = re.search('Call-ID:(.*)\r\n', sip_msg, re.IGNORECASE)
        if match is not None:
            callid = "Call-ID: {}".format(match.group(1).strip())

    if not desc and not callid:
        if not sip_msg or not isinstance(sip_msg, str):
            desc = "??? (unknown)"
        else:
            desc = sip_msg[:20]

    return "{}{}{}".format(desc, ", " if desc and callid else "", callid)
|
|
1085
|
+
|
|
1086
|
+
def human_size(bytes, units=(' bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')):
    """Return a human readable string representation of a byte count.

    The value is divided by 1024 until it drops below 1024 or the largest
    available unit is reached, then formatted with one decimal and
    suffixed with that unit.  Values too large even for the last unit are
    expressed in that last unit instead of running out of units.

    Args:
        bytes: the size to format (name kept for keyword callers, even
            though it shadows the builtin).
        units: unit suffixes, smallest first; each one is 1024 times the
            previous one.

    Returns:
        The formatted size, e.g. "512.0 bytes" or "1.5KB".
    """
    value = bytes
    index = 0
    last = len(units) - 1
    while value >= 1024 and index < last:
        value /= 1024
        index += 1

    return "{:.1f}".format(value) + units[index]
|