dbca-utils 2.2.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dbca-utils
3
- Version: 2.2.0
3
+ Version: 3.0.0
4
4
  Summary: Utilities for DBCA Django apps
5
5
  Author-Email: Rocky Chen <rocky.chen@dbca.wa.gov.au>, Ashley Felton <ashley.felton@dbca.wa.gov.au>
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dbca-utils"
3
- version = "2.2.0"
3
+ version = "3.0.0"
4
4
  description = "Utilities for DBCA Django apps"
5
5
  authors = [
6
6
  { name = "Rocky Chen", email = "rocky.chen@dbca.wa.gov.au" },
@@ -0,0 +1,14 @@
1
+ import atexit
2
+
3
+ from django.apps import AppConfig
4
+ from .healthcheck import healthcheck
5
+
6
+
7
class DbcaUtilsConfig(AppConfig):
    """Django application configuration of the ``dbca_utils`` package."""

    name = 'dbca_utils'

    def ready(self):
        """Register the healthcheck urls, unless the healthcheck is disabled."""
        if not healthcheck.HEALTHCHECK_ENABLED:
            return
        healthcheck.register_healtcheckurls()
13
+
14
+
@@ -0,0 +1,620 @@
1
+ import os
2
+ import importlib
3
+ import logging
4
+ import subprocess
5
+ import random
6
+ import re
7
+ import time
8
+ import socket
9
+ import requests
10
+ from datetime import datetime
11
+
12
+ from django.urls import reverse,path,include
13
+ from django.conf import settings
14
+ from django.http import HttpResponseForbidden, JsonResponse,HttpResponseServerError
15
+ from django.core.signals import request_started
16
+ from django.core.cache import cache
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
# Feature switch: set the environment variable HEALTHCHECK_ENABLED to anything
# other than "true" (case-insensitive) to turn the healthcheck off.
# The flag used to be reset to True whenever it was False and the imported
# `cache` object was truthy, so the switch could not disable anything.
HEALTHCHECK_ENABLED = os.environ.get("HEALTHCHECK_ENABLED", "true").lower() == "true"

# Shell filter appended to the `ps` command to select the webapp processes.
PROCESS_FILTER = os.environ.get("WORKLOAD_PROCESS_FILTER", "| grep python")
# Prefix of every cache key used by the healthcheck.
CACHE_PREFIX = os.environ.get("CACHE_PREFIX", "")
# Port under which this workload is registered in the cache.
PORT = int(os.environ.get("WORKLOAD_PORT", 8080))
# WORKLOADS is the number of workloads which should be started.
# If the number of workloads is dynamic, please don't set it.
# Negative values are treated as 0.
WORKLOADS = max(int(os.environ.get("WORKLOADS", 0)), 0)
# True: the host name is registered as it is; False: the workload name is
# derived from the numeric suffix of the host name.
WORKLOAD_DEPLOYMENT = os.environ.get("WORKLOAD_DEPLOYMENT", "true").lower() == "true"
# Number of failed health requests after which a workload is removed from the cache.
WORKLOAD_FAILED_THRESHOLD = int(os.environ.get("WORKLOAD_FAILED_THRESHOLD", 2))
35
+
36
+
37
# Alphabet of the generated secrets: letters, digits and punctuation.
# It contains no whitespace, so a secret can be sent as a bearer token.
RANDOM_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789~!@#$%^&*()-_+=`{}[];':\",./<>?"
RANDOM_CHARS_MAX_INDEX = len(RANDOM_CHARS) - 1


def generate_secret():
    """
    Return a new random secret of 32 characters taken from RANDOM_CHARS.

    The secret is used as an access token, so the characters are drawn from
    the operating system's secure random source instead of the `random` module.
    """
    # local import: the module does not import `secrets` at file level
    import secrets
    return "".join(secrets.choice(RANDOM_CHARS) for _ in range(32))


# Access token of this workload; assigned when the workload is registered.
secret = None
44
+
45
def get_workloadname(index):
    """Return the workload name for the given index, for example ``workload3``."""
    return f"workload{index}"
47
+
48
def get_local_ip():
    """
    Return the local ip address of this host as a string.

    A UDP socket is connected to a dummy address and the socket's own address
    is read back; '127.0.0.1' is returned if that fails.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
        try:
            # the dummy address doesn't have to be reachable
            probe.connect(('192.168.1.1', 1))
            return probe.getsockname()[0]
        except Exception:
            # fall back to localhost, e.g. when the network is down
            return '127.0.0.1'
61
+
62
# Name of this host, and the name under which it is registered in the cache.
hostname = socket.gethostname()
if WORKLOAD_DEPLOYMENT:
    registerhostname = hostname
else:
    # The workload name is derived from the numeric suffix of the host name ("<name>-<index>").
    statefulset_hostname_re = re.compile(r"-(?P<index>\d+)$")
    _index_match = statefulset_hostname_re.search(hostname)
    if _index_match:
        registerhostname = get_workloadname(_index_match.group("index"))
    else:
        # Without a numeric suffix no workload name can be derived.
        # Use the host name instead of failing while the module is imported.
        logger.warning("The hostname '{}' doesn't end with '-<index>', register the workload with its hostname.".format(hostname))
        registerhostname = hostname

# Local ip address of this host
ip = get_local_ip()
70
+
71
# Folder holding one register file per webapp process
webapp_process_registerfolder = "/tmp/__webapp__/proc"


def get_processregisterfile(pid):
    """Return the path of the register file of the process with the given pid."""
    filename = str(pid)
    return os.path.join(webapp_process_registerfolder, filename)
75
+
76
+
77
def register_webappprocess():
    """
    Register the current process as a webapp process.

    A file named after the process id is written to the process register
    folder; its content is the registration time.
    Failures are logged and never raised.
    """
    pid = os.getpid()
    logger.debug("Register the webapp process '{}({}).{}'.".format(hostname,ip,pid))
    try:
        # exist_ok avoids a failure when another process creates the folder at the same time
        os.makedirs(webapp_process_registerfolder, exist_ok=True)

        registerfile = get_processregisterfile(pid)
        with open(registerfile,"wt") as f:
            f.write(datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f"))
    except Exception:
        # exc_info adds the reason of the failure to the log record
        logger.error("Failed to register the webapp process '{}({}).{}'.".format(hostname,ip,pid), exc_info=True)
94
+
95
def unregister_webappprocess():
    """
    Remove the register file of the current process.

    A failure is only logged if the register file still exists afterwards;
    nothing is raised.
    """
    pid = os.getpid()
    # computed before the try block because the except branch needs it too
    registerfile = get_processregisterfile(pid)
    logger.debug("Unregister the webapp process '{}({}).{}'.".format(hostname,ip,pid))
    try:
        os.remove(registerfile)
    except Exception:
        # a missing file means there is nothing to unregister
        if os.path.exists(registerfile):
            logger.error("Failed to unregister the webapp process '{}({}).{}'.".format(hostname,ip,pid))
105
+
106
+
107
# Name of the entry holding the version counter inside the cached dictionaries
item_version = "__version__"
# Cache key of the registered workloads, and the key of the lock protecting it
key_workloads = CACHE_PREFIX + "__workloads__"
key_workloads_lock = key_workloads + "lock__"
110
+
111
def register_webappserver(sender,environ,**kwargs):
    """
    Register this workload in the cache shared by all workloads.

    Receiver of the request_started signal. The cached entry of a workload is
    [[ip, port], secret, failed times]. A missing entry is created; an existing
    entry is corrected if ip, port or the failed times are outdated, in which
    case a new secret is generated. After a successful registration the
    receiver disconnects itself; after a failure the error is logged and the
    registration is tried again with the next request.
    """
    global secret
    pid = os.getpid()
    logger.debug("Register the webapp server '{}({}).{}'.".format(hostname,ip,pid))
    try:
        workloads_changed = False
        workloads = cache.get(key_workloads) or {item_version:0}
        if registerhostname not in workloads:
            #not registered by other webservers running in the same workload
            secret = generate_secret()
            workloads[registerhostname] = [[ip,PORT],secret,0]
            workloads_changed = True
        else:
            #already registered by other webservers, check whether the data is correct
            data = workloads[registerhostname]
            if not isinstance(data[0],list):
                data[0] = [ip,PORT]
                workloads_changed = True
            if data[0][0] != ip:
                data[0][0] = ip
                workloads_changed = True
            if data[0][1] != PORT:
                data[0][1] = PORT
                workloads_changed = True
            if data[2] != 0:
                #reset the failed times
                data[2] = 0
                workloads_changed = True
            if workloads_changed:
                #workload data is changed, use a new secret
                secret = generate_secret()
                data[1] = secret
            else:
                #workload data is not changed, reuse the registered secret
                secret = data[1]

        if workloads_changed:
            #save the workloads data to cache
            save_workloads(workloads)

    except Exception as ex:
        logger.error("Failed to register the webapp webserver '{}({}).{}'. {}: {}".format(hostname,ip,pid,ex.__class__.__name__,str(ex)))
        #This function writes no register file, so there is nothing to clean up.
        #The former cleanup used an undefined variable and a misspelled exception
        #class, which raised a NameError out of the signal receiver.
        #ignore the exception; the receiver stays connected and tries again
        return

    #register successfully, disconnect the receiver, no need to register again.
    request_started.disconnect(dispatch_uid="register_webappserver")
    logger.debug("Successfully register the webserver({}<{}>:{}.{}) to the cache.".format(hostname,ip,PORT,pid))
171
+
172
+
173
#Connect the receiver which registers the workload when a request is started.
#The receiver disconnects itself after a successful registration.
if HEALTHCHECK_ENABLED:
    request_started.connect(register_webappserver,dispatch_uid="register_webappserver")

#Shell command printing cpu usage, virtual memory size, resident memory size and command of the filtered processes
GET_RESOURCEUSAGE_CMD = "ps ax -o %cpu=,vsz=,rss=,cmd= {}".format(PROCESS_FILTER).strip()
#The single commands of the pipeline above
GET_RESOURCEUSAGE_PIPECMDS = list(map(str.strip, GET_RESOURCEUSAGE_CMD.split("|")))
181
+
182
def get_workload_healthcheckdata():
    """
    Collect the resource usage of the webapp processes running in this workload.

    Return a tuple (status code, data):
    (200, dict with totals, process count and per process min/max values) on success,
    (500, error message) if the resource usage command failed.
    Memory values are the values printed by ps divided by 1024.
    """
    completed = subprocess.run(GET_RESOURCEUSAGE_CMD,shell=True,capture_output=True,text=True)
    if completed.returncode != 0:
        return (500,"Failed to get the resource usage data for webapp processes.{}".format(completed.stderr))

    #parse the output, one process per line: cpu, virtual memory, physical memory, command
    usages = []
    for rawline in completed.stdout.split("\n"):
        entry = rawline.strip()
        #ignore empty lines and the processes of the resource usage command itself
        if not entry or any(cmd in entry for cmd in GET_RESOURCEUSAGE_PIPECMDS):
            continue
        fields = entry.split(maxsplit=3)
        fields[0] = float(fields[0])
        fields[1] = float(fields[1]) / 1024
        fields[2] = float(fields[2]) / 1024
        #the command is not part of the result
        del fields[3]
        usages.append(fields)

    #populate the resource data
    stats = {
        "total_cpu":0,
        "total_vmemory":0,
        "total_pmemory":0,
        "processes":0,
        "min_cpu":None,
        "max_cpu":None,
        "min_vmemory":None,
        "max_vmemory":None,
        "min_pmemory":None,
        "max_pmemory":None
    }
    metrics = (("cpu",0),("vmemory",1),("pmemory",2))
    for usage in usages:
        stats["processes"] += 1
        for metric,position in metrics:
            value = usage[position]
            stats["total_" + metric] += value
            lowest = "min_" + metric
            highest = "max_" + metric
            if stats[lowest] is None or value < stats[lowest]:
                stats[lowest] = value
            if stats[highest] is None or value > stats[highest]:
                stats[highest] = value

    return (200,stats)
238
+
239
bearer_token_re = re.compile(r"^Bearer\s+(?P<token>\S+)\s*$")


def get_auth_bearer(request):
    """
    Extract the bearer token from the Authorization header of the request.

    Return the token; return None if the header is missing, empty or not of
    the form "Bearer <token>".
    """
    # `or ''` also covers a header which is present but None
    bearer_auth = (request.META.get('HTTP_AUTHORIZATION') or '').strip()
    m = bearer_token_re.search(bearer_auth)
    return m.group('token') if m else None
251
+
252
#Cache key of the assigned workloads, and the key of the lock protecting it
key_assignedworkloads = "{}__assignedworkloads__".format(CACHE_PREFIX)
key_assignedworkloads_lock = "{}lock__".format(key_assignedworkloads)


def str_workloads(workloads):
    """
    Return a short text for the workloads data, used in log and error messages.

    Each workload is printed as host=ip:port(failed times); the version entry
    is printed as name=value. The items are joined with ",".
    """
    items = []
    for host,data in workloads.items():
        if host == item_version:
            items.append("{}={}".format(host,data))
        else:
            items.append("{}={}:{}({})".format(host,data[0][0],data[0][1],data[2]))
    return ",".join(items)
257
+
258
+
259
def save_workloads(workloads,unreached_servers=None):
    """
    Save the updated workloads to cache, protected by a cache based lock.

    If the cached version differs from the version of the given workloads,
    the cached workloads missing in the given data are added first, except the
    workloads listed in unreached_servers. The version is always increased.
    The given workloads dictionary is changed in place.
    """
    logger.debug("Begin to save the changed workloads data({}) to cache.".format(str_workloads(workloads)))
    #try to get the lock; if already locked, wait 10 milliseconds and try again
    while not cache.add(key_workloads_lock, 1, timeout=1):
        time.sleep(0.01)

    try:
        cached = cache.get(key_workloads)
        outdated = bool(cached) and cached.get(item_version,0) != workloads[item_version]
        if not outdated:
            #cached workloads data was not changed by others
            workloads[item_version] += 1
        else:
            #cached workloads data was changed after fetching the workloads data
            #take over the workloads added by others
            excluded = unreached_servers or ()
            for name,entry in cached.items():
                if name == item_version or name in workloads or name in excluded:
                    continue
                workloads[name] = entry
            cached_version = cached.get(item_version,0)
            if cached_version == 0:
                workloads[item_version] += 1
            else:
                workloads[item_version] = cached_version + 1

        #save the new workloads data
        cache.set(key_workloads,workloads)
        logger.debug("Successfully save the workloads:{}".format(str_workloads(workloads)))
    finally:
        #release the lock
        cache.delete(key_workloads_lock)
297
+
298
def save_assignedworkloads(assignedworkloads):
    """
    Save the updated assigned workloads to cache, protected by a cache based lock.

    If the cached version differs from the version of the given data, every
    cached assignment replaces the corresponding entry of the given data first.
    The version is always increased.
    The given dictionary is changed in place.
    """
    logger.debug("Begin to save the changed assigned workloads data({}) to cache.".format(assignedworkloads))
    #try to get the lock; if already locked, wait 10 milliseconds and try again
    while not cache.add(key_assignedworkloads_lock, 1, timeout=1):
        time.sleep(0.01)

    try:
        cached = cache.get(key_assignedworkloads)
        outdated = bool(cached) and cached.get(item_version,0) != assignedworkloads[item_version]
        if not outdated:
            #cached data was not changed by others
            assignedworkloads[item_version] += 1
        else:
            #sync the latest cache data
            for name,assigned in cached.items():
                if name == item_version:
                    continue
                if name not in assignedworkloads or assignedworkloads[name] != assigned:
                    assignedworkloads[name] = assigned

            cached_version = cached.get(item_version,0)
            if cached_version == 0:
                assignedworkloads[item_version] += 1
            else:
                assignedworkloads[item_version] = cached_version + 1

        #save the new assigned workloads data
        cache.set(key_assignedworkloads,assignedworkloads)
        logger.debug("Successfully save the assigned workloads:{}".format(assignedworkloads))
    finally:
        #release the lock
        cache.delete(key_assignedworkloads_lock)
338
+
339
def populate_summary_data(datas):
    """
    Populate the resource summary data from workloads' resource usage data.

    datas maps a workload name to its resource usage dictionary, or to an
    error message (a string) for a failed workload.
    The summary is stored in datas under the key "summary"; nothing is returned.
    """
    summary = {
        "total_cpu":0,
        "total_vmemory":0,
        "total_pmemory":0,
        "total_processes":0,
        "running_workloads":0,
        "failed_workloads":0,
        "min_process_cpu":None,
        "max_process_cpu":None,
        "min_process_vmemory":None,
        "max_process_vmemory":None,
        "min_process_pmemory":None,
        "max_process_pmemory":None
    }
    for serverdata in datas.values():
        if isinstance(serverdata,str):
            #an error message instead of resource data
            summary["failed_workloads"] += 1
            continue
        summary["running_workloads"] += 1
        summary["total_cpu"] += serverdata["total_cpu"]
        summary["total_vmemory"] += serverdata["total_vmemory"]
        summary["total_pmemory"] += serverdata["total_pmemory"]
        summary["total_processes"] += serverdata["processes"]

        for metric in ("cpu","vmemory","pmemory"):
            lowest = serverdata["min_" + metric]
            highest = serverdata["max_" + metric]
            min_key = "min_process_" + metric
            max_key = "max_process_" + metric
            #a workload without any process reports None as min/max value;
            #None can't be compared with a number, so it is skipped
            if lowest is not None and (summary[min_key] is None or summary[min_key] > lowest):
                summary[min_key] = lowest
            if highest is not None and (summary[max_key] is None or summary[max_key] < highest):
                summary[max_key] = highest

    datas["summary"] = summary
383
+
384
+ workload_healthcheck_url = None
385
+ headers={"Authorization":None,"Accept": "application/json"}
386
+
387
def harvest_healthdata(request):
    """
    Harvest the health data from all workloads registered in the cache.

    The data of this workload is collected locally, the data of the other
    workloads is requested via http with the secret of the workload as bearer
    token. A failed request increases the failed times of the workload; a
    workload reaching WORKLOAD_FAILED_THRESHOLD is removed from the workloads.
    Changed workloads data is saved to the cache.

    Return a tuple (workloads, servers_res); servers_res maps a workload name
    to a tuple (status code, health data or error message). The status code is
    -1 if the request itself failed.
    """
    global secret
    global workload_healthcheck_url
    if not workload_healthcheck_url:
        workload_healthcheck_url = reverse('healthcheck:workload_healthdata')

    workloads = cache.get(key_workloads) or {item_version:0}
    workloads_changed = False
    logger.debug("Get the workloads from cache :{}".format(str_workloads(workloads)))

    if registerhostname not in workloads:
        #this workload is not registered, register it now
        secret = generate_secret()
        workloads[registerhostname] = [[ip,PORT],secret,0]
        workloads_changed = True

    #seconds to wait for a workload; without a timeout an unresponsive workload blocks this request forever
    request_timeout = 10
    servers_res = {}
    unreached_servers = []
    #harvest health data from all workloads
    for servername, serverdata in workloads.items():
        if servername == item_version:
            continue
        if servername == registerhostname:
            servers_res[servername] = get_workload_healthcheckdata()
            continue

        serverip,port = serverdata[0]
        #use a copy per request instead of changing the module level headers
        request_headers = dict(headers)
        request_headers["Authorization"] = "Bearer {}".format(serverdata[1])
        request_headers["host"] = request.get_host()
        url = "http://{}:{}{}".format(serverip,port,workload_healthcheck_url)
        try:
            res = requests.get(url,headers=request_headers,timeout=request_timeout)
        except Exception as ex:
            #the server is offline
            workloads_changed = True
            serverdata[2] += 1
            if serverdata[2] >= WORKLOAD_FAILED_THRESHOLD:
                #failed times reached WORKLOAD_FAILED_THRESHOLD.
                unreached_servers.append(servername)
            #NOTE(review): the error is recorded for every failed request, not only after reaching the threshold - confirm
            servers_res[servername] = (-1,"{1}:{2},url={0}".format(url,ex.__class__.__name__,str(ex)))
            continue
        if res.status_code in (502,503,504):
            #the server is offline
            workloads_changed = True
            serverdata[2] += 1
            if serverdata[2] >= WORKLOAD_FAILED_THRESHOLD:
                #failed times reached WORKLOAD_FAILED_THRESHOLD.
                unreached_servers.append(servername)
            servers_res[servername] = (res.status_code,"{1}:{2},url={0}".format(url,res.status_code,res.text))
        elif res.status_code == 200:
            #the server is in good health, add the health data to servers_res
            servers_res[servername] = (200,res.json())
            if serverdata[2] > 0:
                serverdata[2] -= 1
                workloads_changed = True
        else:
            #the server is online, but running into error, add the error message to servers_res
            #the arguments are passed in the order expected by the placeholders (they were mixed up before)
            servers_res[servername] = (res.status_code,"{1}: {2}. url={0}".format(url,res.status_code,res.text))
            if serverdata[2] > 0:
                serverdata[2] -= 1
                workloads_changed = True

    for servername in unreached_servers:
        del workloads[servername]

    logger.debug("healthdata harvest result :{}".format(servers_res))

    if workloads_changed:
        save_workloads(workloads,unreached_servers)

    return (workloads,servers_res)
458
+
459
#Status codes of a workload which is treated as offline; -1 is used if the health request itself failed
OFFLINE_STATUSCODE_LIST = (502,503,504,-1,-2)
if WORKLOADS > 0 and WORKLOAD_DEPLOYMENT:
    #has a fixed number of workloads and it is a deployment
    WORKLOADNAMES = [get_workloadname(index) for index in range(WORKLOADS)]
    def healthdata_view(request):
        """
        Return the health data of all workloads as json, one entry per workload name plus "summary".

        The host names of a deployment are not fixed, so each workload name is
        assigned to a registered server; the assignment is kept in the cache.
        A workload name without a server, or with an offline server, is
        assigned to another available server if possible.
        """
        #process the workloads which are already assigned a workload name
        workloads,servers_res = harvest_healthdata(request)
        assignedworkloads = cache.get(key_assignedworkloads) or {item_version:1}
        logger.debug("Get assigned workloads:{}".format(assignedworkloads))
        datas = {}
        reassigned_workloads = 0
        for workloadname in WORKLOADNAMES:
            servername = assignedworkloads.get(workloadname)
            if not servername:
                #workloadname is not assigned to a server
                reassigned_workloads += 1
                continue

            #workload name is assigned to a server
            if servername not in servers_res :
                #the server is not available
                reassigned_workloads += 1
                continue

            datas[servername] = servers_res[servername]
            if servers_res[servername][0] in OFFLINE_STATUSCODE_LIST:
                #Related workload is offline, need to reassign another workload
                reassigned_workloads += 1
            #NOTE(review): an assigned server is removed from the pool of free servers whether online or not - confirm
            del servers_res[servername]

        assignedworkloads_changed = False
        if reassigned_workloads > 0:
            #Some workloads are not assigned a workload name or are not available
            #Use the following steps to replace the existing one with a new one if possible
            #Step 1: Replace the unavailable server with a new one
            #Step 2: Assign a new server to the workload names missing in the assignedworkloads
            step = 0
            while reassigned_workloads > 0:
                step += 1
                for workloadname in WORKLOADNAMES:
                    servername = assignedworkloads.get(workloadname)
                    if servername in datas and datas[servername][0] not in OFFLINE_STATUSCODE_LIST:
                        #related server is online.no need to reassign
                        continue
                    elif step == 1:
                        #step 1 only reassign the assigned workloads
                        if workloadname not in assignedworkloads:
                            continue
                    replacedservername = None
                    for name,res in servers_res.items():
                        if res[0] == 200:
                            #found a good one, choose it
                            replacedservername = name
                            break
                        elif res[0] in OFFLINE_STATUSCODE_LIST:
                            continue
                        elif not replacedservername:
                            #found an available one, but it has some issues, choose it if can't find a good one
                            replacedservername = name

                    logger.debug("Replaced {1} with {2} for workload({0})".format(workloadname,servername,replacedservername))
                    if replacedservername:
                        datas[replacedservername] = servers_res[replacedservername]
                        del servers_res[replacedservername]
                        assignedworkloads[workloadname] = replacedservername
                        assignedworkloads_changed = True

                    if servers_res:
                        reassigned_workloads -= 1
                    else:
                        #no free server is left
                        reassigned_workloads = 0
                    if reassigned_workloads == 0:
                        break

        if assignedworkloads_changed:
            #save the workloads
            logger.debug("Save the changed running workloads data({}).".format(assignedworkloads))
            save_assignedworkloads(assignedworkloads)

        #map the healthdata result to workload. and remove status code
        result = {}
        for workloadname in WORKLOADNAMES:
            servername = assignedworkloads.get(workloadname)
            if not servername:
                result[workloadname] = "Can't find an available host for this non-assigned host.registered workloads: {0}, assigned workloads:{1}".format(str_workloads(workloads),assignedworkloads)
            elif servername not in datas:
                result[workloadname] = "Can't find an available host for this assigned offline host({2}).registered workloads: {0}, assigned workloads:{1}".format(str_workloads(workloads),assignedworkloads,servername)
            else:
                healthdata = datas[servername][1]
                #an error message is a string and can't take the hostname
                if isinstance(healthdata,dict):
                    healthdata["hostname"] = servername
                result[workloadname] = healthdata

        datas.clear()

        populate_summary_data(result)

        return JsonResponse(result)

elif WORKLOADS > 0 and not WORKLOAD_DEPLOYMENT:
    WORKLOADNAMES = [get_workloadname(index) for index in range(1,WORKLOADS + 1,1)]
    def healthdata_view(request):
        """
        Return the health data of all workloads as json, one entry per workload name plus "summary".

        A workload without harvested health data is reported as offline.
        """
        workloads,servers_res = harvest_healthdata(request)

        result = {}
        for servername in WORKLOADNAMES:
            #look up the workload name; the former check used the result dictionary itself as key, which raised a TypeError
            if servername in servers_res:
                result[servername] = servers_res[servername][1]
            else:
                result[servername] = "Workload is offline.workloads={}".format(str_workloads(workloads))

        populate_summary_data(result)

        return JsonResponse(result)
else:
    def healthdata_view(request):
        """
        Return the health data of all registered workloads as json, one entry per workload plus "summary".
        """
        workloads,servers_res = harvest_healthdata(request)

        result = {}
        for servername, serverdata in servers_res.items():
            #remove the status code
            result[servername] = serverdata[1]

        populate_summary_data(result)

        return JsonResponse(result)
583
+
584
def workload_healthdata_view(request):
    """
    Return the health data of this workload as json.

    The request must carry the secret of this workload as bearer token;
    otherwise a 403 response is returned. If the token doesn't match the
    secret known by this process, the secret is read again from the cache
    before the request is rejected. A 500 response with the error message is
    returned if the health data can't be collected.
    """
    global secret
    token = get_auth_bearer(request)
    if not token:
        return HttpResponseForbidden("Missing access token")

    if not secret or secret != token:
        #the secret can be changed by another process, get the latest one from the cache
        #cache.get returns None if no workloads are cached
        workloads = cache.get(key_workloads) or {}
        data = workloads.get(registerhostname)
        if data:
            secret = data[1]

        if secret != token:
            return HttpResponseForbidden("Access token doesn't match")

    statuscode,data = get_workload_healthcheckdata()
    if statuscode == 200:
        return JsonResponse(data)
    else:
        return HttpResponseServerError(data)
604
+
605
def register_healtcheckurls():
    """
    Append the healthcheck urls to the url patterns of the root url configuration.

    The urls are included with the namespace "healthcheck"; the included list
    is empty if the healthcheck is disabled.
    """
    rootconf_module = importlib.import_module(settings.ROOT_URLCONF)
    if not rootconf_module:
        raise Exception("Failed to load module '{}'".format(settings.ROOT_URLCONF))

    urlpatterns = []
    if HEALTHCHECK_ENABLED:
        urlpatterns.append(path('healthcheck/healthdata', healthdata_view,name="healthdata"))
        urlpatterns.append(path('workload/healthcheck/healthdata',workload_healthdata_view,name="workload_healthdata"))

    healthcheck_urls = include((urlpatterns,'healthcheck'),namespace="healthcheck")
    rootconf_module.urlpatterns.append(path('',healthcheck_urls))
620
+
@@ -0,0 +1,16 @@
1
+ from django.urls import path
2
+ from django.conf import settings
3
+
4
+ from . import healthcheck
5
+
6
app_name = 'healthcheck'

#The healthcheck urls are only provided if the healthcheck is enabled
urlpatterns = []
if healthcheck.HEALTHCHECK_ENABLED:
    urlpatterns += [
        path('healthcheck/healthdata', healthcheck.healthdata_view,name="healthdata"),
        path('healthcheck/workload_healthdata', healthcheck.workload_healthdata_view,name="workload_healthdata"),
    ]
15
+
16
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes