rda-python-common 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,879 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : pg_sig.py
5
+ #
6
+ # Author : Zaihua Ji, zji@ucar.edu
7
+ # Date : 08/05/2020
8
+ # 2025-01-10 transferred to package rda_python_common from
9
+ # https://github.com/NCAR/rda-shared-libraries.git
10
+ # 2025-11-20 convert to class PgSIG
11
+ # Purpose : python library module for start and control daemon process
12
+ #
13
+ # Github : https://github.com/NCAR/rda-python-common.git
14
+ #
15
+ ###############################################################################
16
+ #
17
+ import os
18
+ import re
19
+ import sys
20
+ import errno
21
+ import signal
22
+ import time
23
+ from contextlib import contextmanager
24
+ from .pg_dbi import PgDBI
25
+
26
+ class PgSIG(PgDBI):
27
+
28
def __init__(self):
    """Set up the daemon/signal bookkeeping state on top of the parent class state."""
    super().__init__()  # initialize parent class
    self.VUSERS = []    # login names allowed to start this daemon
    self.CPIDS = {}     # child pid -> process name; up to 'mproc' children at one time for the daemon
    self.CBIDS = {}     # background pid -> command; up to 'bproc' background processes for each child
    # dump targets currently in effect for stderr/stdout ('DEF' is the discard target)
    self.SDUMP = {
        'DEF' : '/dev/null',
        'ERR' : '',
        'OUT' : ''
    }
    self.PGSIG = {
        'QUIT'  : 0,      # 1 if QUIT signal received, quit server if no child
        'MPROC' : 1,      # default number of multiple processes
        'BPROC' : 1,      # default number of multiple background processes
        'ETIME' : 20,     # default error waiting time (in seconds)
        'WTIME' : 120,    # default waiting time (in seconds)
        'DTIME' : 600,    # the daemon record refresh time (in seconds)
        'RTIME' : 2400,   # the web rda config unlocking and unconfigured system down waiting time (in seconds)
        'CTIME' : 4800,   # the lock cleaning & configured system down waiting time (in seconds)
        'PPID'  : -1,     # 1 - server, (> 1) - child, 0 - non-daemon mode
        'PID'   : 0,      # current process ID
        'DNAME' : '',     # daemon name
        'DSTR'  : '',     # string for daemon with user login name
        'MTIME' : 0,      # maximum daemon running time in seconds, 0 for unlimited
        'STIME' : 0,      # time the daemon is started
        'STRTM' : '',     # string format of 'STIME'
    }
55
+
56
+ # add users for starting this daemon
57
def add_vusers(self, user = None, mores = None):
    """Register login names allowed to start the daemon.

    A falsy `user` resets the list; otherwise `user` is appended.  Any names
    in `mores` are appended afterwards.
    """
    if user:
        self.VUSERS.append(user)
    else:
        self.VUSERS = []   # clean all vusers
    if mores:
        self.VUSERS.extend(mores)
63
+
64
+ # valid user for starting this daemon
65
def check_vuser(self, user, aname = None):
    """Log with self.LGEREX when `user` is not one of the registered valid users.

    Nothing is checked when `user` is falsy or no valid users are registered.
    """
    if not (user and self.VUSERS):
        return
    if user in self.VUSERS:
        return
    allowed = ', '.join(self.VUSERS)
    self.pglog("{}: must be '{}' to run '{}' in Daemon mode".format(user, allowed, aname), self.LGEREX)
75
+
76
+ # turn this process into a daemon
77
+ # aname - application name, or daemon name
78
+ # uname - user login name to started the application
79
+ # mproc - upper limit of multiple child processes
80
+ # wtime - waiting time (in seconds) for next process for the daemon
81
+ # logon - turn on the logging if true
82
+ # bproc - multiple background processes if > 1
83
+ # mtime - maximum running time for the daemon if provided
84
def start_daemon(self, aname, uname, mproc = 1, wtime = 120, logon = 0, bproc = 1, mtime = 0):
    """Fork the current process into a daemon.

    The launching (parent) process logs the start message and exits; the
    forked child becomes session leader, installs the signal handlers,
    records its identity in self.PGSIG, redirects the standard streams and
    disconnects from the database.  Exits immediately when check_daemon()
    reports a daemon of the same name already running.
    """
    dstr = "Daemon '{}'{} on {}".format(aname, (" By {}".format(uname) if uname else ''), self.PGLOG['HOSTNAME'])
    pid = self.check_daemon(aname, uname)
    if pid:
        self.pglog("***************** WARNNING **************************\n" +
                   "** {} is running as PID={}\n".format(dstr, pid) +
                   "** You need stop it before starting a new one!\n" +
                   "*****************************************************" , self.WARNLG)
        self.pglog("{} is already running as PID={}".format(dstr, pid), self.FRCLOG|self.MSGLOG)
        sys.exit(0)
    # only values above 1 override the defaults held in self.PGSIG
    if mproc > 1: self.PGSIG['MPROC'] = mproc
    if bproc > 1: self.PGSIG['BPROC'] = bproc
    self.PGSIG['WTIME'] = self.get_wait_time(wtime, 120, "Polling Wait Time")
    self.PGSIG['MTIME'] = self.get_wait_time(mtime, 0, "Maximum Running Time")
    pid = self.process_fork(dstr)
    cpid = pid if pid > 0 else os.getpid()   # the daemon's pid, seen from either side of the fork
    msg = "PID={},PL={},WI={}".format(cpid, self.PGSIG['MPROC'], self.PGSIG['WTIME'])
    if self.PGSIG['MTIME']: msg += ",MT={}".format(self.PGSIG['MTIME'])
    logmsg = "{}({}) started".format(dstr, msg)
    if logon: logmsg += " With Logging On"
    if pid > 0:
        # parent side of the fork: report and leave the child running
        self.pglog(logmsg, self.WARNLG)
        sys.exit(0)
    os.setsid()
    os.umask(0)
    # setup to catch signals in daemon only
    signal.signal(signal.SIGCHLD, self.clean_dead_child)
    signal.signal(signal.SIGQUIT, self.signal_catch)
    signal.signal(signal.SIGUSR1, self.signal_catch)
    signal.signal(signal.SIGUSR2, self.signal_catch)
    self.PGSIG['DSTR'] = dstr
    self.PGSIG['DNAME'] = aname
    self.PGSIG['STIME'] = int(time.time())
    self.PGSIG['STRTM'] = self.current_datetime(self.PGSIG['STIME'])
    self.PGSIG['PPID'] = 1
    self.PGSIG['PID'] = cpid
    sys.stdin = open(self.SDUMP['DEF'])
    self.cmdlog("{} By {}".format(logmsg, self.PGSIG['STRTM']))
    if logon:
        self.PGLOG['LOGMASK'] &= ~(self.WARNLG|self.EMLLOG)   # turn off warn/email in daemon
        self.set_dump()
    else:
        self.PGLOG['LOGMASK'] &= ~(self.LGWNEM)   # turn off log/warn/email in daemon
        self.set_dump(self.SDUMP['DEF'])
    self.PGLOG['BCKGRND'] = 1   # make sure the background flag is always on
    self.pgdisconnect(1)   # disconnect database in daemon
130
+
131
+ # set dump output file
132
def set_dump(self, default = None):
    """Point sys.stderr / sys.stdout at the configured dump files.

    Targets come from get_environment("ERRDUMP"/"OUTDUMP", default); when
    empty they fall back to the error/log files under PGLOG['LOGPATH'].
    A stream is reopened (append mode) only when its target differs from
    the one recorded in self.SDUMP.
    """
    log = self.PGLOG
    err_target = self.get_environment("ERRDUMP", default)
    out_target = self.get_environment("OUTDUMP", default)

    if not err_target:
        if not log['ERRFILE']:
            log['ERRFILE'] = re.sub(r'\.log$', '.err', log['LOGFILE'], 1)
        err_target = "{}/{}".format(log['LOGPATH'], log['ERRFILE'])
    if err_target != self.SDUMP['ERR']:
        sys.stderr = open(err_target, 'a')
        self.SDUMP['ERR'] = err_target

    if not out_target:
        out_target = "{}/{}".format(log['LOGPATH'], log['LOGFILE'])
    if out_target != self.SDUMP['OUT']:
        sys.stdout = open(out_target, 'a')
        self.SDUMP['OUT'] = out_target
146
+
147
+ # stop daemon and log the ending info
148
def stop_daemon(self, msg):
    """Re-enable message logging and log the daemon's start/stop summary."""
    reason = (" with " + msg) if msg else ''
    self.PGLOG['LOGMASK'] |= self.MSGLOG   # turn on logging before daemon stops
    summary = "{} Started at {}, Stopped gracefully{} by {}".format(
        self.PGSIG['DSTR'], self.PGSIG['STRTM'], reason, self.current_datetime())
    self.pglog(summary, self.LOGWRN)
152
+
153
+ # check if a daemon is running already
154
+ # aname - application name for the daemon
155
+ # uname - user login name who started the daemon
156
+ # return the process id if yes and 0 if not
157
def check_daemon(self, aname, uname = None):
    """Return the pid of a running daemon named `aname`, or 0 if none is found.

    Scans `ps` output for a matching process whose parent pid is 1, ignoring
    the current process.  When `uname` is given it is first validated with
    check_vuser() and the search is limited to that user.
    """
    if uname:
        self.check_vuser(uname, aname)
        command = "ps -u {} -f | grep {} | grep ' 1 '".format(uname, aname)
        pattern = r"^\s*{}\s+(\d+)\s+1\s+".format(uname)
    else:
        command = "ps -C {} -f | grep ' 1 '".format(aname)
        pattern = r"^\s*\w+\s+(\d+)\s+1\s+"
    output = self.pgsystem(command, self.LOGWRN, 20+1024)
    if not output:
        return 0
    own_pid = os.getpid()
    for row in output.split('\n'):
        found = re.match(pattern, row)
        if not found:
            continue
        pid = int(found.group(1))
        if pid > 0 and pid != own_pid:
            return pid
    return 0
174
+
175
+ # check if an application is running already; other than the current processs
176
+ # aname - application name
177
+ # uname - user login name who started the application
178
+ # argv - argument string
179
+ # return the process id if yes and 0 if not
180
def check_application(self, aname, uname = None, sargv = None):
    """Return the pid of another running instance of `aname`, or 0 if none.

    Processes belonging to the current process's own ancestry (current pid,
    parent pid and, transitively, their listed parents) are excluded.  When
    `sargv` is given, a candidate counts only if its argument string is
    contained in `sargv`.
    """
    if uname:
        self.check_vuser(uname, aname)
        pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
        mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
    else:
        pcmd = "ps -C {} -f".format(aname)
        mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
    buf = self.pgsystem(pcmd, self.LOGWRN, 20+1024)
    if not buf: return 0
    cpids = [os.getpid(), os.getppid()]   # pids known to belong to the current process chain
    pids = []     # candidate pids
    ppids = []    # parent pid of each candidate (parallel to pids)
    astrs = []    # argument string of each candidate (filled only when sargv is given)
    lines = buf.split('\n')
    for line in lines:
        ms = re.match(mp, line)
        if not ms: continue
        pid = int(ms.group(1))
        ppid = int(ms.group(2))
        if pid in cpids:
            # one of our own: remember its parent as part of the chain too
            if ppid not in cpids: cpids.append(ppid)
            continue
        pids.append(pid)
        ppids.append(ppid)
        if sargv: astrs.append(ms.group(3))
    pcnt = len(pids)
    if not pcnt: return 0
    # cpids may have grown after a candidate was collected; drop such candidates
    # and restart the scan each time the chain grows
    i = 0
    while i < pcnt:
        pid = pids[i]
        if pid and pid in cpids:
            pids[i] = 0   # 0 marks a dropped candidate
            ppid = ppids[i]
            if ppid not in cpids: cpids.append(ppid)
            i = 0
        else:
            i += 1
    for i in range(pcnt):
        pid = pids[i]
        if pid and (not sargv or sargv.find(astrs[i]) > -1): return pid
    return 0
222
+
223
+ # validate if the current process is a single one. Quit if not
224
def validate_single_process(self, aname, uname = None, sargv = None, logact = None):
    """Exit the program (status 0) if another instance of the application is running."""
    if logact is None:
        logact = self.LOGWRN
    pid = self.check_application(aname, uname, sargv)
    if not pid:
        return
    parts = [aname]
    if sargv:
        parts.append(' ' + sargv)
    parts.append(": already running as PID={} on {}".format(pid, self.PGLOG['HOSTNAME']))
    if uname:
        parts.append(' By ' + uname)
    parts.append(', Quit Now')
    self.pglog(''.join(parts), logact)
    sys.exit(0)
234
+
235
+ # check how many processes are running for an application already
236
+ # aname - application name
237
+ # uname - user login name who started the application
238
+ # argv - argument string
239
+ # return the the number of processes (exclude the child one)
240
def check_multiple_application(self, aname, uname = None, sargv = None):
    """Return how many other instances of `aname` are running.

    Processes in the current process's own ancestry, and children of an
    already counted instance, are excluded.  When `sargv` is given, an
    instance counts only if its argument string is contained in `sargv`.
    """
    if uname:
        self.check_vuser(uname, aname)
        pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
        mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
    else:
        pcmd = "ps -C {} -f".format(aname)
        mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
    buf = self.pgsystem(pcmd, self.LOGWRN, 20+1024)
    if not buf: return 0
    dpids = [os.getpid(), os.getppid()]   # pids that must not be counted
    pids = []     # candidate pids
    ppids = []    # parent pid of each candidate (parallel to pids)
    astrs = []    # argument string of each candidate (filled only when sargv is given)
    for line in buf.split('\n'):
        ms = re.match(mp, line)
        if not ms: continue
        pid = int(ms.group(1))
        ppid = int(ms.group(2))
        if pid in dpids:
            if ppid > 1 and ppid not in dpids: dpids.append(ppid)
            continue
        elif ppid in pids:
            # child of an already collected instance
            if pid not in dpids: dpids.append(pid)
            continue
        pids.append(pid)
        ppids.append(ppid)
        if sargv: astrs.append(ms.group(3))
    pcnt = len(pids)
    if not pcnt: return 0
    # dpids/pids may have grown after a candidate was collected; drop such
    # candidates and restart the scan after every drop
    i = 0
    while i < pcnt:
        pid = pids[i]
        ppid = ppids[i]
        if pid:
            if pid in dpids:
                if ppid > 1 and ppid not in dpids: dpids.append(ppid)
                # the chained form `i = pids[i] = 0` assigns i first and then
                # clears pids[0], looping forever for i > 0; clear the entry
                # before resetting the index
                pids[i] = 0
                i = 0
                continue
            elif ppid in pids:
                if pid not in dpids: dpids.append(pid)
                pids[i] = 0
                i = 0
                continue
        i += 1
    ccnt = 0
    for i in range(pcnt):
        if pids[i] and (not sargv or sargv.find(astrs[i]) > -1): ccnt += 1
    return ccnt
289
+
290
+ # validate if the running processes reach the limit for the given app; Quit if yes
291
def validate_multiple_process(self, aname, plimit, uname = None, sargv = None, logact = None):
    """Exit the program (status 0) if `plimit` or more instances of the application are running."""
    if logact is None:
        logact = self.LOGWRN
    running = self.check_multiple_application(aname, uname, sargv)
    if running < plimit:
        return
    parts = [aname]
    if sargv:
        parts.append(' ' + sargv)
    parts.append(": already running in {} processes on {}".format(running, self.PGLOG['HOSTNAME']))
    if uname:
        parts.append(' By ' + uname)
    parts.append(', Quit Now')
    self.pglog(''.join(parts), logact)
    sys.exit(0)
301
+
302
+ # fork process
303
+ # return the defined result from call of fork
304
def process_fork(self, dstr):
    """Fork the process, retrying up to 10 times on EAGAIN.

    Returns the result of os.fork() (0 in the child, the child pid in the
    parent).  Any other OSError, or running out of tries, is logged with
    self.LGEREX; `dstr` prefixes those messages.
    """
    for _ in range(10):   # try 10 times
        try:
            return os.fork()
        except OSError as e:
            if e.errno == errno.EAGAIN:
                # the os module has no sleep(); wait with time.sleep() before retrying
                time.sleep(5)
            else:
                self.pglog("{}: {}".format(dstr, str(e)), self.LGEREX)
                break
    self.pglog("{}: too many tries (10) for os.fork()".format(dstr), self.LGEREX)
316
+
317
+ # process the predefined signals
318
def signal_catch(self, signum, frame):
    """Signal handler for SIGQUIT / SIGUSR1 / SIGUSR2.

    Logs the caught signal, then SIGUSR1 turns message logging on (or raises
    the debug level if logging is already on), SIGUSR2 turns logging and
    debugging off, and SIGQUIT sets the PGSIG['QUIT'] flag.  A server or
    non-daemon process forwards the signal to every recorded child.
    """
    # role of this process, used in the log message only
    if self.PGSIG['PPID'] == 1:
        tmp = 'Server'
    elif self.PGSIG['PPID'] > 1:
        tmp = 'Child'
    else:
        tmp = 'Process'
    if signum == signal.SIGQUIT:
        sname = "<{} - signal.SIGQUIT - Quit>".format(signum)
    elif signum == signal.SIGUSR1:
        linfo = 'Logging On'
        if self.PGLOG['LOGMASK']&self.MSGLOG: linfo += ' & Debugging On'
        sname = "<{} - signal.SIGUSR1 - {}>".format(signum, linfo)
    elif signum == signal.SIGUSR2:
        if self.PGLOG['DBGLEVEL']:
            linfo = 'Logging off & Debugging Off'
        else:
            linfo = 'Logging Off'
        sname = "<{} - signal.SIGUSR2 - {}>".format(signum, linfo)
    else:
        sname = "<{} - Signal Not Supports Yet>".format(signum)
    # temporarily dump to the log files if output is currently discarded
    dumpon = 1 if self.SDUMP['OUT'] and self.SDUMP['OUT'] != self.SDUMP['DEF'] else 0
    if not dumpon: self.set_dump()
    self.pglog("catches {} in {} {}".format(sname, tmp, self.PGSIG['DSTR']), self.LOGWRN|self.FRCLOG)
    if signum == signal.SIGUSR1:
        if self.PGLOG['LOGMASK']&self.MSGLOG:
            self.PGLOG['DBGLEVEL'] = 1000   # turn logon twice
        else:
            self.PGLOG['LOGMASK'] |= self.MSGLOG   # turn on logging
    elif signum == signal.SIGUSR2:
        self.PGLOG['LOGMASK'] &= ~(self.MSGLOG)   # turn off logging
        self.PGLOG['DBGLEVEL'] = 0   # turn off debugging
        self.set_dump(self.SDUMP['DEF'])
    else:
        if not dumpon: self.set_dump(self.SDUMP['DEF'])   # restore the discard target
        if signum == signal.SIGQUIT: self.PGSIG['QUIT'] = 1
    if self.PGSIG['PPID'] <= 1 and len(self.CPIDS) > 0:   # passing signal to child processes
        for pid in self.CPIDS: self.kill_process(pid, signum)
356
+
357
+ # wrapper function to call os.kill() logging caught error based on logact
358
+ # return self.SUCCESS if the signal is sent; self.FAILURE if not
359
def kill_process(self, pid, signum, logact = 0):
    """Send `signum` to `pid` via os.kill().

    Returns self.SUCCESS when the call succeeds, otherwise self.FAILURE; the
    error is logged only when `logact` is non-zero.
    """
    try:
        os.kill(pid, signum)
    except Exception as err:
        if logact:
            if type(signum) is int:
                label = str(signum)
            else:
                label = "{}-{}".format(signum.name, int(signum))
            self.pglog("Error pass signal {} to pid {}: {}".format(label, pid, str(err)), logact)
        return self.FAILURE
    return self.SUCCESS
373
+
374
+ # wait child process to finish
375
def clean_dead_child(self, signum, frame):
    """SIGCHLD handler: reap finished children without blocking.

    For a server or non-daemon process (PGSIG['PPID'] < 2) each reaped pid is
    removed from self.CPIDS.  Stops when no child is left, or after polling
    twice while children are still alive.
    """
    idle_polls = 0
    while True:
        try:
            dpid, status = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            return   # no child process any more
        except Exception as err:
            self.pglog("Error check child process: {}".format(str(err)), self.ERRLOG)
            return
        if dpid == 0:
            if idle_polls > 0:
                return   # wait twice if a process is still alive
            idle_polls += 1
        elif self.PGSIG['PPID'] < 2 and dpid in self.CPIDS:
            del self.CPIDS[dpid]
391
+
392
+ # send signal to daemon and exit
393
def signal_daemon(self, sname, aname, uname):
    """Send the signal named by `sname` to the running daemon, then exit.

    `sname` is matched case-insensitively: quit/stop -> SIGQUIT,
    logon/on -> SIGUSR1, logoff/off -> SIGUSR2.  Logs a message when the
    daemon is not running.  Always ends with sys.exit(0).
    """
    by_user = (" By " + uname) if uname else ""
    dstr = "Daemon '{}'{} on {}".format(aname, by_user, self.PGLOG['HOSTNAME'])
    pid = self.check_daemon(aname, uname)
    if not pid > 0:
        self.pglog(dstr + ": not running currently", self.LOGWRN|self.FRCLOG)
        sys.exit(0)

    dstr += " (PID = {})".format(pid)
    if re.match(r'^(quit|stop)$', sname, re.I):
        signum, msg = signal.SIGQUIT, "QUIT"
    elif re.match(r'^(logon|on)$', sname, re.I):
        signum, msg = signal.SIGUSR1, "Logging ON"
    elif re.match(r'^(logoff|off)$', sname, re.I):
        signum, msg = signal.SIGUSR2, "Logging OFF"
        self.PGLOG['DBGLEVEL'] = 0
    else:
        self.pglog("{}: invalid Signal for {}".format(sname, dstr), self.LGEREX)

    if self.kill_process(pid, signum, self.LOGERR) == self.SUCCESS:
        self.pglog("{}: signal sent to {}".format(msg, dstr), self.LOGWRN|self.FRCLOG)
    sys.exit(0)
416
+
417
+ # start a time child to run the command in case hanging
418
def timeout_command(self, cmd, logact = None, cmdopt = 4):
    """Run `cmd` inside a timeout child process.

    The command is logged first.  Where start_timeout_child() returns true
    (the child side) the command is executed and the process exits with
    status 0; otherwise the call simply returns.
    """
    if logact is None:
        logact = self.LOGWRN
    logact &= ~self.EXITLG   # never exit on logging here
    self.pglog("> " + cmd, logact)
    if not self.start_timeout_child(cmd, logact):
        return
    self.pgsystem(cmd, logact, cmdopt)
    sys.exit(0)
425
+
426
+ # start a timeout child process
427
+ # return: 1 - in child, 0 - in parent
428
def start_timeout_child(self, msg, logact = None):
    """Fork a child that the parent supervises with a timeout.

    Returns 1 in the child (after it has set up its own signal handler and
    process ids) and 0 in the parent.  The parent polls the child every 2
    seconds, up to PGLOG['TIMEOUT'] times; if the child is still running
    afterwards, its descendants and then the child itself are killed and the
    outcome is logged with `logact`.
    """
    if logact is None: logact = self.LOGWRN
    pid = self.process_fork(msg)
    if pid == 0:   # in child
        signal.signal(signal.SIGQUIT, self.signal_catch)   # catch quit signal only
        self.PGSIG['PPID'] = self.PGSIG['PID']
        self.PGSIG['PID'] = pid = os.getpid()
        self.cmdlog("Timeout child to " + msg, time.time(), 0)
        self.pgdisconnect(0)   # disconnect database in child
        return 1
    # in parent
    for _ in range(self.PGLOG['TIMEOUT']):
        if not self.check_process(pid): break
        time.sleep(2)   # sys has no sleep(); time.sleep() is the intended call
    if self.check_process(pid):   # bound method: self must not be passed again
        msg += ": timeout({} secs) in CPID {}".format(2*self.PGLOG['TIMEOUT'], pid)
        pids = self.kill_children(pid, 0)
        time.sleep(6)
        # compare against SUCCESS explicitly, as kill_children() does
        if self.kill_process(pid, signal.SIGKILL, self.LOGERR) == self.SUCCESS: pids.insert(0, pid)
        if pids: msg += "\nProcess({}) Killed".format(','.join(map(str, pids)))
        self.pglog(msg, logact)
    return 0
450
+
451
+ # kill children recursively start from the deepest and return the pids got killed
452
def kill_children(self, pid, logact = None):
    """Kill the descendants of `pid`, deepest first, and return the killed pids.

    Children are listed with `ps --ppid`; each live child has its own
    descendants killed recursively before it receives SIGKILL.  The killed
    pids are logged when `logact` is non-zero.
    """
    if logact is None:
        logact = self.LOGWRN
    listing = self.pgsystem("ps --ppid {} -o pid".format(pid), logact, 20)
    killed = []
    for row in (listing.split('\n') if listing else []):
        found = re.match(r'^\s*(\d+)', row)
        if found is None:
            continue
        child = int(found.group(1))
        if not self.check_process(child):
            continue
        deeper = self.kill_children(child, logact)
        if deeper:
            killed = deeper + killed
        if self.kill_process(child, signal.SIGKILL, logact) == self.SUCCESS:
            killed.insert(0, child)
    if logact and len(killed):
        self.pglog("Process({}) Killed".format(','.join(map(str, killed))), logact)
    return killed
468
+
469
+ # start a child process
470
+ # pname - unique process name
471
def start_child(self, pname, logact = None, dowait = 0):
    """Fork a child process for the uniquely named job `pname`.

    Returns 1 when no child is needed (PGSIG['MPROC'] < 2) or after a
    successful fork (in both parent and child), -1 when a child for `pname`
    is already running, and the result of self.pglog() when the daemon is in
    QUIT mode or the child limit is reached without `dowait`.
    """
    if logact is None: logact = self.LOGWRN
    if self.PGSIG['MPROC'] < 2: return 1   # no need child process
    if logact&self.EXITLG: logact &= ~self.EXITLG
    if logact&self.MSGLOG: logact |= self.FRCLOG
    if self.PGSIG['QUIT']:
        return self.pglog("{} is in QUIT mode, cannot start CPID for {}".format(self.PGSIG['DSTR'], pname), logact)
    elif len(self.CPIDS) >= self.PGSIG['MPROC']:
        # at the limit: re-check the children, waiting for a free slot only if dowait is set
        i = 0
        while True:
            pcnt = self.check_child(None, 0, logact)
            if pcnt < self.PGSIG['MPROC']: break
            if dowait:
                self.show_wait_message(i, "{}-{}: wait any {} child processes".format(self.PGSIG['DSTR'], pname, pcnt), logact, dowait)
                i += 1
            else:
                return self.pglog("{}-{}: {} child processes already running at {}".format(self.PGSIG['DSTR'], pname, pcnt, self.current_datetime()), logact)
    if self.check_child(pname): return -1   # process is running already
    pid = self.process_fork(self.PGSIG['DSTR'])
    if pid:
        # parent side
        self.CPIDS[pid] = pname   # record the child process id
        self.pglog("{}: starts CPID {} for {}".format(self.PGSIG['DSTR'], pid, pname))
    else:
        # child side
        signal.signal(signal.SIGQUIT, signal.SIG_DFL)   # turn off catch QUIT signal in child
        self.PGLOG['LOGMASK'] &= ~self.WARNLG   # turn off warn in child
        self.PGSIG['PPID'] = self.PGSIG['PID']
        self.PGSIG['PID'] = pid = os.getpid()
        self.PGSIG['MPROC'] = 1   # 1 in child process
        self.CBIDS = {}   # empty background process info in case not
        self.PGSIG['DSTR'] += ": CPID {} for {}".format(pid, pname)
        self.cmdlog("CPID {} for {}".format(pid, pname))
        self.pgdisconnect(0)   # disconnect database in child
    return 1   # child started successfully
504
+
505
+ # get child process id for given pname
506
def pname2cpid(self, pname):
    """Return the first recorded child pid whose process name equals `pname`, or 0."""
    return next((cpid for cpid, name in self.CPIDS.items() if name == pname), 0)
510
+
511
+ # check one or all child processes if they are still running
512
+ # pname - unique process name if given
513
+ # pid - check this specified process id if given
514
+ # dowait - 0 no wait, 1 wait all done, -1 wait only when all children are running
515
+ # return the number of running processes if dowait == 0 or 1
516
+ # return the number of none-running processes if dowait == -1
517
def check_child(self, pname, pid = 0, logact = None, dowait = 0):
    """Check whether one child (by `pname` or `pid`) or all children are still running.

    Finished children are removed from self.CPIDS.  With a non-zero `dowait`
    the check repeats, pausing through show_wait_message() between rounds.
    Returns the number of running children, or for dowait < 0 the number of
    non-running ones; always 0 when PGSIG['MPROC'] < 2.
    """
    if logact is None: logact = self.LOGWRN
    if self.PGSIG['MPROC'] < 2: return 0   # no child process
    if logact&self.EXITLG: logact &= ~self.EXITLG
    ccnt = i = 0
    # for dowait < 0, ccnt is the count the running total is compared against
    if dowait < 0: ccnt = 1 if (pid or pname) else self.PGSIG['MPROC']
    while True:
        pcnt = 0
        if not pid and pname: pid = self.pname2cpid(pname)
        if pid:
            if self.check_process(pid):   # process is not done yet
                if pname:
                    self.pglog("{}({}): Child still running".format(pname, pid), logact)
                else:
                    self.pglog("{}: Child still running".format(pid), logact)
                pcnt = 1
            elif pid in self.CPIDS:
                del self.CPIDS[pid]   # clean the saved info for the process
        elif not pname:
            cpids = list(self.CPIDS)   # copy the keys: entries are deleted while looping
            for cpid in cpids:
                if self.check_process(cpid):   # process is not done yet
                    pcnt += 1
                elif cpid in self.CPIDS:
                    del self.CPIDS[cpid]
        if pcnt == 0 or dowait == 0 or pcnt < ccnt: break
        self.show_wait_message(i, "{}: wait {}/{} child processes".format(self.PGSIG['DSTR'], pcnt, self.PGSIG['MPROC']), logact, dowait)
        i += 1
    return (ccnt - pcnt) if ccnt else pcnt
546
+
547
+ # start this process in none daemon mode
548
+ # aname - application name, or daemon name
549
+ # cact - short action name
550
+ # uname - user login name to started the application
551
+ # mproc - upper limit of muiltiple child processes
552
+ # wtime - waiting time (in seconds) for next process
553
def start_none_daemon(self, aname, cact = None, uname = None, mproc = 1, wtime = 120, logon = 1, bproc = 1):
    """Set up signal handling and process limits for running without daemonizing.

    Installs the SIGQUIT and SIGCHLD handlers and records the process
    identity, limits and polling wait time in self.PGSIG (PPID is set to 0,
    the non-daemon marker).
    """
    dstr = aname
    if cact: dstr += " for Action " + cact
    if uname:
        dstr += " By " + uname
        self.check_vuser(uname, aname)
    signal.signal(signal.SIGQUIT, self.signal_catch)   # catch quit signal only
    signal.signal(signal.SIGCHLD, self.clean_dead_child)
    self.PGSIG['DSTR'] = dstr
    self.PGSIG['DNAME'] = aname
    self.PGSIG['PPID'] = 0
    self.PGSIG['PID'] = os.getpid()
    self.PGSIG['MPROC'] = mproc
    self.PGSIG['BPROC'] = bproc
    self.PGLOG['CMDTIME'] = self.PGSIG['WTIME'] = self.get_wait_time(wtime, 120, "Polling Wait Time")
    if self.PGSIG['MPROC'] > 1:
        self.cmdlog("starts non-daemon {}(ML={},WI={})".format(aname, self.PGSIG['MPROC'], self.PGSIG['WTIME']))
        # NOTE(review): the nesting of the next line under the MPROC check was inferred
        # from an indentation-stripped listing - confirm against the original file
        if not logon: self.PGLOG['LOGMASK'] &= ~self.MSGLOG   # turn off message logging
571
+
572
+ # check one process id other than the current one if it is still running
573
+ # pid - specified process id
574
+ # pmsg - process message if given
575
def check_process(self, pid):
    """Return 1 if `ps -p` lists a process with the given pid, else 0."""
    listing = self.pgsystem("ps -p {} -o pid".format(pid), self.LGWNEX, 20)
    if not listing:
        return 0
    pattern = r'^\s*{}$'.format(pid)
    found = any(re.match(pattern, row) for row in listing.split('\n'))
    return 1 if found else 0
583
+
584
+ # check a process id on give host
585
def check_host_pid(self, host, pid, pmsg = None, logact = None):
    """Check a process id on a given host via the `rdaps` command.

    Returns 1 when the command yields output (logging `pmsg` if given),
    -1 when it yields nothing and PGLOG['SYSERR'] is set, otherwise 0.
    """
    if logact is None:
        logact = self.LOGWRN
    pieces = ['rdaps']
    if host:
        pieces.append("-h " + host)
    pieces.append("-p {}".format(pid))
    output = self.pgsystem(' '.join(pieces), logact, 276)   # 4+16+256
    if not output:
        return -1 if self.PGLOG['SYSERR'] else 0
    if pmsg:
        self.pglog(pmsg, logact&(~self.EXITLG))
    return 1
594
+
595
+ # check one process id on a given host name if it is still running, with default timeout
596
+ # pid - specified process id
597
+ # ppid - specified parent process id
598
+ # uname - user login name who started the daemon
599
+ # host - host name the pid supposed to be running on
600
+ # aname - application name
601
+ # pmsg - process message if given
602
+ # return 1 if process is still alive, 0 if it died already, -1 on error checking
603
def check_host_process(self, host, pid, ppid = 0, uname = None, aname = None, pmsg = None, logact = None):
    """Check for a process on a given host via the `rdaps` command.

    Each truthy filter (host, pid, ppid, uname, aname) adds its option to the
    command.  Returns 1 when the command yields output (logging `pmsg` if
    given), -1 when it yields nothing and PGLOG['SYSERR'] is set, otherwise 0.
    """
    if logact is None:
        logact = self.LOGWRN
    pieces = ["rdaps"]
    if host:
        pieces.append("-h " + host)
    if pid:
        pieces.append("-p {}".format(pid))
    if ppid:
        pieces.append("-P {}".format(ppid))
    if uname:
        pieces.append("-u " + uname)
    if aname:
        pieces.append("-a " + aname)
    output = self.pgsystem(' '.join(pieces), logact, 276)   # 4+16+256
    if not output:
        return -1 if self.PGLOG['SYSERR'] else 0
    if pmsg:
        self.pglog(pmsg, logact&(~self.EXITLG))
    return 1
615
+
616
+ # get a single pbs status record via qstat
617
def get_pbs_info(self, qopts, multiple = 0, logact = 0, chkcnt = 1):
    """Parse `qstat -n -w <qopts>` output into a dict keyed by column name.

    The header row starting with 'Job ID' supplies the keys, renamed to
    JobID, UserName, JobName, Reqd<col 7>, Reqd<col 8>, State and
    Elap<col 10>, plus a trailing 'Node' key for the node line that follows
    a job row.  With `multiple` each value is a list (one item per job);
    otherwise values are strings for the first job only.  qstat is tried up
    to `chkcnt` times, 6 seconds apart.  Returns an empty dict when no
    output is obtained.
    """
    stat = {}
    buf = None
    for attempt in range(chkcnt):
        buf = self.pgsystem("qstat -n -w {}".format(qopts), logact, 16)
        if buf: break
        # pause only when another attempt follows
        if attempt + 1 < chkcnt: time.sleep(6)
    if not buf: return stat
    chkt = chkd = 1   # still looking for the title row / the dashed separator row
    for line in buf.split('\n'):
        if chkt:
            if re.match(r'^Job ID', line):
                line = re.sub(r'^Job ID', 'JobID', line, count=1)
                ckeys = re.split(r'\s+', self.pgtrim(line))
                ckeys[1] = 'UserName'
                ckeys[3] = 'JobName'
                # prefix each column with its own name; building columns 8 and 10
                # from the rewritten column 7 produced 'ReqdReqd...' / 'ElapReqd...' keys
                ckeys[7] = 'Reqd' + ckeys[7]
                ckeys[8] = 'Reqd' + ckeys[8]
                ckeys[9] = 'State'
                ckeys[10] = 'Elap' + ckeys[10]
                ckeys.append('Node')
                kcnt = len(ckeys)
                if multiple:
                    for key in ckeys:
                        stat[key] = []
                chkt = 0
        elif chkd:
            if re.match(r'^-----', line): chkd = 0
        else:
            vals = re.split(r'\s+', self.pgtrim(line))
            vcnt = len(vals)
            if vcnt == 1:
                # a lone token is the node line of the preceding job
                if multiple:
                    stat[ckeys[kcnt-1]].append(vals[0])
                else:
                    stat[ckeys[kcnt-1]] = vals[0]
                    break
            elif vcnt > 1:
                ms = re.match(r'^(\d+)', vals[0])
                if ms: vals[0] = ms.group(1)   # keep the numeric part of the job id
                # zip() ignores columns beyond the known keys instead of raising IndexError
                for key, val in zip(ckeys, vals):
                    if multiple:
                        stat[key].append(val)
                    else:
                        stat[key] = val
                if vcnt == kcnt: break
    return stat
667
+
668
+ # check status of a pbs batch id
669
+ # bid - specified batch id
670
+ # return hash of batch status, 0 if cannot check any more
671
def check_pbs_status(self, bid, logact = None):
    """Parse `qhist -w -j <bid>` output into a dict of column name -> value.

    The first line starting with 'Job' supplies the (space-free) column
    names; the line right after it supplies the values.  Returns an empty
    dict when qhist gives no output.
    """
    if logact is None:
        logact = self.LOGWRN
    stat = {}
    buf = self.pgsystem("qhist -w -j {}".format(bid), logact, 20)
    if not buf:
        return stat
    # multi-word column titles collapsed so the header splits on whitespace
    renames = (
        (r'^Job ID', 'JobID'),
        (r'Finish Time', 'FinishTime'),
        (r'Req Mem', 'ReqMem'),
        (r'Used Mem\(GB\)', 'UsedMem(GB)'),
        (r'Avg CPU \(%\)', 'AvgCPU(%)'),
        (r'Elapsed \(h\)', 'WallTime(h)'),
        (r'Job Name', 'JobName'),
    )
    ckeys = None
    for line in buf.split('\n'):
        if ckeys is None:
            if re.match(r'^Job', line):
                for pattern, title in renames:
                    line = re.sub(pattern, title, line, 1)
                header = re.split(r'\s+', self.pgtrim(line))
                header[1] = 'UserName'
                ckeys = header
            continue
        vals = re.split(r'\s+', self.pgtrim(line))
        for idx, key in enumerate(ckeys):
            stat[key] = vals[idx]
        break
    return stat
698
+
699
+ # check if a pbs batch id is live
700
+ # bid - specified batch id
701
+ # return 1 if process is still alive, 0 if it died already, -1 if its status cannot be obtained
702
def check_pbs_process(self, bid, pmsg = None, logact = None):
    """Report whether the PBS batch job `bid` is still alive.

    Returns 1 when its state is one of B, R, Q, S, H, W or X, 0 for any other
    state, and -1 when no status could be obtained.  If `pmsg` is given it is
    logged with the outcome appended.
    """
    if logact is None:
        logact = self.LOGWRN
    stat = self.get_pbs_info(bid, 0, logact)
    if not stat:
        ret = -1
        if pmsg:
            pmsg += ", Process Not Exists and returns -1"
    else:
        live = re.match(r'^(B|R|Q|S|H|W|X)$', stat['State'])
        if live:
            ret = 1
            if pmsg:
                pmsg += ", STATE='{}' and returns 1".format(live.group(1))
        else:
            ret = 0
            if pmsg:
                pmsg += ", STATE='{}' and returns 0".format(stat['State'])
    if pmsg:
        self.pglog(pmsg, logact&~self.EXITLG)
    return ret
718
+
719
+ # get wait time
720
def get_wait_time(self, wtime, default, tmsg):
    """Convert a wait-time specification to seconds.

    `wtime` may be an int (returned as is), a digit string (seconds), or
    digits followed by a unit letter D, H, M or S in either case.  A falsy
    `wtime` selects `default`.  Anything else is reported through
    self.pglog() with self.LGEREX, using `tmsg` to name the setting.
    """
    if not wtime: wtime = default   # use default time
    if isinstance(wtime, int): return wtime
    if re.match(r'^\d+$', wtime): return int(wtime)
    ms = re.match(r'^(\d+)([DHMS])$', wtime, re.I)
    if not ms:
        self.pglog("{}: '{}' NOT in (D,H,M,S)".format(wtime, tmsg), self.LGEREX)
        # presumably not reached because LGEREX exits; fall back to the default if it does return
        return default
    # the pattern is case-insensitive, so normalize the unit before the lookup;
    # comparing a lower-case unit against 'S'/'M'/'H' treated every unit as days
    seconds_per_unit = {'S': 1, 'M': 60, 'H': 3600, 'D': 86400}
    return int(ms.group(1)) * seconds_per_unit[ms.group(2).upper()]   # in seconds
737
+
738
+ # start a background process and record its id; check self.pgsystem() in self.pm for
739
+ # valid cmdopt values
740
def start_background(self, cmd, logact = None, cmdopt = 5, dowait = 0):
    """Launch `cmd` as a background shell job and record its process id.

    With PGSIG['BPROC'] < 2 the command is simply run through
    self.pgsystem() and that result returned.  Otherwise, once the
    background limit is reached, the call either waits for a free slot
    (`dowait`) or returns the result of self.pglog().  `cmdopt` bits used
    here: 1 - log the command with the caller's action, 2 - append stdout to
    the log file, 4 - append stderr to the error file, 8 - log through
    cmdlog().  Returns the result of record_background().
    """
    if logact is None: logact = self.LOGWRN
    if self.PGSIG['BPROC'] < 2: return self.pgsystem(cmd, logact, cmdopt)   # no background
    act = logact&(~self.EXITLG)
    if act&self.MSGLOG: act |= self.FRCLOG   # make sure background calls always logged
    if len(self.CBIDS) >= self.PGSIG['BPROC']:
        i = 0
        while True:
            bcnt = self.check_background(None, 0, act)
            if bcnt < self.PGSIG['BPROC']: break
            if dowait:
                self.show_wait_message(i, "{}-{}: wait any {} background calls".format(self.PGSIG['DSTR'], cmd, bcnt), act, dowait)
                i += 1
            else:
                return self.pglog("{}-{}: {} background calls already at {}".format(self.PGSIG['DSTR'], cmd, bcnt, self.current_datetime()), act)
    cmdlog = (act if cmdopt&1 else self.WARNLG)
    if cmdopt&8:
        self.cmdlog("starts '{}'".format(cmd), None, cmdlog)
    else:
        self.pglog("{}({})-{} >{} &".format(self.PGLOG['HOSTNAME'], os.getpid(), self.current_datetime(), cmd), cmdlog)
    # build the shell command line with the requested redirections
    bckcmd = cmd
    if cmdopt&2:
        bckcmd += " >> {}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['LOGFILE'])
    if cmdopt&4:
        if not self.PGLOG['ERRFILE']:
            self.PGLOG['ERRFILE'] = re.sub(r'\.log$', '.err', self.PGLOG['LOGFILE'], 1)
        bckcmd += " 2>> {}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['ERRFILE'])
    bckcmd += " &"
    os.system(bckcmd)
    return self.record_background(cmd, logact)
770
+
771
+ # get background process id for given bcmd
772
def bcmd2cbid(self, bcmd):
    """Return the first recorded background pid whose command equals `bcmd`, or 0."""
    return next((cbid for cbid, command in self.CBIDS.items() if command == bcmd), 0)
776
+
777
+ # check one or all child processes if they are still running
778
+ # bid - check this specified background process id if given
779
+ # return the number of processes are still running
780
def check_background(self, bcmd, bid = 0, logact = None, dowait = 0):
    """Check whether one background process (by `bcmd` or `bid`) or all of them still run.

    Finished processes are removed from self.CBIDS.  With `dowait` the check
    repeats, pausing through show_wait_message(), until nothing is running.
    Returns the number of processes still running; always 0 when
    PGSIG['BPROC'] < 2.
    """
    if logact is None: logact = self.LOGWRN
    if self.PGSIG['BPROC'] < 2: return 0   # no background process
    if logact&self.EXITLG: logact &= ~self.EXITLG
    if not bid and bcmd: bid = self.bcmd2cbid(bcmd)
    bcnt = i = 0
    while True:
        if bid:
            if self.check_process(bid):   # process is not done yet
                if bcmd:
                    self.pglog("{}({}): Background process still running".format(bcmd, bid), logact)
                else:
                    self.pglog("{}: Background process still running".format(bid), logact)
                bcnt = 1
            elif bid in self.CBIDS:
                del self.CBIDS[bid]   # clean the saved info for the process
        elif not bcmd:
            # iterate over a copy of the keys: entries are deleted inside the loop.
            # A separate loop name keeps `bid` falsy, so later rounds re-check every process.
            for cbid in list(self.CBIDS):
                if self.check_process(cbid):   # process is not done yet
                    bcnt += 1
                else:
                    del self.CBIDS[cbid]
        if not (bcnt and dowait): break
        # background calls are limited by BPROC, so report that limit rather than MPROC
        self.show_wait_message(i, "{}: wait {}/{} background processes".format(self.PGSIG['DSTR'], bcnt, self.PGSIG['BPROC']), logact, dowait)
        i += 1
        bcnt = 0
    return bcnt
807
+
808
+ # check and record process id for background command; return 1 if success full;
809
+ # 0 otherwise; -1 if done already
810
def record_background(self, bcmd, logact = None):
    """Find the process id of the just-started background command and record it.

    Looks (twice at most, sleeping 2 seconds after each look) through `ps`
    output of the current and GDEX users for a process with parent pid 1
    whose command contains the first word of `bcmd`.  Returns 1 after
    recording the pid in self.CBIDS, -1 when a matching pid is already
    recorded, 0 when nothing was found.
    """
    if logact is None: logact = self.LOGWRN
    ms = re.match(r'^(\S+)', bcmd)
    if ms:
        aname = ms.group(1)
    else:
        aname = bcmd
    mp = r"^\s*(\S+)\s+(\d+)\s+1\s+.*{}(.*)$".format(aname)
    pc = "ps -u {},{} -f | grep ' 1 ' | grep {}".format(self.PGLOG['CURUID'], self.PGLOG['GDEXUSER'], aname)
    for i in range(2):
        buf = self.pgsystem(pc, logact, 20+1024)
        if buf:
            lines = buf.split('\n')
            for line in lines:
                ms = re.match(mp, line)
                if not ms: continue
                (uid, sbid, acmd) = ms.groups()
                bid = int(sbid)
                if bid in self.CBIDS: return -1
                if uid == self.PGLOG['GDEXUSER']:
                    # strip a leading script suffix left over after the application name
                    acmd = re.sub(r'^\.(pl|py)\s+', '', acmd, 1)
                # NOTE(review): the nesting of this check was inferred from an
                # indentation-stripped listing; it also skips the line when the ps command
                # DOES match bcmd, which looks inverted - confirm against the original file
                if re.match(r'^{}{}'.format(aname, acmd), bcmd): continue
                self.CBIDS[bid] = bcmd
                return 1
        time.sleep(2)
    return 0
836
+
837
+ # sleep for given period for the daemon, stops if maximum running time reached
838
def sleep_daemon(self, wtime = 0, mtime = None):
    """Sleep for one polling interval unless the maximum running time is reached.

    `wtime` defaults to PGSIG['WTIME'] and `mtime` to PGSIG['MTIME'].  When
    a positive `mtime` has elapsed since PGSIG['STIME'], PGSIG['QUIT'] is set
    and no sleep happens.  Returns the number of seconds slept (0 if none).
    """
    pause = wtime if wtime else self.PGSIG['WTIME']
    limit = self.PGSIG['MTIME'] if mtime is None else mtime
    if limit > 0:
        elapsed = int(time.time()) - self.PGSIG['STIME']
        if elapsed >= limit:
            self.PGSIG['QUIT'] = 1
            pause = 0
    if pause:
        time.sleep(pause)
    return pause
848
+
849
+ # show wait message every dintv and then sleep for PGSIG['WTIME']
850
def show_wait_message(self, loop, msg, logact = None, dowait = 0):
    """Log `msg` on every 30th loop (but not loop 0), then sleep PGSIG['WTIME'] if `dowait` is set."""
    if logact is None:
        logact = self.LOGWRN
    due = loop > 0 and loop % 30 == 0
    if due:
        self.pglog("{} at {}".format(msg, self.current_datetime()), logact)
    if dowait:
        time.sleep(self.PGSIG['WTIME'])
855
+
856
+ # register a time out function to raise a time out error
857
@contextmanager
def pgtimeout(self, seconds = 0, logact = 0):
    """Context manager that abandons its block after `seconds` seconds.

    A SIGALRM handler (self.raise_pgtimeout) raises TimeoutError when the
    alarm fires; the error is swallowed here, so execution continues after
    the `with` block.  `seconds` defaults to PGLOG['TIMEOUT'].  `logact` is
    accepted but not used.  On exit the pending alarm is cancelled and the
    previous SIGALRM handler is put back.
    """
    if not seconds: seconds = self.PGLOG['TIMEOUT']
    previous = signal.signal(signal.SIGALRM, self.raise_pgtimeout)
    signal.alarm(seconds)
    try:
        yield
    except TimeoutError:
        pass
    finally:
        # cancel a still-pending alarm so it cannot fire after the block has finished
        signal.alarm(0)
        # signal.signal() reports None for a handler not installed from Python,
        # and None cannot be re-installed; use the default action in that case
        signal.signal(signal.SIGALRM, previous if previous is not None else signal.SIG_DFL)
868
+
869
+ # raise a timeout Error
870
+ @staticmethod
871
+ def raise_pgtimeout(signum, frame):
872
+ raise TimeoutError
873
+
874
+ # Add a timeout block.
875
def timeout_func(self):
    """Demonstrate pgtimeout(): a 10 second sleep inside a 1 second timeout block."""
    guard = self.pgtimeout(1)
    with guard:
        print('entering block')
        time.sleep(10)
        print('This should never get printed because the line before timed out')