rda-python-dscheck 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1628 @@
1
+ ###############################################################################
2
+ #
3
+ # Title : PgCheck.py
4
+ # Author : Zaihua Ji, zji@ucar.edu
5
+ # Date : 08/26/2020
6
+ # 2025-02-10 transferred to package rda_python_dscheck from
7
+ # https://github.com/NCAR/rda-shared-libraries.git
8
+ # Purpose : python library module for holding some global variables and
9
+ # functions for dscheck utility
10
+ #
11
+ # Github : https://github.com/NCAR/rda-python-dscheck.git
12
+ #
13
+ ###############################################################################
14
+ #
15
+ import os
16
+ import re
17
+ import time
18
+ from rda_python_common import PgLOG
19
+ from rda_python_common import PgCMD
20
+ from rda_python_common import PgSIG
21
+ from rda_python_common import PgUtil
22
+ from rda_python_common import PgLock
23
+ from rda_python_common import PgFile
24
+ from rda_python_common import PgOPT
25
+ from rda_python_common import PgDBI
26
+
27
+ # global variables
28
LOOP = 0       # call counter advanced by reset_process_limits()
PLIMITS = {}   # cache filled by get_process_limits(), keyed by "<command>-<specialist>"
DWHOSTS = {} # hosts are down
RUNPIDS = {}   # consulted by check_dscheck_locks() with "<lockhost><pid>" keys
SHELLS = {} # shell names used by specialists
33
+
34
+ #
35
+ # define initially the needed option values
36
+ #
37
# Option table: two-letter option key -> [group/flag, long option name, value, ...].
# The first group carries a distinct bit flag per action; the remaining groups
# carry 0, 1 or 2 in the first slot (presumably mode / single-value /
# multi-value options -- confirm against rda_python_common.PgOPT).
# NOTE(review): 'CH' and 'SO' share the flag 0x1000 while 0x0800 is unused --
# confirm this is intended.
PgOPT.OPTS = { # (!= 0) - setting actions
    'PC' : [0x0004, 'ProcessCheck', 1],
    'AC' : [0x0008, 'AddCheck', 1],
    'GD' : [0x0010, 'GetDaemon', 0],
    'SD' : [0x0020, 'SetDaemon', 1],
    'GC' : [0x0040, 'GetCheck', 0],
    'DL' : [0x0080, 'Delete', 1],
    'UL' : [0x0100, 'UnLockCheck', 1],
    'EC' : [0x0200, 'EmailCheck', 0],
    'IC' : [0x0400, 'InterruptCheck', 1],
    'CH' : [0x1000, 'CheckHost', 0],
    'SO' : [0x1000, 'SetOptions', 1],

    'AW' : [0, 'AnyWhere', 0],
    'BG' : [0, 'BackGround', 0],
    'CP' : [0, 'CheckPending', 0],
    'CS' : [0, 'CheckStatus', 0],
    'FI' : [0, 'ForceInterrrupt', 0],
    'FO' : [0, 'FormatOutput', 0],
    'LO' : [0, 'LogOn', 0],
    'MD' : [0, 'PgDataset', 3],
    'NC' : [0, 'NoCommand', 0],
    'ND' : [0, 'NewDaemon', 0],
    'NT' : [0, 'NoTrim', 0],
    'WR' : [0, 'WithdsRqst', 0],
    'WU' : [0, 'WithdsUpdt', 0],

    'DM' : [1, 'DaemonMode', 1], # for action PC, start|quit|logon|logoff
    'DV' : [1, 'Divider', 1], # default to <:>
    'ES' : [1, 'EqualSign', 1], # default to <=>
    'FN' : [1, 'FieldNames', 0],
    'LH' : [1, 'LocalHost', 0, ''],
    'MT' : [1, 'MaxrunTime', 0],
    'OF' : [1, 'OutputFile', 0],
    'ON' : [1, 'OrderNames', 0],
    'AO' : [1, 'ActOption', 1], # default to <!>
    'WI' : [1, 'WaitInterval', 1],

    'AN' : [2, 'ActionName', 0],
    'AV' : [2, 'ArgumentVector', 0],
    'AX' : [2, 'ArgumenteXtra', 0],
    'CC' : [2, 'CarbonCopy', 0],
    'CD' : [2, 'CheckDate', 256],
    'CI' : [2, 'CheckIndex', 16],
    'CM' : [2, 'Command', 1],
    'CT' : [2, 'CheckTime', 32],
    'DB' : [2, 'Debug', 0],
    'DC' : [2, 'DoneCount', 17],
    'DF' : [2, 'DownFlags', 1],
    'DI' : [2, 'DaemonIndex', 16],
    'DS' : [2, 'Dataset', 1],
    'ER' : [2, 'ERrormessage', 0],
    'EV' : [2, 'Environments', 1],
    'FC' : [2, 'FileCount', 17],
    'HN' : [2, 'HostName', 1],
    'IF' : [2, 'InputFile', 0],
    'MC' : [2, 'MaxCount', 17],
    'MH' : [2, 'MatchHost', 1],
    'MO' : [2, 'Modules', 1],
    'PI' : [2, 'ParentIndex', 17],
    'PL' : [2, 'ProcessLimit', 17],
    'PO' : [2, 'Priority', 17],
    'PQ' : [2, 'PBSQueue', 0],
    'QS' : [2, 'QSubOptions', 0],
    'SN' : [2, 'Specialist', 1],
    'ST' : [2, 'Status', 0],
    'SZ' : [2, 'DataSize', 16],
    'TC' : [2, 'TryCount', 17],
    'WD' : [2, 'WorkDir', 0],
}
107
+
108
# Alternative long names accepted for the two-letter option keys.
# NOTE(review): 'CF' and 'GZ' have no entry in the PgOPT.OPTS table defined in
# this module -- presumably they are provided by PgOPT itself; confirm.
PgOPT.ALIAS = {
    'AN' : ['Action'],
    'BG' : ['b'],
    'CF' : ['Confirmation', 'ConfirmAction'],
    'CM' : ['CommandName'],
    'DL' : ['RM', 'Remove'],
    'DS' : ['Dsid', 'DatasetID'],
    'DV' : ['Delimiter', 'Separater'],
    'EV' : ['Envs'],
    'GZ' : ['GMT', 'GreenwichZone', 'UTC'],
    'MC' : ['MaximumCount', 'MaxTryCount'],
    'MH' : ['MatchHostname'],
    'NC' : ['NoRemoteCommand'],
    'MO' : ['Mods'],
    'PI' : ['ParentCheckIndex'],
    'QS' : ['PBSOptions'],
    'SO' : ['SetBatchOptions'],
    'SZ' : ['Size', "ProcSize"],
    'UL' : ['UnLock'],
    'WD' : ["WorkDirectory"],
    'WR' : ["WithRequest"],
    'WU' : ["WithUpdate"],
}
131
+
132
# Field maps for table 'dscheck': one-letter short name ->
# [option key in PgOPT.OPTS, database field name, type flag].
# The meaning of the third element (0, 1 or -1) is defined by PgOPT, not here.
PgOPT.TBLHASH['dscheck'] = {
    #SHORTNM KEYS(PgOPT.OPTS) DBFIELD
    'C' : ['CI', "cindex", 0],
    'O' : ['CM', "command", 1],
    'V' : ['AV', "argv", 1],
    'T' : ['DS', "dsid", 1],
    'A' : ['AN', "action", 1],
    'U' : ['ST', "status", 1],
    'P' : ['PQ', "pbsqueue", 1],
    'R' : ['PI', "pindex", 0],
    'B' : ['DF', "dflags", 0],
    'F' : ['FC', "fcount", 0],
    'J' : ['DC', "dcount", 0],
    'K' : ['TC', "tcount", 0],
    'L' : ['MC', "mcount", 0],
    'Z' : ['SZ', "size", 0],
    'D' : ['CD', "date", 1],
    'Y' : ['CT', "time", 1],
    'H' : ['HN', "hostname", 1],
    'N' : ['SN', "specialist", 1],
    'W' : ['WD', "workdir", 1],
    'M' : ['MO', "modules", 1],
    'I' : ['EV', "environments", 1],
    'Q' : ['QS', "qoptions", 1],
    'X' : ['AX', "argextra", -1],
    'E' : ['ER', "errmsg", -1],
}

# Field maps for table 'dsdaemon', same layout as the 'dscheck' map.
PgOPT.TBLHASH['dsdaemon'] = {
    #SHORTNM KEYS(PgOPT.OPTS) DBFIELD
    'I' : ['DI', "dindex", 0],
    'C' : ['CM', "command", 1],
    'H' : ['HN', "hostname", 1],
    'M' : ['MH', "matchhost", 1],
    'S' : ['SN', "specialist", 1],
    'P' : ['PL', "proclimit", 0],
    'O' : ['PO', "priority", 0],
}
170
+
171
# Host state shared by the functions in this module; 'chkhost', 'hostcond' and
# 'isbatch' are filled in by check_dscheck_options() when option -LH is given.
CHKHOST = {
    'curhost' : PgLOG.get_host(1),
    'chkhost' : None,
    'hostcond' : None,
    'isbatch' : 0
}

# Default field-letter lists; the letters refer to the PgOPT.TBLHASH short names.
PgOPT.PGOPT['dscheck'] = "COVTUPFJDNW" # default
PgOPT.PGOPT['chkall'] = "COVTAUPRBFJKLZDYHNWMIQXE" # default to all
# NOTE(review): 'Q' has no entry in PgOPT.TBLHASH['dsdaemon'] while 'M'
# (matchhost) is not listed here -- confirm whether "ICHMSPO" was intended.
PgOPT.PGOPT['dsdaemon'] = "ICHQSPO" # default to all
PgOPT.PGOPT['waitlimit'] = 280 # limit of C and P request checks at a time
PgOPT.PGOPT['totallimit'] = 380 # maximum number of checks can be started on PBS

# PBS queue name -> queue argument passed to '-q'; the entry with value None
# is treated as the primary queue by get_pbsqueue_option().
PBSQUEUES = {'rda' : None, 'htc' : 'casper@casper-pbs'}
# Maximum run time per queue, looked up by max_batch_time(); presumably in
# seconds, since get_pbs_options() divides the value by 3600 to get hours.
PBSTIMES = {'default' : 21600, 'rda' : PgLOG.PGLOG['PBSTIME'], 'htc' : 86400}
#DOPTHOSTS = {'rda-work' : None, 'PBS' : ['!subconv -Q']}
DOPTHOSTS = {'rda-work' : None, 'PBS' : None, 'cron' : None}
DSLMTS = {}   # cache used by get_dataset_limit(), keyed by dsid plus 'default'
EMLMTS = {}   # cache used by get_user_limit(), keyed by email plus 'default'
190
+
191
+ #
192
+ # get the maximum running time for batch processes
193
+ #
194
def max_batch_time(qname):
    """Return the maximum run time configured for a batch queue.

    qname: PBS queue name; an empty or unknown name falls back to the
           'default' entry of PBSTIMES.

    Returns 0 when the current host is not the PBS host.
    """
    if CHKHOST['curhost'] != PgLOG.PGLOG['PBSNAME']:
        return 0

    queue = qname if (qname and qname in PBSTIMES) else 'default'
    return PBSTIMES[queue]
201
+
202
+ #
203
+ # check if enough information entered on command line and/or input file
204
+ # for given action(s)
205
+ #
206
def check_dscheck_options(cact, aname):
    """Validate the parsed options for an action and set up the run mode.

    cact:  two-letter action key (for example "PC", "AC", "DL", "SD").
    aname: application name handed to the PgOPT/PgSIG helpers.

    Reports the first option inconsistency through PgOPT.action_error(),
    fills CHKHOST when option -LH is given, then either starts/signals the
    daemon (option -DM) or registers the process as a non-daemon run.
    Finally stores the wait interval in PgOPT.PGOPT['minlimit'].

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below was rebuilt from the statement order.
    """
    errmsg = [
        "Option -DM(-DaemonMode) works with Action -PC(-ProcessCheck) only",
        "Do not specify Check Index for Daemon Mode",
        "Miss check index per Info option -CI(-CheckIndex)",
        "Need Machine Hostname per -HN for new daemon control",
        "Need Application command name per -CM for new daemon control",
        "Must be {} to process Checks in daemon mode".format(PgLOG.PGLOG['RDAUSER']),
        "Miss Command information per Info option -CM(-Command)",
    ]
    erridx = -1   # index into errmsg; stays negative when no error is found
    PgOPT.set_uid(aname)

    if 'CI' in PgOPT.params: validate_checks()
    if 'DS' in PgOPT.params: validate_datasets()

    if 'DM' in PgOPT.params:
        if cact != "PC":
            erridx = 0
        elif PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']:
            erridx = 5
        elif 'CI' in PgOPT.params:
            erridx = 1
    elif cact == "DL":
        if not ('CI' in PgOPT.params or 'DI' in PgOPT.params): erridx = 2
    elif cact == 'SD':
        validate_daemons()
        # NOTE(review): the error texts speak of a "new daemon control";
        # confirm whether 'ND' (NewDaemon) was meant instead of 'SD' here
        if 'SD' in PgOPT.params:
            if 'HN' not in PgOPT.params:
                erridx = 3
            elif 'CM' not in PgOPT.params:
                erridx = 4
    elif cact == "AC":
        if 'CM' not in PgOPT.params:
            erridx = 6
    # 'and' binds tighter than 'or': IC always needs -CI, UL only without 'LL'
    elif 'CI' not in PgOPT.params and (cact == "IC" or cact == "UL" and 'LL' not in PgOPT.params):
        erridx = 2

    if erridx >= 0: PgOPT.action_error(errmsg[erridx], cact)

    if cact == "PC" or cact == 'UL':
        if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']:
            PgOPT.action_error("{}: cannot process Checks as {}".format(PgLOG.PGLOG['CURUID'], PgOPT.params['LN']), cact)
        if 'LH' in PgOPT.params:
            chkhost = PgLOG.get_short_host(PgOPT.params['LH'])
            if not chkhost: chkhost = PgLOG.get_host(1)
            CHKHOST['chkhost'] = CHKHOST['curhost'] = chkhost
            if PgLOG.valid_batch_host(chkhost):
                PgLOG.reset_batch_host(chkhost)
                CHKHOST['isbatch'] = 1
                # batch checks may be locked under the batch name or this host
                CHKHOST['hostcond'] = "IN ('{}', '{}')".format(chkhost, PgLOG.PGLOG['HOSTNAME'])
            else:
                if PgUtil.pgcmp(chkhost, PgLOG.PGLOG['HOSTNAME'], 1):
                    PgOPT.action_error("{}: Cannot handle checks on {}".format(PgLOG.PGLOG['HOSTNAME'], chkhost), cact)
                CHKHOST['hostcond'] = "= '{}'".format(chkhost)

    if 'DM' in PgOPT.params:
        if PgLOG.PGLOG['CHKHOSTS'] and PgLOG.PGLOG['CHKHOSTS'].find(PgLOG.PGLOG['HOSTNAME']) < 0:
            PgOPT.action_error("Daemon mode can only be started on '{}'".format(PgLOG.PGLOG['CHKHOSTS']), cact)
        if re.match(r'^(start|begin)$', PgOPT.params['DM'], re.I):
            # without -NC or -LH a started daemon defaults to no-command mode
            if not ('NC' in PgOPT.params or 'LH' in PgOPT.params): PgOPT.params['NC'] = 1
            wtime = PgOPT.params['WI'] if 'WI' in PgOPT.params else 0
            mtime = PgOPT.params['MT'] if 'MT' in PgOPT.params else 0
            logon = PgOPT.params['LO'] if 'LO' in PgOPT.params else 0
            PgSIG.start_daemon(aname, PgLOG.PGLOG['CURUID'], 1, wtime, logon, 0, mtime)
        else:
            # any other -DM value is forwarded as a signal to the daemon
            PgSIG.signal_daemon(PgOPT.params['DM'], aname, PgOPT.params['LN'])
    else:
        if cact == "PC":
            PgSIG.validate_single_process(aname, PgOPT.params['LN'], PgLOG.argv_to_string())
        elif cact == "SO":
            plimit = PgOPT.params['PL'][0] if 'PL' in PgOPT.params and PgOPT.params['PL'][0] > 0 else 1
            PgSIG.validate_multiple_process(aname, plimit, PgOPT.params['LN'], PgLOG.argv_to_string())
        wtime = PgOPT.params['WI'] if 'WI' in PgOPT.params else 30
        logon = PgOPT.params['LO'] if 'LO' in PgOPT.params else 1
        PgSIG.start_none_daemon(aname, cact, PgOPT.params['LN'], 1, wtime, logon)
        # default the specialist to the login name unless checks/datasets are
        # given explicitly or the login name is the RDA user
        if not ('CI' in PgOPT.params or 'DS' in PgOPT.params or PgOPT.params['LN'] == PgLOG.PGLOG['RDAUSER']):
            PgOPT.set_default_value("SN", PgOPT.params['LN'])

    # minimal wait interval in seconds for next check
    PgOPT.PGOPT['minlimit'] = PgOPT.params['WI'] = PgSIG.PGSIG['WTIME']
288
+
289
+ #
290
+ # process counts of hosts in dsdaemon control records for given command and specialist
291
+ #
292
def get_process_limits(cmd, specialist, logact = 0):
    """Return the cached per-host process limits for a command/specialist pair.

    cmd:        command name stored in the dsdaemon control records.
    specialist: specialist login name.
    logact:     logging action flags passed through to PgDBI.

    The dsdaemon records for the exact command are read first; when the
    command has no record at all, the records for command 'ALL' of the same
    specialist are used instead.  Records with proclimit <= 0 are ignored.

    Returns a dict of parallel lists ('host', 'priority', 'acnt', 'match',
    'pcnd'), or 0 when no usable record exists.  The result is cached in
    PLIMITS under "<cmd>-<specialist>".
    """
    ckey = "{}-{}".format(cmd, specialist)
    if ckey in PLIMITS: return PLIMITS[ckey]

    cnd = "command = '{}' AND specialist = '{}'".format(cmd, specialist)
    if CHKHOST['chkhost']:
        ecnd = " AND hostname = '{}'".format(CHKHOST['chkhost'])
    else:
        ecnd = " ORDER by priority, hostname"

    pgrecs = PgDBI.pgmget("dsdaemon", "hostname, bqueues, matchhost, proclimit, priority", cnd + ecnd, logact)
    if not pgrecs and PgDBI.pgget("dsdaemon", "", cnd, logact) == 0:
        # nothing configured for this command on any host: use the 'ALL' records
        pgrecs = PgDBI.pgmget("dsdaemon", "hostname, matchhost, proclimit, priority",
                              "command = 'ALL' AND specialist = '{}'{}".format(specialist, ecnd), logact)

    limits = {'host' : [], 'priority' : [], 'acnt' : [], 'match' : [], 'pcnd' : []}
    if pgrecs:
        rows = zip(pgrecs['hostname'], pgrecs['priority'], pgrecs['proclimit'], pgrecs['matchhost'])
        for host, priority, proclimit, matchhost in rows:
            if proclimit <= 0: continue
            limits['host'].append(host)
            limits['priority'].append(priority)
            limits['acnt'].append(proclimit)
            limits['match'].append(matchhost)
            # condition counting the checks of this command running on the host
            limits['pcnd'].append("{} AND pid > 0 AND lockhost = '{}'".format(cnd, host))

    PLIMITS[ckey] = limits if limits['host'] else 0
    return PLIMITS[ckey]
328
+
329
+ #
330
+ # find an available host name to process a dscheck record
331
+ #
332
def get_process_host(limits, hosts, cmd, act, logact = 0):
    """Return the index in limits of the first host able to take a check.

    limits: dict of parallel lists as built by get_process_limits().
    hosts:  hostname specification of the dscheck record.
    cmd:    command name of the check.
    act:    action name of the check.
    logact: logging action flags passed through to PgDBI.

    A host qualifies when it is not listed in DWHOSTS, runs fewer checks than
    its limit and is accepted by PgLOG.check_process_host().  Returns -1 when
    no host qualifies.
    """
    # 'dsrqst' with action 'PR' always uses match flag 'G'
    fixed_flag = (cmd == 'dsrqst' and act == 'PR')

    for idx, host in enumerate(limits['host']):
        if host in DWHOSTS: continue   # the host is down
        if limits['acnt'][idx] > PgDBI.pgget("dscheck", "", limits['pcnd'][idx], logact):
            mflag = 'G' if fixed_flag else limits['match'][idx]
            if PgLOG.check_process_host(hosts, host, mflag):
                return idx

    return -1
346
+
347
+ #
348
+ # reset the cached process limits
349
+ #
350
def reset_process_limits():
    """Periodically drop the cached process limits and the down-host list.

    PLIMITS is cleared on every third call and DWHOSTS (together with the
    PBS host setting) on every tenth call, counted by the module-level LOOP.
    """
    global LOOP, DWHOSTS, PLIMITS

    if not LOOP % 3:
        # clean the cache for available processes on hosts
        PLIMITS = {}

    if not LOOP % 10:
        DWHOSTS = {}
        PgLOG.set_pbs_host(None, 1)

    LOOP = LOOP + 1
362
+
363
+ #
364
+ # start dschecks
365
+ #
366
def start_dschecks(cnd, logact = 0):
    """Run the housekeeping passes and start the dscheck records that are due.

    cnd:    leading part of the query condition; it is extended by plain
            concatenation, so it is presumably empty or ends with " AND "
            (confirm against the caller).
    logact: logging action flags passed through to the helpers.

    Returns the number of checks started; 0 when option -NC is set or no
    check host is configured.
    """
    check_dscheck_locks(cnd, logact)
    if not CHKHOST['chkhost']:
        email_dschecks(cnd, logact)
    purge_dschecks(cnd, logact)

    if 'NC' in PgOPT.params or not CHKHOST['chkhost']:
        return 0
    if CHKHOST['isbatch'] and 'CP' in PgOPT.params:
        check_dscheck_pends(cnd, logact)
    # set_dscheck_options(CHKHOST['chkhost'], cnd, logact)
    reset_process_limits()

    # checks already running on the batch host count toward the total limit
    running = 0
    if CHKHOST['isbatch']:
        running = PgDBI.pgget("dscheck", "", "lockhost = '{}' AND pid > 0".format(PgLOG.PGLOG['PBSNAME']), logact)

    cnd += "pid = 0 AND status <> 'D' AND einfo IS NULL AND (qoptions IS NULL OR LEFT(qoptions, 1) != '!') ORDER by hostname DESC, cindex"
    pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0

    started = 0
    for idx in range(total):
        if (started + running) > PgOPT.PGOPT['totallimit']:
            break
        pgrec = PgUtil.onerecord(pgrecs, idx)
        # all files are done already
        if pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount']:
            continue
        # the maximum number of tries is used up
        if pgrec['tcount'] and pgrec['tcount'] >= pgrec['mcount']:
            continue
        # the parent check is not done yet
        if pgrec['pindex'] and PgDBI.pgget("dscheck", "", "cindex = {} AND status <> 'D'".format(pgrec['pindex']), logact):
            continue
        if pgrec['dflags'] and PgFile.check_storage_dflags(pgrec['dflags'], pgrec, logact):
            continue
        ret = start_one_dscheck(pgrec, logact)
        if ret > 0:
            started += ret

    if total > 1:
        PgLOG.pglog("{} of {} DSCHECK records started on {}".format(started, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    return started
396
+
397
+ #
398
+ # check long locked dschecks and unlock them if the processes are dead
399
+ #
400
def check_dscheck_locks(cnd, logact = 0):
    """Unlock long-locked dscheck records or refresh their check time.

    cnd:    leading part of the query condition; extended by plain
            concatenation (presumably empty or ending with " AND ").
    logact: logging action flags passed through to the helpers.

    Records locked on the check host or on this host are unlocked through
    PgLock.lock_dscheck() when possible, otherwise their time is refreshed by
    update_dscheck_time().  Records with an empty or 'rda_config' lock host
    are unlocked directly in the database.

    NOTE(review): the source this was recovered from had lost its
    indentation; RUNPIDS is assumed to be reset on every call, not only when
    records were found -- confirm.
    """
    global RUNPIDS
    ltime = int(time.time())
    lochost = PgLOG.PGLOG['HOSTNAME']
    cnd += "pid > 0 AND "
    # cut-off times derived from the PgSIG settings DTIME, CTIME and RTIME
    dtime = ltime - PgSIG.PGSIG['DTIME']
    ctime = ltime - PgSIG.PGSIG['CTIME']
    rtime = ltime - PgSIG.PGSIG['RTIME']
    if CHKHOST['chkhost']:
        cnd += "lockhost {} AND (stttime = 0 OR chktime < {})".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "chktime > 0 AND (chktime < {} OR chktime < {} AND lockhost = '{}' OR chktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, lochost, rtime)

    pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
    cnt = (len(pgrecs['cindex']) if pgrecs else 0)
    lcnt = 0   # number of records unlocked in this pass
    for i in range(cnt):
        pgrec = PgUtil.onerecord(pgrecs, i)
        lmsg = "{}({}) at {} on {}".format(pgrec['lockhost'], pgrec['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        cidx = pgrec['cindex']
        if CHKHOST['chkhost'] or pgrec['lockhost'] == lochost:
            spid = "{}{}".format(pgrec['lockhost'], pgrec['pid'])
            if spid not in RUNPIDS and PgLock.lock_dscheck(cidx, 0) > 0:
                PgLOG.pglog("CHK{}: unlocked {}".format(cidx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
            else:
                # not unlocked: keep the record's time/status current instead
                update_dscheck_time(pgrec, ltime, logact)
        elif not pgrec['lockhost'] or pgrec['lockhost'] == 'rda_config':
            record = {'pid' : 0, 'lockhost' : ''}
            # the pid in the condition guards against a lock taken meanwhile
            if PgDBI.pgupdt("dscheck", record, "cindex = {} AND pid = {}".format(cidx, pgrec['pid']), logact):
                PgLOG.pglog("CHK{}: unlocked {}".format(cidx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("Chk{}: time NOT updated for {} of {}".format(cidx, dscheck_runtime(pgrec['chktime'], ltime), lmsg), logact)

    if cnt > 0:
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} of {} DSCHECK record{} unlocked on {}".format(lcnt, cnt, s, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    RUNPIDS = {}
440
+
441
+ #
442
+ # check long pending dschecks and kill them
443
+ #
444
def check_dscheck_pends(cnd, logact = 0):
    """Kill batch jobs of dscheck records that have been pending for too long.

    cnd:    leading part of the query condition; extended by plain
            concatenation (presumably empty or ending with " AND ").
    logact: logging action flags passed through to the helpers.

    A job is killed with 'rdakill' when its record has status 'P', was
    submitted more than PGSIG['RTIME'] ago and PBS still reports state 'Q'.
    """
    cutoff = int(time.time()) - PgSIG.PGSIG['RTIME']
    cnd += "pid > 0 AND "
    cnd += "lockhost {} AND status = 'P' AND subtime > 0 AND subtime < {}".format(CHKHOST['hostcond'], cutoff)
    pgrecs = PgDBI.pgmget("dscheck", "pid", cnd, logact)
    pids = pgrecs['pid'] if pgrecs else []

    killed = 0
    for pid in pids:
        info = PgSIG.get_pbs_info(pid, 0, logact)
        if not (info and info['State'] == 'Q'):
            continue
        PgLOG.pgsystem("rdakill -h {} -p {}".format(PgLOG.PGLOG['PBSNAME'], pid), PgLOG.LOGWRN, 5)
        killed += 1

    found = len(pids)
    if found > 0:
        plural = 's' if found > 1 else ''
        PgLOG.pglog("{} of {} Pending DSCHECK record{} stopped on {}".format(killed, found, plural, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
463
+
464
+ #
465
+ # update dscheck time in case in pending status or
466
+ # the command does not update it on time by itself
467
+ #
468
def update_dscheck_time(pgrec, ltime, logact = 0):
    """Refresh the check time, and the status if it changed, of a locked record.

    pgrec:  dscheck record (dict) as read from the database.
    ltime:  current time in seconds since the epoch, stored as 'chktime'.
    logact: logging action flags passed through to the helpers.

    Returns None.  Nothing is written when the record is locked on another
    (non-PBS) host, or when 'chktime' was changed by someone else since the
    record was read.

    NOTE(review): the source this was recovered from had lost its
    indentation; the 'else' below is assumed to pair with the PBS host test.
    """
    record = {'chktime' : ltime}
    if(CHKHOST['chkhost'] and CHKHOST['chkhost'] == PgLOG.PGLOG['PBSNAME']
       and pgrec['lockhost'] == PgLOG.PGLOG['PBSNAME']):
        # batch job: take the status from the PBS job state
        info = PgSIG.get_pbs_info(pgrec['pid'], 0, logact)
        if info:
            stat = info['State']
            if stat == 'Q': stat = 'P'   # PBS state 'Q' is recorded as status 'P'
            if stat != pgrec['status']: record['status'] = stat
    else:
        if pgrec['lockhost'] != PgLOG.PGLOG['HOSTNAME']: return # cannot update dscheck time
        if PgSIG.check_host_pid(pgrec['lockhost'], pgrec['pid']):
            if pgrec['status'] != "R": record['status'] = "R"
        else:
            if pgrec['status'] == "R": record['status'] = "F"

    if pgrec['stttime']:
        if pgrec['command'] == "dsrqst" and pgrec['oindex']:
            (record['fcount'], record['dcount'], record['size']) = PgCMD.get_dsrqst_counts(pgrec, logact)

    elif 'status' in record and record['status'] == 'R':
        # first time the check is seen running: record its start time
        record['stttime'] = ltime

    cnd = "cindex = {} AND pid = {}".format(pgrec['cindex'], pgrec['pid'])
    if PgDBI.pgget("dscheck", "", "{} AND chktime = {}".format(cnd, pgrec['chktime']), logact):
        # update only the chktime is not changed yet
        PgDBI.pgupdt("dscheck", record, cnd, logact)
496
+
497
+ #
498
+ # return a running time string for given start and end times of the process
499
+ #
500
def dscheck_runtime(start, end = None):
    """Return the running time between two epoch times as a string.

    start: start time in seconds; a false value yields ''.
    end:   end time in seconds; defaults to the current time.

    Returns '' when the elapsed time is less than 60 seconds, otherwise the
    text produced by PgLOG.seconds_to_string_time().
    """
    if not start:
        return ''

    finish = end if end else int(time.time())
    elapsed = finish - start
    if elapsed >= 60:
        return PgLOG.seconds_to_string_time(elapsed)

    return ''
511
+
512
+ #
513
+ # check dschecks and purge them if done already
514
+ #
515
def purge_dschecks(cnd, logact = 0):
    """Delete unlocked dscheck records that are finished or stale.

    cnd:    leading part of the query condition; extended by plain
            concatenation (presumably empty or ending with " AND ").
    logact: logging action flags passed through to the helpers.

    A record is purged when its status is 'D', when it is marked running but
    was not checked within PGSIG['CTIME'], when all its files are done, or
    when its try count reached the maximum.
    """
    cnd += "pid = 0 AND einfo IS NULL AND bid "
    # NOTE(review): this key is 'PGBATCH' while the rest of the module uses
    # 'PBSNAME' -- confirm both are defined in PgLOG.PGLOG
    if CHKHOST['curhost'] == PgLOG.PGLOG['PGBATCH']:
        cnd += '> 0'
    else:
        cnd += '= 0'

    pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    stale = int(time.time()) - PgSIG.PGSIG['CTIME']

    purged = 0
    for idx in range(total):
        pgrec = PgUtil.onerecord(pgrecs, idx)
        done = (pgrec['status'] == "D" or
                pgrec['status'] == "R" and pgrec['chktime'] < stale or
                pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount'] or
                pgrec['tcount'] and pgrec['tcount'] >= pgrec['mcount'])
        if not done:
            continue
        if PgLock.lock_dscheck(pgrec['cindex'], 1) <= 0:
            continue
        purged += PgCMD.delete_dscheck(pgrec, None, logact)

    if purged and total > 1:
        PgLOG.pglog("{} of {} DSCHECK records purged on {}".format(purged, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
533
+
534
+ #
535
+ # check dschecks and send saved email
536
+ #
537
def email_dschecks(cnd, logact = 0):
    """Send the email text saved in unlocked dscheck records.

    cnd:    leading part of the query condition; extended by plain
            concatenation (presumably empty or ending with " AND ").
    logact: logging action flags; the EMEROL bits are carried over to the
            email action.

    Each record is locked while its email is sent and 'einfo' is cleared
    afterwards.  The loop stops at the first failure to send or to clear.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL

    cnd += "pid = 0 AND einfo IS NOT NULL"
    pgrecs = PgDBI.pgmget("dscheck", "cindex", cnd, logact)
    cindices = pgrecs['cindex'] if pgrecs else []

    ecnt = 0
    for cidx in cindices:
        if PgLock.lock_dscheck(cidx, 1) <= 0:
            continue
        # re-read the record under lock; einfo may have been cleared meanwhile
        pgrec = PgDBI.pgget("dscheck", "*", "cindex = {}".format(cidx), logact)
        einfo = pgrec['einfo'] if pgrec else None

        sent = 0
        if einfo:
            if pgrec['dflags'] and pgrec['tcount'] and pgrec['tcount'] < pgrec['mcount']:
                msgary = PgFile.check_storage_dflags(pgrec['dflags'], pgrec, logact)
                if msgary:
                    einfo = "The Check will be resubmitted after the down storage Up again:\n{}\n{}".format("\n".join(msgary), einfo)

            delivered = (PgLOG.send_customized_email("Chk{}".format(cidx), einfo, emlact) and
                         PgDBI.pgexec("UPDATE dscheck set einfo = NULL WHERE cindex = {}".format(cidx), logact))
            sent = 1 if delivered else -1

        PgLock.lock_dscheck(cidx, 0)
        if sent == -1:
            break
        ecnt += sent

    total = len(cindices)
    if ecnt and total > 1:
        PgLOG.pglog("{} of {} DSCHECK emails sent on {}".format(ecnt, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
566
+
567
+ #
568
+ # start a dscheck job for given dscheck record
569
+ #
570
def _parse_submitted_job_id(output):
    """Return the job id found in the submit command output, or 0 if none."""
    patterns = (
        r'^Job <(\d+)> is submitted',
        r'^(\d+)\.casper-pbs',
        r'^Submitted batch job (\d+)',
    )
    for line in output.split('\n'):
        for pattern in patterns:
            ms = re.match(pattern, line)
            if ms: return int(ms.group(1))
    return 0


def start_one_dscheck(pgrec, logact = 0):
    """Submit the command of one dscheck record on the check host.

    pgrec:  dscheck record (dict) to be started.
    logact: logging action flags passed through to the helpers.

    The command is submitted through 'qsub' when the check host is the PBS
    host and through 'rdasub -bg' otherwise.  Returns 0 when the check is not
    started, the value of PgLOG.pglog() when the submission failed, and the
    value of fill_dscheck_info() after a successful submission.

    NOTE(review): workdir, argv and argextra are inserted into a shell
    command line as stored in the record; confirm they are validated when the
    record is created.
    """
    cidx = pgrec['cindex']
    specialist = pgrec['specialist']
    host = CHKHOST['chkhost']
    dlimit = get_system_down_limit(host, logact)
    if dlimit < 0:
        # too close to a planned system down
        PgLock.lock_dscheck(cidx, 0)
        return 0

    limits = get_process_limits(pgrec['command'], specialist, logact)
    if not limits:
        if pgrec['hostname'] and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            host = PgLOG.get_host(1)
            if PgLOG.check_process_host(pgrec['hostname'], host, 'I'):
                PgLOG.pglog("Chk{}: {} is not configured properly to run on {} for {}".format(cidx, pgrec['command'], host, specialist), logact)
        return 0

    lidx = get_process_host(limits, pgrec['hostname'], pgrec['command'], pgrec['action'], logact)
    if lidx < 0 or skip_dscheck_record(pgrec, host, logact): return 0

    cmd = "pgstart_{} ".format(specialist) if PgLOG.PGLOG['CURUID'] == PgLOG.PGLOG['RDAUSER'] else ""
    if not PgUtil.pgcmp(host, PgLOG.PGLOG['PBSNAME'], 1):
        if reach_dataset_limit(pgrec): return 0
        cmd += get_specialist_shell(specialist) + 'qsub '
        options = get_pbs_options(pgrec, dlimit, logact)
        if options:
            cmd += options
        elif pgrec['status'] == 'E':
            # building the dynamic options failed and was reported already
            return 0
        bstr = " in {} Queue {} ".format(PgLOG.PGLOG['PBSNAME'], pgrec['pbsqueue'])
    else:
        bstr = ""
        cmd += "rdasub -bg "

    if pgrec['workdir']:
        if pgrec['workdir'].find('$') > -1:
            # quoted so that the '$' is not expanded by the submitting shell
            cmd += "-cwd '{}' ".format(pgrec['workdir'])
        else:
            cmd += "-cwd {} ".format(pgrec['workdir'])
    else:
        cmd += "-cwd '$HOME' "

    chkcmd = pgrec['command']
    cmd += "-cmd " + chkcmd
    if pgrec['argv']:
        argv = pgrec['argv']
        if pgrec['argextra']: argv += pgrec['argextra']
        cmd += ' ' + argv + PgCMD.append_delayed_mode(chkcmd, argv)
        chkcmd += ' ' + argv

    PgLOG.pglog("Chk{}: issues '{}' onto {} for {}".format(cidx, chkcmd, host, pgrec['specialist']), PgLOG.LOGWRN)
    PgLOG.PGLOG['ERR2STD'] = ['chmod: changing']
    cstr = PgLOG.pgsystem(cmd, logact&(~PgLOG.EXITLG), 278) # 2+4+16+256
    PgLOG.PGLOG['ERR2STD'] = []

    pid = _parse_submitted_job_id(cstr) if cstr else 0
    if not pid:
        # name the failed step; "execute" is the fallback for an empty or
        # unrecognised error text, so raw command output never ends up here
        syserr = PgLOG.PGLOG['SYSERR']
        if syserr and syserr.find('Job not submitted') > -1:
            reason = "submit job"
        elif syserr and syserr.find('working directory') > -1:
            reason = "change working directory"
        else:
            reason = "execute"
        PgLock.lock_dscheck(cidx, 0)
        return PgLOG.pglog("Chk{}: {} Failed {} on {}{}{}\n{}".format(cidx, PgCMD.get_command_info(pgrec),
                           reason, PgLOG.PGLOG['HOSTNAME'], bstr, PgUtil.curtime(1), PgLOG.PGLOG['SYSERR']),
                           PgLOG.LOGWRN|PgLOG.FRCLOG)

    PgLOG.pglog("Chk{}: {} started on {}{}{}".format(cidx, PgCMD.get_command_info(pgrec),
                PgLOG.PGLOG['HOSTNAME'], bstr, PgUtil.curtime(1)), PgLOG.LOGWRN|PgLOG.FRCLOG)
    return fill_dscheck_info(pgrec, pid, host, logact)
657
+
658
+ #
659
+ # get qsub shell command
660
+ #
661
def get_specialist_shell(specialist):
    """Return the shell name ('bash' or 'tcsh') used by a specialist.

    The name is read once from table 'dssgrp' (shell_flag 'B' means bash,
    anything else tcsh) and cached in SHELLS.
    """
    if specialist in SHELLS:
        return SHELLS[specialist]

    pgrec = PgDBI.pgget("dssgrp", "shell_flag", "logname = '{}'".format(specialist))
    uses_bash = pgrec and pgrec['shell_flag'] == 'B'
    SHELLS[specialist] = 'bash' if uses_bash else 'tcsh'

    return SHELLS[specialist]
671
+
672
+ #
673
+ # get and cache process limit for a given dsid
674
+ #
675
def get_dataset_limit(dsid):
    """Return the process limit for a dataset, caching it in DSLMTS.

    The limit comes from the 'dslimit' record of the dataset, else from the
    cached default, else from the 'dslimit' record with dsid 'all' (which is
    then cached as the default), else 45.
    """
    if dsid in DSLMTS:
        return DSLMTS[dsid]

    pgrec = PgDBI.pgget('dslimit', 'processlimit', "dsid = '{}'".format(dsid))
    if pgrec:
        dslmt = pgrec['processlimit']
    elif 'default' in DSLMTS:
        dslmt = DSLMTS['default']
    else:
        dslmt = 45
        pgall = PgDBI.pgget('dslimit', 'processlimit', "dsid = 'all'")
        if pgall:
            DSLMTS['default'] = dslmt = pgall['processlimit']

    DSLMTS[dsid] = dslmt
    return DSLMTS[dsid]
691
+
692
+ #
693
+ # check if reaching running limit for a specified dataset
694
+ #
695
def reach_dataset_limit(pgrec):
    """Return 1 if the dataset of a dsrqst check exceeds its process limit.

    Only 'dsrqst' checks with a dsid and action 'BR', 'SP' or 'PP' are
    limited.  When the limit is exceeded the check is unlocked and 1 is
    returned; otherwise 0.
    """
    if pgrec['command'] != 'dsrqst':
        return 0

    dsid = pgrec['dsid']
    if not (dsid and pgrec['action'] in ['BR', 'SP', 'PP']):
        return 0

    dslmt = get_dataset_limit(dsid)
    active = PgDBI.pgget('dscheck', '', "dsid = '{}' AND status <> 'C' AND action IN ('BR', 'SP', 'PP')".format(dsid))
    if active > dslmt:
        PgLock.lock_dscheck(pgrec['cindex'], 0)
        return 1

    return 0
706
+
707
+ #
708
+ # get and cache request limit for a given email
709
+ #
710
def get_user_limit(email):
    """Return the request check limits for a user email, caching them in EMLMTS.

    The limits come from the 'userlimit' record of the email, else from the
    cached default, else from the 'userlimit' record with email 'all' (which
    is then cached as the default), else the built-in list [20, 10, 36].

    Returns a list; records from the database give [maxrqstcheck,
    maxpartcheck].  A copy is cached per email so that callers cannot alter
    the shared default.
    """
    if email in EMLMTS: return EMLMTS[email]

    flds = 'maxrqstcheck, maxpartcheck'
    pgrec = PgDBI.pgget('userlimit', flds, "email = '{}'".format(email))
    if pgrec:
        emlmts = [pgrec['maxrqstcheck'], pgrec['maxpartcheck']]
    elif 'default' in EMLMTS:
        emlmts = EMLMTS['default']
    else:
        emlmts = [20, 10, 36]
        # the condition has no placeholder, so no formatting is needed here
        pgrec = PgDBI.pgget('userlimit', flds, "email = 'all'")
        if pgrec:
            EMLMTS['default'] = emlmts = [pgrec['maxrqstcheck'], pgrec['maxpartcheck']]
    EMLMTS[email] = emlmts.copy()

    return EMLMTS[email]
728
+
729
+ #
730
+ # check if reaching running limit for a specified dataset
731
+ #
732
def reach_dataset_limit(pgrec):
    """Return 1 if the dataset of a dsrqst check exceeds its process limit.

    NOTE(review): this is a second definition of reach_dataset_limit in this
    module with the same behavior as the earlier one; being later, it is the
    one in effect.  One of the two can presumably be removed.

    Only 'dsrqst' checks with a dsid and action 'BR', 'SP' or 'PP' are
    limited.  When the limit is exceeded the check is unlocked and 1 is
    returned; otherwise 0.
    """
    limited_actions = ['BR', 'SP', 'PP']
    if pgrec['command'] != 'dsrqst':
        return 0

    dsid = pgrec['dsid']
    if dsid and pgrec['action'] in limited_actions:
        dslmt = get_dataset_limit(dsid)
        count = PgDBI.pgget('dscheck', '', "dsid = '{}' AND status <> 'C' AND action IN ('BR', 'SP', 'PP')".format(dsid))
        if count > dslmt:
            PgLock.lock_dscheck(pgrec['cindex'], 0)
            return 1

    return 0
743
+
744
+ #
745
+ # check and return the time limit in seconds before a planned system down for given hostname
746
+ #
747
def get_system_down_limit(hostname, logact = 0):
    """Return the time left before a planned system down of a host.

    hostname: host to look up through PgDBI.get_system_downs().
    logact:   logging action flags passed through to PgDBI.

    Returns 0 when no down time is planned, -1 when the remaining time (less
    twice PGSIG['CTIME']) is below PGOPT['minlimit'], else the remaining time.
    """
    down = PgDBI.get_system_downs(hostname, logact)
    if not down['start']:
        return 0

    remaining = down['start'] - down['curtime'] - 2*PgSIG.PGSIG['CTIME']
    if remaining < PgOPT.PGOPT['minlimit']:
        return -1
    return remaining
756
+
757
+ #
758
+ # check and get the option string for submit a PBS job
759
+ #
760
def get_pbs_options(pgrec, limit = 0, logact = 0):
    """Build the option string for submitting a check as a PBS job.

    pgrec:  dscheck record (dict); its 'pbsqueue' may be changed by
            get_pbsqueue_option().
    limit:  upper bound for the run time (0 for none), for example the time
            left before a planned system down.
    logact: logging action flags (not used in this function).

    The run time is the walltime found in the record's qoptions, or
    PBSTIMES['default'], capped by limit and by the queue maximum.  A
    '-l walltime=' option is added or replaced when the capped run time
    differs from the requested and the default one.

    Returns the option string; every option added here ends with a blank so
    that the caller can append further arguments directly.
    """
    opttime = 0
    # the option builder may hand back None; normalize to an empty string
    qoptions = build_dscheck_options(pgrec, 'qoptions', 'PBS') or ''
    qname = get_pbsqueue_option(pgrec)
    maxtime = max_batch_time(qname)
    runtime = PBSTIMES['default']

    if qoptions:
        # "<opts>/<opts>" holds separate options; the second set is for otype 'P'
        ms = re.match(r'^(-.+)/(-.+)$', qoptions)
        if ms: qoptions = ms.group(2 if pgrec['otype'] == 'P' else 1)

        ms = re.search(r'-l\s+\S*walltime=([\d:-]+)', qoptions)
        if ms:
            # parsed as [D-]H[:M[:S]]
            # NOTE(review): PBS itself reads a bare number as seconds and
            # "M:S" as minutes:seconds -- confirm the intended convention
            optval = ms.group(1)
            vcs = optval.split(':')
            vcl = len(vcs)
            vds = vcs[0].split('-')
            opttime = 3600*int(vds[0])
            if len(vds) > 1:
                opttime *= 24
                opttime += 3600*int(vds[1])
            if vcl > 1:
                opttime += 60*int(vcs[1])
                if vcl > 2: opttime += int(vcs[2])
            runtime = opttime
        qoptions += ' '

    if limit > 0 and runtime > limit: runtime = limit
    if runtime > maxtime: runtime = maxtime
    if runtime != opttime and runtime != PBSTIMES['default']:
        optval = "walltime={}:{:02}:{:02}".format(int(runtime/3600), int(runtime/60)%60, runtime%60)
        if opttime:
            if runtime < opttime: qoptions = re.sub(r'walltime=[\d:-]+', optval, qoptions)
        elif qoptions.find('-l ') > -1:
            qoptions = re.sub(r'-l\s+', "-l {},".format(optval), qoptions)
        else:
            # trailing blank keeps the next option from being glued on
            qoptions += "-l " + optval + ' '

    if pgrec['modules']:
        options = build_dscheck_options(pgrec, 'modules', 'PBS')
        if options: qoptions += "-mod {} ".format(options)
    if pgrec['environments']:
        options = build_dscheck_options(pgrec, 'environments', 'PBS')
        if options: qoptions += "-env {} ".format(options)

    if qname: qoptions += "-q {} ".format(qname)

    return qoptions
809
+
810
+ #
811
+ # check rda queue for pending jobs to switch PBS queue if needed
812
+ #
813
def get_pbsqueue_option(pgrec):
    """Choose the PBS queue for a check and return its '-q' argument.

    pgrec: dscheck record (dict); its 'pbsqueue' field is updated in the
           database and in the dict when the chosen queue differs.

    The PBSQUEUES entry without a queue argument is the primary queue and an
    entry with one is the alternate.  The alternate is chosen when more than
    one check with status 'P' is recorded for the primary queue.

    Returns the PBSQUEUES value of the chosen queue (None for the primary).
    """
    qname = aname = None
    for pname, qarg in PBSQUEUES.items():
        if qarg:
            aname = pname
        else:
            qname = pname

    if qname is None:
        # no primary queue configured: fall back to the alternate, if any
        qname = aname
    elif aname is not None:
        pcnt = PgDBI.pgget("dscheck", '', "status = 'P' AND pbsqueue = '{}'".format(qname))
        if pcnt > 1: qname = aname

    if qname is None: return None   # PBSQUEUES is empty
    if pgrec['pbsqueue'] != qname:
        PgDBI.pgexec("UPDATE dscheck SET pbsqueue = '{}' WHERE cindex = {}".format(qname, pgrec['cindex']))
        pgrec['pbsqueue'] = qname

    return PBSQUEUES[qname]
828
+
829
+ #
830
+ # build individual option string for given option name
831
+ #
832
def build_dscheck_options(pgcheck, optname, optstr = None):
    """Return the option string of one option field of a dscheck record.

    pgcheck: dscheck record (dict); on failure its 'status' is set to 'E'.
    optname: name of the option field ('qoptions', 'modules', ...).
    optstr:  label used in the error message; defaults to the capitalized
             field name.

    An empty or 'default' value yields ''.  A value starting with '!' is a
    dynamic option: the rest of the value is resolved through
    PgCMD.get_dynamic_options() and the result is saved back to the record.
    When resolving fails, the check is marked as error and the related
    request (otype 'R') or request partition (otype 'P') is notified.

    NOTE(review): the source this was recovered from had lost its
    indentation; the dscheck update is assumed to run on success and failure.
    """
    options = pgcheck[optname]
    if not options or options == 'default': return ''
    if not re.match(r'^!', options): return options
    cidx = pgcheck['cindex']
    # reget the option field to see if it is processed
    pgrec = PgDBI.pgget('dscheck', optname, 'cindex = {}'.format(cidx))
    if not pgrec or options != pgrec[optname]: return options

    record = {}
    errmsg = ''
    record[optname] = options = PgCMD.get_dynamic_options(options[1:], pgcheck['oindex'], pgcheck['otype'])
    if not options and PgLOG.PGLOG['SYSERR']:
        record['status'] = pgcheck['status'] = 'E'
        record['pid'] = 0
        record['tcount'] = pgcheck['tcount'] + 1
        if not optstr: optstr = optname.capitalize()
        errmsg = "Chk{}: Fail to build {} Options, {}".format(cidx, optstr, PgLOG.PGLOG['SYSERR'])
    PgDBI.pgupdt("dscheck", record, "cindex = {}".format(cidx))
    if errmsg:
        pgrqst = None
        if pgcheck['otype'] == 'R':
            ridx = pgcheck['oindex']
            pgrqst = PgDBI.pgget('dsrqst', '*', 'rindex = {}'.format(ridx))
            if pgrqst:
                record = {}
                record['status'] = PgOPT.send_request_email_notice(pgrqst, errmsg, 0, 'E')
                record['ecount'] = pgrqst['ecount'] + 1
                PgDBI.pgupdt("dsrqst", record, "rindex = {}".format(ridx), PgOPT.PGOPT['errlog'])
                errmsg = ''   # handled through the request notice
        elif pgcheck['otype'] == 'P':
            pidx = pgcheck['oindex']
            pgpart = PgDBI.pgget('ptrqst', 'rindex', 'pindex = {}'.format(pidx))
            if pgpart:
                PgDBI.pgexec("UPDATE ptrqst SET status = 'E' WHERE pindex = {}".format(pidx))
                ridx = pgpart['rindex']
                pgrqst = PgDBI.pgget('dsrqst', '*', 'rindex = {}'.format(ridx))
                # skip the notice when the request is in error status already
                if pgrqst and pgrqst['status'] != 'E':
                    record = {}
                    record['status'] = PgOPT.send_request_email_notice(pgrqst, errmsg, 0, 'E')
                    record['ecount'] = pgrqst['ecount'] + 1
                    PgDBI.pgupdt("dsrqst", record, "rindex = {}".format(ridx), PgOPT.PGOPT['errlog'])
                    errmsg = ''   # handled through the request notice
        # log the error here when no request notice took care of it
        if errmsg: PgLOG.pglog(errmsg, PgOPT.PGOPT['errlog'])
    return options
878
+
879
+ #
880
+ # fill up dscheck record in case the command does not do it itself
881
+ #
882
def fill_dscheck_info(ckrec, pid, host, logact = 0):
    """Fill in a dscheck record for a command that does not do so itself.

    The try count (tcount) is incremented first.  With a pid, the check is
    locked for that pid/host and the status defaults to 'R'; on the PBS host
    the status comes from PgSIG.get_pbs_info() ('Q' is stored as 'P'), on any
    other host runhost/bid are recorded.  Without a pid the status is 'F'.

    Returns 1 when the record is locked or was changed by someone else in
    the meantime, 0 when the record no longer exists, otherwise the result
    of the PgDBI.pgupdt() call.
    """
    cidx = ckrec['cindex']
    chkcnd = "cindex = {}".format(cidx)
    PgDBI.pgexec("UPDATE dscheck SET tcount = tcount+1 WHERE " + chkcnd, logact)
    if pid and PgLock.lock_host_dscheck(cidx, pid, host, logact) <= 0:
        return 1    # under processing

    record = {}
    if not pid:
        stat = 'F'
    else:
        stat = 'R'
        record['pid'] = pid
        if host != PgLOG.PGLOG['PBSNAME']:
            record['runhost'] = PgLOG.PGLOG['HOSTNAME']
            record['bid'] = 0
        else:
            info = PgSIG.get_pbs_info(pid, 0, logact, 2)
            if info:
                stat = info['State']
                if stat == 'Q':
                    stat = 'P'
    record['status'] = stat

    now = int(time.time())
    record['stttime'] = record['subtime'] = record['chktime'] = now

    latest = PgDBI.pgget("dscheck", "status, stttime", chkcnd, logact)
    if not latest:
        return 0
    if latest['status'] != ckrec['status'] or latest['stttime'] > ckrec['stttime']:
        return 1
    if not pid and PgLock.lock_dscheck(cidx, 0) <= 0:
        return 1

    return PgDBI.pgupdt("dscheck", record, chkcnd, logact)
911
+
912
+ #
913
+ # return 1 to skip running if the dscheck record is not ready; 0 otherwise
914
+ #
915
def skip_dscheck_record(pgrec, host, logact = 0):
    """Return 1 to skip running a dscheck record that is not ready, else 0.

    A record is skipped when PgFile.check_host_down() reports a problem for
    its working directory or for the home directory of its command, when the
    stored record has been picked up or changed since pgrec was read, when
    it cannot be locked, or when resetting its timing fields fails.
    """
    workdir = pgrec['workdir']
    if workdir and '$' in workdir:
        workdir = ''    # a '$' in the path is replaced by an empty path
    if PgFile.check_host_down(workdir, host, logact):
        return 1

    command = pgrec['command']
    if command == "dsrqst":
        homekey = 'RQSTHOME'
    elif command in ("dsupdt", "dsarch"):
        homekey = 'DSDHOME'
    else:
        homekey = None
    if homekey and PgFile.check_host_down(PgLOG.PGLOG[homekey], host, logact):
        return 1

    chkcnd = "cindex = {}".format(pgrec['cindex'])
    current = PgDBI.pgget("dscheck", "pid, status, stttime, tcount", chkcnd, logact)
    if not current:
        return 1
    if current['pid'] > 0 or current['status'] != pgrec['status']:
        return 1
    if current['stttime'] > pgrec['stttime'] or current['tcount'] > pgrec['tcount']:
        return 1
    if PgLock.lock_dscheck(pgrec['cindex'], 1) <= 0:
        return 1

    if pgrec['subtime'] or pgrec['stttime']:
        # clear the previous run's bookkeeping, keeping accumulated run times
        reset = {'stttime' : 0, 'subtime' : 0, 'runhost' : '', 'bid' : 0}
        (reset['ttltime'], reset['quetime']) = PgCMD.get_dscheck_runtime(pgrec)
        if not PgDBI.pgupdt("dscheck", reset, "cindex = {}".format(pgrec['cindex']), logact):
            return 1

    return 0
937
+
938
+ #
939
+ # start recording Queued requests to checks
940
+ #
941
def start_dsrqsts(cnd, logact = 0):
    """Add dscheck records for queued dsrqst requests and their partitions.

    Stale request locks are checked first; when CHKHOST['chkhost'] is set the
    function stops there and returns 1.  Otherwise saved emails are sent,
    finished requests are scheduled for purging, and requests with status
    'Q' are turned into dscheck records until the room left under
    PgOPT.PGOPT['waitlimit'] (minus the current 'C' and 'P' checks) is used.

    cnd is a condition prefix that is prepended to every query built here.
    Returns the number of request checks counted in rcnt.
    """
    check_dsrqst_locks(cnd, logact)
    if CHKHOST['chkhost']: return 1
    email_dsrqsts(cnd, logact)
    purge_dsrqsts(cnd, logact)
    rcnd = cnd
    rcnd += ("status = 'Q' AND rqsttype <> 'C' AND (pid = 0 OR pid < ptcount) AND " +
             "einfo IS NULL ORDER BY priority, rindex")
    pgrecs = PgDBI.pgmget("dsrqst", "*", rcnd, logact)
    cnt = (len(pgrecs['rindex']) if pgrecs else 0)
    ccnt = PgDBI.pgget("dscheck", '', "status = 'C'", logact)
    pcnt = PgDBI.pgget("dscheck", '', "status = 'P'", logact)
    if (ccnt+pcnt) > PgOPT.PGOPT['waitlimit']:
        if cnt: PgLOG.pglog("{}/{} Checks are Waiting/Pending; Add new dscheck records {} later".format(ccnt, pcnt, PgLOG.PGLOG['HOSTNAME']),
                            PgLOG.LOGWRN|PgLOG.FRCLOG)
    # room left under the wait limit; zero or negative disables the loops below
    rcnt = PgOPT.PGOPT['waitlimit']-ccnt-pcnt
    if cnt == 0:
        acnt = 0
        cnts = start_dsrqst_partitions(None, rcnt, logact)
        rcnt = cnts[0]
        pcnt = cnts[1]
    else:
        tcnt = cnt
        if cnt > rcnt: cnt = rcnt
        if cnt > 1: PgLOG.pglog("Try to add dschecks for {} DSRQST records on {}".format(cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

        i = acnt = ccnt = pcnt = rcnt = 0
        while i < tcnt and ccnt < cnt:
            pgrec = PgUtil.onerecord(pgrecs, i)
            i += 1
            if pgrec['ptcount'] == 0 and validate_dsrqst_partitions(pgrec, logact):
                acnt += add_dsrqst_partitions(pgrec, logact)
            elif pgrec['ptcount'] < 2:
                rcnt += start_one_dsrqst(pgrec, logact)
            else:
                cnts = start_dsrqst_partitions(pgrec, (cnt-ccnt), logact)
                rcnt += cnts[0]
                pcnt += cnts[1]
            # acnt/pcnt/rcnt are running totals; recompute the sum instead of
            # adding it again, which counted earlier checks on every iteration
            ccnt = acnt + pcnt + rcnt

    if rcnt > 1: PgLOG.pglog("build {} requests on {}".format(rcnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    if pcnt > 1: PgLOG.pglog("build {} request partitions on {}".format(pcnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    if acnt > 1: PgLOG.pglog("Add partitions to {} requests on {}".format(acnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

    return rcnt
987
+
988
+ #
989
+ # validate a given request if ok to do partitions
990
+ #
991
def validate_dsrqst_partitions(pgrec, logact = 0):
    """Tell whether a request may be split into partitions.

    Returns True when the request's control record has a partition limit or
    a partition size.  Otherwise the request is recorded as single-partition
    (ptcount = 1, non-zero ptlimit/ptsize cleared) both in pgrec and in the
    dsrqst table, and False is returned.
    """
    control = PgCMD.get_dsrqst_control(pgrec, logact)
    if control and (control['ptlimit'] or control['ptsize']):
        return True

    pgrec['ptcount'] = 1
    changes = {'ptcount' : 1}
    for field in ('ptlimit', 'ptsize'):
        if pgrec[field]:
            pgrec[field] = changes[field] = 0

    PgDBI.pgupdt('dsrqst', changes, "rindex = {}".format(pgrec['rindex']), logact)
    return False
1003
+
1004
+ #
1005
+ # call given command to evaluate dynamically the dscheck.qoptions
1006
+ #
1007
def set_dscheck_options(chost, cnd, logact):
    """Evaluate dynamic ('!'-prefixed) qoptions of created checks on a host.

    Nothing is done unless chost is a key of DOPTHOSTS.  For every unstarted
    check with status 'C' whose qoptions starts with '!', the options are
    built through build_dscheck_options() and stored back.  Checks whose
    qoptions value is listed in DOPTHOSTS[chost] are left alone.
    """
    if chost not in DOPTHOSTS:
        return

    skipcmds = DOPTHOSTS[chost]
    pgrecs = PgDBI.pgmget("dscheck", "*", cnd + "pid = 0 AND status = 'C' AND LEFT(qoptions, 1) = '!'", logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    updated = 0
    for idx in range(total):
        pgrec = PgUtil.onerecord(pgrecs, idx)
        if skipcmds and pgrec['qoptions'] in skipcmds:
            continue    # skip
        if PgLock.lock_dscheck(pgrec['cindex'], 1) <= 0:
            continue
        qoptions = build_dscheck_options(pgrec, 'qoptions', 'PBS')
        if not qoptions and pgrec['status'] == 'E':
            continue    # failed evaluating qoptions
        changes = {'pid' : 0, 'qoptions': qoptions}
        updated += PgDBI.pgupdt('dscheck', changes, "cindex = {}".format(pgrec['cindex']), PgOPT.PGOPT['errlog'])

    if updated and total > 1:
        PgLOG.pglog("{} of {} DSCHECK PBS options Dynamically set on {}".format(updated, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1024
+
1025
+ #
1026
+ # add a new dscheck record if a given request record is due
1027
+ #
1028
def start_one_dsrqst(pgrec, logact = 0):
    """Add a dscheck record that builds (action BR) the given request.

    Returns 0 when a BR check for the request is already present, otherwise
    the result of add_one_dscheck().  When the request control's qoptions has
    the form '-.../-...', only the part before the slash is used here.
    """
    existing = PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND action = 'BR'".format(pgrec['rindex']), logact)
    if existing:
        return 0

    pgctl = PgCMD.get_dsrqst_control(pgrec, logact)
    if pgctl and 'qoptions' in pgctl and pgctl['qoptions']:
        parts = re.match(r'^(-.+)/(-.+)$', pgctl['qoptions'])
        if parts:
            pgctl['qoptions'] = parts.group(1)

    argv = "{} BR -RI {} -b -d".format(pgrec['dsid'], pgrec['rindex'])
    return add_one_dscheck(pgrec['rindex'], 'R', "dsrqst", pgrec['dsid'], "BR",
                           '', pgrec['specialist'], argv, pgrec['email'], pgctl, logact)
1040
+
1041
+ #
1042
+ # add a dscheck record for a given request to setup partitions
1043
+ #
1044
def add_dsrqst_partitions(pgrec, logact = 0):
    """Add a dscheck record that sets up partitions (action SP) for a request.

    Returns 0 when any dsrqst check for the request is already present,
    otherwise the result of add_one_dscheck().  When the request control's
    qoptions has the form '-.../-...', only the part before the slash is
    used here.
    """
    existing = PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(pgrec['rindex']), logact)
    if existing:
        return 0

    pgctl = PgCMD.get_dsrqst_control(pgrec, logact)
    if pgctl and 'qoptions' in pgctl and pgctl['qoptions']:
        parts = re.match(r'^(-.+)/(-.+)$', pgctl['qoptions'])
        if parts:
            pgctl['qoptions'] = parts.group(1)

    argv = "{} SP -RI {} -NP -b -d".format(pgrec['dsid'], pgrec['rindex'])
    return add_one_dscheck(pgrec['rindex'], 'R', "dsrqst", pgrec['dsid'], 'SP',
                           '', pgrec['specialist'], argv, pgrec['email'], pgctl, logact)
1056
+
1057
+ #
1058
+ # add multiple dscheck records of partitions for a given request
1059
+ #
1060
def start_dsrqst_partitions(pgrqst, ccnt, logact = 0):
    """Add dscheck records (action PP) for queued request partitions.

    With a request record, only that request's partitions are considered,
    and its failed ('E') partitions are first set back to 'Q' while the
    error count stays within PgOPT.PGOPT['PEMAX'].  With pgrqst = None,
    queued partitions of all requests are considered.  At most ccnt
    partitions are handled.  When a given, unlocked request has no queued
    partition left and its ptcount matches the 'O' partition lookup, the
    request itself is started through start_one_dsrqst().

    Returns [request checks added, partition checks added].
    """
    counts = [0, 0]
    if pgrqst:
        rindex = pgrqst['rindex']
        cnd = "rindex = {} AND status = ".format(rindex)
        if pgrqst['pid'] == 0:
            failed = PgDBI.pgget("ptrqst", "", cnd + "'E'", logact)
            if failed > 0 and (pgrqst['ecount'] + failed) <= PgOPT.PGOPT['PEMAX']:
                # set Error partitions back to Q
                PgDBI.pgexec("UPDATE ptrqst SET status = 'Q' WHERE {}'E'".format(cnd), PgOPT.PGOPT['extlog'])
    else:
        rindex = 0
        cnd = "status = "

    pgrecs = PgDBI.pgmget("ptrqst", "*", cnd + "'Q' AND pid = 0 ORDER by pindex", logact)
    total = len(pgrecs['pindex']) if pgrecs else 0
    if total > 0:
        if total > ccnt:
            total = ccnt
        pgctl = PgCMD.get_dsrqst_control(pgrqst, logact) if pgrqst else None
        for idx in range(total):
            part = PgUtil.onerecord(pgrecs, idx)
            if part['rindex'] != rindex:
                # partition belongs to another request: load that request
                rindex = part['rindex']
                pgrqst = PgDBI.pgget("dsrqst", "*", "rindex = {}".format(rindex), logact)
                if pgrqst:
                    pgctl = PgCMD.get_dsrqst_control(pgrqst, logact)
            if not pgrqst:    # request missing
                PgDBI.pgdel('ptrqst', "rindex = {}".format(rindex))
                continue

            pgptctl = None
            if part['ptcmp'] != 'Y':
                pgptctl = PgCMD.get_partition_control(part, pgrqst, pgctl, logact)
                if pgptctl and 'qoptions' in pgptctl and pgptctl['qoptions']:
                    # for partitions the part after the slash is used
                    pieces = re.match(r'^(-.+)/(-.+)$', pgptctl['qoptions'])
                    if pieces:
                        pgptctl['qoptions'] = pieces.group(2)

            if PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND action = 'PP'".format(part['pindex']), logact):
                continue
            argv = "{} PP -PI {} -RI {} -b -d".format(pgrqst['dsid'], part['pindex'], pgrqst['rindex'])
            counts[1] += add_one_dscheck(part['pindex'], 'P', "dsrqst", pgrqst['dsid'], "PP",
                                         '', pgrqst['specialist'], argv, pgrqst['email'], pgptctl, logact)

    elif pgrqst and pgrqst['pid'] == 0 and pgrqst['ptcount'] == PgDBI.pgget("ptrqst", "", cnd + " 'O'", logact):
        counts[0] = start_one_dsrqst(pgrqst, logact)

    return counts
1105
+
1106
+ #
1107
+ # check long processing requests and unlock the processes that are aborted
1108
+ #
1109
def check_dsrqst_locks(cnd, logact = 0):
    """Check long-locked dsrqst records and release locks of aborted processes.

    cnd is a condition prefix; it is extended to select locked records
    (pid > 0) whose locktime is older than the thresholds derived from
    PgSIG.PGSIG['DTIME'], ['CTIME'] and ['RTIME'].  Partition locks are
    checked first with the same condition.  For each selected request:

    * locked by this host (or in check-host mode): try to unlock it; if that
      is not possible refresh its locktime;
    * no lock host, 'rda_config', or 'partition' with no partition still
      locked: clear pid/lockhost directly;
    * otherwise, when logact carries PgLOG.EMEROL, report it.

    Every record that stays locked is remembered in RUNPIDS.

    NOTE(review): statement nesting was reconstructed from a listing without
    indentation -- confirm against the packaged module.
    """
    ltime = int(time.time())
    lochost = PgLOG.PGLOG['HOSTNAME']
    cnd += "pid > 0 AND "
    dtime = ltime - PgSIG.PGSIG['DTIME']
    ctime = ltime - PgSIG.PGSIG['CTIME']
    rtime = ltime - PgSIG.PGSIG['RTIME']
    if CHKHOST['chkhost']:
        cnd += "lockhost {} AND locktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "locktime > 0 AND (locktime < {} OR locktime < {} AND lockhost = '{}' OR locktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, lochost, rtime)
    check_partition_locks(cnd, ltime, logact)   # check partitions first

    pgrecs = PgDBI.pgmget("dsrqst", "rindex, lockhost, pid, locktime", cnd, logact)
    cnt = (len(pgrecs['rindex']) if pgrecs else 0)
    lcnt = 0
    for i in range(cnt):
        pgrec = PgUtil.onerecord(pgrecs, i)
        lmsg = "{}({}) at {} on {}".format(pgrec['lockhost'], pgrec['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        ridx = pgrec['rindex']
        if CHKHOST['chkhost'] or pgrec['lockhost'] == lochost:
            if PgLock.lock_request(ridx, 0) > 0:
                PgLOG.pglog("Rqst{}: unlocked {}".format(ridx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
                continue
            if(PgDBI.pgexec("UPDATE dsrqst set locktime = {} WHERE rindex = {} AND pid = {}".format(ltime, ridx, pgrec['pid']), logact) and
               not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(ridx))):
                PgLOG.pglog("Rqst{}: time updated for {}".format(ridx, lmsg), PgLOG.LOGWRN|PgLOG.FRCLOG)
        elif(not pgrec['lockhost'] or pgrec['lockhost'] == 'rda_config' or pgrec['lockhost'] == 'partition' and
             not PgDBI.pgget('ptrqst', '', "rindex = {} AND pid > 0".format(ridx), logact)):
            record = {'pid' : 0, 'lockhost' : ''}
            if PgDBI.pgupdt("dsrqst", record, "rindex = {} AND pid = {}".format(ridx, pgrec['pid']), logact):
                # the original call passed four arguments to two placeholders,
                # dropping pid and time; use lmsg like the sibling lock checks
                PgLOG.pglog("Rqst{}: unlocked {}".format(ridx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            # the original call had one argument too many, so the run time was
            # never printed; report run time and lock info like the siblings
            PgLOG.pglog("Rqst{}: time NOT updated for {} of {}".format(ridx, dscheck_runtime(pgrec['locktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(pgrec['lockhost'], pgrec['pid'])] = 1

    if cnt > 1: PgLOG.pglog("{} of {} DSRQST records unlocked on {}".format(lcnt, cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1151
+
1152
+ #
1153
+ # check long processing request partitions and unlock the processes that are aborted
1154
+ #
1155
def check_partition_locks(cnd, ltime, logact = 0):
    """Check long-locked request partitions and release aborted ones.

    cnd is the complete lock condition built by the caller and ltime the
    current epoch time used to refresh lock times.  The PgLOG.LGEREX bit is
    removed from logact for the initial query.  Partitions that stay locked
    are remembered in RUNPIDS.

    NOTE(review): statement nesting was reconstructed from a listing without
    indentation -- confirm against the packaged module.
    """
    pgrecs = PgDBI.pgmget("ptrqst", "pindex, rindex, lockhost, pid, locktime", cnd, (logact&~PgLOG.LGEREX))
    total = len(pgrecs['pindex']) if pgrecs else 0
    unlocked = 0
    for idx in range(total):
        part = PgUtil.onerecord(pgrecs, idx)
        lmsg = "{}({}) at {} on {}".format(part['lockhost'], part['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        pidx = part['pindex']
        if CHKHOST['chkhost'] or part['lockhost'] == PgLOG.PGLOG['HOSTNAME']:
            if PgLock.lock_partition(pidx, 0) > 0:
                PgLOG.pglog("RPT{}: unlocked {}".format(pidx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
            # still locked: refresh lock times of the partition and its request
            refreshed = (PgDBI.pgexec("UPDATE ptrqst set locktime = {} WHERE pindex = {} AND pid = {}".format(ltime, pidx, part['pid']), logact) and
                         PgDBI.pgexec("UPDATE dsrqst set locktime = {} WHERE rindex = {}".format(ltime, part['rindex']), logact))
            if refreshed and not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND otype = 'P'".format(pidx)):
                PgLOG.pglog("RPT{}: time updated for {}".format(pidx, lmsg), PgLOG.LOGWRN)
        elif not part['lockhost'] or part['lockhost'] == 'rda_config':
            cleared = {'pid' : 0, 'lockhost' : ''}
            if PgDBI.pgupdt("ptrqst", cleared, "pindex = {} AND pid = {}".format(pidx, part['pid']), logact):
                PgLOG.pglog("RPT{}: unlocked {}".format(pidx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("RPT{}: time NOT updated for {} of {}".format(pidx, dscheck_runtime(part['locktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(part['lockhost'], part['pid'])] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSRQST partitions unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1185
+
1186
+ #
1187
+ # check dsrqsts and purge them if done already
1188
+ #
1189
def purge_dsrqsts(cnd, logact = 0):
    """Add purge checks (action PR) for requests that are due for purging.

    Selected are requests with status 'P' whose purge date/time is unset or
    in the past, and requests with status 'O' whose purge date/time is in
    the past.  Requests that already have a dsrqst check are skipped.  Each
    purge check is added with a fixed one-hour walltime option.
    """
    (sdate, stime) = PgUtil.get_date_time()
    cnd += "(status = 'P' AND (date_purge IS NULL OR date_purge < '{}' OR date_purge = '{}' AND time_purge < '{}')".format(sdate, sdate, stime)
    cnd += " OR status = 'O' AND (date_purge < '{}' OR date_purge = '{}' AND time_purge < '{}')) ORDER BY rindex".format(sdate, sdate, stime)
    pgrecs = PgDBI.pgmget("dsrqst", "rindex, dsid, email, specialist", cnd, logact)
    total = len(pgrecs['rindex']) if pgrecs else 0
    pgctl = {'qoptions' : "-l walltime=1:00:00"}
    for idx in range(total):
        rqst = PgUtil.onerecord(pgrecs, idx)
        ridx = rqst['rindex']
        already = PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(ridx), logact)
        if already:
            continue
        argv = "{} PR -RI {} -b -d".format(rqst['dsid'], ridx)
        add_one_dscheck(ridx, 'R', 'dsrqst', rqst['dsid'], 'PR', '',
                        rqst['specialist'], argv, rqst['email'], pgctl, logact)
1205
+
1206
+ #
1207
+ # check dsrqsts and send saved email
1208
+ #
1209
def email_dsrqsts(cnd, logact = 0):
    """Send the emails saved in dsrqst.einfo for unlocked requests.

    Each request is locked while its email is handled.  The saved text goes
    through verify_request_einfo() first; a falsy result means nothing is
    sent for now.  After a successful send the einfo field is set to NULL.
    The loop stops at the first failure to send or to clear einfo.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL
    cnd += "pid = 0 AND einfo IS NOT NULL"
    pgrecs = PgDBI.pgmget("dsrqst", "rindex, ptcount, einfo", cnd, logact)
    total = len(pgrecs['rindex']) if pgrecs else 0
    sent_total = 0
    for idx in range(total):
        rqst = PgUtil.onerecord(pgrecs, idx)
        ridx = rqst['rindex']
        if PgLock.lock_request(ridx, 1) <= 0:
            continue

        einfo = verify_request_einfo(ridx, rqst['ptcount'], rqst['einfo'], logact)
        if not einfo:
            sent = 0
        elif (PgLOG.send_customized_email("Rqst{}".format(ridx), einfo, emlact) and
              PgDBI.pgexec("UPDATE dsrqst set einfo = NULL WHERE rindex = {}".format(ridx), logact)):
            sent = 1
        else:
            sent = -1

        PgLock.lock_request(ridx, 0)    # always release the lock taken above
        if sent == -1:
            break
        sent_total += sent

    if total > 1:
        PgLOG.pglog("{} of {} DSRQST emails sent on {}".format(sent_total, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1233
+
1234
+ #
1235
+ # verify email info for partition errors
1236
+ # return None if not all partitions finished
1237
+ #
1238
def verify_request_einfo(ridx, ptcnt, einfo, logact = 0):
    """Finalize the saved email text of a request for partition errors.

    ridx   -- request index used to look up the request's partitions
    ptcnt  -- partition count of the request
    einfo  -- saved email text, possibly holding the <PARTERR>/<PARTCNT> keys
    logact -- logging action flags passed to the database calls

    Returns einfo unchanged when the request has fewer than two partitions,
    when einfo is empty, or when it does not contain "<PARTERR>".  Returns
    None while any partition is still locked (pid > 0) or has status 'R'.
    Otherwise "<PARTERR>" is removed and "<PARTCNT>" is replaced by
    "<failed> of <ptcnt>".
    """
    # no further checking if there are no partitions or einfo is empty
    if ptcnt < 2 or not einfo: return einfo
    # partition processes are not all done yet
    if PgDBI.pgget("ptrqst", "", "rindex = {} AND (pid > 0 OR status = 'R')".format(ridx), logact): return None

    errkey = "<PARTERR>"
    cntkey = "<PARTCNT>"
    # einfo does not contain the partition error key
    if errkey not in einfo: return einfo

    # both keys are literal text, so plain string replacement is used
    einfo = einfo.replace(errkey, '')
    ecnt = PgDBI.pgget("ptrqst", "", "rindex = {} AND status = 'E'".format(ridx), logact)
    return einfo.replace(cntkey, "{} of {}".format(ecnt, ptcnt))
1254
+
1255
+ #
1256
+ # start recording due updates to checks
1257
+ #
1258
def start_dsupdts(cnd, logact = 0):
    """Add dscheck records for update controls that are due.

    Stale update locks are checked first; in check-host mode the function
    stops there and returns 0.  Otherwise saved emails are sent and every
    unlocked dcupdt record with an action, no pending email and a control
    time not later than now gets a dsupdt check, unless one exists already
    or PgOPT.valid_data_time() rejects a record that has a parent (pindex).
    The loop stops at the first check that cannot be added.

    Returns the number of checks added.
    """
    now = PgUtil.curtime(1)
    check_dsupdt_locks(cnd, logact)
    if CHKHOST['chkhost']:
        return 0
    email_dsupdt_controls(cnd, logact)
    email_dsupdts(cnd, logact)

    cnd += "pid = 0 and cntltime <= '{}' and action > '' AND einfo IS NULL ORDER by cntltime".format(now)
    pgrecs = PgDBI.pgmget("dcupdt", "*", cnd, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    added = 0
    for idx in range(total):
        ctl = PgUtil.onerecord(pgrecs, idx)
        if PgDBI.pgget("dscheck", "pid, lockhost", "oindex = {} AND command = 'dsupdt'".format(ctl['cindex']), logact):
            continue
        if ctl['pindex'] and not PgOPT.valid_data_time(ctl):
            continue
        argv = "{} {} -CI {} -b -d".format(ctl['dsid'], ctl['action'], ctl['cindex'])
        ok = add_one_dscheck(ctl['cindex'], 'C', "dsupdt", ctl['dsid'], ctl['action'],
                             '', ctl['specialist'], argv, None, ctl, logact)
        if not ok:
            break
        added += 1

    if total > 1:
        PgLOG.pglog("update {} of {} DSUPDT controls on {}".format(added, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    return added
1281
+
1282
+ #
1283
+ # check if the parent update control is finished
1284
+ #
1285
def parent_not_finished(pgrec):
    """Return 1 if the parent update control is still behind, else 0.

    pgrec['frequency'] is parsed as '<n>Y', '<n>M', '<n>W', '<n>D' or '<n>H'
    (case-insensitive), or as '<n>M/<f>' for f fractions of a month.  The
    resulting offsets are added to pgrec['datatime'] through
    PgUtil.adddatetime(), and the parent (pgrec['pindex']) counts as
    unfinished when its datatime is earlier than that result.

    Returns 0 without any lookup for a zero count or for a month fraction
    that is below 2, above 10, or does not divide 30.

    NOTE(review): an unrecognized frequency leaves all offsets at zero; the
    original raised IndexError in that case -- confirm the desired outcome.
    """
    # seven offsets are passed to adddatetime(); the original list held only
    # three (four for hourly), so indexing freq[3]..freq[6] raised IndexError
    freq = [0] * 7
    ms = re.match(r'^(\d+)([YMWDH])$', pgrec['frequency'], re.I)
    if ms:
        val = int(ms.group(1))
        unit = ms.group(2).upper()
        if not val: return 0
        if unit == 'Y':
            freq[0] = val
        elif unit == 'M':
            freq[1] = val
        elif unit == 'W':
            freq[2] = 7 * val
        elif unit == 'D':
            freq[2] = val
        elif unit == 'H':   # update frequency is hourly controlled
            freq[3] = val
    else:
        ms = re.match(r'^(\d+)M/(\d+)', pgrec['frequency'], re.I)
        if ms:
            val = int(ms.group(1))
            nf = int(ms.group(2))
            if nf < 2 or nf > 10 or (30%nf): return 0
            freq[1] = val
            freq[6] = nf    # number of fractions in a month

    dtime = PgUtil.adddatetime(pgrec['datatime'], freq[0], freq[1], freq[2], freq[3], freq[4], freq[5], freq[6])
    if PgDBI.pgget("dcupdt", "", "cindex = {} AND datatime < '{}'".format(pgrec['pindex'], dtime), PgOPT.PGOPT['extlog']):
        return 1
    else:
        return 0
1316
+
1317
+ #
1318
+ # check long processing updates and unlock the processes that are aborted
1319
+ #
1320
def check_dsupdt_locks(ocnd, logact = 0):
    """Check long-locked update records and release locks of aborted processes.

    Two passes are made with the condition prefix ocnd: first over update
    controls (dcupdt, keyed on chktime/lockhost), then over local update
    files (dlupdt, keyed on locktime/hostname).  Records locked by this host
    (or any selected record in check-host mode) are unlocked if possible,
    otherwise their time stamp is refreshed.  Records with no host or host
    'rda_config' are cleared directly.  Records that stay locked are
    remembered in RUNPIDS.

    NOTE(review): statement nesting was reconstructed from a listing without
    indentation -- confirm against the packaged module.
    """
    ltime = int(time.time())
    lochost = PgLOG.PGLOG['HOSTNAME']
    dtime = ltime - PgSIG.PGSIG['DTIME']
    ctime = ltime - 4*PgSIG.PGSIG['CTIME']
    rtime = ltime - PgSIG.PGSIG['RTIME']

    # ---- pass 1: update controls ----
    cnd = ocnd + "pid > 0 AND "
    if CHKHOST['chkhost']:
        cnd += "lockhost {} AND chktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "chktime > 0 AND (chktime < {} OR chktime < {} AND lockhost = '{}' OR chktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, lochost, rtime)

    pgrecs = PgDBI.pgmget("dcupdt", "cindex, lockhost, pid, chktime", cnd, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    unlocked = 0
    for n in range(total):
        ctl = PgUtil.onerecord(pgrecs, n)
        lmsg = "{}({}) at {} on {}".format(ctl['lockhost'], ctl['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        idx = ctl['cindex']
        if CHKHOST['chkhost'] or ctl['lockhost'] == lochost:
            if PgLock.lock_update_control(idx, 0) > 0:
                PgLOG.pglog("UC{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
            refreshed = PgDBI.pgexec("UPDATE dcupdt SET chktime = {} WHERE cindex = {} AND pid = {}".format(ltime, idx, ctl['pid']), logact)
            if refreshed and not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsupdt'".format(idx)):
                PgLOG.pglog("UC{}: time updated for {}".format(idx, lmsg), PgLOG.LOGWRN)
        elif not ctl['lockhost'] or ctl['lockhost'] == 'rda_config':
            cleared = {'pid' : 0, 'lockhost' : ''}
            if PgDBI.pgupdt("dcupdt", cleared, "cindex = {} AND pid = {}".format(idx, ctl['pid']), logact):
                PgLOG.pglog("UC{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("UC{}: time NOT updated for {} of {}".format(idx, dscheck_runtime(ctl['chktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(ctl['lockhost'], ctl['pid'])] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Controls unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

    # ---- pass 2: local update files ----
    cnd = ocnd + "pid > 0 AND locktime > 0 AND "
    if CHKHOST['chkhost']:
        cnd += "hostname {} AND locktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "(locktime < {} OR locktime < {} AND hostname = '{}' OR locktime < {} AND hostname = 'rda_config')".format(ctime, dtime, lochost, rtime)

    pgrecs = PgDBI.pgmget("dlupdt", "lindex, hostname, pid, locktime", cnd, logact)
    total = len(pgrecs['lindex']) if pgrecs else 0
    unlocked = 0
    for n in range(total):
        upd = PgUtil.onerecord(pgrecs, n)
        lmsg = "{}({}) at {} on {}".format(upd['hostname'], upd['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        idx = upd['lindex']
        if CHKHOST['chkhost'] or upd['hostname'] == lochost:
            if PgLock.lock_update(idx, None, 0) > 0:
                PgLOG.pglog("Updt{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
            PgDBI.pgexec("UPDATE dlupdt SET locktime = {} WHERE lindex = {} AND pid = {}".format(ltime, idx, upd['pid']), logact)
        elif not upd['hostname'] or upd['hostname'] == 'rda_config':
            cleared = {'pid' : 0, 'hostname' : ''}
            if PgDBI.pgupdt("dlupdt", cleared, "lindex = {} AND pid = {}".format(idx, upd['pid']), logact):
                PgLOG.pglog("Updt{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("Updt{}: time NOT updated for {} of {}".format(idx, dscheck_runtime(upd['locktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(upd['hostname'], upd['pid'])] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Local Files unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1392
+
1393
+ #
1394
+ # check dsupdts and send saved email
1395
+ #
1396
def email_dsupdt_controls(cnd, logact = 0):
    """Send the emails saved in dcupdt.einfo for unlocked update controls.

    Each control is locked while handled and its einfo is re-read under the
    lock.  After a successful send the einfo field is set to NULL.  The loop
    stops at the first failure to send or to clear einfo.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL
    cnd += "pid = 0 AND einfo IS NOT NULL"
    pgrecs = PgDBI.pgmget("dcupdt", "cindex", cnd, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    sent_total = 0
    for n in range(total):
        cidx = pgrecs['cindex'][n]
        if PgLock.lock_update_control(cidx, 1) <= 0:
            continue

        ctl = PgDBI.pgget("dcupdt", "einfo", "cindex = {}".format(cidx), logact)
        if not ctl['einfo']:
            sent = 0
        elif (PgLOG.send_customized_email("UC{}".format(cidx), ctl['einfo'], emlact) and
              PgDBI.pgexec("UPDATE dcupdt set einfo = NULL WHERE cindex = {}".format(cidx), logact)):
            sent = 1
        else:
            sent = -1

        PgLock.lock_update_control(cidx, 0)    # release the lock taken above
        if sent == -1:
            break
        sent_total += sent

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Control emails sent on {}".format(sent_total, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1419
+
1420
+ #
1421
+ # check dsupdts and send saved email
1422
+ #
1423
def email_dsupdts(cnd, logact = 0):
    """Send the emails saved in dlupdt.emnote for unlocked local update files.

    A record is skipped while its update control (cindex > 0) is locked, or
    when the record itself cannot be locked.  The note is re-read under the
    lock; after a successful send the emnote field is set to NULL.  The loop
    stops at the first failure to send or to clear emnote.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL: emlact |= PgLOG.EMEROL
    cnd += "pid = 0 AND emnote IS NOT NULL"
    pgrecs = PgDBI.pgmget("dlupdt", "lindex, cindex", cnd, logact)
    cnt = (len(pgrecs['lindex']) if pgrecs else 0)
    ecnt = 0
    for i in range(cnt):
        idx = pgrecs['cindex'][i]
        if idx > 0 and PgDBI.pgget("dcupdt", "", "cindex = {} AND pid > 0".format(idx), logact): continue
        idx = pgrecs['lindex'][i]
        if PgLock.lock_update(idx, None, 1) <= 0: continue
        pgrec = PgDBI.pgget("dlupdt", "emnote", "lindex = {}".format(idx), logact)
        if pgrec['emnote']:
            # label the email with the actual update index; the original
            # passed the literal text "Updtidx" for every record
            sent = 1 if(PgLOG.send_customized_email("Updt{}".format(idx), pgrec['emnote'], emlact) and
                        PgDBI.pgexec("UPDATE dlupdt set emnote = NULL WHERE lindex = {}".format(idx), logact)) else -1
        else:
            sent = 0

        PgLock.lock_update(idx, None, 0)
        if sent == -1: break
        ecnt += sent

    if cnt > 0: PgLOG.pglog("{} of {} DSUPDT emails sent on {}".format(ecnt, cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
1448
+
1449
+ #
1450
+ # create a dscheck record for a given command
1451
+ #
1452
def add_one_dscheck(oindex, otype, cmd, dsid, action, workdir, specialist, argv, remail, btctl, logact = 0):
    """Create a dscheck record for a given command.

    argv beyond 100 characters is stored separately as argextra.  Batch
    options derived from btctl by PgCMD.get_batch_options() are merged into
    the record.  For dsrqst commands with a request email, the number of
    checks already held for that email is compared against
    get_user_limit(remail): index 1 for partitions (otype 'P'), index 0 for
    other actions except 'PR'.

    Returns PgLOG.FAILURE when the user limit is reached or the insert
    fails (after sleeping PgSIG.PGSIG['ETIME'] seconds), the result of
    PgLOG.pglog() when a matching check exists already, and PgLOG.SUCCESS
    when the record was added.
    """
    argextra = None
    if len(argv) > 100:
        argextra = argv[100:]
        argv = argv[0:100]

    record = {'command' : cmd, 'argv' : argv, 'specialist' : specialist, 'workdir' : workdir,
              'dsid' : dsid, 'action' : action, 'oindex' : oindex, 'otype' : otype}
    (record['date'], record['time']) = PgUtil.get_date_time()
    if argextra:
        record['argextra'] = argextra
    if 'PI' in PgOPT.params:
        record['pindex'] = PgOPT.params['PI'][0]
    if 'MC' in PgOPT.params and PgOPT.params['MC'][0] > 0:
        record['mcount'] = PgOPT.params['MC'][0]
    record.update(PgCMD.get_batch_options(btctl))

    if cmd == 'dsrqst' and remail:
        record['remail'] = remail
        if otype == 'P':
            held = PgDBI.pgget("dscheck", "", "remail = '{}' AND otype = 'P'" .format(remail), logact)
            if held >= get_user_limit(remail)[1]:
                return PgLOG.FAILURE
        elif action != 'PR':
            held = PgDBI.pgget("dscheck", "", "remail = '{}' AND otype = 'R'".format(remail), logact)
            if held >= get_user_limit(remail)[0]:
                return PgLOG.FAILURE

    # look for an existing check of the same object or the same command
    if oindex and otype:
        existing = PgDBI.pgget('dscheck', '*', "oindex = {} AND otype = '{}'".format(oindex, otype), logact)
    else:
        existing = PgCMD.get_dscheck(cmd, argv, workdir, specialist, argextra, logact)
    if existing:
        return PgLOG.pglog("Chk{}: {} added already {} {}".format(existing['cindex'], PgCMD.get_command_info(existing), existing['date'], existing['time']), PgLOG.LOGWRN|PgLOG.FRCLOG)

    cidx = PgDBI.pgadd("dscheck", record, logact|PgLOG.AUTOID)
    if not cidx:
        if oindex and otype:
            PgLOG.pglog("{}-{}-{}: Fail add check for {}".format(cmd, otype, oindex, specialist), PgLOG.LOGWRN|PgLOG.FRCLOG)
        else:
            PgLOG.pglog("{}: Fail add check for {}".format(cmd, specialist), PgLOG.LOGWRN|PgLOG.FRCLOG)
        time.sleep(PgSIG.PGSIG['ETIME'])
        return PgLOG.FAILURE

    PgLOG.pglog("Chk{}: {} added {} {}".format(cidx, PgCMD.get_command_info(record), record['date'], record['time']), PgLOG.LOGWRN|PgLOG.FRCLOG)
    return PgLOG.SUCCESS
1500
+
1501
+ #
1502
+ # get dscheck status
1503
+ #
1504
def dscheck_status(stat):
    """Return the display name of a one-letter dscheck status code.

    Codes that are not in the table yield "Unknown".
    """
    names = {
        'C' : "Created",
        'D' : "Done",
        'E' : "Exit",
        'F' : "Finished",
        'H' : "Held",
        'I' : "Interrupted",
        'P' : "Pending",
        'Q' : "Queueing",
        'R' : "Run",
        'S' : "Suspended",
    }
    # single lookup with a default instead of a membership test plus index
    return names.get(stat, "Unknown")
1519
+
1520
+ #
1521
+ # validate given daemon control indices
1522
+ #
1523
def validate_daemons():
    """Validate the daemon control indices given through option DI.

    Does nothing when bit 8 of PgOPT.OPTS['DI'][2] is already set.  Without
    any index, action SD requires mode option -ND and fills DI with zeros
    (one per new record); the function then returns without setting the
    validated flag.  Given indices are converted to int; an index of 0 is
    only accepted for action SD together with -ND, every other index must
    exist in table dsdaemon and, for setting actions, belong to the current
    specialist unless run by the RDA user.  When a condition sign (!, <, >,
    <>) is found, DI is replaced by the matching indices from the database.

    NOTE(review): statement nesting was reconstructed from a listing without
    indentation -- confirm against the packaged module.
    """
    if PgOPT.OPTS['DI'][2]&8: return   # already validated

    dcnt = len(PgOPT.params['DI']) if 'DI' in PgOPT.params else 0
    if not dcnt:
        if PgOPT.PGOPT['CACT'] == 'SD':
            if 'ND' not in PgOPT.params:
                PgOPT.action_error("Mode option -ND must be present to add new Daemon Control record")
            dcnt = PgOPT.get_max_count("HN", "CM")
            if dcnt > 0:
                PgOPT.params['DI'] = [0]*dcnt
        return
    i = 0
    while i < dcnt:
        val = PgOPT.params['DI'][i]
        if val:
            if not isinstance(val, int):
                if re.match(r'^(!|<|>|<>)$', val):
                    if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0:
                        PgOPT.action_error("Invalid condition '{}' of Daemon Control index".format(val))
                    break
                PgOPT.params['DI'][i] = int(val)
        else:
            PgOPT.params['DI'][i] = 0
        i += 1
    if i >= dcnt:   # normal daemon control index given
        for i in range(dcnt):
            val = PgOPT.params['DI'][i]
            if not val:
                if PgOPT.PGOPT['CACT'] != 'SD':
                    PgOPT.action_error("Daemon Control Index 0 is not allowed\nUse Action SD with Mode option -ND to add new record")
                elif not PgOPT.params.get('ND'):
                    # .get() so a missing -ND is reported instead of raising KeyError
                    PgOPT.action_error("Mode option -ND must be present to add new Daemon Control record")
                continue
            if i > 0 and val == PgOPT.params['DI'][i-1]: continue
            pgrec = PgDBI.pgget("dsdaemon", "specialist", "dindex = {}".format(val), PgOPT.PGOPT['extlog'])
            if not pgrec:
                PgOPT.action_error("Daemon Control Index '{}' is not in RDADB".format(val))
            elif(PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0 and PgOPT.params['LN'] != pgrec['specialist'] and
                 PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']):
                PgOPT.action_error("{}: must be {}, owner of Daemon Control Index {}".format(PgOPT.params['LN'], pgrec['specialist'], val))
    else:   # found non-equal condition sign
        pgrec = PgDBI.pgmget("dsdaemon", "DISTINCT dindex",
                             PgDBI.get_field_condition("dindex", PgOPT.params['DI'], 0, 1), PgOPT.PGOPT['extlog'])
        if not pgrec: PgOPT.action_error("No Daemon Control matches given Index condition")
        PgOPT.params['DI'] = pgrec['dindex']

    PgOPT.OPTS['DI'][2] |= 8   # set validated flag
1572
+
1573
+ #
1574
+ # validate given check indices
1575
+ #
1576
def validate_checks():
    """Validate the check indices held in PgOPT.params['CI'].

    Does nothing when bit 8 of PgOPT.OPTS['CI'][2] is already set. When
    'CI' is absent from PgOPT.params only the validated flag is set.

    Every entry is first normalized to an int (empty entries become 0).
    If a condition sign ('!', '<', '>' or '<>') is met, it is rejected for
    setting actions; for other actions 'CI' is replaced by the cindex
    values of table dscheck matching the condition. Without a condition
    sign every index must be non-zero, must exist in dscheck and, for
    setting actions, must be owned by PgOPT.params['LN'] unless the current
    user id is the RDA user.

    Problems are reported through PgOPT.action_error().
    """
    if PgOPT.OPTS['CI'][2] & 8:
        return   # already validated

    if 'CI' in PgOPT.params:
        indices = PgOPT.params['CI']
        condition_found = False

        # first pass: convert entries to int, stop at the first condition sign
        for pos, entry in enumerate(indices):
            if not entry:
                indices[pos] = 0
                continue
            if isinstance(entry, int):
                continue
            if re.match(r'^(!|<|>|<>)$', entry):
                if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0:
                    PgOPT.action_error("Invalid condition '{}' of Check index".format(entry))
                condition_found = True
                break
            indices[pos] = int(entry)

        if condition_found:   # found none-equal condition sign
            condition = PgDBI.get_field_condition("cindex", PgOPT.params['CI'], 0, 1)
            matched = PgDBI.pgmget("dscheck", "cindex", condition, PgOPT.PGOPT['extlog'])
            if not matched:
                PgOPT.action_error("No Check matches given Index condition")
            PgOPT.params['CI'] = matched['cindex']
        else:   # normal check index given
            for pos, cidx in enumerate(indices):
                if not cidx:
                    PgOPT.action_error("Check Index 0 is not allowed")
                if pos > 0 and cidx == indices[pos-1]:
                    continue   # same index as the previous entry
                owner = PgDBI.pgget("dscheck", "specialist", "cindex = {}".format(cidx), PgOPT.PGOPT['extlog'])
                if not owner:
                    PgOPT.action_error("Check Index '{}' is not in RDADB".format(cidx))
                    continue
                if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] <= 0:
                    continue   # not a setting action, ownership is not checked
                if PgOPT.params['LN'] == owner['specialist']:
                    continue
                if PgLOG.PGLOG['CURUID'] == PgLOG.PGLOG['RDAUSER']:
                    continue
                PgOPT.action_error("{}: must be {}, owner of Check Index {}".format(PgOPT.params['LN'], owner['specialist'], cidx))

    PgOPT.OPTS['CI'][2] |= 8   # set validated flag
1612
+
1613
+ #
1614
+ # validate given dataset IDs
1615
+ #
1616
def validate_datasets():
    """Validate the dataset IDs held in PgOPT.params['DS'].

    Does nothing when bit 8 of PgOPT.OPTS['DS'][2] is already set. Every
    ID must be non-empty and must have a record in table dataset; an ID
    equal to the entry right before it is not looked up again. Problems
    are reported through PgOPT.action_error().

    A missing 'DS' option is treated as nothing to validate, in line with
    validate_daemons() and validate_checks(), instead of raising KeyError.
    """
    if PgOPT.OPTS['DS'][2] & 8:
        return   # already validated

    dsids = PgOPT.params['DS'] if 'DS' in PgOPT.params else []
    for i, dsid in enumerate(dsids):
        if not dsid:
            PgOPT.action_error("Empty Dataset ID is not allowed")
        if i and dsid == dsids[i-1]:
            continue   # same ID as the previous entry, already checked
        # NOTE(review): dsid is interpolated into the SQL condition as is;
        # presumably its format is checked when options are parsed - confirm
        if not PgDBI.pgget("dataset", "", "dsid = '{}'".format(dsid), PgOPT.PGOPT['extlog']):
            PgOPT.action_error("Dataset '{}' is not in RDADB".format(dsid))

    PgOPT.OPTS['DS'][2] |= 8   # set validated flag