rda-python-dscheck 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_dscheck/PgCheck.py +1628 -0
- rda_python_dscheck/__init__.py +1 -0
- rda_python_dscheck/dscheck.py +671 -0
- rda_python_dscheck/dscheck.usg +737 -0
- rda_python_dscheck-1.0.1.dist-info/LICENSE +21 -0
- rda_python_dscheck-1.0.1.dist-info/METADATA +16 -0
- rda_python_dscheck-1.0.1.dist-info/RECORD +10 -0
- rda_python_dscheck-1.0.1.dist-info/WHEEL +5 -0
- rda_python_dscheck-1.0.1.dist-info/entry_points.txt +2 -0
- rda_python_dscheck-1.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1628 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# Title : PgCheck.py
|
|
4
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
5
|
+
# Date : 08/26/2020
|
|
6
|
+
# 2025-02-10 transferred to package rda_python_dscheck from
|
|
7
|
+
# https://github.com/NCAR/rda-shared-libraries.git
|
|
8
|
+
# Purpose : python library module for holding some global variables and
|
|
9
|
+
# functions for dscheck utility
|
|
10
|
+
#
|
|
11
|
+
# Github : https://github.com/NCAR/rda-python-dscheck.git
|
|
12
|
+
#
|
|
13
|
+
###############################################################################
|
|
14
|
+
#
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import time
|
|
18
|
+
from rda_python_common import PgLOG
|
|
19
|
+
from rda_python_common import PgCMD
|
|
20
|
+
from rda_python_common import PgSIG
|
|
21
|
+
from rda_python_common import PgUtil
|
|
22
|
+
from rda_python_common import PgLock
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgOPT
|
|
25
|
+
from rda_python_common import PgDBI
|
|
26
|
+
|
|
27
|
+
# global variables
|
|
28
|
+
LOOP = 0      # pass counter; read and incremented by reset_process_limits()
PLIMITS = {}  # cache filled by get_process_limits(), keyed by "<command>-<specialist>"
DWHOSTS = {}  # hosts are down
RUNPIDS = {}  # looked up by check_dscheck_locks() with "<lockhost><pid>" keys
SHELLS = {}   # shell names used by specialists
|
|
33
|
+
|
|
34
|
+
#
|
|
35
|
+
# define initially the needed option values
|
|
36
|
+
#
|
|
37
|
+
# Option table handed to the shared PgOPT module.
# Each entry is  KEY : [type-or-action-bitmask, long option name, flag(, default)].
# NOTE(review): entries are grouped by their first element; groups 0, 1 and 2
# look like mode flags, single-value and multi-value info options respectively;
# confirm the exact meaning of the first and third elements against PgOPT.
PgOPT.OPTS = {                         # (!= 0) - setting actions
    'PC' : [0x0004, 'ProcessCheck', 1],
    'AC' : [0x0008, 'AddCheck', 1],
    'GD' : [0x0010, 'GetDaemon', 0],
    'SD' : [0x0020, 'SetDaemon', 1],
    'GC' : [0x0040, 'GetCheck', 0],
    'DL' : [0x0080, 'Delete', 1],
    'UL' : [0x0100, 'UnLockCheck', 1],
    'EC' : [0x0200, 'EmailCheck', 0],
    'IC' : [0x0400, 'InterruptCheck', 1],
    # NOTE(review): 'CH' and 'SO' share bitmask 0x1000 while 0x0800 is unused;
    # if action bits are meant to be unique, one of them is probably wrong.
    'CH' : [0x1000, 'CheckHost', 0],
    'SO' : [0x1000, 'SetOptions', 1],

    'AW' : [0, 'AnyWhere', 0],
    'BG' : [0, 'BackGround', 0],
    'CP' : [0, 'CheckPending', 0],
    'CS' : [0, 'CheckStatus', 0],
    # NOTE(review): long name below is spelled with three r's
    'FI' : [0, 'ForceInterrrupt', 0],
    'FO' : [0, 'FormatOutput', 0],
    'LO' : [0, 'LogOn', 0],
    'MD' : [0, 'PgDataset', 3],
    'NC' : [0, 'NoCommand', 0],
    'ND' : [0, 'NewDaemon', 0],
    'NT' : [0, 'NoTrim', 0],
    'WR' : [0, 'WithdsRqst', 0],
    'WU' : [0, 'WithdsUpdt', 0],

    'DM' : [1, 'DaemonMode', 1],   # for action PC, start|quit|logon|logoff
    'DV' : [1, 'Divider', 1],      # default to <:>
    'ES' : [1, 'EqualSign', 1],    # default to <=>
    'FN' : [1, 'FieldNames', 0],
    'LH' : [1, 'LocalHost', 0, ''],
    'MT' : [1, 'MaxrunTime', 0],
    'OF' : [1, 'OutputFile', 0],
    'ON' : [1, 'OrderNames', 0],
    'AO' : [1, 'ActOption', 1],    # default to <!>
    'WI' : [1, 'WaitInterval', 1],

    'AN' : [2, 'ActionName', 0],
    'AV' : [2, 'ArgumentVector', 0],
    'AX' : [2, 'ArgumenteXtra', 0],
    'CC' : [2, 'CarbonCopy', 0],
    'CD' : [2, 'CheckDate', 256],
    'CI' : [2, 'CheckIndex', 16],
    'CM' : [2, 'Command', 1],
    'CT' : [2, 'CheckTime', 32],
    'DB' : [2, 'Debug', 0],
    'DC' : [2, 'DoneCount', 17],
    'DF' : [2, 'DownFlags', 1],
    'DI' : [2, 'DaemonIndex', 16],
    'DS' : [2, 'Dataset', 1],
    'ER' : [2, 'ERrormessage', 0],
    'EV' : [2, 'Environments', 1],
    'FC' : [2, 'FileCount', 17],
    'HN' : [2, 'HostName', 1],
    'IF' : [2, 'InputFile', 0],
    'MC' : [2, 'MaxCount', 17],
    'MH' : [2, 'MatchHost', 1],
    'MO' : [2, 'Modules', 1],
    'PI' : [2, 'ParentIndex', 17],
    'PL' : [2, 'ProcessLimit', 17],
    'PO' : [2, 'Priority', 17],
    'PQ' : [2, 'PBSQueue', 0],
    'QS' : [2, 'QSubOptions', 0],
    'SN' : [2, 'Specialist', 1],
    'ST' : [2, 'Status', 0],
    'SZ' : [2, 'DataSize', 16],
    'TC' : [2, 'TryCount', 17],
    'WD' : [2, 'WorkDir', 0],
}
|
|
107
|
+
|
|
108
|
+
# Alternative long names accepted for the two-letter option keys.
# NOTE(review): 'CF' and 'GZ' have no entry in the OPTS table defined in this
# module; presumably they are options supplied by PgOPT itself. Confirm.
PgOPT.ALIAS = {
    'AN' : ['Action'],
    'BG' : ['b'],
    'CF' : ['Confirmation', 'ConfirmAction'],
    'CM' : ['CommandName'],
    'DL' : ['RM', 'Remove'],
    'DS' : ['Dsid', 'DatasetID'],
    'DV' : ['Delimiter', 'Separater'],
    'EV' : ['Envs'],
    'GZ' : ['GMT', 'GreenwichZone', 'UTC'],
    'MC' : ['MaximumCount', 'MaxTryCount'],
    'MH' : ['MatchHostname'],
    'NC' : ['NoRemoteCommand'],
    'MO' : ['Mods'],
    'PI' : ['ParentCheckIndex'],
    'QS' : ['PBSOptions'],
    'SO' : ['SetBatchOptions'],
    'SZ' : ['Size', "ProcSize"],
    'UL' : ['UnLock'],
    'WD' : ["WorkDirectory"],
    'WR' : ["WithRequest"],
    'WU' : ["WithUpdate"],
}
|
|
131
|
+
|
|
132
|
+
# Field maps for table dscheck: one-letter short name -> [option key in
# PgOPT.OPTS, database field name, flag].
# NOTE(review): the meaning of the third element (0, 1 or -1) is not visible
# in this module; confirm against PgOPT.
PgOPT.TBLHASH['dscheck'] = {
    #SHORTNM KEYS(PgOPT.OPTS) DBFIELD
    'C' : ['CI', "cindex", 0],
    'O' : ['CM', "command", 1],
    'V' : ['AV', "argv", 1],
    'T' : ['DS', "dsid", 1],
    'A' : ['AN', "action", 1],
    'U' : ['ST', "status", 1],
    'P' : ['PQ', "pbsqueue", 1],
    'R' : ['PI', "pindex", 0],
    'B' : ['DF', "dflags", 0],
    'F' : ['FC', "fcount", 0],
    'J' : ['DC', "dcount", 0],
    'K' : ['TC', "tcount", 0],
    'L' : ['MC', "mcount", 0],
    'Z' : ['SZ', "size", 0],
    'D' : ['CD', "date", 1],
    'Y' : ['CT', "time", 1],
    'H' : ['HN', "hostname", 1],
    'N' : ['SN', "specialist", 1],
    'W' : ['WD', "workdir", 1],
    'M' : ['MO', "modules", 1],
    'I' : ['EV', "environments", 1],
    'Q' : ['QS', "qoptions", 1],
    'X' : ['AX', "argextra", -1],
    'E' : ['ER', "errmsg", -1],
}

# Field maps for table dsdaemon, same layout as above.
PgOPT.TBLHASH['dsdaemon'] = {
    #SHORTNM KEYS(PgOPT.OPTS) DBFIELD
    'I' : ['DI', "dindex", 0],
    'C' : ['CM', "command", 1],
    'H' : ['HN', "hostname", 1],
    'M' : ['MH', "matchhost", 1],
    'S' : ['SN', "specialist", 1],
    'P' : ['PL', "proclimit", 0],
    'O' : ['PO', "priority", 0],
}
|
|
170
|
+
|
|
171
|
+
# Host state shared by the functions in this module; 'chkhost', 'hostcond' and
# 'isbatch' are filled in by check_dscheck_options() when option LH is given.
CHKHOST = {
    'curhost' : PgLOG.get_host(1),   # evaluated once at import time
    'chkhost' : None,
    'hostcond' : None,               # SQL fragment compared against lockhost
    'isbatch' : 0
}

# One-letter field lists; letters refer to the short names in PgOPT.TBLHASH.
PgOPT.PGOPT['dscheck'] = "COVTUPFJDNW"   # default
PgOPT.PGOPT['chkall'] = "COVTAUPRBFJKLZDYHNWMIQXE"   # default to all
# NOTE(review): 'Q' is not a short name in TBLHASH['dsdaemon'] while 'M'
# (matchhost) is missing from this list; "ICHMSPO" may have been intended.
PgOPT.PGOPT['dsdaemon'] = "ICHQSPO"   # default to all
PgOPT.PGOPT['waitlimit'] = 280    # limit of C and P request checks at a time
PgOPT.PGOPT['totallimit'] = 380   # maximum number of checks that can be started on PBS

PBSQUEUES = {'rda' : None, 'htc' : 'casper@casper-pbs'}
# Maximum run time per PBS queue, returned by max_batch_time()
# (presumably seconds; confirm against PgLOG.PGLOG['PBSTIME']).
PBSTIMES = {'default' : 21600, 'rda' : PgLOG.PGLOG['PBSTIME'], 'htc' : 86400}
#DOPTHOSTS = {'rda-work' : None, 'PBS' : ['!subconv -Q']}
DOPTHOSTS = {'rda-work' : None, 'PBS' : None, 'cron' : None}
DSLMTS = {}   # per-dsid process limits cached by get_dataset_limit()
EMLMTS = {}   # per-email request limits cached by get_user_limit()
|
|
190
|
+
|
|
191
|
+
#
|
|
192
|
+
# get the maximum running time for batch processes
|
|
193
|
+
#
|
|
194
|
+
def max_batch_time(qname):
    """Return the maximum running time configured for a batch queue.

    Returns 0 unless the current host (CHKHOST['curhost']) is the PBS host
    named in PgLOG.PGLOG['PBSNAME']. On the PBS host the value comes from
    PBSTIMES, using its 'default' entry when qname is empty or unknown.
    """
    if CHKHOST['curhost'] != PgLOG.PGLOG['PBSNAME']:
        return 0

    queue = qname if (qname and qname in PBSTIMES) else 'default'
    return PBSTIMES[queue]
|
|
201
|
+
|
|
202
|
+
#
|
|
203
|
+
# check if enough information entered on command line and/or input file
|
|
204
|
+
# for given action(s)
|
|
205
|
+
#
|
|
206
|
+
def check_dscheck_options(cact, aname):
    """Validate the parsed options for action cact and set up the run mode.

    cact is the two-letter action key (compared below with "PC", "DL", "SD",
    "AC", "IC", "UL" and "SO"); aname is passed on to PgOPT.set_uid() and to
    the PgSIG daemon/process helpers. The first violated rule is reported
    through PgOPT.action_error(). Nothing is returned.

    NOTE(review): the nesting of the 'LH', 'DM' and default-'SN' sections was
    reconstructed from a listing without indentation; confirm it against the
    upstream file before relying on it.
    """
    # messages indexed by erridx below
    errmsg = [
        "Option -DM(-DaemonMode) works with Action -PC(-ProcessCheck) only",
        "Do not specify Check Index for Daemon Mode",
        "Miss check index per Info option -CI(-CheckIndex)",
        "Need Machine Hostname per -HN for new daemon control",
        "Need Application command name per -CM for new daemon control",
        "Must be {} to process Checks in daemon mode".format(PgLOG.PGLOG['RDAUSER']),
        "Miss Command information per Info option -CM(-Command)",
    ]
    erridx = -1   # -1 means no error found
    PgOPT.set_uid(aname)

    if 'CI' in PgOPT.params: validate_checks()
    if 'DS' in PgOPT.params: validate_datasets()

    if 'DM' in PgOPT.params:
        if cact != "PC":
            erridx = 0
        elif PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']:
            erridx = 5
        elif 'CI' in PgOPT.params:
            erridx = 1
    elif cact == "DL":
        if not ('CI' in PgOPT.params or 'DI' in PgOPT.params): erridx = 2
    elif cact == 'SD':
        validate_daemons()
        # NOTE(review): messages 3 and 4 speak of "new daemon control";
        # confirm that 'SD' (and not 'ND') is the intended key here.
        if 'SD' in PgOPT.params:
            if 'HN' not in PgOPT.params:
                erridx = 3
            elif 'CM' not in PgOPT.params:
                erridx = 4
    elif cact == "AC":
        if 'CM' not in PgOPT.params:
            erridx = 6
    # 'and' binds tighter than 'or': IC always needs CI, UL only without LL
    elif 'CI' not in PgOPT.params and (cact == "IC" or cact == "UL" and 'LL' not in PgOPT.params):
        erridx = 2

    if erridx >= 0: PgOPT.action_error(errmsg[erridx], cact)

    if cact == "PC" or cact == 'UL':
        if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']:
            PgOPT.action_error("{}: cannot process Checks as {}".format(PgLOG.PGLOG['CURUID'], PgOPT.params['LN']), cact)
        if 'LH' in PgOPT.params:
            chkhost = PgLOG.get_short_host(PgOPT.params['LH'])
            if not chkhost: chkhost = PgLOG.get_host(1)
            CHKHOST['chkhost'] = CHKHOST['curhost'] = chkhost
            if PgLOG.valid_batch_host(chkhost):
                PgLOG.reset_batch_host(chkhost)
                CHKHOST['isbatch'] = 1
                CHKHOST['hostcond'] = "IN ('{}', '{}')".format(chkhost, PgLOG.PGLOG['HOSTNAME'])
            else:
                if PgUtil.pgcmp(chkhost, PgLOG.PGLOG['HOSTNAME'], 1):
                    PgOPT.action_error("{}: Cannot handle checks on {}".format(PgLOG.PGLOG['HOSTNAME'], chkhost), cact)
                CHKHOST['hostcond'] = "= '{}'".format(chkhost)

    if 'DM' in PgOPT.params:
        if PgLOG.PGLOG['CHKHOSTS'] and PgLOG.PGLOG['CHKHOSTS'].find(PgLOG.PGLOG['HOSTNAME']) < 0:
            PgOPT.action_error("Daemon mode can only be started on '{}'".format(PgLOG.PGLOG['CHKHOSTS']), cact)
        if re.match(r'^(start|begin)$', PgOPT.params['DM'], re.I):
            # a daemon started without NC or LH gets NC set
            if not ('NC' in PgOPT.params or 'LH' in PgOPT.params): PgOPT.params['NC'] = 1
            wtime = PgOPT.params['WI'] if 'WI' in PgOPT.params else 0
            mtime = PgOPT.params['MT'] if 'MT' in PgOPT.params else 0
            logon = PgOPT.params['LO'] if 'LO' in PgOPT.params else 0
            PgSIG.start_daemon(aname, PgLOG.PGLOG['CURUID'], 1, wtime, logon, 0, mtime)
        else:
            # any other DM value is passed to PgSIG.signal_daemon()
            PgSIG.signal_daemon(PgOPT.params['DM'], aname, PgOPT.params['LN'])
    else:
        if cact == "PC":
            PgSIG.validate_single_process(aname, PgOPT.params['LN'], PgLOG.argv_to_string())
        elif cact == "SO":
            plimit = PgOPT.params['PL'][0] if 'PL' in PgOPT.params and PgOPT.params['PL'][0] > 0 else 1
            PgSIG.validate_multiple_process(aname, plimit, PgOPT.params['LN'], PgLOG.argv_to_string())
        wtime = PgOPT.params['WI'] if 'WI' in PgOPT.params else 30
        logon = PgOPT.params['LO'] if 'LO' in PgOPT.params else 1
        PgSIG.start_none_daemon(aname, cact, PgOPT.params['LN'], 1, wtime, logon)
        # without CI/DS, a login other than RDAUSER defaults SN to itself
        if not ('CI' in PgOPT.params or 'DS' in PgOPT.params or PgOPT.params['LN'] == PgLOG.PGLOG['RDAUSER']):
            PgOPT.set_default_value("SN", PgOPT.params['LN'])

    # minimal wait interval in seconds for next check
    PgOPT.PGOPT['minlimit'] = PgOPT.params['WI'] = PgSIG.PGSIG['WTIME']
|
|
288
|
+
|
|
289
|
+
#
|
|
290
|
+
# process counts of hosts in dsdaemon control records for given command and specialist
|
|
291
|
+
#
|
|
292
|
+
def get_process_limits(cmd, specialist, logact = 0):
    """Return the cached per-host process limits for a command and specialist.

    The dsdaemon table is searched for records matching cmd and specialist
    (restricted to CHKHOST['chkhost'] when that is set, otherwise ordered by
    priority and hostname). If no record exists for cmd at all, records with
    command 'ALL' for the same specialist are used instead.

    Returns 0 when no host has a positive proclimit; otherwise a dict of
    parallel lists:
        'host'     - host names
        'priority' - priority values
        'acnt'     - proclimit values
        'match'    - matchhost flags
        'pcnd'     - dscheck query conditions selecting the checks currently
                     locked on each host for this command and specialist
    The result is cached in PLIMITS under "<cmd>-<specialist>".
    """
    ckey = "{}-{}".format(cmd, specialist)
    if ckey in PLIMITS: return PLIMITS[ckey]

    cnd = "command = '{}' AND specialist = '{}'".format(cmd, specialist)
    if CHKHOST['chkhost']:
        ecnd = " AND hostname = '{}'".format(CHKHOST['chkhost'])
    else:
        ecnd = " ORDER by priority, hostname"

    pgrecs = PgDBI.pgmget("dsdaemon", "hostname, bqueues, matchhost, proclimit, priority", cnd + ecnd, logact)
    if not pgrecs and PgDBI.pgget("dsdaemon", "", cnd, logact) == 0:
        # nothing configured for this command: fall back to the 'ALL' records
        pgrecs = PgDBI.pgmget("dsdaemon", "hostname, matchhost, proclimit, priority",
                              "command = 'ALL' AND specialist = '{}'{}".format(specialist, ecnd), logact)

    limits = {'host' : [], 'priority' : [], 'acnt' : [], 'match' : [], 'pcnd' : []}
    if pgrecs:
        for i, host in enumerate(pgrecs['hostname']):
            if pgrecs['proclimit'][i] <= 0: continue   # host disabled for this command
            limits['host'].append(host)
            limits['priority'].append(pgrecs['priority'][i])
            limits['acnt'].append(pgrecs['proclimit'][i])
            limits['match'].append(pgrecs['matchhost'][i])
            limits['pcnd'].append("{} AND pid > 0 AND lockhost = '{}'".format(cnd, host))

    # store only the finished result, so a failure above never leaves a
    # half-built entry in the cache
    PLIMITS[ckey] = limits if limits['host'] else 0
    return PLIMITS[ckey]
|
|
328
|
+
|
|
329
|
+
#
|
|
330
|
+
# find an available host name to process a dscheck record
|
|
331
|
+
#
|
|
332
|
+
def get_process_host(limits, hosts, cmd, act, logact = 0):
    """Return the index in limits['host'] of the first host able to take a check.

    limits is the dict built by get_process_limits(). A host qualifies when
    it is not listed in DWHOSTS, its 'acnt' limit exceeds the number of
    dscheck records matching its 'pcnd' condition, and
    PgLOG.check_process_host() accepts it for hosts. For command 'dsrqst'
    with action 'PR' the match flag is forced to 'G'. Returns -1 when no
    host qualifies.
    """
    use_g_flag = (cmd == 'dsrqst' and act == 'PR')

    for idx, host in enumerate(limits['host']):
        if host in DWHOSTS:
            continue   # the host is down
        allowed = limits['acnt'][idx]
        running = PgDBI.pgget("dscheck", "", limits['pcnd'][idx], logact)
        if not allowed > running:
            continue   # no free process slot on this host
        mflag = 'G' if use_g_flag else limits['match'][idx]
        if PgLOG.check_process_host(hosts, host, mflag):
            return idx

    return -1
|
|
346
|
+
|
|
347
|
+
#
|
|
348
|
+
# reset the cached process limits
|
|
349
|
+
#
|
|
350
|
+
def reset_process_limits():
    """Periodically drop the cached process limits and down-host list.

    Driven by the module counter LOOP: on every 3rd call PLIMITS is replaced
    by an empty dict; on every 10th call DWHOSTS is replaced by an empty dict
    and PgLOG.set_pbs_host(None, 1) is called. LOOP is incremented last.
    """
    global LOOP, DWHOSTS, PLIMITS

    if not LOOP % 3:
        PLIMITS = {}   # clean the cache for available processes on hosts

    if not LOOP % 10:
        DWHOSTS = {}
        PgLOG.set_pbs_host(None, 1)

    LOOP = LOOP + 1
|
|
362
|
+
|
|
363
|
+
#
|
|
364
|
+
# start dschecks
|
|
365
|
+
#
|
|
366
|
+
def start_dschecks(cnd, logact = 0):
    """Run one housekeeping pass over dscheck records and start waiting checks.

    cnd is a query-condition prefix that the fixed conditions below are
    appended to. The pass unlocks stale checks, sends saved emails (only
    when no check host is set) and purges finished checks. Unless option NC
    is set or no check host is set, it then starts the eligible waiting
    checks through start_one_dscheck(), stopping once started plus running
    checks exceed PgOPT.PGOPT['totallimit'].

    Returns the sum of the positive values returned by start_one_dscheck().
    """
    check_dscheck_locks(cnd, logact)
    if not CHKHOST['chkhost']:
        email_dschecks(cnd, logact)
    purge_dschecks(cnd, logact)

    if 'NC' in PgOPT.params or not CHKHOST['chkhost']:
        return 0
    if CHKHOST['isbatch'] and 'CP' in PgOPT.params:
        check_dscheck_pends(cnd, logact)
    # set_dscheck_options(CHKHOST['chkhost'], cnd, logact)
    reset_process_limits()

    # checks already holding a pid on the PBS host count toward the total limit
    running = 0
    if CHKHOST['isbatch']:
        running = PgDBI.pgget("dscheck", "", "lockhost = '{}' AND pid > 0".format(PgLOG.PGLOG['PBSNAME']), logact)

    query = cnd + "pid = 0 AND status <> 'D' AND einfo IS NULL AND (qoptions IS NULL OR LEFT(qoptions, 1) != '!') ORDER by hostname DESC, cindex"
    pgrecs = PgDBI.pgmget("dscheck", "*", query, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0

    started = 0
    for idx in range(total):
        if (started + running) > PgOPT.PGOPT['totallimit']:
            break
        pgrec = PgUtil.onerecord(pgrecs, idx)
        # all files are done already
        if pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount']:
            continue
        # tried the maximum number of times
        if pgrec['tcount'] and pgrec['tcount'] >= pgrec['mcount']:
            continue
        # the parent check is not done yet
        if pgrec['pindex'] and PgDBI.pgget("dscheck", "", "cindex = {} AND status <> 'D'".format(pgrec['pindex']), logact):
            continue
        if pgrec['dflags'] and PgFile.check_storage_dflags(pgrec['dflags'], pgrec, logact):
            continue
        ret = start_one_dscheck(pgrec, logact)
        if ret > 0:
            started += ret

    if total > 1:
        PgLOG.pglog("{} of {} DSCHECK records started on {}".format(started, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    return started
|
|
396
|
+
|
|
397
|
+
#
|
|
398
|
+
# check long locked dschecks and unlock them if the processes are dead
|
|
399
|
+
#
|
|
400
|
+
def check_dscheck_locks(cnd, logact = 0):
    """Unlock long-locked dscheck records or refresh their check time.

    cnd is a query-condition prefix; the lock and age conditions are appended
    to it. For each selected record:
      * handled here (a check host is set, or the record is locked by this
        host): unlock it via PgLock.lock_dscheck(cidx, 0) unless its
        "<lockhost><pid>" key is in RUNPIDS, otherwise call
        update_dscheck_time();
      * empty lockhost or 'rda_config': clear pid/lockhost directly;
      * anything else: only log, when logact carries PgLOG.EMEROL.
    A summary is logged when any record was selected.

    NOTE(review): this listing had lost its indentation; the final
    `RUNPIDS = {}` is assumed to run on every call (function level), not only
    when records were found. Confirm against the upstream file.
    """
    global RUNPIDS
    ltime = int(time.time())
    lochost = PgLOG.PGLOG['HOSTNAME']
    cnd += "pid > 0 AND "
    # cut-off times derived from the PgSIG.PGSIG DTIME/CTIME/RTIME settings
    dtime = ltime - PgSIG.PGSIG['DTIME']
    ctime = ltime - PgSIG.PGSIG['CTIME']
    rtime = ltime - PgSIG.PGSIG['RTIME']
    if CHKHOST['chkhost']:
        cnd += "lockhost {} AND (stttime = 0 OR chktime < {})".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "chktime > 0 AND (chktime < {} OR chktime < {} AND lockhost = '{}' OR chktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, lochost, rtime)

    pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
    cnt = (len(pgrecs['cindex']) if pgrecs else 0)
    lcnt = 0   # number of records actually unlocked
    for i in range(cnt):
        pgrec = PgUtil.onerecord(pgrecs, i)
        lmsg = "{}({}) at {} on {}".format(pgrec['lockhost'], pgrec['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        cidx = pgrec['cindex']
        if CHKHOST['chkhost'] or pgrec['lockhost'] == lochost:
            spid = "{}{}".format(pgrec['lockhost'], pgrec['pid'])
            if spid not in RUNPIDS and PgLock.lock_dscheck(cidx, 0) > 0:
                PgLOG.pglog("CHK{}: unlocked {}".format(cidx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
            else:
                update_dscheck_time(pgrec, ltime, logact)
        elif not pgrec['lockhost'] or pgrec['lockhost'] == 'rda_config':
            record = {'pid' : 0, 'lockhost' : ''}
            # the pid in the condition limits the update to the lock read above
            if PgDBI.pgupdt("dscheck", record, "cindex = {} AND pid = {}".format(cidx, pgrec['pid']), logact):
                PgLOG.pglog("CHK{}: unlocked {}".format(cidx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("Chk{}: time NOT updated for {} of {}".format(cidx, dscheck_runtime(pgrec['chktime'], ltime), lmsg), logact)

    if cnt > 0:
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} of {} DSCHECK record{} unlocked on {}".format(lcnt, cnt, s, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    RUNPIDS = {}
|
|
440
|
+
|
|
441
|
+
#
|
|
442
|
+
# check long pending dschecks and kill them
|
|
443
|
+
#
|
|
444
|
+
def check_dscheck_pends(cnd, logact = 0):
    """Kill checks that have been pending in the batch queue for too long.

    Selects dscheck records with status 'P' on the check host whose subtime
    is older than PgSIG.PGSIG['RTIME'] seconds. For each pid whose PBS job
    info reports State 'Q', runs `rdakill` against the PBS host. A summary
    is logged when any record was selected. cnd is a query-condition prefix.
    """
    cutoff = int(time.time()) - PgSIG.PGSIG['RTIME']
    query = cnd + "pid > 0 AND " + \
            "lockhost {} AND status = 'P' AND subtime > 0 AND subtime < {}".format(CHKHOST['hostcond'], cutoff)
    pgrecs = PgDBI.pgmget("dscheck", "pid", query, logact)
    total = len(pgrecs['pid']) if pgrecs else 0

    stopped = 0
    for idx in range(total):
        pid = pgrecs['pid'][idx]
        info = PgSIG.get_pbs_info(pid, 0, logact)
        if not (info and info['State'] == 'Q'):
            continue   # the job is gone or no longer queued
        PgLOG.pgsystem("rdakill -h {} -p {}".format(PgLOG.PGLOG['PBSNAME'], pid), PgLOG.LOGWRN, 5)
        stopped += 1

    if total > 0:
        plural = 's' if total > 1 else ''
        PgLOG.pglog("{} of {} Pending DSCHECK record{} stopped on {}".format(stopped, total, plural, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
463
|
+
|
|
464
|
+
#
|
|
465
|
+
# update dscheck time in case in pending status or
|
|
466
|
+
# the command does not update it on time by itself
|
|
467
|
+
#
|
|
468
|
+
def update_dscheck_time(pgrec, ltime, logact = 0):
    """Refresh chktime, and status where it changed, of a locked dscheck record.

    pgrec is one dscheck record, ltime the new check time. The status comes
    from PgSIG.get_pbs_info() when both the check host and the lock host are
    the PBS host, otherwise from PgSIG.check_host_pid() on the local host.
    The function returns without updating when the record is locked by
    another, non-PBS host. The record is written only if its chktime in the
    database still equals pgrec['chktime'].

    NOTE(review): the if/else pairing below was reconstructed from a listing
    without indentation; confirm against the upstream file.
    """
    record = {'chktime' : ltime}
    if(CHKHOST['chkhost'] and CHKHOST['chkhost'] == PgLOG.PGLOG['PBSNAME']
       and pgrec['lockhost'] == PgLOG.PGLOG['PBSNAME']):
        info = PgSIG.get_pbs_info(pgrec['pid'], 0, logact)
        if info:
            stat = info['State']
            if stat == 'Q': stat = 'P'   # PBS state 'Q' is stored as status 'P'
            if stat != pgrec['status']: record['status'] = stat
    else:
        if pgrec['lockhost'] != PgLOG.PGLOG['HOSTNAME']: return   # cannot update dscheck time
        if PgSIG.check_host_pid(pgrec['lockhost'], pgrec['pid']):
            if pgrec['status'] != "R": record['status'] = "R"
        else:
            # pid check failed while the record still says running
            if pgrec['status'] == "R": record['status'] = "F"

    if pgrec['stttime']:
        if pgrec['command'] == "dsrqst" and pgrec['oindex']:
            (record['fcount'], record['dcount'], record['size']) = PgCMD.get_dsrqst_counts(pgrec, logact)

    elif 'status' in record and record['status'] == 'R':
        # stttime is not set yet and the status just became 'R'
        record['stttime'] = ltime

    cnd = "cindex = {} AND pid = {}".format(pgrec['cindex'], pgrec['pid'])
    if PgDBI.pgget("dscheck", "", "{} AND chktime = {}".format(cnd, pgrec['chktime']), logact):
        # update only if the chktime is not changed yet
        PgDBI.pgupdt("dscheck", record, cnd, logact)
|
|
496
|
+
|
|
497
|
+
#
|
|
498
|
+
# return a running time string for given start and end times of the process
|
|
499
|
+
#
|
|
500
|
+
def dscheck_runtime(start, end = None):
    """Return the running time between start and end as a string.

    start and end are numeric times; a false end means the current time.
    Returns '' when start is false or fewer than 60 have elapsed, otherwise
    the result of PgLOG.seconds_to_string_time() for the elapsed time.
    """
    if not start:
        return ''

    if not end:
        end = int(time.time())
    elapsed = end - start
    if elapsed >= 60:
        return PgLOG.seconds_to_string_time(elapsed)

    return ''
|
|
511
|
+
|
|
512
|
+
#
|
|
513
|
+
# check dschecks and purge them if done already
|
|
514
|
+
#
|
|
515
|
+
def purge_dschecks(cnd, logact = 0):
    """Delete dscheck records that are finished or can no longer progress.

    Considers unlocked records (pid = 0) without saved email info, with
    bid > 0 when the current host is PgLOG.PGLOG['PGBATCH'] and bid = 0
    otherwise. A record is purged when its status is 'D', when its status is
    'R' but chktime is older than PgSIG.PGSIG['CTIME'] seconds, when all
    files are done, or when the try count reached its maximum. Each record
    is locked first and skipped if the lock fails. cnd is a query-condition
    prefix.
    """
    bidcnd = '> 0' if CHKHOST['curhost'] == PgLOG.PGLOG['PGBATCH'] else '= 0'
    query = cnd + "pid = 0 AND einfo IS NULL AND bid " + bidcnd
    pgrecs = PgDBI.pgmget("dscheck", "*", query, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    expired = int(time.time()) - PgSIG.PGSIG['CTIME']

    purged = 0
    for idx in range(total):
        pgrec = PgUtil.onerecord(pgrecs, idx)
        purgeable = (pgrec['status'] == "D" or
                     pgrec['status'] == "R" and pgrec['chktime'] < expired or
                     pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount'] or
                     pgrec['tcount'] and pgrec['tcount'] >= pgrec['mcount'])
        if not purgeable:
            continue
        if PgLock.lock_dscheck(pgrec['cindex'], 1) <= 0:
            continue   # could not lock the record
        purged += PgCMD.delete_dscheck(pgrec, None, logact)

    if purged and total > 1:
        PgLOG.pglog("{} of {} DSCHECK records purged on {}".format(purged, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
533
|
+
|
|
534
|
+
#
|
|
535
|
+
# check dschecks and send saved email
|
|
536
|
+
#
|
|
537
|
+
def email_dschecks(cnd, logact = 0):
    """Send the emails saved in unlocked dscheck records and clear them.

    For every record with pid = 0 and a non-NULL einfo the record is locked
    and re-read. If it still has einfo, the text is mailed (with a notice
    prepended when check_storage_dflags() returns messages for a check that
    has tries left) and einfo is set to NULL. The record is unlocked again in
    every case. The loop stops at the first failed send or update. cnd is a
    query-condition prefix.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL

    query = cnd + "pid = 0 AND einfo IS NOT NULL"
    pgrecs = PgDBI.pgmget("dscheck", "cindex", query, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0

    mailed = 0
    for idx in range(total):
        cidx = pgrecs['cindex'][idx]
        if PgLock.lock_dscheck(cidx, 1) <= 0:
            continue
        pgrec = PgDBI.pgget("dscheck", "*", "cindex = {}".format(cidx), logact)
        einfo = pgrec['einfo'] if pgrec else None

        sent = 0
        if einfo:
            if pgrec['dflags'] and pgrec['tcount'] and pgrec['tcount'] < pgrec['mcount']:
                msgary = PgFile.check_storage_dflags(pgrec['dflags'], pgrec, logact)
                if msgary:
                    einfo = "The Check will be resubmitted after the down storage Up again:\n{}\n{}".format("\n".join(msgary), einfo)
            delivered = PgLOG.send_customized_email("Chk{}".format(cidx), einfo, emlact)
            if delivered and PgDBI.pgexec("UPDATE dscheck set einfo = NULL WHERE cindex = {}".format(cidx), logact):
                sent = 1
            else:
                sent = -1

        PgLock.lock_dscheck(cidx, 0)
        if sent == -1:
            break   # stop at the first failure
        mailed += sent

    if mailed and total > 1:
        PgLOG.pglog("{} of {} DSCHECK emails sent on {}".format(mailed, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
566
|
+
|
|
567
|
+
#
|
|
568
|
+
# start a dscheck job for given dscheck record
|
|
569
|
+
#
|
|
570
|
+
def start_one_dscheck(pgrec, logact = 0):
    """Build and run the command that starts the check described by pgrec.

    The job is submitted with a "<shell>qsub" command when the check host is
    the PBS host, and with "rdasub -bg" otherwise. The job id is parsed from
    the command output.

    Returns 0 when the check is skipped, the result of PgLOG.pglog() when the
    command produced no job id, and otherwise the result of
    fill_dscheck_info().

    NOTE(review): the nesting below was reconstructed from a listing without
    indentation (notably the final `else: cstr = "execute"`); confirm against
    the upstream file.
    """
    cidx = pgrec['cindex']
    specialist = pgrec['specialist']
    host = CHKHOST['chkhost']
    dlimit = get_system_down_limit(host, logact)
    if dlimit < 0:
        PgLock.lock_dscheck(cidx, 0)
        return 0

    limits = get_process_limits(pgrec['command'], specialist, logact)
    if not limits:
        # nothing configured: only log, and only when logact carries EMEROL
        if pgrec['hostname'] and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            host = PgLOG.get_host(1)
            if PgLOG.check_process_host(pgrec['hostname'], host, 'I'):
                PgLOG.pglog("Chk{}: {} is not configured properly to run on {} for {}".format(cidx, pgrec['command'], host, specialist), logact)
        return 0

    lidx = get_process_host(limits, pgrec['hostname'], pgrec['command'], pgrec['action'], logact)
    if lidx < 0 or skip_dscheck_record(pgrec, host, logact): return 0
    # when running as RDAUSER the command is prefixed with pgstart_<specialist>
    cmd = "pgstart_{} ".format(specialist) if PgLOG.PGLOG['CURUID'] == PgLOG.PGLOG['RDAUSER'] else ""
    # NOTE(review): pgcmp() appears to return 0 for equal values; confirm
    if not PgUtil.pgcmp(host, PgLOG.PGLOG['PBSNAME'], 1):
        if reach_dataset_limit(pgrec): return 0
        # yields the command name "bashqsub" or "tcshqsub"
        cmd += get_specialist_shell(specialist) + 'qsub '
        options = get_pbs_options(pgrec, dlimit, logact)
        if options:
            cmd += options
        elif pgrec['status'] == 'E':
            return 0
        bstr = " in {} Queue {} ".format(PgLOG.PGLOG['PBSNAME'], pgrec['pbsqueue'])
    else:
        bstr = ""
        cmd += "rdasub -bg "

    if pgrec['workdir']:
        # a work directory containing '$' is single-quoted
        if pgrec['workdir'].find('$') > -1:
            cmd += "-cwd '{}' ".format(pgrec['workdir'])
        else:
            cmd += "-cwd {} ".format(pgrec['workdir'])
    else:
        cmd += "-cwd '$HOME' "

    chkcmd = pgrec['command']
    cmd += "-cmd " + chkcmd
    if pgrec['argv']:
        argv = pgrec['argv']
        if pgrec['argextra']: argv += pgrec['argextra']
        cmd += ' ' + argv + PgCMD.append_delayed_mode(chkcmd, argv)
        chkcmd += ' ' + argv

    PgLOG.pglog("Chk{}: issues '{}' onto {} for {}".format(cidx, chkcmd, host, pgrec['specialist']), PgLOG.LOGWRN)
    # ERR2STD is set only around this command and reset right after it
    PgLOG.PGLOG['ERR2STD'] = ['chmod: changing']
    cstr = PgLOG.pgsystem(cmd, logact&(~PgLOG.EXITLG), 278)   # 2+4+16+256
    PgLOG.PGLOG['ERR2STD'] = []
    pid = 0
    if cstr:
        # the first line matching one of three job-id formats gives the pid
        lines = cstr.split('\n')
        for line in lines:
            if not line: continue
            ms = re.match(r'^Job <(\d+)> is submitted', line)
            if ms:
                pid = int(ms.group(1))
                break
            ms = re.match(r'^(\d+)\.casper-pbs', line)
            if ms:
                pid = int(ms.group(1))
                break
            ms = re.match(r'^Submitted batch job (\d+)', line)
            if ms:
                pid = int(ms.group(1))
                break
    if not pid:
        # no job id: classify the failure for the log message
        if PgLOG.PGLOG['SYSERR']:
            if PgLOG.PGLOG['SYSERR'].find('Job not submitted') > -1:
                cstr = "submit job"
            elif PgLOG.PGLOG['SYSERR'].find('working directory') > -1:
                cstr = "change working directory"
            else:
                cstr = "execute"
        PgLock.lock_dscheck(cidx, 0)
        return PgLOG.pglog("Chk{}: {} Failed {} on {}{}{}\n{}".format(cidx, PgCMD.get_command_info(pgrec),
                           cstr, PgLOG.PGLOG['HOSTNAME'], bstr, PgUtil.curtime(1), PgLOG.PGLOG['SYSERR']),
                           PgLOG.LOGWRN|PgLOG.FRCLOG)

    PgLOG.pglog("Chk{}: {} started on {}{}{}".format(cidx, PgCMD.get_command_info(pgrec),
                PgLOG.PGLOG['HOSTNAME'], bstr, PgUtil.curtime(1)), PgLOG.LOGWRN|PgLOG.FRCLOG)
    return fill_dscheck_info(pgrec, pid, host, logact)
|
|
657
|
+
|
|
658
|
+
#
|
|
659
|
+
# get qsub shell command
|
|
660
|
+
#
|
|
661
|
+
def get_specialist_shell(specialist):
    """Return the shell name ('bash' or 'tcsh') used by a specialist.

    The name is looked up once per specialist in table dssgrp and cached in
    SHELLS: 'bash' when the record's shell_flag is 'B', 'tcsh' otherwise,
    including when no record is found.
    """
    if specialist in SHELLS:
        return SHELLS[specialist]

    pgrec = PgDBI.pgget("dssgrp", "shell_flag", "logname = '{}'".format(specialist))
    uses_bash = pgrec and pgrec['shell_flag'] == 'B'
    SHELLS[specialist] = 'bash' if uses_bash else 'tcsh'
    return SHELLS[specialist]
|
|
671
|
+
|
|
672
|
+
#
|
|
673
|
+
# get and cache process limit for a given dsid
|
|
674
|
+
#
|
|
675
|
+
def get_dataset_limit(dsid):
    """Return the process limit for a dataset, caching it in DSLMTS.

    Lookup order: the dslimit record for dsid; the cached 'default' entry;
    the dslimit record for dsid 'all' (which is then cached as 'default');
    and finally the built-in value 45.
    """
    try:
        return DSLMTS[dsid]
    except KeyError:
        pass

    limit = 45   # built-in fallback
    rec = PgDBI.pgget('dslimit', 'processlimit', "dsid = '{}'".format(dsid))
    if rec:
        limit = rec['processlimit']
    elif 'default' in DSLMTS:
        limit = DSLMTS['default']
    else:
        rec = PgDBI.pgget('dslimit', 'processlimit', "dsid = 'all'")
        if rec:
            limit = rec['processlimit']
            DSLMTS['default'] = limit

    DSLMTS[dsid] = limit
    return limit
|
|
691
|
+
|
|
692
|
+
#
|
|
693
|
+
# check if reaching running limit for a specified dataset
|
|
694
|
+
#
|
|
695
|
+
def reach_dataset_limit(pgrec):
    """Tell whether a dsrqst check must wait because its dataset is busy.

    Only 'dsrqst' commands with a dataset id and an action of BR, SP or
    PP are limited.  For those, the number of dscheck records of the same
    dataset and actions whose status is not 'C' is compared with the
    dataset limit; when it is larger, lock_dscheck() is called with 0 for
    the check and 1 is returned.  In every other case 0 is returned.
    """
    if pgrec['command'] != 'dsrqst':
        return 0

    dsid = pgrec['dsid']
    if not dsid or pgrec['action'] not in ['BR', 'SP', 'PP']:
        return 0

    maxcnt = get_dataset_limit(dsid)
    actcnt = PgDBI.pgget('dscheck', '', "dsid = '{}' AND status <> 'C' AND action IN ('BR', 'SP', 'PP')".format(dsid))
    if actcnt > maxcnt:
        PgLock.lock_dscheck(pgrec['cindex'], 0)
        return 1

    return 0
|
|
706
|
+
|
|
707
|
+
#
|
|
708
|
+
# get and cache the request limits for a given email
|
|
709
|
+
#
|
|
710
|
+
def get_user_limit(email):
    """Return the list of check limits for a user email, cached in EMLMTS.

    Lookup order: the userlimit record of the email itself, then the
    cached 'default' entry of EMLMTS, then the userlimit record with
    email 'all' (also cached as 'default' when found).  Records from the
    database give [maxrqstcheck, maxpartcheck]; when nothing is found the
    built-in list [20, 10, 36] is used.  Each email gets its own copy of
    the list.
    """
    if email in EMLMTS:
        return EMLMTS[email]

    fields = 'maxrqstcheck, maxpartcheck'
    limits = [20, 10, 36]
    userrec = PgDBI.pgget('userlimit', fields, "email = '{}'".format(email))
    if userrec:
        limits = [userrec['maxrqstcheck'], userrec['maxpartcheck']]
    elif 'default' in EMLMTS:
        limits = EMLMTS['default']
    else:
        allrec = PgDBI.pgget('userlimit', fields, "email = 'all'")
        if allrec:
            limits = [allrec['maxrqstcheck'], allrec['maxpartcheck']]
            EMLMTS['default'] = limits

    # store a copy so a change made for one email never leaks into another
    EMLMTS[email] = limits.copy()
    return EMLMTS[email]
|
|
728
|
+
|
|
729
|
+
#
|
|
730
|
+
# check if reaching running limit for a specified dataset
|
|
731
|
+
#
|
|
732
|
+
def reach_dataset_limit(pgrec):
    """Return 1 if the dataset of a dsrqst check is over its limit, else 0.

    NOTE(review): a function with this same name and the same logic is
    defined earlier in this module; this later definition is the one that
    stays bound to the name after import.

    The limit only applies to command 'dsrqst' with a non-empty dsid and
    action BR, SP or PP.  When the count of non-'C' dscheck records for
    the dataset exceeds get_dataset_limit(dsid), lock_dscheck() is called
    with 0 for this check before 1 is returned.
    """
    limited_actions = ['BR', 'SP', 'PP']
    is_request = (pgrec['command'] == 'dsrqst')
    if is_request:
        dsid = pgrec['dsid']
        if dsid and pgrec['action'] in limited_actions:
            allowed = get_dataset_limit(dsid)
            counted = PgDBI.pgget('dscheck', '', "dsid = '{}' AND status <> 'C' AND action IN ('BR', 'SP', 'PP')".format(dsid))
            if counted > allowed:
                PgLock.lock_dscheck(pgrec['cindex'], 0)
                return 1

    return 0
|
|
743
|
+
|
|
744
|
+
#
|
|
745
|
+
# check and return the time limit in seconds before a planned system down for given hostname
|
|
746
|
+
#
|
|
747
|
+
def get_system_down_limit(hostname, logact = 0):
    """Return the seconds available before a planned down time of a host.

    The down-time information comes from PgDBI.get_system_downs().
    Returns 0 when no start time is set; otherwise the time between
    'curtime' and 'start' less twice PGSIG['CTIME'], or -1 when that
    value is smaller than PGOPT['minlimit'].
    """
    down = PgDBI.get_system_downs(hostname, logact)
    if not down['start']:
        return 0

    remaining = down['start'] - down['curtime'] - 2*PgSIG.PGSIG['CTIME']
    if remaining < PgOPT.PGOPT['minlimit']:
        return -1

    return remaining
|
|
756
|
+
|
|
757
|
+
#
|
|
758
|
+
# check and get the option string for submit a PBS job
|
|
759
|
+
#
|
|
760
|
+
def _pbs_walltime_seconds(optval):
    """Convert a walltime value '[D-]H[:M[:S]]' into seconds."""
    fields = optval.split(':')
    dayhour = fields[0].split('-')
    seconds = 3600*int(dayhour[0])
    if len(dayhour) > 1:
        # the leading number was a day count; the hours follow the dash
        seconds = 24*seconds + 3600*int(dayhour[1])
    if len(fields) > 1:
        seconds += 60*int(fields[1])
        if len(fields) > 2: seconds += int(fields[2])
    return seconds

def get_pbs_options(pgrec, limit = 0, logact = 0):
    """Build the option string for submitting a PBS job of a dscheck record.

    pgrec  -- dscheck record; reads qoptions, otype, modules, environments
              and the fields used by get_pbsqueue_option()
    limit  -- if positive, upper bound in seconds for the wall time
    logact -- logging action flags (not used by this function itself)

    The wall time asked for in qoptions (or PBSTIMES['default'] if none)
    is capped by limit and by max_batch_time() of the selected queue; the
    walltime option is rewritten or added only when the capped value
    differs from both the requested and the default time.  Module,
    environment and queue options are then appended, each followed by a
    single space.
    """
    opttime = 0
    qoptions = build_dscheck_options(pgrec, 'qoptions', 'PBS')
    qname = get_pbsqueue_option(pgrec)
    maxtime = max_batch_time(qname)
    runtime = PBSTIMES['default']

    if qoptions:
        # "-opts1/-opts2": first part for a request, second for a partition
        ms = re.match(r'^(-.+)/(-.+)$', qoptions)
        if ms: qoptions = ms.group(2 if pgrec['otype'] == 'P' else 1)

        ms = re.search(r'-l\s+\S*walltime=([\d:-]+)', qoptions)
        if ms:
            opttime = _pbs_walltime_seconds(ms.group(1))
            runtime = opttime
        qoptions += ' '

    if limit > 0 and runtime > limit: runtime = limit
    if runtime > maxtime: runtime = maxtime
    if runtime != opttime and runtime != PBSTIMES['default']:
        # whole seconds only, so the ':02' fields never show a fraction
        secs = int(runtime)
        optval = "walltime={}:{:02}:{:02}".format(int(secs/3600), int(secs/60)%60, secs%60)
        if opttime:
            if runtime < opttime: qoptions = re.sub(r'walltime=[\d:-]+', optval, qoptions)
        elif qoptions.find('-l ') > -1:
            qoptions = re.sub(r'-l\s+', "-l {},".format(optval), qoptions)
        else:
            # keep the trailing blank: further options are appended below
            qoptions += "-l {} ".format(optval)

    if pgrec['modules']:
        options = build_dscheck_options(pgrec, 'modules', 'PBS')
        if options: qoptions += "-mod {} ".format(options)
    if pgrec['environments']:
        options = build_dscheck_options(pgrec, 'environments', 'PBS')
        if options: qoptions += "-env {} ".format(options)

    if qname: qoptions += "-q {} ".format(qname)

    return qoptions
|
|
809
|
+
|
|
810
|
+
#
|
|
811
|
+
# check rda queue for pending jobs to switch PBS queue if needed
|
|
812
|
+
#
|
|
813
|
+
def get_pbsqueue_option(pgrec):
    """Pick the PBS queue key for a check and return its PBSQUEUES value.

    While scanning PBSQUEUES the last key with a true value is remembered
    as the alternate and the last key with a false value as the primary.
    The primary is used unless more than one dscheck record with status
    'P' already carries it, in which case the alternate is used.  When
    the choice differs from pgrec['pbsqueue'] both the database record
    and pgrec are updated.
    """
    for key, value in PBSQUEUES.items():
        if value:
            altkey = key
        else:
            usekey = key

    pending = PgDBI.pgget("dscheck", '', "status = 'P' AND pbsqueue = '{}'".format(usekey))
    if pending > 1:
        usekey = altkey

    if pgrec['pbsqueue'] != usekey:
        PgDBI.pgexec("UPDATE dscheck SET pbsqueue = '{}' WHERE cindex = {}".format(usekey, pgrec['cindex']))
        pgrec['pbsqueue'] = usekey

    return PBSQUEUES[usekey]
|
|
828
|
+
|
|
829
|
+
#
|
|
830
|
+
# build individual option string for given option name
|
|
831
|
+
#
|
|
832
|
+
def _email_request_option_error(pgrqst, ridx, errmsg):
    """Email an option-building error for a request and save the outcome."""
    record = {
        'status' : PgOPT.send_request_email_notice(pgrqst, errmsg, 0, 'E'),
        'ecount' : pgrqst['ecount'] + 1,
    }
    PgDBI.pgupdt("dsrqst", record, "rindex = {}".format(ridx), PgOPT.PGOPT['errlog'])

def build_dscheck_options(pgcheck, optname, optstr = None):
    """Return the option string of field optname for a dscheck record.

    pgcheck -- dscheck record (dict); status may be set to 'E' here
    optname -- name of the option field, e.g. 'qoptions' or 'modules'
    optstr  -- label for error messages; defaults to optname capitalized

    An empty value or 'default' gives ''.  A value without a leading '!'
    is returned unchanged.  A value with a leading '!' is evaluated via
    PgCMD.get_dynamic_options(), but only if the stored field still
    equals the given value; the result is saved to the dscheck record.
    If the evaluation yields nothing and PGLOG['SYSERR'] is set, the
    check is marked as error and the owning request is notified by
    email, or the message is logged when no notice could be sent.
    """
    options = pgcheck[optname]
    if not options or options == 'default':
        return ''
    if not options.startswith('!'):
        return options

    cidx = pgcheck['cindex']
    # read the field again: another process may have evaluated it already
    stored = PgDBI.pgget('dscheck', optname, 'cindex = {}'.format(cidx))
    if not stored or options != stored[optname]:
        return options

    errmsg = ''
    options = PgCMD.get_dynamic_options(options[1:], pgcheck['oindex'], pgcheck['otype'])
    record = {optname : options}
    if not options and PgLOG.PGLOG['SYSERR']:
        pgcheck['status'] = 'E'
        record['status'] = 'E'
        record['pid'] = 0
        record['tcount'] = pgcheck['tcount'] + 1
        if not optstr: optstr = optname.capitalize()
        errmsg = "Chk{}: Fail to build {} Options, {}".format(cidx, optstr, PgLOG.PGLOG['SYSERR'])
    PgDBI.pgupdt("dscheck", record, "cindex = {}".format(cidx))

    if not errmsg:
        return options

    otype = pgcheck['otype']
    if otype == 'R':
        ridx = pgcheck['oindex']
        pgrqst = PgDBI.pgget('dsrqst', '*', 'rindex = {}'.format(ridx))
        if pgrqst:
            _email_request_option_error(pgrqst, ridx, errmsg)
            errmsg = ''
    elif otype == 'P':
        pidx = pgcheck['oindex']
        pgpart = PgDBI.pgget('ptrqst', 'rindex', 'pindex = {}'.format(pidx))
        if pgpart:
            PgDBI.pgexec("UPDATE ptrqst SET status = 'E' WHERE pindex = {}".format(pidx))
            ridx = pgpart['rindex']
            pgrqst = PgDBI.pgget('dsrqst', '*', 'rindex = {}'.format(ridx))
            if pgrqst and pgrqst['status'] != 'E':
                _email_request_option_error(pgrqst, ridx, errmsg)
                errmsg = ''

    # no email notice went out; record the error in the log instead
    if errmsg: PgLOG.pglog(errmsg, PgOPT.PGOPT['errlog'])

    return options
|
|
878
|
+
|
|
879
|
+
#
|
|
880
|
+
# fill up dscheck record in case the command does not do it itself
|
|
881
|
+
#
|
|
882
|
+
def fill_dscheck_info(ckrec, pid, host, logact = 0):
    """Record start information in a dscheck record after a command launch.

    ckrec  -- dscheck record as it was read before the launch
    pid    -- process or batch id of the started command; false if none
    host   -- name of the host the command was started for
    logact -- logging action flags passed to the database calls

    The try count is incremented first.  With a pid the check is locked
    via lock_host_dscheck(); the status becomes 'R', or the PBS state
    (with 'Q' stored as 'P') when host is PGLOG['PBSNAME'] and PBS info
    is available.  Without a pid the status becomes 'F'.  Returns 1 when
    the record is locked or changed by another process, 0 when it is
    gone, otherwise the result of the final update.
    """
    cindex = ckrec['cindex']
    chkcnd = "cindex = {}".format(cindex)
    PgDBI.pgexec("UPDATE dscheck SET tcount = tcount+1 WHERE " + chkcnd, logact)
    if pid and PgLock.lock_host_dscheck(cindex, pid, host, logact) <= 0:
        return 1   # under processing

    record = {}
    if not pid:
        status = 'F'
    else:
        status = 'R'
        record['pid'] = pid
        if host == PgLOG.PGLOG['PBSNAME']:
            info = PgSIG.get_pbs_info(pid, 0, logact, 2)
            if info:
                status = info['State']
                if status == 'Q': status = 'P'
        else:
            record['runhost'] = PgLOG.PGLOG['HOSTNAME']
            record['bid'] = 0
    record['status'] = status

    now = int(time.time())
    for field in ('stttime', 'subtime', 'chktime'):
        record[field] = now

    # leave the record alone if it changed since ckrec was read
    current = PgDBI.pgget("dscheck", "status, stttime", chkcnd, logact)
    if not current:
        return 0
    if current['status'] != ckrec['status'] or current['stttime'] > ckrec['stttime']:
        return 1
    if not pid and PgLock.lock_dscheck(cindex, 0) <= 0:
        return 1

    return PgDBI.pgupdt("dscheck", record, chkcnd, logact)
|
|
911
|
+
|
|
912
|
+
#
|
|
913
|
+
# return 1 to skip running if the dscheck record is not ready; 0 otherwise
|
|
914
|
+
#
|
|
915
|
+
def skip_dscheck_record(pgrec, host, logact = 0):
    """Return 1 if a dscheck record must not be started now, 0 if it may.

    The record is skipped when PgFile.check_host_down() reports a down
    host for its working directory or for the home directory of its
    command, when the stored record is gone or differs from pgrec (pid
    set, other status, later start time or higher try count), when it
    cannot be locked, or when resetting its timing fields fails.
    """
    workdir = pgrec['workdir']
    if workdir and '$' in workdir:
        workdir = ''   # working directory not resolved yet
    if PgFile.check_host_down(workdir, host, logact):
        return 1

    command = pgrec['command']
    homekey = None
    if command == "dsrqst":
        homekey = 'RQSTHOME'
    elif command == "dsupdt" or command == "dsarch":
        homekey = 'DSDHOME'
    if homekey and PgFile.check_host_down(PgLOG.PGLOG[homekey], host, logact):
        return 1

    chkcnd = "cindex = {}".format(pgrec['cindex'])
    stored = PgDBI.pgget("dscheck", "pid, status, stttime, tcount", chkcnd, logact)
    if not stored:
        return 1
    if stored['pid'] > 0 or stored['status'] != pgrec['status']:
        return 1
    if stored['stttime'] > pgrec['stttime'] or stored['tcount'] > pgrec['tcount']:
        return 1
    if PgLock.lock_dscheck(pgrec['cindex'], 1) <= 0:
        return 1

    if pgrec['subtime'] or pgrec['stttime']:
        # left over from an earlier run: save run times, clear start info
        reset = {'stttime' : 0, 'subtime' : 0, 'runhost' : '', 'bid' : 0}
        (reset['ttltime'], reset['quetime']) = PgCMD.get_dscheck_runtime(pgrec)
        if not PgDBI.pgupdt("dscheck", reset, "cindex = {}".format(pgrec['cindex']), logact):
            return 1

    return 0
|
|
937
|
+
|
|
938
|
+
#
|
|
939
|
+
# start recording Queued requests to checks
|
|
940
|
+
#
|
|
941
|
+
def start_dsrqsts(cnd, logact = 0):
    """Add dscheck records for queued requests and their partitions.

    cnd    -- leading SQL condition prepended to the dsrqst queries
    logact -- logging action flags passed to the database calls

    Stale request locks are checked first; when CHKHOST['chkhost'] is set
    nothing else is done and 1 is returned.  Otherwise saved emails are
    sent, purge checks are added, and queued requests are turned into
    checks while the count of waiting ('C') plus pending ('P') checks is
    below PGOPT['waitlimit'].  Returns the number of requests started.
    """
    check_dsrqst_locks(cnd, logact)
    if CHKHOST['chkhost']: return 1
    email_dsrqsts(cnd, logact)
    purge_dsrqsts(cnd, logact)

    rcnd = cnd
    rcnd += ("status = 'Q' AND rqsttype <> 'C' AND (pid = 0 OR pid < ptcount) AND " +
             "einfo IS NULL ORDER BY priority, rindex")
    pgrecs = PgDBI.pgmget("dsrqst", "*", rcnd, logact)
    cnt = (len(pgrecs['rindex']) if pgrecs else 0)
    ccnt = PgDBI.pgget("dscheck", '', "status = 'C'", logact)
    pcnt = PgDBI.pgget("dscheck", '', "status = 'P'", logact)
    if (ccnt+pcnt) > PgOPT.PGOPT['waitlimit']:
        if cnt: PgLOG.pglog("{}/{} Checks are Waiting/Pending; Add new dscheck records {} later".format(ccnt, pcnt, PgLOG.PGLOG['HOSTNAME']),
                            PgLOG.LOGWRN|PgLOG.FRCLOG)

    # room left for new checks; zero or negative means nothing is added
    rcnt = PgOPT.PGOPT['waitlimit']-ccnt-pcnt
    if cnt == 0:
        acnt = 0
        cnts = start_dsrqst_partitions(None, rcnt, logact)
        rcnt = cnts[0]
        pcnt = cnts[1]
    else:
        tcnt = cnt
        if cnt > rcnt: cnt = rcnt
        if cnt > 1: PgLOG.pglog("Try to add dschecks for {} DSRQST records on {}".format(cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

        i = acnt = ccnt = pcnt = rcnt = 0
        while i < tcnt and ccnt < cnt:
            pgrec = PgUtil.onerecord(pgrecs, i)
            i += 1
            if pgrec['ptcount'] == 0 and validate_dsrqst_partitions(pgrec, logact):
                acnt += add_dsrqst_partitions(pgrec, logact)
            elif pgrec['ptcount'] < 2:
                rcnt += start_one_dsrqst(pgrec, logact)
            else:
                cnts = start_dsrqst_partitions(pgrec, (cnt-ccnt), logact)
                rcnt += cnts[0]
                pcnt += cnts[1]
            # acnt, pcnt and rcnt are running totals already, so assign
            # their sum; adding it again would use up the quota too fast
            ccnt = acnt+pcnt+rcnt

    if rcnt > 1: PgLOG.pglog("build {} requests on {}".format(rcnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    if pcnt > 1: PgLOG.pglog("build {} request partitions on {}".format(pcnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    if acnt > 1: PgLOG.pglog("Add partitions to {} requests on {}".format(acnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

    return rcnt
|
|
987
|
+
|
|
988
|
+
#
|
|
989
|
+
# validate a given request if ok to do partitions
|
|
990
|
+
#
|
|
991
|
+
def validate_dsrqst_partitions(pgrec, logact = 0):
    """Return True if a request may be split into partitions.

    Partitioning is allowed when the request control record returned by
    PgCMD.get_dsrqst_control() has a true ptlimit or ptsize.  Otherwise
    the request is marked as a single unit: ptcount is set to 1 and any
    non-zero ptlimit/ptsize is reset to 0, both in pgrec and in table
    dsrqst, and False is returned.
    """
    pgctl = PgCMD.get_dsrqst_control(pgrec, logact)
    if pgctl and (pgctl['ptlimit'] or pgctl['ptsize']):
        return True

    pgrec['ptcount'] = 1
    record = {'ptcount' : 1}
    for field in ('ptlimit', 'ptsize'):
        if pgrec[field]:
            pgrec[field] = record[field] = 0

    PgDBI.pgupdt('dsrqst', record, "rindex = {}".format(pgrec['rindex']), logact)
    return False
|
|
1003
|
+
|
|
1004
|
+
#
|
|
1005
|
+
# call given command to evaluate dynamically the dscheck.qoptions
|
|
1006
|
+
#
|
|
1007
|
+
def set_dscheck_options(chost, cnd, logact):
    """Evaluate dynamic qoptions of waiting dscheck records on a host.

    Nothing is done unless chost is a key of DOPTHOSTS.  All unlocked
    records with status 'C' whose qoptions start with '!' are read; those
    listed in DOPTHOSTS[chost] are skipped.  Each remaining record is
    locked, its qoptions are built via build_dscheck_options(), and the
    result is saved with pid reset to 0, except when the build failed
    and set the record status to 'E'.
    """
    if chost not in DOPTHOSTS:
        return

    skipcmds = DOPTHOSTS[chost]
    pgrecs = PgDBI.pgmget("dscheck", "*", cnd + "pid = 0 AND status = 'C' AND LEFT(qoptions, 1) = '!'", logact)
    cnt = len(pgrecs['cindex']) if pgrecs else 0
    qcnt = 0
    for idx in range(cnt):
        chkrec = PgUtil.onerecord(pgrecs, idx)
        if skipcmds and chkrec['qoptions'] in skipcmds:
            continue   # not evaluated on this host
        if PgLock.lock_dscheck(chkrec['cindex'], 1) <= 0:
            continue
        qoptions = build_dscheck_options(chkrec, 'qoptions', 'PBS')
        if not qoptions and chkrec['status'] == 'E':
            continue   # failed evaluating qoptions
        qcnt += PgDBI.pgupdt('dscheck', {'pid' : 0, 'qoptions': qoptions},
                             "cindex = {}".format(chkrec['cindex']), PgOPT.PGOPT['errlog'])

    if qcnt and cnt > 1:
        PgLOG.pglog("{} of {} DSCHECK PBS options Dynamically set on {}".format(qcnt, cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1024
|
+
|
|
1025
|
+
#
|
|
1026
|
+
# add a new dscheck record if a given request record is due
|
|
1027
|
+
#
|
|
1028
|
+
def start_one_dsrqst(pgrec, logact = 0):
    """Add a dscheck record that builds a request (dsrqst action BR).

    Returns 0 when a 'BR' check already exists for the request index;
    otherwise the value returned by add_one_dscheck().  When the request
    control record has qoptions of the form "-opts1/-opts2", only the
    first part is kept for the check.
    """
    ridx = pgrec['rindex']
    if PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND action = 'BR'".format(ridx), logact):
        return 0

    pgctl = PgCMD.get_dsrqst_control(pgrec, logact)
    if pgctl and 'qoptions' in pgctl and pgctl['qoptions']:
        parts = re.match(r'^(-.+)/(-.+)$', pgctl['qoptions'])
        if parts:
            pgctl['qoptions'] = parts.group(1)

    dsid = pgrec['dsid']
    argv = "{} BR -RI {} -b -d".format(dsid, ridx)
    return add_one_dscheck(ridx, 'R', "dsrqst", dsid, "BR", '',
                           pgrec['specialist'], argv, pgrec['email'], pgctl, logact)
|
|
1040
|
+
|
|
1041
|
+
#
|
|
1042
|
+
# add a dscheck record for a given request to setup partitions
|
|
1043
|
+
#
|
|
1044
|
+
def add_dsrqst_partitions(pgrec, logact = 0):
    """Add a dscheck record that sets up partitions (dsrqst action SP).

    Returns 0 when any dsrqst check already exists for the request
    index; otherwise the value returned by add_one_dscheck().  When the
    request control record has qoptions of the form "-opts1/-opts2",
    only the first part is kept for the check.
    """
    ridx = pgrec['rindex']
    if PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(ridx), logact):
        return 0

    pgctl = PgCMD.get_dsrqst_control(pgrec, logact)
    if pgctl and 'qoptions' in pgctl and pgctl['qoptions']:
        parts = re.match(r'^(-.+)/(-.+)$', pgctl['qoptions'])
        if parts:
            pgctl['qoptions'] = parts.group(1)

    dsid = pgrec['dsid']
    argv = "{} SP -RI {} -NP -b -d".format(dsid, ridx)
    return add_one_dscheck(ridx, 'R', "dsrqst", dsid, 'SP', '',
                           pgrec['specialist'], argv, pgrec['email'], pgctl, logact)
|
|
1056
|
+
|
|
1057
|
+
#
|
|
1058
|
+
# add multiple dscheck records of partitions for a given request
|
|
1059
|
+
#
|
|
1060
|
+
def start_dsrqst_partitions(pgrqst, ccnt, logact = 0):
    """Add dscheck records for queued request partitions.

    pgrqst -- request record to limit the work to, or a false value to
              handle queued partitions of all requests
    ccnt   -- maximum number of partition records to look at
    logact -- logging action flags passed to the database calls

    Returns a two-item list [requests started, partition checks added].
    For a given unlocked request, partitions in error are set back to
    'Q' first if the error count allows it.  When no queued partition is
    left and all partitions of the given request have status 'O', the
    request itself is started via start_one_dsrqst().
    """
    started = [0, 0]
    if not pgrqst:
        rindex = 0
        cnd = "status = "
    else:
        rindex = pgrqst['rindex']
        cnd = "rindex = {} AND status = ".format(rindex)
        if pgrqst['pid'] == 0:
            errcnt = PgDBI.pgget("ptrqst", "", cnd + "'E'", logact)
            if errcnt > 0 and (pgrqst['ecount'] + errcnt) <= PgOPT.PGOPT['PEMAX']:
                # set Error partitions back to Q
                PgDBI.pgexec("UPDATE ptrqst SET status = 'Q' WHERE {}'E'".format(cnd), PgOPT.PGOPT['extlog'])

    ptrecs = PgDBI.pgmget("ptrqst", "*", cnd + "'Q' AND pid = 0 ORDER by pindex", logact)
    ptcnt = len(ptrecs['pindex']) if ptrecs else 0

    if ptcnt > 0:
        if ptcnt > ccnt:
            ptcnt = ccnt
        pgctl = PgCMD.get_dsrqst_control(pgrqst, logact) if pgrqst else None
        for idx in range(ptcnt):
            ptrec = PgUtil.onerecord(ptrecs, idx)
            if ptrec['rindex'] != rindex:
                # partition of another request: load that request
                rindex = ptrec['rindex']
                pgrqst = PgDBI.pgget("dsrqst", "*", "rindex = {}".format(rindex), logact)
                if pgrqst:
                    pgctl = PgCMD.get_dsrqst_control(pgrqst, logact)
            if not pgrqst:   # request missing
                PgDBI.pgdel('ptrqst', "rindex = {}".format(rindex))
                continue

            ptctl = None
            if ptrec['ptcmp'] != 'Y':
                ptctl = PgCMD.get_partition_control(ptrec, pgrqst, pgctl, logact)
                if ptctl and 'qoptions' in ptctl and ptctl['qoptions']:
                    parts = re.match(r'^(-.+)/(-.+)$', ptctl['qoptions'])
                    if parts:
                        ptctl['qoptions'] = parts.group(2)

            pidx = ptrec['pindex']
            if PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND action = 'PP'".format(pidx), logact):
                continue
            argv = "{} PP -PI {} -RI {} -b -d".format(pgrqst['dsid'], pidx, pgrqst['rindex'])
            started[1] += add_one_dscheck(pidx, 'P', "dsrqst", pgrqst['dsid'], "PP", '',
                                          pgrqst['specialist'], argv, pgrqst['email'], ptctl, logact)
    elif pgrqst and pgrqst['pid'] == 0 and pgrqst['ptcount'] == PgDBI.pgget("ptrqst", "", cnd + " 'O'", logact):
        started[0] = start_one_dsrqst(pgrqst, logact)

    return started
|
|
1105
|
+
|
|
1106
|
+
#
|
|
1107
|
+
# check long processing requests and unlock the processes that are aborted
|
|
1108
|
+
#
|
|
1109
|
+
def check_dsrqst_locks(cnd, logact = 0):
    """Check long-locked requests and unlock those no longer processed.

    cnd    -- leading SQL condition prepended to the lock query
    logact -- logging action flags passed to the database calls

    Request partitions are checked first with the same condition.  A
    locked request on the local host (or on any selected host when
    CHKHOST['chkhost'] is set) is unlocked via PgLock.lock_request().
    For the others the lock time is refreshed, or the lock is cleared
    directly when it has no host, belongs to 'rda_config', or belongs to
    'partition' with no partition still locked.  Every record that stays
    locked is registered in RUNPIDS under "<lockhost><pid>".
    """
    ltime = int(time.time())
    lochost = PgLOG.PGLOG['HOSTNAME']
    cnd += "pid > 0 AND "
    dtime = ltime - PgSIG.PGSIG['DTIME']
    ctime = ltime - PgSIG.PGSIG['CTIME']
    rtime = ltime - PgSIG.PGSIG['RTIME']
    if CHKHOST['chkhost']:
        cnd += "lockhost {} AND locktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        cnd += "locktime > 0 AND (locktime < {} OR locktime < {} AND lockhost = '{}' OR locktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, lochost, rtime)
    check_partition_locks(cnd, ltime, logact)   # check partitions first

    pgrecs = PgDBI.pgmget("dsrqst", "rindex, lockhost, pid, locktime", cnd, logact)
    cnt = (len(pgrecs['rindex']) if pgrecs else 0)
    lcnt = 0
    for i in range(cnt):
        pgrec = PgUtil.onerecord(pgrecs, i)
        lmsg = "{}({}) at {} on {}".format(pgrec['lockhost'], pgrec['pid'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        ridx = pgrec['rindex']
        if CHKHOST['chkhost'] or pgrec['lockhost'] == lochost:
            if PgLock.lock_request(ridx, 0) > 0:
                PgLOG.pglog("Rqst{}: unlocked {}".format(ridx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
                continue
        if(PgDBI.pgexec("UPDATE dsrqst set locktime = {} WHERE rindex = {} AND pid = {}".format(ltime, ridx, pgrec['pid']), logact) and
           not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(ridx))):
            PgLOG.pglog("Rqst{}: time updated for {}".format(ridx, lmsg), PgLOG.LOGWRN|PgLOG.FRCLOG)
        # 'and' binds tighter than 'or': the ptrqst test only applies to
        # locks held by 'partition'
        elif(not pgrec['lockhost'] or pgrec['lockhost'] == 'rda_config' or pgrec['lockhost'] == 'partition' and
             not PgDBI.pgget('ptrqst', '', "rindex = {} AND pid > 0".format(ridx), logact)):
            record = {'pid' : 0, 'lockhost' : ''}
            if PgDBI.pgupdt("dsrqst", record, "rindex = {} AND pid = {}".format(ridx, pgrec['pid']), logact):
                # report the full lock info, as the other unlock messages do
                PgLOG.pglog("Rqst{}: unlocked {}".format(ridx, lmsg), PgLOG.LOGWRN)
                lcnt += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            # runtime first, then lock info, matching the partition message
            PgLOG.pglog("Rqst{}: time NOT updated for {} of {}".format(ridx, dscheck_runtime(pgrec['locktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(pgrec['lockhost'], pgrec['pid'])] = 1

    if cnt > 1: PgLOG.pglog("{} of {} DSRQST records unlocked on {}".format(lcnt, cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1151
|
+
|
|
1152
|
+
#
|
|
1153
|
+
# check long processing request partitions and unlock the processes that are aborted
|
|
1154
|
+
#
|
|
1155
|
+
def check_partition_locks(cnd, ltime, logact = 0):
    """Check long-locked request partitions and unlock the aborted ones.

    cnd    -- complete SQL condition selecting the locked partitions
    ltime  -- current time in seconds, used as the refreshed lock time
    logact -- logging action flags; PgLOG.LGEREX is masked off for the
              select query

    A partition locked on the local host (or on any selected host when
    CHKHOST['chkhost'] is set) is unlocked via PgLock.lock_partition().
    For the others the lock times of the partition and of its request
    are refreshed, or the lock is cleared directly when it has no host
    or belongs to 'rda_config'.  Every partition that stays locked is
    registered in RUNPIDS under "<lockhost><pid>".
    """
    localhost = PgLOG.PGLOG['HOSTNAME']
    ptrecs = PgDBI.pgmget("ptrqst", "pindex, rindex, lockhost, pid, locktime", cnd, (logact&~PgLOG.LGEREX))
    total = (len(ptrecs['pindex']) if ptrecs else 0)
    unlocked = 0
    for idx in range(total):
        ptrec = PgUtil.onerecord(ptrecs, idx)
        lockhost = ptrec['lockhost']
        lockpid = ptrec['pid']
        lmsg = "{}({}) at {} on {}".format(lockhost, lockpid, PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        pidx = ptrec['pindex']
        if CHKHOST['chkhost'] or lockhost == localhost:
            if PgLock.lock_partition(pidx, 0) > 0:
                PgLOG.pglog("RPT{}: unlocked {}".format(pidx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue

        # refresh both lock times; report only if no check owns the partition
        refreshed = (PgDBI.pgexec("UPDATE ptrqst set locktime = {} WHERE pindex = {} AND pid = {}".format(ltime, pidx, lockpid), logact) and
                     PgDBI.pgexec("UPDATE dsrqst set locktime = {} WHERE rindex = {}".format(ltime, ptrec['rindex']), logact) and
                     not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst' AND otype = 'P'".format(pidx)))
        if refreshed:
            PgLOG.pglog("RPT{}: time updated for {}".format(pidx, lmsg), PgLOG.LOGWRN)
        elif not lockhost or lockhost == 'rda_config':
            if PgDBI.pgupdt("ptrqst", {'pid' : 0, 'lockhost' : ''}, "pindex = {} AND pid = {}".format(pidx, lockpid), logact):
                PgLOG.pglog("RPT{}: unlocked {}".format(pidx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("RPT{}: time NOT updated for {} of {}".format(pidx, dscheck_runtime(ptrec['locktime'], ltime), lmsg), logact)

        RUNPIDS["{}{}".format(lockhost, lockpid)] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSRQST partitions unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1185
|
+
|
|
1186
|
+
#
|
|
1187
|
+
# check dsrqsts and purge them if done already
|
|
1188
|
+
#
|
|
1189
|
+
def purge_dsrqsts(cnd, logact = 0):
    """Add purge checks (dsrqst action PR) for requests due for purging.

    cnd    -- leading SQL condition prepended to the dsrqst query
    logact -- logging action flags passed to the database calls

    A request is due when its status is 'P' and its purge date/time is
    unset or past, or when its status is 'O' and its purge date/time is
    past.  Requests that already have a dsrqst check are left alone.
    Each purge check is added with qoptions "-l walltime=1:00:00".
    """
    (sdate, stime) = PgUtil.get_date_time()
    purge_past = "date_purge < '{}' OR date_purge = '{}' AND time_purge < '{}'".format(sdate, sdate, stime)
    cnd += "(status = 'P' AND (date_purge IS NULL OR " + purge_past + ")"
    cnd += " OR status = 'O' AND (" + purge_past + ")) ORDER BY rindex"

    rqrecs = PgDBI.pgmget("dsrqst", "rindex, dsid, email, specialist", cnd, logact)
    total = (len(rqrecs['rindex']) if rqrecs else 0)
    pgctl = {'qoptions' : "-l walltime=1:00:00"}
    for idx in range(total):
        rqrec = PgUtil.onerecord(rqrecs, idx)
        ridx = rqrec['rindex']
        if PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsrqst'".format(ridx), logact):
            continue   # a check for this request exists already
        dsid = rqrec['dsid']
        argv = "{} PR -RI {} -b -d".format(dsid, ridx)
        add_one_dscheck(ridx, 'R', 'dsrqst', dsid, 'PR', '',
                        rqrec['specialist'], argv, rqrec['email'], pgctl, logact)
|
|
1205
|
+
|
|
1206
|
+
#
|
|
1207
|
+
# check dsrqsts and send saved email
|
|
1208
|
+
#
|
|
1209
|
+
def email_dsrqsts(cnd, logact = 0):
    """Send the emails saved in field einfo of unlocked requests.

    cnd    -- leading SQL condition prepended to the dsrqst query
    logact -- logging action flags; PgLOG.EMEROL is carried over to the
              email action when fully set

    Each request is locked while its email is handled.  The email text
    is checked by verify_request_einfo(); when text is returned it is
    sent and einfo is cleared.  The loop stops at the first request
    whose email cannot be sent or whose einfo cannot be cleared.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL

    cnd += "pid = 0 AND einfo IS NOT NULL"
    rqrecs = PgDBI.pgmget("dsrqst", "rindex, ptcount, einfo", cnd, logact)
    total = (len(rqrecs['rindex']) if rqrecs else 0)
    sentcnt = 0
    for idx in range(total):
        rqrec = PgUtil.onerecord(rqrecs, idx)
        ridx = rqrec['rindex']
        if PgLock.lock_request(ridx, 1) <= 0:
            continue

        failed = False
        einfo = verify_request_einfo(ridx, rqrec['ptcount'], rqrec['einfo'], logact)
        if einfo:
            if (PgLOG.send_customized_email("Rqst{}".format(ridx), einfo, emlact) and
                PgDBI.pgexec("UPDATE dsrqst set einfo = NULL WHERE rindex = {}".format(ridx), logact)):
                sentcnt += 1
            else:
                failed = True

        PgLock.lock_request(ridx, 0)
        if failed:
            break

    if total > 1:
        PgLOG.pglog("{} of {} DSRQST emails sent on {}".format(sentcnt, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1233
|
+
|
|
1234
|
+
#
|
|
1235
|
+
# verify email info for partition errors
|
|
1236
|
+
# return None if not all partitions finished
|
|
1237
|
+
#
|
|
1238
|
+
def verify_request_einfo(ridx, ptcnt, einfo, logact = 0):
    """Return the email text of a request once it is ready to be sent.

    ridx   -- request index
    ptcnt  -- partition count of the request
    einfo  -- saved email text
    logact -- logging action flags passed to the database calls

    einfo is returned as is when the request has fewer than two
    partitions, when einfo is empty, or when it carries no "<PARTERR>"
    key.  None is returned while any partition is still locked or has
    status 'R'.  Otherwise the "<PARTERR>" key is removed and the
    "<PARTCNT>" key is replaced by "<error count> of <partition count>".
    """
    # no further checking if no partitions or einfo is empty
    if ptcnt < 2 or not einfo:
        return einfo

    # partition processes are not all done yet
    if PgDBI.pgget("ptrqst", "", "rindex = {} AND (pid > 0 OR status = 'R')".format(ridx), logact):
        return None

    errkey = "<PARTERR>"
    cntkey = "<PARTCNT>"
    # einfo does not contain partition error key
    if errkey not in einfo:
        return einfo

    einfo = einfo.replace(errkey, '')
    errcnt = PgDBI.pgget("ptrqst", "", "rindex = {} AND status = 'E'".format(ridx), logact)
    return einfo.replace(cntkey, "{} of {}".format(errcnt, ptcnt))
|
|
1254
|
+
|
|
1255
|
+
#
|
|
1256
|
+
# start recording due updates to checks
|
|
1257
|
+
#
|
|
1258
|
+
def start_dsupdts(cnd, logact = 0):
    """Add dscheck records for the update controls that are due.

    cnd    -- leading SQL condition prepended to the dcupdt query
    logact -- logging action flags passed to the database calls

    Stale update locks are checked first; when CHKHOST['chkhost'] is set
    nothing else is done and 0 is returned.  Otherwise saved emails are
    sent and a check is added for each unlocked control with an action,
    no pending email and a control time not later than now, unless a
    dsupdt check exists already or, for a control with a parent index,
    PgOPT.valid_data_time() rejects it.  The loop stops at the first
    check that cannot be added.  Returns the number of checks added.
    """
    now = PgUtil.curtime(1)
    check_dsupdt_locks(cnd, logact)
    if CHKHOST['chkhost']:
        return 0
    email_dsupdt_controls(cnd, logact)
    email_dsupdts(cnd, logact)

    cnd += "pid = 0 and cntltime <= '{}' and action > '' AND einfo IS NULL ORDER by cntltime".format(now)
    ctlrecs = PgDBI.pgmget("dcupdt", "*", cnd, logact)
    total = (len(ctlrecs['cindex']) if ctlrecs else 0)
    added = 0
    for idx in range(total):
        ctlrec = PgUtil.onerecord(ctlrecs, idx)
        cidx = ctlrec['cindex']
        if PgDBI.pgget("dscheck", "pid, lockhost", "oindex = {} AND command = 'dsupdt'".format(cidx), logact):
            continue
        if ctlrec['pindex'] and not PgOPT.valid_data_time(ctlrec):
            continue
        argv = "{} {} -CI {} -b -d".format(ctlrec['dsid'], ctlrec['action'], cidx)
        if not add_one_dscheck(cidx, 'C', "dsupdt", ctlrec['dsid'], ctlrec['action'],
                               '', ctlrec['specialist'], argv, None, ctlrec, logact):
            break
        added += 1

    if total > 1:
        PgLOG.pglog("update {} of {} DSUPDT controls on {}".format(added, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
    return added
|
|
1281
|
+
|
|
1282
|
+
#
|
|
1283
|
+
# check if the parent update control is finished
|
|
1284
|
+
#
|
|
1285
|
+
def parent_not_finished(pgrec):
    """Return 1 if the parent update control is not far enough along.

    pgrec -- update control record; reads frequency, datatime and pindex

    The frequency ("<n>Y", "<n>M", "<n>W", "<n>D", "<n>H" or
    "<n>M/<fractions>") is added to the data time of this control, and
    1 is returned when the parent control (cindex = pindex) has a data
    time earlier than that.  Returns 0 otherwise, and also for a zero
    frequency or an unusable number of month fractions.
    """
    # always seven slots, since all of freq[0..6] are passed on below;
    # order taken from the assignments here: years, months, days, hours,
    # two unused slots, then the number of fractions in a month
    freq = [0, 0, 0, 0, 0, 0, 0]
    ms = re.match(r'^(\d+)([YMWDH])$', pgrec['frequency'], re.I)
    if ms:
        val = int(ms.group(1))
        unit = ms.group(2).upper()
        if not val: return 0
        if unit == 'Y':
            freq[0] = val
        elif unit == 'M':
            freq[1] = val
        elif unit == 'W':
            freq[2] = 7 * val
        elif unit == 'D':
            freq[2] = val
        elif unit == 'H':   # update frequency is hourly controlled
            freq[3] = val
    else:
        ms = re.match(r'^(\d+)M/(\d+)', pgrec['frequency'], re.I)
        if ms:
            val = int(ms.group(1))
            nf = int(ms.group(2))
            # only 2 to 10 fractions that divide a 30-day month evenly
            if nf < 2 or nf > 10 or (30%nf): return 0
            freq = [0, val, 0, 0, 0, 0, nf]   # number of fractions in a month

    dtime = PgUtil.adddatetime(pgrec['datatime'], freq[0], freq[1], freq[2], freq[3], freq[4], freq[5], freq[6])
    if PgDBI.pgget("dcupdt", "", "cindex = {} AND datatime < '{}'".format(pgrec['pindex'], dtime), PgOPT.PGOPT['extlog']):
        return 1
    else:
        return 0
|
|
1316
|
+
|
|
1317
|
+
#
|
|
1318
|
+
# check long processing updates and unlock the processes that are aborted
|
|
1319
|
+
#
|
|
1320
|
+
def check_dsupdt_locks(ocnd, logact = 0):
    """Look for stale dsupdt locks and release the ones that can be released.

    Two passes are made with the same logic: first over locked update
    controls (table dcupdt, lock time in chktime, host in lockhost), then
    over locked local-file updates (table dlupdt, lock time in locktime,
    host in hostname).  For each stale record:
      * if host checking is enabled (CHKHOST['chkhost']) or the lock belongs
        to this host, an unlock is tried through PgLock; if that does not
        succeed the lock time is refreshed to the current time;
      * if the lock host is empty or 'rda_config', pid and host are cleared
        directly in the table;
      * otherwise a message is logged when logact carries PgLOG.EMEROL.
    Every record that stays locked is remembered in RUNPIDS under the key
    "<host><pid>".

    ocnd   -- leading SQL condition text; further conditions are appended
              directly to it.
    logact -- logging action flags passed on to the database/log calls.
    """
    now = int(time.time())
    thishost = PgLOG.PGLOG['HOSTNAME']
    dtime = now - PgSIG.PGSIG['DTIME']
    ctime = now - 4*PgSIG.PGSIG['CTIME']
    rtime = now - PgSIG.PGSIG['RTIME']

    # ---- pass 1: update controls ----
    if CHKHOST['chkhost']:
        stale = "lockhost {} AND chktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        stale = "chktime > 0 AND (chktime < {} OR chktime < {} AND lockhost = '{}' OR chktime < {} AND lockhost = 'rda_config')".format(ctime, dtime, thishost, rtime)

    pgrecs = PgDBI.pgmget("dcupdt", "cindex, lockhost, pid, chktime", ocnd + "pid > 0 AND " + stale, logact)
    total = len(pgrecs['cindex']) if pgrecs else 0
    unlocked = 0
    for i in range(total):
        rec = PgUtil.onerecord(pgrecs, i)
        host = rec['lockhost']
        pid = rec['pid']
        lmsg = "{}({}) at {} on {}".format(host, pid, PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        idx = rec['cindex']
        if CHKHOST['chkhost'] or host == thishost:
            if PgLock.lock_update_control(idx, 0) > 0:
                PgLOG.pglog("UC{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
            # still locked: refresh the check time so it is not picked up again right away
            refreshed = PgDBI.pgexec("UPDATE dcupdt SET chktime = {} WHERE cindex = {} AND pid = {}".format(now, idx, pid), logact)
            if refreshed and not PgDBI.pgget("dscheck", "", "oindex = {} AND command = 'dsupdt'".format(idx)):
                PgLOG.pglog("UC{}: time updated for {}".format(idx, lmsg), PgLOG.LOGWRN)
        elif not host or host == 'rda_config':
            cleared = {'pid' : 0, 'lockhost' : ''}
            if PgDBI.pgupdt("dcupdt", cleared, "cindex = {} AND pid = {}".format(idx, pid), logact):
                PgLOG.pglog("UC{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("UC{}: time NOT updated for {} of {}".format(idx, dscheck_runtime(rec['chktime'], now), lmsg), logact)

        # the lock remains: remember its owner
        RUNPIDS["{}{}".format(host, pid)] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Controls unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)

    # ---- pass 2: local file updates ----
    if CHKHOST['chkhost']:
        stale = "hostname {} AND locktime < {}".format(CHKHOST['hostcond'], dtime)
    else:
        stale = "(locktime < {} OR locktime < {} AND hostname = '{}' OR locktime < {} AND hostname = 'rda_config')".format(ctime, dtime, thishost, rtime)

    pgrecs = PgDBI.pgmget("dlupdt", "lindex, hostname, pid, locktime", ocnd + "pid > 0 AND locktime > 0 AND " + stale, logact)
    total = len(pgrecs['lindex']) if pgrecs else 0
    unlocked = 0
    for i in range(total):
        rec = PgUtil.onerecord(pgrecs, i)
        host = rec['hostname']
        pid = rec['pid']
        lmsg = "{}({}) at {} on {}".format(host, pid, PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME'])
        idx = rec['lindex']
        if CHKHOST['chkhost'] or host == thishost:
            if PgLock.lock_update(idx, None, 0) > 0:
                PgLOG.pglog("Updt{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
            # still locked: refresh the lock time
            PgDBI.pgexec("UPDATE dlupdt SET locktime = {} WHERE lindex = {} AND pid = {}".format(now, idx, pid), logact)
        elif not host or host == 'rda_config':
            cleared = {'pid' : 0, 'hostname' : ''}
            if PgDBI.pgupdt("dlupdt", cleared, "lindex = {} AND pid = {}".format(idx, pid), logact):
                PgLOG.pglog("Updt{}: unlocked {}".format(idx, lmsg), PgLOG.LOGWRN)
                unlocked += 1
                continue
        elif (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
            PgLOG.pglog("Updt{}: time NOT updated for {} of {}".format(idx, dscheck_runtime(rec['locktime'], now), lmsg), logact)

        # the lock remains: remember its owner
        RUNPIDS["{}{}".format(host, pid)] = 1

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Local Files unlocked on {}".format(unlocked, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1392
|
+
|
|
1393
|
+
#
|
|
1394
|
+
# check dsupdt controls and send saved email
|
|
1395
|
+
#
|
|
1396
|
+
def email_dsupdt_controls(cnd, logact = 0):
    """Send the email text saved in unlocked update control records.

    Selects dcupdt records with pid = 0 and a non-NULL einfo, locks each one
    through PgLock.lock_update_control(), re-reads einfo, emails it and then
    sets einfo to NULL.  The record is unlocked again afterwards.  The loop
    stops at the first record whose email could not be sent or whose einfo
    could not be cleared.

    cnd    -- leading SQL condition text; the selection condition is
              appended directly to it.
    logact -- logging action flags; when it carries PgLOG.EMEROL the same
              flag is added to the email action.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL:
        emlact |= PgLOG.EMEROL

    pgrecs = PgDBI.pgmget("dcupdt", "cindex", cnd + "pid = 0 AND einfo IS NOT NULL", logact)
    cindices = pgrecs['cindex'] if pgrecs else []
    total = len(cindices)
    mailed = 0
    for cidx in cindices:
        if PgLock.lock_update_control(cidx, 1) <= 0:
            continue   # could not get the lock, skip this control
        # read einfo again now that the record is locked
        pgrec = PgDBI.pgget("dcupdt", "einfo", "cindex = {}".format(cidx), logact)
        if not pgrec['einfo']:
            sent = 0
        elif (PgLOG.send_customized_email("UC{}".format(cidx), pgrec['einfo'], emlact) and
              PgDBI.pgexec("UPDATE dcupdt set einfo = NULL WHERE cindex = {}".format(cidx), logact)):
            sent = 1
        else:
            sent = -1

        PgLock.lock_update_control(cidx, 0)
        if sent == -1:
            break
        mailed += sent

    if total > 1:
        PgLOG.pglog("{} of {} DSUPDT Control emails sent on {}".format(mailed, total, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1419
|
+
|
|
1420
|
+
#
|
|
1421
|
+
# check dsupdts and send saved email
|
|
1422
|
+
#
|
|
1423
|
+
def email_dsupdts(cnd, logact = 0):
    """Send the email notes saved in unlocked local-file update records.

    Selects dlupdt records with pid = 0 and a non-NULL emnote.  A record is
    skipped while its update control (cindex > 0) is still held by a process
    (dcupdt.pid > 0).  Each remaining record is locked through
    PgLock.lock_update(), its emnote is re-read, emailed and set to NULL, and
    the record is unlocked again.  The loop stops at the first record whose
    email could not be sent or whose emnote could not be cleared.

    cnd    -- leading SQL condition text; the selection condition is
              appended directly to it.
    logact -- logging action flags; when it carries PgLOG.EMEROL the same
              flag is added to the email action.
    """
    emlact = PgLOG.LOGWRN|PgLOG.FRCLOG
    if logact and (logact&PgLOG.EMEROL) == PgLOG.EMEROL: emlact |= PgLOG.EMEROL
    cnd += "pid = 0 AND emnote IS NOT NULL"
    pgrecs = PgDBI.pgmget("dlupdt", "lindex, cindex", cnd, logact)
    cnt = (len(pgrecs['lindex']) if pgrecs else 0)
    ecnt = 0
    for i in range(cnt):
        cidx = pgrecs['cindex'][i]
        # leave the note alone while the owning update control is locked
        if cidx > 0 and PgDBI.pgget("dcupdt", "", "cindex = {} AND pid > 0".format(cidx), logact): continue
        lidx = pgrecs['lindex'][i]
        if PgLock.lock_update(lidx, None, 1) <= 0: continue
        # read emnote again now that the record is locked
        pgrec = PgDBI.pgget("dlupdt", "emnote", "lindex = {}".format(lidx), logact)
        if pgrec['emnote']:
            # tag the email with the real update index; the tag used to be
            # the constant text "Updtidx" for every record
            sent = 1 if (PgLOG.send_customized_email("Updt{}".format(lidx), pgrec['emnote'], emlact) and
                         PgDBI.pgexec("UPDATE dlupdt set emnote = NULL WHERE lindex = {}".format(lidx), logact)) else -1
        else:
            sent = 0

        PgLock.lock_update(lidx, None, 0)
        if sent == -1: break
        ecnt += sent

    if cnt > 0: PgLOG.pglog("{} of {} DSUPDT emails sent on {}".format(ecnt, cnt, PgLOG.PGLOG['HOSTNAME']), PgLOG.WARNLG)
|
|
1448
|
+
|
|
1449
|
+
#
|
|
1450
|
+
# create a dscheck record for a given command
|
|
1451
|
+
#
|
|
1452
|
+
def add_one_dscheck(oindex, otype, cmd, dsid, action, workdir, specialist, argv, remail, btctl, logact = 0):
    """Add one record to table dscheck for the given command.

    The argument string is limited to 100 characters in field 'argv'; any
    remainder is stored in 'argextra'.  'pindex' and 'mcount' are taken from
    options PI and MC when present, and the batch options returned by
    PgCMD.get_batch_options(btctl) are merged in.

    For command 'dsrqst' with a request email, the number of dscheck records
    already present for that email is compared with the limits returned by
    get_user_limit(remail): element 1 for otype 'P', element 0 for other
    types unless the action is 'PR'.  PgLOG.FAILURE is returned when the
    limit is reached.

    Nothing is added when a matching dscheck record exists already; the
    result of the PgLOG.pglog() call reporting it is returned in that case.

    Returns PgLOG.SUCCESS when the record is added; PgLOG.FAILURE when the
    insert fails (after sleeping PgSIG.PGSIG['ETIME'] seconds).
    """
    # keep the first 100 characters in argv and the rest, if any, in argextra
    argextra = argv[100:] if len(argv) > 100 else None
    argv = argv[0:100]

    record = {'command' : cmd, 'argv' : argv, 'specialist' : specialist, 'workdir' : workdir,
              'dsid' : dsid, 'action' : action, 'oindex' : oindex, 'otype' : otype}
    (record['date'], record['time']) = PgUtil.get_date_time()
    if argextra:
        record['argextra'] = argextra
    if 'PI' in PgOPT.params:
        record['pindex'] = PgOPT.params['PI'][0]
    if 'MC' in PgOPT.params and PgOPT.params['MC'][0] > 0:
        record['mcount'] = PgOPT.params['MC'][0]
    record.update(PgCMD.get_batch_options(btctl))

    if cmd == 'dsrqst' and remail:
        record['remail'] = remail
        if otype == 'P':
            pgcnt = PgDBI.pgget("dscheck", "", "remail = '{}' AND otype = 'P'" .format(remail), logact)
            if pgcnt >= get_user_limit(remail)[1]:
                return PgLOG.FAILURE
        elif action != 'PR':
            pgcnt = PgDBI.pgget("dscheck", "", "remail = '{}' AND otype = 'R'".format(remail), logact)
            if pgcnt >= get_user_limit(remail)[0]:
                return PgLOG.FAILURE

    # look for an existing check: by object index/type if given, else by command info
    if oindex and otype:
        existing = PgDBI.pgget('dscheck', '*', "oindex = {} AND otype = '{}'".format(oindex, otype), logact)
    else:
        existing = PgCMD.get_dscheck(cmd, argv, workdir, specialist, argextra, logact)
    if existing:
        return PgLOG.pglog("Chk{}: {} added already {} {}".format(existing['cindex'], PgCMD.get_command_info(existing), existing['date'], existing['time']), PgLOG.LOGWRN|PgLOG.FRCLOG)

    cidx = PgDBI.pgadd("dscheck", record, logact|PgLOG.AUTOID)
    if cidx:
        PgLOG.pglog("Chk{}: {} added {} {}".format(cidx, PgCMD.get_command_info(record), record['date'], record['time']), PgLOG.LOGWRN|PgLOG.FRCLOG)
        return PgLOG.SUCCESS

    # the insert failed: report it, pause, and give up
    if oindex and otype:
        PgLOG.pglog("{}-{}-{}: Fail add check for {}".format(cmd, otype, oindex, specialist), PgLOG.LOGWRN|PgLOG.FRCLOG)
    else:
        PgLOG.pglog("{}: Fail add check for {}".format(cmd, specialist), PgLOG.LOGWRN|PgLOG.FRCLOG)
    time.sleep(PgSIG.PGSIG['ETIME'])
    return PgLOG.FAILURE
|
|
1500
|
+
|
|
1501
|
+
#
|
|
1502
|
+
# get dscheck status
|
|
1503
|
+
#
|
|
1504
|
+
def dscheck_status(stat):
    """Return the descriptive name for a one-letter dscheck status code.

    stat -- status code such as 'C', 'D' or 'R'.

    Returns the matching name, or "Unknown" for any code not in the table.
    """
    names = {
        'C' : "Created",
        'D' : "Done",
        'E' : "Exit",
        'F' : "Finished",
        'H' : "Held",
        'I' : "Interrupted",
        'P' : "Pending",
        'Q' : "Queueing",
        'R' : "Run",
        'S' : "Suspended",
    }
    return names.get(stat, "Unknown")
|
|
1519
|
+
|
|
1520
|
+
#
|
|
1521
|
+
# validate given daemon control indices
|
|
1522
|
+
#
|
|
1523
|
+
def validate_daemons():
    """Validate the daemon control indices given with option DI.

    Does nothing when bit 8 of PgOPT.OPTS['DI'][2] shows the indices were
    validated before.  When no index is given, action SD gets a list of zero
    indices sized by PgOPT.get_max_count("HN", "CM") (option ND must be
    present) and the function returns.

    Otherwise each value is normalized to an integer.  If a condition sign
    ('!', '<', '>' or '<>') is found among the values, the whole list is
    replaced by the matching dindex values selected from table dsdaemon.
    Without a condition sign every index is checked: index 0 is accepted for
    action SD with option ND only, any other index must exist in dsdaemon,
    and for actions whose option flag is positive it must belong to the
    specialist in option LN unless the current user is the RDA user.

    Problems are reported through PgOPT.action_error().
    """
    if PgOPT.OPTS['DI'][2]&8: return   # already validated

    dcnt = len(PgOPT.params['DI']) if 'DI' in PgOPT.params else 0
    if not dcnt:
        if PgOPT.PGOPT['CACT'] == 'SD':
            if 'ND' not in PgOPT.params:
                PgOPT.action_error("Mode option -ND must be present to add new Daemon Control record")
            dcnt = PgOPT.get_max_count("HN", "CM")
            if dcnt > 0:
                PgOPT.params['DI'] = [0]*dcnt
        return

    indices = PgOPT.params['DI']
    has_condition = False
    for pos, val in enumerate(indices):
        if not val:
            indices[pos] = 0
        elif not isinstance(val, int):
            if re.match(r'^(!|<|>|<>)$', val):
                if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0:
                    PgOPT.action_error("Invalid condition '{}' of Daemon Control index".format(val))
                has_condition = True
                break
            indices[pos] = int(val)

    if not has_condition:   # normal daemon control index given
        for pos, val in enumerate(indices):
            if not val:
                if PgOPT.PGOPT['CACT'] != 'SD':
                    PgOPT.action_error("Daemon Control Index 0 is not allowed\nUse Action SD with Mode option -ND to add new record")
                elif not PgOPT.params.get('ND'):
                    # .get() so a missing -ND reaches the error message
                    # instead of raising KeyError
                    PgOPT.action_error("Mode option -ND must be present to add new Daemon Control record")
                continue
            if pos > 0 and val == indices[pos-1]: continue   # same as previous index, checked already
            pgrec = PgDBI.pgget("dsdaemon", "specialist", "dindex = {}".format(val), PgOPT.PGOPT['extlog'])
            if not pgrec:
                PgOPT.action_error("Daemon Control Index '{}' is not in RDADB".format(val))
            elif(PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0 and PgOPT.params['LN'] != pgrec['specialist'] and
                 PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']):
                PgOPT.action_error("{}: must be {}, owner of Daemon Control Index {}".format(PgOPT.params['LN'], pgrec['specialist'], val))
    else:   # found a non-equal condition sign
        pgrec = PgDBI.pgmget("dsdaemon", "DISTINCT dindex",
                             PgDBI.get_field_condition("dindex", PgOPT.params['DI'], 0, 1), PgOPT.PGOPT['extlog'])
        if not pgrec: PgOPT.action_error("No Daemon Control matches given Index condition")
        PgOPT.params['DI'] = pgrec['dindex']

    PgOPT.OPTS['DI'][2] |= 8   # set validated flag
|
|
1572
|
+
|
|
1573
|
+
#
|
|
1574
|
+
# validate given check indices
|
|
1575
|
+
#
|
|
1576
|
+
def validate_checks():
    """Validate the check indices given with option CI.

    Does nothing when bit 8 of PgOPT.OPTS['CI'][2] shows the indices were
    validated before.  Each given value is normalized to an integer.  If a
    condition sign ('!', '<', '>' or '<>') is found among the values, the
    whole list is replaced by the matching cindex values selected from table
    dscheck.  Without a condition sign every index must be non-zero, must
    exist in dscheck, and for actions whose option flag is positive it must
    belong to the specialist in option LN unless the current user is the RDA
    user.

    Problems are reported through PgOPT.action_error().
    """
    if (PgOPT.OPTS['CI'][2]&8) == 8:
        return   # already validated

    if 'CI' in PgOPT.params:
        indices = PgOPT.params['CI']
        has_condition = False
        for pos, val in enumerate(indices):
            if not val:
                indices[pos] = 0
            elif not isinstance(val, int):
                if re.match(r'^(!|<|>|<>)$', val):
                    if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0:
                        PgOPT.action_error("Invalid condition '{}' of Check index".format(val))
                    has_condition = True
                    break
                indices[pos] = int(val)

        if has_condition:   # a condition sign was given: select the matching indices
            pgrec = PgDBI.pgmget("dscheck", "cindex", PgDBI.get_field_condition("cindex", PgOPT.params['CI'], 0, 1), PgOPT.PGOPT['extlog'])
            if not pgrec:
                PgOPT.action_error("No Check matches given Index condition")
            PgOPT.params['CI'] = pgrec['cindex']
        else:   # plain check indices were given: verify them one by one
            for pos, val in enumerate(indices):
                if not val:
                    PgOPT.action_error("Check Index 0 is not allowed")
                if pos > 0 and val == indices[pos-1]:
                    continue   # same as previous index, checked already
                pgrec = PgDBI.pgget("dscheck", "specialist", "cindex = {}".format(val), PgOPT.PGOPT['extlog'])
                if not pgrec:
                    PgOPT.action_error("Check Index '{}' is not in RDADB".format(val))
                elif(PgOPT.OPTS[PgOPT.PGOPT['CACT']][2] > 0 and PgOPT.params['LN'] != pgrec['specialist'] and
                     PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']):
                    PgOPT.action_error("{}: must be {}, owner of Check Index {}".format(PgOPT.params['LN'], pgrec['specialist'], val))

    PgOPT.OPTS['CI'][2] |= 8   # set validated flag
|
|
1612
|
+
|
|
1613
|
+
#
|
|
1614
|
+
# validate given dataset IDs
|
|
1615
|
+
#
|
|
1616
|
+
def validate_datasets():
    """Validate the dataset IDs given with option DS.

    Does nothing when bit 8 of PgOPT.OPTS['DS'][2] shows the IDs were
    validated before.  Every ID must be non-empty and must exist in table
    dataset; an ID equal to the one right before it in the list is not
    looked up again.  Problems are reported through PgOPT.action_error().
    """
    if PgOPT.OPTS['DS'][2]&8:
        return   # already validated

    dsids = PgOPT.params['DS']
    for pos, dsid in enumerate(dsids):
        if not dsid:
            PgOPT.action_error("Empty Dataset ID is not allowed")
        if pos and dsid == dsids[pos-1]:
            continue   # same as previous ID, checked already
        found = PgDBI.pgget("dataset", "", "dsid = '{}'".format(dsid), PgOPT.PGOPT['extlog'])
        if not found:
            PgOPT.action_error("Dataset '{}' is not in RDADB".format(dsid))

    PgOPT.OPTS['DS'][2] |= 8   # set validated flag
|