rda-python-dscheck 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_dscheck/PgCheck.py +1628 -0
- rda_python_dscheck/__init__.py +1 -0
- rda_python_dscheck/dscheck.py +671 -0
- rda_python_dscheck/dscheck.usg +737 -0
- rda_python_dscheck-1.0.1.dist-info/LICENSE +21 -0
- rda_python_dscheck-1.0.1.dist-info/METADATA +16 -0
- rda_python_dscheck-1.0.1.dist-info/RECORD +10 -0
- rda_python_dscheck-1.0.1.dist-info/WHEEL +5 -0
- rda_python_dscheck-1.0.1.dist-info/entry_points.txt +2 -0
- rda_python_dscheck-1.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: dscheck
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 09/28/2020
|
|
8
|
+
# 2025-02-05 transferred to package rda_python_dscheck from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose: python utility program to check and start command saved in dscheck
|
|
11
|
+
#
|
|
12
|
+
# Github: https://github.com/NCAR/rda-python-dscheck.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
#
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
from os import path as op
|
|
21
|
+
from rda_python_common import PgLOG
|
|
22
|
+
from rda_python_common import PgCMD
|
|
23
|
+
from rda_python_common import PgSIG
|
|
24
|
+
from rda_python_common import PgLock
|
|
25
|
+
from rda_python_common import PgUtil
|
|
26
|
+
from rda_python_common import PgFile
|
|
27
|
+
from rda_python_common import PgOPT
|
|
28
|
+
from rda_python_common import PgDBI
|
|
29
|
+
from . import PgCheck
|
|
30
|
+
|
|
31
|
+
# Module-level count of the check or daemon-control indices the current action
# works on; start_action() assigns it and the action functions read it.
ALLCNT = 0 # global counting variables
|
|
32
|
+
|
|
33
|
+
#
|
|
34
|
+
# main function to run dscheck
|
|
35
|
+
#
|
|
36
|
+
def main():
    """Parse the command line, run the selected dscheck action and exit.

    The program name 'dscheck' is handed to the option parser and to the
    dscheck option checker before the action is dispatched.
    """
    program = 'dscheck'
    PgOPT.parsing_input(program)
    PgCheck.check_dscheck_options(PgOPT.PGOPT['CACT'], program)
    start_action()

    # log end time if not getting action
    action_spec = PgOPT.OPTS[PgOPT.PGOPT['CACT']]
    if action_spec[2]:
        PgLOG.cmdlog()

    PgLOG.pgexit(0)
|
|
46
|
+
|
|
47
|
+
#
|
|
48
|
+
# start action of dscheck
|
|
49
|
+
#
|
|
50
|
+
def start_action():
    """Dispatch to the function implementing the current action code.

    The action code is read from PgOPT.PGOPT['CACT'].  For actions that work
    on a list of indices the module-level ALLCNT is set to the list length
    before the action function is called.
    """
    global ALLCNT

    action = PgOPT.PGOPT['CACT']

    if action == 'AC':
        add_check_info()
    elif action == 'CH':
        check_host_connection()
    elif action == 'DL':
        # check records and daemon controls may both be deleted in one run
        if 'CI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['CI'])
            delete_check_info()
        if 'DI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['DI'])
            delete_daemon_info()
    elif action == 'EC':
        email_check_info()
    elif action == 'GC':
        get_check_info()
    elif action == 'GD':
        get_daemon_info()
    elif action == "IC":
        ALLCNT = len(PgOPT.params['CI'])
        interrupt_dschecks()
    elif action == 'PC':
        PgCMD.set_batch_options(PgOPT.params, 2, 1)
        if 'DM' not in PgOPT.params:
            process_dschecks()
        else:
            # daemon mode
            ALLCNT = 0
            handle_dschecks()
    elif action == 'SD':
        ALLCNT = len(PgOPT.params['DI'])
        set_daemon_info()
    elif action == 'SO':
        PgCMD.set_batch_options(PgOPT.params, 2, 1)
        process_dscheck_options()
    elif action == "UL":
        if 'CI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['CI'])
        else:
            ALLCNT = 0
        unlock_checks()
|
|
89
|
+
|
|
90
|
+
#
|
|
91
|
+
# add a check for customized command
|
|
92
|
+
#
|
|
93
|
+
def add_check_info():
    """Add one dscheck record for a customized command.

    The first item of option CM is the command; the remaining CM items and
    any AV items are joined into the argument string.  Dataset id, action
    name, specialist and working directory come from options DS, AN, SN and
    WD, falling back to None, None, the login name and the current directory.
    """
    cmd = PgOPT.params['CM'].pop(0)
    argstr = PgLOG.argv_to_string(PgOPT.params['CM'], 0)
    if 'AV' in PgOPT.params:
        extra = PgLOG.argv_to_string(PgOPT.params['AV'], 0)
        argstr = (argstr + " " + extra) if argstr else (argstr + extra)

    dsid = None
    if 'DS' in PgOPT.params:
        dsid = PgOPT.params['DS'][0]
    action = None
    if 'AN' in PgOPT.params:
        action = PgOPT.params['AN'][0]

    PgCMD.set_batch_options(PgOPT.params, 2, 1)

    if 'SN' in PgOPT.params:
        specialist = PgOPT.params['SN'][0]
    else:
        specialist = PgOPT.params['LN']
    if 'WD' in PgOPT.params:
        workdir = PgOPT.params['WD'][0]
    else:
        workdir = PgLOG.PGLOG['CURDIR']

    PgCheck.add_one_dscheck(0, '', cmd, dsid, action, workdir, specialist,
                            argstr, None, None, PgOPT.PGOPT['extlog'])
|
|
107
|
+
|
|
108
|
+
#
|
|
109
|
+
# delete dscheck daemon controls for given daemon control indices
|
|
110
|
+
#
|
|
111
|
+
def delete_daemon_info():
    """Delete the dsdaemon records whose indices are given in option DI.

    Processes the first ALLCNT entries of PgOPT.params['DI'] and logs how
    many records were deleted.
    """
    plural = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Delete {} dscheck daemon control{} ...".format(ALLCNT, plural), PgLOG.WARNLG)

    # pgdel() results are added up to report how many records were deleted
    deleted = sum(
        PgDBI.pgdel("dsdaemon", "dindex = {}".format(PgOPT.params['DI'][pos]), PgOPT.PGOPT['extlog'])
        for pos in range(ALLCNT)
    )
    PgLOG.pglog("{} of {} dscheck daemon control{} deleted".format(deleted, ALLCNT, plural), PgOPT.PGOPT['wrnlog'])
|
|
120
|
+
|
|
121
|
+
#
|
|
122
|
+
# delete checks for given check indices
|
|
123
|
+
#
|
|
124
|
+
def delete_check_info():
    """Delete the dscheck records whose indices are given in option CI.

    Each index is locked via PgLock.lock_dscheck() first; indices for which
    the lock call returns a non-positive value are skipped.
    """
    plural = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Delete {} dscheck record{} ...".format(ALLCNT, plural), PgLOG.WARNLG)

    removed = 0
    for pos in range(ALLCNT):
        locked = PgLock.lock_dscheck(PgOPT.params['CI'][pos], 2, PgOPT.PGOPT['extlog'])
        if locked > 0:
            removed += PgCMD.delete_dscheck(None, "cindex = {}".format(locked), PgOPT.PGOPT['extlog'])

    PgLOG.pglog("{} of {} check record{} deleted".format(removed, ALLCNT, plural), PgOPT.PGOPT['wrnlog'])
|
|
135
|
+
|
|
136
|
+
#
|
|
137
|
+
# email notice of check status for specialist
|
|
138
|
+
#
|
|
139
|
+
def email_check_info():
    """Email a status summary of the matching dscheck records.

    Records are selected with the condition built from the command-line
    options and ordered by cindex.  One message section is built per record
    and the whole text is sent to the login name (option LN), with optional
    carbon copies from option CC.

    Returns the result of PgLOG.pglog() when no record matches, else None.
    """
    cnd = PgOPT.get_hash_condition("dscheck", None, None, 1)
    pgrecs = PgDBI.pgmget("dscheck", "*", cnd + " ORDER BY cindex", PgOPT.PGOPT['extlog'])

    allcnt = len(pgrecs['cindex']) if pgrecs else 0
    if not allcnt:
        return PgLOG.pglog("{}: No Check Information Found to send email for {}".format(PgLOG.PGLOG['CURUID'], cnd), PgLOG.LOGWRN)

    if allcnt > 1:
        s = 's'
        ss = "are"
    else:
        s = ''
        ss = "is"
    subject = "{} active Check Record{}".format(allcnt, s)
    mbuf = "{} {} listed:\n".format(subject, ss)

    # Replace only the status column with the expanded status text.  The
    # other columns must stay, because build_check_message() reads cindex,
    # command, argv, workdir, ... from each record.
    pgrecs['status'] = get_check_status(pgrecs, allcnt)

    for i in range(allcnt):
        if i > 0:
            mbuf += PgLOG.PGLOG['SEPLINE']
        mbuf += build_check_message(PgUtil.onerecord(pgrecs, i))

    if 'CC' in PgOPT.params:
        PgLOG.add_carbon_copy(PgOPT.params['CC'])
    subject += " found"
    PgLOG.send_email(subject, PgOPT.params['LN'], mbuf)
    PgLOG.pglog("Email sent to {} With Subject '{}'".format(PgOPT.params['LN'], subject), PgLOG.LOGWRN)
|
|
164
|
+
|
|
165
|
+
#
|
|
166
|
+
# build email message for a given check record
|
|
167
|
+
#
|
|
168
|
+
def build_check_message(pgrec):
    """Build the email text section for one check record.

    pgrec is a mapping with the keys cindex, command, argv, argextra,
    workdir, date, time, specialist, status, errmsg and pid.

    Returns the message text.  An error line is appended when the record
    has an error message, or when it has no process id.
    """
    msg = "Check Index: {}\nCommand: {} {}".format(pgrec['cindex'], pgrec['command'], pgrec['argv'])
    if pgrec['argextra']:
        msg += PgLOG.break_long_string(pgrec['argextra'], 100, "...", 1)
    msg += ("\nWork Directory: {}\n".format(pgrec['workdir']) +
            "Initial Execution: {} {} by {}\n".format(pgrec['date'], pgrec['time'], pgrec['specialist']) +
            "Current Status: {}\n".format(pgrec['status']))
    if pgrec['errmsg']:
        msg += "Error Message: {}\n".format(pgrec['errmsg'])
    elif not pgrec['pid']:
        # no recorded error and no process id
        msg += "Error Message: Aborted abnormally\n"

    return msg
|
|
181
|
+
|
|
182
|
+
#
|
|
183
|
+
# get dscheck daemon control information
|
|
184
|
+
#
|
|
185
|
+
def get_daemon_info():
    """Query dsdaemon records and write them to PgOPT.OUTPUT in columns.

    Display fields come from option FN (or the table default) and order
    fields from option ON (default "I").  When both ON and OB are given the
    records are sorted after retrieval; otherwise the ordering is appended
    to the query condition.
    """
    tname = "dsdaemon"
    tblhash = PgOPT.TBLHASH[tname]
    PgLOG.pglog("Get dscheck daemon control information from RDADB ...", PgLOG.WARNLG)

    oflds = lens = None
    fnames = PgOPT.params['FN'] if 'FN' in PgOPT.params else None
    fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT[tname])
    onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "I"
    # query the display fields plus any extra fields needed for ordering
    qnames = fnames + PgOPT.append_order_fields(onames, fnames, tname)
    condition = PgOPT.get_hash_condition(tname, None, None, 1)
    if 'ON' in PgOPT.params and 'OB' in PgOPT.params:
        oflds = PgOPT.append_order_fields(onames, None, tname)
    else:
        condition += PgOPT.get_order_string(onames, tname)

    pgrecs = PgDBI.pgmget(tname, PgOPT.get_string_fields(qnames, tname), condition, PgOPT.PGOPT['extlog'])
    if pgrecs:
        # NOTE(review): the original tested 'OF' here while get_check_info()
        # tests 'FO' for the same column-width step; aligned to 'FO' - confirm
        # against the option table.
        if 'FO' in PgOPT.params:
            lens = PgUtil.all_column_widths(pgrecs, fnames, tblhash)
        # sort by the order fields (as get_check_info() does), not by the
        # display fields
        if oflds:
            pgrecs = PgUtil.sorthash(pgrecs, oflds, tblhash, PgOPT.params['OB'])

    PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, tblhash, lens) + "\n")
    if pgrecs:
        cnt = PgOPT.print_column_format(pgrecs, fnames, tblhash, lens)
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} daemon control{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog'])
    else:
        PgLOG.pglog("No daemon control information retrieved", PgOPT.PGOPT['wrnlog'])
|
|
214
|
+
|
|
215
|
+
#
|
|
216
|
+
# get check information
|
|
217
|
+
#
|
|
218
|
+
def get_check_info():
    """Query dscheck records and write them to PgOPT.OUTPUT in columns.

    Display fields come from option FN (or the table default) and order
    fields from option ON (default "I").  With option CS the status column
    is replaced by the expanded status text from get_check_status() and the
    status field code 'U' is added to the display fields if missing.
    """
    tname = 'dscheck'
    tblhash = PgOPT.TBLHASH[tname]
    PgLOG.pglog("Get check information from RDADB ...", PgLOG.WARNLG)

    lens = oflds = None
    fnames = PgOPT.params['FN'] if 'FN' in PgOPT.params else None
    fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['chkall'])
    onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "I"
    condition = PgOPT.get_hash_condition(tname, None, None, 1)
    if 'ON' in PgOPT.params and 'OB' in PgOPT.params:
        oflds = PgOPT.append_order_fields(onames, None, tname)
    else:
        condition += PgOPT.get_order_string(onames, tname)

    pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog'])
    if pgrecs:
        if 'CS' in PgOPT.params:
            pgrecs['status'] = get_check_status(pgrecs)
            # The original wrote "fnames == 'U'", a comparison whose result
            # was discarded; append the field code so the status is shown.
            if fnames.find('U') < 0:
                fnames += 'U'
        if 'FO' in PgOPT.params:
            lens = PgUtil.all_column_widths(pgrecs, fnames, tblhash)
        if oflds:
            pgrecs = PgUtil.sorthash(pgrecs, oflds, tblhash, PgOPT.params['OB'])

    PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, tblhash, lens) + "\n")
    if pgrecs:
        cnt = PgOPT.print_column_format(pgrecs, fnames, tblhash, lens)
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} check record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog'])
    else:
        PgLOG.pglog("No check information retrieved", PgOPT.PGOPT['wrnlog'])
|
|
249
|
+
|
|
250
|
+
#
|
|
251
|
+
# add or modify dscheck daemon control information
|
|
252
|
+
#
|
|
253
|
+
def set_daemon_info():
    """Add or modify dsdaemon records from the command-line option values.

    For each of the ALLCNT entries: a positive daemon index (option DI)
    selects an existing record to update; otherwise a new record is added.
    A priority outside 0-10 is reported through PgOPT.action_error().
    """
    tname = "dsdaemon"
    _tblhash = PgOPT.TBLHASH[tname]   # looked up as in the original; not used further
    plural = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Set information of {} dscheck daemon control{} ...".format(ALLCNT, plural), PgLOG.WARNLG)

    added = 0
    modified = 0
    flds = PgOPT.get_field_keys(tname, None, 'I')
    PgOPT.validate_multiple_values(tname, ALLCNT, flds)

    for pos in range(ALLCNT):
        didx = 0
        if 'DI' in PgOPT.params:
            didx = PgOPT.params['DI'][pos]

        pgrec = None
        if didx > 0:
            cnd = "dindex = {}".format(didx)
            pgrec = PgDBI.pgget(tname, "*", cnd, PgOPT.PGOPT['extlog'])
            if not pgrec:
                PgOPT.action_error("Miss daemon record for " + cnd, 'SD')

        record = PgOPT.build_record(flds, pgrec, tname, pos)
        if not record:
            continue

        if 'priority' in record and (record['priority'] < 0 or record['priority'] > 10):
            PgOPT.action_error("{}: Priority value must in range 0(highest) - 10(lowest)".format(record['priority']), 'SD')

        if pgrec:
            modified += PgDBI.pgupdt(tname, record, cnd, PgOPT.PGOPT['extlog'])
            continue

        # new record: default the specialist to the login name unless that
        # login is the RDAUSER account
        if 'specialist' not in record and PgOPT.params['LN'] != PgLOG.PGLOG['RDAUSER']:
            record['specialist'] = PgOPT.params['LN']
        didx = PgDBI.pgadd(tname, record, PgOPT.PGOPT['extlog']|PgLOG.AUTOID)
        if didx:
            PgLOG.pglog("Daemon Control Index {} added".format(didx), PgOPT.PGOPT['wrnlog'])
            added += 1

    PgLOG.pglog("{}/{} of {} daemon control{} added/modified in RDADB!".format(added, modified, ALLCNT, plural), PgOPT.PGOPT['wrnlog'])
|
|
288
|
+
|
|
289
|
+
#
|
|
290
|
+
# expand check status info
|
|
291
|
+
#
|
|
292
|
+
def get_check_status(pgrecs, cnt = 0):
    """Return a list of expanded status strings, one per check record.

    pgrecs is a dict of column lists; cnt is the number of records to
    process (derived from the 'cindex' column when 0).

    NOTE(review): the source reached this review with indentation stripped;
    the nesting of the runhost/lock_process_info lines below is reconstructed
    from the logic - confirm against the packaged file.
    """
    if not cnt:
        cnt = len(pgrecs['cindex']) if pgrecs else 0

    stats = []
    for idx in range(cnt):
        rec = PgUtil.onerecord(pgrecs, idx)
        if rec['pid']:
            # record holds a process id
            percent = complete_percentage(rec)
            runhost = ""
            if percent < 0:
                text = "Pending"
            else:
                text = get_execution_string(rec['status'], rec['tcount'])
                rtime = PgCheck.dscheck_runtime(rec['stttime'])
                if rtime:
                    text += " {}".format(rtime)
                if percent > 0:
                    text += ", {}% done".format(percent)
                if rec['runhost']:
                    runhost = rec['runhost']
            text += PgLock.lock_process_info(rec['pid'], rec['lockhost'], runhost)
        else:
            text = PgCheck.dscheck_status(rec['status'])
            if rec['status'] in ('D', 'P'):
                runhost = rec['runhost'] or rec['lockhost']
                if runhost:
                    text += " on " + runhost
            elif rec['status'] == 'C' and rec['pindex']:
                text = "Wait on CHK {}".format(rec['pindex'])
        stats.append(text)

    return stats
|
|
319
|
+
|
|
320
|
+
#
|
|
321
|
+
# get the percentage of the check job done
|
|
322
|
+
#
|
|
323
|
+
def complete_percentage(check):
    """Return the completion percentage of a check record.

    Returns -1 when the record has a 'bid' but no 'stttime'.  Otherwise the
    percentage is dcount/fcount when both are set, or is looked up from the
    request/partition tables for a dsrqst command with an 'oindex'; 0 when
    neither applies.  The result never exceeds 99.
    """
    if check['bid'] and not check['stttime']:
        return -1

    if check['fcount'] > 0 and check['dcount']:
        done = int(100*check['dcount']/check['fcount'])
    elif check['command'] == "dsrqst" and check['oindex']:
        if check['otype'] == 'P':
            done = get_partition_percentage(check['oindex'])
        else:
            done = get_dsrqst_percentage(check['oindex'])
    else:
        done = 0

    # never report 100%
    return done if done < 100 else 99
|
|
338
|
+
|
|
339
|
+
#
|
|
340
|
+
# get a request percentage finished
|
|
341
|
+
#
|
|
342
|
+
def get_dsrqst_percentage(ridx):
    """Return the finished percentage (0-99) of the request with index ridx.

    Uses the fcount/pcount columns of the dsrqst record; when either is
    below 1 the value is taken from a PgDBI.pgget() call on wfrqst with an
    empty field list instead.  Returns 0 if the request is not found or a
    count is not positive.
    """
    rcnd = "rindex = {}".format(ridx)
    pgrqst = PgDBI.pgget("dsrqst", "fcount, pcount", rcnd)
    if not pgrqst:
        return 0

    total = pgrqst['fcount'] or 0
    if total < 1:
        total = PgDBI.pgget("wfrqst", "", rcnd)
    if not total > 0:
        return 0

    done = pgrqst['pcount'] or 0
    if done < 1:
        done = PgDBI.pgget("wfrqst", "", rcnd + " AND status = 'O'")
    if not done > 0:
        return 0

    return min(int(100*done/total), 99)
|
|
357
|
+
|
|
358
|
+
#
|
|
359
|
+
# get a partition percentage finished
|
|
360
|
+
#
|
|
361
|
+
def get_partition_percentage(pidx, cidx = 0):
    """Return the finished percentage (0-99) of the partition with index pidx.

    Uses the fcount column of the ptrqst record (falling back to a
    PgDBI.pgget() call on wfrqst with an empty field list) as the total, and
    the same call restricted to status 'O' as the finished count.
    cidx is accepted but not used.
    """
    pcnd = "pindex = {}".format(pidx)
    pgrec = PgDBI.pgget('ptrqst', "fcount", pcnd)
    if not pgrec:
        return 0

    total = pgrec['fcount'] or 0
    if total < 1:
        total = PgDBI.pgget("wfrqst", "", pcnd)
    if not total > 0:
        return 0

    done = PgDBI.pgget("wfrqst", "", pcnd + " AND status = 'O'")
    if not done > 0:
        return 0

    return min(int(100*done/total), 99)
|
|
375
|
+
|
|
376
|
+
#
|
|
377
|
+
# get the execution string for a given status and try count
|
|
378
|
+
#
|
|
379
|
+
def get_execution_string(stat, trycnt = 0):
    """Return the status text for stat, with the try order appended.

    The text comes from PgCheck.dscheck_status(); when trycnt is greater
    than 1, "(<order>)" built by PgLOG.int2order() is appended.
    """
    label = PgCheck.dscheck_status(stat)
    if trycnt > 1:
        label = label + "({})".format(PgLOG.int2order(trycnt))

    return label
|
|
385
|
+
|
|
386
|
+
#
|
|
387
|
+
# interrupt checks for given dscheck indices
|
|
388
|
+
#
|
|
389
|
+
def _release_dsupdt_lock(pgrec, pid, host):
    """Clear the pid of the dcupdt/dlupdt record locked by pid on host."""
    if pgrec['oindex']:
        cnd = "cindex = {} AND pid = {} AND ".format(pgrec['oindex'], pid)
        if PgDBI.pgexec("UPDATE dcupdt set pid = 0 WHERE {}lockhost = '{}'".format(cnd, host), PgOPT.PGOPT['extlog']):
            PgLOG.pglog("Update Control Index {} unlocked".format(pgrec['oindex']), PgLOG.LOGWRN)
    else:
        cnd = "dsid = '{}' AND pid = {} AND ".format(pgrec['dsid'], pid)
        dlupdt = PgDBI.pgget("dlupdt", "lindex", "{}hostname = '{}'".format(cnd, host))
        if dlupdt and PgDBI.pgexec("UPDATE dlupdt set pid = 0 WHERE lindex = {}".format(dlupdt['lindex']), PgOPT.PGOPT['extlog']):
            PgLOG.pglog("Update Local File Index {} unlocked".format(dlupdt['lindex']), PgLOG.LOGWRN)


def _release_dsrqst_lock(pgrec, pid, host):
    """Set status 'I' and pid 0 on the ptrqst/dsrqst record locked by pid on host."""
    record = {'status' : 'I', 'pid' : 0}
    if pgrec['otype'] == 'P':
        table = "ptrqst"
        field = "pindex"
        msg = "Request Partition Index"
    else:
        table = "dsrqst"
        field = "rindex"
        msg = "Request Index"

    if pgrec['oindex']:
        cnd = "{} = {} AND pid = {} AND lockhost = '{}'".format(field, pgrec['oindex'], pid, host)
    else:
        cnd = "dsid = '{}' AND pid = {} AND lockhost = '{}'".format(pgrec['dsid'], pid, host)

    if PgDBI.pgupdt(table, record, cnd, PgOPT.PGOPT['extlog']):
        PgLOG.pglog("{} {} unlocked".format(msg, pgrec['oindex']), PgLOG.LOGWRN)


def interrupt_dschecks():
    """Interrupt the checks whose indices are given in option CI.

    For each index: look the record up, skip it if it has no process id or
    the lock host is not actionable from here, look the process up with
    'rdaps', require the login name to match the listed owner and the mode
    option FI to be present, stop the process with 'rdakill', then mark the
    record with status 'I', delete it and clear the lock on the related
    dsupdt/dsrqst record.
    """
    s = 's' if ALLCNT > 1 else ''
    delcnt = 0
    for i in range(ALLCNT):
        cidx = PgOPT.params['CI'][i]
        cnd = "cindex = {}".format(cidx)
        cstr = "Check Index {}".format(cidx)
        pgrec = PgDBI.pgget("dscheck", "*", cnd, PgOPT.PGOPT['extlog'])
        if not pgrec:
            PgLOG.pglog(cstr + ": NOT in RDADB", PgOPT.PGOPT['extlog'])
            # If the log call returns, there is no record to work on; the
            # original fell through and subscripted None.
            continue
        pid = pgrec['pid']
        if not pid:
            PgLOG.pglog(cstr + ": Check is not under process; no interruption", PgOPT.PGOPT['wrnlog'])
            continue

        host = pgrec['lockhost']
        if not PgFile.local_host_action(host, "interrupt check", cstr, PgOPT.PGOPT['errlog']):
            continue

        opts = "-h {} -p {}".format(host, pid)
        buf = PgLOG.pgsystem("rdaps " + opts, PgLOG.LOGWRN, 20)   # 20 = 4 + 16
        if buf:
            # first word of the rdaps output is taken as the process owner
            ms = re.match(r'^\s*(\w+)\s+', buf)
            if ms:
                uid = ms.group(1)
                if uid != PgOPT.params['LN']:
                    PgLOG.pglog("{}: login name '{}'; must be '{}' to interrupt".format(cstr, PgOPT.params['LN'], uid), PgOPT.PGOPT['wrnlog'])
                    continue
                if 'FI' not in PgOPT.params:
                    PgLOG.pglog("{}: locked by {}/{}; must add Mode option -FI (-ForceInterrupt) to interrupt".format(cstr, pid, host), PgOPT.PGOPT['wrnlog'])
                    continue
                if not PgLOG.pgsystem("rdakill " + opts, PgLOG.LOGWRN, 7):
                    PgLOG.pglog("{}: Failed to interrupt Check locked by {}/{}".format(cstr, pid, host), PgOPT.PGOPT['errlog'])
                    continue
        else:
            # NOTE(review): the source reached this review with indentation
            # stripped; this 'else' is paired with 'if buf' (no rdaps output)
            # by inference - confirm against the packaged file.  The branch
            # only logs, so control flow is the same for any pairing.
            PgLOG.pglog("{}: check process stopped for {}/{}".format(cstr, pid, host), PgOPT.PGOPT['wrnlog'])

        # re-read the record: the lock may have changed meanwhile
        pgrec = PgDBI.pgget("dscheck", "*", cnd, PgOPT.PGOPT['extlog'])
        if not pgrec['pid']:
            if PgLock.lock_dscheck(cidx, 1, PgOPT.PGOPT['extlog']) <= 0:
                continue
        elif pid != pgrec['pid'] or host != pgrec['lockhost']:
            PgLOG.pglog("{}: Check is relocked by {}/{}".format(cstr, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['errlog'])
            continue

        pgrec['status'] = 'I'
        PgCMD.delete_dscheck(pgrec, None, PgOPT.PGOPT['extlog'])
        if pgrec['command'] == 'dsupdt':
            _release_dsupdt_lock(pgrec, pid, host)
        elif pgrec['command'] == 'dsrqst':
            _release_dsrqst_lock(pgrec, pid, host)
        delcnt += 1

    if ALLCNT > 1:
        PgLOG.pglog("{} of {} check{} interrupted".format(delcnt, ALLCNT, s), PgLOG.LOGWRN)
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
#
|
|
470
|
+
# unlock checks for given check indices
|
|
471
|
+
#
|
|
472
|
+
def unlock_checks():
    """Unlock check records.

    With ALLCNT > 0 each index in option CI is unlocked individually; a
    forced unlock is tried when the normal unlock fails and
    PgFile.check_host_down() is true for the lock host.  Otherwise the
    dsrqst, dsupdt and dscheck lock checkers in PgCheck are run with a
    condition on the host name from PgLOG.get_host(1).
    """
    if not ALLCNT > 0:
        cnd = "lockhost = '{}' AND ".format(PgLOG.get_host(1))
        PgCheck.check_dsrqst_locks(cnd, PgOPT.PGOPT['extlog'])
        PgCheck.check_dsupdt_locks(cnd, PgOPT.PGOPT['extlog'])
        PgCheck.check_dscheck_locks(cnd, PgOPT.PGOPT['extlog'])
        return

    plural = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Unlock {} check{} ...".format(ALLCNT, plural), PgLOG.WARNLG)

    unlocked = 0
    for cidx in PgOPT.params['CI']:
        pgrec = PgDBI.pgget("dscheck", "pid, lockhost", "cindex = {}".format(cidx), PgOPT.PGOPT['extlog'])
        if not pgrec:
            PgLOG.pglog("Check {}: Not exists".format(cidx), PgOPT.PGOPT['errlog'])
            continue
        if not pgrec['pid']:
            PgLOG.pglog("Check {}: Not locked".format(cidx), PgOPT.PGOPT['wrnlog'])
            continue

        if PgLock.lock_dscheck(cidx, -1, PgOPT.PGOPT['extlog']) > 0:
            unlocked += 1
            PgLOG.pglog("Check {}: Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])
        elif (PgFile.check_host_down(None, pgrec['lockhost']) and
              PgLock.lock_dscheck(cidx, -2, PgOPT.PGOPT['extlog']) > 0):
            unlocked += 1
            PgLOG.pglog("Check {}: Force unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])
        else:
            PgLOG.pglog("Check {}: Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])

    if ALLCNT > 1:
        PgLOG.pglog("{} of {} check{} unlocked from RDADB".format(unlocked, ALLCNT, plural), PgLOG.LOGWRN)
|
|
500
|
+
|
|
501
|
+
#
|
|
502
|
+
# process the checks
|
|
503
|
+
#
|
|
504
|
+
def process_dschecks():
    """Start the pending requests, updates and checks once.

    Builds the query condition from the command-line options (restricted to
    the login specialist unless option SN is given or the login is RDAUSER),
    starts dsrqst/dsupdt jobs when options WR/WU are present, then starts
    the dschecks.  An error email is sent if PgLOG.PGLOG['ERRCNT'] is set.
    """
    logact = PgLOG.LOGERR
    run_by_rdauser = PgLOG.PGLOG['CURUID'] == PgLOG.PGLOG['RDAUSER']
    # add the EMEROL flag only within the first 60 seconds of every 3*CTIME period
    if run_by_rdauser and (time.time() % (3*PgSIG.PGSIG['CTIME'])) < 60:
        logact |= PgLOG.EMEROL

    cnd = PgOPT.get_hash_condition("dscheck", "ST", None, 1)
    if cnd:
        cnd += " AND "
    if 'SN' not in PgOPT.params and PgOPT.params['LN'] != PgLOG.PGLOG['RDAUSER']:
        cnd += "specialist = '{}' AND ".format(PgOPT.params['LN'])

    if 'WR' in PgOPT.params:
        PgCheck.start_dsrqsts(cnd, logact)
    if 'WU' in PgOPT.params:
        PgCheck.start_dsupdts(cnd, logact)

    acnd = PgOPT.get_hash_condition("dscheck", None, "ST", 1)
    if acnd:
        acnd += " AND "
    PgCheck.start_dschecks(cnd + acnd, logact)

    if PgLOG.PGLOG['ERRCNT']:
        send_error_email()
|
|
524
|
+
|
|
525
|
+
#
|
|
526
|
+
# set dscheck options for the matching checks
|
|
527
|
+
#
|
|
528
|
+
def process_dscheck_options():
    """Call PgCheck.set_dscheck_options() for the matching checks.

    Builds the same query condition as process_dschecks() and passes it,
    together with the host name from PgLOG.get_host(1), to
    PgCheck.set_dscheck_options().  An error email is sent if
    PgLOG.PGLOG['ERRCNT'] is set.
    """
    logact = PgLOG.LOGERR
    run_by_rdauser = PgLOG.PGLOG['CURUID'] == PgLOG.PGLOG['RDAUSER']
    # add the EMEROL flag only within the first 60 seconds of every 3*CTIME period
    if run_by_rdauser and (time.time() % (3*PgSIG.PGSIG['CTIME'])) < 60:
        logact |= PgLOG.EMEROL

    cnd = PgOPT.get_hash_condition("dscheck", "ST", None, 1)
    if cnd:
        cnd += " AND "
    if 'SN' not in PgOPT.params and PgOPT.params['LN'] != PgLOG.PGLOG['RDAUSER']:
        cnd += "specialist = '{}' AND ".format(PgOPT.params['LN'])

    acnd = PgOPT.get_hash_condition("dscheck", None, "ST", 1)
    if acnd:
        acnd += " AND "
    PgCheck.set_dscheck_options(PgLOG.get_host(1), cnd + acnd, logact)

    if PgLOG.PGLOG['ERRCNT']:
        send_error_email()
|
|
545
|
+
|
|
546
|
+
#
|
|
547
|
+
# send an email notice to the running specialist
|
|
548
|
+
#
|
|
549
|
+
def send_email_notice(cmd, pgrec):
    """Email a notice that a check has been executed again.

    cmd is the command text shown in the message; pgrec is the check record
    and must provide cindex, workdir, tcount and errmsg.
    """
    s = 's' if pgrec['tcount'] > 1 else ''
    msg = ("Check Index {} for command:\n {}\n".format(pgrec['cindex'], cmd) +
           "under '{}' has been executed {} time{}.\n".format(pgrec['workdir'], pgrec['tcount'], s))
    if pgrec['errmsg']:
        msg += "Error message from previous execution:\n {}\n".format(pgrec['errmsg'])

    # The original format string had no placeholder, so the check index was
    # silently dropped from the suggested delete command.
    msg += ("If there is any problem, please fix it, delete the dscheck record via " +
            "'dscheck dl -ci {}'\nand restart the command.\n".format(pgrec['cindex']))

    PgLOG.send_email("Check Index {} reprocessed {} time{}".format(pgrec['cindex'], pgrec['tcount'], s), None, msg)
|
|
561
|
+
|
|
562
|
+
#
|
|
563
|
+
# rdadata daemon handles the daemon controls
|
|
564
|
+
#
|
|
565
|
+
def handle_dschecks():
    """Run the daemon loop that repeatedly starts requests, updates and checks.

    Loops until PgSIG.PGSIG['QUIT'] is set.  The EMEROL log flag is added
    on the first pass and again whenever the accumulated sleep time reaches
    4*CTIME.  The database connection is dropped on passes that started
    nothing.  On exit the totals are summarized by prepare_quit() and passed
    to PgSIG.stop_daemon().
    """
    ccnt = rcnt = ucnt = 0
    PgLOG.PGLOG['NOQUIT'] = 1
    period = 4*PgSIG.PGSIG['CTIME']
    elapsed = period   # forces the EMEROL flag on the first pass

    while not PgSIG.PGSIG['QUIT']:
        logact = PgLOG.LGEREX
        if elapsed >= period:
            logact = PgLOG.LGEREX|PgLOG.EMEROL
            elapsed = 0

        new_rqsts = PgCheck.start_dsrqsts("", logact)
        rcnt += new_rqsts
        new_updts = PgCheck.start_dsupdts("", logact)
        ucnt += new_updts
        new_checks = PgCheck.start_dschecks("", logact)
        ccnt += new_checks

        if PgLOG.PGLOG['ERRCNT']:
            send_error_email()
        if not (new_rqsts + new_updts + new_checks):
            PgDBI.pgdisconnect(1)

        elapsed += PgSIG.sleep_daemon()

    PgLOG.PGLOG['NOQUIT'] = 0
    PgSIG.stop_daemon(prepare_quit(ccnt, rcnt, ucnt))
|
|
597
|
+
|
|
598
|
+
#
|
|
599
|
+
# send an error email to the specialist
|
|
600
|
+
#
|
|
601
|
+
def send_error_email():
    """Send an email titled with the DSCHECK error subject and log the result.

    The subject names the host from PgLOG.PGLOG['HOSTNAME']; the value
    returned by PgLOG.send_email() is written to the log.
    """
    subject = "Error message for DSCHECK on " + PgLOG.PGLOG['HOSTNAME']

    PgLOG.set_email(subject, PgLOG.EMLTOP)
    result = PgLOG.send_email(subject)
    PgLOG.pglog(result, PgLOG.MSGLOG|PgLOG.FRCLOG)
|
|
608
|
+
|
|
609
|
+
#
|
|
610
|
+
# prepare a summary string for quit
|
|
611
|
+
#
|
|
612
|
+
def prepare_quit(ccnt, rcnt, ucnt):
    """Return a summary such as "2 dsrqsts, 1 dscheck" of the given counts.

    ccnt, rcnt and ucnt are the dscheck, dsrqst and dsupdt counts.  Only
    positive counts are listed, in the order dsrqst, dscheck, dsupdt; the
    result is an empty string when none is positive.
    """
    parts = []
    for count, label in ((rcnt, "dsrqst"), (ccnt, "dscheck"), (ucnt, "dsupdt")):
        if count > 0:
            plural = 's' if count > 1 else ''
            parts.append("{} {}{}".format(count, label, plural))

    return ", ".join(parts)
|
|
628
|
+
|
|
629
|
+
#
|
|
630
|
+
# check a daemon host if connectable
|
|
631
|
+
#
|
|
632
|
+
def check_host_connection():
    """Run 'ssh <host> ps' for each specialist/host pair of the daemon controls.

    With option HN every distinct specialist found in dsdaemon is paired
    with every given host name; otherwise the distinct specialist/hostname
    pairs are read from dsdaemon.  For a specialist other than the current
    user the command is prefixed with 'pgstart_<specialist>', which is only
    done when the current user is RDAUSER; other users get an error logged
    and the pair is skipped.
    """
    tname = "dsdaemon"
    _tblhash = PgOPT.TBLHASH[tname]   # looked up as in the original; not used further
    condition = PgOPT.get_hash_condition(tname, None, "H", 1)

    if 'HN' not in PgOPT.params:
        pgrecs = PgDBI.pgmget(tname, "DISTINCT specialist, hostname", condition, PgOPT.PGOPT['extlog'])
    else:
        pgrecs = {'specialist' : [], 'hostname' : []}
        spclsts = PgDBI.pgmget(tname, "DISTINCT specialist", condition, PgOPT.PGOPT['extlog'])
        if spclsts:
            pairs = [(spc, host) for spc in spclsts['specialist'] for host in PgOPT.params['HN']]
            pgrecs['specialist'].extend(pair[0] for pair in pairs)
            pgrecs['hostname'].extend(pair[1] for pair in pairs)

    cnt = len(pgrecs['specialist']) if pgrecs else 0
    if not cnt:
        PgLOG.pglog("No daemon host found to check connectivity", PgLOG.LOGWRN)
        return
    if cnt > 1:
        PgLOG.pglog("Check {} daemon hosts for connectivity ...".format(cnt), PgLOG.WARNLG)

    current_user = PgLOG.PGLOG['CURUID']
    for specialist, hostname in zip(pgrecs['specialist'], pgrecs['hostname']):
        cmd = "ssh {} ps".format(hostname)
        if specialist != current_user:
            if current_user != PgLOG.PGLOG['RDAUSER']:
                PgLOG.pglog("{}: Cannot check connection to '{}' for {}".format(PgLOG.PGLOG['CURUID'], hostname, specialist), PgLOG.LOGERR)
                continue
            cmd = "pgstart_{} {}".format(specialist, cmd)

        PgLOG.pglog("Check conection to '{}' for {} ...".format(hostname, specialist), PgLOG.WARNLG)
        PgLOG.pgsystem(cmd, PgLOG.LOGERR, 4, None, 15)
|
|
667
|
+
|
|
668
|
+
#
|
|
669
|
+
# call main() to start program
|
|
670
|
+
#
|
|
671
|
+
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__": main()
|