rda-python-dscheck 1.0.9__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/PKG-INFO +1 -2
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/pyproject.toml +6 -11
- rda_python_dscheck-2.0.1/src/rda_python_dscheck/dscheck.py +532 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/dscheck.usg +7 -7
- rda_python_dscheck-2.0.1/src/rda_python_dscheck/pg_check.py +1334 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/PKG-INFO +1 -2
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/SOURCES.txt +2 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/requires.txt +0 -1
- rda_python_dscheck-2.0.1/tests/test_dscheck.py +8 -0
- rda_python_dscheck-1.0.9/tests/test_dscheck.py +0 -6
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/LICENSE +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/MANIFEST.in +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/README.md +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/setup.cfg +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/PgCheck.py +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/__init__.py +0 -0
- /rda_python_dscheck-1.0.9/src/rda_python_dscheck/dscheck.py → /rda_python_dscheck-2.0.1/src/rda_python_dscheck/ds_check.py +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/dependency_links.txt +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/entry_points.txt +0 -0
- {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rda_python_dscheck
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: RDA python package to add and process batch jobs
|
|
5
5
|
Author-email: Zaihua Ji <zji@ucar.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/NCAR/rda-python-dscheck
|
|
@@ -12,7 +12,6 @@ Requires-Python: >=3.7
|
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: rda_python_common
|
|
15
|
-
Requires-Dist: rda_python_setuid
|
|
16
15
|
Dynamic: license-file
|
|
17
16
|
|
|
18
17
|
RDA python package to add and process batch jobs.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rda_python_dscheck"
|
|
7
|
-
version = "
|
|
7
|
+
version = "2.0.1"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Zaihua Ji", email="zji@ucar.edu" },
|
|
10
10
|
]
|
|
@@ -18,18 +18,13 @@ classifiers = [
|
|
|
18
18
|
"Development Status :: 5 - Production/Stable",
|
|
19
19
|
]
|
|
20
20
|
dependencies = [
|
|
21
|
-
"rda_python_common"
|
|
22
|
-
"rda_python_setuid",
|
|
21
|
+
"rda_python_common"
|
|
23
22
|
]
|
|
24
23
|
|
|
25
|
-
[tool.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
where = ["src"]
|
|
30
|
-
|
|
31
|
-
[tool.setuptools.package-data]
|
|
32
|
-
"rda_python_dscheck" = ["dscheck.usg"]
|
|
24
|
+
[tool.pytest.ini_options]
|
|
25
|
+
pythonpath = [
|
|
26
|
+
"src"
|
|
27
|
+
]
|
|
33
28
|
|
|
34
29
|
[project.urls]
|
|
35
30
|
"Homepage" = "https://github.com/NCAR/rda-python-dscheck"
|
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: dscheck
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 09/28/2020
|
|
8
|
+
# 2025-02-05 transferred to package rda_python_dscheck from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# 2025-12-05 convert to class DsCheck
|
|
11
|
+
# Purpose: python utility program to check and start command saved in dscheck
|
|
12
|
+
#
|
|
13
|
+
# Github: https://github.com/NCAR/rda-python-dscheck.git
|
|
14
|
+
#
|
|
15
|
+
##################################################################################
|
|
16
|
+
#
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
import sys
|
|
20
|
+
import time
|
|
21
|
+
from os import path as op
|
|
22
|
+
from .pg_check import PgCheck
|
|
23
|
+
|
|
24
|
+
class DsCheck(PgCheck):
|
|
25
|
+
|
|
26
|
+
def __init__(self):
    """Initialise the parent class and reset the per-action record counter."""
    # run the parent class initialiser first
    super().__init__()
    # number of indices the current action works on; start_actions() sets it
    # from the lengths of the 'CI' / 'DI' parameter lists
    self.ALLCNT = 0
|
|
29
|
+
|
|
30
|
+
# read in command line parameters
|
|
31
|
+
def read_parameters(self):
    """Read the command line for program 'dscheck' and validate the chosen action.

    The help path is registered from this module's file location, the input
    is parsed, and the action code found in ``self.PGOPT['CACT']`` is then
    passed to ``check_dscheck_options()``.
    """
    program = 'dscheck'
    self.set_help_path(__file__)
    self.parsing_input(program)
    # the action code is only read after parsing has run
    chosen_action = self.PGOPT['CACT']
    self.check_dscheck_options(chosen_action, program)
|
|
36
|
+
|
|
37
|
+
# start action of dscheck
|
|
38
|
+
def start_actions(self):
    """Dispatch to the handler for the action code in ``self.PGOPT['CACT']``.

    Actions that operate on a list of indices first store the list length in
    ``self.ALLCNT``. After the handler returns, ``self.cmdlog()`` is called
    when the third element of the action's ``self.OPTS`` entry is truthy.
    """
    action = self.PGOPT['CACT']
    # actions that need no preparation before their handler runs
    plain_handlers = {
        'AC': 'add_check_info',
        'CH': 'check_host_connection',
        'EC': 'email_check_info',
        'GC': 'get_check_info',
        'GD': 'get_daemon_info',
    }
    if action in plain_handlers:
        getattr(self, plain_handlers[action])()
    elif action == 'DL':
        # check records and daemon controls may both be deleted in one call
        if 'CI' in self.params:
            self.ALLCNT = len(self.params['CI'])
            self.delete_check_info()
        if 'DI' in self.params:
            self.ALLCNT = len(self.params['DI'])
            self.delete_daemon_info()
    elif action == "IC":
        self.ALLCNT = len(self.params['CI'])
        self.interrupt_dschecks()
    elif action == 'PC':
        self.set_batch_options(self.params, 2, 1)
        if 'DM' not in self.params:
            self.process_dschecks()
        else:
            # daemon mode: loop until told to quit
            self.ALLCNT = 0
            self.handle_dschecks()
    elif action == 'SD':
        self.ALLCNT = len(self.params['DI'])
        self.set_daemon_info()
    elif action == 'SO':
        self.set_batch_options(self.params, 2, 1)
        self.process_dscheck_options()
    elif action == "UL":
        self.ALLCNT = len(self.params['CI']) if 'CI' in self.params else 0
        self.unlock_checks()
    # log end time if not getting action
    if self.OPTS[self.PGOPT['CACT']][2]:
        self.cmdlog()
|
|
76
|
+
|
|
77
|
+
# add a check for customized command
|
|
78
|
+
def add_check_info(self):
    """Add one check record for a customized command.

    The first element of ``self.params['CM']`` is removed and used as the
    command name; the remaining elements, followed by any 'AV' values, form
    the argument string. Dataset id ('DS') and action name ('AN') are
    optional. The specialist falls back to the login name
    (``self.params['LN']``) and the work directory to
    ``self.PGLOG['CURDIR']``.
    """
    command = self.params['CM'].pop(0)
    arguments = self.argv_to_string(self.params['CM'], 0)
    if 'AV' in self.params:
        extra = self.argv_to_string(self.params['AV'], 0)
        arguments = arguments + " " + extra if arguments else arguments + extra

    dsid = None
    if 'DS' in self.params:
        dsid = self.params['DS'][0]
    action = None
    if 'AN' in self.params:
        action = self.params['AN'][0]

    self.set_batch_options(self.params, 2, 1)

    # fallbacks are only looked up when the explicit option is absent
    if 'SN' in self.params:
        specialist = self.params['SN'][0]
    else:
        specialist = self.params['LN']
    if 'WD' in self.params:
        workdir = self.params['WD'][0]
    else:
        workdir = self.PGLOG['CURDIR']

    self.add_one_dscheck(0, '', command, dsid, action, workdir, specialist,
                         arguments, None, None, self.PGOPT['extlog'])
|
|
91
|
+
|
|
92
|
+
# delete dscheck daemon controls for given daemon control indices
|
|
93
|
+
def delete_daemon_info(self):
    """Delete the ``dsdaemon`` rows whose ``dindex`` values are listed in ``self.params['DI']``.

    ``self.ALLCNT`` gives how many indices to process; the number of deleted
    rows is logged at the end.
    """
    total = self.ALLCNT
    plural = '' if total <= 1 else 's'
    self.pglog("Delete {} dscheck daemon control{} ...".format(total, plural), self.WARNLG)
    removed = 0
    for pos in range(total):
        where = "dindex = {}".format(self.params['DI'][pos])
        removed += self.pgdel("dsdaemon", where, self.PGOPT['extlog'])
    self.pglog("{} of {} dscheck daemon control{} deleted".format(removed, total, plural), self.PGOPT['wrnlog'])
|
|
100
|
+
|
|
101
|
+
# delete checks for given check indices
|
|
102
|
+
def delete_check_info(self):
    """Delete the check records whose indices are listed in ``self.params['CI']``.

    Each record is locked first via ``lock_dscheck()``; an index for which
    that call returns a value <= 0 is skipped. The number of records
    deleted is logged at the end.
    """
    total = self.ALLCNT
    plural = '' if total <= 1 else 's'
    self.pglog("Delete {} dscheck record{} ...".format(total, plural), self.WARNLG)
    removed = 0
    for pos in range(total):
        locked_index = self.lock_dscheck(self.params['CI'][pos], 2, self.PGOPT['extlog'])
        if locked_index > 0:
            removed += self.delete_dscheck(None, "cindex = {}".format(locked_index), self.PGOPT['extlog'])
    self.pglog("{} of {} check record{} deleted".format(removed, total, plural), self.PGOPT['wrnlog'])
|
|
111
|
+
|
|
112
|
+
# email notice of check status for specialist
|
|
113
|
+
def email_check_info(self):
    """Email a summary of the matching check records to the login user.

    Records are read from table ``dscheck`` ordered by ``cindex``. Each one
    is rendered with ``build_check_message()`` and consecutive messages are
    separated by ``self.PGLOG['SEPLINE']``. Addresses given with option 'CC'
    are added as carbon copies.

    Returns:
        The result of ``self.pglog()`` when no record matches; otherwise None.
    """
    cnd = self.get_hash_condition("dscheck", None, None, 1)
    pgrecs = self.pgmget("dscheck", "*", cnd + " ORDER BY cindex", self.PGOPT['extlog'])
    allcnt = len(pgrecs['cindex']) if pgrecs else 0
    if not allcnt:
        return self.pglog("{}: No Check Information Found to send email for {}".format(self.PGLOG['CURUID'], cnd), self.LOGWRN)

    s, ss = ('s', "are") if allcnt > 1 else ('', "is")
    subject = "{} active Check Record{}".format(allcnt, s)
    mbuf = "{} {} listed:\n".format(subject, ss)

    # Overwrite only the status column with its expanded text. Rebinding
    # pgrecs to a dict holding just 'status' would drop cindex, command,
    # workdir, ... which build_check_message() reads from every record.
    pgrecs['status'] = self.get_check_status(pgrecs, allcnt)
    for i in range(allcnt):
        if i > 0:
            mbuf += self.PGLOG['SEPLINE']
        mbuf += self.build_check_message(self.onerecord(pgrecs, i))

    if 'CC' in self.params:
        self.add_carbon_copy(self.params['CC'])
    subject += " found"
    self.send_email(subject, self.params['LN'], mbuf)
    self.pglog("Email sent to {} With Subject '{}'".format(self.params['LN'], subject), self.LOGWRN)
|
|
134
|
+
|
|
135
|
+
# build email message for a given check record
|
|
136
|
+
def build_check_message(self, pgrec):
    """Build the email text describing one check record.

    Args:
        pgrec: mapping for one ``dscheck`` record; the keys read here are
            cindex, command, argv, argextra, workdir, date, time,
            specialist, status, errmsg and pid.

    Returns:
        A multi-line string ending with a newline.
    """
    msg = "Check Index: {}\nCommand: {} {}".format(pgrec['cindex'], pgrec['command'], pgrec['argv'])
    if pgrec['argextra']:
        msg += self.break_long_string(pgrec['argextra'], 100, "...", 1)
    msg += ("\nWork Directory: {}\n".format(pgrec['workdir']) +
            # fixed: the message used to read "byb" instead of "by"
            "Initial Execution: {} {} by {}\n".format(pgrec['date'], pgrec['time'], pgrec['specialist']) +
            "Current Status: {}\n".format(pgrec['status']))
    if pgrec['errmsg']:
        msg += "Error Message: {}\n".format(pgrec['errmsg'])
    elif not pgrec['pid']:
        # no recorded error and no owning process id
        msg += "Error Message: Aborted abnormally\n"
    return msg
|
|
147
|
+
|
|
148
|
+
# get dscheck daemon control information
|
|
149
|
+
def get_daemon_info(self):
    """Retrieve daemon control records from table ``dsdaemon`` and print them.

    Field names come from option 'FN' (or the table default in
    ``self.PGOPT``); ordering comes from option 'ON' (default "I"). When
    both 'ON' and 'OB' are given the rows are sorted in Python with
    ``sorthash()``; otherwise an order string is appended to the query
    condition. Titles and rows are written to ``self.OUTPUT``.
    """
    tname = "dsdaemon"
    table_hash = self.TBLHASH[tname]
    self.pglog("Get dscheck daemon control information from RDADB ...", self.WARNLG)
    oflds = lens = None
    fnames = self.params['FN'] if 'FN' in self.params else None
    fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT[tname])
    onames = self.params['ON'] if 'ON' in self.params else "I"
    # query the display fields plus any order fields not already among them
    qnames = fnames + self.append_order_fields(onames, fnames, tname)
    condition = self.get_hash_condition(tname, None, None, 1)
    if 'ON' in self.params and 'OB' in self.params:
        oflds = self.append_order_fields(onames, None, tname)
    else:
        condition += self.get_order_string(onames, tname)
    pgrecs = self.pgmget(tname, self.get_string_fields(qnames, tname), condition, self.PGOPT['extlog'])
    if pgrecs:
        # get_check_info() keys column formatting on 'FO'; this method used
        # 'OF'. Both are accepted so existing invocations keep working.
        # NOTE(review): confirm which option key is intended here.
        if 'FO' in self.params or 'OF' in self.params:
            lens = self.all_column_widths(pgrecs, fnames, table_hash)
        # sort by the requested order fields, not by the display fields
        if oflds:
            pgrecs = self.sorthash(pgrecs, oflds, table_hash, self.params['OB'])
        self.OUTPUT.write(self.get_string_titles(fnames, table_hash, lens) + "\n")
    if pgrecs:
        cnt = self.print_column_format(pgrecs, fnames, table_hash, lens)
        s = 's' if cnt > 1 else ''
        self.pglog("{} daemon control{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
    else:
        self.pglog("No daemon control information retrieved", self.PGOPT['wrnlog'])
|
|
174
|
+
|
|
175
|
+
# get check information
|
|
176
|
+
def get_check_info(self):
    """Retrieve check records from table ``dscheck`` and print them.

    Field names come from option 'FN' (or the defaults in ``self.PGOPT``);
    ordering comes from option 'ON' (default "I"). When both 'ON' and 'OB'
    are given the rows are sorted in Python with ``sorthash()``; otherwise
    an order string is appended to the query condition. With option 'CS'
    the status column is replaced by the expanded status text and field
    'U' is added to the displayed fields if it is not already there.
    """
    tname = 'dscheck'
    table_hash = self.TBLHASH[tname]
    self.pglog("Get check information from RDADB ...", self.WARNLG)
    lens = oflds = None
    fnames = self.params['FN'] if 'FN' in self.params else None
    fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['chkall'])
    onames = self.params['ON'] if 'ON' in self.params else "I"
    condition = self.get_hash_condition(tname, None, None, 1)
    if 'ON' in self.params and 'OB' in self.params:
        oflds = self.append_order_fields(onames, None, tname)
    else:
        condition += self.get_order_string(onames, tname)
    pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog'])
    if pgrecs:
        if 'CS' in self.params:
            pgrecs['status'] = self.get_check_status(pgrecs)
            # append the field (the old code compared with '==', a no-op)
            # presumably 'U' is the short name of the status column - TODO confirm
            if fnames.find('U') < 0:
                fnames += 'U'
        if 'FO' in self.params:
            lens = self.all_column_widths(pgrecs, fnames, table_hash)
        if oflds:
            pgrecs = self.sorthash(pgrecs, oflds, table_hash, self.params['OB'])
        self.OUTPUT.write(self.get_string_titles(fnames, table_hash, lens) + "\n")
    if pgrecs:
        cnt = self.print_column_format(pgrecs, fnames, table_hash, lens)
        s = 's' if cnt > 1 else ''
        self.pglog("{} check record{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
    else:
        self.pglog("No check information retrieved", self.PGOPT['wrnlog'])
|
|
203
|
+
|
|
204
|
+
# add or modify dscheck daemon control information
|
|
205
|
+
def set_daemon_info(self):
    """Add or modify daemon control records in table ``dsdaemon``.

    For each of the ``self.ALLCNT`` entries, a positive index in
    ``self.params['DI']`` selects an existing row to update; any other
    value adds a new row. A 'priority' value outside 0-10 is reported
    through ``action_error()``. New rows without a specialist get the
    login name unless the login name equals ``self.PGLOG['GDEXUSER']``.
    """
    tname = "dsdaemon"
    table_hash = self.TBLHASH[tname]   # not used below
    total = self.ALLCNT
    plural = '' if total <= 1 else 's'
    self.pglog("Set information of {} dscheck daemon control{} ...".format(total, plural), self.WARNLG)
    added = 0
    modified = 0
    flds = self.get_field_keys(tname, None, 'I')
    self.validate_multiple_values(tname, total, flds)
    for pos in range(total):
        didx = 0
        if 'DI' in self.params:
            didx = self.params['DI'][pos]
        existing = None
        if didx > 0:
            cnd = "dindex = {}".format(didx)
            existing = self.pgget(tname, "*", cnd, self.PGOPT['extlog'])
            if not existing:
                self.action_error("Miss daemon record for " + cnd, 'SD')
        record = self.build_record(flds, existing, tname, pos)
        if not record:
            continue
        if 'priority' in record and (record['priority'] < 0 or record['priority'] > 10):
            self.action_error("{}: Priority value must in range 0(highest) - 10(lowest)".format(record['priority']), 'SD')
        if existing:
            # cnd was set above, since existing is only truthy after a lookup
            modified += self.pgupdt(tname, record, cnd, self.PGOPT['extlog'])
            continue
        if 'specialist' not in record and self.params['LN'] != self.PGLOG['GDEXUSER']:
            record['specialist'] = self.params['LN']
        didx = self.pgadd(tname, record, self.PGOPT['extlog']|self.AUTOID)
        if didx:
            self.pglog("Daemon Control Index {} added".format(didx), self.PGOPT['wrnlog'])
            added += 1
    self.pglog("{}/{} of {} daemon control{} added/modified in RDADB!".format(added, modified, total, plural), self.PGOPT['wrnlog'])
|
|
234
|
+
|
|
235
|
+
# expand check status info
|
|
236
|
+
def get_check_status(self, pgrecs, cnt = 0):
    """Build an expanded status string for each check record.

    Args:
        pgrecs: column-oriented records (mapping of field name to list).
        cnt: number of records to process; when falsy it is taken from the
            length of ``pgrecs['cindex']``.

    Returns:
        A list with one status string per record.
    """
    if not cnt:
        cnt = len(pgrecs['cindex']) if pgrecs else 0
    stats = []
    for i in range(cnt):
        pgrec = self.onerecord(pgrecs, i)
        if pgrec['pid']:
            # record is owned by a process: pending or running
            percent = self.complete_percentage(pgrec)
            runhost = ""
            if percent < 0:
                text = "Pending"
            else:
                text = self.get_execution_string(pgrec['status'], pgrec['tcount'])
                rtime = self.dscheck_runtime(pgrec['stttime'])
                if rtime:
                    text += " {}".format(rtime)
                if percent > 0:
                    text += ", {}% done".format(percent)
                if pgrec['runhost']:
                    runhost = pgrec['runhost']
            text += self.lock_process_info(pgrec['pid'], pgrec['lockhost'], runhost)
        else:
            text = self.dscheck_status(pgrec['status'])
            if pgrec['status'] in ('D', 'P'):
                runhost = pgrec['runhost'] or pgrec['lockhost']
                if runhost:
                    text += " on " + runhost
            elif pgrec['status'] == 'C' and pgrec['pindex']:
                text = "Wait on CHK {}".format(pgrec['pindex'])
        stats.append(text)
    return stats
|
|
261
|
+
|
|
262
|
+
# get the percentage of the check job done
|
|
263
|
+
def complete_percentage(self, check):
    """Return how much of a check job is done, as an integer percentage.

    Args:
        check: mapping for one check record; keys read are bid, stttime,
            fcount, dcount, command, oindex and otype.

    Returns:
        -1 when the record has a ``bid`` but no ``stttime``; otherwise a
        value capped at 99 (0 when nothing can be computed).
    """
    if check['bid'] and not check['stttime']:
        percent = -1
    elif check['fcount'] > 0 and check['dcount']:
        percent = int(100*check['dcount']/check['fcount'])
    elif check['command'] == "dsrqst" and check['oindex']:
        # request jobs: ask the request / partition tables instead
        lookup = self.get_partition_percentage if check['otype'] == 'P' else self.get_dsrqst_percentage
        percent = lookup(check['oindex'])
    else:
        percent = 0
    return 99 if percent >= 100 else percent
|
|
275
|
+
|
|
276
|
+
# get a request percentage finished
|
|
277
|
+
def get_dsrqst_percentage(self, ridx):
    """Return the finished percentage (0-99) of the request with index ``ridx``.

    Counts are read from ``dsrqst.fcount`` / ``dsrqst.pcount``; when either
    is below 1 the value returned by ``pgget("wfrqst", "", ...)`` is used
    instead (for the done count, restricted to ``status = 'O'``).
    """
    where = "rindex = {}".format(ridx)
    request = self.pgget("dsrqst", "fcount, pcount", where)
    if not request:
        return 0
    total = request['fcount'] or 0
    if total < 1:
        total = self.pgget("wfrqst", "", where)
    if total <= 0:
        return 0
    done = request['pcount'] or 0
    if done < 1:
        done = self.pgget("wfrqst", "", where + " AND status = 'O'")
    if done <= 0:
        return 0
    # never report 100%: the value is capped at 99
    return min(int(100*done/total), 99)
|
|
291
|
+
|
|
292
|
+
# get a partition percentage finished
|
|
293
|
+
def get_partition_percentage(self, pidx, cidx = 0):
    """Return the finished percentage (0-99) of the request partition ``pidx``.

    The total comes from ``ptrqst.fcount``, or from
    ``pgget("wfrqst", "", ...)`` when that is below 1; the done count is
    the same lookup restricted to ``status = 'O'``. ``cidx`` is accepted
    but not used.
    """
    where = "pindex = {}".format(pidx)
    partition = self.pgget('ptrqst', "fcount", where)
    if not partition:
        return 0
    total = partition['fcount'] or 0
    if total < 1:
        total = self.pgget("wfrqst", "", where)
    if total <= 0:
        return 0
    done = self.pgget("wfrqst", "", where + " AND status = 'O'")
    if done <= 0:
        return 0
    # never report 100%: the value is capped at 99
    return min(int(100*done/total), 99)
|
|
306
|
+
|
|
307
|
+
# get execution string for given try count
|
|
308
|
+
def get_execution_string(self, stat, trycnt = 0):
    """Return the status text for ``stat``, tagged with the try ordinal.

    The ordinal from ``int2order(trycnt)`` is appended in parentheses only
    when ``trycnt`` is greater than 1.
    """
    label = self.dscheck_status(stat)
    if trycnt <= 1:
        return label
    return label + "({})".format(self.int2order(trycnt))
|
|
312
|
+
|
|
313
|
+
# interrupt checks for given dscheck indices
|
|
314
|
+
def interrupt_dschecks(self):
    """Interrupt the running checks whose indices are listed in ``self.params['CI']``.

    For each index the record is read from table ``dscheck``; a record with
    pid 0 is skipped. The owning process is looked up with the external
    command ``rdaps`` and stopped with ``rdakill``; the check record is then
    marked 'I' and removed via ``delete_dscheck()``, and the matching
    dsupdt / dsrqst lock is released.

    NOTE(review): the nesting below was reconstructed from a rendering that
    lost indentation - confirm against the repository before relying on it.
    """
    s = 's' if self.ALLCNT > 1 else ''
    delcnt = 0
    for i in range(self.ALLCNT):
        cidx = self.params['CI'][i]
        cnd = "cindex = {}".format(cidx)
        cstr = "Check Index {}".format(cidx)
        pgrec = self.pgget("dscheck", "*", cnd, self.PGOPT['extlog'])
        # presumably the 'extlog' action makes pglog() exit, since pgrec is
        # dereferenced right after - TODO confirm
        if not pgrec: self.pglog(cstr +": NOT in RDADB", self.PGOPT['extlog'])
        pid = pgrec['pid']
        if pid == 0:
            self.pglog(cstr + ": Check is not under process; no interruption", self.PGOPT['wrnlog'])
            continue
        host = pgrec['lockhost']
        if not self.local_host_action(host, "interrupt check", cstr, self.PGOPT['errlog']): continue
        opts = "-h {} -p {}".format(host, pid)
        buf = self.pgsystem("rdaps " + opts, self.LOGWRN, 20)   # 20 = 4 + 16
        if buf:
            # first word of the rdaps output is compared with the login name
            ms = re.match(r'^\s*(\w+)\s+', buf)
            if ms:
                uid = ms.group(1)
                if uid != self.params['LN']:
                    self.pglog("{}: login name '{}'; must be '{}' to interrupt".format(cstr, self.params['LN'], uid), self.PGOPT['wrnlog'])
                    continue
                if 'FI' not in self.params:
                    self.pglog("{}: locked by {}/{}; must add Mode option -FI (-ForceInterrupt) to interrupt".format(cstr, pid, host), self.PGOPT['wrnlog'])
                    continue
                if not self.pgsystem("rdakill " + opts, self.LOGWRN, 7):
                    self.pglog("{}: Failed to interrupt Check locked by {}/{}".format(cstr, pid, host), self.PGOPT['errlog'])
                    continue
        else:
            # no rdaps output for the pid
            self.pglog("{}: check process stopped for {}/{}".format(cstr, pid, host), self.PGOPT['wrnlog'])
        # re-read the record: its lock may have changed in the meantime
        pgrec = self.pgget("dscheck", "*", cnd, self.PGOPT['extlog'])
        if not pgrec['pid']:
            if self.lock_dscheck(cidx, 1, self.PGOPT['extlog']) <= 0: continue
        elif pid != pgrec['pid'] or host != pgrec['lockhost']:
            self.pglog("{}: Check is relocked by {}/{}".format(cstr, pgrec['pid'], pgrec['lockhost']), self.PGOPT['errlog'])
            continue
        pgrec['status'] = 'I'
        self.delete_dscheck(pgrec, None, self.PGOPT['extlog'])
        if pgrec['command'] == 'dsupdt':
            # release the update lock held by the interrupted process
            if pgrec['oindex']:
                cnd = "cindex = {} AND pid = {} AND ".format(pgrec['oindex'], pid)
                if self.pgexec("UPDATE dcupdt set pid = 0 WHERE {}lockhost = '{}'".format(cnd, host), self.PGOPT['extlog']):
                    self.pglog("Update Control Index {} unlocked".format(pgrec['oindex']), self.LOGWRN)
            else:
                cnd = "dsid = '{}' AND pid = {} AND ".format(pgrec['dsid'], pid)
                dlupdt = self.pgget("dlupdt", "lindex", "{}hostname = '{}'".format(cnd , host))
                if dlupdt and self.pgexec("UPDATE dlupdt set pid = 0 WHERE lindex = {}".format(dlupdt['lindex']), self.PGOPT['extlog']):
                    self.pglog("Update Local File Index {} unlocked".format(dlupdt['lindex']), self.LOGWRN)
        elif pgrec['command'] == 'dsrqst':
            # mark the request (or request partition) interrupted and unlocked
            record = {'status': 'I', 'pid': 0}
            if pgrec['otype'] == 'P':
                table = "ptrqst"
                field = "pindex"
                msg = "Request Partition Index"
            else:
                table = "dsrqst"
                field = "rindex"
                msg = "Request Index"
            if pgrec['oindex']:
                cnd = "{} = {} AND pid = {} AND lockhost = '{}'".format(field, pgrec['oindex'], pid, host)
            else:
                cnd = "dsid = '{}' AND pid = {} AND lockhost = '{}'".format(pgrec['dsid'], pid, host)
            if self.pgupdt(table, record, cnd, self.PGOPT['extlog']):
                self.pglog("{} {} unlocked".format(msg, pgrec['oindex']), self.LOGWRN)
        delcnt += 1
    if self.ALLCNT > 1: self.pglog("{} of {} check{} interrupted".format(delcnt, self.ALLCNT, s), self.LOGWRN)
|
|
382
|
+
|
|
383
|
+
# unlock checks for given check indices
|
|
384
|
+
def unlock_checks(self):
    """Unlock check records.

    With ``self.ALLCNT`` > 0 each index in ``self.params['CI']`` is
    unlocked via ``lock_dscheck(cidx, -1, ...)``; if that fails and
    ``check_host_down()`` reports the lock host down, ``lock_dscheck(cidx,
    -2, ...)`` is tried. Without indices, the dsrqst, dsupdt and dscheck
    lock checks are run for the host returned by ``get_host(1)``.
    """
    if self.ALLCNT <= 0:
        # no indices given: check all locks for this host
        host_cnd = "lockhost = '{}' AND ".format(self.get_host(1))
        self.check_dsrqst_locks(host_cnd, self.PGOPT['extlog'])
        self.check_dsupdt_locks(host_cnd, self.PGOPT['extlog'])
        self.check_dscheck_locks(host_cnd, self.PGOPT['extlog'])
        return

    total = self.ALLCNT
    plural = '' if total <= 1 else 's'
    self.pglog("Unlock {} check{} ...".format(total, plural), self.WARNLG)
    unlocked = 0
    for cidx in self.params['CI']:
        lock = self.pgget("dscheck", "pid, lockhost", "cindex = {}".format(cidx), self.PGOPT['extlog'])
        if not lock:
            self.pglog("Check {}: Not exists".format(cidx), self.PGOPT['errlog'])
            continue
        if not lock['pid']:
            self.pglog("Check {}: Not locked".format(cidx), self.PGOPT['wrnlog'])
            continue
        if self.lock_dscheck(cidx, -1, self.PGOPT['extlog']) > 0:
            unlocked += 1
            self.pglog("Check {}: Unlocked {}/{}".format(cidx, lock['pid'], lock['lockhost']), self.PGOPT['wrnlog'])
            continue
        # the second attempt is only made when the lock host is reported down
        host_down = self.check_host_down(None, lock['lockhost'])
        if host_down and self.lock_dscheck(cidx, -2, self.PGOPT['extlog']) > 0:
            unlocked += 1
            self.pglog("Check {}: Force unlocked {}/{}".format(cidx, lock['pid'], lock['lockhost']), self.PGOPT['wrnlog'])
        else:
            self.pglog("Check {}: Unable to unlock {}/{}".format(cidx, lock['pid'], lock['lockhost']), self.PGOPT['wrnlog'])
    if total > 1:
        self.pglog("{} of {} check{} unlocked from RDADB".format(unlocked, total, plural), self.LOGWRN)
|
|
410
|
+
|
|
411
|
+
# process the checks
|
|
412
|
+
def process_dschecks(self):
    """Start pending requests, updates and checks once (non-daemon mode).

    The query condition is built from the 'ST' option via
    ``get_hash_condition()`` and, unless option 'SN' is given or the login
    is ``self.PGLOG['GDEXUSER']``, restricted to the login's own records.
    ``start_dsrqsts`` / ``start_dsupdts`` run only with options 'WR' /
    'WU'. An error email is sent when ``self.PGLOG['ERRCNT']`` is set.
    """
    logact = self.LOGERR
    is_gdex_user = self.PGLOG['CURUID'] == self.PGLOG['GDEXUSER']
    # the email flag is added only during the first 60 seconds of each 3*CTIME window
    if is_gdex_user and (time.time() % (3*self.PGSIG['CTIME'])) < 60:
        logact |= self.EMEROL

    status_cnd = self.get_hash_condition("dscheck", "ST", None, 1)
    if status_cnd:
        status_cnd += " AND "
    if not ('SN' in self.params or self.params['LN'] == self.PGLOG['GDEXUSER']):
        status_cnd += "specialist = '{}' AND ".format(self.params['LN'])

    if 'WR' in self.params:
        self.start_dsrqsts(status_cnd, logact)
    if 'WU' in self.params:
        self.start_dsupdts(status_cnd, logact)

    other_cnd = self.get_hash_condition("dscheck", None, "ST", 1)
    if other_cnd:
        other_cnd += " AND "
    self.start_dschecks(status_cnd + other_cnd, logact)
    if self.PGLOG['ERRCNT']:
        self.send_error_email()
|
|
426
|
+
|
|
427
|
+
# process the checks
|
|
428
|
+
def process_dscheck_options(self):
    """Build the check query condition and pass it to ``set_dscheck_options()``.

    The condition is assembled as in process_dschecks(): the 'ST' part
    first, an optional restriction to the login's own records, then the
    remaining options. An error email is sent when
    ``self.PGLOG['ERRCNT']`` is set.
    """
    logact = self.LOGERR
    is_gdex_user = self.PGLOG['CURUID'] == self.PGLOG['GDEXUSER']
    # the email flag is added only during the first 60 seconds of each 3*CTIME window
    if is_gdex_user and (time.time() % (3*self.PGSIG['CTIME'])) < 60:
        logact |= self.EMEROL

    status_cnd = self.get_hash_condition("dscheck", "ST", None, 1)
    if status_cnd:
        status_cnd += " AND "
    if not ('SN' in self.params or self.params['LN'] == self.PGLOG['GDEXUSER']):
        status_cnd += "specialist = '{}' AND ".format(self.params['LN'])

    other_cnd = self.get_hash_condition("dscheck", None, "ST", 1)
    if other_cnd:
        other_cnd += " AND "
    self.set_dscheck_options(self.get_host(1), status_cnd + other_cnd, logact)
    if self.PGLOG['ERRCNT']:
        self.send_error_email()
|
|
440
|
+
|
|
441
|
+
# rdadata daemon handles the daemon controls
|
|
442
|
+
def handle_dschecks(self):
    """Daemon loop: repeatedly start requests, updates and checks until told to quit.

    The loop runs while ``self.PGSIG['QUIT']`` is falsy. The
    ``self.EMEROL`` flag is added to the log action on the first pass and
    again whenever the time returned by ``sleep_daemon()`` has accumulated
    to at least 4 * ``self.PGSIG['CTIME']``. On exit the totals are
    summarised with ``prepare_quit()`` and handed to ``stop_daemon()``.
    """
    checks_started = requests_started = updates_started = 0
    self.PGLOG['NOQUIT'] = 1
    email_interval = 4*self.PGSIG['CTIME']
    elapsed = email_interval    # so the first pass includes EMEROL
    while not self.PGSIG['QUIT']:
        logact = self.LGEREX
        if elapsed >= email_interval:
            logact |= self.EMEROL
            elapsed = 0
        new_requests = self.start_dsrqsts("", logact)
        requests_started += new_requests
        new_updates = self.start_dsupdts("", logact)
        updates_started += new_updates
        new_checks = self.start_dschecks("", logact)
        checks_started += new_checks
        if self.PGLOG['ERRCNT']:
            self.send_error_email()
        # nothing was started in this pass
        if not (new_requests + new_updates + new_checks):
            self.pgdisconnect(1)
        elapsed += self.sleep_daemon()
    self.PGLOG['NOQUIT'] = 0
    self.stop_daemon(self.prepare_quit(checks_started, requests_started, updates_started))
|
|
468
|
+
|
|
469
|
+
# send an error email to the specialist
|
|
470
|
+
def send_error_email(self):
    """Send an email whose subject names this host as the source of DSCHECK errors."""
    hostname = self.PGLOG['HOSTNAME']
    self.send_email("Error message for DSCHECK on " + hostname)
|
|
473
|
+
|
|
474
|
+
# prepare a summary string for quit
|
|
475
|
+
@staticmethod
|
|
476
|
+
def prepare_quit(ccnt, rcnt, ucnt):
|
|
477
|
+
msg = ""
|
|
478
|
+
if rcnt > 0:
|
|
479
|
+
s = 's' if rcnt > 1 else ''
|
|
480
|
+
msg = "{} dsrqst{}".format(rcnt, s)
|
|
481
|
+
if ccnt > 0:
|
|
482
|
+
if msg: msg += ", "
|
|
483
|
+
s = 's' if ccnt > 1 else ''
|
|
484
|
+
msg += "{} dscheck{}".format(ccnt, s)
|
|
485
|
+
if ucnt > 0:
|
|
486
|
+
if msg: msg += ", "
|
|
487
|
+
s = 's' if ucnt > 1 else ''
|
|
488
|
+
msg += "{} dsupdt{}".format(ucnt, s)
|
|
489
|
+
return msg
|
|
490
|
+
|
|
491
|
+
# check a daemon host if connectable
|
|
492
|
+
def check_host_connection(self):
    """Check that daemon hosts can be reached by running ``ssh <host> ps``.

    With option 'HN' every distinct specialist found in table ``dsdaemon``
    is paired with every given host name; otherwise the distinct
    (specialist, hostname) pairs come from the table. A pair belonging to
    another specialist is only checked when the current user is
    ``self.PGLOG['GDEXUSER']``, in which case the command is prefixed with
    ``pgstart_<specialist>``.
    """
    tname = "dsdaemon"
    condition = self.get_hash_condition(tname, None, "H", 1)
    if 'HN' in self.params:
        pgrecs = {'specialist': [], 'hostname': []}
        spclsts = self.pgmget(tname, "DISTINCT specialist", condition, self.PGOPT['extlog'])
        if spclsts:
            for specialist in spclsts['specialist']:
                for hostname in self.params['HN']:
                    pgrecs['specialist'].append(specialist)
                    pgrecs['hostname'].append(hostname)
    else:
        pgrecs = self.pgmget(tname, "DISTINCT specialist, hostname", condition, self.PGOPT['extlog'])
    cnt = len(pgrecs['specialist']) if pgrecs else 0
    if not cnt:
        self.pglog("No daemon host found to check connectivity", self.LOGWRN)
        return
    if cnt > 1:
        self.pglog("Check {} daemon hosts for connectivity ...".format(cnt), self.WARNLG)
    for specialist, hostname in zip(pgrecs['specialist'][:cnt], pgrecs['hostname'][:cnt]):
        # NOTE(review): hostname is interpolated into a command string as is
        cmd = "ssh {} ps".format(hostname)
        if specialist != self.PGLOG['CURUID']:
            if self.PGLOG['CURUID'] != self.PGLOG['GDEXUSER']:
                self.pglog("{}: Cannot check connection to '{}' for {}".format(self.PGLOG['CURUID'], hostname, specialist), self.LOGERR)
                continue
            cmd = "pgstart_{} {}".format(specialist, cmd)
        # fixed: the message used to read "conection"
        self.pglog("Check connection to '{}' for {} ...".format(hostname, specialist), self.WARNLG)
        self.pgsystem(cmd, self.LOGERR, 4, None, 15)
|
|
523
|
+
|
|
524
|
+
# main function to execute this script
|
|
525
|
+
def main():
    """Entry point: create a DsCheck, read parameters, run the action, then exit with 0."""
    checker = DsCheck()
    checker.read_parameters()
    checker.start_actions()
    checker.pgexit(0)
|
|
530
|
+
|
|
531
|
+
# call main() to start program
|
|
532
|
+
if __name__ == "__main__": main()
|
|
@@ -170,11 +170,11 @@ categories:
|
|
|
170
170
|
is present. Combination of specialist login name, command name and hostname of
|
|
171
171
|
computer must be unique for each daemon control record.
|
|
172
172
|
|
|
173
|
-
Specify host name '
|
|
173
|
+
Specify host name 'PBS' for putting the command in the PBS batch control system. If
|
|
174
174
|
a specified command name is not found in the daemon control, the general 'dscheck'
|
|
175
175
|
configuration for command name 'ALL' is used.
|
|
176
176
|
|
|
177
|
-
For example, set daemon control information for schuster, all commands on
|
|
177
|
+
For example, set daemon control information for schuster, all commands on PBS hosts,
|
|
178
178
|
for maximum 4 checks can be processed at the same time with priority 1, the smaller
|
|
179
179
|
the number the higher the priority is, via input file daemon.ctl
|
|
180
180
|
|
|
@@ -182,7 +182,7 @@ categories:
|
|
|
182
182
|
|
|
183
183
|
<<Content of input file daemon.ctl>>
|
|
184
184
|
DaemonIndex<:>Command<:>Specialist<:>Hostname<:>ProcessLimit<:>Priority<:>
|
|
185
|
-
0<:>schuster<:>ALL<:>
|
|
185
|
+
0<:>schuster<:>ALL<:>PBS<:>4<:>1<:>
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
3.1.2 Get Daemon Control
|
|
@@ -311,11 +311,11 @@ DaemonIndex<:>Command<:>Specialist<:>Hostname<:>ProcessLimit<:>Priority<:>
|
|
|
311
311
|
(ls -l | grep test) 1> test2.out 2>test2.err
|
|
312
312
|
|
|
313
313
|
For example, to add testing command 'test2' into 'dscheck' for delayed mode execution on
|
|
314
|
-
|
|
314
|
+
PBS
|
|
315
315
|
|
|
316
|
-
dsheck AC -CM test2 -HN
|
|
316
|
+
dscheck AC -CM test2 -HN PBS
|
|
317
317
|
|
|
318
|
-
The command 'test2' must be executable at the current working directory on
|
|
318
|
+
The command 'test2' must be executable at the current working directory on PBS machines.
|
|
319
319
|
|
|
320
320
|
|
|
321
321
|
3.2.2 Get Check
|
|
@@ -591,7 +591,7 @@ Information options are used to pass information, one or multiple values, into
|
|
|
591
591
|
|
|
592
592
|
-LH or -LocalHost, specify a local hostname to process checks on the host for
|
|
593
593
|
action -PC(-ProcessCheck). It defaults to '' to use the local host name. Specify
|
|
594
|
-
|
|
594
|
+
PBS to process batch jobs.
|
|
595
595
|
|
|
596
596
|
-MT or -MaxrunTime, specify the maximum run time for daemon mode. It defaults to 0
|
|
597
597
|
for unlimited time. For example, 5000 means seconds, and 1D means 1 day for 86400
|