rda-python-dscheck 1.0.9__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/PKG-INFO +1 -2
  2. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/pyproject.toml +6 -11
  3. rda_python_dscheck-2.0.1/src/rda_python_dscheck/dscheck.py +532 -0
  4. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/dscheck.usg +7 -7
  5. rda_python_dscheck-2.0.1/src/rda_python_dscheck/pg_check.py +1334 -0
  6. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/PKG-INFO +1 -2
  7. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/SOURCES.txt +2 -0
  8. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/requires.txt +0 -1
  9. rda_python_dscheck-2.0.1/tests/test_dscheck.py +8 -0
  10. rda_python_dscheck-1.0.9/tests/test_dscheck.py +0 -6
  11. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/LICENSE +0 -0
  12. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/MANIFEST.in +0 -0
  13. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/README.md +0 -0
  14. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/setup.cfg +0 -0
  15. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/PgCheck.py +0 -0
  16. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck/__init__.py +0 -0
  17. /rda_python_dscheck-1.0.9/src/rda_python_dscheck/dscheck.py → /rda_python_dscheck-2.0.1/src/rda_python_dscheck/ds_check.py +0 -0
  18. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/dependency_links.txt +0 -0
  19. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/entry_points.txt +0 -0
  20. {rda_python_dscheck-1.0.9 → rda_python_dscheck-2.0.1}/src/rda_python_dscheck.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_dscheck
3
- Version: 1.0.9
3
+ Version: 2.0.1
4
4
  Summary: RDA python package to add and process batch jobs
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-dscheck
@@ -12,7 +12,6 @@ Requires-Python: >=3.7
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: rda_python_common
15
- Requires-Dist: rda_python_setuid
16
15
  Dynamic: license-file
17
16
 
18
17
  RDA python package to add and process batch jobs.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rda_python_dscheck"
7
- version = "1.0.9"
7
+ version = "2.0.1"
8
8
  authors = [
9
9
  { name="Zaihua Ji", email="zji@ucar.edu" },
10
10
  ]
@@ -18,18 +18,13 @@ classifiers = [
18
18
  "Development Status :: 5 - Production/Stable",
19
19
  ]
20
20
  dependencies = [
21
- "rda_python_common",
22
- "rda_python_setuid",
21
+ "rda_python_common"
23
22
  ]
24
23
 
25
- [tool.setuptools]
26
- include-package-data = true
27
-
28
- [tool.setuptools.packages.find]
29
- where = ["src"]
30
-
31
- [tool.setuptools.package-data]
32
- "rda_python_dscheck" = ["dscheck.usg"]
24
+ [tool.pytest.ini_options]
25
+ pythonpath = [
26
+ "src"
27
+ ]
33
28
 
34
29
  [project.urls]
35
30
  "Homepage" = "https://github.com/NCAR/rda-python-dscheck"
@@ -0,0 +1,532 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ ##################################################################################
4
+ #
5
+ # Title: dscheck
6
+ # Author: Zaihua Ji, zji@ucar.edu
7
+ # Date: 09/28/2020
8
+ # 2025-02-05 transferred to package rda_python_dscheck from
9
+ # https://github.com/NCAR/rda-utility-programs.git
10
+ # 2025-12-05 convert to class DsCheck
11
+ # Purpose: python utility program to check and start command saved in dscheck
12
+ #
13
+ # Github: https://github.com/NCAR/rda-python-dscheck.git
14
+ #
15
+ ##################################################################################
16
+ #
17
+ import os
18
+ import re
19
+ import sys
20
+ import time
21
+ from os import path as op
22
+ from .pg_check import PgCheck
23
+
24
+ class DsCheck(PgCheck):
25
+
26
def __init__(self):
    """Initialize the parent class and reset the shared record counter."""
    super().__init__()
    # number of records the current action works on; set by start_actions()
    self.ALLCNT = 0
29
+
30
+ # read in command line parameters
31
def read_parameters(self):
    """Read the command line input for the 'dscheck' application.

    Sets the help path from this file's location, parses the input and
    then checks the options against the parsed action code.
    """
    app_name = 'dscheck'
    self.set_help_path(__file__)
    self.parsing_input(app_name)
    current_action = self.PGOPT['CACT']
    self.check_dscheck_options(current_action, app_name)
36
+
37
+ # start action of dscheck
38
def start_actions(self):
    """Run the handler that belongs to the action code in self.PGOPT['CACT'].

    Actions that work on explicit indices store the number of given
    values in self.ALLCNT before their handler is called.  After the
    handler returns, self.cmdlog() is called when element 2 of the
    action's entry in self.OPTS is truthy.
    """
    action = self.PGOPT['CACT']
    # actions that need no preparation, mapped to their handler names
    direct = {
        'AC': 'add_check_info',
        'CH': 'check_host_connection',
        'EC': 'email_check_info',
        'GC': 'get_check_info',
        'GD': 'get_daemon_info',
    }
    if action in direct:
        getattr(self, direct[action])()
    elif action == 'DL':
        # check and daemon indices may both be given in one call
        if 'CI' in self.params:
            self.ALLCNT = len(self.params['CI'])
            self.delete_check_info()
        if 'DI' in self.params:
            self.ALLCNT = len(self.params['DI'])
            self.delete_daemon_info()
    elif action == "IC":
        self.ALLCNT = len(self.params['CI'])
        self.interrupt_dschecks()
    elif action == 'PC':
        self.set_batch_options(self.params, 2, 1)
        if 'DM' in self.params:
            self.ALLCNT = 0
            self.handle_dschecks()
        else:
            self.process_dschecks()
    elif action == 'SD':
        self.ALLCNT = len(self.params['DI'])
        self.set_daemon_info()
    elif action == 'SO':
        self.set_batch_options(self.params, 2, 1)
        self.process_dscheck_options()
    elif action == "UL":
        self.ALLCNT = len(self.params['CI']) if 'CI' in self.params else 0
        self.unlock_checks()
    # log end time if not getting action
    if self.OPTS[self.PGOPT['CACT']][2]:
        self.cmdlog()
76
+
77
+ # add a check for customized command
78
def add_check_info(self):
    """Add one check record for a customized command.

    The first value of self.params['CM'] is removed from the list and
    used as the command; the remaining 'CM' values and any 'AV' values
    are turned into the argument string.
    """
    params = self.params
    command = params['CM'].pop(0)
    argstr = self.argv_to_string(params['CM'], 0)
    if 'AV' in params:
        if argstr:
            argstr += " "
        argstr += self.argv_to_string(params['AV'], 0)
    if 'DS' in params:
        dsid = params['DS'][0]
    else:
        dsid = None
    if 'AN' in params:
        action = params['AN'][0]
    else:
        action = None
    self.set_batch_options(params, 2, 1)
    # fall back to the login name and the current directory
    if 'SN' in params:
        specialist = params['SN'][0]
    else:
        specialist = params['LN']
    if 'WD' in params:
        workdir = params['WD'][0]
    else:
        workdir = self.PGLOG['CURDIR']
    self.add_one_dscheck(0, '', command, dsid, action, workdir, specialist,
                         argstr, None, None, self.PGOPT['extlog'])
91
+
92
+ # delete dscheck daemon controls for given daemon control indices
93
def delete_daemon_info(self):
    """Delete the daemon control records for the indices in self.params['DI'].

    The first self.ALLCNT indices are processed; the values returned by
    self.pgdel() are summed up and reported.
    """
    total = self.ALLCNT
    plural = '' if not total > 1 else 's'
    self.pglog("Delete {} dscheck daemon control{} ...".format(total, plural), self.WARNLG)
    deleted = 0
    for pos in range(total):
        condition = "dindex = {}".format(self.params['DI'][pos])
        deleted += self.pgdel("dsdaemon", condition, self.PGOPT['extlog'])
    summary = "{} of {} dscheck daemon control{} deleted".format(deleted, total, plural)
    self.pglog(summary, self.PGOPT['wrnlog'])
100
+
101
+ # delete checks for given check indices
102
def delete_check_info(self):
    """Delete the check records for the indices in self.params['CI'].

    Each index is passed to self.lock_dscheck() first; a record is only
    deleted when that call returns a positive check index.
    """
    total = self.ALLCNT
    plural = '' if not total > 1 else 's'
    self.pglog("Delete {} dscheck record{} ...".format(total, plural), self.WARNLG)
    deleted = 0
    for pos in range(total):
        locked_idx = self.lock_dscheck(self.params['CI'][pos], 2, self.PGOPT['extlog'])
        if locked_idx > 0:
            deleted += self.delete_dscheck(None, "cindex = {}".format(locked_idx), self.PGOPT['extlog'])
    summary = "{} of {} check record{} deleted".format(deleted, total, plural)
    self.pglog(summary, self.PGOPT['wrnlog'])
111
+
112
+ # email notice of check status for specialist
113
def email_check_info(self):
    """Email a summary of the matching check records to the login user.

    The records are selected with the condition built by
    self.get_hash_condition() and ordered by cindex.  When nothing
    matches, a log message is written and its result is returned.
    """
    cnd = self.get_hash_condition("dscheck", None, None, 1)
    pgrecs = self.pgmget("dscheck", "*", cnd + " ORDER BY cindex", self.PGOPT['extlog'])
    allcnt = len(pgrecs['cindex']) if pgrecs else 0
    if not allcnt:
        return self.pglog("{}: No Check Information Found to send email for {}".format(self.PGLOG['CURUID'], cnd), self.LOGWRN)
    if allcnt > 1:
        s = 's'
        ss = "are"
    else:
        s = ''
        ss = "is"
    subject = "{} active Check Record{}".format(allcnt, s)
    mbuf = "{} {} listed:\n".format(subject, ss)
    # overwrite only the status column; the other columns are still needed
    # by build_check_message() below
    pgrecs['status'] = self.get_check_status(pgrecs, allcnt)
    for i in range(allcnt):
        if i > 0: mbuf += self.PGLOG['SEPLINE']
        mbuf += self.build_check_message(self.onerecord(pgrecs, i))
    if 'CC' in self.params: self.add_carbon_copy(self.params['CC'])
    subject += " found"
    self.send_email(subject, self.params['LN'], mbuf)
    self.pglog("Email sent to {} With Subject '{}'".format(self.params['LN'], subject), self.LOGWRN)
134
+
135
+ # build email message for a given check record
136
def build_check_message(self, pgrec):
    """Return the email text that describes one check record.

    pgrec is a dict with the keys cindex, command, argv, argextra,
    workdir, date, time, specialist, status, errmsg and pid.  An error
    line is appended when errmsg is set, or when the record has no pid.
    """
    msg = "Check Index: {}\nCommand: {} {}".format(pgrec['cindex'], pgrec['command'], pgrec['argv'])
    if pgrec['argextra']: msg += self.break_long_string(pgrec['argextra'], 100, "...", 1)
    msg += ("\nWork Directory: {}\n".format(pgrec['workdir']) +
            "Initial Execution: {} {} by {}\n".format(pgrec['date'], pgrec['time'], pgrec['specialist']) +
            "Current Status: {}\n".format(pgrec['status']))
    if pgrec['errmsg']:
        msg += "Error Message: {}\n".format(pgrec['errmsg'])
    elif not pgrec['pid']:
        msg += "Error Message: Aborted abnormally\n"
    return msg
147
+
148
+ # get dscheck daemon control information
149
def get_daemon_info(self):
    """Retrieve daemon control records and write them to self.OUTPUT.

    The title line is always written.  When both 'ON' and 'OB' are given
    in self.params the records are sorted after retrieval with
    self.sorthash(); otherwise an order string is appended to the query
    condition.
    """
    tname = "dsdaemon"
    tblhash = self.TBLHASH[tname]
    self.pglog("Get dscheck daemon control information from RDADB ...", self.WARNLG)
    oflds = lens = None
    fnames = self.params['FN'] if 'FN' in self.params else None
    fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT[tname])
    onames = self.params['ON'] if 'ON' in self.params else "I"
    qnames = fnames + self.append_order_fields(onames, fnames, tname)
    condition = self.get_hash_condition(tname, None, None, 1)
    if 'ON' in self.params and 'OB' in self.params:
        oflds = self.append_order_fields(onames, None, tname)
    else:
        condition += self.get_order_string(onames, tname)
    pgrecs = self.pgmget(tname, self.get_string_fields(qnames, tname), condition, self.PGOPT['extlog'])
    if pgrecs:
        # 'FO' matches the option tested by get_check_info(); the original
        # tested 'OF' here
        if 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, tblhash)
        # sort on the order fields, not on the displayed fields
        if oflds: pgrecs = self.sorthash(pgrecs, oflds, tblhash, self.params['OB'])
    self.OUTPUT.write(self.get_string_titles(fnames, tblhash, lens) + "\n")
    if pgrecs:
        cnt = self.print_column_format(pgrecs, fnames, tblhash, lens)
        s = 's' if cnt > 1 else ''
        self.pglog("{} daemon control{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
    else:
        self.pglog("No daemon control information retrieved", self.PGOPT['wrnlog'])
174
+
175
+ # get check information
176
def get_check_info(self):
    """Retrieve check records and write them to self.OUTPUT.

    The title line is always written.  With 'CS' in self.params the
    status column is replaced by the strings from get_check_status() and
    the field 'U' is added to the displayed fields if it is missing.
    When both 'ON' and 'OB' are given the records are sorted after
    retrieval; otherwise an order string is appended to the condition.
    """
    tname = 'dscheck'
    tblhash = self.TBLHASH[tname]
    self.pglog("Get check information from RDADB ...", self.WARNLG)
    lens = oflds = None
    fnames = self.params['FN'] if 'FN' in self.params else None
    fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['chkall'])
    onames = self.params['ON'] if 'ON' in self.params else "I"
    condition = self.get_hash_condition(tname, None, None, 1)
    if 'ON' in self.params and 'OB' in self.params:
        oflds = self.append_order_fields(onames, None, tname)
    else:
        condition += self.get_order_string(onames, tname)
    pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog'])
    if pgrecs:
        if 'CS' in self.params:
            pgrecs['status'] = self.get_check_status(pgrecs)
            # append the field; the original compared (==) and changed nothing
            # NOTE(review): presumably 'U' is the short name of the status
            # column - confirm against TBLHASH
            if fnames.find('U') < 0: fnames += 'U'
        if 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, tblhash)
        if oflds: pgrecs = self.sorthash(pgrecs, oflds, tblhash, self.params['OB'])
    self.OUTPUT.write(self.get_string_titles(fnames, tblhash, lens) + "\n")
    if pgrecs:
        cnt = self.print_column_format(pgrecs, fnames, tblhash, lens)
        s = 's' if cnt > 1 else ''
        self.pglog("{} check record{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
    else:
        self.pglog("No check information retrieved", self.PGOPT['wrnlog'])
203
+
204
+ # add or modify dscheck daemon control information
205
def set_daemon_info(self):
    """Add or modify self.ALLCNT daemon control records.

    A positive value in self.params['DI'] selects an existing record to
    update; any other value adds a new record.  A 'priority' outside
    0-10 is reported through self.action_error().
    """
    table = "dsdaemon"
    self.TBLHASH[table]   # the original reads this entry without using it
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Set information of {} dscheck daemon control{} ...".format(self.ALLCNT, plural), self.WARNLG)
    added = modified = 0
    fields = self.get_field_keys(table, None, 'I')
    self.validate_multiple_values(table, self.ALLCNT, fields)
    for pos in range(self.ALLCNT):
        daemon_idx = self.params['DI'][pos] if 'DI' in self.params else 0
        existing = None
        if daemon_idx > 0:
            cnd = "dindex = {}".format(daemon_idx)
            existing = self.pgget(table, "*", cnd, self.PGOPT['extlog'])
            if not existing:
                self.action_error("Miss daemon record for " + cnd, 'SD')
        record = self.build_record(fields, existing, table, pos)
        if not record:
            continue
        if 'priority' in record and (record['priority'] < 0 or record['priority'] > 10):
            self.action_error("{}: Priority value must in range 0(highest) - 10(lowest)".format(record['priority']), 'SD')
        if existing:
            modified += self.pgupdt(table, record, cnd, self.PGOPT['extlog'])
            continue
        # new record: default the specialist to the login name
        if 'specialist' not in record and self.params['LN'] != self.PGLOG['GDEXUSER']:
            record['specialist'] = self.params['LN']
        daemon_idx = self.pgadd(table, record, self.PGOPT['extlog']|self.AUTOID)
        if daemon_idx:
            self.pglog("Daemon Control Index {} added".format(daemon_idx), self.PGOPT['wrnlog'])
            added += 1
    self.pglog("{}/{} of {} daemon control{} added/modified in RDADB!".format(added, modified, self.ALLCNT, plural), self.PGOPT['wrnlog'])
234
+
235
+ # expand check status info
236
def get_check_status(self, pgrecs, cnt = 0):
    """Return a list of expanded status strings, one per check record.

    pgrecs is a dict of column lists; cnt is the number of records to
    expand and defaults to the length of the 'cindex' column.  A record
    with a pid gets "Pending" or an execution string, followed by the
    lock process info; a record without a pid gets its plain status.
    """
    if not cnt:
        cnt = len(pgrecs['cindex']) if pgrecs else 0
    stats = []
    for idx in range(cnt):
        rec = self.onerecord(pgrecs, idx)
        if rec['pid']:
            done = self.complete_percentage(rec)
            runhost = ""
            if done < 0:
                text = "Pending"
            else:
                text = self.get_execution_string(rec['status'], rec['tcount'])
                runtime = self.dscheck_runtime(rec['stttime'])
                if runtime:
                    text += " {}".format(runtime)
                if done > 0:
                    text += ", {}% done".format(done)
                if rec['runhost']:
                    runhost = rec['runhost']
            text += self.lock_process_info(rec['pid'], rec['lockhost'], runhost)
        else:
            text = self.dscheck_status(rec['status'])
            if rec['status'] in ('D', 'P'):
                runhost = rec['runhost'] or rec['lockhost']
                if runhost:
                    text += " on " + runhost
            elif rec['status'] == 'C' and rec['pindex']:
                text = "Wait on CHK {}".format(rec['pindex'])
        stats.append(text)
    return stats
261
+
262
+ # get the percentage of the check job done
263
def complete_percentage(self, check):
    """Return the finished percentage of a check record, capped at 99.

    Returns -1 when the record has a 'bid' but no 'stttime', and 0 when
    no percentage can be derived.
    """
    if check['bid'] and not check['stttime']:
        done = -1
    elif check['fcount'] > 0 and check['dcount']:
        done = int(100*check['dcount']/check['fcount'])
    elif check['command'] == "dsrqst" and check['oindex']:
        # a partition ('P') and a whole request are looked up differently
        if check['otype'] == 'P':
            lookup = self.get_partition_percentage
        else:
            lookup = self.get_dsrqst_percentage
        done = lookup(check['oindex'])
    else:
        done = 0
    if done < 100:
        return done
    return 99
275
+
276
+ # get a request percentage finished
277
def get_dsrqst_percentage(self, ridx):
    """Return the finished percentage (0-99) of the request with index ridx.

    The counts come from the dsrqst record; a count below 1 is replaced
    by the value self.pgget() returns for table wfrqst.  Returns 0 when
    the request is missing or either count is not positive.
    """
    cond = "rindex = {}".format(ridx)
    rqst = self.pgget("dsrqst", "fcount, pcount", cond)
    if not rqst:
        return 0
    total = rqst['fcount'] or 0
    if total < 1:
        total = self.pgget("wfrqst", "", cond)
    if not total > 0:
        return 0
    done = rqst['pcount'] or 0
    if done < 1:
        done = self.pgget("wfrqst", "", cond + " AND status = 'O'")
    if not done > 0:
        return 0
    return min(int(100*done/total), 99)
291
+
292
+ # get a partition percentage finished
293
def get_partition_percentage(self, pidx, cidx = 0):
    """Return the finished percentage (0-99) of the partition with index pidx.

    The total comes from the ptrqst record, or from table wfrqst when it
    is below 1; the finished count always comes from wfrqst with status
    'O'.  cidx is accepted but not used.  Returns 0 when the partition is
    missing or either count is not positive.
    """
    cond = "pindex = {}".format(pidx)
    partition = self.pgget('ptrqst', "fcount", cond)
    if not partition:
        return 0
    total = partition['fcount'] or 0
    if total < 1:
        total = self.pgget("wfrqst", "", cond)
    if not total > 0:
        return 0
    done = self.pgget("wfrqst", "", cond + " AND status = 'O'")
    if not done > 0:
        return 0
    return min(int(100*done/total), 99)
306
+
307
+ # get execution string for the given try count
308
def get_execution_string(self, stat, trycnt = 0):
    """Return the status text for stat, with the try order appended.

    The order from self.int2order(trycnt) is added in parentheses only
    when trycnt is greater than 1.
    """
    label = self.dscheck_status(stat)
    if trycnt > 1:
        label += "({})".format(self.int2order(trycnt))
    return label
312
+
313
+ # interrupt checks for given dscheck indices
314
def interrupt_dschecks(self):
    """Interrupt the checks listed in self.params['CI'].

    For each index the locking process is looked up with 'rdaps' and
    stopped with 'rdakill'; this needs the login name to match the
    process owner and the 'FI' mode option.  The check record is then
    deleted with status 'I', and for the commands 'dsupdt' and 'dsrqst'
    the related lock is released.  A check that is missing, not locked,
    on another host or relocked is skipped.
    """
    s = 's' if self.ALLCNT > 1 else ''
    delcnt = 0
    for i in range(self.ALLCNT):
        cidx = self.params['CI'][i]
        cnd = "cindex = {}".format(cidx)
        cstr = "Check Index {}".format(cidx)
        pgrec = self.pgget("dscheck", "*", cnd, self.PGOPT['extlog'])
        if not pgrec:
            self.pglog(cstr +": NOT in RDADB", self.PGOPT['extlog'])
            # skip the index if the log call returns; pgrec cannot be used
            continue
        pid = pgrec['pid']
        if pid == 0:
            self.pglog(cstr + ": Check is not under process; no interruption", self.PGOPT['wrnlog'])
            continue
        host = pgrec['lockhost']
        if not self.local_host_action(host, "interrupt check", cstr, self.PGOPT['errlog']): continue
        opts = "-h {} -p {}".format(host, pid)
        buf = self.pgsystem("rdaps " + opts, self.LOGWRN, 20)   # 20 = 4 + 16
        if buf:
            ms = re.match(r'^\s*(\w+)\s+', buf)
            if ms:
                uid = ms.group(1)
                if uid != self.params['LN']:
                    self.pglog("{}: login name '{}'; must be '{}' to interrupt".format(cstr, self.params['LN'], uid), self.PGOPT['wrnlog'])
                    continue
                if 'FI' not in self.params:
                    self.pglog("{}: locked by {}/{}; must add Mode option -FI (-ForceInterrupt) to interrupt".format(cstr, pid, host), self.PGOPT['wrnlog'])
                    continue
                if not self.pgsystem("rdakill " + opts, self.LOGWRN, 7):
                    self.pglog("{}: Failed to interrupt Check locked by {}/{}".format(cstr, pid, host), self.PGOPT['errlog'])
                    continue
        else:
            # NOTE(review): indentation was lost in the reviewed source; this
            # else is assumed to pair with 'if buf' (no process listing
            # returned) - confirm against the original file
            self.pglog("{}: check process stopped for {}/{}".format(cstr, pid, host), self.PGOPT['wrnlog'])
        # read the record again: the lock may have changed meanwhile
        pgrec = self.pgget("dscheck", "*", cnd, self.PGOPT['extlog'])
        if not pgrec['pid']:
            if self.lock_dscheck(cidx, 1, self.PGOPT['extlog']) <= 0: continue
        elif pid != pgrec['pid'] or host != pgrec['lockhost']:
            self.pglog("{}: Check is relocked by {}/{}".format(cstr, pgrec['pid'], pgrec['lockhost']), self.PGOPT['errlog'])
            continue
        pgrec['status'] = 'I'
        self.delete_dscheck(pgrec, None, self.PGOPT['extlog'])
        if pgrec['command'] == 'dsupdt':
            if pgrec['oindex']:
                cnd = "cindex = {} AND pid = {} AND ".format(pgrec['oindex'], pid)
                if self.pgexec("UPDATE dcupdt set pid = 0 WHERE {}lockhost = '{}'".format(cnd, host), self.PGOPT['extlog']):
                    self.pglog("Update Control Index {} unlocked".format(pgrec['oindex']), self.LOGWRN)
            else:
                cnd = "dsid = '{}' AND pid = {} AND ".format(pgrec['dsid'], pid)
                dlupdt = self.pgget("dlupdt", "lindex", "{}hostname = '{}'".format(cnd , host))
                if dlupdt and self.pgexec("UPDATE dlupdt set pid = 0 WHERE lindex = {}".format(dlupdt['lindex']), self.PGOPT['extlog']):
                    self.pglog("Update Local File Index {} unlocked".format(dlupdt['lindex']), self.LOGWRN)
        elif pgrec['command'] == 'dsrqst':
            record = {'status': 'I', 'pid': 0}
            if pgrec['otype'] == 'P':
                table = "ptrqst"
                field = "pindex"
                msg = "Request Partition Index"
            else:
                table = "dsrqst"
                field = "rindex"
                msg = "Request Index"
            if pgrec['oindex']:
                cnd = "{} = {} AND pid = {} AND lockhost = '{}'".format(field, pgrec['oindex'], pid, host)
            else:
                cnd = "dsid = '{}' AND pid = {} AND lockhost = '{}'".format(pgrec['dsid'], pid, host)
            if self.pgupdt(table, record, cnd, self.PGOPT['extlog']):
                self.pglog("{} {} unlocked".format(msg, pgrec['oindex']), self.LOGWRN)
        delcnt += 1
    if self.ALLCNT > 1: self.pglog("{} of {} check{} interrupted".format(delcnt, self.ALLCNT, s), self.LOGWRN)
382
+
383
+ # unlock checks for given check indices
384
def unlock_checks(self):
    """Unlock the checks in self.params['CI'], or check all locks of this host.

    With self.ALLCNT > 0 each listed check is unlocked with
    self.lock_dscheck(cidx, -1); when that fails and
    self.check_host_down() is truthy for the lock host, an unlock with
    mode -2 is tried.  Otherwise the dsrqst, dsupdt and dscheck lock
    checks are run for the host returned by self.get_host(1).
    """
    if not self.ALLCNT > 0:
        host_cnd = "lockhost = '{}' AND ".format(self.get_host(1))
        self.check_dsrqst_locks(host_cnd, self.PGOPT['extlog'])
        self.check_dsupdt_locks(host_cnd, self.PGOPT['extlog'])
        self.check_dscheck_locks(host_cnd, self.PGOPT['extlog'])
        return
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Unlock {} check{} ...".format(self.ALLCNT, plural), self.WARNLG)
    unlocked = 0
    for cidx in self.params['CI']:
        rec = self.pgget("dscheck", "pid, lockhost", "cindex = {}".format(cidx), self.PGOPT['extlog'])
        if not rec:
            self.pglog("Check {}: Not exists".format(cidx), self.PGOPT['errlog'])
            continue
        if not rec['pid']:
            self.pglog("Check {}: Not locked".format(cidx), self.PGOPT['wrnlog'])
            continue
        if self.lock_dscheck(cidx, -1, self.PGOPT['extlog']) > 0:
            template = "Check {}: Unlocked {}/{}"
        elif (self.check_host_down(None, rec['lockhost']) and
              self.lock_dscheck(cidx, -2, self.PGOPT['extlog']) > 0):
            template = "Check {}: Force unlocked {}/{}"
        else:
            template = None
        if template:
            unlocked += 1
        else:
            template = "Check {}: Unable to unlock {}/{}"
        self.pglog(template.format(cidx, rec['pid'], rec['lockhost']), self.PGOPT['wrnlog'])
    if self.ALLCNT > 1:
        self.pglog("{} of {} check{} unlocked from RDADB".format(unlocked, self.ALLCNT, plural), self.LOGWRN)
410
+
411
+ # process the checks
412
def process_dschecks(self):
    """Start the pending checks once.

    Builds the query condition from the 'ST' hash condition and, unless
    'SN' is given or the login name is the GDEXUSER, the specialist.
    With 'WR' / 'WU' in self.params the requests / updates are started
    first.  An error email is sent when self.PGLOG['ERRCNT'] is set.
    """
    log_flags = self.LOGERR
    # EMEROL is only added for the GDEXUSER, within 60 seconds of each
    # 3*CTIME boundary of the clock
    if self.PGLOG['CURUID'] == self.PGLOG['GDEXUSER'] and (time.time()%(3*self.PGSIG['CTIME'])) < 60:
        log_flags = log_flags | self.EMEROL
    base_cnd = self.get_hash_condition("dscheck", "ST", None, 1)
    if base_cnd:
        base_cnd += " AND "
    if not ('SN' in self.params or self.params['LN'] == self.PGLOG['GDEXUSER']):
        base_cnd += "specialist = '{}' AND ".format(self.params['LN'])
    if 'WR' in self.params:
        self.start_dsrqsts(base_cnd, log_flags)
    if 'WU' in self.params:
        self.start_dsupdts(base_cnd, log_flags)
    extra_cnd = self.get_hash_condition("dscheck", None, "ST", 1)
    if extra_cnd:
        extra_cnd += " AND "
    self.start_dschecks(base_cnd + extra_cnd, log_flags)
    if self.PGLOG['ERRCNT']:
        self.send_error_email()
426
+
427
+ # process the checks
428
def process_dscheck_options(self):
    """Call self.set_dscheck_options() for the checks matching the condition.

    The condition is built like in process_dschecks(): the 'ST' hash
    condition, the specialist unless 'SN' is given or the login name is
    the GDEXUSER, and the remaining hash conditions.  An error email is
    sent when self.PGLOG['ERRCNT'] is set.
    """
    log_flags = self.LOGERR
    if self.PGLOG['CURUID'] == self.PGLOG['GDEXUSER'] and (time.time()%(3*self.PGSIG['CTIME'])) < 60:
        log_flags = log_flags | self.EMEROL
    base_cnd = self.get_hash_condition("dscheck", "ST", None, 1)
    if base_cnd:
        base_cnd += " AND "
    if not ('SN' in self.params or self.params['LN'] == self.PGLOG['GDEXUSER']):
        base_cnd += "specialist = '{}' AND ".format(self.params['LN'])
    extra_cnd = self.get_hash_condition("dscheck", None, "ST", 1)
    if extra_cnd:
        extra_cnd += " AND "
    self.set_dscheck_options(self.get_host(1), base_cnd + extra_cnd, log_flags)
    if self.PGLOG['ERRCNT']:
        self.send_error_email()
440
+
441
+ # rdadata daemon handles the daemon controls
442
def handle_dschecks(self):
    """Run the daemon loop until self.PGSIG['QUIT'] is set.

    Each pass starts requests, updates and checks, sends an error email
    when self.PGLOG['ERRCNT'] is set, disconnects when nothing was
    started and then sleeps.  EMEROL is added to the log action on the
    first pass and whenever the summed sleep time reaches 4*CTIME.
    self.PGLOG['NOQUIT'] is 1 while the loop runs.
    """
    check_total = request_total = update_total = 0
    self.PGLOG['NOQUIT'] = 1
    period = 4*self.PGSIG['CTIME']
    waited = period
    while not self.PGSIG['QUIT']:
        logact = self.LGEREX
        if waited >= period:
            logact = self.LGEREX|self.EMEROL
            waited = 0
        requests = self.start_dsrqsts("", logact)
        request_total += requests
        updates = self.start_dsupdts("", logact)
        update_total += updates
        checks = self.start_dschecks("", logact)
        check_total += checks
        if self.PGLOG['ERRCNT']:
            self.send_error_email()
        if not (0 + requests + updates + checks):
            self.pgdisconnect(1)
        waited += self.sleep_daemon()
    self.PGLOG['NOQUIT'] = 0
    self.stop_daemon(self.prepare_quit(check_total, request_total, update_total))
468
+
469
+ # send an error email to the specialist
470
def send_error_email(self):
    """Call self.send_email() with a subject that names this host."""
    self.send_email("Error message for DSCHECK on " + self.PGLOG['HOSTNAME'])
473
+
474
+ # prepare a summary string for quit
475
@staticmethod
def prepare_quit(ccnt, rcnt, ucnt):
    """Return a summary such as "2 dsrqsts, 1 dscheck" for the given counts.

    Counts that are not positive are left out; the order is dsrqst,
    dscheck, dsupdt.  Returns an empty string when no count is positive.
    """
    counted = ((rcnt, "dsrqst"), (ccnt, "dscheck"), (ucnt, "dsupdt"))
    parts = []
    for number, name in counted:
        if number > 0:
            parts.append("{} {}{}".format(number, name, 's' if number > 1 else ''))
    return ", ".join(parts)
490
+
491
+ # check a daemon host if connectable
492
def check_host_connection(self):
    """Run 'ssh <host> ps' for the daemon hosts to check their connectivity.

    With 'HN' in self.params every distinct specialist of the matching
    daemon controls is paired with every given host name; otherwise the
    distinct specialist/hostname pairs come from table dsdaemon.  For
    another specialist's host the command is prefixed with
    'pgstart_<specialist>' when the current user is the GDEXUSER, and
    skipped with an error message for any other user.
    """
    tname = "dsdaemon"
    condition = self.get_hash_condition(tname, None, "H", 1)
    if 'HN' in self.params:
        pgrecs = {'specialist': [], 'hostname': []}
        spclsts = self.pgmget(tname, "DISTINCT specialist", condition, self.PGOPT['extlog'])
        if spclsts:
            for specialist in spclsts['specialist']:
                for hostname in self.params['HN']:
                    pgrecs['specialist'].append(specialist)
                    pgrecs['hostname'].append(hostname)
    else:
        pgrecs = self.pgmget(tname, "DISTINCT specialist, hostname", condition, self.PGOPT['extlog'])
    cnt = len(pgrecs['specialist']) if pgrecs else 0
    if not cnt:
        self.pglog("No daemon host found to check connectivity", self.LOGWRN)
        return
    if cnt > 1: self.pglog("Check {} daemon hosts for connectivity ...".format(cnt), self.WARNLG)
    for specialist, hostname in zip(pgrecs['specialist'], pgrecs['hostname']):
        cmd = "ssh {} ps".format(hostname)
        if specialist != self.PGLOG['CURUID']:
            if self.PGLOG['CURUID'] != self.PGLOG['GDEXUSER']:
                self.pglog("{}: Cannot check connection to '{}' for {}".format(self.PGLOG['CURUID'], hostname, specialist), self.LOGERR)
                continue
            cmd = "pgstart_{} {}".format(specialist, cmd)
        self.pglog("Check connection to '{}' for {} ...".format(hostname, specialist), self.WARNLG)
        self.pgsystem(cmd, self.LOGERR, 4, None, 15)
523
+
524
+ # main function to execute this script
525
def main():
    """Create a DsCheck object, read its parameters, run the action and exit."""
    checker = DsCheck()
    checker.read_parameters()
    checker.start_actions()
    checker.pgexit(0)

# call main() to start program
if __name__ == "__main__":
    main()
@@ -170,11 +170,11 @@ categories:
170
170
  is present. Combination of specialist login name, command name and hostname of
171
171
  computer must be unique for each daemon control record.
172
172
 
173
- Specify host name 'SLURM' for putting the command in the SLURM batch control system. If
173
+ Specify host name 'PBS' for putting the command in the PBS batch control system. If
174
174
  a specified command name is not found in the daemon control, the general 'dscheck'
175
175
  configuration for command name 'ALL' is used.
176
176
 
177
- For example, set daemon control information for schuster, all commands on SLURM hosts,
177
+ For example, set daemon control information for schuster, all commands on PBS hosts,
178
178
  for maximum 4 checks can be processed at the same time with priority 1, the smaller
179
179
  the number the higher the priority is, via input file daemon.ctl
180
180
 
@@ -182,7 +182,7 @@ categories:
182
182
 
183
183
  <<Content of input file daemon.ctl>>
184
184
  DaemonIndex<:>Command<:>Specialist<:>Hostname<:>ProcessLimit<:>Priority<:>
185
- 0<:>schuster<:>ALL<:>SLURM<:>4<:>1<:>
185
+ 0<:>schuster<:>ALL<:>PBS<:>4<:>1<:>
186
186
 
187
187
 
188
188
  3.1.2 Get Daemon Control
@@ -311,11 +311,11 @@ DaemonIndex<:>Command<:>Specialist<:>Hostname<:>ProcessLimit<:>Priority<:>
311
311
  (ls -l | grep test) 1> test2.out 2>test2.err
312
312
 
313
313
  For example, to add testing command 'test2' into 'dscheck' for delayed mode execution on
314
- SLURM
314
+ PBS
315
315
 
316
- dsheck AC -CM test2 -HN SLURM
316
+ dscheck AC -CM test2 -HN PBS
317
317
 
318
- The command 'test2' must be executable at the current working directory on SLURM machines.
318
+ The command 'test2' must be executable at the current working directory on PBS machines.
319
319
 
320
320
 
321
321
  3.2.2 Get Check
@@ -591,7 +591,7 @@ Information options are used to pass information, one or multiple values, into
591
591
 
592
592
  -LH or -LocalHost, specify a local hostname to process checks on the host for
593
593
  action -PC(-ProcessCheck). It defaults to '' to use the local host name. Specify
594
- SLURM or PBS to process batch jobs.
594
+ PBS to process batch jobs.
595
595
 
596
596
  -MT or -MaxrunTime, specify the maximum run time for daemon mode. It defaults to 0
597
597
  for unlimited time. For example, 5000 means 5000 seconds, and 1D means 1 day for 86400