rda-python-miscs 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_miscs/__init__.py +1 -0
- rda_python_miscs/bashqsub.py +213 -0
- rda_python_miscs/bashqsub.usg +64 -0
- rda_python_miscs/pgwget.py +186 -0
- rda_python_miscs/rdacp.py +212 -0
- rda_python_miscs/rdacp.usg +62 -0
- rda_python_miscs/rdakill.py +267 -0
- rda_python_miscs/rdakill.usg +36 -0
- rda_python_miscs/rdals.py +278 -0
- rda_python_miscs/rdals.usg +61 -0
- rda_python_miscs/rdamod.py +172 -0
- rda_python_miscs/rdamod.usg +51 -0
- rda_python_miscs/rdaown.py +169 -0
- rda_python_miscs/rdaown.usg +46 -0
- rda_python_miscs/rdaps.py +194 -0
- rda_python_miscs/rdaps.usg +29 -0
- rda_python_miscs/rdasub.py +117 -0
- rda_python_miscs/rdasub.usg +12 -0
- rda_python_miscs/rdazip.py +64 -0
- rda_python_miscs/rdazip.usg +20 -0
- rda_python_miscs/tcshqsub.py +213 -0
- rda_python_miscs/tcshqsub.usg +62 -0
- rda_python_miscs-1.0.1.dist-info/LICENSE +21 -0
- rda_python_miscs-1.0.1.dist-info/METADATA +17 -0
- rda_python_miscs-1.0.1.dist-info/RECORD +28 -0
- rda_python_miscs-1.0.1.dist-info/WHEEL +5 -0
- rda_python_miscs-1.0.1.dist-info/entry_points.txt +11 -0
- rda_python_miscs-1.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
Copy directories and files in the current or specified directories to a target
|
|
3
|
+
directory. The owner of the target directories and files is 'rdadata' with default
|
|
4
|
+
or specified modes.
|
|
5
|
+
|
|
6
|
+
Usage: rdacp [-D DirectoryMode] [-f] FromDirectories/Files [-F FileMode] \
|
|
7
|
+
[-hf HtarFileName] [-fh FromHostName] [-h] [-r] [-R RecursiveLevel] \
|
|
8
|
+
[-t ToDirectory/FileName] [-th ToHostName] [-fl FileCountLimit]
|
|
9
|
+
|
|
10
|
+
- Option -D, changes mode of target directories. It defaults to "775";
|
|
11
|
+
|
|
12
|
+
- Option -f, mandatory option for directories and files to be copied from.
|
|
13
|
+
Unix command line wildcards are supported. Use './' or '*' for all
|
|
14
|
+
directories and files in the current directory to be copied.
|
|
15
|
+
The directories and files must be readable by user 'rdadata';
|
|
16
|
+
otherwise 'rdacp' will try to change file mode to make them readable.
|
|
17
|
+
The file names are htar member files if a htar file name is provided
|
|
18
|
+
by Option -hf;
|
|
19
|
+
|
|
20
|
+
- Option -fh, host name for directories and files to be copied from.
|
|
21
|
+
It defaults to local host unless htar file name is provided via Option
|
|
22
|
+
-hf, then it defaults to hpss;
|
|
23
|
+
|
|
24
|
+
- Option -F, changes mode of target files. It defaults to "664";
|
|
25
|
+
|
|
26
|
+
- Option -h, display this help document;
|
|
27
|
+
|
|
28
|
+
- Option -hf, provides a HTAR file name to retrieve its one or multiple
|
|
29
|
+
member files to local for option -fh HPSS, or to htar given local files
|
|
30
|
+
onto HPSS for option -th HPSS;
|
|
31
|
+
|
|
32
|
+
- Option -r, copies directories and files recursively;
|
|
33
|
+
|
|
34
|
+
- Option -R, copies directories and files recursively up to the level provided
|
|
35
|
+
with this Option;
|
|
36
|
+
|
|
37
|
+
- Option -t, provides a target Directory/File name. Its value defaults to '.'.
|
|
38
|
+
Multiple source files cannot be copied if a single target file name is
|
|
39
|
+
specified.
|
|
40
|
+
|
|
41
|
+
- Option -th, host name for target directories and files.
|
|
42
|
+
It defaults to local host;
|
|
43
|
+
|
|
44
|
+
- Option -fl, file count limit in each htar file, default 0 means all files in
|
|
45
|
+
a single htar file; if > 0, it means to archive multiple htar files, with
|
|
46
|
+
each holding up to the given file count. The max value is 5000000;
|
|
47
|
+
|
|
48
|
+
This utility program can be executed anywhere. This help document is displayed if
|
|
49
|
+
the source Directory/File Names are missing.
|
|
50
|
+
|
|
51
|
+
For example, to copy every directory/file under the current directory to under
|
|
52
|
+
ds277.6 on host castle, you can
|
|
53
|
+
|
|
54
|
+
rdacp -r -t /PathTo/ds277.6/ -th castle -f *
|
|
55
|
+
|
|
56
|
+
To copy every directory/file under a given local directory to under ds277.6
|
|
57
|
+
on host castle, you can
|
|
58
|
+
|
|
59
|
+
rdacp -r -t /PathTo/ds277.6/ -th castle -f /PathTo/DirectoryName/
|
|
60
|
+
|
|
61
|
+
where if the ending '/' is omitted, the DirectoryName itself is copied too.
|
|
62
|
+
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: rdakill
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 10/24/2020
|
|
8
|
+
# 2025-03-10 transferred to package rda_python_miscs from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose: kill a local or batch process and its child processes for a given
|
|
11
|
+
# running process ID by 'rdadata'
|
|
12
|
+
#
|
|
13
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
14
|
+
#
|
|
15
|
+
##################################################################################
|
|
16
|
+
#
|
|
17
|
+
import re
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgSIG
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
from rda_python_common import PgDBI
|
|
25
|
+
|
|
26
|
+
# Command-line option values for rdakill, keyed by the single-letter option name.
RDAKILL = dict(
    a=None,   # application name
    h=None,   # hostname
    p=0,      # process id to be killed
    P=0,      # parent pid
    r=0,      # 1 - reserved for exclusive, working with -s PEND only
    u=None,   # login user name
    s=None,   # batch status to kill
    q=None,   # batch partition/queue for SLURM/PBS, rda for default
)
|
|
36
|
+
|
|
37
|
+
#
|
|
38
|
+
# main function to run the application
|
|
39
|
+
#
|
|
40
|
+
def main():
    """Parse the rdakill command line, then kill local processes or batch jobs.

    Options -a, -h, -p, -P, -q, -s and -u each take one value that is stored
    in RDAKILL under the option letter; -r is a flag.  A bare all-digit
    argument is accepted as the process id when no process id has been given
    yet.  When -h compares equal (via PgUtil.pgcmp) to PgLOG.PGLOG['SLMNAME']
    or PgLOG.PGLOG['PBSNAME'], batch jobs are cancelled by id or by status;
    in every other case local processes are killed.  The usage text is shown
    when no option value was supplied.
    """
    optcnt = 0      # number of option values accepted so far
    option = None   # option letter still waiting for its value
    argv = sys.argv[1:]
    PgDBI.dssdb_dbname()
    PgLOG.set_suid(PgLOG.PGLOG['EUID'])
    PgLOG.set_help_path(__file__)
    PgLOG.PGLOG['LOGFILE'] = "rdakill.log"   # set different log file
    PgLOG.cmdlog("rdakill {}".format(' '.join(argv)))

    for arg in argv:
        # Only letters that are keys of RDAKILL may take a value.  '-t' is
        # deliberately not accepted here: RDAKILL has no 't' entry, so it
        # would fail with KeyError below instead of a clean message.
        ms = re.match(r'-([ahpPqsu])$', arg)
        if ms:
            option = ms.group(1)
        elif re.match(r'-r$', arg):
            RDAKILL['r'] = 1
        elif re.match(r'-\w+$', arg):
            PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
        elif option:
            if RDAKILL[option]:
                PgLOG.pglog("{}: value passed to Option -{} already".format(arg, option), PgLOG.LGEREX)
            if option in 'pP':
                # report a non-numeric id through the log instead of a traceback
                try:
                    RDAKILL[option] = int(arg)
                except ValueError:
                    PgLOG.pglog("{}: Option -{} needs an integer value".format(arg, option), PgLOG.LGEREX)
            elif option == 'h':
                RDAKILL[option] = PgLOG.get_short_host(arg)
            else:
                RDAKILL[option] = arg
            option = None
            optcnt += 1
        else:
            ms = re.match(r'^(\d+)$', arg)
            # pid allow value only without leading option, and only while
            # no process id has been recorded yet
            if ms and not RDAKILL['p']:
                RDAKILL['p'] = int(ms.group(1))
                optcnt += 1
            else:
                PgLOG.pglog(arg + ": pass in value without Option", PgLOG.LGEREX)

    if not optcnt: PgLOG.show_usage("rdakill")

    killloc = 1   # cleared once the request was handled as a batch kill
    if RDAKILL['h']:
        PgFile.local_host_action(RDAKILL['h'], "kill processes", PgLOG.PGLOG['HOSTNAME'], PgLOG.LGEREX)
        if not PgUtil.pgcmp(RDAKILL['h'], PgLOG.PGLOG['SLMNAME'], 1):
            if not (RDAKILL['p'] or RDAKILL['s']):
                PgLOG.pglog("Provide Batch ID or Job Status to kill SLURM jobs", PgLOG.LGEREX)
            if RDAKILL['p']:
                rdakill_slurm_batch(RDAKILL['p'])
            else:
                rdakill_slurm_status(RDAKILL['s'], RDAKILL['q'], RDAKILL['u'])
            killloc = 0
        elif not PgUtil.pgcmp(RDAKILL['h'], PgLOG.PGLOG['PBSNAME'], 1):
            if not (RDAKILL['p'] or RDAKILL['s']):
                PgLOG.pglog("Provide Batch ID or Job Status to kill PBS jobs", PgLOG.LGEREX)
            if RDAKILL['p']:
                rdakill_pbs_batch(RDAKILL['p'])
            else:
                rdakill_pbs_status(RDAKILL['s'], RDAKILL['q'], RDAKILL['u'])
            killloc = 0

    if killloc:
        if not (RDAKILL['p'] or RDAKILL['P'] or RDAKILL['a']):
            PgLOG.pglog("Specify process ID, parent PID or App Name to kill", PgLOG.LGEREX)
        rdakill_processes(RDAKILL['p'], RDAKILL['P'], RDAKILL['a'], RDAKILL['u'])

    PgLOG.cmdlog()
    PgLOG.pgexit(0)
|
|
104
|
+
|
|
105
|
+
#
|
|
106
|
+
# kill processes for given condition
|
|
107
|
+
#
|
|
108
|
+
def rdakill_processes(pid, ppid, aname = None, uname = None, level = 0):
    """Kill the local processes selected by the given conditions, children first.

    pid   -- process id to kill (0 for any)
    ppid  -- parent process id whose children are killed (0 for any)
    aname -- substring that must occur in the 'ps -f' line after the PPID column
    uname -- owner login name; 'all' in any letter case means every owner
    level -- recursion depth; 0 for the initial call

    A 'ps' listing is taken, each matching process has its own children
    killed recursively, is then killed itself, and its dscheck record (if
    any) is marked as interrupted.  A warning is logged when the top-level
    call matches nothing.
    """
    import os   # local import: only needed to recognise this program's own pid

    # 'all' is a wildcard, not a login name, so it must not be given to 'ps -u'
    anyuser = not uname or re.match(r'all$', uname, re.I) is not None
    if pid:
        cmd = "ps -p {} -f".format(pid)
    elif ppid:
        cmd = "ps --ppid {} -f".format(ppid)
    elif not anyuser:
        cmd = "ps -u {} -f".format(uname)
    else:
        cmd = "ps -ef"

    kcnt = 0
    mypid = os.getpid()
    buf = PgLOG.pgsystem(cmd, PgLOG.LGWNEX, 20)
    if buf:
        for line in re.split('\n', buf):
            # columns: UID PID PPID rest-of-line; the header line does not match
            ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line)
            if not ms: continue
            uid = ms.group(1)
            cid = int(ms.group(2))
            pcid = int(ms.group(3))
            cname = ms.group(4)
            # never kill ourselves: with -a the rdakill command line itself
            # contains the application name being searched for
            if cid == mypid: continue
            if pid and pid != cid: continue
            if ppid and ppid != pcid: continue
            if not anyuser and uname != uid: continue
            if aname and cname.find(aname) < 0: continue
            kcnt += 1
            rdakill_processes(0, cid, None, None, level+1)   # children first
            kill_local_child(cid, uid, re.sub(r' +', ' ', line))
            record_dscheck_interrupt(cid, PgLOG.PGLOG['HOSTNAME'])

    if not (kcnt or level):
        buf = "No process idendified to kill "
        if RDAKILL['h']:
            buf += "on " + RDAKILL['h']
        else:
            buf += "locally"
        if PgLOG.PGLOG['CURBID']: buf += "; add Option '-h SLURM' if SLURM batch ID provided"
        PgLOG.pglog(buf, PgLOG.LOGWRN)
|
|
146
|
+
|
|
147
|
+
#
|
|
148
|
+
# a local child process
|
|
149
|
+
def kill_local_child(pid, uid, line):
    """Send 'kill -9' to one local process and log the outcome.

    pid  -- id of the process to kill
    uid  -- owner name handed to PgLOG.get_local_command together with the command
    line -- the 'ps' line describing the process, used in the log messages
    """
    if PgSIG.check_process(pid):
        killcmd = PgLOG.get_local_command("kill -9 {}".format(pid), uid)
        done = PgLOG.pgsystem(killcmd, PgLOG.LOGWRN, 260)   # 4+256
        if done:
            return PgLOG.pglog("Kill: " + line, PgLOG.LOGWRN)
        if PgSIG.check_process(pid):
            # command failed and the process is still there
            message = "Error Kill: {}\n{}".format(line, PgLOG.PGLOG['SYSERR'])
            return PgLOG.pglog(message, PgLOG.LOGWRN)

    if not PgSIG.check_process(pid):
        PgLOG.pglog("Quit: " + line, PgLOG.LOGWRN)
|
|
159
|
+
|
|
160
|
+
#
|
|
161
|
+
# kill a slurm batch job
|
|
162
|
+
#
|
|
163
|
+
def rdakill_slurm_batch(bid):
    """Cancel one SLURM batch job.

    bid -- SLURM batch id
    Returns the result of running 'scancel', or 0 when the job was not found.
    """
    jobinfo = PgSIG.check_slurm_status(bid, PgLOG.LOGWRN)
    if jobinfo:
        scancel = PgLOG.get_local_command("scancel {}".format(bid), jobinfo['USER'])
        ret = PgLOG.pgsystem(scancel, PgLOG.LOGWRN, 6)
        if ret:
            record_dscheck_interrupt(bid, PgLOG.PGLOG['SLMNAME'])
    else:
        ret = 0
        PgLOG.pglog("{}: cannot find SLURM batch ID".format(bid), PgLOG.LOGERR)

    if not ret:
        syserr = PgLOG.PGLOG['SYSERR']
        if syserr:
            PgLOG.pglog(syserr, PgLOG.LGEREX)

    return ret
|
|
177
|
+
|
|
178
|
+
#
|
|
179
|
+
# kill SLURM batch jobs for given status
|
|
180
|
+
#
|
|
181
|
+
def rdakill_slurm_status(stat, part, uname):
    """Cancel every SLURM job that is in a given state and log a summary.

    stat  -- job state to match against the STATE column of the sacct listing
    part  -- partition name; 'rda' is used when empty
    uname -- restrict the listing to this user; all users when empty
    """
    if not part:
        part = 'rda'
    userflag = ("u " + uname) if uname else 'a'
    bcmd = "sacct -o jobid,user,state -r {} -".format(part) + userflag

    lines = PgSIG.get_slurm_multiple(bcmd)
    jobids = lines['JOBID'] if lines else []
    pcnt = kcnt = 0   # jobs in the requested state / jobs actually cancelled
    for i, jobid in enumerate(jobids):
        if lines['STATE'][i] != stat:
            continue
        pcnt += 1
        kcnt += rdakill_slurm_batch(jobid)

    if pcnt > 0:
        plural = 's' if pcnt > 1 else ''
        line = "{} of {} SLURM '{}' job{} Killed".format(kcnt, pcnt, stat, plural)
    else:
        line = "No SLURM '{}' job found to kill".format(stat)

    line += " in Partition '{}'".format(part)
    if uname:
        line += " for " + uname
    PgLOG.pglog(line, PgLOG.LOGWRN)
|
|
204
|
+
|
|
205
|
+
#
|
|
206
|
+
# kill a pbs batch job
|
|
207
|
+
#
|
|
208
|
+
def rdakill_pbs_batch(bid):
    """Delete one PBS batch job.

    bid -- PBS batch id
    Returns the result of running the delete command, or 0 when the job
    was not found.
    """
    jobinfo = PgSIG.get_pbs_info(bid, 0, PgLOG.LOGWRN)
    if jobinfo:
        # host type 'ch' uses the 'qdelcasper' command instead of plain 'qdel'
        dcmd = 'qdelcasper' if PgLOG.PGLOG['HOSTTYPE'] == 'ch' else 'qdel'
        delcmd = PgLOG.get_local_command("{} {}".format(dcmd, bid), jobinfo['UserName'])
        ret = PgLOG.pgsystem(delcmd, PgLOG.LOGWRN, 7)
        if ret:
            record_dscheck_interrupt(bid, PgLOG.PGLOG['PBSNAME'])
    else:
        ret = 0
        PgLOG.pglog("{}: cannot find PBS batch ID".format(bid), PgLOG.LOGERR)

    if not ret:
        syserr = PgLOG.PGLOG['SYSERR']
        if syserr:
            PgLOG.pglog(syserr, PgLOG.LGEREX)

    return ret
|
|
224
|
+
|
|
225
|
+
#
|
|
226
|
+
# kill PBS batch jobs for given status
|
|
227
|
+
#
|
|
228
|
+
def rdakill_pbs_status(stat, queue, uname):
    """Delete every PBS job that is in a given state and log a summary.

    stat  -- job state to match against the 'State' column of the job listing
    queue -- queue name; 'rda' is used when empty
    uname -- restrict the listing to this user; all users when empty
    """
    if not queue: queue = 'rda'
    qopts = ''
    if uname:
        qopts = "-u " + uname
    if qopts: qopts += ' '
    qopts += queue

    lines = PgSIG.get_pbs_info(qopts, 1)
    # an empty listing must count as zero jobs, as in rdakill_slurm_status,
    # instead of failing on the subscript
    bcnt = len(lines['JobID']) if lines else 0
    pcnt = kcnt = 0   # jobs in the requested state / jobs actually deleted
    for i in range(bcnt):
        if stat != lines['State'][i]: continue
        pcnt += 1
        kcnt += rdakill_pbs_batch(lines['JobID'][i])

    if pcnt > 0:
        s = 's' if pcnt > 1 else ''
        line = "{} of {} PBS '{}' job{} Killed".format(kcnt, pcnt, stat, s)
    else:
        line = "No PBS '{}' job found to kill".format(stat)

    line += " in Queue '{}'".format(queue)
    if uname: line += " for " + uname
    PgLOG.pglog(line, PgLOG.LOGWRN)
|
|
253
|
+
|
|
254
|
+
#
|
|
255
|
+
# record a dscheck
|
|
256
|
+
#
|
|
257
|
+
def record_dscheck_interrupt(pid, host):
    """Mark the dscheck record of a killed process or batch job as interrupted.

    pid  -- process or batch id stored in the dscheck record
    host -- hostname stored in the dscheck record
    Nothing is changed when no matching dscheck record exists.
    """
    condition = "pid = {} AND hostname = '{}'".format(pid, host)
    pgrec = PgDBI.pgget("dscheck", "cindex", condition, PgLOG.LOGERR)
    if not pgrec:
        return

    # status 'I' with pid 0 releases the lock held by the killed process
    record = dict(chktime=int(time.time()), status='I', pid=0)
    PgDBI.pgupdt("dscheck", record, "cindex = {}".format(pgrec['cindex']), PgLOG.LGEREX)
|
|
263
|
+
|
|
264
|
+
#
|
|
265
|
+
# call main() to start program
|
|
266
|
+
#
|
|
267
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
|
|
2
|
+
Kill one or multiple processes and their children for given local process ID or
|
|
3
|
+
other process information; kill one or multiple SLURM/PBS batch jobs for given batch
|
|
4
|
+
Job ID or Status. For killing SLURM/PBS batch jobs, you must log in to cheyenne/casper
|
|
5
|
+
login nodes.
|
|
6
|
+
|
|
7
|
+
Usage: rdakill [-h HostName] [-p ProcessID] [-P ParentProcessID] \
|
|
8
|
+
[-s BatchStatus] [-u ProcessOwner] [-a ApplicationName]
|
|
9
|
+
|
|
10
|
+
- Option -a, application name of the process;
|
|
11
|
+
|
|
12
|
+
- Option -h, hostname the process is on. Omit it for local process,
|
|
13
|
+
but it is mandatory if the process id is a SLURM/PBS batch id.
|
|
14
|
+
|
|
15
|
+
- Option -p, the process id or batch job id to be stopped.
|
|
16
|
+
|
|
17
|
+
- Option -P, the parent process id;
|
|
18
|
+
|
|
19
|
+
- Option -q, the SLURM Partition or PBS queue name. It defaults to 'rda';
|
|
20
|
+
|
|
21
|
+
- Option -s, the Batch Job Status; this is mandatory if batch id is not provided;
|
|
22
|
+
|
|
23
|
+
- Option -u, use login name for the process owner. It defaults to 'all' for
|
|
24
|
+
all user login names.
|
|
25
|
+
|
|
26
|
+
This help document is displayed if rdakill is executed without option.
|
|
27
|
+
|
|
28
|
+
For example, to kill a dsrqst process with pid 13199 locally,
|
|
29
|
+
|
|
30
|
+
rdakill -p 13199 -a dsrqst
|
|
31
|
+
|
|
32
|
+
The child processes under the dsrqst process, if any, are also terminated. To kill
|
|
33
|
+
a PBS batch process with a bid = 334323,
|
|
34
|
+
|
|
35
|
+
rdakill -h PBS -p 334323
|
|
36
|
+
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: rdals
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 10/20/2020
|
|
8
|
+
# 2025-03-10 transferred to package rda_python_miscs from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose: list files/directories in a local directory and show additional
|
|
11
|
+
# information recorded in RDADB if any
|
|
12
|
+
#
|
|
13
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
14
|
+
#
|
|
15
|
+
##################################################################################
|
|
16
|
+
#
|
|
17
|
+
import re
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
import glob
|
|
21
|
+
from os import path as op
|
|
22
|
+
from rda_python_common import PgLOG
|
|
23
|
+
from rda_python_common import PgDBI
|
|
24
|
+
from rda_python_common import PgUtil
|
|
25
|
+
from rda_python_common import PgSplit
|
|
26
|
+
|
|
27
|
+
# define some constants for rdals actions
|
|
28
|
+
# define some constants for rdals actions
DIDX = 3              # description column index
CLMT = 500            # reformat list if count reach this limit
WIDTHS = [0] * 3      # WIDTHS for formated display
ALIGNS = [0, 1, 1]    # alignment, 0 - left; 1 - right

# command-line option values, keyed by the single-letter option name
RDALS = dict(
    d=0,      # 1 to list directory information only
    f=0,      # 1 to list file information only
    N=0,      # 1 to list files unformatted
    r=0,      # 1 if recursive all
    R=0,      # > 0 to set recursive limit
    D=None,   # specify delimiting symbols, default to '  '
)

# running state shared by the listing functions
LINFO = dict(
    files=[],
    curdir=None,
    tpath=None,
    dhome=None,
    dsid=None,
    dcnt=0,
    gcnt=0,
    fcnt=0,
    pcnt=0,
    pgrecs=[],
)
|
|
54
|
+
|
|
55
|
+
#
|
|
56
|
+
# main function to run the application
|
|
57
|
+
#
|
|
58
|
+
def main():
    """Parse the rdals command line, list the requested paths and print a total.

    Flags -d, -f, -N and -r switch the matching RDALS entry on; -R and -D
    take one value; every other argument is a file or directory to list
    until an option follows the file names.  With no path given, everything
    in the current directory is listed.
    """
    PgDBI.view_dbinfo()
    PgLOG.set_help_path(__file__)
    PgLOG.PGLOG['LOGFILE'] = "rdals.log"   # set different log file
    LINFO['curdir'] = get_real_path(os.getcwd())
    argv = sys.argv[1:]
    PgLOG.pglog("rdals {} ({})".format(' '.join(argv), LINFO['curdir']))

    defopt = 'l'      # option assumed for bare values; None once a path was seen
    option = defopt
    for arg in argv:
        if re.match(r'-(h|-*help|\?)$', arg):
            PgLOG.show_usage("rdals")
        ms = re.match(r'-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option not in RDALS:
                PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
            if option in 'dfNr':   # flags carry no value
                RDALS[option] = 1
                option = defopt
            continue

        if not option:
            PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
        if option == 'l':
            LINFO['files'].append(get_real_path(arg))
            defopt = None
            continue

        RDALS[option] = int(arg) if option == 'R' else arg
        option = defopt

    if not LINFO['files']:
        # view all files in current directory
        LINFO['files'] = sorted(glob.glob('*'))
        if not LINFO['files']:
            sys.stderr.write(LINFO['curdir'] + ": Empty directory\n")
            PgLOG.pgexit(1)

    if not (RDALS['d'] or RDALS['f']):
        # list both directories and files as default
        RDALS['d'] = RDALS['f'] = 1
    if not RDALS['D']:
        # default delimiter for no format display
        RDALS['D'] = '|' if RDALS['N'] else "  "
    if RDALS['r'] and not RDALS['R']:
        RDALS['R'] = 1000

    display_top_list(LINFO['files'])   # display or cache file/directory list
    if LINFO['pcnt'] > 0:
        display_format_list()          # if some left over

    total = LINFO['dcnt'] + LINFO['gcnt'] + LINFO['fcnt']
    if total > 1:
        parts = []
        for label, key in (("Dataset", 'dcnt'), ("Group", 'gcnt'), ("File", 'fcnt')):
            cnt = LINFO[key]
            if cnt > 0:
                parts.append("{} {}{}".format(cnt, label, 's' if cnt > 1 else ''))
        print("Total {} displayed".format(" & ".join(parts)))
    elif total == 0:
        where = LINFO['tpath'] if LINFO['tpath'] else LINFO['curdir']
        sys.stderr.write(where + ": No RDA data information found\n")
        PgLOG.pgexit(1)

    PgLOG.pgexit(0)
|
|
121
|
+
|
|
122
|
+
#
|
|
123
|
+
# display the top level list
|
|
124
|
+
#
|
|
125
|
+
def display_top_list(files):
    """Display each path named on the command line and, where needed, its content.

    files -- the paths to list.  A directory given with a trailing '/' is
    not displayed itself; only its content is.  LINFO['tpath'] is set per
    path to the prefix that is stripped from displayed names.
    """
    for target in files:
        if not op.exists(target):
            sys.stderr.write(target + ": NOT exists\n")
            continue

        isdir = 1 if op.isdir(target) else 0
        # do not display the directory info if it is ended by '/'
        display = 0 if (isdir and re.search(r'/$', target)) else 1
        if not display:
            target = re.sub(r'/$', '', target)

        if not re.match(r'^/', target):
            target = PgLOG.join_paths(LINFO['curdir'], target)
        top = op.dirname(target) if display else target
        LINFO['tpath'] = top + "/"

        if display:
            display_line(target, isdir)
        if isdir and (RDALS['R'] or not display or not LINFO['dsid']):
            members = sorted(glob.glob(target + "/*"))
            display_list(members, 1)
        # NOTE(review): nesting of this flush check was reconstructed from a
        # listing without indentation -- confirm it belongs at loop level
        if LINFO['pcnt'] > CLMT:
            display_format_list()
|
|
146
|
+
|
|
147
|
+
#
|
|
148
|
+
# recursively display directory/file info
|
|
149
|
+
#
|
|
150
|
+
def display_list(files, level):
    """Display the given paths, descending into directories up to RDALS['R'] levels.

    files -- paths found in one directory
    level -- depth of that directory below the top-level path, starting at 1
    """
    for entry in files:
        isdir = 1 if op.isdir(entry) else 0
        display_line(entry, isdir)
        if isdir and level < RDALS['R']:
            display_list(sorted(glob.glob(entry + "/*")), level + 1)
        # NOTE(review): nesting of this flush check was reconstructed from a
        # listing without indentation -- confirm it belongs at loop level
        if LINFO['pcnt'] > CLMT:
            display_format_list()
|
|
159
|
+
|
|
160
|
+
#
|
|
161
|
+
# find dataset/group info; display or cache file
|
|
162
|
+
#
|
|
163
|
+
def _strip_top_path(file):
    """Return file without the leading LINFO['tpath'] prefix, if it has one."""
    tpath = LINFO['tpath']
    if tpath and file.startswith(tpath):
        return file[len(tpath):]
    return file


def display_line(file, isdir):
    """Look up one path in the database and display or cache its information.

    file  -- absolute path of a dataset home, a directory or a file
    isdir -- true when file is a directory

    The dataset id and dataset home are remembered in LINFO so that paths
    below the same dataset home skip the dataset lookup.  A dataset home is
    shown as a 'D' record, a directory with a dsgroup record as 'G', and a
    file with a web-file record as 'F'.  Paths with no dataset or no
    matching record are silently skipped.

    Paths are compared as plain text: a dataset id such as 'ds277.6' holds
    a '.', which would match any character if used as a regular expression.
    """
    wfile = None   # path relative to the dataset home, once known
    dhome = LINFO['dhome']
    if LINFO['dsid'] and dhome and file.startswith(dhome + "/"):
        wfile = file[len(dhome) + 1:]

    if wfile is None:
        LINFO['dsid'] = PgUtil.find_dataset_id(file, logact = PgLOG.LOGWRN)
        if LINFO['dsid'] is None: return   # skip for missing dsid

        pgrec = PgDBI.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(LINFO['dsid']), PgLOG.LGEREX)
        if not pgrec: return None

        dhome = LINFO['dhome'] = "{}/{}".format(PgLOG.PGLOG['DSDHOME'], LINFO['dsid'])
        if dhome == file:
            if RDALS['d']:
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["D" + _strip_top_path(file), pgrec['ns'], str(pgrec['nc']), title])
                LINFO['dcnt'] += 1
            return

        if not file.startswith(dhome + "/"): return
        wfile = file[len(dhome) + 1:]

    # NOTE(review): wfile is placed into the query conditions unquoted; a
    # name containing a single quote would break them -- confirm whether
    # PgDBI offers value escaping
    if isdir:
        if RDALS['d']:   # check and display group info for directory
            pgrec = PgDBI.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                                "dsid = '{}' AND webpath = '{}'".format(LINFO['dsid'], wfile), PgLOG.LGEREX)
            if pgrec:
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["G" + _strip_top_path(file), pgrec['ns'], str(pgrec['nc']), title])
                LINFO['gcnt'] += 1

    elif RDALS['f']:   # check and display file info
        pgrec = PgSplit.pgget_wfile(LINFO['dsid'], "data_size, data_format, note",
                                    "wfile = '{}'".format(wfile), PgLOG.LGEREX)
        if pgrec:
            if pgrec['note']:
                note = re.sub(r'\n', ' ', pgrec['note'])   # remove '\n' in note
            else:
                note = ''
            display_record(["F" + _strip_top_path(file), pgrec['data_size'], pgrec['data_format'], note])
            LINFO['fcnt'] += 1
|
|
214
|
+
|
|
215
|
+
#
|
|
216
|
+
# display one file info
|
|
217
|
+
#
|
|
218
|
+
def display_record(disp):
    """Print one record at once, or cache it for the formatted display.

    disp -- list of four fields: name, size, a count or format field, and a
    description.  The size is replaced in place by its text form.  With
    RDALS['N'] set the record is printed immediately; otherwise it is
    appended to LINFO['pgrecs'] and the column widths are updated.
    """
    disp[1] = get_float_string(disp[1])
    if RDALS['N']:
        print(RDALS['D'].join(disp))
        return

    LINFO['pgrecs'].append(disp)
    LINFO['pcnt'] += 1
    # the description column (index DIDX) is never padded, so skip it
    for col in range(DIDX):
        width = len(disp[col])
        if WIDTHS[col] < width:
            WIDTHS[col] = width
|
|
229
|
+
|
|
230
|
+
#
|
|
231
|
+
# display cached list with format
|
|
232
|
+
#
|
|
233
|
+
def display_format_list():
    """Print all cached records in aligned columns, then empty the cache.

    The first DIDX fields of each record are padded to the widths in WIDTHS,
    right-aligned where ALIGNS is 1 and left-aligned otherwise; the fields
    are joined with the delimiter RDALS['D'].
    """
    for disp in LINFO['pgrecs']:
        for i in range(DIDX):
            if ALIGNS[i] == 1:
                disp[i] = "{:>{}}".format(disp[i], WIDTHS[i])
            else:
                disp[i] = "{:{}}".format(disp[i], WIDTHS[i])
        print(RDALS['D'].join(disp))

    # Drop the printed records together with the counter.  Resetting only the
    # counter would make the next flush start again at the first cached
    # record, reprinting old records and never reaching the new ones.
    del LINFO['pgrecs'][:]
    LINFO['pcnt'] = 0
|
|
245
|
+
|
|
246
|
+
#
|
|
247
|
+
# change size to floating point value with unit
|
|
248
|
+
#
|
|
249
|
+
def get_float_string(val):
    """Return a size as text with a unit letter: B, K, M, G, T or P.

    val -- size in bytes
    The value is divided by 1000 while it is above 1000 and a larger unit
    exists.  Scaled values are shown with two decimals; unscaled values are
    shown as given.
    """
    units = ('B', 'K', 'M', 'G', 'T', 'P')
    last = len(units) - 1

    idx = 0
    while idx < last and val > 1000:
        val = val / 1000
        idx += 1

    if idx == 0:
        return "{}{}".format(val, units[0])
    return "{:.2f}{}".format(val, units[idx])
|
|
262
|
+
|
|
263
|
+
#
|
|
264
|
+
# replace a /gpfs path with its /glade equivalent
|
|
265
|
+
#
|
|
266
|
+
def get_real_path(path):
    """Return path with a leading /gpfs location rewritten to its /glade name.

    '/gpfs/u...' becomes '/glade/u...' and '/gpfs/csfs1/...' becomes
    '/glade/campaign/...'; any other path is returned unchanged.
    """
    rewrites = (
        ('/gpfs/u', '/glade/u'),
        ('/gpfs/csfs1/', '/glade/campaign/'),
    )
    for prefix, replacement in rewrites:
        if path.startswith(prefix):
            return replacement + path[len(prefix):]
    return path
|
|
274
|
+
|
|
275
|
+
#
|
|
276
|
+
# call main() to start program
|
|
277
|
+
#
|
|
278
|
+
if __name__ == "__main__": main()
|