rda-python-miscs 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_miscs/{rdacp.py → gdexcp.py} +165 -22
- rda_python_miscs/gdexcp.usg +145 -0
- rda_python_miscs/{rdakill.py → gdexkill.py} +10 -10
- rda_python_miscs/{rdakill.usg → gdexkill.usg} +9 -9
- rda_python_miscs/{rdamod.py → gdexmod.py} +16 -16
- rda_python_miscs/{rdamod.usg → gdexmod.usg} +13 -13
- rda_python_miscs/{rdaown.py → gdexown.py} +19 -19
- rda_python_miscs/{rdaown.usg → gdexown.usg} +12 -12
- rda_python_miscs/{rdaps.py → gdexps.py} +8 -8
- rda_python_miscs/{rdaps.usg → gdexps.usg} +8 -8
- rda_python_miscs/{rdasub.py → gdexsub.py} +6 -6
- rda_python_miscs/{rdasub.usg → gdexsub.usg} +7 -7
- rda_python_miscs/{rdazip.py → gdexzip.py} +8 -8
- rda_python_miscs/{rdazip.usg → gdexzip.usg} +7 -7
- {rda_python_miscs-3.0.1.dist-info → rda_python_miscs-3.0.3.dist-info}/METADATA +9 -9
- {rda_python_miscs-3.0.1.dist-info → rda_python_miscs-3.0.3.dist-info}/RECORD +20 -20
- rda_python_miscs-3.0.3.dist-info/entry_points.txt +20 -0
- rda_python_miscs/rdacp.usg +0 -77
- rda_python_miscs-3.0.1.dist-info/entry_points.txt +0 -20
- {rda_python_miscs-3.0.1.dist-info → rda_python_miscs-3.0.3.dist-info}/WHEEL +0 -0
- {rda_python_miscs-3.0.1.dist-info → rda_python_miscs-3.0.3.dist-info}/licenses/LICENSE +0 -0
- {rda_python_miscs-3.0.1.dist-info → rda_python_miscs-3.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
##################################################################################
|
|
3
|
-
# Title:
|
|
3
|
+
# Title: gdexcp
|
|
4
4
|
# Author: Zaihua Ji, zji@ucar.edu
|
|
5
5
|
# Date: 10/24/2020
|
|
6
6
|
# 2025-03-10 transferred to package rda_python_miscs from
|
|
7
7
|
# https://github.com/NCAR/rda-utility-programs.git
|
|
8
|
-
# Purpose: copy files locally and remotely by '
|
|
8
|
+
# Purpose: copy files locally and remotely by 'gdexdata'
|
|
9
9
|
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
10
10
|
##################################################################################
|
|
11
11
|
import re
|
|
@@ -14,16 +14,16 @@ import sys
|
|
|
14
14
|
from os import path as op
|
|
15
15
|
from rda_python_common.pg_file import PgFile
|
|
16
16
|
|
|
17
|
-
class
|
|
18
|
-
"""Copy files and directories locally or between remote hosts via '
|
|
17
|
+
class GdexCp(PgFile):
|
|
18
|
+
"""Copy files and directories locally or between remote hosts via 'gdexdata'.
|
|
19
19
|
|
|
20
20
|
Supports local-to-local, local-to-remote, remote-to-local, and Object Store /
|
|
21
|
-
Globus transfers. Target files are owned by '
|
|
21
|
+
Globus transfers. Target files are owned by 'gdexdata' and created with
|
|
22
22
|
configurable permission modes. Recursive copying is controlled by -r / -R.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
def __init__(self):
|
|
26
|
-
"""Initialize
|
|
26
|
+
"""Initialize GdexCp with default copy options and runtime state."""
|
|
27
27
|
super().__init__()
|
|
28
28
|
self.RDACP = {
|
|
29
29
|
'fh': None, # from host name, default to localhost
|
|
@@ -33,15 +33,21 @@ class RdaCp(PgFile):
|
|
|
33
33
|
'fp': None, # from Globus endpoint
|
|
34
34
|
'tp': None, # to Globus endpoint
|
|
35
35
|
'f': [], # from file names
|
|
36
|
+
'i': None, # input file holding a list of from file names, one per line
|
|
36
37
|
't': None, # to file name
|
|
37
38
|
'r': 0, # 1 if recursive all
|
|
38
39
|
'R': 0, # > 0 to set recursive limit
|
|
40
|
+
'o': 0, # 1 to force a downloaded file owned by COMMONUSER; needs -fp
|
|
41
|
+
'O': 0, # 1 to override an existing target file of the same size
|
|
42
|
+
'm': 1, # number of multiple processes to copy files in parallel
|
|
43
|
+
'd': 0, # 1 to add a dscheck record for delayed PBS batch process
|
|
39
44
|
'F': 0o664, # to file mode, default to 664
|
|
40
45
|
'D': 0o775, # to directory mode, default to 775
|
|
41
46
|
}
|
|
42
47
|
self.CINFO = {
|
|
43
48
|
'tcnt': 0,
|
|
44
49
|
'htcnt': 0,
|
|
50
|
+
'pcnt': 0, # count of dispatched child processes for option -m
|
|
45
51
|
'cpflag': 0, # 1 file only, 2 directory only, 3 both
|
|
46
52
|
'cpstr': ['', 'Files', 'Directories', 'Files/Directories'],
|
|
47
53
|
'fpath': None,
|
|
@@ -64,8 +70,8 @@ class RdaCp(PgFile):
|
|
|
64
70
|
argv = sys.argv[1:]
|
|
65
71
|
self.set_suid(self.PGLOG['EUID'])
|
|
66
72
|
self.set_help_path(__file__)
|
|
67
|
-
self.PGLOG['LOGFILE'] = "
|
|
68
|
-
self.cmdlog("
|
|
73
|
+
self.PGLOG['LOGFILE'] = "gdexcp.log" # set different log file
|
|
74
|
+
self.cmdlog("gdexcp {} ({})".format(' '.join(argv), self.CINFO['curdir']))
|
|
69
75
|
defopt = option = 'f'
|
|
70
76
|
for arg in argv:
|
|
71
77
|
if re.match(r'-(h|-help)$', arg, re.I):
|
|
@@ -75,8 +81,8 @@ class RdaCp(PgFile):
|
|
|
75
81
|
if ms:
|
|
76
82
|
option = ms.group(1)
|
|
77
83
|
if option not in self.RDACP: self.pglog(arg + ": Unknown Option", self.LGEREX)
|
|
78
|
-
if option
|
|
79
|
-
self.RDACP[
|
|
84
|
+
if option in ('r', 'o', 'd', 'O'):
|
|
85
|
+
self.RDACP[option] = 1
|
|
80
86
|
option = None
|
|
81
87
|
continue
|
|
82
88
|
if not option: self.pglog(arg + ": Value provided without option", self.LGEREX)
|
|
@@ -84,7 +90,7 @@ class RdaCp(PgFile):
|
|
|
84
90
|
self.RDACP['f'].append(arg)
|
|
85
91
|
defopt = None
|
|
86
92
|
else:
|
|
87
|
-
if option
|
|
93
|
+
if option in ('R', 'm'):
|
|
88
94
|
self.RDACP[option] = int(arg)
|
|
89
95
|
elif option in 'FD':
|
|
90
96
|
self.RDACP[option] = self.base2int(arg, 8)
|
|
@@ -95,8 +101,29 @@ class RdaCp(PgFile):
|
|
|
95
101
|
elif option == 'fh':
|
|
96
102
|
self.CINFO['fhost'] = arg + '-'
|
|
97
103
|
option = defopt
|
|
98
|
-
if
|
|
99
|
-
|
|
104
|
+
if self.RDACP['i']: self.add_input_files(self.RDACP['i'])
|
|
105
|
+
if dohelp or not self.RDACP['f']: self.show_usage("gdexcp")
|
|
106
|
+
|
|
107
|
+
# read source paths from an input file and append them to the -f list
|
|
108
|
+
def add_input_files(self, infile):
    """Append source paths read from an input file to the -f source list.

    Each line of the input file is stripped of leading/trailing whitespace.
    Empty lines and lines whose first character is '#' are skipped; every
    other line is appended, in file order, to self.RDACP['f'].

    Args:
        infile (str): Path to the input file holding one source path per line.
    """
    finfo = self.check_local_file(infile, 0, self.LGWNEX)
    if not finfo: self.pglog("{}: Input file of -i not found".format(infile), self.LGEREX)
    # the context manager closes the file even if reading or appending raises
    with open(infile, 'r') as fd:
        for line in fd:
            line = line.strip()
            if not line or line[0] == '#': continue   # blank line or comment
            self.RDACP['f'].append(line)
|
|
126
|
+
|
|
100
127
|
# function to start actions
|
|
101
128
|
def start_actions(self):
|
|
102
129
|
"""Validate copy targets, configure host/bucket/endpoint context, and dispatch copies.
|
|
@@ -106,7 +133,7 @@ class RdaCp(PgFile):
|
|
|
106
133
|
when specified, then calls copy_top_list. Logs a summary count on completion.
|
|
107
134
|
"""
|
|
108
135
|
self.dssdb_dbname()
|
|
109
|
-
self.validate_decs_group('
|
|
136
|
+
self.validate_decs_group('gdexcp', self.PGLOG['CURUID'], 1)
|
|
110
137
|
if not self.RDACP['R'] and self.RDACP['r']: self.RDACP['R'] = 1000
|
|
111
138
|
if not self.RDACP['t']:
|
|
112
139
|
self.CINFO['tpath'] = self.RDACP['t'] = "."
|
|
@@ -132,7 +159,23 @@ class RdaCp(PgFile):
|
|
|
132
159
|
self.PGLOG['BACKUPEP'] = self.RDACP['fp']
|
|
133
160
|
elif self.RDACP['tp']:
|
|
134
161
|
self.PGLOG['BACKUPEP'] = self.RDACP['tp']
|
|
162
|
+
if self.RDACP['o']:
|
|
163
|
+
if not self.RDACP['fp']:
|
|
164
|
+
self.pglog("-o: works only when source Globus endpoint -fp is provided", self.LGEREX)
|
|
165
|
+
if self.RDACP['th'] or self.RDACP['tp'] or self.RDACP['tb']:
|
|
166
|
+
self.pglog("-o: works only for downloading to local files (no -th/-tp/-tb)", self.LGEREX)
|
|
167
|
+
if self.RDACP['m'] < 1: self.RDACP['m'] = 1
|
|
168
|
+
if self.RDACP['m'] > 16:
|
|
169
|
+
self.pglog("-m {}: process count too large, capped at 16".format(self.RDACP['m']), self.LOGWRN)
|
|
170
|
+
self.RDACP['m'] = 16
|
|
171
|
+
if self.RDACP['d']:
|
|
172
|
+
self.add_delayed_check()
|
|
173
|
+
self.cmdlog()
|
|
174
|
+
return
|
|
175
|
+
if self.RDACP['m'] > 1:
|
|
176
|
+
self.start_none_daemon('gdexcp', '', self.PGLOG['CURUID'], self.RDACP['m'], 120)
|
|
135
177
|
self.copy_top_list(self.RDACP['f'])
|
|
178
|
+
if self.RDACP['m'] > 1: self.check_child(None, 0, self.LOGWRN, 1)
|
|
136
179
|
hinfo = ''
|
|
137
180
|
if self.RDACP['fh']: hinfo += " From " + self.RDACP['fh']
|
|
138
181
|
if self.RDACP['th']: hinfo += " To " + self.RDACP['th']
|
|
@@ -175,7 +218,7 @@ class RdaCp(PgFile):
|
|
|
175
218
|
if not re.match(r'^/', file): file = self.join_paths(self.CINFO['curdir'], file)
|
|
176
219
|
self.CINFO['fpath'] = (file if dosub else op.dirname(file)) + "/"
|
|
177
220
|
if info['isfile']:
|
|
178
|
-
self.CINFO['tcnt'] += self.
|
|
221
|
+
self.CINFO['tcnt'] += self.dispatch_copy(file, info)
|
|
179
222
|
elif dosub or self.RDACP['R']:
|
|
180
223
|
flist = self.gdex_glob(file, self.RDACP['fh'], 0, self.LGWNEX)
|
|
181
224
|
if flist: self.copy_list(flist, 1, file)
|
|
@@ -197,7 +240,7 @@ class RdaCp(PgFile):
|
|
|
197
240
|
fcnt = 0
|
|
198
241
|
for file in tlist:
|
|
199
242
|
if tlist[file]['isfile']:
|
|
200
|
-
fcnt += self.
|
|
243
|
+
fcnt += self.dispatch_copy(file, tlist[file])
|
|
201
244
|
self.CINFO['cpflag'] |= (1 if tlist[file]['isfile'] else 2)
|
|
202
245
|
elif level < self.RDACP['R']:
|
|
203
246
|
flist = self.gdex_glob(file, self.RDACP['fh'], 0, self.LGWNEX)
|
|
@@ -206,20 +249,50 @@ class RdaCp(PgFile):
|
|
|
206
249
|
self.pglog("{}{}: {} {} copied from directory".format(self.CINFO['fhost'], cdir, fcnt, self.CINFO['cpstr'][self.CINFO['cpflag']]), self.LOGWRN)
|
|
207
250
|
self.CINFO['tcnt'] += fcnt
|
|
208
251
|
|
|
252
|
+
# copy one file, forking a child process when running with option -m
|
|
253
|
+
def dispatch_copy(self, fromfile, finfo):
    """Copy one file, handing the copy to a child process when -m > 1.

    With RDACP['m'] below 2 the file is copied in line through copy_file()
    and that result is returned. Otherwise start_child() is called with a
    name built from the running child counter CINFO['pcnt']. The process
    that sees PGSIG['PPID'] > 1 is treated as the child: it performs the
    copy and exits. The other process (the parent) bumps the counter and
    reports the file as dispatched.

    Args:
        fromfile (str): Absolute source file path.
        finfo (dict): Source file-info dict (with 'isfile' and 'data_size').

    Returns:
        int: the copy_file() result in single-process mode, or 1 once the
             copy has been dispatched to a child process.
    """
    if self.RDACP['m'] < 2:
        return self.copy_file(fromfile, finfo)

    child_name = "gdexcp_{}".format(self.CINFO['pcnt'])
    started = self.start_child(child_name, self.LOGWRN, 1)
    if started <= 0:
        self.pglog("{}: cannot start child process to copy".format(fromfile), self.LGEREX)

    in_child = self.PGSIG['PPID'] > 1
    if not in_child:
        # parent process: count the dispatched child and keep traversing
        # NOTE(review): the original comment says the child has already
        # dropped its DB link at this point; not verifiable from this file
        self.CINFO['pcnt'] += 1
        return 1

    # child process: do the copy, then terminate without returning to the caller
    self.copy_file(fromfile, finfo)
    sys.exit(0)
|
|
278
|
+
|
|
209
279
|
# copy one file
|
|
210
|
-
def copy_file(self, fromfile,
|
|
280
|
+
def copy_file(self, fromfile, finfo):
|
|
211
281
|
"""Resolve the destination path for one source file and perform the copy.
|
|
212
282
|
|
|
213
283
|
When a target directory is set (tpath), strips the source base path prefix
|
|
214
284
|
and joins the remainder to tpath. Otherwise copies directly to the -t value.
|
|
285
|
+
Skips the copy when the target already exists with the same size as the
|
|
286
|
+
source, unless -O is given to override an existing same-size target.
|
|
215
287
|
|
|
216
288
|
Args:
|
|
217
289
|
fromfile (str): Absolute source file path.
|
|
218
|
-
|
|
290
|
+
finfo (dict): Source file-info dict (with 'isfile' and 'data_size').
|
|
219
291
|
|
|
220
292
|
Returns:
|
|
221
293
|
int: 1 if the file was copied successfully, 0 otherwise.
|
|
222
294
|
"""
|
|
295
|
+
isfile = finfo['isfile']
|
|
223
296
|
if self.CINFO['tpath']:
|
|
224
297
|
fname = re.sub(r'^{}'.format(self.CINFO['fpath']), '', fromfile)
|
|
225
298
|
if isfile:
|
|
@@ -228,14 +301,84 @@ class RdaCp(PgFile):
|
|
|
228
301
|
tofile = self.CINFO['tpath'] + '/'
|
|
229
302
|
else:
|
|
230
303
|
tofile = self.RDACP['t']
|
|
231
|
-
|
|
304
|
+
if isfile and not self.RDACP['O']:
|
|
305
|
+
tinfo = self.check_gdex_file(tofile, self.RDACP['th'], 0, self.LGWNEX)
|
|
306
|
+
if tinfo and tinfo['data_size'] == finfo['data_size']:
|
|
307
|
+
self.pglog("{}{}: Target exists with same size, skip copying".format(self.CINFO['thost'], tofile), self.LOGWRN)
|
|
308
|
+
return 0
|
|
309
|
+
if self.RDACP['o']: return self.force_owner_copy(tofile, fromfile)
|
|
310
|
+
logact = self.LGWNEX | (self.OVRIDE if self.RDACP['O'] else 0)
|
|
311
|
+
return (1 if self.copy_gdex_file(tofile, fromfile, self.RDACP['th'], self.RDACP['fh'], logact) else 0)
|
|
312
|
+
|
|
313
|
+
# copy one file from a Globus endpoint and force COMMONUSER ownership
|
|
314
|
+
def force_owner_copy(self, tofile, fromfile):
    """Download a file via a tmp file so the final copy is owned by COMMONUSER.

    The source is first copied to a tmp file under PGLOG['TMPPATH'] named
    '<basename>.<pid>'. If that tmp file is owned by a user other than
    PGLOG['COMMONUSER'], a 'chmod g+rw' command is built through
    get_local_command() for that owner and run with pgsystem(). The tmp file
    is then copied to the final destination and removed.

    The tmp path is shell-quoted before it is placed in the chmod command, and
    the tmp file is removed in a finally clause so it is not left behind when
    a later step raises or exits.

    Args:
        tofile (str): Final local destination path.
        fromfile (str): Source file path on the Globus endpoint.

    Returns:
        int: 1 if the file was copied successfully, 0 otherwise.
    """
    import shlex   # local import: only this method needs shell quoting

    tmpfile = self.join_paths(self.PGLOG['TMPPATH'], "{}.{}".format(op.basename(fromfile), os.getpid()))
    if not self.copy_gdex_file(tmpfile, fromfile, self.RDACP['th'], self.RDACP['fh'], self.LGWNEX): return 0
    try:
        finfo = self.check_local_file(tmpfile, 2, self.LGWNEX)
        owner = finfo['logname'] if finfo else None
        if owner and owner != self.PGLOG['COMMONUSER']:
            # quote the path: the basename comes from a user-supplied source name
            # and may hold spaces or shell metacharacters; plain names are unchanged
            # NOTE(review): assumes pgsystem() honors shell-style quoting - confirm
            chmodcmd = "chmod g+rw " + shlex.quote(tmpfile)
            self.pgsystem(self.get_local_command(chmodcmd, owner), self.LGWNEX)
        ret = self.copy_gdex_file(tofile, tmpfile, self.RDACP['th'], None, self.LGWNEX)
    finally:
        # always drop the tmp file, including when the final copy fails by raising
        self.delete_local_file(tmpfile, self.LGWNEX)
    return (1 if ret else 0)
|
|
339
|
+
|
|
340
|
+
# add a dscheck record so this gdexcp command runs later as a PBS batch job
|
|
341
|
+
def add_delayed_check(self):
    """Queue this gdexcp invocation as a delayed batch job via a dscheck record.

    Builds the argument string from sys.argv with every '-d' argument removed,
    so the later run performs the copy instead of queueing itself again. The
    first 100 characters are stored in the record's 'argv' field and any
    remainder in 'argextra'. The 'qoptions' field always requests a 24 hour
    walltime; when RDACP['m'] > 1 it also requests one chunk with that many
    cpus and the same number of gigabytes of memory. The record is inserted
    into the 'dscheck' table with pgadd() and a confirmation is logged.

    The confirmation message shows the full argument string, not only the
    100-character prefix stored in 'argv', so long command lines are not
    silently truncated in the log.
    """
    argv = [arg for arg in sys.argv[1:] if arg != '-d']
    fullargs = self.argv_to_string(argv, 1)
    record = {
        'command': 'gdexcp',
        'argv': fullargs[:100],        # the rest, if any, goes to 'argextra' below
        'specialist': self.PGLOG['CURUID'],
        'workdir': self.CINFO['curdir'],
        'oindex': 0,
        'otype': '',
        'action': None,
        'dsid': None,
        'mcount': 1,
    }
    (record['date'], record['time']) = self.get_date_time()
    if len(fullargs) > 100: record['argextra'] = fullargs[100:]
    qoptions = "-l walltime=24:00:00"
    if self.RDACP['m'] > 1:
        qoptions += ",select=1:ncpus={0}:mem={0}gb".format(self.RDACP['m'])
    record['qoptions'] = qoptions
    cidx = self.pgadd("dscheck", record, self.LGWNEX|self.AUTOID)
    self.pglog("Chk{}: gdexcp {} added for delayed batch process".format(cidx, fullargs), self.LOGWRN)
|
|
232
375
|
|
|
233
376
|
# main function to execute this script
|
|
234
377
|
def main():
    """Entry point: instantiate GdexCp, parse arguments, run, and exit."""
    from rda_python_setuid.setup_guide import show_setup_guide
    # named 'app' rather than 'object' so the builtin is not shadowed
    app = GdexCp()
    show_setup_guide(app, 'rda_python_miscs', ['gdexcp', 'gdexkill', 'gdexmod'])
    app.read_parameters()
    app.start_actions()
    app.pgexit(0)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
|
|
2
|
+
Name: gdexcp - copy files and directories as user 'gdexdata'
|
|
3
|
+
|
|
4
|
+
Copy files and directories to a target location. The source and the target may
|
|
5
|
+
each reside on the local host, a remote host, an Object Store bucket, or a Globus
|
|
6
|
+
endpoint. Target files are owned by 'gdexdata' and created with configurable
|
|
7
|
+
permission modes.
|
|
8
|
+
|
|
9
|
+
Usage: gdexcp [-f] FromDirectories/Files [-t ToDirectory/FileName] \
|
|
10
|
+
[-i InputFile] [-m ProcessCount] [-d] [-r] [-R RecursiveLevel] \
|
|
11
|
+
[-fh FromHostName] [-th ToHostName] \
|
|
12
|
+
[-fb FromBucket] [-tb ToBucket] \
|
|
13
|
+
[-fp FromGlobusEndpoint] [-tp ToGlobusEndpoint] \
|
|
14
|
+
[-F FileMode] [-D DirectoryMode] [-o] [-O] [-h]
|
|
15
|
+
|
|
16
|
+
gdexcp can be run from any directory. Usage is displayed when no source paths
|
|
17
|
+
are given.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
SOURCE AND TARGET
|
|
21
|
+
|
|
22
|
+
-f FromDirectories/Files
|
|
23
|
+
Source directories and/or files to copy. This is the default option,
|
|
24
|
+
so paths may be given without the -f flag. Shell wildcards are
|
|
25
|
+
supported; use './' or '*' to copy everything in the current directory.
|
|
26
|
+
A trailing '/' on a source directory path copies the contents of that
|
|
27
|
+
directory (as a file list) rather than the directory entry itself.
|
|
28
|
+
Source paths must be readable by user 'gdexdata'; gdexcp attempts to
|
|
29
|
+
fix the mode when they are not.
|
|
30
|
+
|
|
31
|
+
-i InputFile
|
|
32
|
+
A file holding a list of source paths to copy, one path per line.
|
|
33
|
+
Blank lines and lines starting with '#' are ignored. The paths read
|
|
34
|
+
are appended to the -f source list.
|
|
35
|
+
|
|
36
|
+
-t ToDirectory/FileName
|
|
37
|
+
Target directory or file name. Defaults to '.' (the current
|
|
38
|
+
directory). A trailing '/' forces the target to be treated as a
|
|
39
|
+
directory. Multiple source files cannot be copied to a single target
|
|
40
|
+
file name.
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
SOURCE/TARGET LOCATIONS (default to the local host)
|
|
44
|
+
|
|
45
|
+
-fh FromHostName host name where the source files reside.
|
|
46
|
+
-th ToHostName host name where the target files are written.
|
|
47
|
+
-fb FromBucket Object Store bucket name for the source files.
|
|
48
|
+
-tb ToBucket Object Store bucket name for the target files.
|
|
49
|
+
-fp FromGlobusEndpoint Globus endpoint for the source files.
|
|
50
|
+
-tp ToGlobusEndpoint Globus endpoint for the target files.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
COPY BEHAVIOR
|
|
54
|
+
|
|
55
|
+
-r Copy directories and files recursively, with no depth limit.
|
|
56
|
+
|
|
57
|
+
-R RecursiveLevel
|
|
58
|
+
Copy recursively up to the given depth. -R 1 copies only the
|
|
59
|
+
immediate contents of each source directory.
|
|
60
|
+
|
|
61
|
+
-m ProcessCount
|
|
62
|
+
Number of processes used to copy files in parallel. Defaults to 1.
|
|
63
|
+
When greater than 1, the source files are distributed across that many
|
|
64
|
+
concurrent child processes. Capped at 16; a larger value is reduced to
|
|
65
|
+
16 with a warning.
|
|
66
|
+
|
|
67
|
+
-d Add a dscheck record so this gdexcp command runs later as a delayed PBS
|
|
68
|
+
batch job (submitted by the dscheck daemon via bashqsub / tcshqsub).
|
|
69
|
+
The qsub resource always sets a 24 hour walltime; when -m is greater
|
|
70
|
+
than 1 it also reserves a single node with (ProcessCount) cpus and 1gb
|
|
71
|
+
of memory per cpu.
|
|
72
|
+
|
|
73
|
+
-o Force a downloaded file to be owned by 'gdexdata'. A Globus endpoint
|
|
74
|
+
writes the local file owned by the endpoint's mapped user; with -o the
|
|
75
|
+
file is downloaded to a tmp file, then copied locally so the final file
|
|
76
|
+
is owned by 'gdexdata'. Only valid together with -fp and for
|
|
77
|
+
downloading to local files (not with -th/-tp/-tb).
|
|
78
|
+
|
|
79
|
+
-O Override an existing target. By default a source file is skipped when
|
|
80
|
+
the target already exists with the same size; give -O to copy it anyway
|
|
81
|
+
and overwrite the existing target.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
TARGET PERMISSIONS (octal notation)
|
|
85
|
+
|
|
86
|
+
-F FileMode permission mode for target files. Defaults to 664.
|
|
87
|
+
-D DirectoryMode permission mode for target directories. Defaults to 775.
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
MISCELLANEOUS
|
|
91
|
+
|
|
92
|
+
-h Display this help document.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
NOTES
|
|
96
|
+
|
|
97
|
+
- A trailing '/' on a source directory path copies the contents of that
|
|
98
|
+
directory rather than the directory entry itself.
|
|
99
|
+
- By default an unchanged target (same size) is skipped; use -O to overwrite.
|
|
100
|
+
- If a Globus endpoint (-fp/-tp) is locally accessible, a direct local copy
|
|
101
|
+
(omitting -fp/-tp and giving the local path) is faster, avoiding the Globus
|
|
102
|
+
transfer overhead.
|
|
103
|
+
- A delayed batch job (-d) is given a 24 hour walltime. Do not submit a single
|
|
104
|
+
batch job to copy too many files at once; if the copy cannot finish within 24
|
|
105
|
+
hours the job is killed. Split a very large copy into multiple -d jobs (and/or
|
|
106
|
+
raise -m) so each one completes well within the walltime.
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
EXAMPLES
|
|
110
|
+
|
|
111
|
+
1. Copy all files and subdirectories under the current directory to a remote
|
|
112
|
+
host:
|
|
113
|
+
|
|
114
|
+
gdexcp -r -f * -t /PathTo/d277006/ -th castle
|
|
115
|
+
|
|
116
|
+
2. Copy the contents of a local directory to a remote location (trailing '/'
|
|
117
|
+
on the source omits the directory entry itself):
|
|
118
|
+
|
|
119
|
+
gdexcp -r -f /PathTo/DirectoryName/ -t /PathTo/d277006/ -th castle
|
|
120
|
+
|
|
121
|
+
Without the trailing '/', DirectoryName itself is also copied:
|
|
122
|
+
|
|
123
|
+
gdexcp -r -f /PathTo/DirectoryName -t /PathTo/d277006/ -th castle
|
|
124
|
+
|
|
125
|
+
3. Copy a single file to an Object Store bucket:
|
|
126
|
+
|
|
127
|
+
gdexcp -f /PathTo/myfile.nc -tb my-bucket -t myfile.nc
|
|
128
|
+
|
|
129
|
+
4. Copy files from a remote host to the local current directory:
|
|
130
|
+
|
|
131
|
+
gdexcp -fh castle -f /PathTo/d277006/myfile.nc
|
|
132
|
+
|
|
133
|
+
5. Download a file from a source Globus endpoint and force the local file to
|
|
134
|
+
be owned by 'gdexdata' (-o requires -fp):
|
|
135
|
+
|
|
136
|
+
gdexcp -fp gdex-quasar -f /d277006/myfile.nc -t /PathTo/myfile.nc -o
|
|
137
|
+
|
|
138
|
+
6. Copy the source paths listed in an input file using 4 parallel processes:
|
|
139
|
+
|
|
140
|
+
gdexcp -i filelist.txt -t /PathTo/d277006/ -m 4
|
|
141
|
+
|
|
142
|
+
7. Queue a delayed PBS batch job to copy a directory in parallel; the dscheck
|
|
143
|
+
daemon submits it later, reserving one node with 4 cpus and 4gb of memory:
|
|
144
|
+
|
|
145
|
+
gdexcp -r -f /PathTo/DirectoryName/ -t /PathTo/d277006/ -m 4 -d
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
##################################################################################
|
|
3
|
-
# Title:
|
|
3
|
+
# Title: gdexkill
|
|
4
4
|
# Author: Zaihua Ji, zji@ucar.edu
|
|
5
5
|
# Date: 10/24/2020
|
|
6
6
|
# 2025-03-10 transferred to package rda_python_miscs from
|
|
7
7
|
# https://github.com/NCAR/rda-utility-programs.git
|
|
8
8
|
# Purpose: kill a local or batch process and its child processes for a given
|
|
9
|
-
# running process ID by '
|
|
9
|
+
# running process ID by 'gdexdata'
|
|
10
10
|
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
11
11
|
##################################################################################
|
|
12
12
|
import re
|
|
@@ -14,7 +14,7 @@ import sys
|
|
|
14
14
|
import time
|
|
15
15
|
from rda_python_common.pg_file import PgFile
|
|
16
16
|
|
|
17
|
-
class
|
|
17
|
+
class GdexKill(PgFile):
|
|
18
18
|
"""Kill local processes or PBS batch jobs by process ID, parent PID, or status.
|
|
19
19
|
|
|
20
20
|
For local processes, sends SIGKILL (-9) to the matched process and all its
|
|
@@ -24,7 +24,7 @@ class RdaKill(PgFile):
|
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
26
|
def __init__(self):
|
|
27
|
-
"""Initialize
|
|
27
|
+
"""Initialize GdexKill with default kill options."""
|
|
28
28
|
super().__init__()
|
|
29
29
|
self.RDAKILL = {
|
|
30
30
|
'a': None, # application name
|
|
@@ -52,8 +52,8 @@ class RdaKill(PgFile):
|
|
|
52
52
|
self.dssdb_dbname()
|
|
53
53
|
self.set_suid(self.PGLOG['EUID'])
|
|
54
54
|
self.set_help_path(__file__)
|
|
55
|
-
self.PGLOG['LOGFILE'] = "
|
|
56
|
-
self.cmdlog("
|
|
55
|
+
self.PGLOG['LOGFILE'] = "gdexkill.log" # set different log file
|
|
56
|
+
self.cmdlog("gdexkill {}".format(' '.join(argv)))
|
|
57
57
|
for arg in argv:
|
|
58
58
|
ms = re.match(r'-([ahpPqsu])$', arg)
|
|
59
59
|
if ms:
|
|
@@ -79,7 +79,7 @@ class RdaKill(PgFile):
|
|
|
79
79
|
optcnt += 1
|
|
80
80
|
else:
|
|
81
81
|
self.pglog(arg + ": pass in value without Option", self.LGEREX)
|
|
82
|
-
if not optcnt: self.show_usage("
|
|
82
|
+
if not optcnt: self.show_usage("gdexkill")
|
|
83
83
|
|
|
84
84
|
# function to start actions
|
|
85
85
|
def start_actions(self):
|
|
@@ -257,10 +257,10 @@ class RdaKill(PgFile):
|
|
|
257
257
|
|
|
258
258
|
# main function to execute this script
|
|
259
259
|
def main():
    """Entry point: instantiate GdexKill, parse arguments, run, and exit."""
    from rda_python_setuid.setup_guide import show_setup_guide
    # named 'app' rather than 'object' so the builtin is not shadowed
    app = GdexKill()
    show_setup_guide(app, 'rda_python_miscs', ['gdexcp', 'gdexkill', 'gdexmod'])
    app.read_parameters()
    app.start_actions()
    app.pgexit(0)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
other filter criteria; or cancel one or more PBS batch jobs by job ID or by
|
|
4
4
|
job status. For PBS operations the -h option must specify the PBS node name.
|
|
5
5
|
|
|
6
|
-
Usage:
|
|
6
|
+
Usage: gdexkill [-h HostName] [-p ProcessID] [-P ParentProcessID] \
|
|
7
7
|
[-s BatchStatus] [-q BatchQueue] [-r] \
|
|
8
8
|
[-u ProcessOwner] [-a ApplicationName]
|
|
9
9
|
|
|
@@ -36,31 +36,31 @@
|
|
|
36
36
|
Without this option all users are included.
|
|
37
37
|
|
|
38
38
|
For local processes, all child processes are also terminated recursively.
|
|
39
|
-
Usage is displayed if
|
|
39
|
+
Usage is displayed if gdexkill is run without any options or arguments.
|
|
40
40
|
|
|
41
41
|
Examples:
|
|
42
42
|
|
|
43
43
|
1. Kill a local process by PID:
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
gdexkill -p 13199
|
|
46
|
+
gdexkill 13199
|
|
47
47
|
|
|
48
48
|
2. Kill a local process by PID and verify it matches a specific application:
|
|
49
49
|
|
|
50
|
-
|
|
50
|
+
gdexkill -p 13199 -a dsrqst
|
|
51
51
|
|
|
52
52
|
3. Kill all local 'dsrqst' processes (and their children) owned by a user:
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
gdexkill -u zji -a dsrqst
|
|
55
55
|
|
|
56
56
|
4. Kill a PBS batch job by job ID:
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
gdexkill -h casper-pbs -p 334323
|
|
59
59
|
|
|
60
60
|
5. Kill all running PBS batch jobs for a user in the default queue:
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
gdexkill -h casper-pbs -s RUN -u zji
|
|
63
63
|
|
|
64
64
|
6. Kill all pending PBS batch jobs in a specific queue:
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
gdexkill -h casper-pbs -s PEND -q gdex
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
##################################################################################
|
|
3
|
-
# Title:
|
|
3
|
+
# Title: gdexmod
|
|
4
4
|
# Author: Zaihua Ji, zji@ucar.edu
|
|
5
5
|
# Date: 10/24/2020
|
|
6
6
|
# 2025-03-10 transferred to package rda_python_miscs from
|
|
7
7
|
# https://github.com/NCAR/rda-utility-programs.git
|
|
8
8
|
# Purpose: change file/directory modes in one or more given local directories
|
|
9
|
-
# owned by '
|
|
9
|
+
# owned by 'gdexdata'
|
|
10
10
|
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
11
11
|
##################################################################################
|
|
12
12
|
import re
|
|
@@ -15,17 +15,17 @@ import sys
|
|
|
15
15
|
from os import path as op
|
|
16
16
|
from rda_python_common.pg_file import PgFile
|
|
17
17
|
|
|
18
|
-
class
|
|
19
|
-
"""Change file and directory permission modes for paths owned by '
|
|
18
|
+
class GdexMod(PgFile):
|
|
19
|
+
"""Change file and directory permission modes for paths owned by 'gdexdata'.
|
|
20
20
|
|
|
21
|
-
Only items owned by '
|
|
21
|
+
Only items owned by 'gdexdata' are changed; items with a different owner are
|
|
22
22
|
logged as errors. Items already at the target mode are silently skipped.
|
|
23
23
|
A leading letter ('D' or 'F') is logged with each changed path to indicate
|
|
24
24
|
its type.
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
def __init__(self):
|
|
28
|
-
"""Initialize
|
|
28
|
+
"""Initialize GdexMod with default mode-change options and runtime state."""
|
|
29
29
|
super().__init__()
|
|
30
30
|
self.RDAMOD = {
|
|
31
31
|
'd': 0, # 1 to change directory mode
|
|
@@ -55,9 +55,9 @@ class RdaMod(PgFile):
|
|
|
55
55
|
"""
|
|
56
56
|
self.set_suid(self.PGLOG['EUID'])
|
|
57
57
|
self.set_help_path(__file__)
|
|
58
|
-
self.PGLOG['LOGFILE'] = "
|
|
58
|
+
self.PGLOG['LOGFILE'] = "gdexmod.log" # set different log file
|
|
59
59
|
argv = sys.argv[1:]
|
|
60
|
-
self.cmdlog("
|
|
60
|
+
self.cmdlog("gdexmod {} ({})".format(' '.join(argv), self.MINFO['curdir']))
|
|
61
61
|
option = defopt = 'l'
|
|
62
62
|
for arg in argv:
|
|
63
63
|
ms = re.match(r'-(\w)$', arg)
|
|
@@ -80,7 +80,7 @@ class RdaMod(PgFile):
|
|
|
80
80
|
else:
|
|
81
81
|
self.RDAMOD[option] = arg
|
|
82
82
|
option = defopt
|
|
83
|
-
if self.RDAMOD['h'] or not self.MINFO['files']: self.show_usage("
|
|
83
|
+
if self.RDAMOD['h'] or not self.MINFO['files']: self.show_usage("gdexmod")
|
|
84
84
|
|
|
85
85
|
# function to start actions
|
|
86
86
|
def start_actions(self):
|
|
@@ -89,7 +89,7 @@ class RdaMod(PgFile):
|
|
|
89
89
|
if not (self.RDAMOD['d'] or self.RDAMOD['f']):
|
|
90
90
|
self.RDAMOD['d'] = self.RDAMOD['f'] = 1 # both directories and files as default
|
|
91
91
|
if not self.RDAMOD['R'] and self.RDAMOD['r']: self.RDAMOD['R'] = 1000
|
|
92
|
-
self.validate_decs_group('
|
|
92
|
+
self.validate_decs_group('gdexmod', self.PGLOG['CURUID'], 1)
|
|
93
93
|
self.change_top_list(self.MINFO['files'])
|
|
94
94
|
if (self.MINFO['dcnt'] + self.MINFO['fcnt']) > 1:
|
|
95
95
|
msg = ''
|
|
@@ -159,7 +159,7 @@ class RdaMod(PgFile):
|
|
|
159
159
|
"""Change the permission mode of one file or directory.
|
|
160
160
|
|
|
161
161
|
Skips the item if the -f/-d flag for its type is not set, if it is not
|
|
162
|
-
owned by '
|
|
162
|
+
owned by 'gdexdata', or if its current mode already matches the target.
|
|
163
163
|
Logs the old-to-new mode transition on success or an error on owner mismatch.
|
|
164
164
|
Updates MINFO['fcnt'] for files and MINFO['dcnt'] for directories on success.
|
|
165
165
|
|
|
@@ -180,8 +180,8 @@ class RdaMod(PgFile):
|
|
|
180
180
|
if not self.RDAMOD['d']: return 0
|
|
181
181
|
fname = "D" + fname
|
|
182
182
|
mode = self.RDAMOD['D']
|
|
183
|
-
if info['logname'] != "
|
|
184
|
-
return self.pglog("{}: owner {} not
|
|
183
|
+
if info['logname'] != "gdexdata":
|
|
184
|
+
return self.pglog("{}: owner {} not gdexdata".format(fname, info['logname']), self.LOGERR)
|
|
185
185
|
if info['mode'] == mode: return 0 # no need change mode
|
|
186
186
|
if self.set_local_mode(file, info['isfile'], mode, info['mode'], info['logname'], self.LOGWRN):
|
|
187
187
|
if info['isfile']:
|
|
@@ -193,10 +193,10 @@ class RdaMod(PgFile):
|
|
|
193
193
|
|
|
194
194
|
# main function to execute this script
|
|
195
195
|
def main():
    """Entry point: instantiate GdexMod, parse arguments, run, and exit."""
    from rda_python_setuid.setup_guide import show_setup_guide
    # named 'app' rather than 'object' so the builtin is not shadowed
    app = GdexMod()
    show_setup_guide(app, 'rda_python_miscs', ['gdexcp', 'gdexkill', 'gdexmod'])
    app.read_parameters()
    app.start_actions()
    app.pgexit(0)
|