rda-python-miscs 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_miscs/__init__.py +1 -0
- rda_python_miscs/bashqsub.py +213 -0
- rda_python_miscs/bashqsub.usg +64 -0
- rda_python_miscs/pgwget.py +186 -0
- rda_python_miscs/rdacp.py +212 -0
- rda_python_miscs/rdacp.usg +62 -0
- rda_python_miscs/rdakill.py +267 -0
- rda_python_miscs/rdakill.usg +36 -0
- rda_python_miscs/rdals.py +278 -0
- rda_python_miscs/rdals.usg +61 -0
- rda_python_miscs/rdamod.py +172 -0
- rda_python_miscs/rdamod.usg +51 -0
- rda_python_miscs/rdaown.py +169 -0
- rda_python_miscs/rdaown.usg +46 -0
- rda_python_miscs/rdaps.py +194 -0
- rda_python_miscs/rdaps.usg +29 -0
- rda_python_miscs/rdasub.py +117 -0
- rda_python_miscs/rdasub.usg +12 -0
- rda_python_miscs/rdazip.py +64 -0
- rda_python_miscs/rdazip.usg +20 -0
- rda_python_miscs/tcshqsub.py +213 -0
- rda_python_miscs/tcshqsub.usg +62 -0
- rda_python_miscs-1.0.1.dist-info/LICENSE +21 -0
- rda_python_miscs-1.0.1.dist-info/METADATA +17 -0
- rda_python_miscs-1.0.1.dist-info/RECORD +28 -0
- rda_python_miscs-1.0.1.dist-info/WHEEL +5 -0
- rda_python_miscs-1.0.1.dist-info/entry_points.txt +11 -0
- rda_python_miscs-1.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: bashqsub
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 11/19/2020
|
|
8
|
+
# 2025-03-07 transferred to package rda_python_miscs from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose: python script to submit a batch job on PBS node via bash script
|
|
11
|
+
#
|
|
12
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
|
|
22
|
+
# Comma-separated modules loaded in every job script, keyed by the value of
# option -res; the 'default' entry is used when the key is not found.
DEFMODS = {
    'default' : "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2",
}

# Comma-separated shell commands written to the job script after the module
# loads, keyed the same way as DEFMODS.
DEFLIBS = {
    'default' : "conda activate /glade/work/rdadata/conda-envs/pg-casper",
}

# Maps a module name to the module that gets a 'module unload' right before
# the named module is loaded; currently empty.
SWAPMODS = {
}

RESOURCES = {     # resource list for option -l
    'walltime' : '6:00:00',  # if this is changed, change defpbstime in PgCheck.py too
    'select' : '1:ncpus=1:mem=1gb'
}

# Each entry becomes one '#PBS -<key> [value]' line in the generated script;
# options given on the command line are added to or override these entries.
SOPTIONS = {    # single-dash option values
    'o' : None,   # will set to default if not provided
    'e' : None,
    'A' : "P43713000",
    'q' : "rda@casper-pbs",
#   'm' : 'a',
    'm' : 'n',
}
|
|
46
|
+
|
|
47
|
+
#
|
|
48
|
+
# main function to execute this script
|
|
49
|
+
#
|
|
50
|
+
def main():
    """Parse the command line, build a bash job script and submit it.

    Arguments are consumed from left to right:
      * '-b' turns on background mode in PgLOG;
      * any other single-letter option '-X' is recorded in SOPTIONS, which
        build_bash_script() writes out as '#PBS -X [value]' lines;
      * '-cmd', '-cwd', '-env', '-mod' and '-res' are the customized options
        ('-env' is stored as the single-letter option 'v');
      * parsing stops at the value of '-cmd'; all remaining arguments are
        appended to the command as its own options.

    The generated script is logged and passed to the PBS submit command, and
    the process then exits with status 0.
    """
    aname = 'bashqsub'
    pname = 'rdaqsub'
    PgLOG.set_help_path(__file__)
    rdasub = PgLOG.BCHCMDS['PBS']
    coptions = {'cmd' : None, 'cwd' : None, 'env' : None, 'mod' : None, 'res' : 'default'}  # customized options
    copts = '|'.join(coptions)
    option = None
    argv = sys.argv[1:]
    if not argv:
        PgLOG.show_usage(aname)
    PgLOG.PGLOG['LOGFILE'] = pname + ".log"
    PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv)))
    if not PgLOG.valid_command(rdasub):
        PgLOG.pglog("{}: miss {} command to submit batch job".format(rdasub, PgLOG.PGLOG['PBSNAME']), PgLOG.LGWNEX)

    while argv:
        arg = argv.pop(0)
        ms = re.match(r'^-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option == "b":
                PgLOG.PGLOG['BCKGRND'] = 1
                option = None
            else:
                # register the option now; a following value (if any) replaces ''
                SOPTIONS[option] = ''
            continue
        ms = re.match(r'^-({})$'.format(copts), arg)
        if ms:
            option = ms.group(1)
            if option == "env":
                option = 'v'   # -env is an alias of qsub option -v
            continue

        if not option:
            PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, rdasub), PgLOG.LGEREX)
        if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg):   # quote string with space but not quoted yet
            if arg.find("'") > -1:
                arg = '"{}"'.format(arg)
            else:
                arg = "'{}'".format(arg)

        if option in coptions:
            coptions[option] = arg
            if option == "cmd":
                break   # the rest of argv belongs to the command
        else:
            SOPTIONS[option] = arg
        option = None

    if not coptions['cmd']:
        PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX)
    args = PgLOG.argv_to_string(argv, 0)   # append command options
    if not SOPTIONS['o']:
        SOPTIONS['o'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname)
    if not SOPTIONS['e']:
        SOPTIONS['e'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname)
    if 'N' not in SOPTIONS:
        SOPTIONS['N'] = op.basename(coptions['cmd'])

    if coptions['cwd']:
        # str.find() returns -1 (truthy) when '$' is absent and 0 (falsy) when
        # it is the first character, so test for membership instead
        if '$' in coptions['cwd']:
            coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX)
        os.chdir(coptions['cwd'])

    cmd = PgLOG.valid_command(coptions['cmd'])
    if not cmd and not re.match(r'^/', coptions['cmd']):
        # retry relative to the (possibly changed) working directory
        cmd = PgLOG.valid_command('./' + coptions['cmd'])
    if not cmd:
        PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX)
    if args:
        cmd += " " + args

    sbuf = build_bash_script(cmd, coptions, rdasub)
    PgLOG.pglog(sbuf, PgLOG.MSGLOG)
    PgLOG.PGLOG['ERR2STD'] = ['bind mouting']
    PgLOG.pgsystem(rdasub, PgLOG.LOGWRN, 6, sbuf)
    PgLOG.PGLOG['ERR2STD'] = []

    sys.exit(0)
|
|
121
|
+
|
|
122
|
+
#
|
|
123
|
+
# build bash script to submit a PBS batch job
|
|
124
|
+
#
|
|
125
|
+
def build_bash_script(cmd, coptions, rdasub):
    """Return the text of the bash script submitted as a PBS batch job.

    cmd      -- full command line to run inside the job
    coptions -- customized options; 'res' and 'mod' select the modules/libs
    rdasub   -- name of the submit command (not used here)

    The script holds the '#PBS' directives from SOPTIONS and RESOURCES, the
    login environment setup, the module loads and library commands, and
    finally the command itself (echoed first, followed by 'date').
    """
    # fold a pending -l option into RESOURCES before the directives are written
    if 'l' in SOPTIONS:
        add_resources()

    lines = ["#!/usr/bin/bash", ""]   # qsub starting bash script

    # add options to bash script for qsub
    for name in SOPTIONS:
        value = SOPTIONS[name]
        lines.append("#PBS -" + name + (" {}".format(value) if value else ""))
    for name in RESOURCES:
        value = RESOURCES[name]
        lines.append("#PBS -l" + (" {}={}".format(name, value) if value else ""))

    # always include the login user's bash resource file
    home = "{}/{}".format(PgLOG.PGLOG['USRHOME'], PgLOG.PGLOG['CURUID'])
    lines.extend([
        "export HOME={}".format(home),
        "source /etc/profile.d/z00_modules.sh",
        "source /glade/u/apps/opt/conda/etc/profile.d/conda.sh",
        "source {}/.bashrc".format(home),
        "pwd; hostname; date",
    ])

    script = "\n".join(lines) + "\n"
    script += add_modules(coptions['res'], coptions['mod'])
    script += set_vm_libs(coptions['res'])
    script += "\necho {}\n{}\n\ndate\n".format(cmd, cmd)

    return script
|
|
152
|
+
|
|
153
|
+
#
|
|
154
|
+
# check and add resource options
|
|
155
|
+
#
|
|
156
|
+
def add_resources():
    """Move the comma-separated 'name=value' pairs of option -l into RESOURCES.

    Each pair overrides or extends the RESOURCES table; a pair without '=' is
    reported through PgLOG.pglog with LGEREX.  The 'l' entry is removed from
    SOPTIONS afterwards.
    """
    pair = re.compile(r'^([^=]+)=(.+)$')
    for item in re.split(',', SOPTIONS['l']):
        found = pair.match(item)
        if not found:
            PgLOG.pglog(item + ": use '=' to separate resource name & value", PgLOG.LGEREX)
            continue
        name, value = found.group(1), found.group(2)
        RESOURCES[name] = value
    del SOPTIONS['l']
|
|
165
|
+
|
|
166
|
+
#
|
|
167
|
+
# add module loads for modules provided
|
|
168
|
+
#
|
|
169
|
+
def add_modules(res, mods):
    """Return the 'module ...' lines for the default and the requested modules.

    res  -- key into DEFMODS; the 'default' entry is used for unknown keys
    mods -- optional comma-separated list of additional modules; an entry
            starting with '/' is emitted as 'module use', and an entry whose
            name (without '/version') is in the default list is skipped

    The returned text starts with an empty line.
    """
    def base_name(module):
        # strip a trailing '/version' part, if present
        found = re.match(r'^(.+)/', module)
        return found.group(1) if found else module

    def load_lines(module):
        # 'module unload' of the swapped module (if any) precedes the load
        text = ""
        key = base_name(module)
        if key in SWAPMODS:
            text += "module unload {}\n".format(SWAPMODS[key])
        return text + "module load {}\n".format(module)

    defaults = re.split(',', DEFMODS.get(res, DEFMODS['default']))
    pieces = ["\n"]
    pieces.extend(load_lines(module) for module in defaults)

    if mods:
        for module in re.split(',', mods):
            if re.match(r'^/', module):
                pieces.append("module use {}\n".format(module))
            elif base_name(module) not in defaults:
                pieces.append(load_lines(module))

    return "".join(pieces)
|
|
194
|
+
|
|
195
|
+
#
|
|
196
|
+
# set virtual machine libraries
|
|
197
|
+
#
|
|
198
|
+
def set_vm_libs(res):
    """Return the library setup commands for *res*, one per line.

    The commands come from DEFLIBS[res], or DEFLIBS['default'] when *res* has
    no entry, split on ','.  The text starts with an empty line; an empty
    string is returned when no commands are configured.
    """
    commands = DEFLIBS.get(res, DEFLIBS['default'])
    if not commands:
        return ''

    return "\n" + "".join(command + "\n" for command in re.split(',', commands))
|
|
209
|
+
|
|
210
|
+
#
|
|
211
|
+
# call main() to start program
|
|
212
|
+
#
|
|
213
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
|
|
2
|
+
To submit a job execution as a batch job in bash script on a PBS node via 'qsub'.
|
|
3
|
+
|
|
4
|
+
Usage: rdaqsub [qsub-options] [-cwd WorkDir] [-env EnvironmentPairs] \
|
|
5
|
+
[-mod Modules] [-res Reservation] -cmd Command [cmd-options]
|
|
6
|
+
|
|
7
|
+
- qsub-options, accepts options that can be passed to 'qsub' in a bash script.
|
|
8
|
+
Check qsub man page for help on the PBS batch options. Here is the list of
|
|
9
|
+
qsub options that are included at default:
|
|
10
|
+
-o LOGPATH/rdaqsub/
|
|
11
|
+
-e LOGPATH/rdaqsub/
|
|
12
|
+
-A P43713000
|
|
13
|
+
-m n
|
|
14
|
+
-q rda@casper-pbs
|
|
15
|
+
-l walltime=6:00:00,select=1:ncpus=1:mem=1gb
|
|
16
|
+
|
|
17
|
+
- Option -cwd, set the working directory for the Command to be executed. If
|
|
18
|
+
it is not specified, it defaults to the current directory where qsub
|
|
19
|
+
command is executed.
|
|
20
|
+
|
|
21
|
+
- Option -env, set environment name/value pairs separated by ',' dynamically
|
|
22
|
+
for the Command to be executed. This is equivalent to qsub-option -v.
|
|
23
|
+
|
|
24
|
+
- Option -mod, set module list separated by ',' for loading modules to execute
|
|
25
|
+
the command.
|
|
26
|
+
|
|
27
|
+
- Option -res, set Reservation name to load modules and start VM libs;
|
|
28
|
+
|
|
29
|
+
- Option -cmd, mandatory option to lead a Command to be executed;
|
|
30
|
+
|
|
31
|
+
- cmd-options, specifies options that can be passed to the Command.
|
|
32
|
+
|
|
33
|
+
A bash script example:
|
|
34
|
+
#!/usr/bin/bash
|
|
35
|
+
|
|
36
|
+
#PBS -o /gpfs/u/home/rdadata/dssdb/log/rdaqsub/
|
|
37
|
+
#PBS -e /gpfs/u/home/rdadata/dssdb/log/rdaqsub/
|
|
38
|
+
#PBS -A P43713000
|
|
39
|
+
#PBS -q rda@casper-pbs
|
|
40
|
+
#PBS -m n
|
|
41
|
+
#PBS -N dsrqst
|
|
42
|
+
#PBS -l walltime=1:00:00
|
|
43
|
+
#PBS -l select=1:ncpus=1:mem=1gb
|
|
44
|
+
export HOME=/gpfs/u/home/zji
|
|
45
|
+
source /etc/profile.d/z00_modules.sh
|
|
46
|
+
source /glade/u/apps/opt/conda/etc/profile.d/conda.sh
|
|
47
|
+
source /gpfs/u/home/zji/.bashrc
|
|
48
|
+
pwd; hostname; date
|
|
49
|
+
|
|
50
|
+
module load ncarenv
|
|
51
|
+
module load netcdf
|
|
52
|
+
module load ncl
|
|
53
|
+
module load nco
|
|
54
|
+
module load cdo
|
|
55
|
+
module load conda
|
|
56
|
+
module load grib-util
|
|
57
|
+
module load wgrib2
|
|
58
|
+
|
|
59
|
+
conda activate /glade/work/rdadata/conda-envs/pg-casper
|
|
60
|
+
|
|
61
|
+
echo /gpfs/u/home/rdadata/bin/dsrqst d277000 PR -RI 750748 -b -d
|
|
62
|
+
/gpfs/u/home/rdadata/bin/dsrqst d277000 PR -RI 750748 -b -d
|
|
63
|
+
|
|
64
|
+
date
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : pgwget
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 12/02/2020
|
|
8
|
+
# 2025-03-10 transferred to package rda_python_miscs from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose : wrapper to wget to get a file with wildcard in name
|
|
11
|
+
#
|
|
12
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
|
|
16
|
+
import sys
|
|
17
|
+
import re
|
|
18
|
+
from rda_python_common import PgLOG
|
|
19
|
+
from rda_python_common import PgUtil
|
|
20
|
+
from rda_python_common import PgFile
|
|
21
|
+
|
|
22
|
+
# Command line option values, keyed by the upper-case option name
OPTIONS = {
    'OP' : "-np -nH -nd -m -e robots=off --no-check-certificate",   # options passed to wget
    'UL' : None,    # (mandatory) web URL with path
    'RN' : None,    # (mandatory) root portion of the remote file name
    'FN' : None,    # local file name; defaults to RN[.EX]
    'FC' : 1,       # number of files for a valid download
    'SM' : 0,       # 1 to show wget messages; otherwise -q is added to OP
    'MC' : 0,       # minimal number of files; set to FC if 0
    'CN' : 0,       # 1 to check for newer remote files
    'CR' : 0,       # 1 to remove the downloaded remote file(s) after joining
    'EX' : None,    # file name extension
    'JC' : 'cat'    # joining command: cat, tar, first or last
}
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# main function to execute this script
|
|
38
|
+
#
|
|
39
|
+
def main():
    """Parse the command line into OPTIONS and download the requested file(s).

    '-b' turns on background mode in PgLOG.  Option names are matched
    case-insensitively against the keys of OPTIONS; -CN, -CR and -SM are
    flags, every other option takes the next argument as its value (-FC and
    -MC as integers).  The usage text is printed, and the script exits, when
    -UL or -RN is missing.  Otherwise download_wildcard_files() is run and
    the script exits with status 0.
    """
    option = None
    join_commands = ['cat', 'tar', 'first', 'last']
    options = '|'.join(OPTIONS)
    argv = sys.argv[1:]
    PgLOG.PGLOG['LOGFILE'] = "pgwget.log"

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
            option = None
            continue
        ms = re.match(r'^-({})$'.format(options), arg, re.I)
        if ms:
            option = ms.group(1).upper()
            if re.match(r'^(CN|CR|SM)$', option):
                OPTIONS[option] = 1
                option = None
            continue
        # the value of -OP is a string of wget options and normally starts
        # with '-', so it must not be rejected as an unknown option
        if option != 'OP' and re.match(r'^-.*$', arg):
            PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
        if not option:
            PgLOG.pglog(arg + ": Value passed in without leading option", PgLOG.LGEREX)

        if option == 'JC' and arg not in join_commands:
            PgLOG.pglog(arg + ": Joining Command must be one of {}".format(join_commands), PgLOG.LGEREX)
        OPTIONS[option] = int(arg) if re.match(r'^(FC|MC)$', option) else arg
        option = None

    if not (OPTIONS['UL'] and OPTIONS['RN']):
        print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]")
        print("  Provide at least WebURL and RootFileName to wget file(s)")
        print("  Option -CN - check new file if presents")
        print("  Option -CR - clean the downloaded remote file(s) if presents")
        print("  Option -FC - number of files to be valid download; defaults to 1")
        print("  Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one")
        print("  Option -SM - Show wget dumping message; defaults to False")
        print("  Option -MC - minimal number of files to be valid download; defaults to -FC")
        print("  Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension")
        print("  Option -OP - options used by wget, defaults to '-np -nH -nd -m -e robots=off --no-check-certificate'")
        print("  Option -UL - (mandatory) WebURL with path")
        print("  Option -RN - (mandatory) the root portion of the remote file name to be downloaded")
        print("  Option -EX - file name extension to be used.")
        sys.exit(0)

    PgLOG.cmdlog("pgwget " + ' '.join(argv))
    if not OPTIONS['MC']:
        OPTIONS['MC'] = OPTIONS['FC']
    if not OPTIONS['SM']:
        OPTIONS['OP'] += ' -q'   # keep wget quiet unless -SM is given
    download_wildcard_files()
    PgLOG.cmdlog()

    sys.exit(0)
|
|
90
|
+
|
|
91
|
+
#
|
|
92
|
+
# download one or multiple remote files via wget; concat files to a single one if multiple
|
|
93
|
+
#
|
|
94
|
+
def download_wildcard_files():
    """Download the remote file(s) matching RN*[.EX] and join them into one file.

    The target file is OPTIONS['FN'], or RN[.EX] if -FN is not given.  wget is
    run when -CN is set or fewer than FC matching files are present locally.
    The matching files are then joined into the target with the command
    selected by -JC (cat, tar, or a move of the first/last file); with -CR
    the joined source files are removed.

    Returns 1 when the target file is (re)built, 0 when none of the matching
    files is newer than the existing target, and otherwise the value returned
    by PgLOG.pglog() for the message explaining why nothing was built.
    """
    deleted = 0
    if OPTIONS['FN']:
        dfile = OPTIONS['FN']
    else:
        dfile = OPTIONS['RN']
        if OPTIONS['EX']:
            dfile += "." + OPTIONS['EX']

    dinfo = PgFile.check_local_file(dfile, 1)
    if dinfo and not OPTIONS['CN']:
        return PgLOG.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN)

    build = 0 if dinfo else 1
    wfile = OPTIONS['RN'] + "*"
    if OPTIONS['EX']:
        wfile += "." + OPTIONS['EX']

    # files present before wget runs; an existing target is not a source file
    dlist = PgFile.local_glob(wfile, 1)
    if dfile in dlist and dinfo:
        del dlist[dfile]
        deleted = 1
    dcnt = len(dlist)

    if OPTIONS['CN'] or dcnt < OPTIONS['FC']:
        cmd = "wget {} {} -A '{}'".format(OPTIONS['OP'], OPTIONS['UL'], wfile)
        PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 7)
        nlist = PgFile.local_glob(wfile, 1)
        if dfile in nlist and dinfo:
            del nlist[dfile]
            deleted = 1
        ncnt = len(nlist)
    else:
        nlist = dlist
        ncnt = dcnt

    if ncnt == 0:
        if deleted:
            return PgLOG.pglog("{}: File dowloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
        else:
            return PgLOG.pglog("{}: NO file to dowload on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
    elif ncnt < OPTIONS['MC']:
        return PgLOG.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN)

    rfiles = sorted(nlist)
    size = skip = 0
    for rfile in rfiles:
        rinfo = nlist[rfile]
        size += rinfo['data_size']
        if dinfo and PgUtil.cmptime(dinfo['date_modified'], dinfo['time_modified'], rinfo['date_modified'], rinfo['time_modified']) >= 0:
            PgLOG.pglog("{}: Not newer than {}".format(rfile, dfile), PgLOG.LOGWRN)
            skip += 1
        elif rfile not in dlist:
            build = 1
        elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0:
            PgLOG.pglog("{}: Newer file dowloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
            build = 1
        else:
            # the original format string had no placeholder for the URL
            PgLOG.pglog("{}: No newer file found on {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)

    if skip == ncnt:
        return 0

    # build stays 0 only when a target exists; rebuild it if the sizes differ
    if not (build or size == dinfo['data_size']):
        build = 1
    if not build:
        return PgLOG.pglog(dfile + ": Use existing file", PgLOG.LOGWRN)

    if OPTIONS['JC'] == 'cat':
        for i, rfile in enumerate(rfiles):
            if i == 0:
                # the first file starts the target; the others are appended
                if dfile != rfile:
                    PgFile.local_copy_local(dfile, rfile, PgLOG.LOGWRN)
            else:
                PgLOG.pgsystem("cat {} >> {}".format(rfile, dfile), PgLOG.LOGWRN, 5)
            if OPTIONS['CR'] and dfile != rfile:
                PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5)
    elif OPTIONS['JC'] == 'tar':
        topt = 'c'   # create the archive with the first file, update it afterwards
        for rfile in rfiles:
            PgLOG.pgsystem("tar -{}vf {} {}".format(topt, dfile, rfile), PgLOG.LOGWRN, 5)
            topt = 'u'
            if OPTIONS['CR']:
                PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5)
    else:
        didx = 0 if OPTIONS['JC'] == 'first' else (ncnt - 1)
        PgLOG.pgsystem("mv {} {}".format(rfiles[didx], dfile), PgLOG.LOGWRN, 5)
        if OPTIONS['CR']:
            for i, rfile in enumerate(rfiles):
                if i == didx:
                    continue
                PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5)

    return 1
|
|
182
|
+
|
|
183
|
+
#
|
|
184
|
+
# call main() to start program
|
|
185
|
+
#
|
|
186
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title: rdacp
|
|
6
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date: 10/24/2020
|
|
8
|
+
# 2025-03-10 transferred to package rda_python_miscs from
|
|
9
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
10
|
+
# Purpose: copy files locally and remotely by 'rdadata'
|
|
11
|
+
#
|
|
12
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
#
|
|
16
|
+
import re
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgUtil
|
|
22
|
+
from rda_python_common import PgDBI
|
|
23
|
+
from rda_python_common import PgFile
|
|
24
|
+
|
|
25
|
+
# Command line option values, keyed by the option name without its leading '-'
RDACP = {
    'fh' : None,   # from host name, default to localhost
    'th' : None,   # to host name, default to localhost
    'fb' : None,   # from bucket name for a from file in Object Store
    'tb' : None,   # to bucket name for a to file in Object Store
    'fp' : None,   # from Globus endpoint
    'tp' : None,   # to Globus endpoint
    'f' : [],      # from file names
    't' : None,    # to file name
    'r' : 0,       # 1 if recursive all
    'R' : 0,       # > 0 to set recursive limit
    'F' : 0o664,   # to file mode, default to 664
    'D' : 0o775,   # to directory mode, default to 775
}

# Running state shared by the copy functions
CINFO = {
    'tcnt' : 0,      # total number of successful copies
    'htcnt' : 0,     # NOTE(review): not used in the visible code
    'cpflag' : 0,    # 1 file only, 2 directory only, 3 both
    'cpstr' : ['', 'Files', 'Directories', 'Files/Directories'],   # message text indexed by cpflag
    'fpath' : None,  # directory prefix (with trailing '/') of the current top-level source
    'tpath' : None,  # target directory; stays None when the target is a single file
    'fhost' : '',    # from host name plus '-', used as message prefix
    'thost' : '',    # to host name plus '-', used as message prefix
    'curdir' : os.getcwd()
}
|
|
51
|
+
|
|
52
|
+
#
|
|
53
|
+
# main function to run the application
|
|
54
|
+
#
|
|
55
|
+
def main():
    """Parse the command line into RDACP, copy the given files and report.

    Arguments without a leading option are taken as source files until another
    value option is used; '-r' is a flag, '-R' takes an integer, '-F'/'-D'
    take octal modes, and all other options take a string value.  Usage is
    shown for -h/--help or when no source file is given.
    """
    args = sys.argv[1:]
    show_help = 0
    PgDBI.dssdb_dbname()
    PgLOG.set_suid(PgLOG.PGLOG['EUID'])
    PgLOG.set_help_path(__file__)
    PgLOG.PGLOG['LOGFILE'] = "rdacp.log"   # set different log file
    PgLOG.cmdlog("rdacp {} ({})".format(' '.join(args), CINFO['curdir']))

    # 'fallback' is the option bare values fall back to after a value option;
    # it is cleared once a source file has been collected
    fallback = current = 'f'
    for arg in args:
        if re.match(r'-(h|-help)$', arg, re.I):
            show_help = 1
            continue

        found = re.match(r'-(\w+)$', arg)
        if found:
            current = found.group(1)
            if current not in RDACP:
                PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
            if current == 'r':
                RDACP['r'] = 1
                current = None
            continue

        if not current:
            PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
        if current == "f":
            RDACP['f'].append(arg)
            fallback = None
            continue

        if current == 'R':
            RDACP[current] = int(arg)
        elif current in 'FD':
            RDACP[current] = PgLOG.base2int(arg, 8)
        else:
            RDACP[current] = arg
            if current == 'th':
                CINFO['thost'] = arg + '-'
            elif current == 'fh':
                CINFO['fhost'] = arg + '-'
        current = fallback

    if show_help or not RDACP['f']:
        PgLOG.show_usage("rdacp")
    PgDBI.validate_decs_group('rdacp', PgLOG.PGLOG['CURUID'], 1)
    if RDACP['r'] and not RDACP['R']:
        RDACP['R'] = 1000

    # work out whether the target is a directory (tpath set) or a single file
    target = RDACP['t']
    if not target:
        CINFO['tpath'] = RDACP['t'] = "."
    else:
        slashed = re.match(r'^(.+)/$', target)
        if slashed:
            CINFO['tpath'] = slashed.group(1)
        else:
            tinfo = PgFile.check_rda_file(target, RDACP['th'], 0, PgLOG.LGWNEX)
            if tinfo and tinfo['isfile'] == 0:
                CINFO['tpath'] = target
    PgLOG.PGLOG['FILEMODE'] = RDACP['F']
    PgLOG.PGLOG['EXECMODE'] = RDACP['D']

    if len(RDACP['f']) > 1 and not CINFO['tpath']:
        PgLOG.pglog("{}{}: Cannot copy multiple files to a single file".format(CINFO['thost'], RDACP['t']), PgLOG.LGEREX)
    if RDACP['th'] and RDACP['fh'] and RDACP['th'] == RDACP['fh'] and RDACP['fh'] != 'HPSS':
        PgLOG.pglog(RDACP['fh'] + ": Cannot copy file onto the same host", PgLOG.LGEREX)

    # the 'from' value takes precedence over the 'to' value
    bucket = RDACP['fb'] or RDACP['tb']
    if bucket:
        PgLOG.PGLOG['OBJCTBKT'] = bucket
    endpoint = RDACP['fp'] or RDACP['tp']
    if endpoint:
        PgLOG.PGLOG['BACKUPEP'] = endpoint

    copy_top_list(RDACP['f'])

    hosts = []
    if RDACP['fh']:
        hosts.append(" From " + RDACP['fh'])
    if RDACP['th']:
        hosts.append(" To " + RDACP['th'])
    hinfo = ''.join(hosts)

    total = CINFO['tcnt']
    if total > 1:
        PgLOG.pglog("Total {} {} copiled{}".format(total, CINFO['cpstr'][CINFO['cpflag']], hinfo), PgLOG.LOGWRN)
    elif total == 0 and not RDACP['fh']:
        where = CINFO['fpath'] if CINFO['fpath'] else CINFO['curdir']
        PgLOG.pglog("{}: No File copied{}".format(where, hinfo), PgLOG.LOGWRN)

    PgLOG.cmdlog()
    PgLOG.pgexit(0)
|
|
136
|
+
|
|
137
|
+
#
|
|
138
|
+
# copy the top level list of files/directories
|
|
139
|
+
#
|
|
140
|
+
def copy_top_list(files):
    """Copy each source given on the command line.

    A missing source is logged and skipped.  A file is copied directly.  A
    directory needs a directory target; its content is copied when the name
    ends with '/' or a recursion limit is set, otherwise an error asks for
    option -r.  CINFO['fpath'], CINFO['cpflag'] and CINFO['tcnt'] are updated
    along the way.
    """
    for entry in files:
        to_backup = RDACP['th'] and not PgUtil.pgcmp(RDACP['th'], PgLOG.PGLOG['BACKUPNM'], 1)
        if to_backup:
            info = PgFile.check_globus_file(entry, 'rda-glade', 0, PgLOG.LGWNEX)
        else:
            info = PgFile.check_rda_file(entry, RDACP['fh'], 0, PgLOG.LGWNEX)
        if not info:
            PgLOG.pglog("{}{}: {}".format(CINFO['fhost'], entry, PgLOG.PGLOG['MISSFILE']), PgLOG.LOGERR)
            continue

        contents_only = 0
        if info['isfile'] == 0:
            CINFO['cpflag'] |= 2
            if not CINFO['tpath']:
                PgLOG.pglog("{}{}: Cannot copy directory to a single file".format(CINFO['fhost'], entry), PgLOG.LGEREX)
            if re.search(r'/$', entry):
                # copy the file under this directory if it is ended by '/'
                contents_only = 1
                entry = re.sub(r'/$', '', entry)
        else:
            CINFO['cpflag'] |= 1

        if not entry.startswith('/'):
            entry = PgLOG.join_paths(CINFO['curdir'], entry)
        # prefix that copy_file() strips from source names to get target names
        base = entry if contents_only else op.dirname(entry)
        CINFO['fpath'] = base + "/"

        if info['isfile']:
            CINFO['tcnt'] += copy_file(entry, info['isfile'])
        elif contents_only or RDACP['R']:
            children = PgFile.rda_glob(entry, RDACP['fh'], 0, PgLOG.LGWNEX)
            if children:
                copy_list(children, 1, entry)
        else:
            PgLOG.pglog("{}{}: Add option -r to copy directory".format(CINFO['fhost'], entry), PgLOG.LGEREX)
|
|
172
|
+
|
|
173
|
+
#
|
|
174
|
+
# recursively copy directory/file
|
|
175
|
+
#
|
|
176
|
+
def copy_list(tlist, level, cdir):
    """Copy the files in *tlist* and recurse into its directories.

    tlist -- entries of directory *cdir*, keyed by name; each value has an
             'isfile' item
    level -- current recursion depth; directories are entered only while it
             is below RDACP['R']
    cdir  -- directory the entries belong to, used in the summary message

    The number of files copied here is added to CINFO['tcnt'].
    """
    copied = 0

    for name in tlist:
        isfile = tlist[name]['isfile']
        if isfile:
            copied += copy_file(name, isfile)
            CINFO['cpflag'] |= 1   # this branch only handles files
        elif level < RDACP['R']:
            children = PgFile.rda_glob(name, RDACP['fh'], 0, PgLOG.LGWNEX)
            if children:
                copy_list(children, level + 1, name)

    # display sub count if two or more files are copied
    if copied > 1:
        PgLOG.pglog("{}{}: {} {} copied from directory".format(CINFO['fhost'], cdir, copied, CINFO['cpstr'][CINFO['cpflag']]), PgLOG.LOGWRN)
    CINFO['tcnt'] += copied
|
|
191
|
+
|
|
192
|
+
#
|
|
193
|
+
# copy one file each time
|
|
194
|
+
#
|
|
195
|
+
def copy_file(fromfile, isfile):
    """Copy one source to its target and return 1 on success, 0 otherwise.

    fromfile -- full name of the source
    isfile   -- true if the source is a file

    With a directory target (CINFO['tpath'] set) a file keeps its path
    relative to CINFO['fpath'] under the target directory, and a non-file
    source is copied to the target directory itself.  Without a directory
    target the source is copied to RDACP['t'].
    """
    if CINFO['tpath']:
        if isfile:
            # strip the source prefix literally; building a regex from the path
            # breaks on names with characters such as '.', '+', '(' or '['
            prefix = CINFO['fpath'] or ''
            fname = fromfile[len(prefix):] if fromfile.startswith(prefix) else fromfile
            tofile = PgLOG.join_paths(CINFO['tpath'], fname)
        else:
            tofile = CINFO['tpath'] + '/'
    else:
        tofile = RDACP['t']

    return (1 if PgFile.copy_rda_file(tofile, fromfile, RDACP['th'], RDACP['fh'], PgLOG.LGWNEX) else 0)
|
|
207
|
+
|
|
208
|
+
#
|
|
209
|
+
# call main() to start program
|
|
210
|
+
#
|
|
211
|
+
if __name__ == "__main__": main()
|
|
212
|
+
|