rda-python-miscs 2.0.15__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_miscs/bashqsub.py +60 -5
- rda_python_miscs/bashqsub.usg +7 -7
- rda_python_miscs/gdexls.py +99 -8
- rda_python_miscs/gdexls.usg +58 -44
- rda_python_miscs/pg_rst.py +5 -0
- rda_python_miscs/pg_rst.usg +60 -0
- rda_python_miscs/pg_wget.py +21 -8
- rda_python_miscs/pgwget.py +41 -24
- rda_python_miscs/pgwget.usg +75 -0
- rda_python_miscs/rdacp.py +61 -6
- rda_python_miscs/rdacp.usg +73 -58
- rda_python_miscs/rdakill.py +93 -12
- rda_python_miscs/rdakill.usg +64 -34
- rda_python_miscs/rdamod.py +63 -10
- rda_python_miscs/rdamod.usg +70 -50
- rda_python_miscs/rdaown.py +55 -6
- rda_python_miscs/rdaown.usg +57 -43
- rda_python_miscs/rdaps.py +37 -5
- rda_python_miscs/rdaps.usg +47 -21
- rda_python_miscs/rdasub.py +45 -8
- rda_python_miscs/rdasub.usg +45 -8
- rda_python_miscs/rdazip.py +20 -4
- rda_python_miscs/rdazip.usg +55 -19
- rda_python_miscs/tcshqsub.py +58 -3
- rda_python_miscs/tcshqsub.usg +9 -9
- rda_python_miscs-3.0.1.dist-info/METADATA +161 -0
- rda_python_miscs-3.0.1.dist-info/RECORD +48 -0
- rda_python_miscs-3.0.1.dist-info/entry_points.txt +20 -0
- rda_python_miscs-2.0.15.dist-info/METADATA +0 -18
- rda_python_miscs-2.0.15.dist-info/RECORD +0 -46
- rda_python_miscs-2.0.15.dist-info/entry_points.txt +0 -14
- {rda_python_miscs-2.0.15.dist-info → rda_python_miscs-3.0.1.dist-info}/WHEEL +0 -0
- {rda_python_miscs-2.0.15.dist-info → rda_python_miscs-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {rda_python_miscs-2.0.15.dist-info → rda_python_miscs-3.0.1.dist-info}/top_level.txt +0 -0
rda_python_miscs/bashqsub.py
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# https://github.com/NCAR/rda-utility-programs.git
|
|
8
8
|
# 2025-12-29 convert to class BashQsub
|
|
9
9
|
# Purpose: python script to submit a batch job on PBS node via bash script
|
|
10
|
-
# Github: https://github.com/NCAR/rda-
|
|
10
|
+
# Github: https://github.com/NCAR/rda-python-miscs.git
|
|
11
11
|
##################################################################################
|
|
12
12
|
import os
|
|
13
13
|
import sys
|
|
@@ -16,8 +16,14 @@ from os import path as op
|
|
|
16
16
|
from rda_python_common.pg_log import PgLOG
|
|
17
17
|
|
|
18
18
|
class BashQsub(PgLOG):
|
|
19
|
+
"""Submit a PBS batch job via a dynamically generated bash script using qsub.
|
|
20
|
+
|
|
21
|
+
Builds a bash script with PBS directives, module loads, and conda environment
|
|
22
|
+
activation, then submits it through the PBS qsub command.
|
|
23
|
+
"""
|
|
19
24
|
|
|
20
25
|
def __init__(self):
|
|
26
|
+
"""Initialize BashQsub with default PBS resource settings and options."""
|
|
21
27
|
super().__init__()
|
|
22
28
|
self.DEFMODS = {
|
|
23
29
|
'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2"
|
|
@@ -42,8 +48,15 @@ class BashQsub(PgLOG):
|
|
|
42
48
|
self.gdexsub = self.BCHCMDS['PBS']
|
|
43
49
|
self.args = None
|
|
44
50
|
|
|
45
|
-
# function to
|
|
51
|
+
# function to read parameters
|
|
46
52
|
def read_parameters(self):
|
|
53
|
+
"""Parse command-line arguments and populate PBS options and customized options.
|
|
54
|
+
|
|
55
|
+
Handles single-dash qsub options (e.g. -q, -A, -l) and long custom options
|
|
56
|
+
(-cmd, -cwd, -env, -mod, -res). Validates that the qsub command is available
|
|
57
|
+
and that a -cmd value is provided. Sets default log paths and job name if not
|
|
58
|
+
specified, and changes the working directory if -cwd is given.
|
|
59
|
+
"""
|
|
47
60
|
aname = 'bashqsub'
|
|
48
61
|
pname = 'gdexqsub'
|
|
49
62
|
self.set_help_path(__file__)
|
|
@@ -88,11 +101,12 @@ class BashQsub(PgLOG):
|
|
|
88
101
|
if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
|
|
89
102
|
if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd'])
|
|
90
103
|
if self.coptions['cwd']:
|
|
91
|
-
if '
|
|
104
|
+
if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX)
|
|
92
105
|
os.chdir(self.coptions['cwd'])
|
|
93
106
|
|
|
94
107
|
# function to start actions
|
|
95
108
|
def start_actions(self):
|
|
109
|
+
"""Resolve the command path, build the bash script, and submit it via qsub."""
|
|
96
110
|
cmd = self.valid_command(self.coptions['cmd'])
|
|
97
111
|
if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd'])
|
|
98
112
|
if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX)
|
|
@@ -105,6 +119,17 @@ class BashQsub(PgLOG):
|
|
|
105
119
|
|
|
106
120
|
# build bash script to submit a PBS batch job
|
|
107
121
|
def build_bash_script(self, cmd):
|
|
122
|
+
"""Build and return a bash script string with PBS directives for the given command.
|
|
123
|
+
|
|
124
|
+
Sets HOME, sources system and conda profile scripts and the user's .bashrc,
|
|
125
|
+
loads modules, activates the conda environment, then runs the command.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
cmd (str): The fully-resolved command (with arguments) to execute in the job.
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
str: The complete bash batch script content.
|
|
132
|
+
"""
|
|
108
133
|
buf = "#!/usr/bin/bash\n\n" # qsub starting bash script
|
|
109
134
|
if 'l' in self.SOPTIONS: self.add_resources()
|
|
110
135
|
# add options to bash script for qsub
|
|
@@ -128,8 +153,13 @@ class BashQsub(PgLOG):
|
|
|
128
153
|
buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd)
|
|
129
154
|
return buf
|
|
130
155
|
|
|
131
|
-
# check and add resource options
|
|
156
|
+
# check and add resource options
|
|
132
157
|
def add_resources(self):
|
|
158
|
+
"""Parse -l option value into the RESOURCES dict and remove the raw -l entry.
|
|
159
|
+
|
|
160
|
+
Expects comma-separated name=value pairs (e.g. 'walltime=2:00:00,select=1:ncpus=4').
|
|
161
|
+
Logs an error if a token does not contain '='.
|
|
162
|
+
"""
|
|
133
163
|
for res in re.split(',', self.SOPTIONS['l']):
|
|
134
164
|
ms = re.match(r'^([^=]+)=(.+)$', res)
|
|
135
165
|
if ms:
|
|
@@ -140,6 +170,20 @@ class BashQsub(PgLOG):
|
|
|
140
170
|
|
|
141
171
|
# add module loads for modules provided
|
|
142
172
|
def add_modules(self, res, mods):
|
|
173
|
+
"""Build and return module load/unload commands for the bash script.
|
|
174
|
+
|
|
175
|
+
Loads the default module set for the given reservation (or the 'default' set).
|
|
176
|
+
Additional modules in ``mods`` are appended; path-style entries (starting with
|
|
177
|
+
'/') use 'module use' instead of 'module load'. Modules already in the default
|
|
178
|
+
set are skipped. SWAPMODS entries trigger an unload before the new load.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
res (str): Reservation name used to look up DEFMODS; falls back to 'default'.
|
|
182
|
+
mods (str): Comma-separated list of extra modules (or None).
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
str: Shell commands to load/unload modules.
|
|
186
|
+
"""
|
|
143
187
|
mbuf = "\n"
|
|
144
188
|
defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default']
|
|
145
189
|
dmods = re.split(',', defmods)
|
|
@@ -163,6 +207,16 @@ class BashQsub(PgLOG):
|
|
|
163
207
|
|
|
164
208
|
# set virtual machine libraries
|
|
165
209
|
def set_vm_libs(self, res):
|
|
210
|
+
"""Build and return conda/VM library activation commands for the bash script.
|
|
211
|
+
|
|
212
|
+
Looks up DEFLIBS for the given reservation (falls back to 'default').
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
res (str): Reservation name used to look up DEFLIBS; falls back to 'default'.
|
|
216
|
+
|
|
217
|
+
Returns:
|
|
218
|
+
str: Shell commands to activate virtual environment libraries, or '' if none.
|
|
219
|
+
"""
|
|
166
220
|
deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default']
|
|
167
221
|
if not deflibs: return ''
|
|
168
222
|
dlibs = re.split(',', deflibs)
|
|
@@ -171,8 +225,9 @@ class BashQsub(PgLOG):
|
|
|
171
225
|
libbuf += dlib + "\n"
|
|
172
226
|
return libbuf
|
|
173
227
|
|
|
174
|
-
# main function to
|
|
228
|
+
# main function to execute this script
|
|
175
229
|
def main():
|
|
230
|
+
"""Entry point: instantiate BashQsub, parse arguments, run, and exit."""
|
|
176
231
|
object = BashQsub()
|
|
177
232
|
object.read_parameters()
|
|
178
233
|
object.start_actions()
|
rda_python_miscs/bashqsub.usg
CHANGED
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
-o LOGPATH/gdexqsub/
|
|
11
11
|
-e LOGPATH/gdexqsub/
|
|
12
12
|
-A P43713000
|
|
13
|
-
-m
|
|
14
|
-
-q gdex
|
|
15
|
-
-l walltime=6:00:00,select=1:
|
|
13
|
+
-m n
|
|
14
|
+
-q gdex@casper-pbs
|
|
15
|
+
-l walltime=6:00:00,select=1:ncpus=1:mem=1gb
|
|
16
16
|
|
|
17
17
|
- Option -cwd, set the working directory for the Command to be executed. If
|
|
18
18
|
it is not specified, it defaults to the current directory where qsub
|
|
@@ -33,18 +33,18 @@
|
|
|
33
33
|
A bash script example:
|
|
34
34
|
#!/usr/bin/bash
|
|
35
35
|
|
|
36
|
-
#PBS -o /
|
|
37
|
-
#PBS -e /
|
|
36
|
+
#PBS -o /glade/u/home/gdexdata/dssdb/log/gdexqsub/
|
|
37
|
+
#PBS -e /glade/u/home/gdexdata/dssdb/log/gdexqsub/
|
|
38
38
|
#PBS -A P43713000
|
|
39
39
|
#PBS -q gdex@casper-pbs
|
|
40
40
|
#PBS -m n
|
|
41
41
|
#PBS -N dsrqst
|
|
42
42
|
#PBS -l walltime=1:00:00
|
|
43
43
|
#PBS -l select=1:ncpus=1:mem=1gb
|
|
44
|
-
export HOME=/
|
|
44
|
+
export HOME=/glade/u/home/zji
|
|
45
45
|
source /etc/profile.d/z00_modules.sh
|
|
46
46
|
source /glade/u/apps/opt/conda/etc/profile.d/conda.sh
|
|
47
|
-
source /
|
|
47
|
+
source /glade/u/home/zji/.bashrc
|
|
48
48
|
pwd; hostname; date
|
|
49
49
|
|
|
50
50
|
module load ncarenv
|
rda_python_miscs/gdexls.py
CHANGED
|
@@ -18,8 +18,16 @@ from os import path as op
|
|
|
18
18
|
from rda_python_common.pg_split import PgSplit
|
|
19
19
|
|
|
20
20
|
class GdexLs(PgSplit):
|
|
21
|
+
"""List local files/directories and display matching metadata from GDEXDB.
|
|
22
|
+
|
|
23
|
+
For each path, queries the GDEX database for dataset, group, or file records
|
|
24
|
+
and prints them in aligned columns: type-prefixed name, size, file count (or
|
|
25
|
+
format), and description. A leading letter on each output line indicates the
|
|
26
|
+
item type: 'D' for a dataset root, 'G' for a sub-group, 'F' for a data file.
|
|
27
|
+
"""
|
|
21
28
|
|
|
22
29
|
def __init__(self):
|
|
30
|
+
"""Initialize display constants, CLI option flags, and listing state."""
|
|
23
31
|
super().__init__()
|
|
24
32
|
# define some constants for gdexls actions
|
|
25
33
|
self.DIDX = 3 # description column index
|
|
@@ -49,6 +57,13 @@ class GdexLs(PgSplit):
|
|
|
49
57
|
|
|
50
58
|
# function to read parameters
|
|
51
59
|
def read_parameters(self):
|
|
60
|
+
"""Parse command-line arguments into GDEXLS option flags and the file/directory list.
|
|
61
|
+
|
|
62
|
+
Recognises boolean flags -d, -f, -N, -r and value options -R, -D.
|
|
63
|
+
Positional arguments are resolved to real paths and appended to LINFO['files'].
|
|
64
|
+
Exits with usage if -h/--help/? is given; errors on unknown options or
|
|
65
|
+
values without a preceding option.
|
|
66
|
+
"""
|
|
52
67
|
self.set_help_path(__file__)
|
|
53
68
|
self.PGLOG['LOGFILE'] = "gdexls.log" # set different log file
|
|
54
69
|
self.LINFO['curdir'] = self.get_real_path(os.getcwd())
|
|
@@ -61,7 +76,7 @@ class GdexLs(PgSplit):
|
|
|
61
76
|
if ms:
|
|
62
77
|
option = ms.group(1)
|
|
63
78
|
if option not in self.GDEXLS: self.pglog(arg + ": Unknown Option", self.LGEREX)
|
|
64
|
-
if 'dfNr'
|
|
79
|
+
if option in 'dfNr':
|
|
65
80
|
self.GDEXLS[option] = 1
|
|
66
81
|
option = defopt
|
|
67
82
|
continue
|
|
@@ -76,8 +91,15 @@ class GdexLs(PgSplit):
|
|
|
76
91
|
self.GDEXLS[option] = arg
|
|
77
92
|
option = defopt
|
|
78
93
|
|
|
79
|
-
#
|
|
80
|
-
def start_actions(self):
|
|
94
|
+
# function to start actions
|
|
95
|
+
def start_actions(self):
|
|
96
|
+
"""Fetch DB connection info, resolve the default file list, and drive display.
|
|
97
|
+
|
|
98
|
+
If no paths were given, lists all entries in the current directory.
|
|
99
|
+
Defaults both -d and -f flags when neither is explicitly set.
|
|
100
|
+
Prints a summary count of datasets, groups, and files at the end,
|
|
101
|
+
or exits with an error if nothing matched in the database.
|
|
102
|
+
"""
|
|
81
103
|
self.view_dbinfo()
|
|
82
104
|
if not self.LINFO['files']:
|
|
83
105
|
self.LINFO['files'] = sorted(glob.glob('*')) # view all files in current directory
|
|
@@ -112,6 +134,15 @@ class GdexLs(PgSplit):
|
|
|
112
134
|
|
|
113
135
|
# display the top level list
|
|
114
136
|
def display_top_list(self, files):
|
|
137
|
+
"""Process and display each top-level path, expanding directories as needed.
|
|
138
|
+
|
|
139
|
+
A path ending with '/' suppresses display of the directory entry itself and
|
|
140
|
+
always recurses into it. Paths not starting with '/' are joined to curdir.
|
|
141
|
+
Flushes the cached formatted list when it exceeds CLMT entries.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
files (list[str]): Top-level paths provided on the command line (or cwd glob).
|
|
145
|
+
"""
|
|
115
146
|
for file in files:
|
|
116
147
|
if not op.exists(file):
|
|
117
148
|
sys.stderr.write(file + ": NOT exists\n")
|
|
@@ -131,6 +162,14 @@ class GdexLs(PgSplit):
|
|
|
131
162
|
|
|
132
163
|
# recursively display directory/file info
|
|
133
164
|
def display_list(self, files, level):
|
|
165
|
+
"""Recursively display metadata for each path up to the configured depth limit.
|
|
166
|
+
|
|
167
|
+
Flushes the formatted cache when it exceeds CLMT entries to keep memory bounded.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
files (list[str]): Glob-expanded paths at the current recursion level.
|
|
171
|
+
level (int): Current recursion depth (1-based); stops when >= GDEXLS['R'].
|
|
172
|
+
"""
|
|
134
173
|
for file in files:
|
|
135
174
|
isdir = 1 if op.isdir(file) else 0
|
|
136
175
|
self.display_line(file, isdir)
|
|
@@ -141,6 +180,17 @@ class GdexLs(PgSplit):
|
|
|
141
180
|
|
|
142
181
|
# find dataset/group info; display or cache file
|
|
143
182
|
def display_line(self, file, isdir):
|
|
183
|
+
"""Look up GDEX metadata for a path and pass a formatted record to display_record.
|
|
184
|
+
|
|
185
|
+
Resolves the dataset ID and home path on first call, then reuses cached values
|
|
186
|
+
for subsequent paths under the same dataset. Skips paths with no matching
|
|
187
|
+
dataset ID. Dispatches to the dataset, group, or file branch based on whether
|
|
188
|
+
the path is the dataset root, a subdirectory, or a regular file.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
file (str): Absolute path to the file or directory.
|
|
192
|
+
isdir (int): 1 if the path is a directory, 0 otherwise.
|
|
193
|
+
"""
|
|
144
194
|
getwfile = 1
|
|
145
195
|
if self.LINFO['dsid'] and self.LINFO['dhome']:
|
|
146
196
|
ms = re.match(r'^{}/(.*)$'.format(self.LINFO['dhome']), file)
|
|
@@ -151,7 +201,7 @@ class GdexLs(PgSplit):
|
|
|
151
201
|
self.LINFO['dsid'] = self.find_dataset_id(file)
|
|
152
202
|
if self.LINFO['dsid'] is None: return # skip for missing dsid
|
|
153
203
|
pgrec = self.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(self.LINFO['dsid']), self.LGEREX)
|
|
154
|
-
if not pgrec: return
|
|
204
|
+
if not pgrec: return
|
|
155
205
|
self.LINFO['dhome'] = "{}/{}".format(self.PGLOG['DSDHOME'], self.LINFO['dsid'])
|
|
156
206
|
if self.LINFO['dhome'] == file:
|
|
157
207
|
file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1)
|
|
@@ -183,8 +233,17 @@ class GdexLs(PgSplit):
|
|
|
183
233
|
self.display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note])
|
|
184
234
|
self.LINFO['fcnt'] += 1
|
|
185
235
|
|
|
186
|
-
# display one
|
|
236
|
+
# display one record
|
|
187
237
|
def display_record(self, disp):
|
|
238
|
+
"""Format the size field and either print immediately or cache for aligned output.
|
|
239
|
+
|
|
240
|
+
In unformatted mode (-N) the columns are joined by the delimiter and printed
|
|
241
|
+
directly. Otherwise the record is appended to pgrecs and the per-column
|
|
242
|
+
maximum widths are updated for later aligned rendering by display_format_list.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
disp (list[str]): Four-element list: [name, size, count/format, description].
|
|
246
|
+
"""
|
|
188
247
|
disp[1] = self.get_float_string(disp[1])
|
|
189
248
|
if self.GDEXLS['N']:
|
|
190
249
|
print(self.GDEXLS['D'].join(disp))
|
|
@@ -197,6 +256,12 @@ class GdexLs(PgSplit):
|
|
|
197
256
|
|
|
198
257
|
# display cached list with format
|
|
199
258
|
def display_format_list(self):
|
|
259
|
+
"""Flush the cached record list with column-aligned formatting and reset the cache.
|
|
260
|
+
|
|
261
|
+
Applies left or right alignment to each of the first DIDX columns based on
|
|
262
|
+
ALIGNS, padding to the maximum observed width, then joins with the delimiter.
|
|
263
|
+
Resets pcnt to 0 after printing (pgrecs entries are left but ignored).
|
|
264
|
+
"""
|
|
200
265
|
for j in range(self.LINFO['pcnt']):
|
|
201
266
|
disp = self.LINFO['pgrecs'][j]
|
|
202
267
|
for i in range(self.DIDX):
|
|
@@ -207,9 +272,20 @@ class GdexLs(PgSplit):
|
|
|
207
272
|
print(self.GDEXLS['D'].join(disp))
|
|
208
273
|
self.LINFO['pcnt'] = 0
|
|
209
274
|
|
|
210
|
-
#
|
|
275
|
+
# convert size to floating point value with unit
|
|
211
276
|
@staticmethod
|
|
212
277
|
def get_float_string(val):
|
|
278
|
+
"""Convert a numeric byte count to a human-readable string with a unit suffix.
|
|
279
|
+
|
|
280
|
+
Divides by 1000 repeatedly until the value is <= 1000 or the largest unit
|
|
281
|
+
(Petabytes) is reached. Values >= 1K are formatted to two decimal places.
|
|
282
|
+
|
|
283
|
+
Args:
|
|
284
|
+
val (int|float): Size in bytes.
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
str: Formatted string such as '1.50M' or '512B'.
|
|
288
|
+
"""
|
|
213
289
|
units = ['B', 'K', 'M', 'G', 'T', 'P']
|
|
214
290
|
idx = 0
|
|
215
291
|
while val > 1000 and idx < 5:
|
|
@@ -220,17 +296,32 @@ class GdexLs(PgSplit):
|
|
|
220
296
|
else:
|
|
221
297
|
return "{}{}".format(val, units[idx])
|
|
222
298
|
|
|
223
|
-
#
|
|
299
|
+
# normalize /gpfs paths to /glade equivalents and resolve symlinks
|
|
224
300
|
@staticmethod
|
|
225
301
|
def get_real_path(path):
|
|
302
|
+
"""Translate legacy /gpfs mount-point prefixes to their /glade equivalents.
|
|
303
|
+
|
|
304
|
+
Handles two mappings:
|
|
305
|
+
- /gpfs/u/... → /glade/...
|
|
306
|
+
- /gpfs/csfs1/... → /glade/campaign/...
|
|
307
|
+
|
|
308
|
+
Then calls os.path.realpath to resolve any symlinks.
|
|
309
|
+
|
|
310
|
+
Args:
|
|
311
|
+
path (str): Filesystem path, possibly using a /gpfs prefix.
|
|
312
|
+
|
|
313
|
+
Returns:
|
|
314
|
+
str: Canonicalized absolute path under the /glade hierarchy.
|
|
315
|
+
"""
|
|
226
316
|
if re.match(r'^/gpfs/u', path):
|
|
227
317
|
path = re.sub(r'^/gpfs', '/glade', path, 1)
|
|
228
318
|
elif re.match(r'^/gpfs/csfs1/', path):
|
|
229
319
|
path = re.sub(r'^/gpfs/csfs1', '/glade/campaign', path, 1)
|
|
230
320
|
return op.realpath(path)
|
|
231
321
|
|
|
232
|
-
# main function to
|
|
322
|
+
# main function to execute this script
|
|
233
323
|
def main():
|
|
324
|
+
"""Entry point: instantiate GdexLs, parse arguments, run, and exit."""
|
|
234
325
|
object = GdexLs()
|
|
235
326
|
object.read_parameters()
|
|
236
327
|
object.start_actions()
|
rda_python_miscs/gdexls.usg
CHANGED
|
@@ -1,60 +1,74 @@
|
|
|
1
1
|
|
|
2
|
-
List
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
2
|
+
List local files and directories with matching metadata from the GDEX database.
|
|
3
|
+
Each output line has four columns:
|
|
4
|
+
|
|
5
|
+
For a dataset root (D) or group (G):
|
|
6
|
+
Name | Total Data Volume | File Count | Description
|
|
7
|
+
|
|
8
|
+
For a data file (F):
|
|
9
|
+
Name | File Size | Data Format | Note
|
|
10
|
+
|
|
11
|
+
A leading letter on each line indicates the item type:
|
|
12
|
+
D - dataset root directory
|
|
13
|
+
G - group or sub-group directory within a dataset
|
|
14
|
+
F - individual data file
|
|
15
|
+
|
|
16
|
+
Output is column-aligned by default using double spaces as the delimiter.
|
|
17
|
+
Use -N to disable formatting; the delimiter then defaults to '|'.
|
|
18
|
+
Nothing is displayed if no matching GDEX metadata is found for the given paths.
|
|
19
|
+
|
|
17
20
|
Usage: gdexls [-d] [-f] [-N] [-h] [-r] [-D DelimitSymbols] [-R RecursiveLevel] [Directory/File List]
|
|
18
21
|
|
|
19
|
-
- Option -d, list directory information only.
|
|
20
|
-
|
|
22
|
+
- Option -d, list dataset/group (directory) information only.
|
|
23
|
+
Both directories and files are listed by default; this option
|
|
24
|
+
suppresses file output;
|
|
21
25
|
|
|
22
|
-
- Option -f, list file information only.
|
|
23
|
-
|
|
26
|
+
- Option -f, list file information only.
|
|
27
|
+
Both directories and files are listed by default; this option
|
|
28
|
+
suppresses directory output;
|
|
24
29
|
|
|
25
|
-
- Option -N,
|
|
30
|
+
- Option -N, display output without column alignment;
|
|
26
31
|
|
|
27
32
|
- Option -h, display this help document;
|
|
28
33
|
|
|
29
|
-
- Option -r, list directories and files recursively;
|
|
34
|
+
- Option -r, list directories and files recursively (no depth limit);
|
|
35
|
+
|
|
36
|
+
- Option -R RecursiveLevel, list recursively up to the specified depth.
|
|
37
|
+
-R 1 lists only the immediate contents of each given directory;
|
|
38
|
+
|
|
39
|
+
- Option -D DelimitSymbols, specify the column delimiter string.
|
|
40
|
+
Defaults to " " (two spaces) for formatted output and '|' for
|
|
41
|
+
unformatted (-N) output. Quote the string if it contains shell
|
|
42
|
+
metacharacters, e.g. -D '<:>';
|
|
43
|
+
|
|
44
|
+
- Directory/File List is optional. Without it, all entries in the
|
|
45
|
+
current directory are listed. Shell wildcards are supported.
|
|
46
|
+
|
|
47
|
+
This utility can be run from any directory. It searches the GDEX database
|
|
48
|
+
using the resolved absolute path of each argument, so both absolute and
|
|
49
|
+
relative paths are accepted.
|
|
50
|
+
|
|
51
|
+
Examples for dataset d277006:
|
|
52
|
+
|
|
53
|
+
1. Change into the dataset home directory and run gdexls:
|
|
30
54
|
|
|
31
|
-
|
|
32
|
-
|
|
55
|
+
cd /PathTo/d277006
|
|
56
|
+
gdexls
|
|
33
57
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
Make sure quote the symbols if any character in the symbols has Unix
|
|
37
|
-
meaning, for example -D '<:>';
|
|
58
|
+
Add -r to recurse into sub-directories, or cd into a sub-directory
|
|
59
|
+
first to list only its contents.
|
|
38
60
|
|
|
39
|
-
|
|
40
|
-
and files in the current directory are listed. Unix command line
|
|
41
|
-
wildcards are supported.
|
|
61
|
+
2. Pass an absolute path directly:
|
|
42
62
|
|
|
43
|
-
|
|
44
|
-
|
|
63
|
+
gdexls /PathTo/d277006/ # list contents of the dataset directory
|
|
64
|
+
gdexls /PathTo/d277006/* # same effect via shell glob expansion
|
|
45
65
|
|
|
46
|
-
|
|
66
|
+
Without a trailing '/' or wildcard, the dataset entry itself is listed
|
|
67
|
+
unless -r or -R is given:
|
|
47
68
|
|
|
48
|
-
|
|
49
|
-
execute 'gdexls'; add recursive option '-r' to check directories and files
|
|
50
|
-
further into the sub-directories, or change directory into a sub-directory
|
|
51
|
-
to check files inside of it.
|
|
69
|
+
gdexls /PathTo/d277006 # shows the D-line for the dataset root
|
|
52
70
|
|
|
53
|
-
|
|
54
|
-
'gdexls /PathTo/d277006/*'; without the ending by '/' or an appended
|
|
55
|
-
wildcard symbol '*' information of the dataset itself is check unless
|
|
56
|
-
the recursive option '-r' or '-R RecursiveLevel' is present
|
|
71
|
+
3. Use a relative path from a neighbouring directory:
|
|
57
72
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
as 'gdexls ../d277006/' or as 'gdexls ../d277006/*'
|
|
73
|
+
gdexls ../d277006/
|
|
74
|
+
gdexls ../d277006/*
|
rda_python_miscs/pg_rst.py
CHANGED
|
@@ -1237,6 +1237,11 @@ class PgRST(PgFile, PgUtil):
|
|
|
1237
1237
|
|
|
1238
1238
|
def main():
|
|
1239
1239
|
"""Entry point for command-line usage of pg_rst.py."""
|
|
1240
|
+
import sys
|
|
1241
|
+
if len(sys.argv) == 1 or any(a in sys.argv[1:] for a in ('-h', '--help', '-?')):
|
|
1242
|
+
pg = PgRST()
|
|
1243
|
+
pg.show_usage("pg_rst")
|
|
1244
|
+
|
|
1240
1245
|
parser = argparse.ArgumentParser(
|
|
1241
1246
|
description=(
|
|
1242
1247
|
"Convert a .usg help document to reStructuredText (.rst) using RST templates. "
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
Convert a text-based program usage document (.usg file) into reStructuredText
|
|
2
|
+
(.rst) files using RST template files, for publication to readthedocs.io via
|
|
3
|
+
a gdex-docs-* GitHub repository.
|
|
4
|
+
|
|
5
|
+
OPTS and ALIAS are loaded from rda_python_<docname>/<docname>.py (or from
|
|
6
|
+
--pyfile if given): the module is searched first for a class that carries
|
|
7
|
+
both as class attributes, then for module-level OPTS/ALIAS variables.
|
|
8
|
+
|
|
9
|
+
Usage: pgrst [docname] [-u FILE] [-p FILE] [-d DIR] [-h]
|
|
10
|
+
|
|
11
|
+
- docname
|
|
12
|
+
Short document name, e.g. 'dsarch' or 'dsupdt'. Required unless
|
|
13
|
+
--usgfile is given, in which case the name is derived from the .usg
|
|
14
|
+
filename by removing the extension.
|
|
15
|
+
|
|
16
|
+
- Option -u or --usgfile FILE
|
|
17
|
+
Path to the .usg source document. When given, docname is derived
|
|
18
|
+
from the filename by removing the .usg extension, and the source
|
|
19
|
+
directory is set to the directory containing the file.
|
|
20
|
+
|
|
21
|
+
- Option -p or --pyfile FILE
|
|
22
|
+
Path to a Python file that defines OPTS (and optionally ALIAS) either
|
|
23
|
+
at module level or as class attributes. When given, the default
|
|
24
|
+
module-import convention (rda_python_<docname>/<docname>.py) is
|
|
25
|
+
bypassed.
|
|
26
|
+
|
|
27
|
+
- Option -d or --docdir DIR
|
|
28
|
+
Root directory under which the per-document RST output is written.
|
|
29
|
+
Defaults to the current working directory.
|
|
30
|
+
|
|
31
|
+
- Option -h, display this help document.
|
|
32
|
+
|
|
33
|
+
The .usg source document must be structured with a summary paragraph at the
|
|
34
|
+
top, followed by option descriptions and an examples section. OPTS defines
|
|
35
|
+
the option types (mode, single-value, multi-value, or action) used to
|
|
36
|
+
categorise each option in the RST output.
|
|
37
|
+
|
|
38
|
+
Output files are written to DOCDIR using RST template files bundled with
|
|
39
|
+
this package under rda_python_miscs/rst_templates/.
|
|
40
|
+
|
|
41
|
+
Examples:
|
|
42
|
+
|
|
43
|
+
1. Convert dsarch.usg to RST using the default module-import convention.
|
|
44
|
+
OPTS and ALIAS are loaded from rda_python_dsarch/dsarch.py.
|
|
45
|
+
RST output is written under the current directory:
|
|
46
|
+
|
|
47
|
+
pgrst dsarch
|
|
48
|
+
|
|
49
|
+
2. Convert dsarch.usg from a specific path, writing RST to /tmp/docs/:
|
|
50
|
+
|
|
51
|
+
pgrst dsarch -u /path/to/dsarch.usg -d /tmp/docs/
|
|
52
|
+
|
|
53
|
+
3. Convert using a custom Python file for OPTS/ALIAS instead of the
|
|
54
|
+
installed package module:
|
|
55
|
+
|
|
56
|
+
pgrst dsarch -p /path/to/dsarch.py
|
|
57
|
+
|
|
58
|
+
4. Derive the document name from the .usg filename (no positional arg):
|
|
59
|
+
|
|
60
|
+
pgrst -u /path/to/dsupdt.usg -d /tmp/docs/
|
rda_python_miscs/pg_wget.py
CHANGED
|
@@ -34,9 +34,10 @@ OPTIONS = {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
#
|
|
37
|
-
# main function to
|
|
37
|
+
# main function to execute this script
|
|
38
38
|
#
|
|
39
39
|
def main():
|
|
40
|
+
"""Parse command-line options, validate inputs, and run the wildcard download."""
|
|
40
41
|
|
|
41
42
|
option = None
|
|
42
43
|
JCS = ['cat', 'tar', 'first', 'last']
|
|
@@ -89,9 +90,21 @@ def main():
|
|
|
89
90
|
sys.exit(0)
|
|
90
91
|
|
|
91
92
|
#
|
|
92
|
-
# download one or multiple remote files via wget;
|
|
93
|
+
# download one or multiple remote files via wget; join files to a single one if multiple
|
|
93
94
|
#
|
|
94
95
|
def download_wildcard_files():
|
|
96
|
+
"""Download remote files matching the wildcard pattern and combine into one output file.
|
|
97
|
+
|
|
98
|
+
Skips the download if the local output file already exists and -CN is not set.
|
|
99
|
+
Runs wget only when -CN is set or fewer than FC files are already present locally.
|
|
100
|
+
Compares timestamps and file metadata to decide whether a rebuild is needed.
|
|
101
|
+
Combines downloaded parts using the strategy selected by -JC (cat/tar/first/last).
|
|
102
|
+
Removes intermediate part-files when -CR is set.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
int: 1 if the output file was built or rebuilt, 0 if all parts were already
|
|
106
|
+
up-to-date, or None (implicitly) when a warning/error caused early return.
|
|
107
|
+
"""
|
|
95
108
|
|
|
96
109
|
deleted = 0
|
|
97
110
|
if OPTIONS['FN']:
|
|
@@ -102,7 +115,7 @@ def download_wildcard_files():
|
|
|
102
115
|
|
|
103
116
|
dinfo = PgFile.check_local_file(dfile, 1)
|
|
104
117
|
if dinfo and not OPTIONS['CN']:
|
|
105
|
-
return PgLOG.pglog("{}: file
|
|
118
|
+
return PgLOG.pglog("{}: file downloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN)
|
|
106
119
|
|
|
107
120
|
build = 0 if dinfo else 1
|
|
108
121
|
wfile = OPTIONS['RN'] + "*"
|
|
@@ -127,11 +140,11 @@ def download_wildcard_files():
|
|
|
127
140
|
|
|
128
141
|
if ncnt == 0:
|
|
129
142
|
if deleted:
|
|
130
|
-
return PgLOG.pglog("{}: File
|
|
143
|
+
return PgLOG.pglog("{}: File downloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
|
|
131
144
|
else:
|
|
132
|
-
return PgLOG.pglog("{}: NO file to
|
|
145
|
+
return PgLOG.pglog("{}: NO file to download on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
|
|
133
146
|
elif ncnt < OPTIONS['MC']:
|
|
134
|
-
return PgLOG.pglog("{}: NOT ready, only {} of {} files
|
|
147
|
+
return PgLOG.pglog("{}: NOT ready, only {} of {} files downloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN)
|
|
135
148
|
|
|
136
149
|
rfiles = sorted(nlist)
|
|
137
150
|
size = skip = 0
|
|
@@ -145,10 +158,10 @@ def download_wildcard_files():
|
|
|
145
158
|
elif rfile not in dlist:
|
|
146
159
|
build = 1
|
|
147
160
|
elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0:
|
|
148
|
-
PgLOG.pglog("{}: Newer file
|
|
161
|
+
PgLOG.pglog("{}: Newer file downloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
|
|
149
162
|
build = 1
|
|
150
163
|
else:
|
|
151
|
-
PgLOG.pglog("{}: No newer file found on ".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
|
|
164
|
+
PgLOG.pglog("{}: No newer file found on {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
|
|
152
165
|
|
|
153
166
|
if skip == ncnt: return 0
|
|
154
167
|
|