rda-python-miscs 2.0.15__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@
7
7
  # https://github.com/NCAR/rda-utility-programs.git
8
8
  # 2025-12-29 convert to class BashQsub
9
9
  # Purpose: python script to submit a batch job on PBS node via bash script
10
- # Github: https://github.com/NCAR/rda-pythn-miscs.git
10
+ # Github: https://github.com/NCAR/rda-python-miscs.git
11
11
  ##################################################################################
12
12
  import os
13
13
  import sys
@@ -16,8 +16,14 @@ from os import path as op
16
16
  from rda_python_common.pg_log import PgLOG
17
17
 
18
18
  class BashQsub(PgLOG):
19
+ """Submit a PBS batch job via a dynamically generated bash script using qsub.
20
+
21
+ Builds a bash script with PBS directives, module loads, and conda environment
22
+ activation, then submits it through the PBS qsub command.
23
+ """
19
24
 
20
25
  def __init__(self):
26
+ """Initialize BashQsub with default PBS resource settings and options."""
21
27
  super().__init__()
22
28
  self.DEFMODS = {
23
29
  'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2"
@@ -42,8 +48,15 @@ class BashQsub(PgLOG):
42
48
  self.gdexsub = self.BCHCMDS['PBS']
43
49
  self.args = None
44
50
 
45
- # function to readparameters
51
+ # function to read parameters
46
52
  def read_parameters(self):
53
+ """Parse command-line arguments and populate PBS options and customized options.
54
+
55
+ Handles single-dash qsub options (e.g. -q, -A, -l) and long custom options
56
+ (-cmd, -cwd, -env, -mod, -res). Validates that the qsub command is available
57
+ and that a -cmd value is provided. Sets default log paths and job name if not
58
+ specified, and changes the working directory if -cwd is given.
59
+ """
47
60
  aname = 'bashqsub'
48
61
  pname = 'gdexqsub'
49
62
  self.set_help_path(__file__)
@@ -88,11 +101,12 @@ class BashQsub(PgLOG):
88
101
  if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
89
102
  if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd'])
90
103
  if self.coptions['cwd']:
91
- if 's' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX)
104
+ if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX)
92
105
  os.chdir(self.coptions['cwd'])
93
106
 
94
107
  # function to start actions
95
108
  def start_actions(self):
109
+ """Resolve the command path, build the bash script, and submit it via qsub."""
96
110
  cmd = self.valid_command(self.coptions['cmd'])
97
111
  if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd'])
98
112
  if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX)
@@ -105,6 +119,17 @@ class BashQsub(PgLOG):
105
119
 
106
120
  # build bash script to submit a PBS batch job
107
121
  def build_bash_script(self, cmd):
122
+ """Build and return a bash script string with PBS directives for the given command.
123
+
124
+ Sets HOME, sources system and conda profile scripts and the user's .bashrc,
125
+ loads modules, activates the conda environment, then runs the command.
126
+
127
+ Args:
128
+ cmd (str): The fully-resolved command (with arguments) to execute in the job.
129
+
130
+ Returns:
131
+ str: The complete bash batch script content.
132
+ """
108
133
  buf = "#!/usr/bin/bash\n\n" # qsub starting bash script
109
134
  if 'l' in self.SOPTIONS: self.add_resources()
110
135
  # add options to bash script for qsub
@@ -128,8 +153,13 @@ class BashQsub(PgLOG):
128
153
  buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd)
129
154
  return buf
130
155
 
131
- # check and add resource options
156
+ # check and add resource options
132
157
  def add_resources(self):
158
+ """Parse -l option value into the RESOURCES dict and remove the raw -l entry.
159
+
160
+ Expects comma-separated name=value pairs (e.g. 'walltime=2:00:00,select=1:ncpus=4').
161
+ Logs an error if a token does not contain '='.
162
+ """
133
163
  for res in re.split(',', self.SOPTIONS['l']):
134
164
  ms = re.match(r'^([^=]+)=(.+)$', res)
135
165
  if ms:
@@ -140,6 +170,20 @@ class BashQsub(PgLOG):
140
170
 
141
171
  # add module loads for modules provided
142
172
  def add_modules(self, res, mods):
173
+ """Build and return module load/unload commands for the bash script.
174
+
175
+ Loads the default module set for the given reservation (or the 'default' set).
176
+ Additional modules in ``mods`` are appended; path-style entries (starting with
177
+ '/') use 'module use' instead of 'module load'. Modules already in the default
178
+ set are skipped. SWAPMODS entries trigger an unload before the new load.
179
+
180
+ Args:
181
+ res (str): Reservation name used to look up DEFMODS; falls back to 'default'.
182
+ mods (str): Comma-separated list of extra modules (or None).
183
+
184
+ Returns:
185
+ str: Shell commands to load/unload modules.
186
+ """
143
187
  mbuf = "\n"
144
188
  defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default']
145
189
  dmods = re.split(',', defmods)
@@ -163,6 +207,16 @@ class BashQsub(PgLOG):
163
207
 
164
208
  # set virtual machine libraries
165
209
  def set_vm_libs(self, res):
210
+ """Build and return conda/VM library activation commands for the bash script.
211
+
212
+ Looks up DEFLIBS for the given reservation (falls back to 'default').
213
+
214
+ Args:
215
+ res (str): Reservation name used to look up DEFLIBS; falls back to 'default'.
216
+
217
+ Returns:
218
+ str: Shell commands to activate virtual environment libraries, or '' if none.
219
+ """
166
220
  deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default']
167
221
  if not deflibs: return ''
168
222
  dlibs = re.split(',', deflibs)
@@ -171,8 +225,9 @@ class BashQsub(PgLOG):
171
225
  libbuf += dlib + "\n"
172
226
  return libbuf
173
227
 
174
- # main function to excecute this script
228
+ # main function to execute this script
175
229
  def main():
230
+ """Entry point: instantiate BashQsub, parse arguments, run, and exit."""
176
231
  object = BashQsub()
177
232
  object.read_parameters()
178
233
  object.start_actions()
@@ -10,9 +10,9 @@
10
10
  -o LOGPATH/gdexqsub/
11
11
  -e LOGPATH/gdexqsub/
12
12
  -A P43713000
13
- -m a
14
- -q gdex
15
- -l walltime=6:00:00,select=1:node=1:mem=1gb
13
+ -m n
14
+ -q gdex@casper-pbs
15
+ -l walltime=6:00:00,select=1:ncpus=1:mem=1gb
16
16
 
17
17
  - Option -cwd, set the working directory for the Command to be executed. If
18
18
  it is not specified, it defaults to the current directory where qsub
@@ -33,18 +33,18 @@
33
33
  A bash script example:
34
34
  #!/usr/bin/bash
35
35
 
36
- #PBS -o /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/
37
- #PBS -e /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/
36
+ #PBS -o /glade/u/home/gdexdata/dssdb/log/gdexqsub/
37
+ #PBS -e /glade/u/home/gdexdata/dssdb/log/gdexqsub/
38
38
  #PBS -A P43713000
39
39
  #PBS -q gdex@casper-pbs
40
40
  #PBS -m n
41
41
  #PBS -N dsrqst
42
42
  #PBS -l walltime=1:00:00
43
43
  #PBS -l select=1:ncpus=1:mem=1gb
44
- export HOME=/gpfs/u/home/zji
44
+ export HOME=/glade/u/home/zji
45
45
  source /etc/profile.d/z00_modules.sh
46
46
  source /glade/u/apps/opt/conda/etc/profile.d/conda.sh
47
- source /gpfs/u/home/zji/.bashrc
47
+ source /glade/u/home/zji/.bashrc
48
48
  pwd; hostname; date
49
49
 
50
50
  module load ncarenv
@@ -18,8 +18,16 @@ from os import path as op
18
18
  from rda_python_common.pg_split import PgSplit
19
19
 
20
20
  class GdexLs(PgSplit):
21
+ """List local files/directories and display matching metadata from GDEXDB.
22
+
23
+ For each path, queries the GDEX database for dataset, group, or file records
24
+ and prints them in aligned columns: type-prefixed name, size, file count (or
25
+ format), and description. A leading letter on each output line indicates the
26
+ item type: 'D' for a dataset root, 'G' for a sub-group, 'F' for a data file.
27
+ """
21
28
 
22
29
  def __init__(self):
30
+ """Initialize display constants, CLI option flags, and listing state."""
23
31
  super().__init__()
24
32
  # define some constants for gdexls actions
25
33
  self.DIDX = 3 # description column index
@@ -49,6 +57,13 @@ class GdexLs(PgSplit):
49
57
 
50
58
  # function to read parameters
51
59
  def read_parameters(self):
60
+ """Parse command-line arguments into GDEXLS option flags and the file/directory list.
61
+
62
+ Recognises boolean flags -d, -f, -N, -r and value options -R, -D.
63
+ Positional arguments are resolved to real paths and appended to LINFO['files'].
64
+ Exits with usage if -h/--help/? is given; errors on unknown options or
65
+ values without a preceding option.
66
+ """
52
67
  self.set_help_path(__file__)
53
68
  self.PGLOG['LOGFILE'] = "gdexls.log" # set different log file
54
69
  self.LINFO['curdir'] = self.get_real_path(os.getcwd())
@@ -61,7 +76,7 @@ class GdexLs(PgSplit):
61
76
  if ms:
62
77
  option = ms.group(1)
63
78
  if option not in self.GDEXLS: self.pglog(arg + ": Unknown Option", self.LGEREX)
64
- if 'dfNr'.find(option) > -1:
79
+ if option in 'dfNr':
65
80
  self.GDEXLS[option] = 1
66
81
  option = defopt
67
82
  continue
@@ -76,8 +91,15 @@ class GdexLs(PgSplit):
76
91
  self.GDEXLS[option] = arg
77
92
  option = defopt
78
93
 
79
- # functio to start actions
80
- def start_actions(self):
94
+ # function to start actions
95
+ def start_actions(self):
96
+ """Fetch DB connection info, resolve the default file list, and drive display.
97
+
98
+ If no paths were given, lists all entries in the current directory.
99
+ Defaults both -d and -f flags when neither is explicitly set.
100
+ Prints a summary count of datasets, groups, and files at the end,
101
+ or exits with an error if nothing matched in the database.
102
+ """
81
103
  self.view_dbinfo()
82
104
  if not self.LINFO['files']:
83
105
  self.LINFO['files'] = sorted(glob.glob('*')) # view all files in current directory
@@ -112,6 +134,15 @@ class GdexLs(PgSplit):
112
134
 
113
135
  # display the top level list
114
136
  def display_top_list(self, files):
137
+ """Process and display each top-level path, expanding directories as needed.
138
+
139
+ A path ending with '/' suppresses display of the directory entry itself and
140
+ always recurses into it. Paths not starting with '/' are joined to curdir.
141
+ Flushes the cached formatted list when it exceeds CLMT entries.
142
+
143
+ Args:
144
+ files (list[str]): Top-level paths provided on the command line (or cwd glob).
145
+ """
115
146
  for file in files:
116
147
  if not op.exists(file):
117
148
  sys.stderr.write(file + ": NOT exists\n")
@@ -131,6 +162,14 @@ class GdexLs(PgSplit):
131
162
 
132
163
  # recursively display directory/file info
133
164
  def display_list(self, files, level):
165
+ """Recursively display metadata for each path up to the configured depth limit.
166
+
167
+ Flushes the formatted cache when it exceeds CLMT entries to keep memory bounded.
168
+
169
+ Args:
170
+ files (list[str]): Glob-expanded paths at the current recursion level.
171
+ level (int): Current recursion depth (1-based); stops when >= GDEXLS['R'].
172
+ """
134
173
  for file in files:
135
174
  isdir = 1 if op.isdir(file) else 0
136
175
  self.display_line(file, isdir)
@@ -141,6 +180,17 @@ class GdexLs(PgSplit):
141
180
 
142
181
  # find dataset/group info; display or cache file
143
182
  def display_line(self, file, isdir):
183
+ """Look up GDEX metadata for a path and pass a formatted record to display_record.
184
+
185
+ Resolves the dataset ID and home path on first call, then reuses cached values
186
+ for subsequent paths under the same dataset. Skips paths with no matching
187
+ dataset ID. Dispatches to the dataset, group, or file branch based on whether
188
+ the path is the dataset root, a subdirectory, or a regular file.
189
+
190
+ Args:
191
+ file (str): Absolute path to the file or directory.
192
+ isdir (int): 1 if the path is a directory, 0 otherwise.
193
+ """
144
194
  getwfile = 1
145
195
  if self.LINFO['dsid'] and self.LINFO['dhome']:
146
196
  ms = re.match(r'^{}/(.*)$'.format(self.LINFO['dhome']), file)
@@ -151,7 +201,7 @@ class GdexLs(PgSplit):
151
201
  self.LINFO['dsid'] = self.find_dataset_id(file)
152
202
  if self.LINFO['dsid'] is None: return # skip for missing dsid
153
203
  pgrec = self.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(self.LINFO['dsid']), self.LGEREX)
154
- if not pgrec: return None
204
+ if not pgrec: return
155
205
  self.LINFO['dhome'] = "{}/{}".format(self.PGLOG['DSDHOME'], self.LINFO['dsid'])
156
206
  if self.LINFO['dhome'] == file:
157
207
  file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1)
@@ -183,8 +233,17 @@ class GdexLs(PgSplit):
183
233
  self.display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note])
184
234
  self.LINFO['fcnt'] += 1
185
235
 
186
- # display one file info
236
+ # display one record
187
237
  def display_record(self, disp):
238
+ """Format the size field and either print immediately or cache for aligned output.
239
+
240
+ In unformatted mode (-N) the columns are joined by the delimiter and printed
241
+ directly. Otherwise the record is appended to pgrecs and the per-column
242
+ maximum widths are updated for later aligned rendering by display_format_list.
243
+
244
+ Args:
245
+ disp (list[str]): Four-element list: [name, size, count/format, description].
246
+ """
188
247
  disp[1] = self.get_float_string(disp[1])
189
248
  if self.GDEXLS['N']:
190
249
  print(self.GDEXLS['D'].join(disp))
@@ -197,6 +256,12 @@ class GdexLs(PgSplit):
197
256
 
198
257
  # display cached list with format
199
258
  def display_format_list(self):
259
+ """Flush the cached record list with column-aligned formatting and reset the cache.
260
+
261
+ Applies left or right alignment to each of the first DIDX columns based on
262
+ ALIGNS, padding to the maximum observed width, then joins with the delimiter.
263
+ Resets pcnt to 0 after printing (pgrecs entries are left but ignored).
264
+ """
200
265
  for j in range(self.LINFO['pcnt']):
201
266
  disp = self.LINFO['pgrecs'][j]
202
267
  for i in range(self.DIDX):
@@ -207,9 +272,20 @@ class GdexLs(PgSplit):
207
272
  print(self.GDEXLS['D'].join(disp))
208
273
  self.LINFO['pcnt'] = 0
209
274
 
210
- # change size to floating point value with unit
275
+ # convert size to floating point value with unit
211
276
  @staticmethod
212
277
  def get_float_string(val):
278
+ """Convert a numeric byte count to a human-readable string with a unit suffix.
279
+
280
+ Divides by 1000 repeatedly until the value is <= 1000 or the largest unit
281
+ (Petabytes) is reached. Values >= 1K are formatted to two decimal places.
282
+
283
+ Args:
284
+ val (int|float): Size in bytes.
285
+
286
+ Returns:
287
+ str: Formatted string such as '1.50M' or '512B'.
288
+ """
213
289
  units = ['B', 'K', 'M', 'G', 'T', 'P']
214
290
  idx = 0
215
291
  while val > 1000 and idx < 5:
@@ -220,17 +296,32 @@ class GdexLs(PgSplit):
220
296
  else:
221
297
  return "{}{}".format(val, units[idx])
222
298
 
223
- # replace /gpfs to the path /glade
299
+ # normalize /gpfs paths to /glade equivalents and resolve symlinks
224
300
  @staticmethod
225
301
  def get_real_path(path):
302
+ """Translate legacy /gpfs mount-point prefixes to their /glade equivalents.
303
+
304
+ Handles two mappings:
305
+ - /gpfs/u/... → /glade/u/...
306
+ - /gpfs/csfs1/... → /glade/campaign/...
307
+
308
+ Then calls os.path.realpath to resolve any symlinks.
309
+
310
+ Args:
311
+ path (str): Filesystem path, possibly using a /gpfs prefix.
312
+
313
+ Returns:
314
+ str: Canonicalized absolute path, with any recognized /gpfs prefix translated to /glade.
315
+ """
226
316
  if re.match(r'^/gpfs/u', path):
227
317
  path = re.sub(r'^/gpfs', '/glade', path, 1)
228
318
  elif re.match(r'^/gpfs/csfs1/', path):
229
319
  path = re.sub(r'^/gpfs/csfs1', '/glade/campaign', path, 1)
230
320
  return op.realpath(path)
231
321
 
232
- # main function to excecute this script
322
+ # main function to execute this script
233
323
  def main():
324
+ """Entry point: instantiate GdexLs, parse arguments, run, and exit."""
234
325
  object = GdexLs()
235
326
  object.read_parameters()
236
327
  object.start_actions()
@@ -1,60 +1,74 @@
1
1
 
2
- List directory and file information of the current or specified directories
3
- with metadata information if matched. Four columns are listed, they are Directory
4
- Name, Data Volume, File Count, and Brief Description if the listed item is a
5
- directory, and they are File Name, Data Size, Data Format, and Brief Description
6
- if the listed item is a file.
7
-
8
- A leading letter is displayed on each line to indicate what type item is listed;
9
- including 'D' for a whole dataset, 'G' for a group or subgroup in a dataset,
10
- and 'F' for a data file.
11
-
12
- The output of directory/file list is formatted as default with double spaces
13
- as delimiter and each column lined up vertically at least for the files under each
14
- directory. Provide Option -N to display list without format. A delimiter symbol '|'
15
- is defaulted if Option -N is present.
16
-
2
+ List local files and directories with matching metadata from the GDEX database.
3
+ Each output line has four columns:
4
+
5
+ For a dataset root (D) or group (G):
6
+ Name | Total Data Volume | File Count | Description
7
+
8
+ For a data file (F):
9
+ Name | File Size | Data Format | Note
10
+
11
+ A leading letter on each line indicates the item type:
12
+ D - dataset root directory
13
+ G - group or sub-group directory within a dataset
14
+ F - individual data file
15
+
16
+ Output is column-aligned by default using double spaces as the delimiter.
17
+ Use -N to disable formatting; the delimiter then defaults to '|'.
18
+ Nothing is displayed if no matching GDEX metadata is found for the given paths.
19
+
17
20
  Usage: gdexls [-d] [-f] [-N] [-h] [-r] [-D DelimitSymbols] [-R RecursiveLevel] [Directory/File List]
18
21
 
19
- - Option -d, list directory information only. Directory information
20
- is included as default. Add this option to exclude file information;
22
+ - Option -d, list dataset/group (directory) information only.
23
+ Both directories and files are listed by default; this option
24
+ suppresses file output;
21
25
 
22
- - Option -f, list file information only. File information
23
- is included as default. Add this option to exclude directory information;
26
+ - Option -f, list file information only.
27
+ Both directories and files are listed by default; this option
28
+ suppresses directory output;
24
29
 
25
- - Option -N, list files unformatted;
30
+ - Option -N, display output without column alignment;
26
31
 
27
32
  - Option -h, display this help document;
28
33
 
29
- - Option -r, list directories and files recursively;
34
+ - Option -r, list directories and files recursively (no depth limit);
35
+
36
+ - Option -R RecursiveLevel, list recursively up to the specified depth.
37
+ -R 1 lists only the immediate contents of each given directory;
38
+
39
+ - Option -D DelimitSymbols, specify the column delimiter string.
40
+ Defaults to " " (two spaces) for formatted output and '|' for
41
+ unformatted (-N) output. Quote the string if it contains shell
42
+ metacharacters, e.g. -D '<:>';
43
+
44
+ - Directory/File List is optional. Without it, all entries in the
45
+ current directory are listed. Shell wildcards are supported.
46
+
47
+ This utility can be run from any directory. It searches the GDEX database
48
+ using the resolved absolute path of each argument, so both absolute and
49
+ relative paths are accepted.
50
+
51
+ Examples for dataset d277006:
52
+
53
+ 1. Change into the dataset home directory and run gdexls:
30
54
 
31
- - Option -R, list directories and files recursively up to the level
32
- provided with this Option;
55
+ cd /PathTo/d277006
56
+ gdexls
33
57
 
34
- - Option -D, specify delimiting symbols for dividing the columns.
35
- It defaults to " " for formatted output and '|' for unformatted output.
36
- Make sure quote the symbols if any character in the symbols has Unix
37
- meaning, for example -D '<:>';
58
+ Add -r to recurse into sub-directories, or cd into a sub-directory
59
+ first to list only its contents.
38
60
 
39
- - Directory/file List is optional; without specification, all directories
40
- and files in the current directory are listed. Unix command line
41
- wildcards are supported.
61
+ 2. Pass an absolute path directly:
42
62
 
43
- This utility program can be executed anywhere. Nothing is displayed if neither
44
- directory nor file information pre-gathered in database.
63
+ gdexls /PathTo/d277006/ # list contents of the dataset directory
64
+ gdexls /PathTo/d277006/* # same effect via shell glob expansion
45
65
 
46
- For examples, to check directories and files of d277006, you can
66
+ Without a trailing '/' or wildcard, the dataset entry itself is listed
67
+ unless -r or -R is given:
47
68
 
48
- 1. Change into the dataset home data directory as 'cd /PathTo/d277006' and
49
- execute 'gdexls'; add recursive option '-r' to check directories and files
50
- further into the sub-directories, or change directory into a sub-directory
51
- to check files inside of it.
69
+ gdexls /PathTo/d277006 # shows the D-line for the dataset root
52
70
 
53
- 2. Pass an absolute path to gdexls as 'gdexls /PathTo/d277006/' or as
54
- 'gdexls /PathTo/d277006/*'; without the ending by '/' or an appended
55
- wildcard symbol '*' information of the dataset itself is check unless
56
- the recursive option '-r' or '-R RecursiveLevel' is present
71
+ 3. Use a relative path from a neighbouring directory:
57
72
 
58
- 3. If the current directory is in another dataset home data directory,
59
- such as /PathTo/d277006, you can pass a relative path to gdexls
60
- as 'gdexls ../d277006/' or as 'gdexls ../d277006/*'
73
+ gdexls ../d277006/
74
+ gdexls ../d277006/*
@@ -1237,6 +1237,11 @@ class PgRST(PgFile, PgUtil):
1237
1237
 
1238
1238
  def main():
1239
1239
  """Entry point for command-line usage of pg_rst.py."""
1240
+ import sys
1241
+ if len(sys.argv) == 1 or any(a in sys.argv[1:] for a in ('-h', '--help', '-?')):
1242
+ pg = PgRST()
1243
+ pg.show_usage("pg_rst")
1244
+
1240
1245
  parser = argparse.ArgumentParser(
1241
1246
  description=(
1242
1247
  "Convert a .usg help document to reStructuredText (.rst) using RST templates. "
@@ -0,0 +1,60 @@
1
+ Convert a text-based program usage document (.usg file) into reStructuredText
2
+ (.rst) files using RST template files, for publication to readthedocs.io via
3
+ a gdex-docs-* GitHub repository.
4
+
5
+ OPTS and ALIAS are loaded from rda_python_<docname>/<docname>.py (or from
6
+ --pyfile if given): the module is searched first for a class that carries
7
+ both as class attributes, then for module-level OPTS/ALIAS variables.
8
+
9
+ Usage: pgrst [docname] [-u FILE] [-p FILE] [-d DIR] [-h]
10
+
11
+ - docname
12
+ Short document name, e.g. 'dsarch' or 'dsupdt'. Required unless
13
+ --usgfile is given, in which case the name is derived from the .usg
14
+ filename by removing the extension.
15
+
16
+ - Option -u or --usgfile FILE
17
+ Path to the .usg source document. When given, docname is derived
18
+ from the filename by removing the .usg extension, and the source
19
+ directory is set to the directory containing the file.
20
+
21
+ - Option -p or --pyfile FILE
22
+ Path to a Python file that defines OPTS (and optionally ALIAS) either
23
+ at module level or as class attributes. When given, the default
24
+ module-import convention (rda_python_<docname>/<docname>.py) is
25
+ bypassed.
26
+
27
+ - Option -d or --docdir DIR
28
+ Root directory under which the per-document RST output is written.
29
+ Defaults to the current working directory.
30
+
31
+ - Option -h, display this help document.
32
+
33
+ The .usg source document must be structured with a summary paragraph at the
34
+ top, followed by option descriptions and an examples section. OPTS defines
35
+ the option types (mode, single-value, multi-value, or action) used to
36
+ categorise each option in the RST output.
37
+
38
+ Output files are written to DOCDIR using RST template files bundled with
39
+ this package under rda_python_miscs/rst_templates/.
40
+
41
+ Examples:
42
+
43
+ 1. Convert dsarch.usg to RST using the default module-import convention.
44
+ OPTS and ALIAS are loaded from rda_python_dsarch/dsarch.py.
45
+ RST output is written under the current directory:
46
+
47
+ pgrst dsarch
48
+
49
+ 2. Convert dsarch.usg from a specific path, writing RST to /tmp/docs/:
50
+
51
+ pgrst dsarch -u /path/to/dsarch.usg -d /tmp/docs/
52
+
53
+ 3. Convert using a custom Python file for OPTS/ALIAS instead of the
54
+ installed package module:
55
+
56
+ pgrst dsarch -p /path/to/dsarch.py
57
+
58
+ 4. Derive the document name from the .usg filename (no positional arg):
59
+
60
+ pgrst -u /path/to/dsupdt.usg -d /tmp/docs/
@@ -34,9 +34,10 @@ OPTIONS = {
34
34
  }
35
35
 
36
36
  #
37
- # main function to excecute this script
37
+ # main function to execute this script
38
38
  #
39
39
  def main():
40
+ """Parse command-line options, validate inputs, and run the wildcard download."""
40
41
 
41
42
  option = None
42
43
  JCS = ['cat', 'tar', 'first', 'last']
@@ -89,9 +90,21 @@ def main():
89
90
  sys.exit(0)
90
91
 
91
92
  #
92
- # download one or multiple remote files via wget; concat files to a single one if multiple
93
+ # download one or multiple remote files via wget; join them into a single file if there are multiple
93
94
  #
94
95
  def download_wildcard_files():
96
+ """Download remote files matching the wildcard pattern and combine into one output file.
97
+
98
+ Skips the download if the local output file already exists and -CN is not set.
99
+ Runs wget only when -CN is set or fewer than FC files are already present locally.
100
+ Compares timestamps and file metadata to decide whether a rebuild is needed.
101
+ Combines downloaded parts using the strategy selected by -JC (cat/tar/first/last).
102
+ Removes intermediate part-files when -CR is set.
103
+
104
+ Returns:
105
+ int: 1 if the output file was built or rebuilt, 0 if all parts were already
106
+ up-to-date, or the value returned by PgLOG.pglog() when a warning triggers an early return.
107
+ """
95
108
 
96
109
  deleted = 0
97
110
  if OPTIONS['FN']:
@@ -102,7 +115,7 @@ def download_wildcard_files():
102
115
 
103
116
  dinfo = PgFile.check_local_file(dfile, 1)
104
117
  if dinfo and not OPTIONS['CN']:
105
- return PgLOG.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN)
118
+ return PgLOG.pglog("{}: file downloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN)
106
119
 
107
120
  build = 0 if dinfo else 1
108
121
  wfile = OPTIONS['RN'] + "*"
@@ -127,11 +140,11 @@ def download_wildcard_files():
127
140
 
128
141
  if ncnt == 0:
129
142
  if deleted:
130
- return PgLOG.pglog("{}: File dowloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
143
+ return PgLOG.pglog("{}: File downloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
131
144
  else:
132
- return PgLOG.pglog("{}: NO file to dowload on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
145
+ return PgLOG.pglog("{}: NO file to download on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN)
133
146
  elif ncnt < OPTIONS['MC']:
134
- return PgLOG.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN)
147
+ return PgLOG.pglog("{}: NOT ready, only {} of {} files downloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN)
135
148
 
136
149
  rfiles = sorted(nlist)
137
150
  size = skip = 0
@@ -145,10 +158,10 @@ def download_wildcard_files():
145
158
  elif rfile not in dlist:
146
159
  build = 1
147
160
  elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0:
148
- PgLOG.pglog("{}: Newer file dowloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
161
+ PgLOG.pglog("{}: Newer file downloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
149
162
  build = 1
150
163
  else:
151
- PgLOG.pglog("{}: No newer file found on ".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
164
+ PgLOG.pglog("{}: No newer file found on {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN)
152
165
 
153
166
  if skip == ncnt: return 0
154
167