rda-python-miscs 1.0.7__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {rda_python_miscs-1.0.7/src/rda_python_miscs.egg-info → rda_python_miscs-2.0.1}/PKG-INFO +1 -1
  2. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/pyproject.toml +5 -11
  3. rda_python_miscs-1.0.7/src/rda_python_miscs/bashqsub.py → rda_python_miscs-2.0.1/src/rda_python_miscs/bash_qsub.py +2 -2
  4. rda_python_miscs-2.0.1/src/rda_python_miscs/bashqsub.py +182 -0
  5. rda_python_miscs-2.0.1/src/rda_python_miscs/gdexls.py +240 -0
  6. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/gdexls_standalone.py +2 -2
  7. rda_python_miscs-2.0.1/src/rda_python_miscs/pgwget.py +171 -0
  8. rda_python_miscs-2.0.1/src/rda_python_miscs/rdacp.py +190 -0
  9. rda_python_miscs-2.0.1/src/rda_python_miscs/rdakill.py +188 -0
  10. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdakill.usg +4 -4
  11. rda_python_miscs-2.0.1/src/rda_python_miscs/rdamod.py +152 -0
  12. rda_python_miscs-2.0.1/src/rda_python_miscs/rdaown.py +150 -0
  13. rda_python_miscs-2.0.1/src/rda_python_miscs/rdaps.py +137 -0
  14. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdaps.usg +1 -1
  15. rda_python_miscs-2.0.1/src/rda_python_miscs/rdasub.py +112 -0
  16. rda_python_miscs-2.0.1/src/rda_python_miscs/rdazip.py +66 -0
  17. rda_python_miscs-2.0.1/src/rda_python_miscs/tcshqsub.py +182 -0
  18. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1/src/rda_python_miscs.egg-info}/PKG-INFO +1 -1
  19. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs.egg-info/SOURCES.txt +11 -0
  20. rda_python_miscs-2.0.1/tests/test_miscs.py +16 -0
  21. rda_python_miscs-1.0.7/tests/test_miscs.py +0 -6
  22. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/LICENSE +0 -0
  23. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/MANIFEST.in +0 -0
  24. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/README.md +0 -0
  25. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/setup.cfg +0 -0
  26. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/__init__.py +0 -0
  27. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/bashqsub.usg +0 -0
  28. /rda_python_miscs-1.0.7/src/rda_python_miscs/gdexls.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/gdex_ls.py +0 -0
  29. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/gdexls.usg +0 -0
  30. /rda_python_miscs-1.0.7/src/rda_python_miscs/pgwget.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/pg_wget.py +0 -0
  31. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdacp.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_cp.py +0 -0
  32. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdakill.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_kill.py +0 -0
  33. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdamod.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_mod.py +0 -0
  34. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdaown.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_own.py +0 -0
  35. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdaps.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_ps.py +0 -0
  36. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdasub.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_sub.py +0 -0
  37. /rda_python_miscs-1.0.7/src/rda_python_miscs/rdazip.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/rda_zip.py +0 -0
  38. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdacp.usg +0 -0
  39. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdals.py +0 -0
  40. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdals.usg +0 -0
  41. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdamod.usg +0 -0
  42. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdaown.usg +0 -0
  43. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdasub.usg +0 -0
  44. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/rdazip.usg +0 -0
  45. /rda_python_miscs-1.0.7/src/rda_python_miscs/tcshqsub.py → /rda_python_miscs-2.0.1/src/rda_python_miscs/tcsh_qsub.py +0 -0
  46. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs/tcshqsub.usg +0 -0
  47. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs.egg-info/dependency_links.txt +0 -0
  48. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs.egg-info/entry_points.txt +0 -0
  49. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs.egg-info/requires.txt +0 -0
  50. {rda_python_miscs-1.0.7 → rda_python_miscs-2.0.1}/src/rda_python_miscs.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_miscs
3
- Version: 1.0.7
3
+ Version: 2.0.1
4
4
  Summary: RDA Python package to hold RDA miscellaneous utility programs
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-miscs
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "rda_python_miscs"
9
- version = "1.0.7"
9
+ version = "2.0.1"
10
10
  authors = [
11
11
  { name="Zaihua Ji", email="zji@ucar.edu" },
12
12
  ]
@@ -24,16 +24,10 @@ dependencies = [
24
24
  "rda_python_setuid",
25
25
  ]
26
26
 
27
- [tool.setuptools]
28
- include-package-data = true
29
-
30
- [tool.setuptools.packages.find]
31
- where = ["src"]
32
-
33
- [tool.setuptools.package-data]
34
- "rda_python_miscs" = ["bashqsub.usg", "tcshqsub.usg", "rdasub.usg", "rdacp.usg",
35
- "rdakill.usg", "rdals.usg", "rdamod.usg", "rdaown.usg",
36
- "rdaps.usg", "rdazip.usg"]
27
+ [tool.pytest.ini_options]
28
+ pythonpath = [
29
+ "src"
30
+ ]
37
31
 
38
32
  [project.urls]
39
33
  "Homepage" = "https://github.com/NCAR/rda-python-miscs"
@@ -24,7 +24,7 @@ DEFMODS = {
24
24
  }
25
25
 
26
26
  DEFLIBS = {
27
- 'default' : "conda activate /glade/work/gdexdata/conda-envs/pg-casper",
27
+ 'default' : "conda activate /glade/work/gdexdata/conda-envs/pg-gdex",
28
28
  }
29
29
 
30
30
  SWAPMODS = {
@@ -102,7 +102,7 @@ def main():
102
102
  msg = "{}-{}{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.PGLOG['CURUID'], PgLOG.current_datetime())
103
103
 
104
104
  if coptions['cwd']:
105
- if coptions['cwd'].find('$'): coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX)
105
+ if 's' in coptions['cwd']: coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX)
106
106
  msg += "-" + coptions['cwd']
107
107
  os.chdir(coptions['cwd'])
108
108
 
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env python3
2
+ ##################################################################################
3
+ # Title: bashqsub
4
+ # Author: Zaihua Ji, zji@ucar.edu
5
+ # Date: 11/19/2020
6
+ # 2025-03-07 transferred to package rda_python_miscs from
7
+ # https://github.com/NCAR/rda-utility-programs.git
8
+ # 2025-12-29 convert to class BashQsub
9
+ # Purpose: python script to submit a batch job on PBS node via bash script
10
+ # Github: https://github.com/NCAR/rda-python-miscs.git
11
+ ##################################################################################
12
+ import os
13
+ import sys
14
+ import re
15
+ from os import path as op
16
+ from rda_python_common.pg_log import PgLOG
17
+
18
+ class BashQsub(PgLOG):
19
+
20
def __init__(self):
    """Set up the default tables used to build and submit a PBS bash job."""
    super().__init__()
    # customized (multi-character) options handled by bashqsub itself
    self.coptions = dict(cmd=None, cwd=None, env=None, mod=None, res='default')
    # single-dash option values written to the script as "#PBS -<option>" lines
    self.SOPTIONS = dict(
        o=None,                 # will set to default if not provided
        e=None,
        A="P43713000",
        q="gdex@casper-pbs",
        m='n',                  # 'a' was used here before
    )
    # resource list for option -l
    self.RESOURCES = dict(
        walltime='6:00:00',     # if this is changed, change defpbstime in PgCheck.py too
        select='1:ncpus=1:mem=1gb',
    )
    # modules loaded and library setup commands issued per resource name
    self.DEFMODS = dict(default="ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2")
    self.DEFLIBS = dict(default="conda activate /glade/work/gdexdata/conda-envs/pg-gdex")
    self.SWAPMODS = dict()
    self.args = None            # filled by read_parameters() with the command's own arguments
    self.gdexsub = self.BCHCMDS['PBS']
44
+
45
+ # function to read parameters
46
def read_parameters(self):
    """Parse sys.argv into PBS options, bashqsub options and the command to run.

    Single-letter options (-o, -q, ...) are stored in self.SOPTIONS and the
    multi-character ones (-cmd, -cwd, -env, -mod, -res) in self.coptions;
    -env is stored as the single-letter option 'v'. Parsing stops right
    after the value of -cmd; everything that follows is kept in self.args
    as the arguments of the command itself. If -cwd is given, the process
    changes into that directory.
    """
    aname = 'bashqsub'
    pname = 'gdexqsub'
    self.set_help_path(__file__)
    copts = '|'.join(self.coptions)
    option = None
    argv = sys.argv[1:]
    if not argv: self.show_usage(aname)
    self.PGLOG['LOGFILE'] = pname + ".log"
    self.cmdlog("{} {}".format(aname, ' '.join(argv)))
    if not self.valid_command(self.gdexsub):
        self.pglog("{}: miss {} command to submit batch job".format(self.gdexsub, self.PGLOG['PBSNAME']), self.LGWNEX)
    while argv:
        arg = argv.pop(0)
        ms = re.match(r'^-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option == "b":
                self.PGLOG['BCKGRND'] = 1
                option = None
            else:
                self.SOPTIONS[option] = ''   # value, if any, comes with the next argument
            continue
        ms = re.match(r'^-({})$'.format(copts), arg)
        if ms:
            option = ms.group(1)
            if option == "env": option = 'v'
            continue
        if not option:
            self.pglog("{}: Value passed in without leading option for {}".format(arg, self.gdexsub), self.LGEREX)
        if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg):   # quote string with space but not quoted yet
            if arg.find("'") > -1:
                arg = '"{}"'.format(arg)
            else:
                arg = "'{}'".format(arg)
        if option in self.coptions:
            self.coptions[option] = arg
            if option == "cmd": break
        else:
            self.SOPTIONS[option] = arg
        option = None
    self.args = self.argv_to_string(argv, 0)   # append command options
    if not self.coptions['cmd']: self.pglog(aname + ": specify command via option -cmd to run", self.LGWNEX)
    if not self.SOPTIONS['o']: self.SOPTIONS['o'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
    if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
    if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd'])
    if self.coptions['cwd']:
        # expand environment variables only when the path actually holds a '$'
        # (the check used to look for the letter 's' by mistake)
        if '$' in self.coptions['cwd']:
            self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX)
        os.chdir(self.coptions['cwd'])
93
+
94
+ # function to start actions
95
def start_actions(self):
    """Locate the command, build the bash job script and hand it to the submit command."""
    requested = self.coptions['cmd']
    located = self.valid_command(requested)
    if not located:
        # retry relative to the current directory unless an absolute path was given
        if not re.match(r'^/', requested):
            located = self.valid_command('./' + requested)
    if not located:
        self.pglog(requested + ": Cannot find given command to run", self.LGWNEX)
    if self.args:
        located += " " + self.args
    script = self.build_bash_script(located)
    self.pglog(script, self.MSGLOG)
    # NOTE(review): 'bind mouting' is reproduced as is; confirm it matches the
    # message text this setting is meant to catch
    self.PGLOG['ERR2STD'] = ['bind mouting']
    self.pgsystem(self.gdexsub, self.LOGWRN, 6, script)
    self.PGLOG['ERR2STD'] = []
105
+
106
+ # build bash script to submit a PBS batch job
107
def build_bash_script(self, cmd):
    """Return the text of the bash script submitted as a PBS batch job.

    The script holds one "#PBS" line per entry of self.SOPTIONS and
    self.RESOURCES, the login environment setup, the module and library
    setup, and finally the command itself (echoed first).
    """
    # fold a user supplied -l option into self.RESOURCES before listing options
    if 'l' in self.SOPTIONS: self.add_resources()
    chunks = ["#!/usr/bin/bash\n\n"]   # qsub starting bash script
    for name, value in self.SOPTIONS.items():
        chunks.append("#PBS -" + name + (" {}".format(value) if value else "") + "\n")
    for name, value in self.RESOURCES.items():
        chunks.append("#PBS -l" + (" {}={}".format(name, value) if value else "") + "\n")
    # always include the login user's bash resource file
    homedir = "{}/{}".format(self.PGLOG['USRHOME'], self.PGLOG['CURUID'])
    chunks.extend([
        "export HOME={}\n".format(homedir),
        "source /etc/profile.d/z00_modules.sh\n",
        "source /glade/u/apps/opt/conda/etc/profile.d/conda.sh\n",
        "source {}/.bashrc\n".format(homedir),
        "pwd; hostname; date\n",
    ])
    chunks.append(self.add_modules(self.coptions['res'], self.coptions['mod']))
    chunks.append(self.set_vm_libs(self.coptions['res']))
    chunks.append("\necho {}\n{}\n\ndate\n".format(cmd, cmd))
    return "".join(chunks)
130
+
131
+ # check and add resource options
132
def add_resources(self):
    """Merge the comma-separated name=value pairs of option -l into self.RESOURCES.

    Entries without a name=value form are reported through pglog(); the 'l'
    entry is removed from self.SOPTIONS afterwards.
    """
    pair = re.compile(r'^([^=]+)=(.+)$')
    for item in re.split(',', self.SOPTIONS['l']):
        found = pair.match(item)
        if not found:
            self.pglog(item + ": use '=' to separate resource name & value", self.LGEREX)
            continue
        name, value = found.group(1), found.group(2)
        self.RESOURCES[name] = value
    del self.SOPTIONS['l']
140
+
141
+ # add module loads for modules provided
142
def add_modules(self, res, mods):
    """Return the "module ..." lines for the default and the requested modules.

    res selects the default module list in self.DEFMODS (falling back to
    'default'); mods is an optional comma-separated list of extra modules,
    where an entry starting with '/' is added via "module use". A module
    whose name (the part before the last '/') is a key of self.SWAPMODS
    gets the mapped module unloaded first.
    """
    def base_name(module):
        # the name without its trailing "/<version>" part
        found = re.match(r'^(.+)/', module)
        return found.group(1) if found else module

    defaults = re.split(',', self.DEFMODS.get(res, self.DEFMODS['default']) if res in self.DEFMODS else self.DEFMODS['default'])
    lines = ["\n"]
    for module in defaults:
        name = base_name(module)
        if name in self.SWAPMODS:
            lines.append("module unload {}\n".format(self.SWAPMODS[name]))
        lines.append("module load {}\n".format(module))
    extras = re.split(',', mods) if mods else []
    for module in extras:
        if re.match(r'^/', module):
            lines.append("module use {}\n".format(module))
            continue
        name = base_name(module)
        if name in defaults:   # loaded already as a default module
            continue
        if name in self.SWAPMODS:
            lines.append("module unload {}\n".format(self.SWAPMODS[name]))
        lines.append("module load {}\n".format(module))
    return "".join(lines)
163
+
164
+ # set virtual machine libraries
165
def set_vm_libs(self, res):
    """Return the library setup lines for resource res, one command per line.

    The comma-separated command list comes from self.DEFLIBS[res], or from
    the 'default' entry if res has none; an empty list yields ''.
    """
    key = res if res in self.DEFLIBS else 'default'
    commands = self.DEFLIBS[key]
    if not commands:
        return ''
    return "\n" + "".join(command + "\n" for command in commands.split(','))
173
+
174
+ # main function to execute this script
175
def main():
    """Run bashqsub: read the command line, submit the job, then exit with status 0."""
    job = BashQsub()
    job.read_parameters()
    job.start_actions()
    job.pgexit(0)
180
+
181
+ # call main() to start program
182
+ if __name__ == "__main__": main()
@@ -0,0 +1,240 @@
1
+ #!/usr/bin/env python3
2
+ ##################################################################################
3
+ # Title: gdexls
4
+ # Author: Zaihua Ji, zji@ucar.edu
5
+ # Date: 10/20/2020
6
+ # 2025-03-10 transferred to package rda_python_miscs from
7
+ # https://github.com/NCAR/rda-utility-programs.git
8
+ # 2025-09-21 copied from rdals to gdexls
9
+ # Purpose: list files/directories in a local directory and show additional
10
+ # information recorded in GDEXDB if any
11
+ # Github: https://github.com/NCAR/rda-python-miscs.git
12
+ ##################################################################################
13
+ import re
14
+ import os
15
+ import sys
16
+ import glob
17
+ from os import path as op
18
+ from rda_python_common.pg_split import PgSplit
19
+
20
+ class GdexLs(PgSplit):
21
+
22
def __init__(self):
    """Set up the constants, option values and running state of a gdexls run."""
    super().__init__()
    # option values settable from the command line
    self.GDEXLS = dict(
        d=0,      # 1 to list directory information only
        f=0,      # 1 to list file information only
        N=0,      # 1 to list files unformatted
        r=0,      # 1 if recursive all
        R=0,      # > 0 to set recursive limit
        D=None,   # specify delimiting symbols, default to ' '
    )
    # running state: what to list, where we are and what was displayed so far
    self.LINFO = dict(
        files=[],
        curdir=None,
        tpath=None,
        dhome=None,
        dsid=None,
        dcnt=0,
        gcnt=0,
        fcnt=0,
        pcnt=0,
        pgrecs=[],
    )
    # constants for gdexls actions
    self.ALIGNS = [0, 1, 1]   # alignment, 0 - left; 1 - right
    self.WIDTHS = [0, 0, 0]   # WIDTHS for formatted display
    self.CLMT = 500           # reformat list if count reach this limit
    self.DIDX = 3             # description column index
49
+
50
+ # function to read parameters
51
def read_parameters(self):
    """Parse sys.argv into the option values (self.GDEXLS) and the paths to list.

    Arguments given before any option that takes a value are collected in
    self.LINFO['files']. -d, -f, -N and -r are flags; -R takes an integer
    recursion limit and -D a delimiter string. -h, -help, --help and -?
    show the usage.
    """
    self.set_help_path(__file__)
    self.PGLOG['LOGFILE'] = "gdexls.log"   # set different log file
    self.LINFO['curdir'] = self.get_real_path(os.getcwd())
    argv = sys.argv[1:]
    self.pglog("gdexls {} ({})".format(' '.join(argv), self.LINFO['curdir']))
    option = defopt = 'l'   # 'l' is the implicit option collecting paths
    for arg in argv:
        if re.match(r'-(h|-*help|\?)$', arg): self.show_usage("gdexls")
        ms = re.match(r'-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option not in self.GDEXLS: self.pglog(arg + ": Unknown Option", self.LGEREX)
            if 'dfNr'.find(option) > -1:
                self.GDEXLS[option] = 1
                option = defopt
            continue
        if not option: self.pglog(arg + ": Value provided without option", self.LGEREX)
        if option == 'l':
            self.LINFO['files'].append(self.get_real_path(arg))
            defopt = None   # once a path is given, later values need an explicit option
        else:
            if option == 'R':
                # report a bad recursion limit as an option error instead of
                # letting int() abort with a traceback
                try:
                    self.GDEXLS[option] = int(arg)
                except ValueError:
                    self.pglog(arg + ": Recursive limit for Option -R must be an integer", self.LGEREX)
            else:
                self.GDEXLS[option] = arg
            option = defopt
78
+
79
+ # function to start actions
80
def start_actions(self):
    """List the requested paths (or the current directory) and print a summary.

    Fills in option defaults, displays the list, and then prints the totals
    when more than one item was displayed, or reports on stderr and exits
    with status 1 when nothing was.
    """
    self.view_dbinfo()
    if not self.LINFO['files']:
        # view all files in current directory
        self.LINFO['files'] = sorted(glob.glob('*'))
        if not self.LINFO['files']:
            sys.stderr.write(self.LINFO['curdir'] + ": Empty directory\n")
            self.pgexit(1)

    opts = self.GDEXLS
    if not (opts['d'] or opts['f']):
        opts['d'] = opts['f'] = 1   # list both directories and files as default
    if not opts['D']:
        # default delimiter for no format display
        opts['D'] = '|' if opts['N'] else " "
    if not opts['R'] and opts['r']:
        opts['R'] = 1000

    self.display_top_list(self.LINFO['files'])   # display or cache file/directory list
    if self.LINFO['pcnt'] > 0:
        self.display_format_list()   # if some left over

    counted = [(self.LINFO['dcnt'], "Dataset"), (self.LINFO['gcnt'], "Group"), (self.LINFO['fcnt'], "File")]
    total = self.LINFO['dcnt'] + self.LINFO['gcnt'] + self.LINFO['fcnt']
    if total > 1:
        parts = []
        for count, label in counted:
            if count > 0:
                parts.append("{} {}{}".format(count, label, 's' if count > 1 else ''))
        print("Total {} displayed".format(" & ".join(parts)))
    elif total == 0:
        where = self.LINFO['tpath'] if self.LINFO['tpath'] else self.LINFO['curdir']
        sys.stderr.write(where + ": No GDEX data information found\n")
        self.pgexit(1)
112
+
113
+ # display the top level list
114
def display_top_list(self, files):
    """Display the entries named on the command line and descend into directories.

    A directory given with a trailing '/' is not displayed itself, only its
    content. Missing paths are reported on stderr and skipped.
    """
    for entry in files:
        if not op.exists(entry):
            sys.stderr.write(entry + ": NOT exists\n")
            continue
        isdir = 1 if op.isdir(entry) else 0
        # do not display the directory info if it is ended by '/'
        display = 0 if (isdir and re.search(r'/$', entry)) else 1
        if not display:
            entry = re.sub(r'/$', '', entry)
        if not re.match(r'^/', entry):
            entry = self.join_paths(self.LINFO['curdir'], entry)
        # top path stripped from displayed names: the entry's parent when the
        # entry is shown, the entry itself when only its content is shown
        top = op.dirname(entry) if display else entry
        self.LINFO['tpath'] = top + "/"
        if display:
            self.display_line(entry, isdir)
        if isdir and (self.GDEXLS['R'] or not display or not self.LINFO['dsid']):
            self.display_list(sorted(glob.glob(entry + "/*")), 1)
        # flush the cached records once enough have piled up
        if self.LINFO['pcnt'] > self.CLMT:
            self.display_format_list()
131
+
132
+ # recursively display directory/file info
133
def display_list(self, files, level):
    """Display each entry of files and recurse into directories.

    level is the depth of files below the top entry; directories are
    entered only while level is below the recursion limit GDEXLS['R'].
    """
    for entry in files:
        isdir = int(op.isdir(entry))
        self.display_line(entry, isdir)
        if isdir and level < self.GDEXLS['R']:
            children = sorted(glob.glob(entry + "/*"))
            self.display_list(children, level + 1)
        # flush the cached records once enough have piled up
        if self.LINFO['pcnt'] > self.CLMT:
            self.display_format_list()
141
+
142
+ # find dataset/group info; display or cache file
143
def display_line(self, file, isdir):
    """Look up the dataset, group or file record for one path and display it.

    The dataset found for the previous path (LINFO['dsid'] / LINFO['dhome'])
    is reused while file lies below its home directory; otherwise the
    dataset is looked up again via find_dataset_id(). A dataset home is
    displayed as a "D" record, a directory with a dsgroup record as "G" and
    a file with a web file record as "F"; paths without a record are
    skipped silently.

    Paths are compared as plain strings. They used to be interpolated into
    regular expressions unescaped, so characters such as '.' or '+' in a
    path could match wrongly or raise re.error.
    """
    def below(home, path):
        # path relative to directory home, or None if path is not below it
        head = home + "/"
        return path[len(head):] if path.startswith(head) else None

    def strip_top(path):
        # path without the leading top path LINFO['tpath'], if it starts with it
        top = self.LINFO['tpath']
        return path[len(top):] if top and path.startswith(top) else path

    wfile = None
    if self.LINFO['dsid'] and self.LINFO['dhome']:
        wfile = below(self.LINFO['dhome'], file)
    if wfile is None:
        self.LINFO['dsid'] = self.find_dataset_id(file)
        if self.LINFO['dsid'] is None: return   # skip for missing dsid
        pgrec = self.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(self.LINFO['dsid']), self.LGEREX)
        if not pgrec: return None
        self.LINFO['dhome'] = "{}/{}".format(self.PGLOG['DSDHOME'], self.LINFO['dsid'])
        if self.LINFO['dhome'] == file:
            file = strip_top(file)
            if self.GDEXLS['d']:
                title = pgrec['title'] if pgrec['title'] else ''
                self.display_record(["D" + file, pgrec['ns'], str(pgrec['nc']), title])
                self.LINFO['dcnt'] += 1
            return
        wfile = below(self.LINFO['dhome'], file)
        if wfile is None: return
    # NOTE(review): wfile is placed into the query conditions unquoted; a name
    # holding a single quote would break them - confirm how pgget() expects
    # values to be escaped
    if isdir:
        if self.GDEXLS['d']:   # check and display group info for directory
            pgrec = self.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                               "dsid = '{}' AND webpath = '{}'".format(self.LINFO['dsid'], wfile), self.LGEREX)
            if pgrec:
                file = strip_top(file)
                title = pgrec['title'] if pgrec['title'] else ''
                self.display_record(["G" + file, pgrec['ns'], str(pgrec['nc']), title])
                self.LINFO['gcnt'] += 1
    elif self.GDEXLS['f']:   # check and display file info
        pgrec = self.pgget_wfile(self.LINFO['dsid'], "data_size, data_format, note",
                                 "wfile = '{}'".format(wfile), self.LGEREX)
        if pgrec:
            note = re.sub(r'\n', ' ', pgrec['note']) if pgrec['note'] else ''
            file = strip_top(file)
            self.display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note])
            self.LINFO['fcnt'] += 1
185
+
186
+ # display one file info
187
def display_record(self, disp):
    """Print one record right away (option -N) or cache it for formatted display.

    disp is the list of column values; its second item, the size, is
    replaced by its string form with unit. Cached records also update the
    column widths used later by display_format_list().
    """
    disp[1] = self.get_float_string(disp[1])
    if self.GDEXLS['N']:
        print(self.GDEXLS['D'].join(disp))
        return
    self.LINFO['pgrecs'].append(disp)
    self.LINFO['pcnt'] += 1
    for column in range(self.DIDX):
        self.WIDTHS[column] = max(self.WIDTHS[column], len(disp[column]))
197
+
198
+ # display cached list with format
199
def display_format_list(self):
    """Print the cached records with aligned columns and empty the cache.

    The first DIDX columns are padded to the widths collected in
    self.WIDTHS, left or right aligned as set in self.ALIGNS; the remaining
    column is printed as is.

    The cache list is emptied together with the counter. Only the counter
    used to be reset, so every flush after the first one printed the oldest
    records again instead of the newly cached ones.
    """
    delimiter = self.GDEXLS['D']
    records = self.LINFO['pgrecs']
    for disp in records:
        for i in range(self.DIDX):
            if self.ALIGNS[i] == 1:
                disp[i] = "{:>{}}".format(disp[i], self.WIDTHS[i])
            else:
                disp[i] = "{:{}}".format(disp[i], self.WIDTHS[i])
        print(delimiter.join(disp))
    del records[:]   # emptied in place so other references to the list stay valid
    self.LINFO['pcnt'] = 0
209
+
210
+ # change size to floating point value with unit
211
@staticmethod
def get_float_string(val):
    """Return val as a string with a size unit (B, K, M, G, T or P).

    val is divided by 1000 as long as it is above 1000 and a larger unit is
    left; scaled values are shown with two decimals, unscaled ones as given.
    """
    units = ('B', 'K', 'M', 'G', 'T', 'P')
    largest = len(units) - 1
    scaled = 0   # number of times val was divided
    while val > 1000 and scaled < largest:
        val = val / 1000
        scaled += 1
    if scaled == 0:
        return "{}{}".format(val, units[0])
    return "{:.2f}{}".format(val, units[scaled])
222
+
223
+ # replace /gpfs to the path /glade
224
@staticmethod
def get_real_path(path):
    """Return the real path of path, with a leading /gpfs mapped to its /glade name.

    /gpfs/u... becomes /glade/u... and /gpfs/csfs1/... becomes
    /glade/campaign/...; the result is then resolved with os.path.realpath().
    """
    # (pattern identifying the path, leading part to replace, replacement)
    mappings = (
        (r'^/gpfs/u', r'^/gpfs', '/glade'),
        (r'^/gpfs/csfs1/', r'^/gpfs/csfs1', '/glade/campaign'),
    )
    for probe, head, replacement in mappings:
        if re.match(probe, path):
            path = re.sub(head, replacement, path, count=1)
            break   # only the first matching mapping applies
    return op.realpath(path)
231
+
232
+ # main function to execute this script
233
def main():
    """Run gdexls: read the command line, display the list, then exit with status 0."""
    lister = GdexLs()
    lister.read_parameters()
    lister.start_actions()
    lister.pgexit(0)
238
+
239
+ # call main() to start program
240
+ if __name__ == "__main__": main()
@@ -1,9 +1,9 @@
1
- #!/glade/work/zji/conda-envs/pg-rda/bin/python
1
+ #!/glade/work/zji/conda-envs/pg-gdex/bin/python
2
2
  # -*- coding: utf-8 -*-
3
3
  # 2025-09-23, zji@ucar.edu, created for a standalone version of gdexls
4
4
  import re
5
5
  import sys
6
- pgpath = '/glade/work/zji/conda-envs/pg-rda/lib/python3.10/site-packages'
6
+ pgpath = '/glade/work/zji/conda-envs/pg-gdex/lib/python3.12/site-packages'
7
7
  if pgpath not in sys.path: sys.path.insert(0, pgpath)
8
8
 
9
9
  from rda_python_miscs.gdexls import main
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env python3
2
+ ##################################################################################
3
+ # Title : pgwget
4
+ # Author : Zaihua Ji, zji@ucar.edu
5
+ # Date : 12/02/2020
6
+ # 2025-03-10 transferred to package rda_python_miscs from
7
+ # https://github.com/NCAR/rda-utility-programs.git
8
+ # 2026-01-05 convert to class PgWget
9
+ # Purpose : wrapper to wget to get a file with wildcard in name
10
+ # Github: https://github.com/NCAR/rda-python-miscs.git
11
+ ##################################################################################
12
+ import sys
13
+ import re
14
+ from rda_python_common.pg_file import PgFile
15
+
16
+ class PgWget(PgFile):
17
+
18
def __init__(self):
    """Set up the table of option values with their defaults."""
    super().__init__()
    # option values keyed by upper-case option name; see the usage text
    # printed by read_parameters() for what each option means
    defaults = (
        ('OP', "-np -nH -nd -m -e robots=off --no-check-certificate"),
        ('UL', None),
        ('RN', None),
        ('FN', None),
        ('FC', 1),
        ('SM', 0),
        ('MC', 0),
        ('CN', 0),
        ('CR', 0),
        ('EX', None),
        ('JC', 'cat'),
    )
    self.OPTIONS = dict(defaults)
33
+
34
+ # function to read parameters
35
def read_parameters(self):
    """Parse sys.argv into self.OPTIONS; print the usage and exit if -UL or -RN is missing.

    Option names are matched case-insensitively. -CN, -CR and -SM are
    flags, -FC and -MC take integers, -JC takes one of cat, tar, first or
    last, and all other options take a string value. -b turns on
    background mode.

    The value of -OP is accepted even though it starts with '-'; wget
    options always do, so it used to be rejected as an unknown option. A
    non-integer value for -FC or -MC is reported as an option error instead
    of raising ValueError.
    """
    option = None
    JCS = ['cat', 'tar', 'first', 'last']
    options = '|'.join(self.OPTIONS)
    argv = sys.argv[1:]
    self.PGLOG['LOGFILE'] = "pgwget.log"
    for arg in argv:
        if arg == "-b":
            self.PGLOG['BCKGRND'] = 1
            option = None
            continue
        ms = re.match(r'^-({})$'.format(options), arg, re.I)
        if ms:
            option = ms.group(1).upper()
            if re.match(r'^(CN|CR|SM)$', option):
                self.OPTIONS[option] = 1
                option = None
            continue
        # a dash-leading argument is an unknown option, unless it is the
        # wget option string expected after -OP
        if option != 'OP' and re.match(r'^-.*$', arg): self.pglog(arg + ": Unknown Option", self.LGEREX)
        if not option: self.pglog(arg + ": Value passed in without leading option", self.LGEREX)
        if option == 'JC' and arg not in JCS:
            self.pglog(arg + ": Joining Command must be one of {}".format(JCS), self.LGEREX)
        if re.match(r'^(FC|MC)$', option or ''):
            try:
                self.OPTIONS[option] = int(arg)
            except ValueError:
                self.pglog("{}: Value for Option -{} must be an integer".format(arg, option), self.LGEREX)
        else:
            self.OPTIONS[option] = arg
        option = None
    if not (self.OPTIONS['UL'] and self.OPTIONS['RN']):
        print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]")
        print(" Provide at least WebURL and RootFileName to wget file(s)")
        print(" Option -CN - check new file if presents")
        print(" Option -CR - clean the downloaded remote file(s) if presents")
        print(" Option -FC - number of files to be valid download; defaults to 1")
        print(" Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one")
        print(" Option -SM - Show wget dumping message; defaults to False")
        print(" Option -MC - minimal number of files to be valid download; defaults to -FC")
        print(" Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension")
        print(" Option -OP - options used by wget, defaults to '-np -nH -nd -m -e robots=off --no-check-certificate'")
        print(" Option -UL - (mandatory) WebURL with path")
        print(" Option -RN - (mandatory) the root portion of the remote file name to be downloaded")
        print(" Option -EX - file name extension to be used.")
        sys.exit(0)
    self.cmdlog("pgwget " + ' '.join(argv))
    if not self.OPTIONS['MC']: self.OPTIONS['MC'] = self.OPTIONS['FC']
    if not self.OPTIONS['SM']: self.OPTIONS['OP'] += ' -q'   # keep wget quiet unless -SM is given
77
+
78
+ # function to start actions
79
def start_actions(self):
    """Download the requested file(s), then call cmdlog() with no message."""
    self.download_wildcard_files()
    self.cmdlog()
82
+
83
+ # download one or multiple remote files via wget; concat files to a single one if multiple
84
def download_wildcard_files(self):
    """Download the remote file(s) matching RN*[.EX] via wget and build the local file.

    The local file is named by option FN, or RN[.EX] if FN is not given.
    Several downloaded files are joined as option JC says: concatenated
    (cat), archived (tar), or only the first/last one is kept. With option
    CR the downloaded files are removed afterwards.

    Returns 1 if the local file was (re)built, 0 if every downloaded file
    is not newer than the existing local file, and otherwise whatever
    pglog() returns for the message explaining why nothing was built.
    """
    # NOTE(review): file names and the URL are placed into shell command
    # strings as they are - confirm they can never hold shell metacharacters
    deleted = 0
    if self.OPTIONS['FN']:
        dfile = self.OPTIONS['FN']
    else:
        dfile = self.OPTIONS['RN']
        if self.OPTIONS['EX']: dfile += "." + self.OPTIONS['EX']
    dinfo = self.check_local_file(dfile, 1)
    if dinfo and not self.OPTIONS['CN']:
        return self.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), self.LOGWRN)
    build = 0 if dinfo else 1
    wfile = self.OPTIONS['RN'] + "*"
    if self.OPTIONS['EX']: wfile += "." + self.OPTIONS['EX']
    # files present before the download; the local file itself is not one of them
    dlist = self.local_glob(wfile, 1)
    if dfile in dlist and dinfo:
        del dlist[dfile]
        deleted = 1
    dcnt = len(dlist)
    if self.OPTIONS['CN'] or dcnt < self.OPTIONS['FC']:
        cmd = "wget {} {} -A '{}'".format(self.OPTIONS['OP'], self.OPTIONS['UL'], wfile)
        self.pgsystem(cmd, self.LOGWRN, 7)
        nlist = self.local_glob(wfile, 1)
        if dfile in nlist and dinfo:
            del nlist[dfile]
            deleted = 1
        ncnt = len(nlist)
    else:
        nlist = dlist
        ncnt = dcnt
    if ncnt == 0:
        if deleted:
            return self.pglog("{}: File dowloaded on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN)
        else:
            return self.pglog("{}: NO file to dowload on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN)
    elif ncnt < self.OPTIONS['MC']:
        return self.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, self.OPTIONS['MC']), self.LOGWRN)
    rfiles = sorted(nlist)
    size = skip = 0
    for rfile in rfiles:
        rinfo = nlist[rfile]
        size += rinfo['data_size']
        if dinfo and self.cmptime(dinfo['date_modified'], dinfo['time_modified'], rinfo['date_modified'], rinfo['time_modified']) >= 0:
            self.pglog("{}: Not newer than {}".format(rfile, dfile), self.LOGWRN)
            skip += 1
        elif rfile not in dlist:
            build = 1
        elif self.compare_file_info(dlist[rfile], rinfo) > 0:
            self.pglog("{}: Newer file dowloaded from {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN)
            build = 1
        else:
            # the URL used to be dropped here: the format string had no
            # placeholder for it
            self.pglog("{}: No newer file found on {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN)
    if skip == ncnt: return 0
    # rebuild as well if the sizes do not add up to the existing local file;
    # dinfo is set whenever build is still 0 here
    if not (build or size == dinfo['data_size']): build = 1
    if not build: return self.pglog(dfile + ": Use existing file", self.LOGWRN)
    if self.OPTIONS['JC'] == 'cat':
        for i, rfile in enumerate(rfiles):
            if i == 0:
                if dfile != rfile: self.local_copy_local(dfile, rfile, self.LOGWRN)
            else:
                self.pgsystem("cat {} >> {}".format(rfile, dfile), self.LOGWRN, 5)
            if self.OPTIONS['CR'] and dfile != rfile: self.pgsystem("rm -f " + rfile, self.LOGWRN, 5)
    elif self.OPTIONS['JC'] == 'tar':
        topt = 'c'   # create the archive with the first file, update it with the rest
        for rfile in rfiles:
            self.pgsystem("tar -{}vf {} {}".format(topt, dfile, rfile), self.LOGWRN, 5)
            topt = 'u'
            if self.OPTIONS['CR']: self.pgsystem("rm -f " + rfile, self.LOGWRN, 5)
    else:
        didx = 0 if self.OPTIONS['JC'] == 'first' else (ncnt - 1)
        self.pgsystem("mv {} {}".format(rfiles[didx], dfile), self.LOGWRN, 5)
        if self.OPTIONS['CR']:
            for i in range(ncnt):
                if i == didx: continue
                self.pgsystem("rm -f " + rfiles[i], self.LOGWRN, 5)
    return 1
162
+
163
+ # main function to execute this script
164
def main():
    """Run pgwget: read the command line, download the file(s), then exit with status 0."""
    getter = PgWget()
    getter.read_parameters()
    getter.start_actions()
    getter.pgexit(0)
169
+
170
+ # call main() to start program
171
+ if __name__ == "__main__": main()