rda-python-icoads 1.0.7__tar.gz → 1.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_icoads-1.0.10/MANIFEST.in +6 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/PKG-INFO +1 -1
- rda_python_icoads-1.0.10/pyproject.toml +61 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/R3.0-stat_doc.pdf +0 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/checkicoads.py +222 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/cleanicoads.py +175 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/countattm.py +125 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/countattmvar.py +237 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/counticoads.py +153 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/countsst.py +221 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/fillicoads.py +138 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/fillinventory.py +149 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/fillitable.py +289 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/fillmonth.py +94 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/fixiidx.py +70 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/maxsst.py +262 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/msg +457 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/msg3.0_subset_readme.txt +94 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/msg3_subset.py +345 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/msg_download.py +211 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/msgsubset.f +612 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads/writeicoads.py +169 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads.egg-info/PKG-INFO +1 -1
- rda_python_icoads-1.0.10/src/rda_python_icoads.egg-info/SOURCES.txt +35 -0
- rda_python_icoads-1.0.10/src/rda_python_icoads.egg-info/entry_points.txt +17 -0
- rda_python_icoads-1.0.7/MANIFEST.in +0 -2
- rda_python_icoads-1.0.7/pyproject.toml +0 -39
- rda_python_icoads-1.0.7/src/rda_python_icoads.egg-info/SOURCES.txt +0 -16
- rda_python_icoads-1.0.7/src/rda_python_icoads.egg-info/entry_points.txt +0 -2
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/LICENSE +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/README.md +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/setup.cfg +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads/PgIMMA.py +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads/README_R3.0_Subset.html +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads/__init__.py +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads/imma1_subset.py +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads/rdimma1_csv.f +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads.egg-info/dependency_links.txt +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads.egg-info/requires.txt +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/src/rda_python_icoads.egg-info/top_level.txt +0 -0
- {rda_python_icoads-1.0.7 → rda_python_icoads-1.0.10}/tests/test_icoads.py +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
include src/rda_python_icoads/README_R3.0_Subset.html
|
|
2
|
+
include src/rda_python_icoads/rdimma1_csv.f
|
|
3
|
+
include src/rda_python_icoads/msg
|
|
4
|
+
include src/rda_python_icoads/R3.0-stat_doc.pdf
|
|
5
|
+
include src/rda_python_icoads/msg3.0_subset_readme.txt
|
|
6
|
+
include src/rda_python_icoads/msgsubset.f
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [
|
|
3
|
+
"setuptools>=61.0",
|
|
4
|
+
]
|
|
5
|
+
build-backend = "setuptools.build_meta"
|
|
6
|
+
|
|
7
|
+
[project]
|
|
8
|
+
name = "rda_python_icoads"
|
|
9
|
+
version = "1.0.10"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name="Zaihua Ji", email="zji@ucar.edu" },
|
|
12
|
+
]
|
|
13
|
+
description = "RDA python package to manage RDA ICOADS datasets"
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
requires-python = ">=3.7"
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Development Status :: 5 - Production/Stable",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"rda_python_common",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[tool.setuptools]
|
|
27
|
+
include-package-data = true
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["src"]
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.package-data]
|
|
33
|
+
"rda_python_icoads" = [
|
|
34
|
+
"README_R3.0_Subset.html",
|
|
35
|
+
"rdimma1_csv.f",
|
|
36
|
+
"msg",
|
|
37
|
+
"R3.0-stat_doc.pdf",
|
|
38
|
+
"msg3.0_subset_readme.txt",
|
|
39
|
+
"msgsubset.f",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
"Homepage" = "https://github.com/NCAR/rda-python-icoads"
|
|
44
|
+
|
|
45
|
+
[project.scripts]
|
|
46
|
+
"imma1_subset" = "rda_python_icoads.imma1_subset:main"
|
|
47
|
+
"msg3_subset" = "rda_python_icoads.msg3_subset:main"
|
|
48
|
+
"checkicoads" = "rda_python_icoads.checkicoads:main"
|
|
49
|
+
"cleanicoads" = "rda_python_icoads.cleanicoads:main"
|
|
50
|
+
"counticoads" = "rda_python_icoads.counticoads:main"
|
|
51
|
+
"countattm" = "rda_python_icoads.countattm:main"
|
|
52
|
+
"countattmvar" = "rda_python_icoads.countattmvar:main"
|
|
53
|
+
"countsst" = "rda_python_icoads.countsst:main"
|
|
54
|
+
"fillicoads" = "rda_python_icoads.fillicoads:main"
|
|
55
|
+
"fillinventory" = "rda_python_icoads.fillinventory:main"
|
|
56
|
+
"fillitable" = "rda_python_icoads.fillitable:main"
|
|
57
|
+
"fillmonth" = "rda_python_icoads.fillmonth:main"
|
|
58
|
+
"fixidx" = "rda_python_icoads.fixiidx:main"  # module file shipped in the package is fixiidx.py
|
|
59
|
+
"msg_download" = "rda_python_icoads.msg_download:main"
|
|
60
|
+
"maxsst" = "rda_python_icoads.maxsst:main"
|
|
61
|
+
"writeicoads" = "rda_python_icoads.writeicoads:main"
|
|
Binary file
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : checkicoads
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 12/30/2020
|
|
8
|
+
# 2025-03-03 transferred to package rda_python_icoads from
|
|
9
|
+
# https://github.com/NCAR/rda-icoads.git
|
|
10
|
+
# Purpose : check and compare ICOADS data files and IVADDB records
|
|
11
|
+
#
|
|
12
|
+
# Github : https://github.com/NCAR/rda-python-icoads.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
|
|
16
|
+
import sys
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgDBI
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from rda_python_common import PgOPT
|
|
24
|
+
from rda_python_common import PgSIG
|
|
25
|
+
from . import PgIMMA
|
|
26
|
+
|
|
27
|
+
# Run-time settings shared by the functions of checkicoads
PVALS = dict(
    bdate=None,      # begin date taken from the command line
    edate=None,      # end date taken from the command line
    bmdate=[],       # begin date of each month to check
    emdate=[],       # end date of each month to check
    fname=[],        # data file name of each month to check
    flag=[],         # 1 - file exists, 2 - db records exist, 3 - both
    mproc=10,        # value of Option -m
    fpattern="IMMA1_R3.0.0_<YYYY-MM>",   # value of Option -f
    readall=0,       # set to 1 by Option -a
)
|
|
38
|
+
|
|
39
|
+
#
|
|
40
|
+
# main function to run checkicoads
|
|
41
|
+
#
|
|
42
|
+
def main():
    """Parse the command line, then compare ICOADS data files with IVADDB.

    Usage: checkicoads [-b] [-a] [-m mproc] [-f FilePattern] BeginDate EndDate

    Options -f and -m consume the argument that follows them; the first two
    remaining arguments are taken as BeginDate and EndDate.  When either
    date is missing the usage text is printed and the program exits.
    """

    option = ''
    argv = sys.argv[1:]

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg == "-a":
            PVALS['readall'] = 1
        elif arg == "-f":
            option = 'f'
        elif arg == "-m":
            option = 'm'
        elif re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif option:
            if option == 'f':
                PVALS['fpattern'] = arg
            elif option == 'm':
                # mproc is compared with integers later on, so it must not
                # stay a string; reject anything that is not a plain number
                if not arg.isdigit():
                    PgLOG.pglog(arg + ": Invalid process count for Option -m", PgLOG.LGWNEX)
                PVALS['mproc'] = int(arg)
            option = ''
        elif not PVALS['bdate']:
            PVALS['bdate'] = arg
        elif not PVALS['edate']:
            PVALS['edate'] = arg
        else:
            PgLOG.pglog(arg + ": Invalid parameter", PgLOG.LGWNEX)

    PgLOG.PGLOG['LOGFILE'] = "icoads.log"
    PgDBI.ivaddb_dbname()

    if not (PVALS['bdate'] and PVALS['edate']):
        # the usage text reports the date range recorded in cntldb.inventory
        pgrec = PgDBI.pgget("cntldb.inventory", "min(date) bdate, max(date) edate", '', PgLOG.LGEREX)
        print("Usage: checkicoads [-a] [-m mproc] [-f FilePattern] BeginDate EndDate")
        print("   Default FilePattern is " + PVALS['fpattern'])
        print("   Option -a - read all attms, including multi-line ones, such as IVAD and REANQC")
        print("   Option -m - start up to given number of processes, one for each month (Default to 10)")
        print("   Set BeginDate and EndDate between '{}' and '{}'".format(pgrec['bdate'], pgrec['edate']))
        sys.exit(0)

    # put the two dates in order when they were given reversed
    if PgUtil.diffdate(PVALS['bdate'], PVALS['edate']) > 0:
        PVALS['bdate'], PVALS['edate'] = PVALS['edate'], PVALS['bdate']

    PgLOG.cmdlog("checkicoads {}".format(' '.join(argv)))
    check_imma_data()
    PgLOG.cmdlog()
    sys.exit(0)
|
|
92
|
+
|
|
93
|
+
#
|
|
94
|
+
# check imma data
|
|
95
|
+
#
|
|
96
|
+
def check_imma_data():
    """Check every month set up by init_months(), then dump the final counts.

    Months whose '<file name>.cnt' file already exists are skipped.  With a
    process count above 1 a child process is started for each remaining
    month; otherwise the months are checked one after another in this
    process.
    """

    mcnt = init_months()
    # the process count may still be the text given with Option -m, so it
    # is converted before it is compared with integers
    mproc = 1 if mcnt == 1 else int(PVALS['mproc'])
    PVALS['mproc'] = mproc
    multi = mproc > 1

    if multi:
        # NOTE(review): the name passed here is 'writeicoads' although this
        # program is checkicoads - looks like a copy-paste leftover; confirm
        PgSIG.start_none_daemon('writeicoads', '', PgLOG.PGLOG['CURUID'], mproc, 300, 1)

    for midx in range(mcnt):
        fname = PVALS['fname'][midx]
        if op.isfile(fname + ".cnt"): continue   # monthly file counted already

        if not multi:
            check_imma_file(fname, midx)
            continue

        stat = PgSIG.start_child("checkicoads_{}".format(midx), PgLOG.LOGWRN, 1)  # try to start a child process
        if stat <= 0:
            sys.exit(1)   # something wrong
        if PgSIG.PGSIG['PPID'] > 1:
            check_imma_file(fname, midx)
            sys.exit(0)   # stop child process
        PgDBI.pgdisconnect(0)   # disconnect database for reconnection

    if multi: PgSIG.check_child(None, 0, PgLOG.LOGWRN, 1)

    dump_final_counts()
|
|
122
|
+
|
|
123
|
+
#
|
|
124
|
+
# compare icoads records from given file name and IVADDB
|
|
125
|
+
#
|
|
126
|
+
def check_imma_file(fname, midx):
    """Compare the record counts of one monthly file with those in IVADDB.

    fname - name of the monthly data file
    midx  - index of the month in the PVALS month lists

    The counts gathered from the file (flag bit 1) minus the counts
    gathered from the database (flag bit 2) are written out through
    dump_monthly_counts().
    """

    PgLOG.pglog("Count IMMA records in File '{}'".format(fname), PgLOG.WARNLG)
    flag = PVALS['flag'][midx]
    tcount = PgIMMA.TABLECOUNT
    acounts = [0]*tcount

    if flag&1:
        fcnts = [0]*tcount
        # the with-block closes the file even if counting fails, and the
        # for-loop advances to the next record on every pass
        with open(fname, 'r') as imma:
            for line in imma:
                if PVALS['readall'] and line.startswith('98'):
                    PgIMMA.get_imma_multiple_counts(line, fcnts)
                else:
                    PgIMMA.get_imma_counts(line, fcnts)
        for i in range(tcount): acounts[i] = fcnts[i]

    if flag&2:
        PgLOG.pglog("Count IMMA records in IVADDB", PgLOG.WARNLG)
        cdate = PVALS['bmdate'][midx]
        edate = PVALS['emdate'][midx]
        # NOTE(review): the dates are compared directly, which assumes both
        # values order correctly as given (e.g. YYYY-MM-DD text) - confirm
        while cdate <= edate:
            dcnts = PgIMMA.count_imma_records(cdate, 0, PVALS['readall'])
            cdate = PgUtil.adddate(cdate, 0, 0, 1)
            if dcnts:
                for i in range(tcount): acounts[i] -= dcnts[i]

    dump_monthly_counts(fname, acounts)
|
|
157
|
+
|
|
158
|
+
#
|
|
159
|
+
# dump monthly counts
|
|
160
|
+
#
|
|
161
|
+
def dump_monthly_counts(fname, acounts):
    """Write the counts of one month as a single CSV line.

    fname   - name of the monthly data file; the line is written to
              '<fname>.cnt' and starts with fname itself
    acounts - list of counts; each value is converted to text before the
              values are joined, since str.join() accepts strings only
    """

    oname = fname + ".cnt"
    with open(oname, 'w') as cntfile:
        cntfile.write("{}, {}\n".format(fname, ', '.join(str(cnt) for cnt in acounts)))
|
|
167
|
+
|
|
168
|
+
#
|
|
169
|
+
# concat all monthly counts into one
|
|
170
|
+
#
|
|
171
|
+
def dump_final_counts():
    """Create ICOADS_DIFF_COUNTS.csv from all '*.cnt' files.

    The file is started with a header line built from PgIMMA.IMMA_NAMES;
    the contents of every '*.cnt' file in the current directory are then
    appended through a shell 'cat' command.
    """

    outname = "ICOADS_DIFF_COUNTS.csv"
    header = "FileName, {}\n".format(', '.join(PgIMMA.IMMA_NAMES))

    # the header is written and the file closed before 'cat' appends to it
    with open(outname, 'w') as csvfile:
        csvfile.write(header)

    PgLOG.pgsystem("cat *.cnt >> " + outname)
|
|
179
|
+
|
|
180
|
+
#
|
|
181
|
+
# initialize the month list
|
|
182
|
+
#
|
|
183
|
+
def init_months():
    """Fill the month lists in PVALS for the requested period.

    The temporal pattern is the text enclosed in '<' and '>' inside
    PVALS['fpattern'].  Starting at PVALS['bdate'], the period is walked
    month by month up to PVALS['edate']; a month is recorded when its data
    file exists (flag 1), when cntldb.inventory has records for its dates
    (flag 2), or both (flag 3).

    Returns the number of months recorded.
    """

    ms = re.search(r'<([^>]+)>', PVALS['fpattern'])
    if not ms:
        PgLOG.pglog(PVALS['fpattern'] + ": No temporal pattern found to get month list", PgLOG.LGEREX)
        return 0   # nothing can be set up without a pattern

    tpattern = ms.group(1)    # pattern text without the delimiters
    treplace = ms.group(0)    # pattern text including the delimiters

    bdate = PVALS['bdate']
    midx = 0
    done = False
    while not done:
        edate = PgUtil.enddate(bdate, 0, 'M')
        if PgUtil.diffdate(PVALS['edate'], edate) <= 0:
            # the requested end date falls in this month: last pass
            edate = PVALS['edate']
            done = True

        mdate = PgUtil.format_date(bdate, tpattern)
        fname = PVALS['fpattern'].replace(treplace, mdate)
        flag = 0
        if op.isfile(fname): flag += 1
        if PgDBI.pgget("cntldb.inventory", "", "date BETWEEN '{}' AND '{}'".format(bdate, edate), PgLOG.LGEREX):
            flag += 2

        if flag:
            PVALS['bmdate'].append(bdate)
            PVALS['emdate'].append(edate)
            PVALS['fname'].append(fname)
            PVALS['flag'].append(flag)
            midx += 1

        if not done: bdate = PgUtil.adddate(edate, 0, 0, 1)

    return midx
|
|
218
|
+
|
|
219
|
+
#
|
|
220
|
+
# call main() to start program
|
|
221
|
+
#
|
|
222
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : cleanicoads
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 12/30/2020
|
|
8
|
+
# 2025-03-03 transferred to package rda_python_icoads from
|
|
9
|
+
# https://github.com/NCAR/rda-icoads.git
|
|
10
|
+
# Purpose : clean up one or all IMMA1 attms in IVADDB for given period
|
|
11
|
+
#
|
|
12
|
+
# Github : https://github.com/NCAR/rda-python-icoads.git
|
|
13
|
+
#
|
|
14
|
+
##################################################################################
|
|
15
|
+
|
|
16
|
+
import sys
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
from os import path as op
|
|
20
|
+
from rda_python_common import PgLOG
|
|
21
|
+
from rda_python_common import PgDBI
|
|
22
|
+
from rda_python_common import PgUtil
|
|
23
|
+
from . import PgIMMA
|
|
24
|
+
|
|
25
|
+
# Run-time settings shared by the functions of cleanicoads
PVALS = dict(
    bdate=None,    # begin date taken from the command line
    edate=None,    # end date taken from the command line
    aname=None,    # attm name given with Option -a
    tinfo={},      # table index information queried from cntldb.inventory
    tcnt=0,        # number of table indices in tinfo
    dcnd=None,     # date condition built from bdate/edate
    uatti='',
    names=None,
)
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# main function to run cleanicoads
|
|
38
|
+
#
|
|
39
|
+
def main():
    """Parse the command line, then clean IMMA1 data in IVADDB.

    Usage: cleanicoads [-b] [-a ATTNAME] BDATE EDATE

    Option -a consumes the argument that follows it as the attm name; the
    first two remaining arguments are taken as BDATE and EDATE.  Without
    BDATE the usage text is printed and the program exits.
    """

    option = ''
    argv = sys.argv[1:]

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg == "-a":
            option = 'a'
        elif re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif option:
            PVALS['aname'] = arg
            option = ''
        elif not PVALS['bdate']:
            PVALS['bdate'] = arg
        elif not PVALS['edate']:
            PVALS['edate'] = arg
        else:
            PgLOG.pglog(arg + ": Invalid parameter", PgLOG.LGWNEX)

    if not PVALS['bdate']:
        print("Usage: cleanicoads [-a ATTNAME] BDATE EDATE")
        print("   Option -a - clean a single attm for given attm name")
        PgLOG.pgexit()

    # a trailing -a without a name would leave aname unset, and every attm
    # would then be cleaned instead of a single one - refuse to go on
    if option:
        PgLOG.pglog("-a: Missing attm name", PgLOG.LGWNEX)

    PgLOG.PGLOG['LOGFILE'] = "icoads.log"
    PgDBI.ivaddb_dbname()
    PgLOG.cmdlog("cleanicoads {}".format(' '.join(argv)))
    set_table_info()
    clean_imma_data()
    PgLOG.cmdlog()
    PgLOG.pgexit()
|
|
77
|
+
|
|
78
|
+
#
|
|
79
|
+
# set the table index list
|
|
80
|
+
#
|
|
81
|
+
def set_table_info():
    """Build the date condition and load the table index information.

    PVALS['dcnd'] becomes a BETWEEN condition when an end date was given
    and a '>=' condition on the begin date otherwise.  PVALS['tinfo'] and
    PVALS['tcnt'] are set from cntldb.inventory grouped by tidx; an error
    is logged when no table index is found.
    """

    inventory = "cntldb.inventory"
    begin, end = PVALS['bdate'], PVALS['edate']

    if end:
        datecnd = "date BETWEEN '{}' AND '{}'".format(begin, end)
    else:
        datecnd = "date >= '{}'".format(begin)
    PVALS['dcnd'] = datecnd

    tinfo = PgDBI.pgmget(inventory, "tidx, min(miniidx) bidx, max(maxiidx) eidx", datecnd + " GROUP BY tidx", PgLOG.LGEREX)
    PVALS['tinfo'] = tinfo
    PVALS['tcnt'] = len(tinfo['tidx']) if tinfo else 0

    if PVALS['tcnt']: return
    PgLOG.pglog("{}: No data found in IVADDB for {}".format(inventory, PVALS['dcnd']), PgLOG.LGEREX)
|
|
94
|
+
|
|
95
|
+
#
|
|
96
|
+
# clean up imma data
|
|
97
|
+
#
|
|
98
|
+
def clean_imma_data():
    """Clean the data of every table index, then the inventory records.

    For each table index in PVALS['tinfo'] either the single attm named
    with Option -a or all attms are cleaned for the iidx range of that
    index.  Afterwards the cntldb.inventory records matching the date
    condition are deleted.
    """

    inventory = "cntldb.inventory"
    tinfo = PVALS['tinfo']
    single = PVALS['aname']

    for idx in range(PVALS['tcnt']):
        tidx = tinfo['tidx'][idx]
        iidxcnd = "iidx BETWEEN {} AND {}".format(tinfo['bidx'][idx], tinfo['eidx'][idx])
        if not single:
            clean_imma_data_for_tidx(tidx, iidxcnd)
        else:
            clean_one_attm_for_tidx(single, tidx, iidxcnd)

    # NOTE(review): the inventory records are deleted even when only one
    # attm was cleaned with Option -a - confirm this is intended
    deleted = PgDBI.pgdel(inventory, PVALS['dcnd'], PgLOG.LGEREX)
    plural = 's' if deleted > 1 else ''
    PgLOG.pglog("{}: {} record{} deleted for {}".format(inventory, deleted, plural, PVALS['dcnd']), PgLOG.LOGWRN)
|
|
113
|
+
|
|
114
|
+
#
|
|
115
|
+
# clean up imma data for table index
|
|
116
|
+
#
|
|
117
|
+
def clean_imma_data_for_tidx(tidx, cnd):
    """Clean all attms of one table index.

    tidx - table index
    cnd  - condition passed on for the attm tables

    The first PgIMMA.TABLECOUNT names in PgIMMA.IMMA_NAMES are cleaned one
    by one through clean_one_attm_for_tidx().
    """

    PgLOG.pglog("Clean IMMA data for table index {}...".format(tidx), PgLOG.LOGWRN)

    attmidx = 0
    while attmidx < PgIMMA.TABLECOUNT:
        clean_one_attm_for_tidx(PgIMMA.IMMA_NAMES[attmidx], tidx, cnd)
        attmidx += 1
|
|
124
|
+
|
|
125
|
+
#
|
|
126
|
+
# clean up one attm data for table index
|
|
127
|
+
#
|
|
128
|
+
def clean_one_attm_for_tidx(aname, tidx, cnd):
    """Clean one attm table of one table index.

    aname - attm name
    tidx  - table index; the table cleaned is '<aname>_<tidx>'
    cnd   - condition selecting the records to delete

    Returns 0 when the table does not exist.  For attm 'iuida' the itidx
    tables are cleaned first, while the records are still in place.
    """

    attmtable = "{}_{}".format(aname, tidx)
    if not PgDBI.pgcheck(attmtable):
        return 0   # no record to delete

    if aname == 'iuida':
        clean_itidx_for_tidx(attmtable, cnd)

    deleted = PgDBI.pgdel(attmtable, cnd, PgLOG.LGEREX)
    plural = 's' if deleted > 1 else ''
    PgLOG.pglog("{}: {} record{} deleted for {}".format(attmtable, deleted, plural, cnd), PgLOG.LOGWRN)

    # presumably the number of records left in the table; it is stored as
    # the new count for this attm - verify against PgDBI.pgget
    remaining = PgDBI.pgget(attmtable, "", "", PgLOG.LGEREX)
    clean_iattm_for_tidx(aname, tidx, remaining)
|
|
141
|
+
|
|
142
|
+
#
|
|
143
|
+
# clean up table itidx for table index
|
|
144
|
+
#
|
|
145
|
+
def clean_itidx_for_tidx(table, cnd):
    """Clean the cntldb.itidx tables that belong to the given records.

    table - table read for the distinct two-character uid prefixes
    cnd   - condition selecting the records; also used for the deletion

    One table 'cntldb.itidx_<prefix in lower case>' is cleaned for each
    prefix found; tables that do not exist are skipped.
    """

    basename = "cntldb.itidx"
    uids = PgDBI.pgmget(table, "distinct (substring(uid, 1, 2)) uida", cnd, PgLOG.LGEREX)
    prefixes = uids['uida'] if uids else []

    for prefix in prefixes:
        itable = "{}_{}".format(basename, prefix.lower())
        if PgDBI.pgcheck(itable):
            deleted = PgDBI.pgdel(itable, cnd, PgLOG.LOGWRN)
            plural = 's' if deleted > 1 else ''
            PgLOG.pglog("{}: {} record{} deleted".format(itable, deleted, plural), PgLOG.LOGWRN)
|
|
156
|
+
|
|
157
|
+
#
|
|
158
|
+
# clean up table iattm for table index
|
|
159
|
+
#
|
|
160
|
+
def clean_iattm_for_tidx(aname, tidx, cnt):
    """Update the attm counts kept in cntldb.iattm and cntldb.iattm_daily.

    aname - attm name
    tidx  - table index
    cnt   - value stored as the new count in cntldb.iattm

    The count of the given attm and table index is set to cnt, and the
    daily records of that attm and table index are deleted for the date
    condition in PVALS['dcnd'].
    """

    table = "cntldb.iattm"
    cnd = "attm = '{}' AND tidx = {}".format(aname, tidx)
    PgDBI.pgupdt(table, {'count' : cnt}, cnd, PgLOG.LGWNEX)
    PgLOG.pglog("{}: Set count to {} for {}".format(table, cnt, cnd), PgLOG.LOGWRN)

    # restrict the deletion to this attm and table index: with the date
    # condition alone the daily records of all other attms in the period
    # would be removed as well
    # NOTE(review): assumes cntldb.iattm_daily has columns attm and tidx
    # like cntldb.iattm - confirm against the table definition
    table += "_daily"
    cnd += " AND " + PVALS['dcnd']
    PgDBI.pgdel(table, cnd, PgLOG.LGWNEX)
|
|
171
|
+
|
|
172
|
+
#
|
|
173
|
+
# call main() to start program
|
|
174
|
+
#
|
|
175
|
+
if __name__ == "__main__": main()
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
##################################################################################
|
|
4
|
+
#
|
|
5
|
+
# Title : countattm
|
|
6
|
+
# Author : Zaihua Ji, zji@ucar.edu
|
|
7
|
+
# Date : 01/09/2021
|
|
8
|
+
# 2025-03-04 transferred to package rda_python_icoads from
|
|
9
|
+
# https://github.com/NCAR/rda-icoads.git
|
|
10
|
+
# Purpose : process ICOADS data files in IMMA format and count the matching,
|
|
11
|
+
# unmatching and empty records
|
|
12
|
+
#
|
|
13
|
+
# Github : https://github.com/NCAR/rda-python-icoads.git
|
|
14
|
+
#
|
|
15
|
+
##################################################################################
|
|
16
|
+
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
from rda_python_common import PgLOG
|
|
20
|
+
from rda_python_common import PgDBI
|
|
21
|
+
from . import PgIMMA
|
|
22
|
+
|
|
23
|
+
# Run-time settings shared by the functions of countattm
PVALS = dict(
    group=None,    # value of Option -g: monthly or yearly
    files=[],      # file names taken from the command line
    aname=None,    # attm name identified from the first file read
    bym=None,      # year-month of the first file processed
    eym=None,      # year-month of the last file processed
)

# counts gathered for each group key (year or year-month)
ACOUNTS = dict()
|
|
32
|
+
|
|
33
|
+
#
|
|
34
|
+
# main function
|
|
35
|
+
#
|
|
36
|
+
def main():
    """Parse the command line, then count the attm records of all files.

    Usage: countattm [-b] -g GroupBy (monthly|yearly) FileNameList

    Option -g consumes the argument that follows it; every other argument
    not starting with '-' is taken as a file name.  The usage text is
    printed when no file name or no valid group is given.
    """

    option = ''
    argv = sys.argv[1:]

    for arg in argv:
        if arg == "-b":
            PgLOG.PGLOG['BCKGRND'] = 1
        elif arg == '-g':
            option = 'g'
        elif re.match(r'^-', arg):
            PgLOG.pglog(arg + ": Invalid Option", PgLOG.LGWNEX)
        elif option:
            PVALS['group'] = arg
            option = ''
        else:
            PVALS['files'].append(arg)

    # a membership test also covers a missing -g (group is None), for
    # which a regular expression match would raise a TypeError
    if not (PVALS['files'] and PVALS['group'] in ('monthly', 'yearly')):
        print("Usage: countattm -g GroupBy (monthly|yearly) FileNameList")
        print("   Group by Monthly or Yearly is mandatory")
        print("   At least one file name needs to be present to count icoads attm data")
        sys.exit(0)

    PgLOG.PGLOG['LOGFILE'] = "icoads.log"
    PgDBI.ivaddb_dbname()
    PgLOG.cmdlog("countattm {}".format(' '.join(argv)))
    for file in PVALS['files']: count_attm_file(file)
    dump_attm_counts()
    PgLOG.cmdlog()
    sys.exit(0)
|
|
67
|
+
|
|
68
|
+
#
|
|
69
|
+
# read icoads record from given file name and count the records
|
|
70
|
+
#
|
|
71
|
+
def count_attm_file(fname):
    """Count the matching, unmatching and empty records of one attm file.

    fname - name of the attm file; it must contain a year and month in
            the form YYYY-MM, which decides the group key the counts are
            added to in ACOUNTS

    The attm name is identified from the first line of the first file
    read and kept in PVALS['aname'].
    """

    PgLOG.pglog("Count attm records in File '{}'".format(fname), PgLOG.WARNLG)

    # Get file month
    ms = re.search(r'(\d\d\d\d)-(\d\d)', fname)
    if ms:
        yr = ms.group(1)
        mn = ms.group(2)
        ym = "{}-{}".format(yr, mn)
        if not PVALS['bym']: PVALS['bym'] = ym
        PVALS['eym'] = ym
        key = yr if PVALS['group'] == "yearly" else ym
        if key not in ACOUNTS:
            ACOUNTS[key] = {'match' : 0, 'unmatch' : 0, 'empty' : 0, 'total' : 0}
    else:
        PgLOG.pglog(fname + ": miss year/month values in file name", PgLOG.LGEREX)

    # the with-block closes the file even if a record fails to be processed
    with open(fname, 'r') as attm:
        line = attm.readline()
        # check and record standalone attm name
        if not PVALS['aname']: PVALS['aname'] = PgIMMA.identify_attm_name(line)
        counts = ACOUNTS[key]
        while line:
            counts['total'] += 1
            # comment out the length check below for normal records
            line = line.rstrip()
            if len(line) < 20:
                counts['empty'] += 1
            else:
                idate = PgIMMA.get_imma_date(line)
                # a date of None is counted as a match, like a found date
                if idate or idate is None:
                    counts['match'] += 1
                else:
                    counts['unmatch'] += 1
            line = attm.readline()
|
|
108
|
+
|
|
109
|
+
#
|
|
110
|
+
# dump attm counts by group
|
|
111
|
+
#
|
|
112
|
+
def dump_attm_counts():
    """Write the counts in ACOUNTS to a text file, one line per group key.

    The file name is built from the attm name, the group in upper case and
    the first and last year-month processed.  A header line is followed by
    the match, unmatch, empty and total counts of each key in sorted order.
    """

    fname = "{}_COUNTS_{}_{}-{}.txt".format(PVALS['aname'], PVALS['group'].upper(), PVALS['bym'], PVALS['eym'])

    # the with-block makes sure the output is flushed and the file closed
    with open(fname, 'w') as attm:
        attm.write(PVALS['group'] + ", match, unmatch, empty, total\n")
        for key in sorted(ACOUNTS):
            counts = ACOUNTS[key]
            attm.write("{}, {}, {}, {}, {}\n".format(key, counts['match'],
                       counts['unmatch'], counts['empty'], counts['total']))
|
|
121
|
+
|
|
122
|
+
#
|
|
123
|
+
# call main() to start program
|
|
124
|
+
#
|
|
125
|
+
if __name__ == "__main__": main()
|