rda-python-dsupdt 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rda_python_dsupdt/PgUpdt.py +1921 -0
- rda_python_dsupdt/__init__.py +1 -0
- rda_python_dsupdt/ds_updt.py +2454 -0
- rda_python_dsupdt/dsupdt.py +2134 -0
- rda_python_dsupdt/dsupdt.usg +1774 -0
- rda_python_dsupdt/pg_updt.py +1652 -0
- rda_python_dsupdt-2.0.4.dist-info/METADATA +18 -0
- rda_python_dsupdt-2.0.4.dist-info/RECORD +12 -0
- rda_python_dsupdt-2.0.4.dist-info/WHEEL +5 -0
- rda_python_dsupdt-2.0.4.dist-info/entry_points.txt +2 -0
- rda_python_dsupdt-2.0.4.dist-info/licenses/LICENSE +21 -0
- rda_python_dsupdt-2.0.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2134 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
##################################################################################
|
|
3
|
+
# Title: dsupdt
|
|
4
|
+
# Author: Zaihua Ji, zji@ucar.edu
|
|
5
|
+
# Date: 10/10/2020
|
|
6
|
+
# 2025-02-05 transferred to package rda_python_dsupdt from
|
|
7
|
+
# https://github.com/NCAR/rda-utility-programs.git
|
|
8
|
+
# 2025-12-08 convert to class DsUpdt
|
|
9
|
+
# Purpose: python utility program to download remote files,
|
|
10
|
+
# process downloaded files and create local file, and
|
|
11
|
+
# archive local files onto RDA Server
|
|
12
|
+
# save information of web online data files or Saved files into RDADB
|
|
13
|
+
# Github: https://github.com/NCAR/rda-python-dsupdt.git
|
|
14
|
+
##################################################################################
|
|
15
|
+
|
|
16
|
+
import sys
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
from os import path as op
|
|
20
|
+
from .pg_updt import PgUpdt
|
|
21
|
+
from rda_python_common.pg_split import PgSplit
|
|
22
|
+
|
|
23
|
+
class DsUpdt(PgUpdt, PgSplit):
|
|
24
|
+
def __init__(self):
    """Initialize the parent classes and reset the per-run state of dsupdt."""
    super().__init__()   # initialize parent class
    # count of items the current action works on
    self.ALLCNT = 0
    # default type letter per option key
    self.DEFTYPES = {'WT': 'D', 'ST': 'P', 'QT': 'B'}
    # temporal info cached by count_caching(), keyed by local file index
    self.TEMPINFO = {}
    # pieces of the summary email assembled by dataset_update()/start_actions()
    self.TOPMSG = None
    self.SUBJECT = None
    self.ACTSTR = None
|
|
30
|
+
|
|
31
|
+
# parse the input parameters of dsupdt and check that enough options are given
|
|
32
|
+
def read_parameters(self):
    """Parse the dsupdt input and verify the options of the chosen action.

    Registers the help path relative to this file, parses the input under
    the application name 'dsupdt', then checks the current action
    (PGOPT['CACT']) against the action bits (PGOPT['ACTS']).
    """
    self.set_help_path(__file__)
    self.parsing_input('dsupdt')
    # both values are read only after parsing has filled self.PGOPT
    current_action = self.PGOPT['CACT']
    action_bits = self.PGOPT['ACTS']
    self.check_enough_options(current_action, action_bits)
|
|
37
|
+
|
|
38
|
+
# start actions of dsupdt
|
|
39
|
+
def start_actions(self):
    """Run the action selected on the command line, then report and log the outcome.

    The action is chosen from PGOPT['ACTS'] (action bits) and PGOPT['CACT']
    (current action name).  Before each delete/set/unlock call, self.ALLCNT is
    set to the number of items the callee will loop over.  After the action:

    * if a SUBJECT was built and email is wanted, the summary email is
      assembled and sent or attached to the dscheck/dcupdt record;
    * a pending dscheck record gets its error or "D" status recorded;
    * the command end is logged when the action's third OPTS entry is set.
    """
    if self.PGOPT['ACTS'] & self.OPTS['CU'][0]:
        if 'CI' in self.params:
            if self.cache_update_control(self.params['CI'][0], 1):
                self.check_dataset_status()
        else:
            self.ALLCNT = self.get_option_count(["ED", "EH"])
            self.check_dataset_status(0)
    elif self.PGOPT['ACTS'] == self.OPTS['DL'][0]:
        if 'CI' in self.params:
            self.ALLCNT = len(self.params['CI'])
            self.delete_control_info()
        elif 'RF' in self.params:
            self.ALLCNT = len(self.params['RF'])
            self.delete_remote_info()
        else:
            self.ALLCNT = len(self.params['LI'])
            self.delete_local_info()
    elif self.OPTS[self.PGOPT['CACT']][0] & self.OPTS['GA'][0]:
        self.get_update_info()
    elif self.PGOPT['CACT'] == 'PC':
        self.process_update_controls()
    elif self.PGOPT['ACTS'] == self.OPTS['SA'][0]:
        if 'IF' not in self.params:
            self.action_error("Missing input file via Option -IF")
        # the same input file is read once per section; each section present is applied
        if self.get_input_info(self.params['IF'], 'DCUPDT'):
            self.check_enough_options('SC', self.OPTS['SC'][0])
            self.ALLCNT = len(self.params['CI'])
            self.set_control_info()
        if self.get_input_info(self.params['IF'], 'DLUPDT'):
            self.check_enough_options('SL', self.OPTS['SL'][0])
            self.ALLCNT = len(self.params['LI'])
            self.set_local_info()
        # .get() keeps a missing RF option from raising KeyError; an absent
        # or empty RF list simply means there is no remote file to set
        if self.get_input_info(self.params['IF'], 'DRUPDT') and self.params.get('RF'):
            self.check_enough_options('SR', self.OPTS['SR'][0])
            self.ALLCNT = len(self.params['RF'])
            self.set_remote_info()
    elif self.PGOPT['ACTS'] == self.OPTS['SC'][0]:
        self.ALLCNT = len(self.params['CI'])
        self.set_control_info()
    elif self.PGOPT['ACTS'] == self.OPTS['SL'][0]:
        self.ALLCNT = len(self.params['LI'])
        self.set_local_info()
    elif self.PGOPT['ACTS'] == self.OPTS['SR'][0]:
        self.ALLCNT = len(self.params['RF'])
        self.set_remote_info()
    elif self.PGOPT['ACTS'] & self.OPTS['UF'][0]:
        if 'CI' in self.params:
            if self.cache_update_control(self.params['CI'][0], 1):
                self.dataset_update()
        else:
            self.ALLCNT = self.get_option_count(["ED", "EH"])
            self.dataset_update()
    elif self.PGOPT['ACTS'] == self.OPTS['UL'][0]:
        if 'CI' in self.params:
            self.ALLCNT = len(self.params['CI'])
            self.unlock_control_info()
        if 'LI' in self.params:
            self.ALLCNT = len(self.params['LI'])
            self.unlock_update_info()

    # email only when a subject exists, -NE is not given, and either errors
    # occurred or -EE (email on error only) is not given
    if self.SUBJECT and 'NE' not in self.params and (self.PGLOG['ERRCNT'] or 'EE' not in self.params):
        self.SUBJECT += " on " + self.PGLOG['HOSTNAME']
        self.set_email("{}: {}".format(self.SUBJECT, self.TOPMSG), self.EMLTOP)
        if self.ACTSTR:
            self.SUBJECT = "{} for {}".format(self.ACTSTR, self.SUBJECT)
        if self.PGSIG['PPID'] > 1:
            self.SUBJECT += " in CPID {}".format(self.PGSIG['PID'])
        if self.PGLOG['ERRCNT'] > 0:
            self.SUBJECT += " With Error"
        if self.PGLOG['DSCHECK']:
            self.build_customized_email("dscheck", "einfo", "cindex = {}".format(self.PGLOG['DSCHECK']['cindex']),
                                        self.SUBJECT, self.PGOPT['wrnlog'])
        elif self.PGOPT['UCNTL']:
            self.build_customized_email("dcupdt", "einfo", "cindex = {}".format(self.PGOPT['UCNTL']['cindex']),
                                        self.SUBJECT, self.PGOPT['wrnlog'])
        else:
            self.pglog(self.SUBJECT, self.PGOPT['wrnlog']|self.SNDEML)

    if self.PGLOG['DSCHECK']:
        if self.PGLOG['ERRMSG']:
            self.record_dscheck_error(self.PGLOG['ERRMSG'])
        else:
            self.record_dscheck_status("D")

    if self.OPTS[self.PGOPT['CACT']][2]:
        self.cmdlog()   # log end time if not getting only action
|
|
118
|
+
|
|
119
|
+
# delete update control records for given dsid and control indices
|
|
120
|
+
def delete_control_info(self):
    """Delete the update control records listed in params['CI'].

    Each control index is locked first; indices whose lock call returns a
    value <= 0 are skipped.  For every deleted control the local file
    records pointing at it get their cindex reset to 0.
    """
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Delete {} update control record{} ...".format(self.ALLCNT, plural), self.WARNLG)
    deleted = 0
    detached = 0
    for pos in range(self.ALLCNT):
        locked = self.lock_update_control(self.params['CI'][pos], 2, self.PGOPT['extlog'])
        if locked > 0:
            where = "cindex = {}".format(locked)
            deleted += self.pgdel("dcupdt", where, self.PGOPT['extlog'])
            # detach the local file records from the control just removed
            detached += self.pgexec("UPDATE dlupdt SET cindex = 0 WHERE " + where, self.PGOPT['extlog'])
    self.pglog("{} of {} update control record{} deleted".format(deleted, self.ALLCNT, plural), self.PGOPT['wrnlog'])
    if detached > 0:
        plural = 's' if detached > 1 else ''
        self.pglog("{} associated local file record{} modified".format(detached, plural), self.PGOPT['wrnlog'])
|
|
134
|
+
|
|
135
|
+
# delete local files for given dsid and locfile indices
|
|
136
|
+
def delete_local_info(self):
    """Delete the local file records listed in params['LI'] and their remote file records.

    A local file index is processed only when lock_update() returns a value
    greater than 0.  Remote file records sharing the lindex are removed
    before the local file record itself.
    """
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Delete {} Locfile record{} ...".format(self.ALLCNT, plural), self.WARNLG)
    removed_remote = 0
    removed_local = 0
    for pos in range(self.ALLCNT):
        lindex = self.params['LI'][pos]
        where = "lindex = {}".format(lindex)
        if self.lock_update(lindex, None, 2, self.PGOPT['errlog']) > 0:
            # with an empty field list the result is compared as a count
            remote_count = self.pgget("drupdt", "", where, self.PGOPT['extlog'])
            if remote_count > 0:
                remote_plural = 's' if remote_count > 1 else ''
                self.pglog("Delete {} associated remote file record{} for Locfile index {} ...".format(remote_count, remote_plural, lindex), self.WARNLG)
                removed_remote += self.pgdel("drupdt", where, self.PGOPT['extlog'])
            removed_local += self.pgdel("dlupdt", where, self.PGOPT['extlog'])
    self.pglog("{} of {} Locfile record{} deleted".format(removed_local, self.ALLCNT, plural), self.PGOPT['wrnlog'])
    if removed_remote > 0:
        plural = "s" if removed_remote > 1 else ""
        self.pglog("{} associated Remote file record{} deleted too".format(removed_remote, plural), self.PGOPT['wrnlog'])
|
|
154
|
+
|
|
155
|
+
# delete update remote files for given dsid and remote files/locfile indices
|
|
156
|
+
def delete_remote_info(self):
    """Delete remote file records matched by local file index and remote file name.

    Entry i of params['LI'] and params['RF'] identifies one record; when
    params['DO'] is given its entry i narrows the match by dindex.
    """
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Delete {} remote file record{} ...".format(self.ALLCNT, plural), self.WARNLG)
    self.validate_multiple_options(self.ALLCNT, ["LI", "DO"])
    deleted = 0
    for pos in range(self.ALLCNT):
        where = "lindex = {} AND remotefile = '{}'".format(self.params['LI'][pos], self.params['RF'][pos])
        if 'DO' in self.params:
            where = where + " AND dindex = {}".format(self.params['DO'][pos])
        deleted += self.pgdel("drupdt", where, self.PGOPT['extlog'])
    self.pglog("{} of {} remote file record{} deleted".format(deleted, self.ALLCNT, plural), self.PGOPT['wrnlog'])
|
|
166
|
+
|
|
167
|
+
# get update control information
|
|
168
|
+
def get_control_info(self):
    """Write the update control records of dataset params['DS'] to self.OUTPUT.

    Output consists of the dataset line, a "[DCUPDT]" section header when
    the current action is "GA", the column titles, and the records.
    Fields come from option FN (default PGOPT['dcupdt']) and ordering from
    option ON (default "C"); column widths are computed only with option FO.
    """
    table = "dcupdt"
    column_map = self.TBLHASH[table]
    self.pglog("Get update control info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
    widths = None
    field_keys = self.fieldname_string(self.params.get('FN'), self.PGOPT[table], self.PGOPT['dcall'])
    order_keys = self.params.get('ON', "C")
    where = self.file_condition(table) + self.get_order_string(order_keys, table)
    records = self.pgmget(table, "*", where, self.PGOPT['extlog'])
    if records and 'FO' in self.params:
        widths = self.all_column_widths(records, field_keys, column_map)
    self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
    if self.PGOPT['CACT'] == "GA":
        self.OUTPUT.write("[{}]\n".format(table.upper()))
    self.OUTPUT.write(self.get_string_titles(field_keys, column_map, widths) + "\n")
    if not records:
        self.pglog("no update control record retrieved", self.PGOPT['wrnlog'])
    else:
        printed = self.print_column_format(records, field_keys, column_map, widths)
        plural = 's' if printed > 1 else ''
        self.pglog("{} update control record{} retrieved".format(printed, plural), self.PGOPT['wrnlog'])
|
|
188
|
+
|
|
189
|
+
# get local file update information
|
|
190
|
+
def get_local_info(self):
    """Write the local file update records of dataset params['DS'] to self.OUTPUT.

    The dataset line is written only when the current action is "GL";
    otherwise a "[DLUPDT]" section header is written instead.  Fields come
    from option FN (default PGOPT['dlupdt']) and ordering from option ON
    (default "XL"); column widths are computed only with option FO.
    """
    table = "dlupdt"
    column_map = self.TBLHASH[table]
    self.pglog("Get local file update info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
    widths = None
    field_keys = self.fieldname_string(self.params.get('FN'), self.PGOPT[table], self.PGOPT['dlall'])
    order_keys = self.params.get('ON', "XL")
    where = self.file_condition(table) + self.get_order_string(order_keys, table)
    records = self.pgmget(table, "*", where, self.PGOPT['extlog'])
    if records and 'FO' in self.params:
        widths = self.all_column_widths(records, field_keys, column_map)
    if self.PGOPT['CACT'] != "GL":
        self.OUTPUT.write("[{}]\n".format(table.upper()))
    else:
        self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
    self.OUTPUT.write(self.get_string_titles(field_keys, column_map, widths) + "\n")
    if not records:
        self.pglog("no locfile record retrieved", self.PGOPT['wrnlog'])
    else:
        printed = self.print_column_format(records, field_keys, column_map, widths)
        plural = 's' if printed > 1 else ''
        self.pglog("{} locfile record{} retrieved".format(printed, plural), self.PGOPT['wrnlog'])
|
|
212
|
+
|
|
213
|
+
# get remote file update information
|
|
214
|
+
def get_remote_info(self):
    """Write the remote file update records of dataset params['DS'] to self.OUTPUT.

    The dataset line is written only when the current action is "GR";
    otherwise a "[DRUPDT]" section header is written instead.  Fields come
    from option FN (default PGOPT['drupdt']) and ordering from option ON
    (default "LDF"); column widths are computed only with option FO.
    """
    table = "drupdt"
    column_map = self.TBLHASH[table]
    self.pglog("Get remote file update info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
    widths = None
    field_keys = self.fieldname_string(self.params.get('FN'), self.PGOPT[table], self.PGOPT['drall'])
    order_keys = self.params.get('ON', "LDF")
    where = self.file_condition(table) + self.get_order_string(order_keys, table)
    records = self.pgmget(table, "*", where, self.PGOPT['extlog'])
    if records and 'FO' in self.params:
        widths = self.all_column_widths(records, field_keys, column_map)
    if self.PGOPT['CACT'] != "GR":
        self.OUTPUT.write("[{}]\n".format(table.upper()))
    else:
        self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
    self.OUTPUT.write(self.get_string_titles(field_keys, column_map, widths) + "\n")
    if not records:
        self.pglog("no remote file record retrieved", self.PGOPT['wrnlog'])
    else:
        printed = self.print_column_format(records, field_keys, column_map, widths)
        plural = 's' if printed > 1 else ''
        self.pglog("{} remote file record{} retrieved".format(printed, plural), self.PGOPT['wrnlog'])
|
|
236
|
+
|
|
237
|
+
# add or modify update control information
|
|
238
|
+
def set_control_info(self):
    """Add or modify the update control records listed in params['CI'].

    A positive control index modifies an existing record, which is locked
    and read first; any other index adds a new record.  A parent index or
    action name in the built record is validated before saving.  Saving a
    modified record also clears its pid/lockhost; a locked record with
    nothing to change is unlocked explicitly.
    """
    table = 'dcupdt'
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Set {} update control record{} ...".format(self.ALLCNT, plural), self.WARNLG)
    added = 0
    modified = 0
    keys = self.get_field_keys(table, None, 'C')
    if not keys:
        return self.pglog("Nothing to set for update control!", self.PGOPT['errlog'])
    self.validate_multiple_values(table, self.ALLCNT, keys)
    columns = self.get_string_fields(keys, table)
    for pos in range(self.ALLCNT):
        cindex = self.params['CI'][pos]
        current = None
        if cindex > 0:
            if self.lock_update_control(cindex, 2, self.PGOPT['errlog']) <= 0:
                continue
            where = "cindex = {}".format(cindex)
            current = self.pgget(table, columns, where, self.PGOPT['errlog'])
            if not current:
                self.action_error("Error get update control record for " + where)
        changes = self.build_record(keys, current, table, pos)
        if not changes:
            if cindex:   # unlock
                self.lock_update_control(cindex, 0, self.PGOPT['errlog'])
            continue
        if 'pindex' in changes and changes['pindex'] and not self.pgget("dcupdt", "", "cindex = {}".format(changes['pindex'])):
            self.action_error("Parent control Index {} is not in RDADB".format(changes['pindex']))
        if 'action' in changes and not re.match(r'^({})$'.format(self.PGOPT['UPDTACTS']), changes['action']):
            self.action_error("Action Name '{}' must be one of dsupdt Actions ({})".format(changes['action'], self.PGOPT['UPDTACTS']))
        if current:
            # clearing pid/lockhost releases the lock taken above
            changes.update(pid=0, lockhost='')
            modified += self.pgupdt(table, changes, where, self.PGOPT['errlog']|self.DODFLT)
        else:
            changes['dsid'] = self.params['DS']
            if 'specialist' not in changes:
                changes['specialist'] = self.params['LN']
            added += self.pgadd(table, changes, self.PGOPT['errlog']|self.DODFLT)
    self.pglog("{}/{} of {} control record{} added/modified".format(added, modified, self.ALLCNT, plural), self.PGOPT['wrnlog'])
|
|
273
|
+
|
|
274
|
+
# add or modify local file update information
|
|
275
|
+
def set_local_info(self):
    """Add or modify the local file update records listed in params['LI'].

    A positive local file index modifies an existing record, which is
    locked and read first; any other index adds a new record.  A control
    index or action name in the built record is validated before saving.
    New records get dsid, specialist and execorder filled in when absent.
    """
    table = 'dlupdt'
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Set {} local file record{} ...".format(self.ALLCNT, plural), self.WARNLG)
    added = 0
    modified = 0
    keys = self.get_field_keys(table, None, 'L')
    if 'RO' in self.params and 'XO' not in self.params:
        keys += 'X'
    if not keys:
        return self.pglog("Nothing to set for update local file!", self.PGOPT['errlog'])
    self.validate_multiple_values(table, self.ALLCNT, keys)
    columns = self.get_string_fields(keys, table)
    for pos in range(self.ALLCNT):
        lindex = self.params['LI'][pos]
        current = None
        if lindex > 0:
            if self.lock_update(lindex, None, 2, self.PGOPT['errlog']) <= 0:
                continue
            where = "lindex = {}".format(lindex)
            current = self.pgget(table, columns, where, self.PGOPT['errlog'])
            if not current:
                self.action_error("Error get Local file record for " + where)
        elif 'RO' in self.params:
            # NOTE(review): relies on params['XO'] existing here even when only -RO was given - confirm
            self.params['XO'][pos] = self.get_next_exec_order(self.params['DS'], 0)
        changes = self.build_record(keys, current, table, pos)
        if not changes:
            if lindex:   # unlock
                self.lock_update(lindex, None, 0, self.PGOPT['errlog'])
            continue
        if 'cindex' in changes and changes['cindex'] and not self.pgget("dcupdt", "", "cindex = {}".format(changes['cindex'])):
            self.action_error("Update control Index {} is not in RDADB".format(changes['cindex']))
        if 'action' in changes and not re.match(r'^({})$'.format(self.PGOPT['ARCHACTS']), changes['action']):
            self.action_error("Action Name '{}' must be one of dsarch Actions ({})".format(changes['action'], self.PGOPT['ARCHACTS']))
        if current:
            # NOTE(review): 'VI' looks like an option key, not a field name - confirm this test can ever match
            if 'VI' in changes and not changes['VI'] and current['missdate']:
                changes['missdate'] = changes['misshour'] = None
            changes['pid'] = 0
            # NOTE(review): hostname is cleared with 0 while dcupdt clears lockhost with '' - confirm intended
            changes['hostname'] = 0
            modified += self.pgupdt(table, changes, where, self.PGOPT['errlog']|self.DODFLT)
        else:
            changes['dsid'] = self.params['DS']
            if 'specialist' not in changes:
                changes['specialist'] = self.params['LN']
            if 'execorder' not in changes:
                changes['execorder'] = self.get_next_exec_order(self.params['DS'], 1)
            added += self.pgadd(table, changes, self.PGOPT['errlog']|self.DODFLT)
    self.pglog("{}/{} of {} Locfile record{} added/modified".format(added, modified, self.ALLCNT, plural), self.PGOPT['wrnlog'])
|
|
314
|
+
|
|
315
|
+
# add or modify remote file update information
|
|
316
|
+
def set_remote_info(self):
    """Add or modify remote file update records.

    Entry i of params['LI'] and params['RF'], plus params['DO'][i] (0 when
    option DO is absent), identifies one record.  An existing record is
    updated with the built changes; otherwise a new record is added with
    its lindex and dsid filled in.
    """
    table = 'drupdt'
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Set {} update remote file{} ...".format(self.ALLCNT, plural), self.WARNLG)
    added = 0
    modified = 0
    keys = self.get_field_keys(table)
    if not keys:
        return self.pglog("Nothing to set for update remote file!", self.PGOPT['errlog'])
    self.validate_multiple_values(table, self.ALLCNT, keys)
    columns = self.get_string_fields(keys, table)
    for pos in range(self.ALLCNT):
        lindex = self.params['LI'][pos]
        dindex = 0
        if 'DO' in self.params:
            dindex = self.params['DO'][pos]
        where = "lindex = {} AND remotefile = '{}' AND dindex = {}".format(lindex, self.params['RF'][pos], dindex)
        current = self.pgget(table, columns, where, self.PGOPT['errlog'])
        changes = self.build_record(keys, current, table, pos)
        if not changes:
            continue
        if 'lindex' in changes and changes['lindex'] and not self.pgget("dlupdt", "", "lindex = {}".format(changes['lindex'])):
            self.action_error("Local file Index {} is not in RDADB".format(changes['lindex']))
        if current:
            modified += self.pgupdt(table, changes, where, self.PGOPT['errlog']|self.DODFLT)
        else:
            changes['lindex'] = lindex
            changes['dsid'] = self.params['DS']
            added += self.pgadd(table, changes, self.PGOPT['errlog']|self.DODFLT)
    self.pglog("{}/{} of {} remote file record{} added/modified".format(added, modified, self.ALLCNT, plural), self.PGOPT['wrnlog'])
|
|
341
|
+
|
|
342
|
+
# unlock update records for given locfile indices
|
|
343
|
+
def unlock_update_info(self):
    """Unlock the local file update records listed in params['LI'].

    For each index the record's pid/hostname are read.  A locked record is
    unlocked with lock_update(..., -1, ...); if that does not succeed and
    check_host_down() reports the lock host down, lock_update(..., -2, ...)
    is tried as a forced unlock.  Every outcome is logged per index.
    """
    plural = 's' if self.ALLCNT > 1 else ''
    self.pglog("Unlock {} update locfile{} ...".format(self.ALLCNT, plural), self.WARNLG)
    unlocked = 0
    for lindex in self.params['LI']:
        where = "lindex = {}".format(lindex)
        lockrec = self.pgget("dlupdt", "pid, hostname", where, self.PGOPT['extlog'])
        if not lockrec:
            self.pglog("{}: Local File Not exists".format(lindex), self.PGOPT['errlog'])
            continue
        if not lockrec['pid']:
            self.pglog("{}: Local File Not locked".format(lindex), self.PGOPT['wrnlog'])
            continue
        if self.lock_update(lindex, None, -1, self.PGOPT['errlog']) > 0:
            unlocked += 1
            self.pglog("{}: Local File Unlocked {}/{}".format(lindex, lockrec['pid'], lockrec['hostname']), self.PGOPT['wrnlog'])
            continue
        host_down = self.check_host_down(None, lockrec['hostname'])
        if host_down and self.lock_update(lindex, None, -2, self.PGOPT['errlog']) > 0:
            unlocked += 1
            self.pglog("{}: Local File Force unlocked {}/{}".format(lindex, lockrec['pid'], lockrec['hostname']), self.PGOPT['wrnlog'])
        else:
            self.pglog("{}: Local File Unable to unlock {}/{}".format(lindex, lockrec['pid'], lockrec['hostname']), self.PGOPT['wrnlog'])
    self.pglog("{} of {} local file record{} unlocked from RDADB".format(unlocked, self.ALLCNT, plural), self.LOGWRN)
|
|
364
|
+
|
|
365
|
+
# unlock update control records for given control indices
|
|
366
|
+
def unlock_control_info(self):
    """Unlock the update control records listed in params['CI'].

    For each control index the record's pid/lockhost are read.  A locked
    record is unlocked with lock_update_control(cidx, -1, ...); if that
    does not succeed and check_host_down() reports the lock host down,
    lock_update_control(cidx, -2, ...) is tried as a forced unlock.
    Every outcome is logged per index, followed by a summary count.
    """
    s = 's' if self.ALLCNT > 1 else ''
    self.pglog("Unlock {} update control{} ...".format(self.ALLCNT, s), self.WARNLG)
    modcnt = 0
    for cidx in self.params['CI']:
        pgrec = self.pgget("dcupdt", "pid, lockhost", "cindex = {}".format(cidx), self.PGOPT['extlog'])
        if not pgrec:
            self.pglog("{}: Update Control Not exists".format(cidx), self.PGOPT['errlog'])
        elif not pgrec['pid']:
            self.pglog("{}: Update Control Not locked".format(cidx), self.PGOPT['wrnlog'])
        elif self.lock_update_control(cidx, -1, self.PGOPT['extlog']) > 0:
            modcnt += 1
            self.pglog("{}: Update Control Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
        elif (self.check_host_down(None, pgrec['lockhost']) and
              self.lock_update_control(cidx, -2, self.PGOPT['extlog']) > 0):
            # the normal unlock failed but the lock host is down: force it
            modcnt += 1
            self.pglog("{}: Update Control Force unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
        else:
            # message spelling corrected to match the other "Update Control" messages
            self.pglog("{}: Update Control Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
    self.pglog("{} of {} update control record{} unlocked from RDADB".format(modcnt, self.ALLCNT, s), self.LOGWRN)
|
|
386
|
+
|
|
387
|
+
# get update info of local and remote files owned by login name
|
|
388
|
+
def get_update_info(self):
    """Print control, local file and/or remote file update info for one or more datasets.

    With option DS only that dataset is reported.  Otherwise the dataset
    ids are looked up in dlupdt using the file condition (defaulting the
    SN option to params['LN'] when no condition was given) and the number
    found is stored in PGOPT['AUTODS'].  params['DS'] is overwritten with
    each dataset id in turn before the matching getter(s) are called.
    """
    if 'DS' in self.params:
        found = {'dsid': [self.params['DS']]}
        count = 1
    else:
        table = "dlupdt"
        where = self.file_condition(table, None, None, 1)
        if not where:
            self.set_default_value("SN", self.params['LN'])
            where = self.file_condition(table, None, None, 1)
        found = self.pgmget(table, "DISTINCT dsid", where, self.PGOPT['extlog'])
        count = len(found['dsid']) if found else 0
        if count == 0:
            return self.pglog("NO dataset identified for giving condition", self.PGOPT['wrnlog'])
        if count > 1:
            self.pglog("Get Update Info for {} datasets".format(count), self.PGOPT['wrnlog'])
        self.PGOPT['AUTODS'] = count
    for pos in range(count):
        self.params['DS'] = found['dsid'][pos]
        acts = self.PGOPT['ACTS']
        if acts == self.OPTS['GC'][0]:
            self.get_control_info()
        elif acts == self.OPTS['GL'][0]:
            self.get_local_info()
        elif acts == self.OPTS['GR'][0]:
            self.get_remote_info()
        else:
            # combined request: every section uses its default order and all fields
            if 'ON' in self.params:
                del self.params['ON']   # use default order string
            if 'FN' not in self.params:
                self.params['FN'] = 'ALL'
            if self.PGOPT['ACTS'] & self.OPTS['GC'][0]:
                self.get_control_info()
            if self.PGOPT['ACTS'] & self.OPTS['GL'][0]:
                self.get_local_info()
            if self.PGOPT['ACTS'] & self.OPTS['GR'][0]:
                self.get_remote_info()
    if count > 1:
        self.pglog("Update Info of {} datasets retrieved".format(count), self.PGOPT['wrnlog'])
|
|
420
|
+
|
|
421
|
+
# gather due datasets for data update
|
|
422
|
+
def dataset_update(self):
    """Gather the datasets that are due and run the current update action on their local files.

    Steps, in order:

    1. Build the dlupdt selection condition for specialist params['LN'] and
       default the CD/CH options to the current date/hour.
    2. Use params['DS'] if given; otherwise look up the distinct dataset ids
       and store their number in PGOPT['AUTODS'].
    3. When a dscheck record is active, make a first pass calling
       file_update() in caching mode to total the file count.
    4. For every dataset, call file_update() for each local file record and
       fold the PGOPT counters into the totals.
    5. When anything was processed, build TOPMSG/ACTSTR/SUBJECT for the
       summary email; finally reset the control time if a control is cached.

    Returns the value of pglog() when no dataset is due, otherwise None.
    """
    actcnd = "specialist = '{}'".format(self.params['LN'])
    if self.PGOPT['ACTS'] & self.OPTS['AF'][0]:
        actcnd += " AND action IN ('AW', 'AS', 'AQ')"
    (self.PGOPT['CURDATE'], self.PGOPT['CURHOUR']) = self.curdatehour()
    if 'CD' not in self.params:
        self.params['CD'] = self.PGOPT['CURDATE']   # default to current date
    if 'CH' not in self.params:
        self.params['CH'] = self.PGOPT['CURHOUR']   # default to current hour
    # .get() avoids a KeyError when option MU was not given at all
    if self.ALLCNT > 1 and self.params.get('MU'):
        del self.params['MU']
    if 'CN' in self.params and 'RD' in self.params:
        del self.params['CN']
    if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params:
        if 'MO' in self.params:
            del self.params['MO']
    elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF":
        self.params['MO'] = -1

    if 'DS' in self.params:
        dsids = [self.params['DS']]
        dscnt = 1
    else:
        if 'CI' not in self.params:
            actcnd += " AND cindex = 0"
        loccnd = self.file_condition('dlupdt', "LQFIXA", None, 1)
        dscnd = actcnd
        if loccnd:
            dscnd += " AND " + loccnd
        pgrecs = self.pgmget("dlupdt", "DISTINCT dsid", dscnd, self.PGOPT['extlog'])
        dsids = pgrecs['dsid'] if pgrecs else []
        dscnt = len(dsids)
        if not dscnt:
            return self.pglog("NO dataset is due for update on {} for {}".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])
        self.PGOPT['AUTODS'] = dscnt
    actcnd += " ORDER BY execorder, lindex"

    if self.PGLOG['DSCHECK']:
        # caching pass: only total the number of files for the dscheck record
        fcnt = 0
        for i in range(dscnt):
            self.params['DS'] = dsids[i]
            loccnd = self.file_condition('dlupdt', "LQFIXA")
            locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog'])
            loccnt = len(locrecs['locfile']) if locrecs else 0
            if loccnt == 0:
                continue
            for j in range(loccnt):
                locrec = self.onerecord(locrecs, j)
                if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and
                        len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']):
                    locrec['locfile'] = self.params['LF'][0]
                # file_update() returns None for records it skips while caching
                fcnt += self.file_update(locrec, self.LOGWRN, 1) or 0
        self.set_dscheck_fcount(fcnt, self.LOGERR)

    # check and update data for each dataset
    logact = self.PGOPT['emllog']
    acnt = ucnt = 0
    for i in range(dscnt):
        self.params['DS'] = dsids[i]
        loccnd = self.file_condition('dlupdt', "LQFIXA")
        locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog'])
        loccnt = len(locrecs['locfile']) if locrecs else 0
        if loccnt == 0:
            s = "-UC{}".format(self.params['CI'][0]) if ('CI' in self.params and len(self.params['CI']) == 1) else ""
            self.pglog("{}{}: no config record of local file found to update for '{}'".format(self.params['DS'], s, self.params['LN']), self.PGOPT['wrnlog'])
            continue
        s = 's' if loccnt > 1 else ''
        self.pglog("{}: {} for {} update record{}".format(self.params['DS'], self.PGOPT['CACT'], loccnt, s), logact)
        logact = self.PGOPT['emlsep']
        # True only when the loop over local files runs to its end without a break
        completed = False
        for j in range(loccnt):
            locrec = self.onerecord(locrecs, j)
            if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and
                    len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']):
                locrec['locfile'] = self.params['LF'][0]
            if locrec['cindex']:
                if 'CI' not in self.params:
                    self.params['CI'] = [locrec['cindex']]
                    self.cache_update_control(locrec['cindex'], 0)
                    # re-apply the option rules after the control was cached
                    if 'CN' in self.params and 'RD' in self.params:
                        del self.params['CN']
                    if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params:
                        if 'MO' in self.params:
                            del self.params['MO']
                    elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF":
                        self.params['MO'] = -1
                elif locrec['cindex'] != self.params['CI'][0]:
                    self.pglog("{}-{}: Skipped due to control index {} mismatches {}".format(self.params['DS'], locrec['lindex'], locrec['cindex'], self.params['CI'][0]), self.PGOPT['emlerr'])
                    continue
            self.PGOPT['rstat'] = 1   # reset remote download status for each local file
            if self.PGSIG['MPROC'] > 1:
                acnt += 1
            fcnt = self.file_update(locrec, logact)
            if self.PGSIG['PPID'] > 1:
                if self.PGOPT['AUTODS'] > 1:
                    self.PGOPT['AUTODS'] = dscnt = 1
                acnt = ucnt = 0   # reinitialize counts for child process
                break   # stop loop in child
            if self.PGSIG['MPROC'] > 1:
                if fcnt == 0:
                    break   # quit
                if fcnt > 0:
                    ucnt += 1   # record update count
                continue   # non-daemon parent
            if 'QE' in self.params and fcnt <= 0:
                break
        else:
            completed = True
        if self.PGOPT['vcnt'] > 0:
            self.renew_internal_version(self.params['DS'], self.PGOPT['vcnt'])
            self.PGOPT['vcnt'] = 0
        if self.PGSIG['MPROC'] > 1:
            # go on to the next dataset only if every local file was handled
            # and no quit was requested
            if not self.PGSIG['QUIT'] and completed:
                continue
            break
        if self.PGOPT['rcnt']:
            if self.PGOPT['CACT'] == "DR":
                acnt += self.PGOPT['rcnt']
                ucnt += self.PGOPT['dcnt']
            s = 's' if self.PGOPT['rcnt'] > 1 else ''
            if loccnt > 1:
                self.pglog("{}: {} of {} rfile{} gotten!".format(self.params['DS'], self.PGOPT['dcnt'], self.PGOPT['rcnt'], s), self.PGOPT['emllog'])
            self.PGOPT['rcnt'] = self.PGOPT['dcnt'] = 0
        if self.PGOPT['lcnt']:
            if self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB":
                acnt += self.PGOPT['lcnt']
                ucnt += self.PGOPT['bcnt']
            s = 's' if self.PGOPT['lcnt'] > 1 else ''
            if loccnt > 1 and self.PGOPT['bcnt'] > 0:
                self.pglog("{}: {} of {} lfile{} built!".format(self.params['DS'], self.PGOPT['bcnt'], self.PGOPT['lcnt'], s), self.PGOPT['emllog'])
            self.PGOPT['lcnt'] = self.PGOPT['bcnt'] = 0
        if self.PGOPT['acnt']:
            acnt += self.PGOPT['acnt']
            ucnt += self.PGOPT['ucnt']
            s = 's' if self.PGOPT['acnt'] > 1 else ''
            self.pglog("{}: {} of {} local file{} archived!".format(self.params['DS'], self.PGOPT['ucnt'], self.PGOPT['acnt'], s),
                       (self.PGOPT['emlsum'] if dscnt > 1 else self.PGOPT['emllog']))
            self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0
        if self.PGSIG['PPID'] > 1:
            break   # stop loop child

    if acnt > 0:
        self.TOPMSG = detail = ""
        if self.PGSIG['MPROC'] > 1:
            s = 's' if acnt > 1 else ''
            self.ACTSTR = "{} of {} CPIDs{} for 'dsupdt {}' started".format(ucnt, acnt, s, self.PGOPT['CACT'])
        else:
            s = 's' if ucnt > 1 else ''
            self.TOPMSG = ""
            if self.PGOPT['CACT'] == "DR":
                atype = "remote file{} gotten".format(s)
            elif self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB":
                atype = "local file{} built".format(s)
            else:
                atype = "local file{} archived".format(s)
                # NOTE(review): download/archive breakdown assumed to apply to
                # archive actions only - confirm against the upstream file
                if self.PGOPT['rdcnt'] > 0:
                    s = 's' if self.PGOPT['rdcnt'] > 1 else ''
                    self.TOPMSG = "{} remote server file{} downloaded and ".format(self.PGOPT['rdcnt'], s)
                if self.PGOPT['udcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Web Online".format(self.PGOPT['udcnt'])
                if self.PGOPT['uncnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Glade Only".format(self.PGOPT['uncnt'])
                if self.PGOPT['uwcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Web".format(self.PGOPT['uwcnt'])
                if self.PGOPT['uscnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Saved".format(self.PGOPT['uscnt'])
                if self.PGOPT['qbcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Quasar Backup".format(self.PGOPT['qbcnt'])
                if self.PGOPT['qdcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Quasar Drdata".format(self.PGOPT['qdcnt'])
            self.ACTSTR = "{} {}".format(ucnt, atype)
        self.TOPMSG += self.ACTSTR
        if detail:
            self.TOPMSG += " ({})".format(detail)
        if dscnt > 1:
            self.pglog("{} datasets: {}".format(dscnt, self.TOPMSG), self.PGOPT['emlsum'])
        self.SUBJECT = "DSUPDT of "
        if self.PGOPT['AUTODS'] < 2:
            self.SUBJECT += self.params['DS'].upper()
        else:
            self.SUBJECT += "{} Datasets".format(self.PGOPT['AUTODS'])

    if self.PGOPT['UCNTL']:
        self.reset_control_time()
        if self.SUBJECT:
            self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex'])
|
|
587
|
+
|
|
588
|
+
# renew internal version number for given dataset
|
|
589
|
+
def renew_internal_version(self, dsid, vcnt):
    """Renew the internal version number of dataset *dsid* after a rearchive.

    Runs ``dsarch <dsid> SV -NV`` with a description telling how many data
    files (*vcnt*) were rearchived.  Only when that command reports success
    is the active ``dsvrsn`` record fetched and a summary message logged.
    Always returns None.
    """
    plural = ''
    if vcnt > 1:
        plural = 's'
    command = "dsarch {} SV -NV -DE '{} Data file{} rearchived'".format(dsid, vcnt, plural)
    if not self.pgsystem(command, self.PGOPT['emerol'], 5):   # 5 = 1 + 4
        return
    condition = "dsid = '{}' and status = 'A'".format(dsid)
    active = self.pgget('dsvrsn', '*', condition, self.PGOPT['emerol'])
    # report the new version/DOI when an active version record exists
    outcome = "set to {} for DOI {}".format(active['iversion'], active['doi']) if active else 'renewed'
    summary = "{}: {} Data file{} rearchived, Internal version number {}".format(dsid, vcnt, plural, outcome)
    self.pglog(summary, self.PGOPT['emlsum'])
|
|
599
|
+
|
|
600
|
+
# cache the total count of files to be archived
|
|
601
|
+
def count_caching(self, locrec, locinfo):
    """Return the total number of files expected for one local file record.

    The result is the number of update periods multiplied by the number of
    names returned by expand_serial_pattern() for locrec['locfile'] (1 when
    it returns nothing).  The period count is self.ALLCNT when that is above
    1; otherwise the temporal info is fetched, stored in
    self.TEMPINFO[locrec['lindex']] and the length of its 'ED' list is used
    (1 when no temporal info is available).
    """
    expanded = self.expand_serial_pattern(locrec['locfile'])
    serial_count = len(expanded) if expanded else 1
    if self.ALLCNT > 1:
        return self.ALLCNT * serial_count
    # keep the temporal info so a later lookup by lindex can reuse it
    info = self.get_tempinfo(locrec, locinfo, 0)
    self.TEMPINFO[locrec['lindex']] = info
    period_count = len(info['ED']) if info else 1
    return period_count * serial_count
|
|
610
|
+
|
|
611
|
+
# gather/archive due data file for update of each local file
|
|
612
|
+
def file_update(self, locrec, logact, caching = 0):
|
|
613
|
+
lfile = locrec['locfile']
|
|
614
|
+
endonly = retcnt = 0
|
|
615
|
+
lindex = locrec['lindex']
|
|
616
|
+
loccnd = "lindex = {}".format(lindex)
|
|
617
|
+
locinfo = "{}-L{}".format(locrec['dsid'], lindex)
|
|
618
|
+
if not lfile:
|
|
619
|
+
if caching:
|
|
620
|
+
return None
|
|
621
|
+
else:
|
|
622
|
+
return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr'])
|
|
623
|
+
locinfo += "-" + lfile
|
|
624
|
+
if locrec['specialist'] != self.params['LN']:
|
|
625
|
+
if caching:
|
|
626
|
+
return None
|
|
627
|
+
else:
|
|
628
|
+
return self.pglog("{}: owner '{}', NOT '{}'".format(locinfo, locrec['specialist'], self.params['LN']), self.PGOPT['emlerr'])
|
|
629
|
+
if caching: return self.count_caching(locrec, locinfo)
|
|
630
|
+
tempinfo = self.TEMPINFO[lindex] if lindex in self.TEMPINFO else self.get_tempinfo(locrec, locinfo, 0)
|
|
631
|
+
if not tempinfo: return 0 # simply return if miss temporal info for update
|
|
632
|
+
rmtcnd = loccnd
|
|
633
|
+
rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1)
|
|
634
|
+
if rcnd: rmtcnd += " AND " + rcnd
|
|
635
|
+
rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog'])
|
|
636
|
+
rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0
|
|
637
|
+
if rcnt == 0:
|
|
638
|
+
if rcnd and self.pgget("drupdt", "", loccnd):
|
|
639
|
+
return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr'])
|
|
640
|
+
# create a empty record remote file
|
|
641
|
+
rcnt = 1
|
|
642
|
+
rmtrecs = {'lindex': [lindex], 'dindex': [0]}
|
|
643
|
+
rflds = ['remotefile', 'serverfile', 'download', 'begintime', 'endtime', 'tinterval']
|
|
644
|
+
for rfld in rflds: rmtrecs[rfld] = [None]
|
|
645
|
+
if rcnt == 1:
|
|
646
|
+
if 'RF' in self.params and len(self.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0]):
|
|
647
|
+
rmtrecs['remotefile'][0] = self.params['RF'][0]
|
|
648
|
+
if 'SF' in self.params and len(self.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0]):
|
|
649
|
+
rmtrecs['serverfile'][0] = self.params['SF'][0]
|
|
650
|
+
ecnt = self.ALLCNT if self.ALLCNT > 1 else len(tempinfo['ED']) # should be at least one
|
|
651
|
+
if self.PGSIG['MPROC'] > 1:
|
|
652
|
+
pname = "updt{}".format(lindex)
|
|
653
|
+
pid = self.start_child(pname, self.PGOPT['wrnlog'], 1) # try to start a child process
|
|
654
|
+
if pid <= 0: return pid # failed to start a child process
|
|
655
|
+
if self.PGSIG['PPID'] > 1:
|
|
656
|
+
self.set_email() # empty email in child process
|
|
657
|
+
self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0
|
|
658
|
+
else:
|
|
659
|
+
edate = tempinfo['ED'][0]
|
|
660
|
+
ehour = tempinfo['EH'][0]
|
|
661
|
+
lfile = self.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ'])
|
|
662
|
+
locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
|
|
663
|
+
if ecnt > 1: locinfo += ", {} Update Periods".format(ecnt)
|
|
664
|
+
self.pglog("CPID {} for 'dsupdt {}' of {}".format(self.pname2cpid(pname), self.PGOPT['CACT'], locinfo), self.PGOPT['emllog'])
|
|
665
|
+
return 1 # no further action in non-daemon program
|
|
666
|
+
if self.lock_update(lindex, locinfo, 1, self.PGOPT['emllog']) <= 0: return 0
|
|
667
|
+
self.PGOPT['lindex'] = lindex
|
|
668
|
+
tempinfo['prcmd'] = self.params['PR'][0] if 'PR' in self.params else locrec['processremote']
|
|
669
|
+
tempinfo['blcmd'] = self.params['BC'][0] if 'BC' in self.params else locrec['buildcmd']
|
|
670
|
+
postcnt = -1
|
|
671
|
+
if self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']:
|
|
672
|
+
tempinfo['postcmd'] = self.params['XC'][0] if 'XC' in self.params else self.PGOPT['UCNTL']['execcmd']
|
|
673
|
+
if tempinfo['postcmd']: postcnt = 0
|
|
674
|
+
setmiss = 1 if tempinfo['VD'] else 0
|
|
675
|
+
ufile = uinfo = None
|
|
676
|
+
rscnt = ucnt = lcnt = 0
|
|
677
|
+
for i in range(ecnt):
|
|
678
|
+
if self.ALLCNT > 1 and i > 0:
|
|
679
|
+
tempinfo = self.get_tempinfo(locrec, locinfo, i)
|
|
680
|
+
if not tempinfo: break
|
|
681
|
+
edate = tempinfo['ED'][0]
|
|
682
|
+
ehour = tempinfo['EH'][0]
|
|
683
|
+
else:
|
|
684
|
+
edate = tempinfo['ED'][i]
|
|
685
|
+
ehour = tempinfo['EH'][i]
|
|
686
|
+
if 'RE' in self.params and i and self.diffdatehour(edate, ehour, tempinfo['edate'], tempinfo['ehour']) <= 0:
|
|
687
|
+
continue
|
|
688
|
+
if ucnt and tempinfo['RS'] == 1 and i%20 == 0: self.refresh_metadata(locrec['dsid'])
|
|
689
|
+
tempinfo['edate'] = edate
|
|
690
|
+
if ehour != None:
|
|
691
|
+
tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour)
|
|
692
|
+
tempinfo['ehour'] = ehour
|
|
693
|
+
else:
|
|
694
|
+
tempinfo['einfo'] = "end data date {}".format(edate)
|
|
695
|
+
tempinfo['ehour'] = None
|
|
696
|
+
if 'GZ' in self.params: tempinfo['einfo'] += "(UTC)"
|
|
697
|
+
locfiles = self.get_local_names(locrec['locfile'], tempinfo)
|
|
698
|
+
lcnt = len(locfiles) if locfiles else 0
|
|
699
|
+
if not lcnt: break
|
|
700
|
+
rmtcnt = acnt = ccnt = ut = 0
|
|
701
|
+
rfiles = rfile = None
|
|
702
|
+
if tempinfo['RS'] == 0 and lcnt > 2: tempinfo['RS'] = 1
|
|
703
|
+
for l in range(lcnt):
|
|
704
|
+
if self.PGLOG['DSCHECK'] and ((l+1)%20) == 0:
|
|
705
|
+
self.add_dscheck_dcount(20, 0, self.PGOPT['extlog'])
|
|
706
|
+
lfile = locfiles[l]
|
|
707
|
+
locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
|
|
708
|
+
tempinfo['gotnew'] = tempinfo['archived'] = 0
|
|
709
|
+
tempinfo['ainfo'] = None
|
|
710
|
+
tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo)
|
|
711
|
+
if not tempinfo['ainfo']: continue
|
|
712
|
+
if tempinfo['ainfo']['archived'] == tempinfo['ainfo']['archcnt']:
|
|
713
|
+
ufile = "{} at {} {}".format(lfile, tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime'])
|
|
714
|
+
tempinfo['archived'] = 1
|
|
715
|
+
if 'MO' in self.params:
|
|
716
|
+
if self.params['MO'] < 0:
|
|
717
|
+
self.pglog("{}: {} already for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), self.PGOPT['emlsum'])
|
|
718
|
+
if i == 0: self.pglog("Add Mode option -RA if you want to re-archive", self.PGOPT['wrnlog'])
|
|
719
|
+
if 'UT' in self.params or 'ED' not in self.params: ut = 1
|
|
720
|
+
retcnt += 1
|
|
721
|
+
continue
|
|
722
|
+
else:
|
|
723
|
+
if self.PGOPT['ACTS']&self.OPTS['AF'][0]: uinfo = locinfo
|
|
724
|
+
self.pglog("{}: {} for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), logact)
|
|
725
|
+
if not self.change_workdir(locrec['workdir'], locinfo, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']):
|
|
726
|
+
break
|
|
727
|
+
if self.PGOPT['ACTS']&self.OPTS['AF'][0]: self.PGOPT['acnt'] += 1
|
|
728
|
+
if self.PGOPT['ACTS']&self.OPTS['BL'][0]: self.PGOPT['lcnt'] += 1
|
|
729
|
+
opt = 1 if tempinfo['AQ'] else 65 # 1+64(remove small file)
|
|
730
|
+
linfo = self.check_local_file(lfile, opt, self.PGOPT['emerol'])
|
|
731
|
+
cnt = -1
|
|
732
|
+
if rmtcnt > 0:
|
|
733
|
+
cnt = rmtcnt
|
|
734
|
+
rfile = rfiles[l]
|
|
735
|
+
else:
|
|
736
|
+
dr = 1 if self.PGOPT['ACTS']&self.OPTS['PB'][0] else 0
|
|
737
|
+
if linfo and self.PGOPT['CACT'] == "BL" and not tempinfo['prcmd']: dr = 0 # skip download for BL only
|
|
738
|
+
if dr:
|
|
739
|
+
dfiles = None
|
|
740
|
+
for j in range(rcnt): # processs each remote record
|
|
741
|
+
pgrec = self.onerecord(rmtrecs, j)
|
|
742
|
+
if dfiles and pgrec['remotefile'] == rfile and not self.PGOPT['mcnt']:
|
|
743
|
+
continue # skip
|
|
744
|
+
rfile = pgrec['remotefile']
|
|
745
|
+
act = 0 if locrec['action'] == 'AQ' else self.PGOPT['ACTS']&self.OPTS['DR'][0]
|
|
746
|
+
dfiles = self.download_remote_files(pgrec, lfile, linfo, locrec, locinfo, tempinfo, act)
|
|
747
|
+
if self.PGOPT['rstat'] < 0:
|
|
748
|
+
i = ecnt
|
|
749
|
+
break
|
|
750
|
+
if dfiles: rfiles = self.joinarray(rfiles, dfiles)
|
|
751
|
+
rmtcnt = len(rfiles) if rfiles else 0
|
|
752
|
+
if rmtcnt > 0:
|
|
753
|
+
if lcnt > 1 and rmtcnt != lcnt:
|
|
754
|
+
self.pglog("{}: {} files found for {} local files".format(locrec['locinfo'], rmtcnt, lcnt), self.PGOPT['emlerr'])
|
|
755
|
+
i = ecnt
|
|
756
|
+
break
|
|
757
|
+
cnt = rmtcnt
|
|
758
|
+
rfile = rfiles[l] if lcnt > 1 else rfiles[rmtcnt-1] # record the break remote file name
|
|
759
|
+
else:
|
|
760
|
+
rfile = None
|
|
761
|
+
if linfo and self.PGOPT['rstat'] == 0: self.PGOPT['rstat'] = 1
|
|
762
|
+
if cnt != 0 and self.PGOPT['rstat'] > 0:
|
|
763
|
+
if self.PGOPT['ACTS']&(self.OPTS['BL'][0]|self.OPTS['AF'][0]):
|
|
764
|
+
if cnt < 0 and linfo:
|
|
765
|
+
if tempinfo['archived'] and self.PGOPT['CACT'] == "UF" and not tempinfo['gotnew']:
|
|
766
|
+
if self.PGOPT['ACTS']&self.OPTS['AF'][0] and 'RA' not in self.params:
|
|
767
|
+
self.pglog(lfile + ": local file archived already", self.PGOPT['emllog'])
|
|
768
|
+
cnt = 0
|
|
769
|
+
else:
|
|
770
|
+
if self.PGOPT['ACTS']&self.OPTS['BL'][0]:
|
|
771
|
+
self.pglog(lfile + ": local file exists already", self.PGOPT['emllog'])
|
|
772
|
+
cnt = 1
|
|
773
|
+
elif rmtcnt == lcnt and lfile == rfile:
|
|
774
|
+
if self.PGOPT['ACTS']&self.OPTS['BL'][0]:
|
|
775
|
+
self.pglog(lfile + ": local file same as remote file", self.PGOPT['emllog'])
|
|
776
|
+
elif not (self.PGOPT['ACTS']&self.OPTS['BL'][0]):
|
|
777
|
+
self.pglog(lfile + ": local file not built yet", self.PGOPT['emlerr'])
|
|
778
|
+
cnt = 0
|
|
779
|
+
else:
|
|
780
|
+
cnt = self.build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l)
|
|
781
|
+
if cnt and 'lfile' in tempinfo:
|
|
782
|
+
lfile = tempinfo['lfile']
|
|
783
|
+
del tempinfo['lfile']
|
|
784
|
+
if cnt != 0 and (self.PGOPT['ACTS']&self.OPTS['AF'][0]):
|
|
785
|
+
self.file_status_info(lfile, rfile, tempinfo)
|
|
786
|
+
cnt = self.archive_data_file(lfile, locrec, tempinfo, i)
|
|
787
|
+
if cnt > 0:
|
|
788
|
+
ucnt += 1
|
|
789
|
+
if tempinfo['RS'] == 1: rscnt += 1
|
|
790
|
+
if postcnt > -1: postcnt += 1
|
|
791
|
+
elif cnt > 0:
|
|
792
|
+
cnt = 0
|
|
793
|
+
if cnt > 0 and self.PGOPT['rstat'] > 0:
|
|
794
|
+
ccnt += 1
|
|
795
|
+
elif 'UT' in self.params or tempinfo['archived']:
|
|
796
|
+
ut = 1
|
|
797
|
+
if cnt > 0: acnt += 1
|
|
798
|
+
if self.PGLOG['DSCHECK']:
|
|
799
|
+
self.add_dscheck_dcount(lcnt%20, 0, self.PGOPT['extlog'])
|
|
800
|
+
if ccnt == lcnt and (self.PGOPT['ACTS']&self.OPTS['CF'][0]) and locrec['cleancmd']:
|
|
801
|
+
if tempinfo['CVD'] and self.diffdate(edate, tempinfo['CVD']) > 0:
|
|
802
|
+
self.clean_older_files(locrec['cleancmd'], locrec['workdir'], locinfo, tempinfo['CVD'], locrec['locfile'], rmtrecs, rcnt, tempinfo)
|
|
803
|
+
else:
|
|
804
|
+
if not rfiles and rcnt and locrec['cleancmd'].find(' -RF') > -1:
|
|
805
|
+
rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate)
|
|
806
|
+
self.clean_files(locrec['cleancmd'], edate, ehour, locfiles, rfiles, tempinfo['FQ'])
|
|
807
|
+
if self.PGOPT['ACTS']&self.OPTS['AF'][0] or self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']:
|
|
808
|
+
rmonly = 1 if self.PGOPT['rstat'] > 0 else 0
|
|
809
|
+
if ccnt == lcnt:
|
|
810
|
+
self.reset_update_time(locinfo, locrec, tempinfo, ccnt, endonly)
|
|
811
|
+
elif ut:
|
|
812
|
+
self.reset_update_time(locinfo, locrec, tempinfo, acnt, endonly)
|
|
813
|
+
else:
|
|
814
|
+
if self.PGOPT['rstat'] == 0:
|
|
815
|
+
if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) < 0:
|
|
816
|
+
self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update
|
|
817
|
+
self.PGOPT['rstat'] = 1 # reset remote download status
|
|
818
|
+
elif 'IE' in self.params:
|
|
819
|
+
if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) >= 0:
|
|
820
|
+
endonly = 1
|
|
821
|
+
self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update
|
|
822
|
+
self.PGOPT['rstat'] = 1 # reset remote download status
|
|
823
|
+
if setmiss: setmiss = self.set_miss_time(lfile, locrec, tempinfo, rmonly)
|
|
824
|
+
if postcnt > 0:
|
|
825
|
+
postcmd = self.executable_command(self.replace_pattern(tempinfo['postcmd'], edate, ehour, tempinfo['FQ']),
|
|
826
|
+
lfile, self.params['DS'], edate, ehour)
|
|
827
|
+
self.pgsystem(postcmd, self.PGOPT['emllog'], 5)
|
|
828
|
+
postcnt = 0
|
|
829
|
+
if rscnt >= self.PGOPT['RSMAX']:
|
|
830
|
+
self.refresh_metadata(locrec['dsid'])
|
|
831
|
+
rscnt = 0
|
|
832
|
+
if self.PGOPT['rstat'] < -1 or self.PGOPT['rstat'] < 0 and 'QE' in self.params: break # unrecoverable errors
|
|
833
|
+
if rscnt > 0: self.refresh_metadata(locrec['dsid'])
|
|
834
|
+
if ufile and uinfo and ucnt == 0:
|
|
835
|
+
self.pglog("{}: Last successful update - {}".format(uinfo, ufile), self.PGOPT['emlsum'])
|
|
836
|
+
self.lock_update(lindex, locinfo, 0, self.PGOPT['errlog'])
|
|
837
|
+
self.PGOPT['lindex'] = 0
|
|
838
|
+
return retcnt
|
|
839
|
+
|
|
840
|
+
# refresh the gathered metadata with speed up option -R and -S
|
|
841
|
+
def refresh_metadata(self, dsid):
    """Refresh the gathered metadata of dataset *dsid*.

    Builds the command prefix from PGOPT['scm'] and the dataset id.  When
    PGOPT['wtidx'] holds any entries, the command is run once with
    ``w all`` if 0 is among them, otherwise once per entry, and
    PGOPT['wtidx'] is then reset to an empty dict.  Returns None.
    """
    prefix = "{} -d {} -r".format(self.PGOPT['scm'], dsid)
    pending = self.PGOPT['wtidx']
    if not pending:
        return
    if 0 in pending:
        # index 0 is handled with a single 'all' refresh
        self.pgsystem(prefix + 'w all', self.PGOPT['emllog'], 5)
    else:
        for tidx in pending:
            self.pgsystem("{}w {}".format(prefix, tidx), self.PGOPT['emllog'], 5)
    self.PGOPT['wtidx'] = {}
|
|
850
|
+
|
|
851
|
+
# retrieve remote files; act: > 0 - create filenames and get data files physically; 0 - create filenames only
|
|
852
|
+
def download_remote_files(self, rmtrec, lfile, linfo, locrec, locinfo, tempinfo, act = 0):
|
|
853
|
+
emlsum = self.PGOPT['emlsum'] if self.PGOPT['CACT'] == "DR" else self.PGOPT['emllog']
|
|
854
|
+
rfile = rmtrec['remotefile']
|
|
855
|
+
rmtinfo = locinfo
|
|
856
|
+
dfiles = []
|
|
857
|
+
if not rfile:
|
|
858
|
+
rfile = lfile
|
|
859
|
+
rcnt = 1
|
|
860
|
+
if rfile != locrec['locfile']: rmtinfo += "-" + rfile
|
|
861
|
+
if act:
|
|
862
|
+
tempinfo['DC'] = (self.params['DC'][0] if 'DC' in self.params and self.params['DC'][0] else
|
|
863
|
+
(rmtrec['download'] if rmtrec['download'] else locrec['download']))
|
|
864
|
+
rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo)
|
|
865
|
+
rcnt = len(rfiles) if rfiles else 0
|
|
866
|
+
if rcnt == 0:
|
|
867
|
+
self.PGOPT['rstat'] = -2
|
|
868
|
+
return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr'])
|
|
869
|
+
self.PGOPT['rcnt'] += rcnt # accumulate remote file counts
|
|
870
|
+
if tempinfo['DC']: tempinfo['DC'] = None
|
|
871
|
+
if act: # get file names on remote server and create download command
|
|
872
|
+
sfile = rmtrec['serverfile']
|
|
873
|
+
if sfile and sfile != rfile:
|
|
874
|
+
sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo)
|
|
875
|
+
scnt = len(sfiles) if sfiles else 0
|
|
876
|
+
if scnt != rcnt:
|
|
877
|
+
self.PGOPT['rstat'] = -2
|
|
878
|
+
return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr'])
|
|
879
|
+
else:
|
|
880
|
+
sfiles = rfiles
|
|
881
|
+
scnt = rcnt
|
|
882
|
+
if tempinfo['AQ']:
|
|
883
|
+
tstr = tempinfo['AQ']
|
|
884
|
+
if tstr == 'Web':
|
|
885
|
+
rpath = "{}/{}/".format(self.PGLOG['DSDHOME'], self.params['DS'])
|
|
886
|
+
else:
|
|
887
|
+
rpath = "{}/{}/{}/".format(self.PGLOG['DECSHOME'], self.params['DS'], tempinfo['ST'])
|
|
888
|
+
else:
|
|
889
|
+
tstr = 'Remote'
|
|
890
|
+
rpath = ''
|
|
891
|
+
ks = 1 if 'KS' in self.params else 0
|
|
892
|
+
self.PGOPT['mcnt'] = ocnt = ecnt = scnt = dcnt = ncnt = 0
|
|
893
|
+
omsize = self.PGLOG['MINSIZE']
|
|
894
|
+
if 'VS' in tempinfo and 'VS' not in self.params: self.PGLOG['MINSIZE'] = tempinfo['VS']
|
|
895
|
+
for i in range(rcnt):
|
|
896
|
+
rfile = rfiles[i]
|
|
897
|
+
rname = rfile['fname']
|
|
898
|
+
rcmd = rfile['rcmd']
|
|
899
|
+
rinfo = self.check_local_file(rpath + rname, 65, self.PGOPT['emerol']) # 65 = 1 + 64
|
|
900
|
+
gotnew = 0
|
|
901
|
+
if not act:
|
|
902
|
+
if rinfo:
|
|
903
|
+
dfiles.append(rname)
|
|
904
|
+
dcnt += 1
|
|
905
|
+
else:
|
|
906
|
+
ecnt += 1
|
|
907
|
+
if rfile['amiss']:
|
|
908
|
+
self.pglog(rname + ": SKIP for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
|
|
909
|
+
self.PGOPT['mcnt'] += 1
|
|
910
|
+
elif 'IE' in self.params:
|
|
911
|
+
self.pglog(rname + ": NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
|
|
912
|
+
self.PGOPT['rstat'] = -1
|
|
913
|
+
else:
|
|
914
|
+
self.pglog(rname + ": ERROR for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
|
|
915
|
+
self.PGOPT['rstat'] = -2
|
|
916
|
+
break
|
|
917
|
+
continue
|
|
918
|
+
elif rinfo and 'RD' not in self.params:
|
|
919
|
+
if not rcmd:
|
|
920
|
+
dfiles.append(rname)
|
|
921
|
+
dcnt += 1
|
|
922
|
+
if tempinfo['archived']:
|
|
923
|
+
if 'CN' not in self.params:
|
|
924
|
+
ocnt += 1
|
|
925
|
+
elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) < 1:
|
|
926
|
+
ocnt += 1
|
|
927
|
+
self.pglog("{}: ARCHIVED, NO newer remote file {} found".format(lfile, rname), self.PGOPT['emllog'])
|
|
928
|
+
continue
|
|
929
|
+
elif 'CN' in self.params:
|
|
930
|
+
if rfile['ready'] == -1: # out of check new period already
|
|
931
|
+
dfiles.append(rname)
|
|
932
|
+
dcnt += 1
|
|
933
|
+
if tempinfo['archived']: ocnt += 1
|
|
934
|
+
continue
|
|
935
|
+
elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], rfile['date'], rfile['time']) >= 0:
|
|
936
|
+
dfiles.append(rname)
|
|
937
|
+
dcnt += 1
|
|
938
|
+
if tempinfo['archived']:
|
|
939
|
+
ocnt += 1
|
|
940
|
+
else:
|
|
941
|
+
self.pglog(rname + ": IS local already", self.PGOPT['emllog'])
|
|
942
|
+
continue
|
|
943
|
+
sfile = sfiles[i]
|
|
944
|
+
sname = sfile['fname']
|
|
945
|
+
sinfo = rinfo if sname == rname else self.check_local_file(sname, 65, self.PGOPT['emerol'])
|
|
946
|
+
dact = self.get_download_action(rcmd)
|
|
947
|
+
rdcnt = 1 if re.search(r'(ncftpget|wget) ', dact) else 0
|
|
948
|
+
dcmd = derr = ""
|
|
949
|
+
info0 = cfile = pcmd = bname = None
|
|
950
|
+
ftype = "remote" if sname == rname else "server"
|
|
951
|
+
if sinfo:
|
|
952
|
+
if rcmd:
|
|
953
|
+
if 'RD' in self.params:
|
|
954
|
+
self.pglog(sname + ": ftype file is local, Try dact again", self.PGOPT['emllog'])
|
|
955
|
+
elif ('CN' not in self.params and
|
|
956
|
+
self.cmptime(sinfo['date_modified'], sinfo['time_modified'], sfile['date'], sfile['time']) >= 0):
|
|
957
|
+
rcmd = None # do not need download again
|
|
958
|
+
else:
|
|
959
|
+
self.pglog("{}: USE the local copy of {} file for NO download command".format(sname, ftype), self.PGOPT['emllog'])
|
|
960
|
+
elif not rcmd:
|
|
961
|
+
if tempinfo['archived']:
|
|
962
|
+
ocnt += 1
|
|
963
|
+
self.pglog("{}: ARCHIVED, NO need get {} file {} again for NO download command".format(lfile, ftype, sname), emlsum)
|
|
964
|
+
else:
|
|
965
|
+
ecnt += 1
|
|
966
|
+
if rfile['amiss']:
|
|
967
|
+
self.pglog(rname + ": SKIP missing remote file for NO download command", self.PGOPT['emlerr'])
|
|
968
|
+
self.PGOPT['mcnt'] += 1
|
|
969
|
+
elif 'IE' in self.params:
|
|
970
|
+
self.pglog(rname + ": MISS remote file for NO download command", self.PGOPT['emlerr'])
|
|
971
|
+
self.PGOPT['rstat'] = -1
|
|
972
|
+
else:
|
|
973
|
+
self.pglog(rname + ": ERROR missing remote file for NO download command", self.PGOPT['emlerr'])
|
|
974
|
+
self.PGOPT['rstat'] = -2
|
|
975
|
+
break
|
|
976
|
+
continue
|
|
977
|
+
if rcmd: # try to download now
|
|
978
|
+
if not sfile['ready']:
|
|
979
|
+
self.PGOPT['rstat'] = 0
|
|
980
|
+
self.pglog("{}: {} file NOT Ready yet".format(sname, ftype), self.PGOPT['emllog'])
|
|
981
|
+
ecnt += 1
|
|
982
|
+
break
|
|
983
|
+
if 'CN' in self.params:
|
|
984
|
+
if sinfo:
|
|
985
|
+
cfile = sname
|
|
986
|
+
elif rinfo:
|
|
987
|
+
cfile = rname
|
|
988
|
+
info0 = rinfo
|
|
989
|
+
elif rcnt == 1 and linfo:
|
|
990
|
+
cfile = lfile
|
|
991
|
+
info0 = linfo
|
|
992
|
+
elif tempinfo['archived']:
|
|
993
|
+
cfile = ''
|
|
994
|
+
dcmd = self.executable_command(rcmd, sname, self.params['DS'], sfile['date'], sfile['hour'])
|
|
995
|
+
if tempinfo['AT']:
|
|
996
|
+
stat = self.check_agetime(dcmd, sname, tempinfo['AT'])
|
|
997
|
+
if stat <= 0:
|
|
998
|
+
self.PGOPT['rstat'] = stat
|
|
999
|
+
ecnt += 1
|
|
1000
|
+
break
|
|
1001
|
+
if cfile != None:
|
|
1002
|
+
stat = self.check_newer_file(dcmd, cfile, tempinfo['ainfo'])
|
|
1003
|
+
if stat > 0:
|
|
1004
|
+
if cfile != sname:
|
|
1005
|
+
if stat < 3: self.pglog("{}: Found newer {} file {}".format(cfile, ftype, sname), emlsum)
|
|
1006
|
+
else:
|
|
1007
|
+
if stat < 3: self.pglog("{}: Found newer {} file".format(cfile, ftype), emlsum)
|
|
1008
|
+
if stat == 2: # file redlownloaded, reget file info
|
|
1009
|
+
sinfo = self.check_local_file(sname, 64, self.PGOPT['emerol'])
|
|
1010
|
+
else: # force download file
|
|
1011
|
+
cfile = None
|
|
1012
|
+
else:
|
|
1013
|
+
if stat < 0:
|
|
1014
|
+
if self.PGOPT['STATUS']:
|
|
1015
|
+
if cfile != sname:
|
|
1016
|
+
self.pglog("{}: Error check newer {} file {}\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
|
|
1017
|
+
else:
|
|
1018
|
+
self.pglog("{}: Error check newer {} file\n{}".format(cfile, ftype, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
|
|
1019
|
+
else:
|
|
1020
|
+
if cfile != sname:
|
|
1021
|
+
self.pglog("{}: Cannot check newer {} file {} via {}".format(cfile, ftype, sname, dcmd), self.PGOPT['emlsum'])
|
|
1022
|
+
else:
|
|
1023
|
+
self.pglog("{}: Cannot check newer {} file via {}".format(cfile, ftype, dcmd), self.PGOPT['emlsum'])
|
|
1024
|
+
if stat < -1: # uncrecoverable error
|
|
1025
|
+
self.PGOPT['rstat'] = stat
|
|
1026
|
+
ecnt += 1
|
|
1027
|
+
break
|
|
1028
|
+
elif cfile and cfile != sname:
|
|
1029
|
+
self.pglog("{}: NO newer {} file {} found\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), emlsum)
|
|
1030
|
+
else:
|
|
1031
|
+
self.pglog("{}: NO newer {} file found\n{}".format(sname, ftype, self.PGOPT['STATUS']), emlsum)
|
|
1032
|
+
if tempinfo['archived']:
|
|
1033
|
+
ncnt += 1
|
|
1034
|
+
if rcnt == 1: continue
|
|
1035
|
+
if not info0: info0 = sinfo
|
|
1036
|
+
sinfo = None
|
|
1037
|
+
if not cfile:
|
|
1038
|
+
if op.isfile(sname) and self.pgsystem("mv -f {} {}.rd".format(sname, sname), self.PGOPT['emerol'], 4):
|
|
1039
|
+
bname = sname + ".rd"
|
|
1040
|
+
if not info0: info0 = self.check_local_file(bname, 64, self.PGOPT['emerol'])
|
|
1041
|
+
if dcmd.find('wget ') > -1: self.slow_web_access(dcmd)
|
|
1042
|
+
self.pgsystem(dcmd, self.PGOPT['wrnlog'], 257) # 1 + 256
|
|
1043
|
+
derr = self.PGLOG['SYSERR']
|
|
1044
|
+
sinfo = self.check_local_file(sname, 70, self.PGOPT['emerol'])
|
|
1045
|
+
if sinfo:
|
|
1046
|
+
mode = 0o664 if sinfo['isfile'] else 0o775
|
|
1047
|
+
if mode != sinfo['mode']: self.set_local_mode(sname, sinfo['isfile'], mode, sinfo['mode'], sinfo['logname'], self.PGOPT['emerol'])
|
|
1048
|
+
(stat, derr) = self.parse_download_error(derr, dact, sinfo)
|
|
1049
|
+
if stat < -1: # uncrecoverable error
|
|
1050
|
+
self.pglog("{}: error {}\n{}".format(sname, dcmd, derr), self.PGOPT['emlerr'])
|
|
1051
|
+
self.PGOPT['rstat'] = stat
|
|
1052
|
+
ecnt += 1
|
|
1053
|
+
break
|
|
1054
|
+
elif stat > 0 and self.PGLOG['DSCHECK'] and sinfo:
|
|
1055
|
+
self.add_dscheck_dcount(0, sinfo['data_size'], self.PGOPT['extlog'])
|
|
1056
|
+
if sinfo:
|
|
1057
|
+
if info0:
|
|
1058
|
+
if info0['data_size'] == sinfo['data_size'] and bname:
|
|
1059
|
+
if self.compare_md5sum(bname, sname, self.PGOPT['emlsum']):
|
|
1060
|
+
self.pglog("{}: GOT same size, but different content, {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum'])
|
|
1061
|
+
tempinfo['gotnew'] = gotnew = 1
|
|
1062
|
+
self.PGOPT['rdcnt'] += rdcnt
|
|
1063
|
+
scnt += 1
|
|
1064
|
+
else:
|
|
1065
|
+
self.pglog("{}: GOT same {} file via {}".format(sname, ftype, dact), emlsum)
|
|
1066
|
+
if rinfo and rname != sname and 'KS' not in self.params:
|
|
1067
|
+
self.pgsystem("rm -f " + sname, self.PGOPT['emllog'], 5)
|
|
1068
|
+
sinfo = None
|
|
1069
|
+
if tempinfo['archived']:
|
|
1070
|
+
ncnt += 1
|
|
1071
|
+
else:
|
|
1072
|
+
self.pglog("{}: GOT different {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum'])
|
|
1073
|
+
tempinfo['gotnew'] = gotnew = 1
|
|
1074
|
+
self.PGOPT['rdcnt'] += rdcnt
|
|
1075
|
+
scnt += 1
|
|
1076
|
+
if bname: self.pgsystem("rm -rf " + bname, self.PGOPT['emerol'], 4)
|
|
1077
|
+
elif rcmd:
|
|
1078
|
+
self.pglog("{}: GOT {} file via {}".format(sname, ftype, dact), emlsum)
|
|
1079
|
+
self.PGOPT['rdcnt'] += rdcnt
|
|
1080
|
+
scnt += 1
|
|
1081
|
+
self.PGOPT['dcnt'] += 1
|
|
1082
|
+
if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
|
|
1083
|
+
elif info0:
|
|
1084
|
+
if bname:
|
|
1085
|
+
self.pglog("{}: RETAIN the older {} file".format(sname, ftype), emlsum)
|
|
1086
|
+
self.pgsystem("mv -f {} {}".format(bname, sname), self.PGOPT['emerol'], 4)
|
|
1087
|
+
if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
|
|
1088
|
+
sinfo = info0
|
|
1089
|
+
elif cfile:
|
|
1090
|
+
if tempinfo['archived']:
|
|
1091
|
+
ocnt += 1
|
|
1092
|
+
elif rcnt == 1:
|
|
1093
|
+
if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
|
|
1094
|
+
if cfile == sname:
|
|
1095
|
+
sinfo = info0
|
|
1096
|
+
elif not rinfo and cfile == lfile:
|
|
1097
|
+
continue
|
|
1098
|
+
elif not cfile:
|
|
1099
|
+
ecnt += 1
|
|
1100
|
+
if sfile['amiss']:
|
|
1101
|
+
self.pglog("{}: SKIP {} file for FAIL {}\n{}".format(sname, ftype, dact, derr), self.PGOPT['emlsum'])
|
|
1102
|
+
self.PGOPT['mcnt'] += 1
|
|
1103
|
+
else:
|
|
1104
|
+
self.PGOPT['rstat'] = 0 if 'IE' in self.params else -1
|
|
1105
|
+
if not derr or derr and derr.find(self.PGLOG['MISSFILE']) > -1:
|
|
1106
|
+
msg = "{}: NOT Available for {}\n".format(sname, dact)
|
|
1107
|
+
self.set_email(msg, self.PGOPT['emlsum'])
|
|
1108
|
+
if derr: self.pglog(derr, self.PGOPT['emllog'])
|
|
1109
|
+
else:
|
|
1110
|
+
self.pglog("{}: ERROR {}\n{}".format(sname, dact, derr), self.PGOPT['emlerr'])
|
|
1111
|
+
if self.PGOPT['rstat'] < 0: break
|
|
1112
|
+
continue
|
|
1113
|
+
else:
|
|
1114
|
+
ecnt += 1
|
|
1115
|
+
if sfile['amiss']: self.PGOPT['mcnt'] += 1
|
|
1116
|
+
continue
|
|
1117
|
+
if sinfo:
|
|
1118
|
+
if rname == sname:
|
|
1119
|
+
rinfo = sinfo
|
|
1120
|
+
elif not rinfo or gotnew:
|
|
1121
|
+
if rinfo: self.pgsystem("rm -f " + rname, self.PGOPT['emerol'], 5)
|
|
1122
|
+
if self.convert_files(rname, sname, ks, self.PGOPT['emerol']):
|
|
1123
|
+
rinfo = self.check_local_file(rname, 64, self.PGOPT['emerol'])
|
|
1124
|
+
else:
|
|
1125
|
+
self.PGOPT['rstat'] = -1
|
|
1126
|
+
ecnt += 1
|
|
1127
|
+
break
|
|
1128
|
+
if not rinfo:
|
|
1129
|
+
ecnt += 1
|
|
1130
|
+
if sfile['amiss']:
|
|
1131
|
+
self.pglog(rname + ": SKIP missing remote file", self.PGOPT['emlsum'])
|
|
1132
|
+
self.PGOPT['mcnt'] += 1
|
|
1133
|
+
elif 'IE' in self.params:
|
|
1134
|
+
self.pglog(rname + ": MISS remote file", self.PGOPT['emlerr'])
|
|
1135
|
+
self.PGOPT['rstat'] = -1
|
|
1136
|
+
else:
|
|
1137
|
+
self.pglog(rname + ": ERROR missing remote file", self.PGOPT['emlerr'])
|
|
1138
|
+
self.PGOPT['rstat'] = -2
|
|
1139
|
+
break
|
|
1140
|
+
continue
|
|
1141
|
+
if pcmd:
|
|
1142
|
+
pcmd = self.executable_command(self.replace_pattern(pcmd, rfile['date'], rfile['hour'], tempinfo['FQ']),
|
|
1143
|
+
rname, self.params['DS'], rfile['date'], rfile['hour'])
|
|
1144
|
+
if not self.pgsystem(pcmd, self.PGOPT['emllog'], 259):
|
|
1145
|
+
if self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.PGOPT['emlerr'])
|
|
1146
|
+
self.PGOPT['rstat'] = -1
|
|
1147
|
+
ecnt += 1
|
|
1148
|
+
break
|
|
1149
|
+
dfiles.append(rname)
|
|
1150
|
+
dcnt += 1
|
|
1151
|
+
self.PGLOG['MINSIZE'] = omsize
|
|
1152
|
+
if ncnt == rcnt:
|
|
1153
|
+
self.PGOPT['rstat'] = 0
|
|
1154
|
+
if dcnt > 0: dcnt = 0
|
|
1155
|
+
elif ecnt > 0:
|
|
1156
|
+
s = 's' if rcnt > 1 else ''
|
|
1157
|
+
if dcnt > scnt:
|
|
1158
|
+
self.pglog("{}/{} of {} rfile{} obtained/at local".format(scnt, dcnt, rcnt, s), self.PGOPT['emllog'])
|
|
1159
|
+
else:
|
|
1160
|
+
self.pglog("{} of {} rfile{} obtained".format(scnt, rcnt, s), self.PGOPT['emllog'])
|
|
1161
|
+
if dcnt > 0 and ocnt > 0: dcnt = 0
|
|
1162
|
+
elif ocnt == rcnt:
|
|
1163
|
+
self.PGOPT['rstat'] = 0
|
|
1164
|
+
return dfiles if self.PGOPT['rstat'] == 1 and dcnt > 0 else None
|
|
1165
|
+
|
|
1166
|
+
# build up local files
|
|
1167
|
+
def build_local_file(self, rfiles, lfile, linfo, locrec, tempinfo, lcnt, l):
    """Build one local file from the given remote files.

    rfiles   -- list of remote (downloaded) file names
    lfile    -- local file name; a leading '!' marks a command that prints the name
    linfo    -- truthy if the local file already exists (it is then rolled back to
                '<lfile>.rb' and restored if the build fails)
    locrec   -- local file record; 'options' is read for an '-AF' archive format
    tempinfo -- temporal info; 'blcmd', 'AQ', 'edate', 'ehour' and 'FQ' are read,
                'lfile' is set when the name comes from a command
    lcnt     -- number of local files; when > 1 only rfiles[l] is used
    l        -- index of the current local file

    Returns a truthy value on success and a falsy one on failure.
    """
    emlsum = self.PGOPT['emlsum'] if (self.PGOPT['ACTS'] == self.OPTS['BL'][0]) else self.PGOPT['emllog']
    if lcnt > 1:
        rcnt = 1
        rmax = l + 1
    else:
        rmax = rcnt = len(rfiles) if rfiles else 0

    rbfile = None
    if linfo:
        if rcnt == 1 and lfile == rfiles[l]: return 1
        # the roll-back name must exist BEFORE it is formatted into the mv command
        rbfile = lfile + '.rb'
        if not self.pgsystem("mv -f {} {}".format(lfile, rbfile), self.PGOPT['emerol'], 4):
            rbfile = None
    else:
        s = op.dirname(lfile)
        if s and not op.isdir(s): self.make_local_directory(s, self.PGOPT['emllog']|self.EXITLG)

    def settle_rollback(ok):
        # drop the roll-back copy on success, otherwise put the old file back
        if not rbfile: return
        if ok:
            self.pgsystem("rm -rf " + rbfile, self.PGOPT['emerol'], 4)
        else:
            self.pglog(lfile + ": RETAIN the older local file", emlsum)
            self.pgsystem("mv -f {} {}".format(rbfile, lfile), self.PGOPT['emerol'], 4)

    # compression extension required for members before tarring, e.g. -AF GZ.TAR
    cext = None
    if locrec['options']:
        ms = re.search(r'-AF\s+([\w\.]+)', locrec['options'], re.I)
        if ms:
            fmt = ms.group(1)
            ms = re.search(r'(\w+)\.TAR(\.|$)', fmt, re.I)
            if ms:   # check compression before tarring
                fmt = ms.group(1)
                ms = re.match(r'^({})$'.format(self.CMPSTR), fmt, re.I)
                if ms: cext = '.' + fmt

    if tempinfo['blcmd']:
        # a user supplied command builds the local file
        blcmd = self.executable_command(self.replace_pattern(tempinfo['blcmd'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']),
                                        lfile, self.params['DS'], tempinfo['edate'], tempinfo['ehour'])
        if not self.pgsystem(blcmd, self.PGOPT['emllog']) or self.local_file_size(lfile, 2, self.PGOPT['emerol']) <= 0:
            ret = self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr'])
        else:
            self.PGOPT['bcnt'] += 1
            ret = 1
        settle_rollback(ret)
        return ret

    if lfile[0] == '!':   # executable for build up local file name
        blcmd = self.executable_command(lfile[1:], None, self.params['DS'], tempinfo['edate'], tempinfo['ehour'])
        lfile = self.pgsystem(blcmd, self.PGOPT['emllog'], 21)
        if lfile and self.local_file_size(lfile, 2, self.PGOPT['emerol']) > 0:
            tempinfo['lfile'] = lfile
            return 1
        return self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr'])

    if rcnt == 0 and not linfo: return 0   # no remote file found to build local file

    ret = 1
    kr = 1 if 'KR' in self.params else 0
    if rcnt == 1 and not op.isdir(rfiles[l]):
        rfile = rfiles[l]
    else:
        # several members (or a directory): tar them, or list them for a backup
        ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), lfile, re.I)
        rfile = ms.group(1) if ms else lfile
        fd = None
        if tempinfo['AQ']:
            if not self.validate_one_infile(rfile, self.params['DS']):
                settle_rollback(0)
                return 0
            fd = open(rfile, 'w')
        try:
            if fd: fd.write(tempinfo['AQ'] + "File\n")
            for i in range(rmax):
                tfile = rfiles[i]
                if fd:
                    fd.write(tfile + "\n")
                    continue
                if op.isfile(tfile) and cext and not re.search(r'{}$'.format(cext), tfile, re.I):
                    ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), tfile, re.I)
                    if ms: tfile = ms.group(1)
                    tfile += cext
                    if not self.convert_files(tfile, rfiles[i], kr, self.PGOPT['emllog']):
                        if op.exists(rfile): self.pgsystem("rm -f " + rfile, self.PGOPT['emllog'])
                        ret = self.pglog("{}: QUIT converting file from {}".format(rfile, tfile), self.PGOPT['emllog'])
                        break
                cmd = "tar -{}vf {} {}".format('u' if i else 'c', rfile, tfile)
                ret = self.pgsystem(cmd, self.PGOPT['emllog'])
                if not ret: break
        finally:
            if fd: fd.close()
        if fd: ret = -1   # an input file needs no conversion or size check below
        if op.exists(rfile):
            s = "s" if rcnt > 1 else ""
            if tempinfo['AQ']:
                self.pglog("{}: input file CREATED for backing up {} {} file{}".format(rfile, rcnt, tempinfo['AQ'], s), emlsum)
            else:
                self.pglog("{}: tar file CREATED from {} file{}".format(rfile, rcnt, s), emlsum)
        else:
            ret = self.pglog(rfile + ": ERROR creating tar file", self.PGOPT['emlerr'])

    if ret > 0:
        if lfile != rfile:
            ret = self.convert_files(lfile, rfile, kr, self.PGOPT['emllog'])
            if ret: self.pglog("{}: BUILT from {}".format(lfile, rfile), emlsum)
        if ret:
            fsize = self.local_file_size(lfile, 3, self.PGOPT['emerol'])
            if fsize > 0:
                self.PGOPT['bcnt'] += 1
                if self.PGLOG['DSCHECK']: self.add_dscheck_dcount(0, fsize, self.PGOPT['extlog'])
            else:
                ret = 0

    settle_rollback(ret)
    return 1 if ret else 0
|
|
1273
|
+
|
|
1274
|
+
# append data type to options for given type name if not in options
|
|
1275
|
+
def append_data_type(self, tname, options):
    """Return options with ' -<tname> <default>' appended unless -<tname> is already there.

    The default value is taken from self.DEFTYPES[tname]; the option name is
    matched case-insensitively as a whole, whitespace delimited word.
    """
    already_set = re.search(r'(^|\s)-{}(\s|$)'.format(tname), options, re.I)
    if already_set:
        return options
    return options + " -{} {}".format(tname, self.DEFTYPES[tname])
|
|
1279
|
+
|
|
1280
|
+
# get data type from options for given type name, and default one if not in options
|
|
1281
|
+
def get_data_type(self, tname, options):
    """Return the one-character value given for -<tname> in options.

    Falls back to self.DEFTYPES[tname] when the option is absent or its value
    is not a single word character.
    """
    found = re.search(r'(^|\s)-{}\s+(\w)(\s|$)'.format(tname), options, re.I)
    if found is None:
        return self.DEFTYPES[tname]
    return found.group(2)
|
|
1285
|
+
|
|
1286
|
+
# archive a data file
|
|
1287
|
+
def archive_data_file(self, lfile, locrec, tempinfo, eidx):
    """Archive one local file by running a 'dsarch' command built from locrec.

    lfile    -- local file (or dsarch input file when tempinfo['AQ'] is set)
    locrec   -- local file record; 'action', 'options', 'archfile', 'note',
                'cleancmd' and 'locfile' are read
    tempinfo -- temporal info; tempinfo['ainfo'] (archive info cached earlier) is
                consumed and replaced with freshly queried info after archiving
    eidx     -- index of the current end date; the -RA hint is logged for 0 only

    Returns -1 if the file is already archived and unchanged (and -RA is not
    given), otherwise the return value of the dsarch system call.
    """
    growing = -1
    if tempinfo['ainfo']:
        ainfo = tempinfo['ainfo']
        if ainfo['vindex']: growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ'])
        tempinfo['ainfo'] = None   # clean the archive info recorded earlier
    else:
        ainfo = {'archived': 0, 'note': None}   # empty archive info
    self.pglog("{}: start {} for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emllog'])

    options = locrec['options'] if locrec['options'] else ""
    act = locrec['action']
    archfile = None
    if locrec['archfile']: archfile = self.replace_pattern(locrec['archfile'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])
    if act == 'AW':
        if archfile and 'wfile' not in ainfo: ainfo['wfile'] = archfile
        options = self.append_data_type('WT', options)
    elif act == 'AS':
        if archfile and 'sfile' not in ainfo: ainfo['sfile'] = archfile
        options = self.append_data_type('ST', options)
    elif act == 'AQ':
        if archfile and 'bfile' not in ainfo: ainfo['bfile'] = archfile
        options = self.append_data_type('QT', options)

    if tempinfo['archived'] and not ('RA' in self.params and growing > 0):
        # .get(): the default ainfo above carries neither checksum nor size
        chksm = ainfo.get('chksm')
        asize = ainfo.get('asize')
        if (chksm and chksm == self.PGOPT['chksm'] or
            asize and asize == self.PGOPT['fsize'] and
            self.cmptime(self.PGOPT['fdate'], self.PGOPT['ftime'], ainfo['adate'], ainfo['atime']) >= 0):
            if 'RA' not in self.params:
                amsg = "{}: ARCHIVED by {}".format(lfile, ainfo['adate'])
                if tempinfo['ehour'] is not None: amsg += ":{:02}".format(ainfo['ahour'])
                self.pglog(amsg, self.PGOPT['emllog'])
                if eidx == 0: self.pglog("Add Mode option -RA if you want to re-archive", self.PGOPT['emllog'])
                return -1
            elif growing == 0:
                growing = -1
    if growing == 0: tempinfo['archived'] = self.move_archived_file(ainfo, tempinfo['archived'])

    ifopt = 'IF' if tempinfo['AQ'] else 'LF'
    acmd = "dsarch {} {} -{} {}".format(self.params['DS'], act, ifopt, lfile)
    gcmd = None
    if 'wfile' in ainfo: acmd += " -WF " + ainfo['wfile']
    if 'sfile' in ainfo: acmd += " -SF " + ainfo['sfile']
    if 'bfile' in ainfo: acmd += " -QF " + ainfo['bfile']
    if self.PGOPT['chksm']: acmd += " -MC " + self.PGOPT['chksm']
    if growing > 0 and not re.search(r'(^|\s)-GF(\s|$)', options, re.I): acmd += " -GF"
    if 'MD' in self.params and not re.search(r'(^|\s)-MD(\s|$)', options, re.I): acmd += " -MD"
    if not re.search(r'(^|\s)-NE(\s|$)', options, re.I): acmd += " -NE"   # no email in dsarch
    if tempinfo['gotnew'] and not re.search(r'(^|\s)-OE(\s|$)', options, re.I): acmd += " -OE"
    if 'VS' in self.params:
        acmd += " -VS {}".format(self.params['VS'])
        if 'VS' in tempinfo: options = re.sub(r'-VS\s+\d+\s*', '', options, flags=re.I)

    if re.search(r'(^|\s)-GX(\s|$)', options, re.I):
        wfile = ainfo['wfile'] if 'wfile' in ainfo else ainfo.get('afile')
        ms = re.search(r'(^|\s)-DF (\w+)(\s|$)', options, re.I)
        fmt = ms.group(2).lower() if ms else None   # was ms.ms.group(2): AttributeError
        if wfile and fmt:
            if fmt == "netcdf": fmt = "cf" + fmt
            rs = " -R -S" if tempinfo['RS'] == 1 else ''
            gcmd = "gatherxml -d {} -f {}{} {}".format(self.params['DS'], fmt, rs, wfile)
            # NOTE(review): -GX is dropped only when gatherxml is run here; confirm
            # this nesting against the properly indented source
            options = re.sub(r'-GX\s*', '', options, flags=re.I)

    fnote = None
    if locrec['note'] and not re.search(r'(^|\s)-DE(\s|$)', options, re.I):
        note = self.build_data_note(ainfo['note'], lfile, locrec, tempinfo)
        if note:
            if re.search(r'(\n|\"|\')', note):   # if found \n or ' or ", create temporary input file
                fnote = self.params['DS'] + ".note"
                with open(fnote, 'w') as nd:
                    nd.write("DE<:>\n{}<:>\n".format(note))
                acmd += " -IF " + fnote
            else:
                acmd += " -DE '{}'".format(note)
    if options:
        # count/flags by keyword: passing them positionally is deprecated
        if locrec['cleancmd']: options = re.sub(r'(^-NW\s+|\s+-NW$)', '', options, count=1, flags=re.I)
        acmd += " " + self.replace_pattern(options, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])

    ret = self.pgsystem(acmd, self.PGOPT['emerol'], 69)   # 1 + 4 + 64
    if gcmd: self.pgsystem(gcmd, self.PGOPT['emerol'], 5)
    if fnote: self.pgsystem("rm -f " + fnote, self.PGOPT['emerol'], 4)
    tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo)
    # called for its counter updates; the returned summary string is not logged here
    self.count_update_files(ainfo, tempinfo['ainfo'], ret, tempinfo['RS'])
    self.pglog("{}: UPDATED({}) for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emlsum'])
    return ret
|
|
1370
|
+
|
|
1371
|
+
# count files updated
|
|
1372
|
+
def count_update_files(self, oinfo, ninfo, success, rsopt):
    """Count the files (re)archived and build a short summary of the outcome.

    oinfo   -- archive info before archiving (may lack 'types'/'present')
    ninfo   -- archive info after archiving (may be falsy)
    success -- truthy if the archive command succeeded
    rsopt   -- when 1, self.PGOPT['wtidx'] is flagged for each record's 'tindex'

    A record counts as updated when it is new, or its size, modification time
    or checksum differs from the old record. The counters in self.PGOPT
    ('uscnt', 'qbcnt', 'qdcnt', 'udcnt', 'uncnt', 'uwcnt', 'ucnt') are bumped
    accordingly. Returns the summary string, '' when every file was archived.
    """
    # .get(): the caller may pass a minimal dict without 'types' or 'present'
    nrecs = ninfo.get('types', {}) if ninfo else {}
    orecs = oinfo.get('types', {}) if oinfo else {}
    archcnt = ninfo['archcnt'] if ninfo else 0
    astrs = []
    astr = ""
    for ftype in nrecs:
        nrec = nrecs[ftype]
        orec = orecs[ftype] if ftype in orecs else None
        if 'sfile' in nrec:
            atype = "Saved {} File".format(self.STYPE[ftype])
        elif 'bfile' in nrec:
            atype = "Quasar backup {} File".format(self.BTYPE[ftype])
        else:
            atype = "RDA {} File".format(self.WTYPE[ftype])
        if rsopt == 1:
            tidx = nrec['tindex'] if nrec['tindex'] else 0
            self.PGOPT['wtidx'][tidx] = 1
        if (not orec or
            nrec['data_size'] != orec['data_size'] or
            self.cmptime(orec['date_modified'], orec['time_modified'], nrec['date_modified'], nrec['time_modified']) or
            not (nrec['checksum'] and orec['checksum'] and nrec['checksum'] == orec['checksum'])):
            if 'sfile' in nrec:
                self.PGOPT['uscnt'] += 1
            elif 'bfile' in nrec:
                if ftype == 'D': self.PGOPT['qdcnt'] += 1
                self.PGOPT['qbcnt'] += 1
            elif ftype == 'D':
                self.PGOPT['udcnt'] += 1
            elif ftype == 'N':
                self.PGOPT['uncnt'] += 1
            else:
                self.PGOPT['uwcnt'] += 1
            astrs.append("{} {}rchived".format(atype, "Re-a" if orec else "A"))
            if self.PGLOG['DSCHECK']:
                self.add_dscheck_dcount(0, nrec['data_size'], self.PGOPT['extlog'])

    if astrs:
        self.PGOPT['ucnt'] += 1
        if len(astrs) < archcnt:
            astr = " Successful, but only " if success else " Partially finished, "
            astr += ', '.join(astrs)
    elif success:
        astr = " Successful, but NO file Re-archived"
    else:
        present = oinfo.get('present') if oinfo else None
        astr = " Failed, NO file {}rchived".format('Re-a' if present == archcnt else "A")
    if astr:
        s = "s" if archcnt > 1 else ""
        astr += " of {} archfile{}".format(archcnt, s)
    return astr
|
|
1424
|
+
|
|
1425
|
+
# get the temporal info in local and remote file names and the possible values
# between the break update and the current date
|
|
1426
|
+
# BTW, change to working directory
|
|
1427
|
+
def get_tempinfo(self, locrec, locinfo, eidx = 0):
    """Collect the temporal information needed to update one local file record.

    locrec  -- local file record (end date/hour, intervals, frequency, options...)
    locinfo -- label of the local file used in log messages
    eidx    -- index into the -ED/-EH parameter lists

    Returns a dict whose 'ED'/'EH' lists hold the end dates/hours due for
    update, together with the parsed control values ('VI', 'AT', 'FQ', 'QU',
    'EP', 'NX', 'amiss', 'AQ', 'RS', 'VD', 'VH', 'CVD', ...). On any error, or
    when no update is due yet, the result of self.pglog() is returned instead.
    """
    # get data end date for update action
    edate = self.params['ED'][eidx] if ('ED' in self.params and self.params['ED'][eidx]) else locrec['enddate']
    if not edate: return self.pglog(locinfo + ": MISS End Data Date for local update", self.PGOPT['emlerr'])
    ehour = self.params['EH'][eidx] if ('EH' in self.params and self.params['EH'][eidx] is not None) else locrec['endhour']
    if not isinstance(edate, str): edate = str(edate)
    if ehour is None and self.pgget('drupdt', '', "lindex = {} and tinterval like '%H'".format(locrec['lindex'])):
        return self.pglog(locinfo + ": MISS End Data Hour for hourly remote update", self.PGOPT['emlerr'])

    if locrec['validint']:
        val = locrec['validint']
    elif self.PGOPT['UCNTL'] and self.PGOPT['UCNTL']['validint']:
        val = self.PGOPT['UCNTL']['validint']
    else:
        val = None
    tempinfo = {'AT': None, 'DC': None, 'ED': [], 'EH': [], 'VI': None,
                'VD': None, 'VH': None, 'CVD': None, 'NX': None, 'FQ': None,
                'QU': None, 'EP': 0, 'RS': -1, 'AQ': None}
    if val: val = self.get_control_time(val, "Valid Internal")
    if val:
        tempinfo['VI'] = val
        if ehour is None and val[3]: ehour = 0
    val = self.get_control_time(locrec['agetime'], "File Age Time")
    if val:
        tempinfo['AT'] = val
        if ehour is None and val[3]: ehour = 0

    frequency = self.params['FQ'][0] if 'FQ' in self.params else locrec['frequency']
    if frequency:   # get data update frequency info
        (val, unit) = self.get_control_frequency(frequency)
        if val:
            tempinfo['FQ'] = val
            tempinfo['QU'] = unit   # update frequency unit of measure
        else:
            locinfo = self.replace_pattern(locinfo, edate, ehour)
            return self.pglog("{}: {}".format(locinfo, unit), self.PGOPT['emlerr'])
        if locrec['endperiod']: tempinfo['EP'] = locrec['endperiod']
        if val[3] and ehour is None: ehour = 0
        edate = self.enddate(edate, tempinfo['EP'], unit, tempinfo['FQ'][6])
    elif 'MU' in self.params or 'CP' in self.params:
        locinfo = self.replace_pattern(locinfo, edate, ehour)
        return self.pglog(locinfo + ": MISS frequency for Update", self.PGOPT['emlerr'])

    val = self.get_control_time(locrec['nextdue'], "Due Internval")
    if val:
        tempinfo['NX'] = val
        if ehour is None and val[3]: ehour = 0

    # check if allow missing remote file
    if 'MR' in self.params and self.params['MR'][0]:
        tempinfo['amiss'] = self.params['MR'][0]
    elif locrec['missremote']:
        tempinfo['amiss'] = locrec['missremote']
    else:
        tempinfo['amiss'] = 'N'

    options = locrec['options']
    if locrec['action'] == 'AQ':
        if options:
            ms = re.search(r'-(ST|WT)\s+(\w)', options)
            if ms:
                if ms.group(1) == 'ST':
                    tempinfo['AQ'] = 'Saved'
                    tempinfo['ST'] = ms.group(2)
                else:
                    tempinfo['AQ'] = 'Web'
            else:
                return self.pglog("{}: MISS -ST or -WT to backup {}".format(options, locinfo), self.PGOPT['emlerr'])
        else:
            return self.pglog("Set -ST or -WT in Options to backup {}".format(locinfo), self.PGOPT['emlerr'])
    if (options and re.search(r'(^|\s)-GX(\s|$)', options, re.I) and
        not re.search(r'(^|\s)-RS(\s|$)', options, re.I)):
        tempinfo['RS'] = 0   # set to 1 if need pass -RS to dsarch

    # snapshot of the first end date/hour, compared against in the update loop
    ddate = edate
    dhour = ehour
    dcnt = 0
    self.PGOPT['wtidx'] = {}
    if options:
        ms = re.search(r'-VS\s+(\d+)', options, re.I)
        if ms: tempinfo['VS'] = int(ms.group(1))

    if tempinfo['VI']:
        if tempinfo['VI'][3]:
            (vdate, vhour) = self.adddatehour(self.PGOPT['CURDATE'], self.PGOPT['CURHOUR'], -tempinfo['VI'][0],
                                              -tempinfo['VI'][1], -tempinfo['VI'][2], -tempinfo['VI'][3])
        else:
            vdate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -tempinfo['VI'][2])
            vhour = self.PGOPT['CURHOUR']
        if 'CN' in self.params and locrec['cleancmd']:
            tempinfo['CVD'] = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -(1+tempinfo['VI'][2]))
        tempinfo['setmiss'] = 1
        if self.diffdatehour(edate, ehour, vdate, vhour) < 0:
            vdate = edate
            vhour = ehour
        if tempinfo['amiss'] == 'N' and locrec['missdate']:
            # kept in its own name: the hour difference must not clobber dhour,
            # which is still needed as an hour of day in the update loop below
            mhours = self.diffdatehour(vdate, vhour, locrec['missdate'], locrec['misshour'])
            if mhours > 0:
                if mhours > 240:
                    record = {'missdate': None, 'misshour': None}
                    self.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex']))
                else:
                    vdate = locrec['missdate']
                    vhour = locrec['misshour']
        if vdate and not isinstance(vdate, str): vdate = str(vdate)
        tempinfo['VD'] = vdate
        tempinfo['VH'] = vhour
        if 'ED' not in self.params and self.diffdatehour(edate, ehour, vdate, vhour) > 0:
            edate = vdate
            if tempinfo['FQ']:
                if tempinfo['EP'] or tempinfo['QU'] == 'M':
                    edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
                # step back one period at a time while still inside the valid interval
                while True:
                    (udate, uhour) = self.addfrequency(edate, ehour, tempinfo['FQ'], -1)
                    if self.diffdatehour(udate, uhour, vdate, vhour) < 0: break
                    edate = udate
                    ehour = uhour
                    if tempinfo['EP'] or tempinfo['QU'] == 'M':
                        edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])

    vdate = self.params['CD']
    vhour = self.params['CH']
    if tempinfo['NX']:
        if tempinfo['NX'][3]:
            (udate, uhour) = self.adddatehour(self.PGOPT['CURDATE'], vhour, -tempinfo['NX'][0],
                                              -tempinfo['NX'][1], -tempinfo['NX'][2], -tempinfo['NX'][3])
        else:
            udate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2])
            uhour = vhour
        if self.diffdatehour(udate, uhour, vdate, vhour) <= 0:
            vdate = udate
            vhour = uhour
    if 'CP' in self.params: (vdate, vhour) = self.addfrequency(vdate, vhour, tempinfo['FQ'], 1)

    fupdate = 1 if 'FU' in self.params else 0
    while fupdate or self.diffdatehour(edate, ehour, vdate, vhour) <= 0:
        tempinfo['ED'].append(edate)
        if ehour is not None and tempinfo['QU'] != 'H':
            tempinfo['EH'].append(23)
        else:
            tempinfo['EH'].append(ehour)
        if 'MU' not in self.params: break
        if tempinfo['RS'] == 0 and dcnt < 3:
            if self.diffdatehour(edate, ehour, ddate, dhour) >= 0: dcnt += 1
        (edate, ehour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 1)
        edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
        fupdate = 0
    if tempinfo['RS'] == 0 and dcnt > 2: tempinfo['RS'] = 1

    if not tempinfo['ED']:   # no end time found, update not due yet
        if tempinfo['NX']:
            (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
        else:
            udate = edate
            uhour = ehour
        locinfo = self.replace_pattern(locinfo, edate, ehour, tempinfo['FQ'])
        vdate = self.params['CD']
        val = "Update data"
        if tempinfo['NX']: val += " due"
        if uhour is None:
            locinfo += ": {} on {}".format(val, udate)
        else:
            locinfo += ": {} at {}:{:02}".format(val, udate, uhour)
            vdate += ":{:02}".format(self.params['CH'])
        return self.pglog("{} NOT due yet by {}".format(locinfo, vdate), self.PGOPT['emllog'])
    return tempinfo
|
|
1583
|
+
|
|
1584
|
+
# get archived file info
|
|
1585
|
+
def file_archive_info(self, lfile, locrec, tempinfo):
    """Look up the archive record of a local file and summarize it.

    lfile    -- local file name
    locrec   -- local file record; 'action', 'options', 'archfile' and 'locfile'
                are read and 'gindex' is set
    tempinfo -- temporal info; a cached tempinfo['ainfo'] is returned as is

    Returns a dict with 'archcnt', 'archived', 'present', 'vindex', 'types',
    'note' and 'afile', plus, when a record is found, the file name
    ('wfile'/'sfile'/'bfile'), 'adate', 'atime', 'ahour', 'asize', 'chksm' and
    the type key. For action AQ without an archive file name the result of
    self.pglog() is returned instead.
    """
    if tempinfo['ainfo'] is not None: return tempinfo['ainfo']
    edate = tempinfo['edate']
    ehour = tempinfo['ehour']
    ainfo = {'archcnt': 0, 'archived': 0, 'present': 0, 'vindex': 0, 'types': {}, 'note': None, 'afile' : None}
    growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ'])
    udate = uhour = None   # only needed (and set) for growing files
    if growing:
        if tempinfo['NX']:
            (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
        else:
            udate = edate
            uhour = ehour
        if self.PGLOG['GMTZ'] and uhour is not None:   # convert to local times
            (udate, uhour) = self.adddatehour(udate, uhour, 0, 0, 0, -self.PGLOG['GMTZ'])
    options = locrec['options'] if locrec['options'] else ""
    act = locrec['action']
    locrec['gindex'] = self.get_group_index(options, edate, ehour, tempinfo['FQ'])
    dsid = self.params['DS']
    gcnd = "gindex = {}".format(locrec['gindex'])
    cnd = "dsid = '{}' AND {}".format(dsid, gcnd)

    def default_name(popt, pfield):
        # archive name when none is configured: local name (full with -KP,
        # else base name) under the -WP/-SP path or the group's default path
        name = lfile if re.search(r'(^|\s)-KP(\s|$)', options, re.I) else op.basename(lfile)
        ms = re.search(r'(^|\s)-{}\s+(\S+)'.format(popt), options, re.I)
        if ms:
            path = self.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ'])
        else:
            path = self.get_group_field_path(locrec['gindex'], dsid, pfield)
        return self.join_paths(path, name) if path else name

    def record_found(rec, fkey, tkey, rtype, has_vindex):
        # copy the fields of an existing archive record into ainfo
        ainfo[fkey] = rec[fkey]
        adate = ainfo['adate'] = str(rec['date_modified'])
        atime = ainfo['atime'] = str(rec['time_modified'])
        ahour = None
        if atime:
            ms = re.match(r'^(\d+):', atime)
            if ms: ahour = int(ms.group(1))
        ainfo['ahour'] = ahour   # read by archive_data_file for every action
        ainfo['asize'] = rec['data_size']
        ainfo['chksm'] = rec['checksum'] if rec['checksum'] else ''
        ainfo['note'] = rec['note']
        ainfo['types'][rtype] = rec
        ainfo[tkey] = rtype
        if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1
        if has_vindex and rec['vindex']: ainfo['vindex'] = rec['vindex']
        ainfo['present'] += 1

    # NOTE(review): the conditions below are built by string formatting from
    # configured file names; confirm those values can never hold a quote

    if re.match(r'^A(B|W)$', act):   # check existing web files
        ainfo['archcnt'] = 1
        ftype = self.get_data_type('WT', options)
        if locrec['archfile']:
            afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
        else:
            afile = default_name('WP', 'webpath')
        ainfo['afile'] = afile
        wrec = self.pgget_wfile(dsid, "*", "{} AND type = '{}' AND wfile = '{}'".format(gcnd, ftype, afile), self.PGOPT['extlog'])
        if wrec: record_found(wrec, 'wfile', 'wtype', ftype, True)

    if act == 'AS':   # check existing save files
        ainfo['archcnt'] = 1
        ftype = self.get_data_type('ST', options)
        if locrec['archfile']:
            afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
        else:
            afile = default_name('SP', 'savedpath')
        srec = self.pgget("sfile", "*", "{} AND type = '{}' AND sfile = '{}'".format(cnd, ftype, afile), self.PGOPT['extlog'])
        if srec: record_found(srec, 'sfile', 'stype', ftype, True)

    if act == 'AQ':   # check existing quasar backup files
        ainfo['archcnt'] = 1
        ftype = self.get_data_type('QT', options)
        if locrec['archfile']:
            afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
        else:
            return self.pglog(lfile + ": Miss Backup file name via (FA|FileArchived)", self.PGOPT['emlerr'])
        brec = self.pgget("bfile", "*", "dsid = '{}' AND type = '{}' AND bfile = '{}'".format(dsid, ftype, afile), self.PGOPT['extlog'])
        if brec: record_found(brec, 'bfile', 'btype', ftype, False)

    if ainfo['archcnt'] == 0:
        self.pglog("{}: unknown archive action {}".format(lfile, act), self.PGOPT['extlog'])
    return ainfo
|
|
1695
|
+
|
|
1696
|
+
# build up data note based on temporal info, keep the begin timestamp
# for existing record; change end timestamp only if new data added
|
|
1697
|
+
# return None if no change for existing note
|
|
1698
|
+
def build_data_note(self, onote, lfile, locrec, tempinfo):
    """Build the data note of an archived file from the note template in locrec.

    onote    -- note of the existing archive record, if any
    lfile    -- local file name (used in log messages only)
    locrec   -- local file record; locrec['note'] is the template, or
                '!command' to get the note from an executable
    tempinfo -- temporal info; 'edate', 'ehour' and 'FQ' are read

    A template may hold up to two temporal patterns enclosed in the delimiters
    self.params['PD']. With two patterns the first one is the start time: it is
    taken from onote when present, else derived from the update frequency. The
    remaining pattern is replaced with the end date/hour.
    Returns the note; onote when there is no template or it has too many patterns.
    """
    note = locrec['note']
    if not note: return onote
    seps = self.params['PD']
    match = "[^{}]+".format(re.escape(seps[1]))
    edate = tempinfo['edate']
    ehour = tempinfo['ehour']
    if note[0] == '!':   # executable for build up data note
        # the command is the note text after the '!' (an integer 1 was passed before)
        cmd = self.executable_command(note[1:], None, None, edate)
        if not cmd: return 0
        return self.pgsystem(cmd, self.PGOPT['emllog'], 21)

    note = self.replace_pattern(note, None)   # replace generic patterns first
    # get temporal patterns
    patterns = re.findall(r'{}({}){}'.format(re.escape(seps[0]), match, re.escape(seps[1])), note)
    pcnt = len(patterns)
    if pcnt == 0: return note   # no temporal pattern matches
    if pcnt > 2:
        self.pglog("{}-{}: TOO many ({}) temporal patterns".format(lfile, note, pcnt), self.PGOPT['emllog'])
        return onote
    if pcnt == 2:   # replace start time
        if onote:   # get start time from existing note
            # the pieces below are literal text, so escape them before they go
            # into a regular expression
            replace = re.escape("{}{}{}".format(seps[0], patterns[0], seps[1]))
            ms = re.match(r'^(.*){}(.*){}'.format(replace, re.escape(seps[0])), note)
            if ms:
                init = ms.group(1)
                sp = ms.group(2)
                ms = re.search(r'{}(.+){}'.format(re.escape(init), re.escape(sp)), onote)
                if ms:
                    sdate = ms.group(1)
                    # function replacement keeps backslashes in sdate literal
                    note = re.sub(replace, lambda _m: sdate, note, count=1)
        elif tempinfo['FQ']:   # get start time
            (sdate, shour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 0)
            note = self.replace_pattern(note, sdate, shour, None, 1)
    return self.replace_pattern(note, edate, ehour)   # replace end time now
|
|
1733
|
+
|
|
1734
|
+
# get data file status info
|
|
1735
|
+
def file_status_info(self, lfile, rfile, tempinfo):
    """Cache size, checksum and modification time of a local file in self.PGOPT.

    lfile    -- local file to check
    rfile    -- related remote file; its time stamp is used instead when
                self.cmptime() reports it as smaller than the local one
    tempinfo -- temporal info; with -RE given, 'edate'/'ehour' are advanced by
                the update frequency as far as the file time allows

    Sets self.PGOPT['fsize'], ['chksm'], ['fdate'], ['ftime'] and ['fhour'];
    only 'chksm' and 'fsize' are reset when the local file cannot be checked.
    """
    def hour_of(tstr):
        # leading hour of an 'HH:MM...' string, None if it does not match
        ms = re.match(r'^(\d+):', tstr)
        return int(ms.group(1)) if ms else None

    # check and cache new data info
    finfo = self.check_local_file(lfile, 33, self.PGOPT['wrnlog'])   # 33 = 1 + 32
    if not finfo:
        self.PGOPT['chksm'] = ''
        self.PGOPT['fsize'] = 0
        return
    fdate = finfo['date_modified']
    ftime = finfo['time_modified']
    fhour = hour_of(ftime)   # was assigned to a misspelled name and lost
    self.PGOPT['fsize'] = finfo['data_size']
    self.PGOPT['chksm'] = finfo['checksum']
    if rfile and lfile != rfile:
        finfo = self.check_local_file(rfile, 1, self.PGOPT['wrnlog'])
        if finfo and self.cmptime(finfo['date_modified'], finfo['time_modified'], fdate, ftime) < 0:
            fdate = finfo['date_modified']
            ftime = finfo['time_modified']
            rhour = hour_of(ftime)
            if rhour is not None: fhour = rhour
    self.PGOPT['fdate'] = fdate
    self.PGOPT['ftime'] = ftime
    self.PGOPT['fhour'] = fhour

    if 'RE' in self.params:   # reset end data/time/hour
        if tempinfo['NX']:
            if tempinfo['NX'][3]:
                (fdate, fhour) = self.adddatehour(fdate, fhour, -tempinfo['NX'][0], -tempinfo['NX'][1],
                                                  -tempinfo['NX'][2], -tempinfo['NX'][3])
            else:
                fdate = self.adddate(fdate, -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2])
        # advance the end time one period at a time until it passes the file time
        while True:
            (edate, ehour) = self.addfrequency(tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'], 1)
            edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
            if self.diffdatehour(edate, ehour, fdate, fhour) > 0: break
            tempinfo['edate'] = edate
            tempinfo['ehour'] = ehour
|
|
1772
|
+
|
|
1773
|
+
# check if a Server file is aged enough for download
# return 1 if valid, 0 if not aged enough, -1 if cannot check
|
|
1774
|
+
def check_agetime(self, dcmd, sfile, atime):
    """Tell whether a server file is old enough to be downloaded.

    dcmd  -- download command used to look up the server file
    sfile -- server file name; only used in log messages
    atime -- four age offsets handed to adddatehour(); index 3 is the
             hour offset (presumably year/month/day/hour -- confirm)

    Returns 1 when the aged date/hour is not later than params CD/CH,
    0 when the file is not aged enough yet, or the status produced by
    parse_download_error() when the server file cannot be checked.
    """
    srvinfo = self.check_server_file(dcmd, 1)
    if not srvinfo:
        action = self.get_download_action(dcmd)
        (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], action)
        self.PGOPT['STATUS'] = derr
        self.pglog("{}: cannot check file age\n{}".format(sfile, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
        return stat

    # the hour of the modification time only matters for an hourly age offset
    ahour = None
    if atime[3]:
        hmatch = re.match(r'^(\d+):', srvinfo['time_modified'])
        if hmatch:
            ahour = int(hmatch.group(1))
    (adate, ahour) = self.adddatehour(srvinfo['date_modified'], ahour, atime[0], atime[1], atime[2], atime[3])
    if self.diffdatehour(self.params['CD'], self.params['CH'], adate, ahour) >= 0:
        return 1

    # otherwise server file is not aged enough
    if ahour is None:
        msg = ("{}: original {} file ready by {}\n".format(sfile, srvinfo['ftype'], srvinfo['date_modified']) +
               "but NOT aged enough for retrieving yet by " + self.params['CD'])
    else:
        msg = ("{}: original {} file ready by {}:{:02}\n".format(sfile, srvinfo['ftype'], srvinfo['date_modified'], ahour) +
               "but NOT aged enough for retrieving yet by {}:{:02}".format(self.params['CD'], self.params['CH']))
    self.pglog(msg, self.PGOPT['emllog'])
    return 0
|
|
1796
|
+
|
|
1797
|
+
# check if a Server file is changed with different size;
# return 1 - file changed, 2 - new file retrieved, 3 - force redownload,
|
|
1798
|
+
# 0 - no change , -1 - error check, -2 - cannot check
|
|
1799
|
+
def check_newer_file(self, dcmd, cfile, ainfo):
    """Compare a server file against the local copy or the archived info.

    dcmd  -- download command used to look up the server file
    cfile -- local file to compare with; when empty the archived values
             in ainfo ('chksm', 'asize', 'adate', 'atime') are used
    ainfo -- archive information dict

    Returns 1 (file changed) or 2 (same, for ftype "WGET") when the server
    copy differs and is newer, 3 when the local file cannot be checked,
    0 when no change is detected, or the status from
    parse_download_error() when the server file cannot be checked.
    PGOPT['STATUS'] is updated with a short explanation.
    """
    if cfile:
        local = self.check_local_file(cfile, 33, self.PGOPT['wrnlog'])
        if not local:
            return 3   # download if can not check newer
    else:
        local = {'isfile': 0, 'checksum': ainfo['chksm'], 'data_size': ainfo['asize'],
                 'date_modified': ainfo['adate'], 'time_modified': ainfo['atime']}

    remote = self.check_server_file(dcmd, 33, cfile)
    if not remote:
        action = self.get_download_action(dcmd)
        (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], action)
        self.PGOPT['STATUS'] = derr
        return stat

    stat = 1
    if remote['ftype'] == "WGET":
        stat = 2

    # a real local file of the same name with a different non-zero size: changed
    if (local['isfile'] and cfile == remote['fname'] and local['data_size'] and
            remote['data_size'] and remote['data_size'] != local['data_size']):
        return stat

    self.PGOPT['STATUS'] = ''
    differs = (local['data_size'] != remote['data_size'] or 'checksum' not in remote or
               'checksum' not in local or local['checksum'] != remote['checksum'])
    if differs:
        if 'HO' in self.params and remote['ftype'] == "FTP":
            # shift the FTP timestamp back by the configured hour offset
            (cdate, ctime) = self.addhour(remote['date_modified'], remote['time_modified'], -self.params['HO'][0])
        else:
            cdate = remote['date_modified']
            ctime = remote['time_modified']
        if self.cmptime(cdate, ctime, local['date_modified'], local['time_modified']) > 0:
            parts = ["{} Newer {} {}: {} {} {}".format(self.params['DS'], remote['ftype'], remote['fname'],
                                                       cdate, ctime, remote['data_size'])]
            if 'checksum' in remote:
                parts.append(" " + remote['checksum'])
            parts.append("; {}: ".format(cfile if cfile else "archived"))
            parts.append("{} {} {}".format(local['date_modified'], local['time_modified'], local['data_size']))
            if 'checksum' in local:
                parts.append(" " + local['checksum'])
            self.pglog(''.join(parts), self.PGOPT['wrnlog'])
            return stat

    if 'adate' in ainfo:
        self.PGOPT['STATUS'] = "archived: {} {}".format(ainfo['adate'], ainfo['atime'])
    elif cfile:
        self.PGOPT['STATUS'] += "local copy timestamp: {} {}".format(local['date_modified'], local['time_modified'])
    if 'note' in remote:
        self.PGOPT['STATUS'] += "\n" + remote['note']
    return 0
|
|
1838
|
+
|
|
1839
|
+
# get download action name
|
|
1840
|
+
def get_download_action(self, dcmd):
    """Derive a short action name from a download command string.

    Returns "download" for an empty command, "UNTAR"/"TAR" for a tar
    command (depending on whether its option group contains 'x'),
    otherwise the base name of the first word of the command.  When that
    word is "wc", the base name of the command following the first pipe
    is used instead, if there is one.  "DOWNLOAD" is the fallback.
    """
    if not dcmd:
        return "download"

    tar_match = re.search(r'(^|\S\/)tar\s+-(\w+)\s', dcmd)
    if tar_match:
        return "UNTAR" if 'x' in tar_match.group(2) else "TAR"

    cmd_match = re.match(r'^\s*(\S+)', dcmd)
    if not cmd_match:
        return "DOWNLOAD"

    action = op.basename(cmd_match.group(1))
    if action == "wc":
        # "wc ... | cmd": report the command behind the pipe
        piped = re.search(r'\|\s*(\S+)', dcmd)
        if piped:
            action = op.basename(piped.group(1))
    return action
|
|
1855
|
+
|
|
1856
|
+
# change to working directory if not there yet
|
|
1857
|
+
def change_workdir(self, wdir, locinfo, edate, ehour, FQ):
    """Change into the working directory for an update.

    A non-empty first value of params['WD'] overrides wdir.  Environment
    variables and date/hour patterns in the path are expanded before the
    directory change.

    Returns 1 on success, 0 when the directory change fails, or the
    result of pglog() when no working directory is available.
    """
    override = self.params['WD'][0] if 'WD' in self.params else None
    if override:
        wdir = override
    if not wdir:
        return self.pglog(locinfo + ": MISS working directory", self.PGOPT['emlerr'])

    expanded = self.replace_pattern(self.replace_environments(wdir), edate, ehour, FQ)
    if self.change_local_directory(expanded, self.PGOPT['emllog']):
        return 1
    return 0
|
|
1866
|
+
|
|
1867
|
+
# clean the working copies of remote and local files/directories
|
|
1868
|
+
def clean_files(self, cleancmd, edate, ehour, lfiles, rfiles, freq):
    """Run the clean command for working copies of local/remote files.

    cleancmd -- command template; date/hour patterns are replaced first
    edate    -- end date used for pattern replacement
    ehour    -- end hour used for pattern replacement
    lfiles   -- local file names, joined with blanks for the command
    rfiles   -- remote file names passed through to executable_command()
    freq     -- frequency info used for pattern replacement
    """
    lfile = ' '.join(lfiles) if lfiles else ''
    cleancmd = self.replace_pattern(cleancmd, edate, ehour, freq)
    cleancmd = self.executable_command(cleancmd, lfile, None, None, None, rfiles)
    # missing-file messages are expected while cleaning
    self.PGLOG['ERR2STD'] = [self.PGLOG['MISSFILE']]
    try:
        self.pgsystem(cleancmd, self.PGOPT['emllog'], 5)
    finally:
        # always restore, so a failing command cannot leave the filter set
        self.PGLOG['ERR2STD'] = []
|
|
1875
|
+
|
|
1876
|
+
# clean files rematching pattern on given date/hour
|
|
1877
|
+
def clean_older_files(self, cleancmd, workdir, locinfo, edate, locfile, rmtrecs, rcnt, tempinfo):
    """Clean working files matching the file patterns for a given date/hour.

    cleancmd -- clean command template
    workdir  -- working directory to change into before cleaning
    locinfo  -- local file info string used in log messages
    edate    -- end date of the period being cleaned
    locfile  -- local file name pattern
    rmtrecs  -- remote file records; rcnt is their count
    tempinfo -- temporal info dict; 'ehour' and 'FQ' are read here

    The clean command is NOT run when the working directory cannot be
    entered, so that it never removes files relative to a wrong directory.
    """
    rfiles = None
    lfiles = self.get_local_names(locfile, tempinfo, edate)
    if not self.change_workdir(workdir, locinfo, edate, tempinfo['ehour'], tempinfo['FQ']):
        return
    # remote names are only needed when the command refers to them via -RF
    if rcnt and cleancmd.find(' -RF') > 0:
        rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate)
    self.clean_files(cleancmd, edate, tempinfo['ehour'], lfiles, rfiles, tempinfo['FQ'])
|
|
1884
|
+
|
|
1885
|
+
# get all remote file names for one update period
|
|
1886
|
+
def get_all_remote_files(self, rmtrecs, rcnt, tempinfo, edate):
    """Collect the remote file names of all remote records for one period.

    Records without a 'remotefile' value are skipped, as are records for
    which get_remote_names() returns nothing.  Returns a flat list.
    """
    collected = []
    idx = 0
    while idx < rcnt:   # process each remote record
        rmtrec = self.onerecord(rmtrecs, idx)
        idx += 1
        rname = rmtrec['remotefile']
        if rname:
            names = self.get_remote_names(rname, rmtrec, rname, tempinfo, edate)
            if names:
                collected.extend(names)
    return collected
|
|
1895
|
+
|
|
1896
|
+
# check remote file status and send email to specialist for irregular update cases
|
|
1897
|
+
def check_dataset_status(self):
    """Check the update status of the local file records of a specialist.

    Local file update records are read from table dlupdt for specialist
    params['LN'] (optionally limited by dataset and file conditions), and
    check_locfile_status() is called for each of them.  Afterwards
    SUBJECT and TOPMSG are set to summarise the counts, or to None when
    nothing was ready for checking.
    """
    if 'CD' in self.params:
        # standard format in case not yet
        self.params['CD'] = self.format_date(self.params['CD'])
    else:
        # default to current date
        self.params['CD'] = self.curdate()

    clauses = ["specialist = '{}'".format(self.params['LN'])]
    if 'ED' not in self.params:
        clauses.append("enddate < '{}'".format(self.params['CD']))
    if 'DS' in self.params:
        clauses.append("dsid = '{}'".format(self.params['DS']))
    fcnd = self.file_condition('dlupdt', ('L' if 'LI' in self.params else "FIXA"), None, 1)
    if fcnd:
        clauses.append(fcnd)
    condition = " AND ".join(clauses) + " ORDER BY dsid, execorder, lindex"

    locrecs = self.pgmget("dlupdt", "*", condition, self.PGOPT['extlog'])
    loccnt = len(locrecs['locfile']) if locrecs else 0
    if not loccnt:
        return self.pglog("No Update record found for checking update status on {} for '{}'".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])

    plural = "s" if loccnt > 1 else ""
    self.pglog("Check {} record{} for update status...".format(loccnt, plural), self.PGOPT['wrnlog'])
    for idx in range(loccnt):
        locrec = self.onerecord(locrecs, idx)
        # a single record selected by local index may get its file name overridden
        if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and
                len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']):
            locrec['locfile'] = self.params['LF'][0]
        self.check_locfile_status(locrec)

    if self.PGOPT['lcnt'] or self.PGLOG['ERRMSG']:
        if self.PGOPT['lcnt']:
            loccnt = self.PGOPT['lcnt']
            plural = "s" if (loccnt > 1) else ""
        subject = "DSUPDT Status of {} update record{}".format(loccnt, plural)
        if 'DS' in self.params:
            subject += " for {}".format(self.params['DS'])
        self.SUBJECT = subject
        tail = " ready for update of {} local file{}".format(loccnt, plural)
        rplural = "s" if (self.PGOPT['rcnt'] > 1) else ""
        self.TOPMSG = "{}/{} remote{}{}".format(self.PGOPT['ucnt'], self.PGOPT['rcnt'], rplural, tail)
    else:
        self.pglog("No local file ready for checking {} on {} for {}".format(self.SUBJECT, self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])
        self.SUBJECT = self.TOPMSG = None

    if self.PGOPT['UCNTL']:
        self.reset_control_time()
        if self.SUBJECT:
            self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex'])
|
|
1933
|
+
|
|
1934
|
+
# check update status for a given local file
|
|
1935
|
+
def check_locfile_status(self, locrec):
    """Check the update status for one local file record.

    locrec -- local file update record; 'lindex', 'locfile', 'dsid' are
              read here

    The matching remote records are read from table drupdt; when none
    exist a single empty placeholder record is used.  For every end
    date/hour in the temporal info, check_remote_status() is called per
    remote record; checking of a period stops at the first failing
    remote record unless params has 'CA'.

    Returns 0, or the result of pglog() on an early error.
    NOTE(review): retcnt is never incremented, so the final return value
    is always 0 -- kept as is; confirm the intended meaning.
    """
    loccnd = "lindex = {}".format(locrec['lindex'])
    lfile = locrec['locfile']
    locinfo = "{}-L{}".format(locrec['dsid'], locrec['lindex'])
    if not lfile:
        return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr'])
    locinfo += "-" + lfile
    tempinfo = self.get_tempinfo(locrec, locinfo, 0)
    if not tempinfo:
        return 0   # simply return if miss temporal info for update

    rmtcnd = loccnd
    rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1)
    if rcnd:
        rmtcnd += " AND " + rcnd
    rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog'])
    rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0
    if rcnt == 0:
        if rcnd and self.pgget("drupdt", "", loccnd):
            return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr'])
        # create an empty remote file record; values must be one-element
        # lists because the record set is indexed per field below, and
        # 'download' is read by check_remote_status()
        rcnt = 1
        rmtrecs = {'lindex': [locrec['lindex']], 'remotefile': [None],
                   'serverfile': [None], 'download': [None]}
    if rcnt == 1:
        # a single remote record may get its file names overridden by options
        if ('RF' in self.params and len(self.params['RF']) == 1 and
                not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0])):
            rmtrecs['remotefile'][0] = self.params['RF'][0]
        if ('SF' in self.params and len(self.params['SF']) == 1 and
                not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0])):
            rmtrecs['serverfile'][0] = self.params['SF'][0]

    ecnt = len(tempinfo['ED'])
    self.PGOPT['lindex'] = locrec['lindex']
    logact = self.PGOPT['emllog']
    retcnt = 0
    for i in range(ecnt):
        if self.ALLCNT > 1 and i > 0:
            tempinfo = self.get_tempinfo(locrec, locinfo, i)
            if not tempinfo:
                break
            edate = tempinfo['ED'][0]
            ehour = tempinfo['EH'][0]
        else:
            edate = tempinfo['ED'][i]
            ehour = tempinfo['EH'][i]
        tempinfo['edate'] = edate
        if ehour is not None:
            tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour)
            tempinfo['ehour'] = ehour
        else:
            tempinfo['einfo'] = "end data date {}".format(edate)
            tempinfo['ehour'] = None
        if 'GZ' in self.params:
            tempinfo['einfo'] += "(UTC)"
        lfile = self.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ'])
        locinfo = "{}-L{}-{}".format(locrec['dsid'], locrec['lindex'], lfile)
        self.pglog("{}: Check Update Status for {}".format(locinfo, tempinfo['einfo']), logact)
        logact = self.PGOPT['emlsep']
        self.PGOPT['lcnt'] += 1
        j = 0
        while j < rcnt:   # check each remote record, stop checking if error
            pgrec = self.onerecord(rmtrecs, j)
            if not self.check_remote_status(pgrec, lfile, locrec, locinfo, tempinfo) and 'CA' not in self.params:
                break
            j += 1
        if j == 0:
            break   # the first remote record failed: stop checking further periods
    self.PGOPT['lindex'] = 0
    return (1 if retcnt > 0 else 0)
|
|
1993
|
+
|
|
1994
|
+
# check update status for given remote file
|
|
1995
|
+
def check_remote_status(self, rmtrec, lfile, locrec, locinfo, tempinfo):
    """Check the update status for one remote file record.

    rmtrec   -- remote record; 'remotefile', 'serverfile' and, when
                present, 'download' are read
    lfile    -- local file name with patterns already replaced
    locrec   -- local file record; 'locfile' and 'download' are read
    locinfo  -- info prefix for log messages
    tempinfo -- temporal info dict; its 'DC' entry is set here

    Returns 1 when at least one remote file is ready for update, 0 when
    none is, or the result of pglog() on an error.
    """
    rfile = rmtrec['remotefile']
    rmtinfo = locinfo
    if not rfile:
        rfile = lfile   # no remote name given: fall back to the local name
    if rfile != locrec['locfile']:
        rmtinfo += "-" + rfile
    # download command priority: option DC, then remote record, then local
    # record; a placeholder remote record may have no 'download' key
    if 'DC' in self.params and self.params['DC'][0]:
        tempinfo['DC'] = self.params['DC'][0]
    else:
        tempinfo['DC'] = rmtrec.get('download') or locrec['download']
    rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo)
    rcnt = len(rfiles) if rfiles else 0
    if not rcnt:
        return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr'])
    self.PGOPT['rcnt'] += rcnt   # accumulate remote file counts
    if tempinfo['DC']:
        self.PGOPT['PCNT'] = self.count_pattern_path(tempinfo['DC'])
        tempinfo['DC'] = None

    sfile = rmtrec['serverfile']
    if sfile and sfile != rfile:
        sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo)
        scnt = len(sfiles) if sfiles else 0
        if scnt != rcnt:
            self.PGOPT['rstat'] = -2
            return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr'])
    else:
        sfiles = rfiles

    dcnt = 0
    for rinfo, sinfo in zip(rfiles, sfiles):
        rmtinfo = locinfo
        if rinfo['fname'] != lfile:
            rmtinfo += "-" + rinfo['fname']
        if sinfo['fname'] != rinfo['fname']:
            rmtinfo += "-" + sinfo['fname']
        if not rinfo['rcmd']:
            return self.pglog(rmtinfo + ": Missing download command", self.PGOPT['emlerr'])
        if not sinfo['ready']:
            self.pglog(rmtinfo + ": NOT Ready yet for update", self.PGOPT['emllog'])
            break
        dcnt += 1
    return 1 if dcnt else 0
|
|
2036
|
+
|
|
2037
|
+
# process the update control records
|
|
2038
|
+
def process_update_controls(self):
    """Process the update control records that are due.

    Control records are read from table dcupdt when they are unlocked
    (pid = 0) or locked on this host, and their control time has been
    reached.  Each record is handed to process_one_control().  Without
    option CI or DS the specialist defaults to params['LN'] and the loop
    stops once more than one control has been started.

    Returns the result of pglog() when no record is due, else None.
    """
    ctime = self.curtime(1)
    if not ('CI' in self.params or 'DS' in self.params):
        self.set_default_value("SN", self.params['LN'])
    # fixed: the condition used self.self.get_hash_condition(), which raised AttributeError
    condition = ("(pid = 0 OR lockhost = '{}') AND cntltime <= '{}'".format(self.PGLOG['HOSTNAME'], ctime) +
                 self.get_hash_condition('dcupdt') + " ORDER BY hostname DESC, cntltime")
    pgrecs = self.pgmget("dcupdt", "*", condition, self.PGOPT['extlog'])
    self.ALLCNT = len(pgrecs['cindex']) if pgrecs else 0
    if self.ALLCNT == 0:
        return self.pglog("No update control record idetified due for process", self.LOGWRN)
    s = 's' if self.ALLCNT > 1 else ''
    self.pglog("Process {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG)
    pcnt = 0
    for i in range(self.ALLCNT):
        pcnt += self.process_one_control(self.onerecord(pgrecs, i))
        if pcnt > 1 and not ('CI' in self.params or 'DS' in self.params):
            break
    rmsg = "{} of {} update control{} reprocessed by {}".format(pcnt, self.ALLCNT, s, self.PGLOG['CURUID'])
    if self.PGLOG['CURUID'] != self.params['LN']:
        rmsg += " for " + self.params['LN']
    self.pglog(rmsg, self.PGOPT['wrnlog'])
|
|
2057
|
+
|
|
2058
|
+
# process one update control
|
|
2059
|
+
def process_one_control(self, pgrec):
    """Validate one update control record and start dsupdt for it.

    pgrec -- control record; 'cindex', 'action', 'frequency', 'pid',
             'specialist' and 'dsid' are read

    The control is skipped when its action or frequency is missing or
    invalid, when it is still being processed, when it belongs to another
    specialist, when its data time is not valid (unless option ED is
    given), or when it cannot be locked.  Otherwise a dsupdt command is
    started in the background.

    Returns 1 when the command was started, 0 or the result of pglog()
    otherwise.
    """
    cidx = pgrec['cindex']
    cstr = "Control Index {}".format(cidx)
    if not pgrec['action']:
        return self.pglog(cstr + ": Miss update action", self.PGOPT['errlog'])
    if not (self.OPTS[pgrec['action']][0] & self.PGOPT['CNTLACTS']):
        return self.pglog("{}: Invalid dsupdt action '{}'".format(cstr, pgrec['action']), self.PGOPT['errlog'])
    if not pgrec['frequency']:
        return self.pglog(cstr + ": Miss update Frequency", self.PGOPT['errlog'])
    if pgrec['pid'] > 0 and self.check_process(pgrec['pid']):
        if 'CI' in self.params:
            self.pglog("{}: Under processing {}/{}".format(cstr, pgrec['pid'], self.PGLOG['HOSTNAME']), self.PGOPT['wrnlog'])
        return 0
    if pgrec['specialist'] != self.params['LN']:
        return self.pglog("{}: must be specialist '{}' to process".format(cstr, pgrec['specialist']), self.PGOPT['errlog'])
    if not ('ED' in self.params or self.valid_data_time(pgrec, cstr, self.PGOPT['wrnlog'])):
        return 0

    cmd = "dsupdt "
    if pgrec['dsid']:
        cmd += pgrec['dsid'] + ' '
    cmd += "{} -CI {} ".format(pgrec['action'], cidx)
    if self.PGLOG['CURUID'] != self.params['LN']:
        # fixed: a blank must separate the login name from the next option
        cmd += "-LN {} ".format(self.params['LN'])
    cmd += "-d -b"
    # make sure it is not locked
    if self.lock_update_control(cidx, 0, self.PGOPT['errlog']) <= 0:
        return 0
    self.pglog("{}-{}{}: {}".format(self.PGLOG['HOSTNAME'], pgrec['specialist'], self.current_datetime(), cmd), self.LOGWRN|self.FRCLOG)
    # NOTE(review): the command is built from record/option values and run through the shell
    os.system(cmd + " &")
    return 1
|
|
2083
|
+
|
|
2084
|
+
# move the previous archived version controlled files
|
|
2085
|
+
def move_archived_file(self, ainfo, archived):
    """Move previously archived version-controlled files to Saved type V.

    ainfo    -- archive info dict; 'wfile'/'wtype', 'sfile'/'stype' and
                'types' (records per file type with 'vindex') are read
    archived -- current archived flag, returned unchanged unless a file
                was moved

    A version-controlled Web file is moved first; only if that did not
    happen or returned 0 is a version-controlled Saved file tried.  When
    a move command returns a true status, PGOPT['vcnt'] is incremented,
    the NE/EE options are replaced by SE so a summary is emailed, and 0
    is returned.
    """
    stat = 0
    dsid = self.params['DS']

    if 'wfile' in ainfo:
        ftcode = ainfo['wtype']
        typerec = ainfo['types'][ftcode]
        if typerec and typerec['vindex']:
            tofile = fromfile = ainfo['wfile']
            ftype = "Web"
            ttype = " Saved"
            # create tofile name: same name first, then .vbu1, .vbu2, ...
            seq = 0
            while self.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(dsid, tofile), self.PGOPT['extlog']):
                seq += 1
                tofile = "{}.vbu{}".format(fromfile, seq)
            stat = self.pgsystem("dsarch {} MV -WF {} -WT {} -SF {} -ST V -KM -TS".format(dsid, fromfile, ftcode, tofile), self.PGOPT['emerol'], 5)

    if stat == 0 and ainfo['sfile']:
        ftcode = ainfo['stype']
        typerec = ainfo['types'][ftcode]
        if typerec and typerec['vindex']:
            fromfile = ainfo['sfile']
            ftype = "Saved"
            ttype = ''
            # create tofile name: .vbu0, .vbu1, ...
            seq = 0
            tofile = "{}.vbu{}".format(fromfile, seq)
            while self.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(dsid, tofile), self.PGOPT['extlog']):
                seq += 1
                tofile = "{}.vbu{}".format(fromfile, seq)
            stat = self.pgsystem("dsarch {} MV -RF {} -OT {} -SF {} -ST V".format(dsid, fromfile, ftcode, tofile), self.PGOPT['emerol'], 5)

    if not stat:
        return archived

    self.PGOPT['vcnt'] += 1
    if 'NE' in self.params or 'EE' in self.params:
        self.params.pop('NE', None)
        self.params.pop('EE', None)
        self.params['SE'] = 1   # email summary at least
        self.PGOPT['emllog'] |= self.EMEROL
    self.pglog("{}-{}-{}: Found newer version-conrolled {} file; move to{} type V {}".format(self.params['DS'], ftcode, fromfile, ftype, ttype, tofile), self.PGOPT['emlsum'])
    return 0
|
|
2125
|
+
|
|
2126
|
+
# main function to execute this script
|
|
2127
|
+
def main():
    """Create a DsUpdt instance, read its parameters and run the actions."""
    updater = DsUpdt()
    updater.read_parameters()
    updater.start_actions()
    updater.pgexit(0)

# call main() to start program
if __name__ == "__main__":
    main()
|