rda-python-dsupdt 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2134 @@
1
+ #!/usr/bin/env python3
2
+ ##################################################################################
3
+ # Title: dsupdt
4
+ # Author: Zaihua Ji, zji@ucar.edu
5
+ # Date: 10/10/2020
6
+ # 2025-02-05 transferred to package rda_python_dsupdt from
7
+ # https://github.com/NCAR/rda-utility-programs.git
8
+ # 2025-12-08 convert to class DsUpdt
9
+ # Purpose: python utility program to download remote files,
10
+ # process downloaded files and create local file, and
11
+ # archive local files onto RDA Server
12
+ # save information of web online data files or Saved files into RDADB
13
+ # Github: https://github.com/NCAR/rda-python-dsupdt.git
14
+ ##################################################################################
15
+
16
+ import sys
17
+ import os
18
+ import re
19
+ from os import path as op
20
+ from .pg_updt import PgUpdt
21
+ from rda_python_common.pg_split import PgSplit
22
+
23
+ class DsUpdt(PgUpdt, PgSplit):
24
+ def __init__(self):
25
+ super().__init__() # initialize parent class
26
+ self.TEMPINFO = {}
27
+ self.TOPMSG = self.SUBJECT = self.ACTSTR = None
28
+ self.ALLCNT = 0
29
+ self.DEFTYPES = {'WT': 'D', 'ST': 'P', 'QT': 'B'}
30
+
31
+ # main function to run dsupdt
32
+ def read_parameters(self):
33
+ self.set_help_path(__file__)
34
+ aname = 'dsupdt'
35
+ self.parsing_input(aname)
36
+ self.check_enough_options(self.PGOPT['CACT'], self.PGOPT['ACTS'])
37
+
38
    # start actions of dsupdt
    def start_actions(self):
        """Dispatch the requested dsupdt action(s) and finish up.

        The action bits in self.PGOPT['ACTS'] select one of: check dataset
        status (CU), delete control/remote/local records (DL), get info (GA
        family), process controls (PC), set records from an input file (SA) or
        from options (SC/SL/SR), run the data update (UF family), or unlock
        records (UL). Afterwards an email summary is sent when self.SUBJECT was
        set, any dscheck record is finalized, and the command end time is logged.
        """
        if self.PGOPT['ACTS']&self.OPTS['CU'][0]:
            if 'CI' in self.params:
                # a single control index: cache its control record before checking
                if self.cache_update_control(self.params['CI'][0], 1):
                    self.check_dataset_status()
            else:
                self.ALLCNT = self.get_option_count(["ED", "EH"])
                self.check_dataset_status(0)
        elif self.PGOPT['ACTS'] == self.OPTS['DL'][0]:
            # delete whichever record type was identified by the options
            if 'CI' in self.params:
                self.ALLCNT = len(self.params['CI'])
                self.delete_control_info()
            elif 'RF' in self.params:
                self.ALLCNT = len(self.params['RF'])
                self.delete_remote_info()
            else:
                self.ALLCNT = len(self.params['LI'])
                self.delete_local_info()
        elif self.OPTS[self.PGOPT['CACT']][0]&self.OPTS['GA'][0]:
            self.get_update_info()
        elif self.PGOPT['CACT'] == 'PC':
            self.process_update_controls()
        elif self.PGOPT['ACTS'] == self.OPTS['SA'][0]:
            # set-all: read record sections from the input file given via -IF
            if 'IF' not in self.params:
                self.action_error("Missing input file via Option -IF")
            if self.get_input_info(self.params['IF'], 'DCUPDT'):
                self.check_enough_options('SC', self.OPTS['SC'][0])
                self.ALLCNT = len(self.params['CI'])
                self.set_control_info()
            if self.get_input_info(self.params['IF'], 'DLUPDT'):
                self.check_enough_options('SL', self.OPTS['SL'][0])
                self.ALLCNT = len(self.params['LI'])
                self.set_local_info()
            # NOTE(review): self.params['RF'] is read here before the 'RF' in
            # self.params membership test two lines below — would raise KeyError
            # if DRUPDT info is present without an RF list; confirm get_input_info
            # always populates 'RF' for a DRUPDT section.
            if self.get_input_info(self.params['IF'], 'DRUPDT') and self.params['RF']:
                self.check_enough_options('SR', self.OPTS['SR'][0])
                self.ALLCNT = len(self.params['RF']) if 'RF' in self.params else 0
                self.set_remote_info()
        elif self.PGOPT['ACTS'] == self.OPTS['SC'][0]:
            self.ALLCNT = len(self.params['CI'])
            self.set_control_info()
        elif self.PGOPT['ACTS'] == self.OPTS['SL'][0]:
            self.ALLCNT = len(self.params['LI'])
            self.set_local_info()
        elif self.PGOPT['ACTS'] == self.OPTS['SR'][0]:
            self.ALLCNT = len(self.params['RF'])
            self.set_remote_info()
        elif self.PGOPT['ACTS']&self.OPTS['UF'][0]:
            if 'CI' in self.params:
                if self.cache_update_control(self.params['CI'][0], 1): self.dataset_update()
            else:
                self.ALLCNT = self.get_option_count(["ED", "EH"])
                self.dataset_update()
        elif self.PGOPT['ACTS'] == self.OPTS['UL'][0]:
            # unlock: controls and/or local files, depending on which options came in
            if 'CI' in self.params:
                self.ALLCNT = len(self.params['CI'])
                self.unlock_control_info()
            if 'LI' in self.params:
                self.ALLCNT = len(self.params['LI'])
                self.unlock_update_info()
        # email summary: only when an action set SUBJECT, email is not suppressed
        # (-NE) and either errors happened or error-only email (-EE) is off
        if self.SUBJECT and 'NE' not in self.params and (self.PGLOG['ERRCNT'] or 'EE' not in self.params):
            self.SUBJECT += " on " + self.PGLOG['HOSTNAME']
            self.set_email("{}: {}".format(self.SUBJECT, self.TOPMSG), self.EMLTOP)
            if self.ACTSTR: self.SUBJECT = "{} for {}".format(self.ACTSTR, self.SUBJECT)
            if self.PGSIG['PPID'] > 1: self.SUBJECT += " in CPID {}".format(self.PGSIG['PID'])
            if self.PGLOG['ERRCNT'] > 0: self.SUBJECT += " With Error"
            if self.PGLOG['DSCHECK']:
                self.build_customized_email("dscheck", "einfo", "cindex = {}".format(self.PGLOG['DSCHECK']['cindex']),
                                            self.SUBJECT, self.PGOPT['wrnlog'])
            elif self.PGOPT['UCNTL']:
                self.build_customized_email("dcupdt", "einfo", "cindex = {}".format(self.PGOPT['UCNTL']['cindex']),
                                            self.SUBJECT, self.PGOPT['wrnlog'])
            else:
                self.pglog(self.SUBJECT, self.PGOPT['wrnlog']|self.SNDEML)
        # finalize the dscheck record with error or done status
        if self.PGLOG['DSCHECK']:
            if self.PGLOG['ERRMSG']:
                self.record_dscheck_error(self.PGLOG['ERRMSG'])
            else:
                self.record_dscheck_status("D")
        if self.OPTS[self.PGOPT['CACT']][2]: self.cmdlog()  # log end time if not getting only action
118
+
119
+ # delete update control records for given dsid and control indices
120
+ def delete_control_info(self):
121
+ s = 's' if self.ALLCNT > 1 else ''
122
+ self.pglog("Delete {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG)
123
+ delcnt = modcnt = 0
124
+ for i in range(self.ALLCNT):
125
+ cidx = self.lock_update_control(self.params['CI'][i], 2, self.PGOPT['extlog'])
126
+ if cidx <= 0: continue
127
+ ccnd = "cindex = {}".format(cidx)
128
+ delcnt += self.pgdel("dcupdt", ccnd, self.PGOPT['extlog'])
129
+ modcnt += self.pgexec("UPDATE dlupdt SET cindex = 0 WHERE " + ccnd, self.PGOPT['extlog'])
130
+ self.pglog("{} of {} update control record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
131
+ if modcnt > 0:
132
+ s = 's' if modcnt > 1 else ''
133
+ self.pglog("{} associated local file record{} modified".format(modcnt, s), self.PGOPT['wrnlog'])
134
+
135
+ # delete local files for given dsid and locfile indices
136
+ def delete_local_info(self):
137
+ s = 's' if self.ALLCNT > 1 else ''
138
+ self.pglog("Delete {} Locfile record{} ...".format(self.ALLCNT, s), self.WARNLG)
139
+ dcnt = delcnt = 0
140
+ for i in range(self.ALLCNT):
141
+ lidx = self.params['LI'][i]
142
+ lcnd = "lindex = {}".format(lidx)
143
+ if self.lock_update(lidx, None, 2, self.PGOPT['errlog']) <= 0: continue
144
+ cnt = self.pgget("drupdt", "", lcnd, self.PGOPT['extlog'])
145
+ if cnt > 0:
146
+ ss = 's' if cnt > 1 else ''
147
+ self.pglog("Delete {} associated remote file record{} for Locfile index {} ...".format(cnt, ss, lidx), self.WARNLG)
148
+ dcnt += self.pgdel("drupdt", lcnd, self.PGOPT['extlog'])
149
+ delcnt += self.pgdel("dlupdt", lcnd, self.PGOPT['extlog'])
150
+ self.pglog("{} of {} Locfile record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
151
+ if dcnt > 0:
152
+ s = "s" if (dcnt > 1) else ""
153
+ self.pglog("{} associated Remote file record{} deleted too".format(dcnt, s), self.PGOPT['wrnlog'])
154
+
155
+ # delete update remote files for given dsid and remote files/locfile indices
156
+ def delete_remote_info(self):
157
+ s = 's' if self.ALLCNT > 1 else ''
158
+ self.pglog("Delete {} remote file record{} ...".format(self.ALLCNT, s), self.WARNLG)
159
+ self.validate_multiple_options(self.ALLCNT, ["LI", "DO"])
160
+ delcnt = 0
161
+ for i in range(self.ALLCNT):
162
+ lcnd = "lindex = {} AND remotefile = '{}'".format(self.params['LI'][i], self.params['RF'][i])
163
+ if 'DO' in self.params: lcnd += " AND dindex = {}".format(self.params['DO'][i])
164
+ delcnt += self.pgdel("drupdt", lcnd, self.PGOPT['extlog'])
165
+ self.pglog("{} of {} remote file record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
166
+
167
+ # get update control information
168
+ def get_control_info(self):
169
+ tname = "dcupdt"
170
+ hash = self.TBLHASH[tname]
171
+ self.pglog("Get update control info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
172
+ lens = fnames = None
173
+ if 'FN' in self.params: fnames = self.params['FN']
174
+ fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['dcall'])
175
+ onames = self.params['ON'] if 'ON' in self.params else "C"
176
+ condition = self.file_condition(tname) + self.get_order_string(onames, tname)
177
+ pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog'])
178
+ if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash)
179
+ self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
180
+ if self.PGOPT['CACT'] == "GA": self.OUTPUT.write("[{}]\n".format(tname.upper()))
181
+ self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n")
182
+ if pgrecs:
183
+ cnt = self.print_column_format(pgrecs, fnames, hash, lens)
184
+ s = 's' if cnt > 1 else ''
185
+ self.pglog("{} update control record{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
186
+ else:
187
+ self.pglog("no update control record retrieved", self.PGOPT['wrnlog'])
188
+
189
+ # get local file update information
190
+ def get_local_info(self):
191
+ tname = "dlupdt"
192
+ hash = self.TBLHASH[tname]
193
+ self.pglog("Get local file update info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
194
+ lens = fnames = None
195
+ if 'FN' in self.params: fnames = self.params['FN']
196
+ fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['dlall'])
197
+ onames = self.params['ON'] if 'ON' in self.params else "XL"
198
+ condition = self.file_condition(tname) + self.get_order_string(onames, tname)
199
+ pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog'])
200
+ if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash)
201
+ if self.PGOPT['CACT'] == "GL":
202
+ self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
203
+ else:
204
+ self.OUTPUT.write("[{}]\n".format(tname.upper()))
205
+ self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n")
206
+ if pgrecs:
207
+ cnt = self.print_column_format(pgrecs, fnames, hash, lens)
208
+ s = 's' if cnt > 1 else ''
209
+ self.pglog("{} locfile record{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
210
+ else:
211
+ self.pglog("no locfile record retrieved", self.PGOPT['wrnlog'])
212
+
213
+ # get remote file update information
214
+ def get_remote_info(self):
215
+ tname = "drupdt"
216
+ hash = self.TBLHASH[tname]
217
+ self.pglog("Get remote file update info of {} from RDADB ...".format(self.params['DS']), self.WARNLG)
218
+ lens = fnames = None
219
+ if 'FN' in self.params: fnames = self.params['FN']
220
+ fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['drall'])
221
+ onames = self.params['ON'] if 'ON' in self.params else "LDF"
222
+ condition = self.file_condition(tname) + self.get_order_string(onames, tname)
223
+ pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog'])
224
+ if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash)
225
+ if self.PGOPT['CACT'] == "GR":
226
+ self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS']))
227
+ else:
228
+ self.OUTPUT.write("[{}]\n".format(tname.upper()))
229
+ self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n")
230
+ if pgrecs:
231
+ cnt = self.print_column_format(pgrecs, fnames, hash, lens)
232
+ s = 's' if cnt > 1 else ''
233
+ self.pglog("{} remote file record{} retrieved".format(cnt, s), self.PGOPT['wrnlog'])
234
+ else:
235
+ self.pglog("no remote file record retrieved", self.PGOPT['wrnlog'])
236
+
237
    # add or modify update control information
    def set_control_info(self):
        """Add new or modify existing dcupdt records from the -CI option list.

        A positive control index means modify (the record is locked first and
        must exist); a non-positive index means add a new record. Field values
        come from the matching multi-value options via build_record().
        """
        tname = 'dcupdt'
        s = 's' if self.ALLCNT > 1 else ''
        self.pglog("Set {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG)
        addcnt = modcnt = 0
        flds = self.get_field_keys(tname, None, 'C')
        if not flds: return self.pglog("Nothing to set for update control!", self.PGOPT['errlog'])
        self.validate_multiple_values(tname, self.ALLCNT, flds)
        fields = self.get_string_fields(flds, tname)
        for i in range(self.ALLCNT):
            cidx = self.params['CI'][i]
            if cidx > 0:
                # modify path: lock the existing record, skip if lock fails
                if self.lock_update_control(cidx, 2, self.PGOPT['errlog']) <= 0: continue
                cnd = "cindex = {}".format(cidx)
                pgrec = self.pgget(tname, fields, cnd, self.PGOPT['errlog'])
                if not pgrec: self.action_error("Error get update control record for " + cnd)
            else:
                pgrec = None
            record = self.build_record(flds, pgrec, tname, i)
            if record:
                # a parent control index must point at an existing dcupdt record
                if 'pindex' in record and record['pindex'] and not self.pgget("dcupdt", "", "cindex = {}".format(record['pindex'])):
                    self.action_error("Parent control Index {} is not in RDADB".format(record['pindex']))
                if 'action' in record and not re.match(r'^({})$'.format(self.PGOPT['UPDTACTS']), record['action']):
                    self.action_error("Action Name '{}' must be one of dsupdt Actions ({})".format(record['action'], self.PGOPT['UPDTACTS']))
                if pgrec:
                    # clearing pid/lockhost releases the lock as part of the update
                    record['pid'] = 0
                    record['lockhost'] = ''
                    modcnt += self.pgupdt(tname, record, cnd, self.PGOPT['errlog']|self.DODFLT)
                else:
                    record['dsid'] = self.params['DS']
                    if 'specialist' not in record: record['specialist'] = self.params['LN']
                    addcnt += self.pgadd(tname, record, self.PGOPT['errlog']|self.DODFLT)
            elif cidx: # unlock
                # nothing to change: just release the lock taken above
                self.lock_update_control(cidx, 0, self.PGOPT['errlog'])
        self.pglog("{}/{} of {} control record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
273
+
274
    # add or modify local file update information
    def set_local_info(self):
        """Add new or modify existing dlupdt records from the -LI option list.

        A positive locfile index means modify (the record is locked first and
        must exist); a non-positive index means add. With -RO (and no -XO) the
        execution-order field is included and refreshed per record.
        """
        tname = 'dlupdt'
        s = 's' if self.ALLCNT > 1 else ''
        self.pglog("Set {} local file record{} ...".format(self.ALLCNT, s), self.WARNLG)
        addcnt = modcnt = 0
        flds = self.get_field_keys(tname, None, 'L')
        if 'RO' in self.params and 'XO' not in self.params: flds += 'X'
        if not flds: return self.pglog("Nothing to set for update local file!", self.PGOPT['errlog'])
        self.validate_multiple_values(tname, self.ALLCNT, flds)
        fields = self.get_string_fields(flds, tname)
        for i in range(self.ALLCNT):
            lidx = self.params['LI'][i]
            if lidx > 0:
                # modify path: lock the existing record, skip if lock fails
                if self.lock_update(lidx, None, 2, self.PGOPT['errlog']) <= 0: continue
                cnd = "lindex = {}".format(lidx)
                pgrec = self.pgget(tname, fields, cnd, self.PGOPT['errlog'])
                if not pgrec: self.action_error("Error get Local file record for " + cnd)
            else:
                pgrec = None
            # NOTE(review): assumes validate_multiple_values() created params['XO']
            # when -RO added 'X' to flds — otherwise this would raise KeyError
            if 'RO' in self.params: self.params['XO'][i] = self.get_next_exec_order(self.params['DS'], 0)
            record = self.build_record(flds, pgrec, tname, i)
            if record:
                # the referenced update control must exist
                if 'cindex' in record and record['cindex'] and not self.pgget("dcupdt", "", "cindex = {}".format(record['cindex'])):
                    self.action_error("Update control Index {} is not in RDADB".format(record['cindex']))
                if 'action' in record and not re.match(r'^({})$'.format(self.PGOPT['ARCHACTS']), record['action']):
                    self.action_error("Action Name '{}' must be one of dsarch Actions ({})".format(record['action'], self.PGOPT['ARCHACTS']))
                if pgrec:
                    if 'VI' in record and not record['VI'] and pgrec['missdate']: record['missdate'] = record['misshour'] = None
                    # clearing pid releases the lock as part of the update;
                    # NOTE(review): hostname is set to 0 here while set_control_info
                    # uses '' for lockhost — confirm 0 is intended for this column
                    record['pid'] = 0
                    record['hostname'] = 0
                    modcnt += self.pgupdt(tname, record, cnd, self.PGOPT['errlog']|self.DODFLT)
                else:
                    record['dsid'] = self.params['DS']
                    if 'specialist' not in record: record['specialist'] = self.params['LN']
                    if 'execorder' not in record: record['execorder'] = self.get_next_exec_order(self.params['DS'], 1)
                    addcnt += self.pgadd(tname, record, self.PGOPT['errlog']|self.DODFLT)
            elif lidx: # unlock
                # nothing to change: just release the lock taken above
                self.lock_update(lidx, None, 0, self.PGOPT['errlog'])
        self.pglog("{}/{} of {} Locfile record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
314
+
315
    # add or modify remote file update information
    def set_remote_info(self):
        """Add new or modify existing drupdt records from paired -LI/-RF
        (and optional -DO) option values.

        The (lindex, remotefile, dindex) triple identifies the record: modify
        when it already exists in RDADB, otherwise add a new one.
        """
        tname = 'drupdt'
        s = 's' if self.ALLCNT > 1 else ''
        self.pglog("Set {} update remote file{} ...".format(self.ALLCNT, s), self.WARNLG)
        addcnt = modcnt = 0
        flds = self.get_field_keys(tname)
        if not flds: return self.pglog("Nothing to set for update remote file!", self.PGOPT['errlog'])
        self.validate_multiple_values(tname, self.ALLCNT, flds)
        fields = self.get_string_fields(flds, tname)
        for i in range(self.ALLCNT):
            lidx = self.params['LI'][i]
            # dindex defaults to 0 when no -DO values were supplied
            didx = self.params['DO'][i] if 'DO' in self.params else 0
            cnd = "lindex = {} AND remotefile = '{}' AND dindex = {}".format(lidx, self.params['RF'][i], didx)
            pgrec = self.pgget("drupdt", fields, cnd, self.PGOPT['errlog'])
            record = self.build_record(flds, pgrec, tname, i)
            if record:
                # the referenced local-file record must exist
                if 'lindex' in record and record['lindex'] and not self.pgget("dlupdt", "", "lindex = {}".format(record['lindex'])):
                    self.action_error("Local file Index {} is not in RDADB".format(record['lindex']))
                if pgrec:
                    modcnt += self.pgupdt("drupdt", record, cnd, self.PGOPT['errlog']|self.DODFLT)
                else:
                    record['lindex'] = lidx
                    record['dsid'] = self.params['DS']
                    addcnt += self.pgadd("drupdt", record, self.PGOPT['errlog']|self.DODFLT)
        self.pglog("{}/{} of {} remote file record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog'])
341
+
342
+ # unlock update records for given locfile indices
343
+ def unlock_update_info(self):
344
+ s = 's' if self.ALLCNT > 1 else ''
345
+ self.pglog("Unlock {} update locfile{} ...".format(self.ALLCNT, s), self.WARNLG)
346
+ modcnt = 0
347
+ for lidx in self.params['LI']:
348
+ cnd = "lindex = {}".format(lidx)
349
+ pgrec = self.pgget("dlupdt", "pid, hostname", cnd, self.PGOPT['extlog'])
350
+ if not pgrec:
351
+ self.pglog("{}: Local File Not exists".format(lidx), self.PGOPT['errlog'])
352
+ elif not pgrec['pid']:
353
+ self.pglog("{}: Local File Not locked".format(lidx), self.PGOPT['wrnlog'])
354
+ elif self.lock_update(lidx, None, -1, self.PGOPT['errlog']) > 0:
355
+ modcnt += 1
356
+ self.pglog("{}: Local File Unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog'])
357
+ elif (self.check_host_down(None, pgrec['hostname']) and
358
+ self.lock_update(lidx, None, -2, self.PGOPT['errlog']) > 0):
359
+ modcnt += 1
360
+ self.pglog("{}: Local File Force unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog'])
361
+ else:
362
+ self.pglog("{}: Local File Unable to unlock {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog'])
363
+ self.pglog("{} of {} local file record{} unlocked from RDADB".format(modcnt, self.ALLCNT, s), self.LOGWRN)
364
+
365
+ # unlock update control records for given locfile indices
366
+ def unlock_control_info(self):
367
+ s = 's' if self.ALLCNT > 1 else ''
368
+ self.pglog("Unlock {} update control{} ...".format(self.ALLCNT, s), self.WARNLG)
369
+ modcnt = 0
370
+ for cidx in self.params['CI']:
371
+ pgrec = self.pgget("dcupdt", "pid, lockhost", "cindex = {}".format(cidx), self.PGOPT['extlog'])
372
+ if not pgrec:
373
+ self.pglog("{}: Update Control Not exists".format(cidx), self.PGOPT['errlog'])
374
+ elif not pgrec['pid']:
375
+ self.pglog("{}: Update Control Not locked".format(cidx), self.PGOPT['wrnlog'])
376
+ elif self.lock_update_control(cidx, -1, self.PGOPT['extlog']) > 0:
377
+ modcnt += 1
378
+ self.pglog("{}: Update Control Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
379
+ elif (self.check_host_down(None, pgrec['lockhost']) and
380
+ self.lock_update_control(cidx, -2, self.PGOPT['extlog']) > 0):
381
+ modcnt += 1
382
+ self.pglog("{}: Update Control Force unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
383
+ else:
384
+ self.pglog("{}: Undate Control Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog'])
385
+ self.pglog("{} of {} update control record{} unlocked from RDADB".format(modcnt, self.ALLCNT, s), self.LOGWRN)
386
+
387
+ # get update info of local and remote files owned by login name
388
+ def get_update_info(self):
389
+ if 'DS' in self.params:
390
+ dsids = {'dsid': [self.params['DS']]}
391
+ dscnt = 1
392
+ else:
393
+ tname = "dlupdt"
394
+ cnd = self.file_condition(tname, None, None, 1)
395
+ if not cnd:
396
+ self.set_default_value("SN", self.params['LN'])
397
+ cnd = self.file_condition(tname, None, None, 1)
398
+ dsids = self.pgmget(tname, "DISTINCT dsid", cnd, self.PGOPT['extlog'])
399
+ dscnt = len(dsids['dsid']) if dsids else 0
400
+ if dscnt == 0:
401
+ return self.pglog("NO dataset identified for giving condition", self.PGOPT['wrnlog'])
402
+ elif dscnt > 1:
403
+ self.pglog("Get Update Info for {} datasets".format(dscnt), self.PGOPT['wrnlog'])
404
+ self.PGOPT['AUTODS'] = dscnt
405
+ for i in range(dscnt):
406
+ self.params['DS'] = dsids['dsid'][i]
407
+ if self.PGOPT['ACTS'] == self.OPTS['GC'][0]:
408
+ self.get_control_info()
409
+ elif self.PGOPT['ACTS'] == self.OPTS['GL'][0]:
410
+ self.get_local_info()
411
+ elif self.PGOPT['ACTS'] == self.OPTS['GR'][0]:
412
+ self.get_remote_info()
413
+ else:
414
+ if 'ON' in self.params: del self.params['ON'] # use default order string
415
+ if 'FN' not in self.params: self.params['FN'] = 'ALL'
416
+ if self.PGOPT['ACTS']&self.OPTS['GC'][0]: self.get_control_info()
417
+ if self.PGOPT['ACTS']&self.OPTS['GL'][0]: self.get_local_info()
418
+ if self.PGOPT['ACTS']&self.OPTS['GR'][0]: self.get_remote_info()
419
+ if dscnt > 1: self.pglog("Update Info of {} datasets retrieved".format(dscnt), self.PGOPT['wrnlog'])
420
+
421
    # gather due datasets for data update
    def dataset_update(self):
        """Find the datasets due for update and drive the per-local-file
        update cycle (download / build / archive) for each.

        Without -DS the dataset list comes from dlupdt records owned by the
        login name (optionally restricted to standalone records with no
        control index). For each dataset every matching dlupdt record is fed
        to file_update(); per-dataset counters in self.PGOPT are rolled up
        into totals and an email summary (TOPMSG/ACTSTR/SUBJECT) at the end.
        Child processes (PPID > 1) break out early after their single record.
        """
        actcnd = "specialist = '{}'".format(self.params['LN'])
        if self.PGOPT['ACTS']&self.OPTS['AF'][0]: actcnd += " AND action IN ('AW', 'AS', 'AQ')"
        (self.PGOPT['CURDATE'], self.PGOPT['CURHOUR']) = self.curdatehour()
        if 'CD' not in self.params: self.params['CD'] = self.PGOPT['CURDATE'] # default to current date
        if 'CH' not in self.params: self.params['CH'] = self.PGOPT['CURHOUR'] # default to current hour
        # NOTE(review): this reads self.params['MU'] without a membership test —
        # would raise KeyError when -MU was not given; other option accesses in
        # this file always check membership first. Confirm 'MU' is pre-seeded.
        if self.ALLCNT > 1 and self.params['MU']: del self.params['MU']
        if 'CN' in self.params and 'RD' in self.params: del self.params['CN']
        if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params:
            if 'MO' in self.params: del self.params['MO']
        elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF":
            self.params['MO'] = -1
        if 'DS' in self.params:
            dsids = [self.params['DS']]
            dscnt = 1
        else:
            # without -CI only standalone records (no update control) qualify
            if 'CI' not in self.params: actcnd += " AND cindex = 0"
            loccnd = self.file_condition('dlupdt', "LQFIXA", None, 1)
            dscnd = actcnd
            if loccnd: dscnd += " AND " + loccnd
            pgrecs = self.pgmget("dlupdt", "DISTINCT dsid", dscnd, self.PGOPT['extlog'])
            dsids = pgrecs['dsid'] if pgrecs else []
            dscnt = len(dsids)
        if not dscnt: return self.pglog("NO dataset is due for update on {} for {}".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])
        self.PGOPT['AUTODS'] = dscnt
        actcnd += " ORDER BY execorder, lindex"
        if self.PGLOG['DSCHECK']:
            # dscheck mode: pre-count the files to process for progress tracking
            fcnt = 0
            for i in range(dscnt):
                self.params['DS'] = dsids[i]
                loccnd = self.file_condition('dlupdt', "LQFIXA")
                locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog'])
                loccnt = len(locrecs['locfile']) if locrecs else 0
                if loccnt == 0: continue
                for j in range(loccnt):
                    locrec = self.onerecord(locrecs, j)
                    # a single -LF value overrides the recorded local file name
                    if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and
                        len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']):
                        locrec['locfile'] = self.params['LF'][0]
                    fcnt += self.file_update(locrec, self.LOGWRN, 1)
            self.set_dscheck_fcount(fcnt, self.LOGERR)
        # check and update data for each dataset
        logact = self.PGOPT['emllog']
        acnt = ucnt = 0
        for i in range(dscnt):
            self.params['DS'] = dsids[i]
            loccnd = self.file_condition('dlupdt', "LQFIXA")
            locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog'])
            loccnt = len(locrecs['locfile']) if locrecs else 0
            if loccnt == 0:
                s = "-UC{}".format(self.params['CI'][0]) if ('CI' in self.params and len(self.params['CI']) == 1) else ""
                self.pglog("{}{}: no config record of local file found to update for '{}'".format(self.params['DS'], s, self.params['LN']), self.PGOPT['wrnlog'])
                continue
            s = 's' if loccnt > 1 else ''
            self.pglog("{}: {} for {} update record{}".format(self.params['DS'], self.PGOPT['CACT'], loccnt, s), logact)
            logact = self.PGOPT['emlsep']
            for j in range(loccnt):
                locrec = self.onerecord(locrecs, j)
                # a single -LF value overrides the recorded local file name
                if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and
                    len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']):
                    locrec['locfile'] = self.params['LF'][0]
                if locrec['cindex']:
                    if 'CI' not in self.params:
                        # adopt this record's control and re-derive dependent options
                        self.params['CI'] = [locrec['cindex']]
                        self.cache_update_control(locrec['cindex'], 0)
                        if 'CN' in self.params and 'RD' in self.params: del self.params['CN']
                        if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params:
                            if 'MO' in self.params: del self.params['MO']
                        elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF":
                            self.params['MO'] = -1
                    elif locrec['cindex'] != self.params['CI'][0]:
                        self.pglog("{}-{}: Skipped due to control index {} mismatches {}".format(self.params['DS'], locrec['lindex'], locrec['cindex'], self.params['CI'][0]), self.PGOPT['emlerr'])
                        continue
                self.PGOPT['rstat'] = 1 # reset remote download status for each local file
                if self.PGSIG['MPROC'] > 1: acnt += 1
                fcnt = self.file_update(locrec, logact)
                if self.PGSIG['PPID'] > 1:
                    if self.PGOPT['AUTODS'] > 1: self.PGOPT['AUTODS'] = dscnt = 1
                    acnt = ucnt = 0 # reinitialize counts for child process
                    break # stop loop in child
                if self.PGSIG['MPROC'] > 1:
                    if fcnt == 0:
                        break # quit
                    else:
                        if fcnt > 0: ucnt += 1 # record update count, s is either -1 or 1
                        continue # non-daemon parent
                if 'QE' in self.params and fcnt <= 0: break
            if self.PGOPT['vcnt'] > 0:
                # some files were rearchived: bump the internal version once per dataset
                self.renew_internal_version(self.params['DS'], self.PGOPT['vcnt'])
                self.PGOPT['vcnt'] = 0
            if self.PGSIG['MPROC'] > 1:
                # NOTE(review): j is the last index from range(loccnt), so j == loccnt
                # can only hold when the loop ran to completion AND loccnt were... it
                # never equals loccnt after a full run (max is loccnt-1) — looks like
                # a possible off-by-one; confirm against the original Perl utility.
                if not self.PGSIG['QUIT'] and j == loccnt: continue
                break
            # roll per-dataset counters from PGOPT into the run totals
            if self.PGOPT['rcnt']:
                if self.PGOPT['CACT'] == "DR":
                    acnt += self.PGOPT['rcnt']
                    ucnt += self.PGOPT['dcnt']
                s = 's' if self.PGOPT['rcnt'] > 1 else ''
                if loccnt > 1:
                    self.pglog("{}: {} of {} rfile{} gotten!".format(self.params['DS'], self.PGOPT['dcnt'], self.PGOPT['rcnt'], s), self.PGOPT['emllog'])
                self.PGOPT['rcnt'] = self.PGOPT['dcnt'] = 0
            if self.PGOPT['lcnt']:
                if self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB":
                    acnt += self.PGOPT['lcnt']
                    ucnt += self.PGOPT['bcnt']
                s = 's' if self.PGOPT['lcnt'] > 1 else ''
                if loccnt > 1 and self.PGOPT['bcnt'] > 0:
                    self.pglog("{}: {} of {} lfile{} built!".format(self.params['DS'], self.PGOPT['bcnt'], self.PGOPT['lcnt'], s), self.PGOPT['emllog'])
                self.PGOPT['lcnt'] = self.PGOPT['bcnt'] = 0
            if self.PGOPT['acnt']:
                acnt += self.PGOPT['acnt']
                ucnt += self.PGOPT['ucnt']
                s = 's' if self.PGOPT['acnt'] > 1 else ''
                self.pglog("{}: {} of {} local file{} archived!".format(self.params['DS'], self.PGOPT['ucnt'], self.PGOPT['acnt'], s),
                           (self.PGOPT['emlsum'] if dscnt > 1 else self.PGOPT['emllog']))
                self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0
            if self.PGSIG['PPID'] > 1: break # stop loop child
        if acnt > 0:
            # build the email summary text from the accumulated counts
            self.TOPMSG = detail = ""
            if self.PGSIG['MPROC'] > 1:
                s = 's' if acnt > 1 else ''
                self.ACTSTR = "{} of {} CPIDs{} for 'dsupdt {}' started".format(ucnt, acnt, s, self.PGOPT['CACT'])
            else:
                s = 's' if ucnt > 1 else ''
                self.TOPMSG = ""
                if self.PGOPT['CACT'] == "DR":
                    atype = "remote file{} gotten".format(s)
                elif self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB":
                    atype = "local file{} built".format(s)
                else:
                    atype = "local file{} archived".format(s)
                if self.PGOPT['rdcnt'] > 0:
                    s = 's' if self.PGOPT['rdcnt'] > 1 else ''
                    self.TOPMSG = "{} remote server file{} downloaded and ".format(self.PGOPT['rdcnt'], s)
                # detail lists the per-archive-type counts, joined by " & "
                if self.PGOPT['udcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Web Online".format(self.PGOPT['udcnt'])
                if self.PGOPT['uncnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Glade Only".format(self.PGOPT['uncnt'])
                if self.PGOPT['uwcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Web".format(self.PGOPT['uwcnt'])
                if self.PGOPT['uscnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Saved".format(self.PGOPT['uscnt'])
                if self.PGOPT['qbcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Quasar Backup".format(self.PGOPT['qbcnt'])
                if self.PGOPT['qdcnt'] > 0:
                    if detail: detail += " & "
                    detail += "{} Quasar Drdata".format(self.PGOPT['qdcnt'])
                self.ACTSTR = "{} {}".format(ucnt, atype)
                self.TOPMSG += self.ACTSTR
                if detail: self.TOPMSG += " ({})".format(detail)
            if dscnt > 1:
                self.pglog("{} datasets: {}".format(dscnt, self.TOPMSG), self.PGOPT['emlsum'])
            self.SUBJECT = "DSUPDT of "
            if self.PGOPT['AUTODS'] < 2:
                self.SUBJECT += self.params['DS'].upper()
            else:
                self.SUBJECT += "{} Datasets".format(self.PGOPT['AUTODS'])
        if self.PGOPT['UCNTL']:
            # running under an update control: reset its schedule and tag the subject
            self.reset_control_time()
            if self.SUBJECT: self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex'])
587
+
588
+ # renew internal version number for given dataset
589
+ def renew_internal_version(self, dsid, vcnt):
590
+ s = 's' if vcnt > 1 else ''
591
+ cmd = "dsarch {} SV -NV -DE '{} Data file{} rearchived'".format(dsid, vcnt, s)
592
+ if self.pgsystem(cmd, self.PGOPT['emerol'], 5): # 1 + 4
593
+ pgrec = self.pgget('dsvrsn', '*', "dsid = '{}' and status = 'A'".format(dsid), self.PGOPT['emerol'])
594
+ if pgrec:
595
+ vmsg = "set to {} for DOI {}".format(pgrec['iversion'], pgrec['doi'])
596
+ else:
597
+ vmsg = 'renewed'
598
+ self.pglog("{}: {} Data file{} rearchived, Internal version number {}".format(dsid, vcnt, s, vmsg), self.PGOPT['emlsum'])
599
+
600
+ # cach the total count of files to be archived
601
+ def count_caching(self, locrec, locinfo):
602
+ files = self.expand_serial_pattern(locrec['locfile'])
603
+ scnt = len(files) if files else 1
604
+ if self.ALLCNT > 1:
605
+ ecnt = self.ALLCNT
606
+ else:
607
+ tinfo = self.TEMPINFO[locrec['lindex']] = self.get_tempinfo(locrec, locinfo, 0)
608
+ ecnt = len(tinfo['ED']) if tinfo else 1
609
+ return ecnt * scnt
610
+
611
+ # gather/archive due data file for update of each local file
612
    def file_update(self, locrec, logact, caching = 0):
        """Run one full update cycle for a single local-file record.

        For each due update period, this resolves the local file names,
        downloads the matching remote files, builds the local file, archives
        it via archive_data_file(), cleans older working files, and resets
        the record's update times.

        locrec  - local-file update record; fields read include lindex, dsid,
                  locfile, specialist, workdir, action, cleancmd,
                  processremote, buildcmd
        logact  - logging flags for the per-file progress messages
        caching - non-zero: do no work, just return the estimated file count
                  from count_caching() (or None when the record is unusable)

        Returns the count of files found archived already (retcnt), 0 when
        temporal info is missing or locking fails, a pglog() result on
        errors, or the child pid/1 in multi-process mode.
        """
        lfile = locrec['locfile']
        endonly = retcnt = 0
        lindex = locrec['lindex']
        loccnd = "lindex = {}".format(lindex)
        locinfo = "{}-L{}".format(locrec['dsid'], lindex)
        # a record without a local file name cannot be processed
        if not lfile:
            if caching:
                return None
            else:
                return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr'])
        locinfo += "-" + lfile
        # only the owning specialist may update this record
        if locrec['specialist'] != self.params['LN']:
            if caching:
                return None
            else:
                return self.pglog("{}: owner '{}', NOT '{}'".format(locinfo, locrec['specialist'], self.params['LN']), self.PGOPT['emlerr'])
        if caching: return self.count_caching(locrec, locinfo)
        # reuse the temporal info cached by count_caching() when present
        tempinfo = self.TEMPINFO[lindex] if lindex in self.TEMPINFO else self.get_tempinfo(locrec, locinfo, 0)
        if not tempinfo: return 0 # simply return if miss temporal info for update
        # gather the matching remote-file records from drupdt
        rmtcnd = loccnd
        rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1)
        if rcnd: rmtcnd += " AND " + rcnd
        rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog'])
        rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0
        if rcnt == 0:
            if rcnd and self.pgget("drupdt", "", loccnd):
                return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr'])
            # create an empty remote-file record so the loops below run once
            rcnt = 1
            rmtrecs = {'lindex': [lindex], 'dindex': [0]}
            rflds = ['remotefile', 'serverfile', 'download', 'begintime', 'endtime', 'tinterval']
            for rfld in rflds: rmtrecs[rfld] = [None]
        if rcnt == 1:
            # command-line -RF/-SF override the single remote/server file names
            if 'RF' in self.params and len(self.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0]):
                rmtrecs['remotefile'][0] = self.params['RF'][0]
            if 'SF' in self.params and len(self.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0]):
                rmtrecs['serverfile'][0] = self.params['SF'][0]
        ecnt = self.ALLCNT if self.ALLCNT > 1 else len(tempinfo['ED']) # should be at least one
        if self.PGSIG['MPROC'] > 1:
            # multi-process mode: fork a child to do the work for this record
            pname = "updt{}".format(lindex)
            pid = self.start_child(pname, self.PGOPT['wrnlog'], 1) # try to start a child process
            if pid <= 0: return pid # failed to start a child process
            if self.PGSIG['PPID'] > 1:
                self.set_email() # empty email in child process
                self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0
            else:
                # parent process: log the child pid and return right away
                edate = tempinfo['ED'][0]
                ehour = tempinfo['EH'][0]
                lfile = self.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ'])
                locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
                if ecnt > 1: locinfo += ", {} Update Periods".format(ecnt)
                self.pglog("CPID {} for 'dsupdt {}' of {}".format(self.pname2cpid(pname), self.PGOPT['CACT'], locinfo), self.PGOPT['emllog'])
                return 1 # no further action in non-daemon program
        if self.lock_update(lindex, locinfo, 1, self.PGOPT['emllog']) <= 0: return 0
        self.PGOPT['lindex'] = lindex
        # command-line -PR/-BC override the record's process/build commands
        tempinfo['prcmd'] = self.params['PR'][0] if 'PR' in self.params else locrec['processremote']
        tempinfo['blcmd'] = self.params['BC'][0] if 'BC' in self.params else locrec['buildcmd']
        postcnt = -1 # -1: no post command; >= 0: count archives since last post command
        if self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']:
            tempinfo['postcmd'] = self.params['XC'][0] if 'XC' in self.params else self.PGOPT['UCNTL']['execcmd']
            if tempinfo['postcmd']: postcnt = 0
        setmiss = 1 if tempinfo['VD'] else 0
        ufile = uinfo = None
        rscnt = ucnt = lcnt = 0
        for i in range(ecnt): # one pass per due update period
            if self.ALLCNT > 1 and i > 0:
                tempinfo = self.get_tempinfo(locrec, locinfo, i)
                if not tempinfo: break
                edate = tempinfo['ED'][0]
                ehour = tempinfo['EH'][0]
            else:
                edate = tempinfo['ED'][i]
                ehour = tempinfo['EH'][i]
            if 'RE' in self.params and i and self.diffdatehour(edate, ehour, tempinfo['edate'], tempinfo['ehour']) <= 0:
                continue
            # periodically refresh gathered metadata while archiving with -R -S
            if ucnt and tempinfo['RS'] == 1 and i%20 == 0: self.refresh_metadata(locrec['dsid'])
            tempinfo['edate'] = edate
            if ehour != None:
                tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour)
                tempinfo['ehour'] = ehour
            else:
                tempinfo['einfo'] = "end data date {}".format(edate)
                tempinfo['ehour'] = None
            if 'GZ' in self.params: tempinfo['einfo'] += "(UTC)"
            locfiles = self.get_local_names(locrec['locfile'], tempinfo)
            lcnt = len(locfiles) if locfiles else 0
            if not lcnt: break
            rmtcnt = acnt = ccnt = ut = 0
            rfiles = rfile = None
            if tempinfo['RS'] == 0 and lcnt > 2: tempinfo['RS'] = 1
            for l in range(lcnt): # one pass per local file of this period
                if self.PGLOG['DSCHECK'] and ((l+1)%20) == 0:
                    self.add_dscheck_dcount(20, 0, self.PGOPT['extlog'])
                lfile = locfiles[l]
                locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
                tempinfo['gotnew'] = tempinfo['archived'] = 0
                tempinfo['ainfo'] = None
                tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo)
                if not tempinfo['ainfo']: continue
                if tempinfo['ainfo']['archived'] == tempinfo['ainfo']['archcnt']:
                    # all targets for this local file are archived already
                    ufile = "{} at {} {}".format(lfile, tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime'])
                    tempinfo['archived'] = 1
                    if 'MO' in self.params:
                        if self.params['MO'] < 0:
                            self.pglog("{}: {} already for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), self.PGOPT['emlsum'])
                            if i == 0: self.pglog("Add Mode option -RA if you want to re-archive", self.PGOPT['wrnlog'])
                            if 'UT' in self.params or 'ED' not in self.params: ut = 1
                            retcnt += 1
                            continue
                else:
                    if self.PGOPT['ACTS']&self.OPTS['AF'][0]: uinfo = locinfo
                self.pglog("{}: {} for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), logact)
                if not self.change_workdir(locrec['workdir'], locinfo, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']):
                    break
                if self.PGOPT['ACTS']&self.OPTS['AF'][0]: self.PGOPT['acnt'] += 1
                if self.PGOPT['ACTS']&self.OPTS['BL'][0]: self.PGOPT['lcnt'] += 1
                opt = 1 if tempinfo['AQ'] else 65 # 1+64(remove small file)
                linfo = self.check_local_file(lfile, opt, self.PGOPT['emerol'])
                cnt = -1 # -1: no remote files resolved yet for this local file
                if rmtcnt > 0:
                    cnt = rmtcnt
                    rfile = rfiles[l]
                else:
                    dr = 1 if self.PGOPT['ACTS']&self.OPTS['PB'][0] else 0
                    if linfo and self.PGOPT['CACT'] == "BL" and not tempinfo['prcmd']: dr = 0 # skip download for BL only
                    if dr:
                        dfiles = None
                        for j in range(rcnt): # process each remote record
                            pgrec = self.onerecord(rmtrecs, j)
                            if dfiles and pgrec['remotefile'] == rfile and not self.PGOPT['mcnt']:
                                continue # skip
                            rfile = pgrec['remotefile']
                            act = 0 if locrec['action'] == 'AQ' else self.PGOPT['ACTS']&self.OPTS['DR'][0]
                            dfiles = self.download_remote_files(pgrec, lfile, linfo, locrec, locinfo, tempinfo, act)
                            if self.PGOPT['rstat'] < 0:
                                i = ecnt
                                break
                            if dfiles: rfiles = self.joinarray(rfiles, dfiles)
                        rmtcnt = len(rfiles) if rfiles else 0
                        if rmtcnt > 0:
                            if lcnt > 1 and rmtcnt != lcnt:
                                # NOTE(review): locrec['locinfo'] looks like it should be the local
                                # variable locinfo; 'locinfo' is not a field set on locrec here -- confirm
                                self.pglog("{}: {} files found for {} local files".format(locrec['locinfo'], rmtcnt, lcnt), self.PGOPT['emlerr'])
                                i = ecnt
                                break
                            cnt = rmtcnt
                            rfile = rfiles[l] if lcnt > 1 else rfiles[rmtcnt-1] # record the break remote file name
                    else:
                        rfile = None
                        if linfo and self.PGOPT['rstat'] == 0: self.PGOPT['rstat'] = 1
                if cnt != 0 and self.PGOPT['rstat'] > 0:
                    if self.PGOPT['ACTS']&(self.OPTS['BL'][0]|self.OPTS['AF'][0]):
                        if cnt < 0 and linfo:
                            # local file exists but no remote files were gathered
                            if tempinfo['archived'] and self.PGOPT['CACT'] == "UF" and not tempinfo['gotnew']:
                                if self.PGOPT['ACTS']&self.OPTS['AF'][0] and 'RA' not in self.params:
                                    self.pglog(lfile + ": local file archived already", self.PGOPT['emllog'])
                                    cnt = 0
                            else:
                                if self.PGOPT['ACTS']&self.OPTS['BL'][0]:
                                    self.pglog(lfile + ": local file exists already", self.PGOPT['emllog'])
                                cnt = 1
                        elif rmtcnt == lcnt and lfile == rfile:
                            if self.PGOPT['ACTS']&self.OPTS['BL'][0]:
                                self.pglog(lfile + ": local file same as remote file", self.PGOPT['emllog'])
                        elif not (self.PGOPT['ACTS']&self.OPTS['BL'][0]):
                            self.pglog(lfile + ": local file not built yet", self.PGOPT['emlerr'])
                            cnt = 0
                        else:
                            cnt = self.build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l)
                            if cnt and 'lfile' in tempinfo:
                                # build_local_file may rename the local file (executable name)
                                lfile = tempinfo['lfile']
                                del tempinfo['lfile']
                    if cnt != 0 and (self.PGOPT['ACTS']&self.OPTS['AF'][0]):
                        self.file_status_info(lfile, rfile, tempinfo)
                        cnt = self.archive_data_file(lfile, locrec, tempinfo, i)
                        if cnt > 0:
                            ucnt += 1
                            if tempinfo['RS'] == 1: rscnt += 1
                            if postcnt > -1: postcnt += 1
                elif cnt > 0:
                    cnt = 0
                if cnt > 0 and self.PGOPT['rstat'] > 0:
                    ccnt += 1
                elif 'UT' in self.params or tempinfo['archived']:
                    ut = 1
                if cnt > 0: acnt += 1
            if self.PGLOG['DSCHECK']:
                self.add_dscheck_dcount(lcnt%20, 0, self.PGOPT['extlog'])
            # clean older working files when everything in this period succeeded
            if ccnt == lcnt and (self.PGOPT['ACTS']&self.OPTS['CF'][0]) and locrec['cleancmd']:
                if tempinfo['CVD'] and self.diffdate(edate, tempinfo['CVD']) > 0:
                    self.clean_older_files(locrec['cleancmd'], locrec['workdir'], locinfo, tempinfo['CVD'], locrec['locfile'], rmtrecs, rcnt, tempinfo)
                else:
                    if not rfiles and rcnt and locrec['cleancmd'].find(' -RF') > -1:
                        rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate)
                    self.clean_files(locrec['cleancmd'], edate, ehour, locfiles, rfiles, tempinfo['FQ'])
            # advance the record's update times as appropriate
            if self.PGOPT['ACTS']&self.OPTS['AF'][0] or self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']:
                rmonly = 1 if self.PGOPT['rstat'] > 0 else 0
                if ccnt == lcnt:
                    self.reset_update_time(locinfo, locrec, tempinfo, ccnt, endonly)
                elif ut:
                    self.reset_update_time(locinfo, locrec, tempinfo, acnt, endonly)
                else:
                    if self.PGOPT['rstat'] == 0:
                        if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) < 0:
                            self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update
                            self.PGOPT['rstat'] = 1 # reset remote download status
                    elif 'IE' in self.params:
                        if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) >= 0:
                            endonly = 1
                            self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update
                            self.PGOPT['rstat'] = 1 # reset remote download status
                if setmiss: setmiss = self.set_miss_time(lfile, locrec, tempinfo, rmonly)
            # run the control's post-archive command once per batch of archives
            if postcnt > 0:
                postcmd = self.executable_command(self.replace_pattern(tempinfo['postcmd'], edate, ehour, tempinfo['FQ']),
                                                  lfile, self.params['DS'], edate, ehour)
                self.pgsystem(postcmd, self.PGOPT['emllog'], 5)
                postcnt = 0
            if rscnt >= self.PGOPT['RSMAX']:
                self.refresh_metadata(locrec['dsid'])
                rscnt = 0
            if self.PGOPT['rstat'] < -1 or self.PGOPT['rstat'] < 0 and 'QE' in self.params: break # unrecoverable errors
        if rscnt > 0: self.refresh_metadata(locrec['dsid'])
        if ufile and uinfo and ucnt == 0:
            self.pglog("{}: Last successful update - {}".format(uinfo, ufile), self.PGOPT['emlsum'])
        self.lock_update(lindex, locinfo, 0, self.PGOPT['errlog'])
        self.PGOPT['lindex'] = 0
        return retcnt
839
+
840
+ # refresh the gathered metadata with speed up option -R and -S
841
+ def refresh_metadata(self, dsid):
842
+ sx = "{} -d {} -r".format(self.PGOPT['scm'], dsid)
843
+ if self.PGOPT['wtidx']:
844
+ if 0 in self.PGOPT['wtidx']:
845
+ self.pgsystem(sx + 'w all', self.PGOPT['emllog'], 5)
846
+ else:
847
+ for tidx in self.PGOPT['wtidx']:
848
+ self.pgsystem("{}w {}".format(sx, tidx), self.PGOPT['emllog'], 5)
849
+ self.PGOPT['wtidx'] = {}
850
+
851
    # retrieve remote files
    # act: > 0 - create filenames and get data files physically; 0 - create filenames only
852
+ def download_remote_files(self, rmtrec, lfile, linfo, locrec, locinfo, tempinfo, act = 0):
853
+ emlsum = self.PGOPT['emlsum'] if self.PGOPT['CACT'] == "DR" else self.PGOPT['emllog']
854
+ rfile = rmtrec['remotefile']
855
+ rmtinfo = locinfo
856
+ dfiles = []
857
+ if not rfile:
858
+ rfile = lfile
859
+ rcnt = 1
860
+ if rfile != locrec['locfile']: rmtinfo += "-" + rfile
861
+ if act:
862
+ tempinfo['DC'] = (self.params['DC'][0] if 'DC' in self.params and self.params['DC'][0] else
863
+ (rmtrec['download'] if rmtrec['download'] else locrec['download']))
864
+ rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo)
865
+ rcnt = len(rfiles) if rfiles else 0
866
+ if rcnt == 0:
867
+ self.PGOPT['rstat'] = -2
868
+ return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr'])
869
+ self.PGOPT['rcnt'] += rcnt # accumulate remote file counts
870
+ if tempinfo['DC']: tempinfo['DC'] = None
871
+ if act: # get file names on remote server and create download command
872
+ sfile = rmtrec['serverfile']
873
+ if sfile and sfile != rfile:
874
+ sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo)
875
+ scnt = len(sfiles) if sfiles else 0
876
+ if scnt != rcnt:
877
+ self.PGOPT['rstat'] = -2
878
+ return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr'])
879
+ else:
880
+ sfiles = rfiles
881
+ scnt = rcnt
882
+ if tempinfo['AQ']:
883
+ tstr = tempinfo['AQ']
884
+ if tstr == 'Web':
885
+ rpath = "{}/{}/".format(self.PGLOG['DSDHOME'], self.params['DS'])
886
+ else:
887
+ rpath = "{}/{}/{}/".format(self.PGLOG['DECSHOME'], self.params['DS'], tempinfo['ST'])
888
+ else:
889
+ tstr = 'Remote'
890
+ rpath = ''
891
+ ks = 1 if 'KS' in self.params else 0
892
+ self.PGOPT['mcnt'] = ocnt = ecnt = scnt = dcnt = ncnt = 0
893
+ omsize = self.PGLOG['MINSIZE']
894
+ if 'VS' in tempinfo and 'VS' not in self.params: self.PGLOG['MINSIZE'] = tempinfo['VS']
895
+ for i in range(rcnt):
896
+ rfile = rfiles[i]
897
+ rname = rfile['fname']
898
+ rcmd = rfile['rcmd']
899
+ rinfo = self.check_local_file(rpath + rname, 65, self.PGOPT['emerol']) # 65 = 1 + 64
900
+ gotnew = 0
901
+ if not act:
902
+ if rinfo:
903
+ dfiles.append(rname)
904
+ dcnt += 1
905
+ else:
906
+ ecnt += 1
907
+ if rfile['amiss']:
908
+ self.pglog(rname + ": SKIP for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
909
+ self.PGOPT['mcnt'] += 1
910
+ elif 'IE' in self.params:
911
+ self.pglog(rname + ": NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
912
+ self.PGOPT['rstat'] = -1
913
+ else:
914
+ self.pglog(rname + ": ERROR for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr'])
915
+ self.PGOPT['rstat'] = -2
916
+ break
917
+ continue
918
+ elif rinfo and 'RD' not in self.params:
919
+ if not rcmd:
920
+ dfiles.append(rname)
921
+ dcnt += 1
922
+ if tempinfo['archived']:
923
+ if 'CN' not in self.params:
924
+ ocnt += 1
925
+ elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) < 1:
926
+ ocnt += 1
927
+ self.pglog("{}: ARCHIVED, NO newer remote file {} found".format(lfile, rname), self.PGOPT['emllog'])
928
+ continue
929
+ elif 'CN' in self.params:
930
+ if rfile['ready'] == -1: # out of check new period already
931
+ dfiles.append(rname)
932
+ dcnt += 1
933
+ if tempinfo['archived']: ocnt += 1
934
+ continue
935
+ elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], rfile['date'], rfile['time']) >= 0:
936
+ dfiles.append(rname)
937
+ dcnt += 1
938
+ if tempinfo['archived']:
939
+ ocnt += 1
940
+ else:
941
+ self.pglog(rname + ": IS local already", self.PGOPT['emllog'])
942
+ continue
943
+ sfile = sfiles[i]
944
+ sname = sfile['fname']
945
+ sinfo = rinfo if sname == rname else self.check_local_file(sname, 65, self.PGOPT['emerol'])
946
+ dact = self.get_download_action(rcmd)
947
+ rdcnt = 1 if re.search(r'(ncftpget|wget) ', dact) else 0
948
+ dcmd = derr = ""
949
+ info0 = cfile = pcmd = bname = None
950
+ ftype = "remote" if sname == rname else "server"
951
+ if sinfo:
952
+ if rcmd:
953
+ if 'RD' in self.params:
954
+ self.pglog(sname + ": ftype file is local, Try dact again", self.PGOPT['emllog'])
955
+ elif ('CN' not in self.params and
956
+ self.cmptime(sinfo['date_modified'], sinfo['time_modified'], sfile['date'], sfile['time']) >= 0):
957
+ rcmd = None # do not need download again
958
+ else:
959
+ self.pglog("{}: USE the local copy of {} file for NO download command".format(sname, ftype), self.PGOPT['emllog'])
960
+ elif not rcmd:
961
+ if tempinfo['archived']:
962
+ ocnt += 1
963
+ self.pglog("{}: ARCHIVED, NO need get {} file {} again for NO download command".format(lfile, ftype, sname), emlsum)
964
+ else:
965
+ ecnt += 1
966
+ if rfile['amiss']:
967
+ self.pglog(rname + ": SKIP missing remote file for NO download command", self.PGOPT['emlerr'])
968
+ self.PGOPT['mcnt'] += 1
969
+ elif 'IE' in self.params:
970
+ self.pglog(rname + ": MISS remote file for NO download command", self.PGOPT['emlerr'])
971
+ self.PGOPT['rstat'] = -1
972
+ else:
973
+ self.pglog(rname + ": ERROR missing remote file for NO download command", self.PGOPT['emlerr'])
974
+ self.PGOPT['rstat'] = -2
975
+ break
976
+ continue
977
+ if rcmd: # try to download now
978
+ if not sfile['ready']:
979
+ self.PGOPT['rstat'] = 0
980
+ self.pglog("{}: {} file NOT Ready yet".format(sname, ftype), self.PGOPT['emllog'])
981
+ ecnt += 1
982
+ break
983
+ if 'CN' in self.params:
984
+ if sinfo:
985
+ cfile = sname
986
+ elif rinfo:
987
+ cfile = rname
988
+ info0 = rinfo
989
+ elif rcnt == 1 and linfo:
990
+ cfile = lfile
991
+ info0 = linfo
992
+ elif tempinfo['archived']:
993
+ cfile = ''
994
+ dcmd = self.executable_command(rcmd, sname, self.params['DS'], sfile['date'], sfile['hour'])
995
+ if tempinfo['AT']:
996
+ stat = self.check_agetime(dcmd, sname, tempinfo['AT'])
997
+ if stat <= 0:
998
+ self.PGOPT['rstat'] = stat
999
+ ecnt += 1
1000
+ break
1001
+ if cfile != None:
1002
+ stat = self.check_newer_file(dcmd, cfile, tempinfo['ainfo'])
1003
+ if stat > 0:
1004
+ if cfile != sname:
1005
+ if stat < 3: self.pglog("{}: Found newer {} file {}".format(cfile, ftype, sname), emlsum)
1006
+ else:
1007
+ if stat < 3: self.pglog("{}: Found newer {} file".format(cfile, ftype), emlsum)
1008
+ if stat == 2: # file redlownloaded, reget file info
1009
+ sinfo = self.check_local_file(sname, 64, self.PGOPT['emerol'])
1010
+ else: # force download file
1011
+ cfile = None
1012
+ else:
1013
+ if stat < 0:
1014
+ if self.PGOPT['STATUS']:
1015
+ if cfile != sname:
1016
+ self.pglog("{}: Error check newer {} file {}\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
1017
+ else:
1018
+ self.pglog("{}: Error check newer {} file\n{}".format(cfile, ftype, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
1019
+ else:
1020
+ if cfile != sname:
1021
+ self.pglog("{}: Cannot check newer {} file {} via {}".format(cfile, ftype, sname, dcmd), self.PGOPT['emlsum'])
1022
+ else:
1023
+ self.pglog("{}: Cannot check newer {} file via {}".format(cfile, ftype, dcmd), self.PGOPT['emlsum'])
1024
+ if stat < -1: # uncrecoverable error
1025
+ self.PGOPT['rstat'] = stat
1026
+ ecnt += 1
1027
+ break
1028
+ elif cfile and cfile != sname:
1029
+ self.pglog("{}: NO newer {} file {} found\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), emlsum)
1030
+ else:
1031
+ self.pglog("{}: NO newer {} file found\n{}".format(sname, ftype, self.PGOPT['STATUS']), emlsum)
1032
+ if tempinfo['archived']:
1033
+ ncnt += 1
1034
+ if rcnt == 1: continue
1035
+ if not info0: info0 = sinfo
1036
+ sinfo = None
1037
+ if not cfile:
1038
+ if op.isfile(sname) and self.pgsystem("mv -f {} {}.rd".format(sname, sname), self.PGOPT['emerol'], 4):
1039
+ bname = sname + ".rd"
1040
+ if not info0: info0 = self.check_local_file(bname, 64, self.PGOPT['emerol'])
1041
+ if dcmd.find('wget ') > -1: self.slow_web_access(dcmd)
1042
+ self.pgsystem(dcmd, self.PGOPT['wrnlog'], 257) # 1 + 256
1043
+ derr = self.PGLOG['SYSERR']
1044
+ sinfo = self.check_local_file(sname, 70, self.PGOPT['emerol'])
1045
+ if sinfo:
1046
+ mode = 0o664 if sinfo['isfile'] else 0o775
1047
+ if mode != sinfo['mode']: self.set_local_mode(sname, sinfo['isfile'], mode, sinfo['mode'], sinfo['logname'], self.PGOPT['emerol'])
1048
+ (stat, derr) = self.parse_download_error(derr, dact, sinfo)
1049
+ if stat < -1: # uncrecoverable error
1050
+ self.pglog("{}: error {}\n{}".format(sname, dcmd, derr), self.PGOPT['emlerr'])
1051
+ self.PGOPT['rstat'] = stat
1052
+ ecnt += 1
1053
+ break
1054
+ elif stat > 0 and self.PGLOG['DSCHECK'] and sinfo:
1055
+ self.add_dscheck_dcount(0, sinfo['data_size'], self.PGOPT['extlog'])
1056
+ if sinfo:
1057
+ if info0:
1058
+ if info0['data_size'] == sinfo['data_size'] and bname:
1059
+ if self.compare_md5sum(bname, sname, self.PGOPT['emlsum']):
1060
+ self.pglog("{}: GOT same size, but different content, {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum'])
1061
+ tempinfo['gotnew'] = gotnew = 1
1062
+ self.PGOPT['rdcnt'] += rdcnt
1063
+ scnt += 1
1064
+ else:
1065
+ self.pglog("{}: GOT same {} file via {}".format(sname, ftype, dact), emlsum)
1066
+ if rinfo and rname != sname and 'KS' not in self.params:
1067
+ self.pgsystem("rm -f " + sname, self.PGOPT['emllog'], 5)
1068
+ sinfo = None
1069
+ if tempinfo['archived']:
1070
+ ncnt += 1
1071
+ else:
1072
+ self.pglog("{}: GOT different {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum'])
1073
+ tempinfo['gotnew'] = gotnew = 1
1074
+ self.PGOPT['rdcnt'] += rdcnt
1075
+ scnt += 1
1076
+ if bname: self.pgsystem("rm -rf " + bname, self.PGOPT['emerol'], 4)
1077
+ elif rcmd:
1078
+ self.pglog("{}: GOT {} file via {}".format(sname, ftype, dact), emlsum)
1079
+ self.PGOPT['rdcnt'] += rdcnt
1080
+ scnt += 1
1081
+ self.PGOPT['dcnt'] += 1
1082
+ if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
1083
+ elif info0:
1084
+ if bname:
1085
+ self.pglog("{}: RETAIN the older {} file".format(sname, ftype), emlsum)
1086
+ self.pgsystem("mv -f {} {}".format(bname, sname), self.PGOPT['emerol'], 4)
1087
+ if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
1088
+ sinfo = info0
1089
+ elif cfile:
1090
+ if tempinfo['archived']:
1091
+ ocnt += 1
1092
+ elif rcnt == 1:
1093
+ if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
1094
+ if cfile == sname:
1095
+ sinfo = info0
1096
+ elif not rinfo and cfile == lfile:
1097
+ continue
1098
+ elif not cfile:
1099
+ ecnt += 1
1100
+ if sfile['amiss']:
1101
+ self.pglog("{}: SKIP {} file for FAIL {}\n{}".format(sname, ftype, dact, derr), self.PGOPT['emlsum'])
1102
+ self.PGOPT['mcnt'] += 1
1103
+ else:
1104
+ self.PGOPT['rstat'] = 0 if 'IE' in self.params else -1
1105
+ if not derr or derr and derr.find(self.PGLOG['MISSFILE']) > -1:
1106
+ msg = "{}: NOT Available for {}\n".format(sname, dact)
1107
+ self.set_email(msg, self.PGOPT['emlsum'])
1108
+ if derr: self.pglog(derr, self.PGOPT['emllog'])
1109
+ else:
1110
+ self.pglog("{}: ERROR {}\n{}".format(sname, dact, derr), self.PGOPT['emlerr'])
1111
+ if self.PGOPT['rstat'] < 0: break
1112
+ continue
1113
+ else:
1114
+ ecnt += 1
1115
+ if sfile['amiss']: self.PGOPT['mcnt'] += 1
1116
+ continue
1117
+ if sinfo:
1118
+ if rname == sname:
1119
+ rinfo = sinfo
1120
+ elif not rinfo or gotnew:
1121
+ if rinfo: self.pgsystem("rm -f " + rname, self.PGOPT['emerol'], 5)
1122
+ if self.convert_files(rname, sname, ks, self.PGOPT['emerol']):
1123
+ rinfo = self.check_local_file(rname, 64, self.PGOPT['emerol'])
1124
+ else:
1125
+ self.PGOPT['rstat'] = -1
1126
+ ecnt += 1
1127
+ break
1128
+ if not rinfo:
1129
+ ecnt += 1
1130
+ if sfile['amiss']:
1131
+ self.pglog(rname + ": SKIP missing remote file", self.PGOPT['emlsum'])
1132
+ self.PGOPT['mcnt'] += 1
1133
+ elif 'IE' in self.params:
1134
+ self.pglog(rname + ": MISS remote file", self.PGOPT['emlerr'])
1135
+ self.PGOPT['rstat'] = -1
1136
+ else:
1137
+ self.pglog(rname + ": ERROR missing remote file", self.PGOPT['emlerr'])
1138
+ self.PGOPT['rstat'] = -2
1139
+ break
1140
+ continue
1141
+ if pcmd:
1142
+ pcmd = self.executable_command(self.replace_pattern(pcmd, rfile['date'], rfile['hour'], tempinfo['FQ']),
1143
+ rname, self.params['DS'], rfile['date'], rfile['hour'])
1144
+ if not self.pgsystem(pcmd, self.PGOPT['emllog'], 259):
1145
+ if self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.PGOPT['emlerr'])
1146
+ self.PGOPT['rstat'] = -1
1147
+ ecnt += 1
1148
+ break
1149
+ dfiles.append(rname)
1150
+ dcnt += 1
1151
+ self.PGLOG['MINSIZE'] = omsize
1152
+ if ncnt == rcnt:
1153
+ self.PGOPT['rstat'] = 0
1154
+ if dcnt > 0: dcnt = 0
1155
+ elif ecnt > 0:
1156
+ s = 's' if rcnt > 1 else ''
1157
+ if dcnt > scnt:
1158
+ self.pglog("{}/{} of {} rfile{} obtained/at local".format(scnt, dcnt, rcnt, s), self.PGOPT['emllog'])
1159
+ else:
1160
+ self.pglog("{} of {} rfile{} obtained".format(scnt, rcnt, s), self.PGOPT['emllog'])
1161
+ if dcnt > 0 and ocnt > 0: dcnt = 0
1162
+ elif ocnt == rcnt:
1163
+ self.PGOPT['rstat'] = 0
1164
+ return dfiles if self.PGOPT['rstat'] == 1 and dcnt > 0 else None
1165
+
1166
+ # build up local files
1167
+ def build_local_file(self, rfiles, lfile, linfo, locrec, tempinfo, lcnt, l):
1168
+ emlsum = self.PGOPT['emlsum'] if (self.PGOPT['ACTS'] == self.OPTS['BL'][0]) else self.PGOPT['emllog']
1169
+ if lcnt > 1:
1170
+ rcnt = 1
1171
+ rmax = l + 1
1172
+ else:
1173
+ rmax = rcnt = len(rfiles) if rfiles else 0
1174
+ rbfile = None
1175
+ if linfo:
1176
+ if rcnt == 1 and lfile == rfiles[l]: return 1
1177
+ if self.pgsystem("mv -f {} {}".format(lfile, rbfile), self.PGOPT['emerol'], 4):
1178
+ rbfile = lfile + '.rb'
1179
+ else:
1180
+ s = op.dirname(lfile)
1181
+ if s and not op.isdir(s): self.make_local_directory(s, self.PGOPT['emllog']|self.EXITLG)
1182
+ cext = None
1183
+ if locrec['options']:
1184
+ ms = re.search(r'-AF\s+([\w\.]+)', locrec['options'], re.I)
1185
+ if ms:
1186
+ fmt = ms.group(1)
1187
+ ms = re.search(r'(\w+)\.TAR(\.|$)', fmt, re.I)
1188
+ if ms: # check compression before tarring
1189
+ fmt = ms.group(1)
1190
+ ms = re.match(r'^({})$'.format(self.CMPSTR), fmt, re.I)
1191
+ if ms: cext = '.' + fmt
1192
+ if tempinfo['blcmd']:
1193
+ blcmd = self.executable_command(self.replace_pattern(tempinfo['blcmd'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']),
1194
+ lfile, self.params['DS'], tempinfo['edate'], tempinfo['ehour'])
1195
+ if not self.pgsystem(blcmd, self.PGOPT['emllog']) or self.local_file_size(lfile, 2, self.PGOPT['emerol']) <= 0:
1196
+ ret = self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr'])
1197
+ else:
1198
+ self.PGOPT['bcnt'] += 1
1199
+ ret = 1
1200
+ if rbfile:
1201
+ if ret:
1202
+ self.pgsystem("rm -rf " + rbfile, self.PGOPT['emerol'], 4)
1203
+ else:
1204
+ self.pglog(lfile + ": RETAIN the older local file", emlsum)
1205
+ self.pgsystem("mv -f {} {}".format(rbfile, lfile), self.PGOPT['emerol'], 4)
1206
+ return ret
1207
+ if lfile[0] == '!': # executable for build up local file name
1208
+ blcmd = self.executable_command(lfile[1:], None, self.params['DS'], tempinfo['edate'], tempinfo['ehour'])
1209
+ lfile = self.pgsystem(blcmd, self.PGOPT['emllog'], 21)
1210
+ if lfile and self.local_file_size(lfile, 2, self.PGOPT['emerol']) > 0:
1211
+ tempinfo['lfile'] = lfile
1212
+ return 1
1213
+ else:
1214
+ return self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr'])
1215
+ if rcnt == 0 and not linfo: return 0 # no remote file found to build local file
1216
+ ret = 1
1217
+ kr = 1 if 'KR' in self.params else 0
1218
+ if rcnt == 1 and not op.isdir(rfiles[l]):
1219
+ rfile = rfiles[l]
1220
+ else:
1221
+ ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), lfile, re.I)
1222
+ rfile = ms.group(1) if ms else lfile
1223
+ fd = None
1224
+ if tempinfo['AQ']:
1225
+ if not self.validate_one_infile(rfile, self.params['DS']): return 0
1226
+ fd = open(rfile, 'w')
1227
+ fd.write(tempinfo['AQ'] + "File\n")
1228
+ for i in range(rmax):
1229
+ tfile = rfiles[i]
1230
+ if fd:
1231
+ fd.write(tfile + "\n")
1232
+ continue
1233
+ if op.isfile(tfile) and cext and not re.search(r'{}$'.format(cext), tfile, re.I):
1234
+ ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), tfile, re.I)
1235
+ if ms: tfile = ms.group(1)
1236
+ tfile += cext
1237
+ if not self.convert_files(tfile, rfiles[i], kr, self.PGOPT['emllog']):
1238
+ if op.exists(rfile): self.pgsystem("rm -f " + rfile, self.PGOPT['emllog'])
1239
+ ret = self.pglog("{}: QUIT converting file from {}".format(rfile, tfile), self.PGOPT['emllog'])
1240
+ break
1241
+ cmd = "tar -{}vf {} {}".format('u' if i else 'c', rfile, tfile)
1242
+ ret = self.pgsystem(cmd, self.PGOPT['emllog'])
1243
+ if not ret: break
1244
+ if fd:
1245
+ ret = -1
1246
+ fd.close()
1247
+ if op.exists(rfile):
1248
+ s = "s" if rcnt > 1 else ""
1249
+ if tempinfo['AQ']:
1250
+ self.pglog("{}: input file CREATED for backing up {} {} file{}".format(rfile, rcnt, tempinfo['AQ'], s), emlsum)
1251
+ else:
1252
+ self.pglog("{}: tar file CREATED from {} file{}".format(rfile, rcnt, s), emlsum)
1253
+ else:
1254
+ ret = self.pglog(rfile + ": ERROR creating tar file", self.PGOPT['emlerr'])
1255
+ if ret > 0:
1256
+ if lfile != rfile:
1257
+ ret = self.convert_files(lfile, rfile, kr, self.PGOPT['emllog'])
1258
+ if ret: self.pglog("{}: BUILT from {}".format(lfile, rfile), emlsum)
1259
+ if ret:
1260
+ fsize = self.local_file_size(lfile, 3, self.PGOPT['emerol'])
1261
+ if fsize > 0:
1262
+ self.PGOPT['bcnt'] += 1
1263
+ if self.PGLOG['DSCHECK']: self.add_dscheck_dcount(0, fsize, self.PGOPT['extlog'])
1264
+ else:
1265
+ ret = 0
1266
+ if rbfile:
1267
+ if ret:
1268
+ self.pgsystem("rm -rf " + rbfile, self.PGOPT['emerol'], 4)
1269
+ else:
1270
+ self.pglog(lfile + ": RETAIN the older local file", emlsum)
1271
+ self.pgsystem("mv -f {} {}".format(rbfile, lfile), self.PGOPT['emerol'], 4)
1272
+ return 1 if ret else 0
1273
+
1274
+ # append data type to options for given type name if not in options
1275
+ def append_data_type(self, tname, options):
1276
+ mp = r'(^|\s)-{}(\s|$)'.format(tname)
1277
+ if not re.search(mp, options, re.I): options += " -{} {}".format(tname, self.DEFTYPES[tname])
1278
+ return options
1279
+
1280
+ # get data type from options for given type name, and default one if not in options
1281
+ def get_data_type(self, tname, options):
1282
+ mp = r'(^|\s)-{}\s+(\w)(\s|$)'.format(tname)
1283
+ ms = re.search(mp, options, re.I)
1284
+ return ms.group(2) if ms else self.DEFTYPES[tname]
1285
+
1286
+ # archive a data file
1287
+ def archive_data_file(self, lfile, locrec, tempinfo, eidx):
1288
+ growing = -1
1289
+ if tempinfo['ainfo']:
1290
+ ainfo = tempinfo['ainfo']
1291
+ if ainfo['vindex']: growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ'])
1292
+ tempinfo['ainfo'] = None # clean the archive info recorded earlier
1293
+ else:
1294
+ ainfo = {'archived': 0, 'note': None} # reference to empty hash
1295
+ self.pglog("{}: start {} for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emllog'])
1296
+ options = locrec['options'] if locrec['options'] else ""
1297
+ act = locrec['action']
1298
+ archfile = None
1299
+ if locrec['archfile']: archfile = self.replace_pattern(locrec['archfile'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])
1300
+ if act == 'AW':
1301
+ if archfile and 'wfile' not in ainfo: ainfo['wfile'] = archfile
1302
+ options = self.append_data_type('WT', options)
1303
+ elif act == 'AS':
1304
+ if archfile and 'sfile' not in ainfo: ainfo['sfile'] = archfile
1305
+ options = self.append_data_type('ST', options)
1306
+ elif act == 'AQ':
1307
+ if archfile and 'bfile' not in ainfo: ainfo['bfile'] = archfile
1308
+ options = self.append_data_type('QT', options)
1309
+ if tempinfo['archived'] and not ('RA' in self.params and growing > 0):
1310
+ if (ainfo['chksm'] and ainfo['chksm'] == self.PGOPT['chksm'] or
1311
+ ainfo['asize'] and ainfo['asize'] == self.PGOPT['fsize'] and
1312
+ self.cmptime(self.PGOPT['fdate'], self.PGOPT['ftime'], ainfo['adate'], ainfo['atime']) >= 0):
1313
+ if 'RA' not in self.params:
1314
+ amsg = "{}: ARCHIVED by {}".format(lfile, ainfo['adate'])
1315
+ if tempinfo['ehour'] != None: amsg += ":{:02}".format(ainfo['ahour'])
1316
+ self.pglog(amsg, self.PGOPT['emllog'])
1317
+ if eidx == 0: self.pglog("Add Mode option -RA if you want to re-archive", self.PGOPT['emllog'])
1318
+ return -1
1319
+ elif growing == 0:
1320
+ growing = -1
1321
+ if growing == 0: tempinfo['archived'] = self.move_archived_file(ainfo, tempinfo['archived'])
1322
+ if tempinfo['AQ']:
1323
+ ifopt = 'IF'
1324
+ else:
1325
+ ifopt = 'LF'
1326
+ acmd = "dsarch {} {} -{} {}".format(self.params['DS'], act, ifopt, lfile)
1327
+ gcmd = None
1328
+ if 'wfile' in ainfo: acmd += " -WF " + ainfo['wfile']
1329
+ if 'sfile' in ainfo: acmd += " -SF " + ainfo['sfile']
1330
+ if 'bfile' in ainfo: acmd += " -QF " + ainfo['bfile']
1331
+ if self.PGOPT['chksm']: acmd += " -MC " + self.PGOPT['chksm']
1332
+ if growing > 0 and not re.search(r'(^|\s)-GF(\s|$)', options, re.I): acmd += " -GF"
1333
+ if 'MD' in self.params and not re.search(r'(^|\s)-MD(\s|$)', options, re.I): acmd += " -MD"
1334
+ if not re.search(r'(^|\s)-NE(\s|$)', options, re.I): acmd += " -NE" # no email in dsarch
1335
+ if tempinfo['gotnew'] and not re.search(r'(^|\s)-OE(\s|$)', options, re.I): acmd += " -OE"
1336
+ if 'VS' in self.params:
1337
+ acmd += " -VS {}".format(self.params['VS'])
1338
+ if 'VS' in tempinfo: options = re.sub(r'-VS\s+\d+\s*', '', options, flags=re.I)
1339
+ if re.search(r'(^|\s)-GX(\s|$)', options, re.I):
1340
+ wfile = ainfo['wfile'] if 'wfile' in ainfo else ainfo['afile']
1341
+ ms = re.search(r'(^|\s)-DF (\w+)(\s|$)', options, re.I)
1342
+ fmt = ms.ms.group(2).lower() if ms else None
1343
+ if wfile and fmt:
1344
+ if fmt == "netcdf": fmt = "cf" + fmt
1345
+ rs = " -R -S" if tempinfo['RS'] == 1 else ''
1346
+ gcmd = "gatherxml -d {} -f {}{} {}".format(self.params['DS'], fmt, rs, wfile)
1347
+ options = re.sub(r'-GX\s*', '', options, flags=re.I)
1348
+ fnote = None
1349
+ if locrec['note'] and not re.search(r'(^|\s)-DE(\s|$)', options, re.I):
1350
+ note = self.build_data_note(ainfo['note'], lfile, locrec, tempinfo)
1351
+ if note:
1352
+ if re.search(r'(\n|\"|\')', note): # if found \n or ' or ", create temporary input file
1353
+ fnote = self.params['DS'] + ".note"
1354
+ nd = open(fnote, 'w')
1355
+ nd.write("DE<:>\n{}<:>\n".format(note))
1356
+ nd.close()
1357
+ acmd += " -IF " + fnote
1358
+ else:
1359
+ acmd += " -DE '{}'".format(note)
1360
+ if options:
1361
+ if locrec['cleancmd']: options = re.sub(r'(^-NW\s+|\s+-NW$)', '', options, 1, re.I)
1362
+ acmd += " " + self.replace_pattern(options, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])
1363
+ ret = self.pgsystem(acmd, self.PGOPT['emerol'], 69) # 1 + 4 + 64
1364
+ if gcmd: self.pgsystem(gcmd, self.PGOPT['emerol'], 5)
1365
+ if fnote: self.pgsystem("rm -f " + fnote, self.PGOPT['emerol'], 4)
1366
+ tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo)
1367
+ note = self.count_update_files(ainfo, tempinfo['ainfo'], ret, tempinfo['RS'])
1368
+ self.pglog("{}: UPDATED({}) for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emlsum'])
1369
+ return ret
1370
+
1371
+ # count files updated
1372
+ def count_update_files(self, oinfo, ninfo, success, rsopt):
1373
+ nrecs = ninfo['types'] if ninfo else {}
1374
+ orecs = oinfo['types'] if oinfo else {}
1375
+ astrs = []
1376
+ astr = ""
1377
+ for type in nrecs:
1378
+ nrec = nrecs[type]
1379
+ orec = orecs[type] if type in orecs else None
1380
+ if 'sfile' in nrec:
1381
+ atype = "Saved {} File".format(self.STYPE[type])
1382
+ elif 'bfile' in nrec:
1383
+ atype = "Quasar backup {} File".format(self.BTYPE[type])
1384
+ else:
1385
+ atype = "RDA {} File".format(self.WTYPE[type])
1386
+ if rsopt == 1:
1387
+ tidx = nrec['tindex'] if nrec['tindex'] else 0
1388
+ self.PGOPT['wtidx'][tidx] = 1
1389
+ if (not orec or
1390
+ nrec['data_size'] != orec['data_size'] or
1391
+ self.cmptime(orec['date_modified'], orec['time_modified'], nrec['date_modified'], nrec['time_modified']) or
1392
+ not (nrec['checksum'] and orec['checksum'] and nrec['checksum'] == orec['checksum'])):
1393
+ if 'sfile' in nrec:
1394
+ self.PGOPT['uscnt'] += 1
1395
+ elif 'bfile' in nrec:
1396
+ if type == 'D': self.PGOPT['qdcnt'] += 1
1397
+ self.PGOPT['qbcnt'] += 1
1398
+ elif type == 'D':
1399
+ self.PGOPT['udcnt'] += 1
1400
+ elif type == 'N':
1401
+ self.PGOPT['uncnt'] += 1
1402
+ else:
1403
+ self.PGOPT['uwcnt'] += 1
1404
+ astrs.append("{} {}rchived".format(atype, "Re-a" if orec else "A"))
1405
+ if self.PGLOG['DSCHECK']:
1406
+ self.add_dscheck_dcount(0, nrec['data_size'], self.PGOPT['extlog'])
1407
+ if astrs:
1408
+ self.PGOPT['ucnt'] += 1
1409
+ if len(astrs) < ninfo['archcnt']:
1410
+ if success:
1411
+ astr = " Successful, but only "
1412
+ else:
1413
+ astr = " Partially finished, "
1414
+ astr += ', '.join(astrs)
1415
+ else:
1416
+ if success:
1417
+ astr = " Successful, but NO file Re-archived"
1418
+ else:
1419
+ astr = " Failed, NO file {}rchived".format('Re-a' if oinfo['present'] == ninfo['archcnt'] else "A")
1420
+ if astr:
1421
+ s = "s" if ninfo['archcnt'] > 1 else ""
1422
+ astr += " of {} archfile{}".format(ninfo['archcnt'], s)
1423
+ return astr
1424
+
1425
    # get the temporal info in local and remote file names and the possible values
    # between the break update and the current date
    def get_tempinfo(self, locrec, locinfo, eidx = 0):
        """Build the temporal-control dict for one dlupdt record.

        locrec  - the dlupdt record being processed
        locinfo - descriptive string for log messages (dsid-Lindex-file)
        eidx    - index into the -ED/-EH command-line lists

        Returns the tempinfo dict (keys: ED/EH lists of due end dates/hours,
        FQ frequency, VI valid interval, NX next-due interval, etc.), or the
        pglog() result (falsy) when the update is not due or info is missing.
        """
        # get data end date for update action; command-line -ED overrides the record
        edate = self.params['ED'][eidx] if ('ED' in self.params and self.params['ED'][eidx]) else locrec['enddate']
        if not edate: return self.pglog(locinfo + ": MISS End Data Date for local update", self.PGOPT['emlerr'])
        ehour = self.params['EH'][eidx] if ('EH' in self.params and self.params['EH'][eidx] != None) else locrec['endhour']
        if not isinstance(edate, str): edate = str(edate)
        # an hourly remote interval requires an explicit end hour
        if ehour is None and self.pgget('drupdt', '', "lindex = {} and tinterval like '%H'".format(locrec['lindex'])):
            return self.pglog(locinfo + ": MISS End Data Hour for hourly remote update", self.PGOPT['emlerr'])
        # valid interval: record value wins over the update-control value
        if locrec['validint']:
            val = locrec['validint']
        elif self.PGOPT['UCNTL'] and self.PGOPT['UCNTL']['validint']:
            val = self.PGOPT['UCNTL']['validint']
        else:
            val = None
        tempinfo = {'AT': None, 'DC': None, 'ED': [], 'EH': [], 'VI': None,
                    'VD': None, 'VH': None, 'CVD': None, 'NX': None, 'FQ': None,
                    'QU': None, 'EP': 0, 'RS': -1, 'AQ': None}
        if val: val = self.get_control_time(val, "Valid Internal")
        if val:
            tempinfo['VI'] = val
            # val[3] is the hour component — presumably (Y, M, D, H); TODO confirm
            if ehour is None and val[3]: ehour = 0
        val = self.get_control_time(locrec['agetime'], "File Age Time")
        if val:
            tempinfo['AT'] = val
            if ehour is None and val[3]: ehour = 0
        frequency = self.params['FQ'][0] if 'FQ' in self.params else locrec['frequency']
        if frequency: # get data update frequency info
            (val, unit) = self.get_control_frequency(frequency)
            if val:
                tempinfo['FQ'] = val
                tempinfo['QU'] = unit # update frequency unit of meassure
            else:
                # on failure 'unit' carries the error message
                locinfo = self.replace_pattern(locinfo, edate, ehour)
                return self.pglog("{}: {}".format(locinfo, unit), self.PGOPT['emlerr'])
            if locrec['endperiod']: tempinfo['EP'] = locrec['endperiod']
            if val[3] and ehour is None: ehour = 0
            edate = self.enddate(edate, tempinfo['EP'], unit, tempinfo['FQ'][6])
        elif 'MU' in self.params or 'CP' in self.params:
            locinfo = self.replace_pattern(locinfo, edate, ehour)
            return self.pglog(locinfo + ": MISS frequency for Update", self.PGOPT['emlerr'])
        val = self.get_control_time(locrec['nextdue'], "Due Internval")
        if val:
            tempinfo['NX'] = val
            if ehour is None and val[3]: ehour = 0
        # check if allow missing remote file
        if 'MR' in self.params and self.params['MR'][0]:
            tempinfo['amiss'] = self.params['MR'][0]
        elif locrec['missremote']:
            tempinfo['amiss'] = locrec['missremote']
        else:
            tempinfo['amiss'] = 'N'
        options = locrec['options']
        if locrec['action'] == 'AQ':
            # Quasar backup needs -ST or -WT in options to know the source type
            if options:
                ms = re.search(r'-(ST|WT)\s+(\w)', options)
                if ms:
                    if ms.group(1) == 'ST':
                        tempinfo['AQ'] = 'Saved'
                        tempinfo['ST'] = ms.group(2)
                    else:
                        tempinfo['AQ'] = 'Web'
                else:
                    return self.pglog("{}: MISS -ST or -WT to backup {}".format(options, locinfo), self.PGOPT['emlerr'])
            else:
                return self.pglog("Set -ST or -WT in Options to backup {}".format(locinfo), self.PGOPT['emlerr'])
        if (options and re.search(r'(^|\s)-GX(\s|$)', options, re.I) and
            not re.search(r'(^|\s)-RS(\s|$)', options, re.I)):
            tempinfo['RS'] = 0 # set to 1 if need pass -RS to dsarch
        ddate = edate
        dhour = ehour
        dcnt = 0
        self.PGOPT['wtidx'] = {}
        if options:
            ms = re.search(r'-VS\s+(\d+)', options, re.I)
            if ms: tempinfo['VS'] = int(ms.group(1))
        if tempinfo['VI']:
            # compute the earliest valid date/hour from the current time
            if tempinfo['VI'][3]:
                (vdate, vhour) = self.adddatehour(self.PGOPT['CURDATE'], self.PGOPT['CURHOUR'], -tempinfo['VI'][0],
                                                  -tempinfo['VI'][1], -tempinfo['VI'][2], -tempinfo['VI'][3])
            else:
                vdate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -tempinfo['VI'][2])
                vhour = self.PGOPT['CURHOUR']
            if 'CN' in self.params and locrec['cleancmd']:
                tempinfo['CVD'] = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -(1+tempinfo['VI'][2]))
                tempinfo['setmiss'] = 1
            if self.diffdatehour(edate, ehour, vdate, vhour) < 0:
                vdate = edate
                vhour = ehour
            if tempinfo['amiss'] == 'N' and locrec['missdate']:
                # NOTE(review): 'dhour' here is reused to hold a date difference,
                # clobbering the end-hour copy taken above — confirm intentional
                dhour = self.diffdatehour(vdate, vhour, locrec['missdate'], locrec['misshour'])
                if dhour > 0:
                    if dhour > 240:
                        # stale miss info (over ~10 days): clear it in RDADB
                        record = {'missdate': None, 'misshour': None}
                        self.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex']))
                    else:
                        vdate = locrec['missdate']
                        vhour = locrec['misshour']
            if vdate and not isinstance(vdate, str): vdate = str(vdate)
            tempinfo['VD'] = vdate
            tempinfo['VH'] = vhour
            if 'ED' not in self.params and self.diffdatehour(edate, ehour, vdate, vhour) > 0:
                edate = vdate
                if tempinfo['FQ']:
                    if tempinfo['EP'] or tempinfo['QU'] == 'M':
                        edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
                    # back the end date up by whole frequency steps until just before vdate
                    while True:
                        (udate, uhour) = self.addfrequency(edate, ehour, tempinfo['FQ'], -1)
                        if self.diffdatehour(udate, uhour, vdate, vhour) < 0: break
                        edate = udate
                        ehour = uhour
                    if tempinfo['EP'] or tempinfo['QU'] == 'M':
                        edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
        vdate = self.params['CD']
        vhour = self.params['CH']
        if tempinfo['NX']:
            # pull the cutoff back by the next-due interval
            if tempinfo['NX'][3]:
                (udate, uhour) = self.adddatehour(self.PGOPT['CURDATE'], vhour, -tempinfo['NX'][0],
                                                  -tempinfo['NX'][1], -tempinfo['NX'][2], -tempinfo['NX'][3])
            else:
                udate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2])
                uhour = vhour
            if self.diffdatehour(udate, uhour, vdate, vhour) <= 0:
                vdate = udate
                vhour = uhour
        if 'CP' in self.params: (vdate, vhour) = self.addfrequency(vdate, vhour, tempinfo['FQ'], 1)
        fupdate = 1 if 'FU' in self.params else 0
        # collect every due end date/hour up to the cutoff (-FU forces one)
        while fupdate or self.diffdatehour(edate, ehour, vdate, vhour) <= 0:
            tempinfo['ED'].append(edate)
            if ehour != None and tempinfo['QU'] != 'H':
                tempinfo['EH'].append(23)
            else:
                tempinfo['EH'].append(ehour)
            if 'MU' not in self.params: break
            if tempinfo['RS'] == 0 and dcnt < 3:
                if self.diffdatehour(edate, ehour, ddate, dhour) >= 0: dcnt += 1
            (edate, ehour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 1)
            edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
            fupdate = 0
        if tempinfo['RS'] == 0 and dcnt > 2: tempinfo['RS'] = 1
        if not tempinfo['ED']: # no end time found, update not due yet
            if tempinfo['NX']:
                (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
            else:
                udate = edate
                uhour = ehour
            locinfo = self.replace_pattern(locinfo, edate, ehour, tempinfo['FQ'])
            vdate = self.params['CD']
            val = "Update data"
            if tempinfo['NX']: val += " due"
            if uhour is None:
                locinfo += ": {} on {}".format(val, udate)
            else:
                locinfo += ": {} at {}:{:02}".format(val, udate, uhour)
                vdate += ":{:02}".format(self.params['CH'])
            return self.pglog("{} NOT due yet by {}".format(locinfo, vdate), self.PGOPT['emllog'])
        return tempinfo
1583
+
1584
    # get archived file info
    def file_archive_info(self, lfile, locrec, tempinfo):
        """Gather RDADB info about the file(s) already archived for this record.

        Returns a dict with counts ('archcnt', 'archived', 'present'),
        per-type DB records in 'types', and the archived file's timestamp,
        size and checksum fields when a matching record exists.
        Returns the cached tempinfo['ainfo'] unchanged when already set.
        """
        if tempinfo['ainfo'] != None: return tempinfo['ainfo']
        edate = tempinfo['edate']
        ehour = tempinfo['ehour']
        ainfo = {'archcnt': 0, 'archived': 0, 'present': 0, 'vindex': 0, 'types': {}, 'note': None, 'afile' : None}
        growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ'])
        if growing:
            # for a growing file compute the cutoff time (udate/uhour) a record
            # must be newer than to count as archived
            if tempinfo['NX']:
                (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
            else:
                udate = edate
                uhour = ehour
            if self.PGLOG['GMTZ'] and uhour != None: # convert to local times
                (udate, uhour) = self.adddatehour(udate, uhour, 0, 0, 0, -self.PGLOG['GMTZ'])
        options = locrec['options'] if locrec['options'] else ""
        act = locrec['action']
        locrec['gindex'] = self.get_group_index(options, edate, ehour, tempinfo['FQ'])
        dsid = self.params['DS']
        gcnd = "gindex = {}".format(locrec['gindex'])
        cnd = "dsid = '{}' AND {}".format(dsid, gcnd)
        mmiss = 0  # NOTE(review): never used below — confirm leftover
        if re.match(r'^A(B|W)$', act): # check existing web files
            # NOTE(review): matches 'AB' too, though only AW/AS/AQ appear
            # elsewhere in this file — confirm 'AB' is a valid legacy action
            ainfo['archcnt'] = 1
            ms = re.search(r'(^|\s)-WT\s+(\w)(\s|$)', options, re.I)  # NOTE(review): result unused
            type = self.get_data_type('WT', options)
            if locrec['archfile']:
                afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
            else:
                # NOTE(review): -KP is searched in lfile here but in options in
                # the AS branch below — confirm which is intended
                afile = lfile if re.search(r'(^|\s)-KP(\s|$)', lfile, re.I) else op.basename(lfile)
            ms = re.search(r'(^|\s)-WP\s+(\S+)', options, re.I)
            if ms:
                path = self.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ'])
            else:
                path = self.get_group_field_path(locrec['gindex'], dsid, 'webpath')
            if path: afile = self.join_paths(path, afile)
            ainfo['afile'] = afile
            wrec = self.pgget_wfile(dsid, "*", "{} AND type = '{}' AND wfile = '{}'".format(gcnd, type, afile), self.PGOPT['extlog'])
            if wrec:
                ainfo['wfile'] = wrec['wfile']
                adate = ainfo['adate'] = str(wrec['date_modified'])
                atime = ainfo['atime'] = str(wrec['time_modified'])
                ahour = None
                if atime:
                    ms = re.match(r'^(\d+):', atime)
                    if ms: ahour = int(ms.group(1))
                ainfo['ahour'] = ahour
                ainfo['asize'] = wrec['data_size']
                ainfo['chksm'] = wrec['checksum'] if wrec['checksum'] else ''
                ainfo['note'] = wrec['note']
                ainfo['types'][type] = wrec
                ainfo['wtype'] = type
                # a growing file only counts as archived if modified after the cutoff
                if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1
                if wrec['vindex']: ainfo['vindex'] = wrec['vindex']
                ainfo['present'] += 1
        if act == 'AS': # check existing save files
            ainfo['archcnt'] = 1
            type = self.get_data_type('ST', options)
            if locrec['archfile']:
                afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
            else:
                # -KP keeps the relative path; otherwise only the base name is used
                afile = lfile if re.search(r'(^|\s)-KP(\s|$)', options, re.I) else op.basename(lfile)
            ms = re.search(r'(^|\s)-SP\s+(\S+)', options, re.I)
            if ms:
                path = self.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ'])
            else:
                path = self.get_group_field_path(locrec['gindex'], self.params['DS'], 'savedpath')
            if path: afile = self.join_paths(path, afile)
            srec = self.pgget("sfile", "*", "{} AND type = '{}' AND sfile = '{}'".format(cnd, type, afile), self.PGOPT['extlog'])
            if srec:
                ainfo['sfile'] = srec['sfile']
                adate = ainfo['adate'] = str(srec['date_modified'])
                atime = ainfo['atime'] = str(srec['time_modified'])
                ahour = None
                if atime:
                    ms = re.match(r'^(\d+):', atime)
                    if ms: ahour = int(ms.group(1))
                ainfo['asize'] = srec['data_size']
                ainfo['chksm'] = srec['checksum'] if srec['checksum'] else ''
                ainfo['note'] = srec['note']
                ainfo['types'][type] = srec
                ainfo['stype'] = type
                if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1
                if srec['vindex']: ainfo['vindex'] = srec['vindex']
                ainfo['present'] += 1
        if act == 'AQ': # check existing quasar backup files
            ainfo['archcnt'] = 1
            type = self.get_data_type('QT', options)
            if locrec['archfile']:
                afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ'])
            else:
                return self.pglog(lfile + ": Miss Backup file name via (FA|FileArchived)", self.PGOPT['emlerr'])
            brec = self.pgget("bfile", "*", "dsid = '{}' AND type = '{}' AND bfile = '{}'".format(self.params['DS'], type, afile), self.PGOPT['extlog'])
            if brec:
                ainfo['bfile'] = brec['bfile']
                adate = ainfo['adate'] = str(brec['date_modified'])
                atime = ainfo['atime'] = str(brec['time_modified'])
                ahour = None
                if atime:
                    ms = re.match(r'^(\d+):', atime)
                    if ms: ahour = int(ms.group(1))
                ainfo['asize'] = brec['data_size']
                ainfo['chksm'] = brec['checksum'] if brec['checksum'] else ''
                ainfo['note'] = brec['note']
                ainfo['types'][type] = brec
                ainfo['btype'] = type
                if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1
                ainfo['present'] += 1
        if ainfo['archcnt'] == 0:
            self.pglog("{}: unknown archive action {}".format(lfile, act), self.PGOPT['extlog'])
        return ainfo # always returns a hash reference for archiving info
1695
+
1696
    # build up data note based on temporal info, keep the begin timestamp
    # for existing record; change end timestamp only if new data added
    # return None if no change for existing note
    def build_data_note(self, onote, lfile, locrec, tempinfo):
        """Expand the dlupdt note template into a note string for dsarch.

        onote  - existing note from RDADB (kept when the template fails)
        lfile  - local file name, for log messages only
        locrec - the dlupdt record holding the note template
        tempinfo - temporal info (edate/ehour/FQ) for pattern replacement
        """
        note = locrec['note']
        if not note: return onote
        seps = self.params['PD']  # pattern delimiter pair, e.g. open/close chars
        match = "[^{}]+".format(seps[1])
        edate = tempinfo['edate']
        ehour = tempinfo['ehour']
        if note[0] == '!': # executable for build up data note
            # NOTE(review): first argument 1 looks suspicious — other call
            # sites pass the command string first; confirm the signature
            cmd = self.executable_command(1, None, None, edate)
            if not cmd: return 0
            return self.pgsystem(cmd, self.PGOPT['emllog'], 21)
        # replace generic patterns first
        note = self.replace_pattern(note, None) # replace generic patterns first
        # get temporal patterns
        patterns = re.findall(r'{}({}){}'.format(seps[0], match, seps[1]), note)
        pcnt = len(patterns)
        if pcnt == 0: return note # no pattern temporal matches
        if pcnt > 2:
            self.pglog("{}-{}: TOO many ({}) temporal patterns".format(lfile, note, pcnt), self.PGOPT['emllog'])
            return onote
        if pcnt == 2: # replace start time
            if onote: # get start time from existing note
                # reuse the start timestamp already recorded in the old note so
                # the note's begin time stays fixed across updates
                replace = r"{}{}{}".format(seps[0], patterns[0], seps[1])
                ms = re.match(r'^(.*){}(.*){}'.format(replace, self.params['PD'][0]), note)
                if ms:
                    init = ms.group(1)
                    sp = ms.group(2)
                    ms = re.search(r'{}(.+){}'.format(init, sp), onote)
                    if ms:
                        sdate = ms.group(1)
                        note = re.sub(replace, sdate, note, 1)
            elif tempinfo['FQ']: # get start time
                (sdate, shour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 0)
                note = self.replace_pattern(note, sdate, shour, None, 1)
        return self.replace_pattern(note, edate, ehour) # replace end time now
1733
+
1734
+ # get data file status info
1735
+ def file_status_info(self, lfile, rfile, tempinfo):
1736
+ # check and cache new data info
1737
+ finfo = self.check_local_file(lfile, 33, self.PGOPT['wrnlog']) # 33 = 1 + 32
1738
+ if not finfo:
1739
+ self.PGOPT['chksm'] = ''
1740
+ self.PGOPT['fsize'] = 0
1741
+ return
1742
+ fdate = finfo['date_modified']
1743
+ ftime = finfo['time_modified']
1744
+ fhour = None
1745
+ ms = re.match(r'^(\d+):', ftime)
1746
+ if ms: four = int(ms.group(1))
1747
+ self.PGOPT['fsize'] = finfo['data_size']
1748
+ self.PGOPT['chksm'] = finfo['checksum']
1749
+ if rfile and lfile != rfile:
1750
+ finfo = self.check_local_file(rfile, 1, self.PGOPT['wrnlog'])
1751
+ if finfo and self.cmptime(finfo['date_modified'], finfo['time_modified'], fdate, ftime) < 0:
1752
+ fdate = finfo['date_modified']
1753
+ ftime = finfo['time_modified']
1754
+ ms = re.match(r'^(\d+):', ftime)
1755
+ if ms: four = int(ms.group(1))
1756
+ self.PGOPT['fdate'] = fdate
1757
+ self.PGOPT['ftime'] = ftime
1758
+ self.PGOPT['fhour'] = fhour
1759
+ if 'RE' in self.params: # reset end data/time/hour
1760
+ if tempinfo['NX']:
1761
+ if tempinfo['NX'][3]:
1762
+ (fdate, fhour) = self.adddatehour(fdate, fhour, -tempinfo['NX'][0], -tempinfo['NX'][1],
1763
+ -tempinfo['NX'][2], -tempinfo['NX'][3])
1764
+ else:
1765
+ fdate = self.adddate(fdate, -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2])
1766
+ while True:
1767
+ (edate, ehour) = self.addfrequency(tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'], 1)
1768
+ edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
1769
+ if self.diffdatehour(edate, ehour, fdate, fhour) > 0: break
1770
+ tempinfo['edate'] = edate
1771
+ tempinfo['ehour'] = ehour
1772
+
1773
+ # check if a Server file is aged enough for download# return 1 if valid, 0 if not aged enough, -1 if cannot check
1774
+ def check_agetime(self, dcmd, sfile, atime):
1775
+ info = self.check_server_file(dcmd, 1)
1776
+ if not info:
1777
+ sact = self.get_download_action(dcmd)
1778
+ (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], sact)
1779
+ self.PGOPT['STATUS'] = derr
1780
+ self.pglog("{}: cannot check file age\n{}".format(sfile, self.PGOPT['STATUS']), self.PGOPT['emlerr'])
1781
+ return stat
1782
+ ahour = None
1783
+ if atime[3]:
1784
+ ms = re.match(r'^(\d+):', info['time_modified'])
1785
+ if ms: ahour = int(ms.group(1))
1786
+ (adate, ahour) = self.adddatehour(info['date_modified'], ahour, atime[0], atime[1], atime[2], atime[3])
1787
+ if self.diffdatehour(self.params['CD'], self.params['CH'], adate, ahour) >= 0:
1788
+ return 1
1789
+ if ahour is None:
1790
+ self.pglog(("{}: original {} file ready by {}\n".format(sfile, info['ftype'], info['date_modified']) +
1791
+ "but NOT aged enough for retrieving yet by " + self.params['CD']), self.PGOPT['emllog'])
1792
+ else:
1793
+ self.pglog(("{}: original {} file ready by {}:{:02}\n".format(sfile, info['ftype'], info['date_modified'], ahour) +
1794
+ "but NOT aged enough for retrieving yet by {}:{:02}".format(self.params['CD'], self.params['CH'])), self.PGOPT['emllog'])
1795
+ return 0 # otherwise server file is not aged enough
1796
+
1797
    # check if a Server file is changed with different size
    # return 1 - file changed, 2 - new file retrieved, 3 - force redownload,
    # 0 - no change, -1 - error check, -2 - cannot check
    def check_newer_file(self, dcmd, cfile, ainfo):
        """Decide whether the server copy is newer than the local/archived copy.

        dcmd  - download command used to stat the server file
        cfile - local copy name; when empty, compare against archived info
        ainfo - archived file info dict (chksm/asize/adate/atime)
        """
        if cfile:
            finfo = self.check_local_file(cfile, 33, self.PGOPT['wrnlog'])
            if not finfo: return 3 # download if can not check newer
        else:
            # no local copy: build a pseudo file-info from the archived record
            finfo = {'isfile': 0, 'checksum': ainfo['chksm'], 'data_size': ainfo['asize'],
                     'date_modified': ainfo['adate'], 'time_modified': ainfo['atime']}
        cinfo = self.check_server_file(dcmd, 33, cfile)
        if not cinfo:
            sact = self.get_download_action(dcmd)
            (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], sact)
            self.PGOPT['STATUS'] = derr
            return stat
        # WGET means the server file was just retrieved; report 2 instead of 1
        stat = 2 if cinfo['ftype'] == "WGET" else 1
        # quick path: same name but different size means changed
        if finfo['isfile'] and cfile == cinfo['fname'] and finfo['data_size'] and cinfo['data_size'] and cinfo['data_size'] != finfo['data_size']:
            return stat
        self.PGOPT['STATUS'] = ''
        if (finfo['data_size'] != cinfo['data_size'] or 'checksum' not in cinfo or
            'checksum' not in finfo or finfo['checksum'] != cinfo['checksum']):
            if 'HO' in self.params and cinfo['ftype'] == "FTP":
                # apply the configured hour offset to the FTP server timestamp
                (cdate, ctime) = self.addhour(cinfo['date_modified'], cinfo['time_modified'], -self.params['HO'][0])
            else:
                cdate = cinfo['date_modified']
                ctime = cinfo['time_modified']
            if self.cmptime(cdate, ctime, finfo['date_modified'], finfo['time_modified']) > 0:
                # server copy is strictly newer: log both sides and report changed
                msg = "{} Newer {} {}: {} {} {}".format(self.params['DS'], cinfo['ftype'], cinfo['fname'], cdate, ctime, cinfo['data_size'])
                if 'checksum' in cinfo: msg += " " + cinfo['checksum']
                msg += "; {}: ".format(cfile if cfile else "archived")
                msg += "{} {} {}".format(finfo['date_modified'], finfo['time_modified'], finfo['data_size'])
                if 'checksum' in finfo: msg += " " + finfo['checksum']
                self.pglog(msg, self.PGOPT['wrnlog'])
                return stat
        # no change: record where the comparison timestamp came from
        if 'adate' in ainfo:
            self.PGOPT['STATUS'] = "archived: {} {}".format(ainfo['adate'], ainfo['atime'])
        elif cfile:
            self.PGOPT['STATUS'] += "local copy timestamp: {} {}".format(finfo['date_modified'], finfo['time_modified'])
        if 'note' in cinfo:
            self.PGOPT['STATUS'] += "\n" + cinfo['note']
        return 0
1838
+
1839
+ # get download action name
1840
+ def get_download_action(self, dcmd):
1841
+ if not dcmd: return "download"
1842
+ dact = "DOWNLOAD"
1843
+ ms = re.search(r'(^|\S\/)tar\s+-(\w+)\s', dcmd)
1844
+ if ms:
1845
+ taropt = ms.group(2)
1846
+ dact = "UNTAR" if taropt.find('x') > -1 else "TAR"
1847
+ else:
1848
+ ms = re.match(r'^\s*(\S+)', dcmd)
1849
+ if ms:
1850
+ dact = op.basename(ms.group(1))
1851
+ if dact == "wc":
1852
+ ms = re.search(r'\|\s*(\S+)', dcmd)
1853
+ if ms: dact = op.basename(ms.group(1))
1854
+ return dact
1855
+
1856
+ # change to working directory if not there yet
1857
+ def change_workdir(self, wdir, locinfo, edate, ehour, FQ):
1858
+ if 'WD' in self.params and self.params['WD'][0]: wdir = self.params['WD'][0]
1859
+ if not wdir:
1860
+ return self.pglog(locinfo + ": MISS working directory", self.PGOPT['emlerr'])
1861
+ else:
1862
+ wdir = self.replace_environments(wdir)
1863
+ wdir = self.replace_pattern(wdir, edate, ehour, FQ)
1864
+ if not self.change_local_directory(wdir, self.PGOPT['emllog']): return 0
1865
+ return 1
1866
+
1867
+ # clean the working copies of remote and local files/directories
1868
+ def clean_files(self, cleancmd, edate, ehour, lfiles, rfiles, freq):
1869
+ lfile = ' '.join(lfiles) if lfiles else ''
1870
+ cleancmd = self.replace_pattern(cleancmd, edate, ehour, freq)
1871
+ cleancmd = self.executable_command(cleancmd, lfile, None, None, None, rfiles)
1872
+ self.PGLOG['ERR2STD'] = [self.PGLOG['MISSFILE']]
1873
+ self.pgsystem(cleancmd, self.PGOPT['emllog'], 5)
1874
+ self.PGLOG['ERR2STD'] = []
1875
+
1876
+ # clean files rematching pattern on given date/hour
1877
+ def clean_older_files(self, cleancmd, workdir, locinfo, edate, locfile, rmtrecs, rcnt, tempinfo):
1878
+ rfiles = None
1879
+ lfiles = self.get_local_names(locfile, tempinfo, edate)
1880
+ self.change_workdir(workdir, locinfo, edate, tempinfo['ehour'], tempinfo['FQ'])
1881
+ if rcnt and cleancmd.find(' -RF') > 0:
1882
+ rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate)
1883
+ self.clean_files(cleancmd, edate, tempinfo['ehour'], lfiles, rfiles, tempinfo['FQ'])
1884
+
1885
+ # get all remote file names for one update period
1886
+ def get_all_remote_files(self, rmtrecs, rcnt, tempinfo, edate):
1887
+ rfiles = []
1888
+ for i in range(rcnt): # processs each remote record
1889
+ rmtrec = self.onerecord(rmtrecs, i)
1890
+ file = rmtrec['remotefile']
1891
+ if not file: continue
1892
+ files = self.get_remote_names(file, rmtrec, file, tempinfo, edate)
1893
+ if files: rfiles.extend(files)
1894
+ return rfiles
1895
+
1896
    # check remote file status and send email to specialist for irregular update cases
    def check_dataset_status(self):
        """Check update status of all matching dlupdt records for the current
        specialist/dataset and build SUBJECT/TOPMSG for the status email."""
        if 'CD' in self.params:
            self.params['CD'] = self.format_date(self.params['CD']) # standard format in case not yet
        else:
            self.params['CD'] = self.curdate() # default to current date
        # build the dlupdt query for this specialist
        condition = "specialist = '{}'".format(self.params['LN'])
        if 'ED' not in self.params: condition += " AND enddate < '{}'".format(self.params['CD'])
        if 'DS' in self.params: condition += " AND dsid = '{}'".format(self.params['DS'])
        s = self.file_condition('dlupdt', ('L' if 'LI' in self.params else "FIXA"), None, 1)
        if s: condition += " AND " + s
        condition += " ORDER BY dsid, execorder, lindex"
        locrecs = self.pgmget("dlupdt", "*", condition, self.PGOPT['extlog'])
        loccnt = len(locrecs['locfile']) if locrecs else 0
        if not loccnt: return self.pglog("No Update record found for checking update status on {} for '{}'".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])
        s = "s" if loccnt > 1 else ""
        self.pglog("Check {} record{} for update status...".format(loccnt, s), self.PGOPT['wrnlog'])
        for i in range(loccnt):
            locrec = self.onerecord(locrecs, i)
            # a single record with a single -LF value overrides the stored name
            if loccnt == 1 and 'LI' in self.params and 'LF' in self.params and len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']:
                locrec['locfile'] = self.params['LF'][0]
            self.check_locfile_status(locrec)
        if self.PGOPT['lcnt'] or self.PGLOG['ERRMSG']:
            if self.PGOPT['lcnt']:
                # compose the email subject and top message from the counters
                loccnt = self.PGOPT['lcnt']
                s = "s" if (loccnt > 1) else ""
                self.SUBJECT = "DSUPDT Status of {} update record{}".format(loccnt, s)
                if 'DS' in self.params: self.SUBJECT += " for {}".format(self.params['DS'])
                self.TOPMSG = " ready for update of {} local file{}".format(loccnt, s)
                s = "s" if (self.PGOPT['rcnt'] > 1) else ""
                self.TOPMSG = "{}/{} remote{}{}".format(self.PGOPT['ucnt'], self.PGOPT['rcnt'], s, self.TOPMSG)
        else:
            self.pglog("No local file ready for checking {} on {} for {}".format(self.SUBJECT, self.params['CD'], self.params['LN']), self.PGOPT['wrnlog'])
            self.SUBJECT = self.TOPMSG = None
        if self.PGOPT['UCNTL']:
            self.reset_control_time()
            # tag the subject with the update-control index
            if self.SUBJECT: self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex'])
1933
+
1934
+ # check update status for a given local file
1935
+ def check_locfile_status(self, locrec):
1936
+ loccnd = "lindex = {}".format(locrec['lindex'])
1937
+ lfile = locrec['locfile']
1938
+ locinfo = "{}-L{}".format(locrec['dsid'], locrec['lindex'])
1939
+ if not lfile: return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr'])
1940
+ locinfo += "-" + lfile
1941
+ tempinfo = self.get_tempinfo(locrec, locinfo, 0)
1942
+ if not tempinfo: return 0 # simply return if miss temporal info for update
1943
+ rmtcnd = loccnd
1944
+ rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1)
1945
+ if rcnd: rmtcnd += " AND " + rcnd
1946
+ rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog'])
1947
+ rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0
1948
+ if rcnt == 0:
1949
+ if rcnd and self.pgget("drupdt", "", loccnd):
1950
+ return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr'])
1951
+ rcnt = 1 # create a empty record remote file
1952
+ rmtrecs = {'lindex': locrec['lindex'], 'remotefile': None, 'serverfile': None}
1953
+ if rcnt == 1:
1954
+ if 'RF' in self.params and len(self.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0]):
1955
+ rmtrecs['remotefile'][0] = self.params['RF'][0]
1956
+ if 'SF' in self.params and len(self.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0]):
1957
+ rmtrecs['serverfile'][0] = self.params['SF'][0]
1958
+ ecnt = len(tempinfo['ED'])
1959
+ self.PGOPT['lindex'] = locrec['lindex']
1960
+ logact = self.PGOPT['emllog']
1961
+ retcnt = 0
1962
+ for i in range(ecnt):
1963
+ if self.ALLCNT > 1 and i > 0:
1964
+ tempinfo = self.get_tempinfo(locrec, locinfo, i)
1965
+ if not tempinfo: break
1966
+ edate = tempinfo['ED'][0]
1967
+ ehour = tempinfo['EH'][0]
1968
+ else:
1969
+ edate = tempinfo['ED'][i]
1970
+ ehour = tempinfo['EH'][i]
1971
+ tempinfo['edate'] = edate
1972
+ if ehour != None:
1973
+ tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour)
1974
+ tempinfo['ehour'] = ehour
1975
+ else:
1976
+ tempinfo['einfo'] = "end data date {}".format(edate)
1977
+ tempinfo['ehour'] = None
1978
+ if 'GZ' in self.params: tempinfo['einfo'] += "(UTC)"
1979
+ lfile = self.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ'])
1980
+ locinfo = "{}-L{}-{}".format(locrec['dsid'], locrec['lindex'], lfile)
1981
+ self.pglog("{}: Check Update Status for {}".format(locinfo, tempinfo['einfo']), logact)
1982
+ logact = self.PGOPT['emlsep']
1983
+ self.PGOPT['lcnt'] += 1
1984
+ j = 0
1985
+ while j < rcnt: # check each remote record, stop checking if error
1986
+ pgrec = self.onerecord(rmtrecs, j)
1987
+ if not self.check_remote_status(pgrec, lfile, locrec, locinfo, tempinfo) and 'CA' not in self.params:
1988
+ break
1989
+ j += 1
1990
+ if j == 0: break
1991
+ self.PGOPT['lindex'] = 0
1992
+ return (1 if retcnt > 0 else 0)
1993
+
1994
+ # check update status for given remote file
1995
+ def check_remote_status(self, rmtrec, lfile, locrec, locinfo, tempinfo):
1996
+ rfile = rmtrec['remotefile']
1997
+ rmtinfo = locinfo
1998
+ if not rfile:
1999
+ rfile = lfile
2000
+ rcnt = 1
2001
+ if rfile != locrec['locfile']: rmtinfo += "-" + rfile
2002
+ tempinfo['DC'] = (self.params['DC'][0] if ('DC' in self.params and self.params['DC'][0]) else
2003
+ (rmtrec['download'] if rmtrec['download'] else locrec['download']))
2004
+ rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo)
2005
+ rcnt = len(rfiles) if rfiles else 0
2006
+ if not rcnt: return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr'])
2007
+ self.PGOPT['rcnt'] += rcnt # accumulate remote file counts
2008
+ if tempinfo['DC']:
2009
+ self.PGOPT['PCNT'] = self.count_pattern_path(tempinfo['DC'])
2010
+ tempinfo['DC'] = None
2011
+ sfile = rmtrec['serverfile']
2012
+ if sfile and sfile != rfile:
2013
+ sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo)
2014
+ scnt = len(sfiles) if sfiles else 0
2015
+ if scnt != rcnt:
2016
+ self.PGOPT['rstat'] = -2
2017
+ return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr'])
2018
+ else:
2019
+ sfiles = rfiles
2020
+ scnt = rcnt
2021
+ dcnt = 0
2022
+ for i in range(rcnt):
2023
+ rmtinfo = locinfo
2024
+ rfile = rfiles[i]
2025
+ if rfile['fname'] != lfile: rmtinfo += "-" + rfile['fname']
2026
+ sfile = sfiles[i]
2027
+ if sfile['fname'] != rfile['fname']: rmtinfo += "-" + sfile['fname']
2028
+ rcmd = rfile['rcmd']
2029
+ if not rcmd:
2030
+ return self.pglog(rmtinfo + ": Missing download command", self.PGOPT['emlerr'])
2031
+ elif not sfile['ready']:
2032
+ self.pglog(rmtinfo + ": NOT Ready yet for update", self.PGOPT['emllog'])
2033
+ break
2034
+ dcnt += 1
2035
+ return 1 if dcnt else 0
2036
+
2037
+ # process the update control records
2038
+ def process_update_controls(self):
2039
+ ctime = self.curtime(1)
2040
+ if not ('CI' in self.params or 'DS' in self.params):
2041
+ self.set_default_value("SN", self.params['LN'])
2042
+ condition = ("(pid = 0 OR lockhost = '{}') AND cntltime <= '{}'".format(self.PGLOG['HOSTNAME'], ctime) +
2043
+ self.self.get_hash_condition('dcupdt') + " ORDER BY hostname DESC, cntltime")
2044
+ pgrecs = self.pgmget("dcupdt", "*", condition, self.PGOPT['extlog'])
2045
+ self.ALLCNT = len(pgrecs['cindex']) if pgrecs else 0
2046
+ if self.ALLCNT == 0:
2047
+ return self.pglog("No update control record idetified due for process", self.LOGWRN)
2048
+ s = 's' if self.ALLCNT > 1 else ''
2049
+ self.pglog("Process {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG)
2050
+ pcnt = 0
2051
+ for i in range(self.ALLCNT):
2052
+ pcnt += self.process_one_control(self.onerecord(pgrecs, i))
2053
+ if pcnt > 1 and not ('CI' in self.params or 'DS' in self.params): break
2054
+ rmsg = "{} of {} update control{} reprocessed by {}".format(pcnt, self.ALLCNT, s, self.PGLOG['CURUID'])
2055
+ if self.PGLOG['CURUID'] != self.params['LN']: rmsg += " for " + self.params['LN']
2056
+ self.pglog(rmsg, self.PGOPT['wrnlog'])
2057
+
2058
+ # process one update control
2059
+ def process_one_control(self, pgrec):
2060
+ cidx = pgrec['cindex']
2061
+ cstr = "Control Index {}".format(cidx)
2062
+ if not pgrec['action']: return self.pglog(cstr + ": Miss update action", self.PGOPT['errlog'])
2063
+ if not (self.OPTS[pgrec['action']][0]&self.PGOPT['CNTLACTS']):
2064
+ return self.pglog("{}: Invalid dsupdt action '{}'".format(cstr, pgrec['action']), self.PGOPT['errlog'])
2065
+ if not pgrec['frequency']: return self.pglog(cstr + ": Miss update Frequency", self.PGOPT['errlog'])
2066
+ if pgrec['pid'] > 0 and self.check_process(pgrec['pid']):
2067
+ if 'CI' in self.params: self.pglog("{}: Under processing {}/{}".format(cstr, pgrec['pid'], self.PGLOG['HOSTNAME']), self.PGOPT['wrnlog'])
2068
+ return 0
2069
+ if pgrec['specialist'] != self.params['LN']:
2070
+ return self.pglog("{}: must be specialist '{}' to process".format(cstr, pgrec['specialist']), self.PGOPT['errlog'])
2071
+ if not ('ED' in self.params or self.valid_data_time(pgrec, cstr, self.PGOPT['wrnlog'])):
2072
+ return 0
2073
+ cmd = "dsupdt "
2074
+ if pgrec['dsid']: cmd += pgrec['dsid'] + ' '
2075
+ cmd += "{} -CI {} ".format(pgrec['action'], cidx)
2076
+ if self.PGLOG['CURUID'] != self.params['LN']: cmd += "-LN " + self.params['LN']
2077
+ cmd += "-d -b"
2078
+ # make sure it is not locked
2079
+ if self.lock_update_control(cidx, 0, self.PGOPT['errlog']) <= 0: return 0
2080
+ self.pglog("{}-{}{}: {}".format(self.PGLOG['HOSTNAME'], pgrec['specialist'], self.current_datetime(), cmd), self.LOGWRN|self.FRCLOG)
2081
+ os.system(cmd + " &")
2082
+ return 1
2083
+
2084
+ # move the previous archived version controlled files
2085
+ def move_archived_file(self, ainfo, archived):
2086
+ stat = 0
2087
+ if 'wfile' in ainfo:
2088
+ type = ainfo['wtype']
2089
+ pgrec = ainfo['types'][type]
2090
+ if pgrec and pgrec['vindex']:
2091
+ tofile = fromfile = ainfo['wfile']
2092
+ ftype = "Web"
2093
+ ttype = " Saved"
2094
+ i = 0
2095
+ while True: # create tofile name
2096
+ if i > 0: tofile = "{}.vbu{}".format(fromfile, i)
2097
+ if not self.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(self.params['DS'], tofile), self.PGOPT['extlog']):
2098
+ break
2099
+ i += 1
2100
+ stat = self.pgsystem("dsarch {} MV -WF {} -WT {} -SF {} -ST V -KM -TS".format(self.params['DS'], fromfile, type, tofile), self.PGOPT['emerol'], 5)
2101
+ if stat == 0 and ainfo['sfile']:
2102
+ type = ainfo['stype']
2103
+ pgrec = ainfo['types'][type]
2104
+ if pgrec and pgrec['vindex']:
2105
+ fromfile = ainfo['sfile']
2106
+ ftype = "Saved"
2107
+ ttype = ''
2108
+ i = 0
2109
+ while True: # create tofile name
2110
+ tofile = "{}.vbu{}".format(fromfile, i)
2111
+ if not self.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(self.params['DS'], tofile), self.PGOPT['extlog']):
2112
+ break
2113
+ i += 1
2114
+ stat = self.pgsystem("dsarch {} MV -RF {} -OT {} -SF {} -ST V".format(self.params['DS'], fromfile, type, tofile), self.PGOPT['emerol'], 5)
2115
+ if stat:
2116
+ self.PGOPT['vcnt'] += 1
2117
+ if 'NE' in self.params or 'EE' in self.params:
2118
+ if 'NE' in self.params: del self.params['NE']
2119
+ if 'EE' in self.params: del self.params['EE']
2120
+ self.params['SE'] = 1 # email summary at least
2121
+ self.PGOPT['emllog'] |= self.EMEROL
2122
+ self.pglog("{}-{}-{}: Found newer version-conrolled {} file; move to{} type V {}".format(self.params['DS'], type, fromfile, ftype, ttype, tofile), self.PGOPT['emlsum'])
2123
+ archived = 0
2124
+ return archived
2125
+
2126
# main function to execute this script
def main():
    """Program entry point: build a DsUpdt object, parse options, run actions."""
    # renamed local from 'object' to avoid shadowing the builtin
    dsupdt = DsUpdt()
    dsupdt.read_parameters()
    dsupdt.start_actions()
    dsupdt.pgexit(0)

# call main() to start program
if __name__ == "__main__": main()