rda-python-common 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1534 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : pg_util.py -- module for misc utilities.
5
+ # Author : Zaihua Ji, zji@ucar.edu
6
+ # Date : 07/27/2020
7
+ # 2025-01-10 transferred to package rda_python_common from
8
+ # https://github.com/NCAR/rda-shared-libraries.git
9
+ # 2025-11-20 convert to class PgUtil
10
+ # Purpose : python library module for global misc utilities
11
+ #
12
+ # Github : https://github.com/NCAR/rda-python-common.git
13
+ #
14
+ ###############################################################################
15
+ #
16
+ import os
17
+ import re
18
+ import time
19
+ import datetime
20
+ import calendar
21
+ import glob
22
+ from os import path as op
23
+ from .pg_log import PgLOG
24
+
25
+ class PgUtil(PgLOG):
26
+
27
def __init__(self):
    """Initialize the parent class, then build the date/time lookup tables."""
    super().__init__()  # parent state must exist before the tables are attached
    # regex alternation per temporal key; longer spellings come first so the
    # regex engine prefers them over their own prefixes
    self.DATEFMTS = dict(
        C='(CC|C)',                         # century
        Y='(YYYY|YY00|YYY|YY|YEAR|YR|Y)',   # YYY means decade
        Q='(QQ|Q)',                         # quarter
        M='(Month|Mon|MM|M)',               # numeric or string month
        W='(Week|Www|W)',                   # string or numeric weekday
        D='(DDD|DD|D)',                     # days in year or month
        H='(HHH|HH|H)',                     # hours in month or day
        N='(NNNN|NN|N)',                    # minutes in day or hour
        S='(SSSS|SS|S)',                    # seconds in hour or minute
    )
    self.MONTHS = ("january february march april may june "
                   "july august september october november december").split()
    # three-letter abbreviations are simply the first three letters
    self.MNS = [name[:3] for name in self.MONTHS]
    self.WDAYS = "sunday monday tuesday wednesday thursday friday saturday".split()
    self.WDS = [name[:3] for name in self.WDAYS]
    # index 1..12 holds the day count of each month (February as 28);
    # index 0 holds 365, presumably the length of a common year
    self.MDAYS = [365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
48
+
49
+ # dt: optional given date in format of "YYYY-MM-DD"
50
+ # return weekday: 0 - Sunday, 1 - Monday, ..., 6 - Saturday
51
def get_weekday(self, date=None):
    """Return the weekday number: 0 - Sunday, 1 - Monday, ..., 6 - Saturday.

    date: optional date in "YYYY-MM-DD" form.  When omitted the current day
          is used, in GMT if self.PGLOG['GMTZ'] is set and local time otherwise.
    """
    if date is not None:
        tm = time.strptime(str(date), "%Y-%m-%d")
    elif self.PGLOG['GMTZ']:
        tm = time.gmtime()
    else:
        tm = time.localtime()
    # struct_time numbers Monday as 0; rotate so that Sunday becomes 0
    return (tm.tm_wday + 1) % 7
57
+
58
+ # mn: given month string like "Jan" or "January", or numeric number 1 to 12
59
+ # Return: numeric month if fmt is not given (default); three-character or full month name for the given fmt
60
def get_month(self, mn, fmt=None):
    """Convert a month between its numeric and textual forms.

    mn:  month number (int or digit string) or a month name / name prefix
         such as "Jan" or "January" (case-insensitive).
    fmt: optional output format.  Two characters (e.g. "MM") give a
         zero-padded number; a format starting with "mon" gives the
         three-letter name when it is three characters long and the full
         name otherwise, in the case suggested by the format ("mon", "Mon",
         "MON"); anything else gives the plain number as a string.
    Return: the month number when fmt is empty or the month is outside
            1-12; otherwise the formatted string.  A name that matches no
            month is returned unchanged.
    """
    if not isinstance(mn, int):
        if re.match(r'^\d+$', mn):
            mn = int(mn)
        else:
            # compare as plain text: the name is data, not a regular
            # expression, so characters like "(" or "." must not be interpreted
            prefix = mn.lower()
            for idx, name in enumerate(self.MONTHS):
                if name.startswith(prefix):
                    mn = idx + 1
                    break
    # an unmatched name is still a str here and must not be compared to ints
    if not (fmt and isinstance(mn, int) and 0 < mn < 13):
        return mn
    if len(fmt) == 2:
        return "{:02}".format(mn)
    if not re.match(r'^mon', fmt, re.I):
        return str(mn)
    smn = self.MNS[mn - 1] if len(fmt) == 3 else self.MONTHS[mn - 1]
    if fmt.startswith('Mon'):
        smn = smn.capitalize()
    elif fmt.startswith('MON'):
        smn = smn.upper()
    return smn
84
+
85
+ # wday: given weekday string like "Sun" or "Sunday", or numeric number 0 to 6
86
+ # Return: numeric weekday if fmt is not given (default); three-character or full weekday name for the given fmt
87
def get_wday(self, wday, fmt=None):
    """Convert a weekday between its numeric and textual forms.

    wday: weekday number (int or digit string, 0 = Sunday ... 6 = Saturday)
          or a weekday name / name prefix such as "Sun" or "Sunday"
          (case-insensitive).
    fmt:  optional output format.  Four characters (e.g. "Week") give the
          full name, three characters (e.g. "Www") the three-letter name,
          anything else the plain number as a string.  Leading "We"/"Ww"
          capitalizes the name, leading "WE"/"WW" upper-cases it.
    Return: the weekday number when fmt is empty or the weekday is outside
            0-6; otherwise the formatted string.  A name that matches no
            weekday is returned unchanged.
    """
    if not isinstance(wday, int):
        if re.match(r'^\d+$', wday):
            wday = int(wday)
        else:
            # plain prefix comparison; the name must not be read as a regex
            prefix = wday.lower()
            for idx, name in enumerate(self.WDAYS):
                if name.startswith(prefix):
                    wday = idx
                    break
    # an unmatched name is still a str here and must not be compared to ints
    if not (fmt and isinstance(wday, int) and 0 <= wday <= 6):
        return wday
    slen = len(fmt)
    if slen == 4:
        # index with the resolved weekday; the search loop variable does not
        # exist when the caller passed a number
        swday = self.WDAYS[wday]
        if fmt.startswith('We'):
            swday = swday.capitalize()
        elif fmt.startswith('WE'):
            swday = swday.upper()
    elif slen == 3:
        swday = self.WDS[wday]
        if fmt.startswith('Ww'):
            swday = swday.capitalize()
        elif fmt.startswith('WW'):
            swday = swday.upper()
    else:
        swday = str(wday)
    return swday
115
+
116
+ # file: given file name
117
+ # Return: type if given file name is a valid online file; '' otherwise
118
+ @staticmethod
119
+ def valid_online_file(file, type = None, exists = None):
120
+ if exists is None or exists:
121
+ if not op.exists(file): return '' # file does not exist
122
+ bname = op.basename(file)
123
+ if re.match(r'^,.*', bname): return '' # hidden file
124
+ if re.search(r'index\.(htm|html|shtml)$', bname, re.I): return '' # index file
125
+ if type and type != 'D': return type
126
+ if re.search(r'\.(doc|php|html|shtml)(\.|$)', bname, re.I): return '' # file with special extention
127
+ return 'D'
128
+
129
+ # Return: current time string in format of HH:MM:SS
130
def curtime(self, getdate=False):
    """Return the current time as "HH:MM:SS".

    getdate: when true the date is prepended, giving "YYYY-MM-DD HH:MM:SS".
    GMT is used if self.PGLOG['GMTZ'] is set, local time otherwise.
    """
    now = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
    if getdate:
        return time.strftime("%Y-%m-%d %H:%M:%S", now)
    return time.strftime("%H:%M:%S", now)
134
+
135
+ # wrapper function of curtime(True) to get datetime in form of YYYY-MM-DD HH:NN:SS
136
def curdatetime(self):
    """Return the current date and time as "YYYY-MM-DD HH:MM:SS" via curtime(True)."""
    stamp = self.curtime(True)
    return stamp
138
+
139
+ # fmt: optional date format, defaults to YYYY-MM-DD
140
+ # Return: current (date, hour)
141
def curdatehour(self, fmt=None):
    """Return [date, hour] for the current moment.

    fmt: optional date format handed to self.fmtdate(); without it the date
         is rendered as YYYY-MM-DD.  The hour is returned as an integer.
    GMT is used if self.PGLOG['GMTZ'] is set, local time otherwise.
    """
    now = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
    if fmt:
        day = self.fmtdate(now.tm_year, now.tm_mon, now.tm_mday, fmt)
    else:
        day = time.strftime("%Y-%m-%d", now)
    return [day, now.tm_hour]
145
+
146
+ # tm: optional time in seconds since the Epoch
147
+ # Return: current date and time strings
148
def get_date_time(self, tm=None):
    """Split a point in time into [date, time] strings.

    tm: None for now; seconds since the Epoch (int/float); a string, which
        is split on single spaces; or a datetime.datetime / datetime.date /
        datetime.time object.
    Return: ["YYYY-MM-DD", "HH:MM:SS"] for None and Epoch values; the
            space-split pieces for strings and datetimes; [date, "00:00:00"]
            for dates; [None, time] for times; None for anything else.
    """
    if tm is None or (isinstance(tm, (int, float)) and not isinstance(tm, str)):
        # Epoch values follow the same GMT/local switch as "now", so the two
        # kinds of call cannot disagree on the time zone
        if self.PGLOG['GMTZ']:
            ct = time.gmtime() if tm is None else time.gmtime(tm)
        else:
            ct = time.localtime() if tm is None else time.localtime(tm)
        return [time.strftime("%Y-%m-%d", ct), time.strftime("%H:%M:%S", ct)]
    if isinstance(tm, str):
        return tm.split(' ')
    # datetime.datetime must be tested before datetime.date (its base class)
    if isinstance(tm, datetime.datetime):
        return str(tm).split(' ')
    if isinstance(tm, datetime.date):
        return [str(tm), '00:00:00']
    if isinstance(tm, datetime.time):
        return [None, str(tm)]
    return None
166
+
167
+ # tm: optional time in seconds since the Epoch
168
+ # Return: current datetime strings
169
def get_datetime(self, tm=None):
    """Render a point in time as a "YYYY-MM-DD HH:MM:SS" string.

    tm: None for now; seconds since the Epoch (int/float); a string, which
        is returned unchanged; a datetime.datetime (rendered with str());
        or a datetime.date (midnight is appended).
    Return: the datetime string; any other input is returned unchanged.
    """
    if tm is None or isinstance(tm, (int, float)):
        # Epoch values follow the same GMT/local switch as "now"
        if self.PGLOG['GMTZ']:
            ct = time.gmtime() if tm is None else time.gmtime(tm)
        else:
            ct = time.localtime() if tm is None else time.localtime(tm)
        return time.strftime("%Y-%m-%d %H:%M:%S", ct)
    if isinstance(tm, str):
        return tm
    # datetime.datetime must be tested before datetime.date (its base class)
    if isinstance(tm, datetime.datetime):
        return str(tm)
    if isinstance(tm, datetime.date):
        return str(tm) + ' 00:00:00'
    return tm
183
+
184
+ # file: file name; the current timestamp is used if omitted
185
+ # Return: timestamp string in format of 'YYYYMMDDHHMMSS'
186
def timestamp(self, file=None):
    """Return a "YYYYMMDDHHMMSS" timestamp.

    file: optional file name; its last-modified time is used.  Without it
          the current time is used.
    GMT is used if self.PGLOG['GMTZ'] is set, local time otherwise.
    """
    convert = time.gmtime if self.PGLOG['GMTZ'] else time.localtime
    if file is None:
        moment = convert()
    else:
        moment = convert(os.stat(file).st_mtime)  # file last modified time
    return time.strftime("%Y%m%d%H%M%S", moment)
193
+
194
+ # dt: datetime string
195
+ # check date/time and set to default one if empty date
196
+ @staticmethod
197
+ def check_datetime(date, default):
198
+ if not date: return default
199
+ if not isinstance(date, str): date = str(date)
200
+ if re.match(r'^0000', date): return default
201
+ return date
202
+
203
+ # fmt: date format, default to "YYYY-MM-DD"
204
+ # Return: new formated current date string
205
def curdate(self, fmt=None):
    """Return the current date, formatted by self.fmtdate() when fmt is given
    and as YYYY-MM-DD otherwise.

    GMT is used if self.PGLOG['GMTZ'] is set, local time otherwise.
    """
    today = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
    if not fmt:
        return time.strftime("%Y-%m-%d", today)
    return self.fmtdate(today.tm_year, today.tm_mon, today.tm_mday, fmt)
208
+
209
+ # check given string to identify temporal pattern and their units
210
+ # defined in (keys self.DATEFMTS)
211
def temporal_pattern_units(self, string, seps):
    """Find the temporal units used by the delimited patterns in a string.

    string: text holding patterns enclosed by the two separators.
    seps:   [opening, closing] separators; they are placed into a regular
            expression as given.
    Return: dict keyed by the temporal keys found (of D, Q, M, C, Y, H, N,
            S) with the unit step: 3 for Q, 100 for C and 1 otherwise.
    """
    special_steps = {'Q': 3, 'C': 100}
    units = {}
    finder = seps[0] + "([^" + seps[1] + "]+)" + seps[1]
    for pattern in re.findall(finder, string):
        # skip generic pattern and current time
        if re.match(r'^(P\d*|C.+C)$', pattern, re.I):
            continue
        for mkey in ('D', 'Q', 'M', 'C', 'Y', 'H', 'N', 'S'):
            tokens = re.findall(self.DATEFMTS[mkey], pattern, re.I)
            if not tokens:
                continue
            units[mkey] = special_steps.get(mkey, 1)
            # drop the consumed tokens so later keys cannot match inside them
            for token in tokens:
                pattern = pattern.replace(token, '', 1)
    return units
231
+
232
+ # format output for given date and hour
233
def format_datehour(self, date, hour, tofmt=None, fromfmt=None):
    """Format a date plus an hour into one string.

    date:    date value; formatted with self.format_date(str(date), tofmt,
             fromfmt) when truthy, otherwise tofmt itself (or '') is the
             starting text.
    hour:    hour value, or None to leave the hour out.
    tofmt:   target format; every hour token found in the text is replaced
             by the hour (zero-padded to two digits when the token is
             longer than one character).  Without tofmt the hour is
             appended as " HH".
    fromfmt: format of the incoming date, passed through to format_date().
    Return: the formatted date/hour string.
    """
    if date:
        datehour = self.format_date(str(date), tofmt, fromfmt)
    elif tofmt:
        datehour = tofmt
    else:
        datehour = ''
    if hour is None:
        return datehour
    if not tofmt:
        return datehour + " {:02}".format(int(hour))
    # NOTE(review): the hour tokens are matched case-insensitively in the
    # already formatted text, so an "h" inside a spelled-out month name
    # would also be treated as a token - confirm callers avoid that mix
    for fmt in re.findall(self.DATEFMTS['H'], datehour, re.I):
        shr = "{:02}".format(int(hour)) if len(fmt) > 1 else str(hour)
        # count is passed by keyword: the positional form is deprecated
        datehour = re.sub(fmt, shr, datehour, count=1)
    return datehour
252
+
253
+ # split a date, time or datetime into an array according to
254
+ # the sep value; str to int for digital values
255
+ @staticmethod
256
+ def split_datetime(sdt, sep = r'\D'):
257
+ if not isinstance(sdt, str): sdt = str(sdt)
258
+ adt = re.split(sep, sdt)
259
+ acnt = len(adt)
260
+ for i in range(acnt):
261
+ if re.match(r'^\d+$', adt[i]): adt[i] = int(adt[i])
262
+ return adt
263
+
264
+ # date: given date in format of fromfmt
265
+ # tofmt: date formats; ex. "Month D, YYYY"
266
+ # fromfmt: date formats, default to YYYY-MM-DD
267
+ # Return: new formated date string according to tofmt
268
def format_date(self, cdate, tofmt=None, fromfmt=None):
    """Parse a date written in fromfmt and re-format it according to tofmt.

    cdate:   date value; falsy values are returned unchanged and
             non-strings are converted with str().
    tofmt:   target format, ex. "Month D, YYYY"; handed to self.fmtdate()
             (or self.fmtdatehour() when an hour token was parsed).
    fromfmt: format of cdate built from the tokens in self.DATEFMTS
             (keys D, M, Q, Y, C, H); any other text in it is used as
             regular-expression text.  Without it the first three digit
             groups of cdate are read as year, month and day.
    Return: the re-formatted date string.
    """
    if not cdate:
        return cdate
    if not isinstance(cdate, str):
        cdate = str(cdate)
    if not fromfmt:
        if not tofmt and re.match(r'^\d\d\d\d-\d\d-\d\d$', cdate):
            return cdate  # no need formatting
        ms = re.match(r'^\d+(\W)\d+(\W)\d+', cdate)
        if not ms:
            self.pglog(cdate + ": Invalid date, should be in format YYYY-MM-DD", self.LGEREX)
            return cdate  # only reached if pglog() returns; nothing to parse
        fromfmt = "Y" + ms.group(1) + "M" + ms.group(2) + "D"
    # capture patterns, selected by token width (after the remaps below)
    patterns = [r'(\d\d\d\d)', r'(\d+)', r'(\d\d)', r'(\d\d\d)',
                '(' + '|'.join(self.MNS) + ')', '(' + '|'.join(self.MONTHS) + ')']
    # Locate one token per key.  Found tokens are blanked with same-length
    # filler instead of being removed, so every start offset stays valid for
    # the original fromfmt and tokens can later be ordered reliably.
    found = {}
    masked = fromfmt
    for mkey in ('D', 'M', 'Q', 'Y', 'C', 'H'):
        ms = re.search(self.DATEFMTS[mkey], masked, re.I)
        if not ms:
            continue
        fmt = ms.group(1)
        width = len(fmt)
        if mkey == 'D' and width == 4:
            width = 1
        elif mkey == 'M' and width == 3:
            width = 4  # three-letter month name
        elif mkey == 'Y' and width == 4:
            width = 0  # four-digit year
        found[ms.start()] = (fmt, patterns[width])
        masked = masked.replace(fmt, '\0' * len(fmt))
    # Build the parsing expression in one left-to-right pass, so inserted
    # pattern text can never be mistaken for a later token.
    pieces = []
    tokens = []
    last = 0
    for pos in sorted(found):
        fmt, capture = found[pos]
        pieces.append(fromfmt[last:pos])
        pieces.append(capture)
        tokens.append(fmt)
        last = pos + len(fmt)
    pieces.append(fromfmt[last:])
    # case-insensitive because the month-name alternations are lower case;
    # groups() is a tuple even when there is only one capture
    ms = re.search(''.join(pieces), cdate, re.I)
    values = ms.groups() if ms else ()
    dates = [None, None, None]
    for fmt, val in zip(tokens, values):
        if re.match(r'^Y', fmt, re.I):
            dates[0] = int(val)
            if len(fmt) == 3:
                dates[0] *= 10  # decade
        elif re.match(r'^C', fmt, re.I):
            dates[0] = 100 * int(val)  # year at end of century
        elif re.match(r'^M', fmt, re.I):
            if re.match(r'^Mon', fmt, re.I):
                dates[1] = self.get_month(val)
            else:
                dates[1] = int(val)
        elif re.match(r'^Q', fmt, re.I):
            dates[1] = 3 * int(val)  # month at end of quarter
        elif re.match(r'^H', fmt, re.I):  # hour
            dates.append(int(val))
        else:  # day
            dates[2] = int(val)
    if len(dates) > 3:
        return self.fmtdatehour(dates[0], dates[1], dates[2], dates[3], tofmt)
    return self.fmtdate(dates[0], dates[1], dates[2], tofmt)
336
+
337
+ # yr: year value
338
+ # mn: month value, 1-12
339
+ # dy: day of the month
340
+ # hr: hour of the day
341
+ # nn: minute of the hour
342
+ # ss: second of the minute
343
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS"
344
+ # Return: new formated datehour string
345
def fmtdatetime(self, yr, mn, dy, hr=None, nn=None, ss=None, tofmt=None):
    """Format a date and time into one string.

    yr: year value
    mn: month value, 1-12
    dy: day of the month
    hr: hour of the day
    nn: minute of the hour
    ss: second of the minute
    tofmt: format, ex. "Month D, YYYY"; defaults to "YYYY-MM-DD HH:NN:SS".
    Out-of-range seconds, minutes and hours are carried into the next
    larger unit (and finally into the day) before formatting.
    Return: the formatted datetime string.
    """
    if not tofmt:
        tofmt = "YYYY-MM-DD HH:NN:SS"
    tms = [ss, nn, hr, dy]
    fks = ['S', 'N', 'H']
    ups = [60, 60, 24]
    # adjust second/minute/hour values out of range
    for i in range(3):
        if tms[i] is not None and tms[i + 1] is not None:
            carry, tms[i] = divmod(tms[i], ups[i])
            tms[i + 1] += int(carry)
    # tms[3] is the day including any carry from the hour
    sdt = self.fmtdate(yr, mn, tms[3], tofmt)
    # format second/minute/hour values
    for i in range(3):
        if tms[i] is None:
            continue
        ms = re.search(self.DATEFMTS[fks[i]], sdt, re.I)
        if not ms:
            continue
        fmt = ms.group(1)
        # the local must not be called "str": that would hide the builtin
        # for the whole function and break the str() call below
        text = "{:02}".format(tms[i]) if len(fmt) == 2 else str(tms[i])
        sdt = re.sub(fmt, text, sdt, count=1)
    return sdt
374
+
375
+ # yr: year value
376
+ # mn: month value, 1-12
377
+ # dy: day of the month
378
+ # hr: hour of the day
379
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD:HH"
380
+ # Return: new formated datehour string
381
def fmtdatehour(self, yr, mn, dy, hr, tofmt=None):
    """Format a date and an hour into one string.

    yr: year value
    mn: month value, 1-12
    dy: day of the month
    hr: hour of the day; values outside 0-23 are carried into the day
    tofmt: format, ex. "Month D, YYYY"; defaults to "YYYY-MM-DD:HH".
    Return: the formatted datehour string.
    """
    if not tofmt:
        tofmt = "YYYY-MM-DD:HH"
    if hr is not None and dy is not None:
        # adjust hour value out of range
        carry, hr = divmod(hr, 24)
        dy += int(carry)
    datehour = self.fmtdate(yr, mn, dy, tofmt)
    if hr is None:
        return datehour
    ms = re.search(self.DATEFMTS['H'], datehour, re.I)
    if ms:
        fmt = ms.group(1)
        shr = "{:02}".format(hr) if len(fmt) == 2 else str(hr)
        # count is passed by keyword: the positional form is deprecated
        datehour = re.sub(fmt, shr, datehour, count=1)
    return datehour
403
+
404
+ # yr: year value
405
+ # mn: month value, 1-12
406
+ # dy: day of the month
407
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD"
408
+ # Return: new formated date string
409
def fmtdate(self, yr, mn, dy, tofmt=None):
    """Format a year/month/day into a date string.

    yr: year value
    mn: month value, 1-12
    dy: day of the month
    tofmt: format built from the tokens in self.DATEFMTS, ex.
           "Month D, YYYY"; defaults to "YYYY-MM-DD".
    The values are first normalized by self.adjust_ymd().  One token each
    is replaced for the day, the year (or century when no year token is
    present) and the month (or quarter when no month token is present); a
    unit whose argument is None is left untouched.
    Return: the formatted date string.
    """
    (y, m, d) = self.adjust_ymd(yr, mn, dy)
    if not tofmt or tofmt == 'YYYY-MM-DD':
        return "{}-{:02}-{:02}".format(y, m, d)
    if dy is not None:
        md = re.search(self.DATEFMTS['D'], tofmt, re.I)
        if md:
            fmt = md.group(1)  # day
            slen = len(fmt)
            if slen > 2:  # days of the year
                # NOTE(review): February counts as self.MDAYS[2]; whether
                # that entry is adjusted for leap years is not visible
                # here - confirm
                for i in range(1, m):
                    d += self.MDAYS[i]
                sdy = "{:03}".format(d)
            elif slen == 2:
                sdy = "{:02}".format(d)
            else:
                sdy = str(d)
            tofmt = re.sub(fmt, sdy, tofmt, count=1)
    # The year/century is substituted before the month: the tokens are
    # matched case-insensitively, so once a month name such as "January" or
    # "March" is in the text its "y"/"c" would be taken for the token.
    if yr is not None:
        md = re.search(self.DATEFMTS['Y'], tofmt, re.I)
        if md:
            fmt = md.group(1)  # year
            slen = len(fmt)
            if slen == 2:
                syr = "{:02}".format(y % 100)
            elif slen == 3:  # decade
                if y > 999:
                    y = y // 10
                syr = "{:03}".format(y)
            else:
                if re.search(r'^YY00', fmt, re.I):
                    y = 100 * (y // 100)  # hundred years
                syr = "{:04}".format(y)
            tofmt = re.sub(fmt, syr, tofmt, count=1)
        else:
            md = re.search(self.DATEFMTS['C'], tofmt, re.I)
            if md:
                fmt = md.group(1)  # century
                if y > 999:
                    y = 1 + y // 100
                elif y > 99:
                    y = 1 + y // 10
                syr = "{:02}".format(y)
                tofmt = re.sub(fmt, syr, tofmt, count=1)
    if mn is not None:
        md = re.search(self.DATEFMTS['M'], tofmt, re.I)
        if md:
            fmt = md.group(1)  # month
            slen = len(fmt)
            if slen == 2:
                smn = "{:02}".format(m)
            elif re.match(r'^mon', fmt, re.I):
                smn = self.MNS[m - 1] if slen == 3 else self.MONTHS[m - 1]
                if re.match(r'^Mo', fmt):
                    smn = smn.capitalize()
                elif re.match(r'^MO', fmt):
                    smn = smn.upper()
            else:
                smn = str(m)
            tofmt = re.sub(fmt, smn, tofmt, count=1)
        else:
            md = re.search(self.DATEFMTS['Q'], tofmt, re.I)
            if md:
                fmt = md.group(1)  # quarter
                m = (m + 2) // 3
                smn = "{:02}".format(m) if len(fmt) == 2 else str(m)
                tofmt = re.sub(fmt, smn, tofmt, count=1)
    return tofmt
474
+
475
+ # format given date and time into standard timestamp
476
+ @staticmethod
477
+ def join_datetime(sdate, stime):
478
+ if not sdate: return None
479
+ if not stime: stime = "00:00:00"
480
+ if not isinstance(sdate, str): sdate = str(sdate)
481
+ if not isinstance(stime, str): stime = str(stime)
482
+ if re.match(r'^\d:', stime): stime = '0' + stime
483
+ return "{} {}".format(sdate, stime)
484
+ fmttime = join_datetime
485
+
486
+ # split a date or datetime into an array of [date, time]
487
+ @staticmethod
488
+ def date_and_time(sdt):
489
+ if not sdt: return [None, None]
490
+ if not isinstance(sdt, str): sdt = str(sdt)
491
+ adt = re.split(' ', sdt)
492
+ acnt = len(adt)
493
+ if acnt == 1: adt.append('00:00:00')
494
+ return adt
495
+
496
+ # convert given date/time to unix epoch time; -1 if cannot
497
+ @staticmethod
498
+ def unixtime(stime):
499
+ pt = [0]*9
500
+ if not isinstance(stime, str): stime = str(stime)
501
+ ms = re.match(r'^(\d+)-(\d+)-(\d+)', stime)
502
+ if ms:
503
+ for i in range(3):
504
+ pt[i] = int(ms.group(i+1))
505
+ ms = re.search(r'^(\d+):(\d+):(\d+)$', stime)
506
+ if ms:
507
+ for i in range(3):
508
+ pt[i+3] = int(ms.group(i+1))
509
+ pt[8] = -1
510
+ return time.mktime(time.struct_time(pt))
511
+
512
+ # sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
513
+ # edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
514
+ # Return: list of start and end dates in format of YYYY-MM-DD
515
def daterange(self, sdate, edate):
    """Expand partial start/end dates into full YYYY-MM-DD dates.

    sdate: start date as 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'; a partial value
           is completed to the first day of the year or month.
    edate: end date in the same forms; a partial value is completed to the
           last day of the year, or of the month via self.adddate().
    Leading and trailing non-word characters around a partial date are kept.
    Return: [sdate, edate]
    """
    full_date = r'\d+-\d+-\d+'
    year_month = r'^(\W*)(\d+)-(\d+)(\W*)$'
    year_only = r'^(\W*)(\d+)(\W*)$'
    if sdate:
        sdate = sdate if isinstance(sdate, str) else str(sdate)
        if not re.search(full_date, sdate):
            ms = re.match(year_month, sdate)
            if ms:
                lead, year, month, trail = ms.groups()
                sdate = "{}{}-{}-01{}".format(lead, year, month, trail)
            else:
                ms = re.match(year_only, sdate)
                if ms:
                    lead, year, trail = ms.groups()
                    sdate = "{}{}-01-01{}".format(lead, year, trail)
    if edate:
        edate = edate if isinstance(edate, str) else str(edate)
        if not re.search(full_date, edate):
            ms = re.match(year_month, edate)
            if ms:
                lead, year, month, trail = ms.groups()
                first_day = "{}{}-{}-01{}".format(lead, year, month, trail)
                # one month forward and one day back: last day of the month
                edate = self.adddate(first_day, 0, 1, -1)
            else:
                ms = re.match(year_only, edate)
                if ms:
                    lead, year, trail = ms.groups()
                    edate = "{}{}-12-31{}".format(lead, year, trail)
    return [sdate, edate]
538
+
539
+ # date to datetime range
540
+ @staticmethod
541
+ def dtrange(dates):
542
+ date = dates[0]
543
+ if date:
544
+ if not isinstance(date, str): date = str(date)
545
+ dates[0] = date + ' 00:00:00'
546
+ date = dates[1]
547
+ if date:
548
+ if not isinstance(date, str): date = str(date)
549
+ dates[1] = date + ' 23:59:59'
550
+ return dates
551
+
552
+ # sdate: starting date in format of 'YYYY-MM-DD'
553
+ # edate: ending date
554
+ # fmt: period format, ex. "YYYYMon-YYYMon", default to "YYYYMM-YYYYMM"
555
+ # Return: a string of formated period
556
def format_period(self, sdate, edate, fmt=None):
    """Format a start and end date into a period string.

    sdate: starting date holding 'YYYY-MM-DD'
    edate: ending date
    fmt:   period format, ex. "YYYYMon-YYYYMon"; defaults to "YYYYMM-YYYYMM".
           It is split at a dash into a start format, the separator and an
           end format; without a dash the whole fmt formats the start date
           only.  An end format containing "current" is copied literally.
    Return: the formatted period string.
    """
    if fmt:
        ms = re.match(r'^(.*)(\s*-\s*)(.*)$', fmt)
        sfmt, sep, efmt = ms.groups() if ms else (fmt, '', None)
    else:
        sfmt = efmt = "YYYYMM"
        sep = '-'
    date_pattern = r'(\d+)-(\d+)-(\d+)'
    period = ''
    if sdate:
        text = sdate if isinstance(sdate, str) else str(sdate)
        ms = re.search(date_pattern, text)
        if ms:
            yr, mn, dy = ms.groups()
            period = self.fmtdate(int(yr), int(mn), int(dy), sfmt)
    if sep:
        period += sep
    if not efmt:
        return period
    if re.search(r'current', efmt, re.I):
        return period + efmt
    if edate:
        text = edate if isinstance(edate, str) else str(edate)
        ms = re.search(date_pattern, text)
        if ms:
            yr, mn, dy = ms.groups()
            period += self.fmtdate(int(yr), int(mn), int(dy), efmt)
    return period
586
+
587
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
588
+ # newid: True to format a new dsid; defaults to False for now
589
+ # returns a new or old dsid according to the newid option
590
def format_dataset_id(self, dsid, newid=None, logact=None):
    """Format a dataset id in the new ([a-z]NNNNNN) or old (dsNNN.N) style.

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN
    newid:  True for the new style, False for the old one; defaults to
            self.PGLOG['NEWDSID'].
    logact: logging action for self.pglog() on problems; defaults to
            self.LGEREX, a false value silences the message.
    Return: the formatted id, or the id unchanged when it is invalid or
            cannot be converted.
    """
    if newid is None:
        newid = self.PGLOG['NEWDSID']
    if logact is None:
        logact = self.LGEREX
    dsid = str(dsid)
    ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if ms:
        lead, major, minor = ms.groups()
        if lead not in self.PGLOG['DSIDCHRS']:
            if logact:
                self.pglog("{}: dsid leading character must be '{}'".format(dsid, self.PGLOG['DSIDCHRS']), logact)
        elif newid:
            pass  # already in the requested style
        elif not minor.startswith('00'):
            # the old style has room for a single minor digit only
            if logact:
                self.pglog(dsid + ": Cannot convert new dsid to old format", logact)
        else:
            return 'ds{}.{}'.format(major, minor[2])
        return dsid
    ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I) or re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
    if ms:
        template = "d{}00{}" if newid else 'ds{}.{}'
        return template.format(ms.group(1), ms.group(3))
    if logact:
        self.pglog(dsid + ": invalid dataset id", logact)
    return dsid
614
+
615
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
616
+ # newid: True to format a new dsid; defaults to False for now
617
+ # returns a new or old metadata dsid according to the newid option
618
def metadata_dataset_id(self, dsid, newid=None, logact=None):
    """Format a dataset id for metadata use: new style ([a-z]NNNNNN) or the
    old metadata style (NNN.N, without the "ds" prefix).

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN;
            non-strings are converted with str().
    newid:  True for the new style, False for the old one; defaults to
            self.PGLOG['NEWDSID'].
    logact: logging action for self.pglog() on problems; defaults to
            self.LGEREX, a false value silences the message.
    Return: the formatted id, or the id unchanged when it is invalid or
            cannot be converted.
    """
    if newid is None:
        newid = self.PGLOG['NEWDSID']
    if logact is None:
        logact = self.LGEREX
    # same input handling as format_dataset_id(): accept non-string ids and
    # an upper-case "DS" prefix
    dsid = str(dsid)
    ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if ms:
        ids = list(ms.groups())
        if ids[0] not in self.PGLOG['DSIDCHRS']:
            if logact:
                self.pglog("{}: dsid leading character must be '{}'".format(dsid, self.PGLOG['DSIDCHRS']), logact)
            return dsid
        if newid:
            return dsid
        if ids[2][:2] != '00':
            # the old style has room for a single minor digit only
            if logact:
                self.pglog(dsid + ": Cannot convert new dsid to old format", logact)
            return dsid
        return '{}.{}'.format(ids[1], ids[2][2])
    ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I)
    if not ms:
        ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
    if ms:
        if newid:
            return "d{}00{}".format(ms.group(1), ms.group(3))
        return '{}.{}'.format(ms.group(1), ms.group(3))
    if logact:
        self.pglog(dsid + ": invalid dataset id", logact)
    return dsid
641
+
642
+ # idstr: string holding a dsid in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
643
+ # and find it according to the flag value O (Old), N (New) or B (Both) formats
644
+ # returns dsid if found in given id string; None otherwise
645
def find_dataset_id(self, idstr, flag='B', logact=0):
    """Find the first dataset id inside a string.

    idstr:  string that may hold a dsid in form of dsNNN(.|)N, NNN.N or
            [a-z]NNNNNN
    flag:   which styles to look for: O (Old), N (New) or B (Both); the new
            style is tried first and its leading letter must be one of
            self.PGLOG['DSIDCHRS'].
    logact: logging action for self.pglog() when nothing is found; a false
            value (the default) silences the message.
    Return: the dsid found; None otherwise.
    """
    if flag in 'NB':
        hit = re.search(r'(^|\W)(([a-z])\d{6})($|\D)', idstr)
        if hit and hit.group(3) in self.PGLOG['DSIDCHRS']:
            return hit.group(2)
    if flag in 'OB':
        hit = (re.search(r'(^|\W)(ds\d\d\d(\.|)\d)($|\D)', idstr)
               or re.search(r'(^|\W)(\d\d\d\.\d)($|\D)', idstr))
        if hit:
            return hit.group(2)
    if logact:
        self.pglog("{} : No valid dsid found for flag {}".format(idstr, flag), logact)
    return None
655
+
656
+ # find and convert all found dsids according to old/new dsids
657
+ # for newid = False/True
658
def convert_dataset_ids(self, idstr, newid=None, logact=0):
    """Convert every dataset id found in a string to the new or old style.

    idstr:  string holding dataset ids.
    newid:  True to convert old ids to the new style, False for the
            reverse; defaults to self.PGLOG['NEWDSID'].
    logact: logging action passed to self.format_dataset_id().
    Return: (converted string, number of conversions).  An id that cannot
            be converted is left in place and not counted.
    """
    if newid is None:
        newid = self.PGLOG['NEWDSID']
    flag = 'O' if newid else 'N'
    cnt = 0
    if not idstr:
        return (idstr, cnt)
    done = ''     # leading text that has been given up on
    rest = idstr  # text still to be searched
    while True:
        dsid = self.find_dataset_id(rest, flag=flag)
        if not dsid:
            break
        ndsid = self.format_dataset_id(dsid, newid=newid, logact=logact)
        if ndsid != dsid:
            rest = rest.replace(dsid, ndsid)
            cnt += 1
        else:
            # not convertible: move past it, otherwise the same id would
            # be found again on every pass and the loop would never end
            cut = rest.find(dsid) + len(dsid)
            done += rest[:cut]
            rest = rest[cut:]
    return (done + rest, cnt)
670
+
671
+ # records: dict of mutiple records,
672
+ # idx: index of the records to return
673
+ # Return: a dict to the idx record out of records
674
+ @staticmethod
675
+ def onerecord(records, idx):
676
+ record = {}
677
+ for fld in records:
678
+ record[fld] = records[fld][idx]
679
+ return record
680
+
681
+ # records: dict of mutiple records,
682
+ # record: record to add
683
+ # idx: index of the record to add
684
+ # Return: add a record to a dict of lists
685
+ @staticmethod
686
+ def addrecord(records, record, idx):
687
+ if records is None: records = {} # initialize dist of lists structure
688
+ if not records:
689
+ for key in record:
690
+ records[key] = []
691
+ for key in record:
692
+ slen = len(records[key])
693
+ if idx < slen:
694
+ records[key][idx] = record[key]
695
+ else:
696
+ while idx > slen:
697
+ records[key].append(None)
698
+ slen += 1
699
+ records[key].append(record[key])
700
+ return records
701
+
702
+ # convert a hash with multiple rows from pgmget() to an array of hashes
703
+ @staticmethod
704
+ def hash2array(hrecs, hkeys = None):
705
+ if not hkeys: hkeys = list(hrecs)
706
+ acnt = len(hrecs[hkeys[0]]) if hrecs and hkeys[0] in hrecs else 0
707
+ arecs = [None]*acnt
708
+ for i in range(acnt):
709
+ arec = {}
710
+ for hkey in hkeys: arec[hkey] = hrecs[hkey][i]
711
+ arecs[i] = arec
712
+ return arecs
713
+
714
+ # convert an array of hashes to a hash with multiple rows for pgmget()
715
+ @staticmethod
716
+ def array2hash(arecs, hkeys = None):
717
+ hrecs = {}
718
+ acnt = len(arecs) if arecs else 0
719
+ if acnt > 0:
720
+ if not hkeys: hkeys = list(arecs[0])
721
+ for hkey in hkeys:
722
+ hrecs[hkey] = [None]*acnt
723
+ for i in range(acnt): hrecs[hkey][i] = arecs[i][hkey]
724
+ return hrecs
725
+
726
+ # records: dict of mutiple records,
727
+ # opt: 0 - column count,
728
+ # 1 - row count,
729
+ # 2 - both
730
+ # Return: a single number or list of two dependend on given opt
731
+ @staticmethod
732
+ def hashcount(records, opt = 0):
733
+ ret = [0, 0]
734
+ if records:
735
+ clen = len(records)
736
+ if opt == 0 or opt == 2:
737
+ ret[0] = clen
738
+ if opt == 1 or opt == 2:
739
+ ret[1] = len(next(iter(records.values())))
740
+ return ret if opt == 2 else ret[opt]
741
+
742
+ # adict: dict a
743
+ # bdict: dict b
744
+ # default: default values if missed
745
+ # unique: unique join if set
746
+ # Return: the joined dict records with default value for missing ones
747
+ # For unique join, a record in bdict must not be contained in adict already
748
+ @staticmethod
749
+ def joinhash(adict, bdict, default = None, unique = None):
750
+ if not bdict: return adict
751
+ if not adict: return bdict
752
+ akeys = list(adict.keys())
753
+ bkeys = list(bdict.keys())
754
+ acnt = len(adict[akeys[0]])
755
+ bcnt = len(bdict[bkeys[0]])
756
+ ckeys = [] # common keys for unique joins
757
+ # check and assign default value for missing keys in adict
758
+ for bkey in bkeys:
759
+ if bkey in akeys:
760
+ if unique and bkey not in ckeys: ckeys.append(bkey)
761
+ else:
762
+ adict[bkey] = [default]*acnt
763
+ # check and assign default value for missing keys in bdict
764
+ for akey in akeys:
765
+ if akey in bkeys:
766
+ if unique and akey not in ckeys: ckeys.append(akey)
767
+ else:
768
+ bdict[akey] = [default]*bcnt
769
+ if unique: # append bdict
770
+ kcnt = len(ckeys)
771
+ for i in range(bcnt):
772
+ j = 0
773
+ while(j < acnt):
774
+ k = 0
775
+ for ckey in ckeys:
776
+ if PgUtil.pgcmp(adict[ckey][j], bdict[ckey][i]): break
777
+ k += 1
778
+ if k >= kcnt: break
779
+ j += 1
780
+
781
+ if j >= acnt:
782
+ for key in adict:
783
+ adict[key].append(bdict[key][i])
784
+ else:
785
+ for key in adict:
786
+ adict[key].extend(bdict[key])
787
+ return adict
788
+
789
+ # lst1: list 1
790
+ # lst2: list 2
791
+ # unique: unique join if set
792
+ # Return: the joined list
793
+ @staticmethod
794
+ def joinarray(lst1, lst2, unique = None):
795
+ if not lst2: return lst1
796
+ if not lst1: return lst2
797
+ cnt1 = len(lst1)
798
+ cnt2 = len(lst2)
799
+ if unique:
800
+ for i in (cnt2):
801
+ for j in (cnt1):
802
+ if PgUtil.pgcmp(lst1[j], lst2[i]) != 0: break
803
+ if j >= cnt1:
804
+ lst1.append(lst2[i])
805
+ else:
806
+ lst1.extend(lst2)
807
+ return lst1
808
+
809
+ # Function: crosshash(ahash, bhash)
810
+ # Return: a reference to the cross-joined hash records
811
+ @staticmethod
812
+ def crosshash(ahash, bhash):
813
+ if not bhash: return ahash
814
+ if not ahash: return bhash
815
+ akeys = list(ahash.keys())
816
+ bkeys = list(bhash.keys())
817
+ acnt = len(ahash[akeys[0]])
818
+ bcnt = len(bhash[bkeys[0]])
819
+ rets = {}
820
+ for key in akeys: rets[key] = []
821
+ for key in bkeys: rets[key] = []
822
+ for i in range(acnt):
823
+ for j in range(bcnt):
824
+ for key in akeys: rets[key].append(ahash[key][i])
825
+ for key in bkeys: rets[key].append(bhash[key][j])
826
+ return rets
827
+
828
+ # strip database and table names for a field name
829
+ @staticmethod
830
+ def strip_field(field):
831
+ ms = re.search(r'\.([^\.]+)$', field)
832
+ if ms: field = ms.group(1)
833
+ return field
834
+
835
+ # pgrecs: dict obterned from pgmget()
836
+ # flds: list of single letter fields to be sorted on
837
+ # hash: table dict for pre-defined fields
838
+ # patterns: optional list of temporal patterns for order fields
839
+ # Return: a sorted dict list
840
def sorthash(self, pgrecs, flds, hash, patterns=None):
    """Sort a dict of lists on one or more fields.

    pgrecs:   dict obtained from pgmget() (field name -> list of values)
    flds:     list of single-letter fields to be sorted on; a lower-case
              letter requests descending order.
    hash:     table dict for pre-defined fields; the field name is read
              from element 1 of hash[LETTER] and stripped of its prefixes.
    patterns: optional list of temporal patterns for the order fields; a
              value with a pattern is sorted on its "YYYYMMDDHH" form.
    Return: a new sorted dict of lists (pgrecs itself when it has fewer
            than two rows).
    """
    fcnt = len(flds)  # count of fields to be sorted on
    # sorting order per field: descending (-1) or ascending (1)
    desc = [-1 if fld.islower() else 1 for fld in flds]
    # full field names to be sorted on
    fields = [self.strip_field(hash[fld.upper()][1]) for fld in flds]
    nums = [1] * fcnt  # every column counts as numerical until pgnum() says otherwise
    count = len(pgrecs[fields[0]])  # row count of pgrecs
    if count < 2:
        return pgrecs  # no need of sorting
    pcnt = len(patterns) if patterns else 0
    # build one sortable row per record: the sort values plus the row index
    srecs = []
    for i in range(count):
        pgrec = self.onerecord(pgrecs, i)
        rec = []
        for j, field in enumerate(fields):
            val = pgrec[field]
            if j < pcnt and patterns[j]:
                # sort on the temporal part of the value matching the pattern
                val = self.format_date(val, "YYYYMMDDHH", patterns[j])
            if nums[j]:
                nums[j] = self.pgnum(val)
            rec.append(val)
        rec.append(i)  # extra column caching the original row index
        srecs.append(rec)
    srecs = self.quicksort(srecs, 0, count - 1, desc, fcnt, nums)
    # rebuild the columns in the order given by the cached row indexes
    rets = {fld: [] for fld in pgrecs}
    for i in range(count):
        row = srecs[i][fcnt]
        for fld in pgrecs:
            rets[fld].append(pgrecs[fld][row])
    return rets
880
+
881
+ # Return: the number of days between date1 and date2
882
+ @staticmethod
883
+ def diffdate(date1, date2):
884
+ ut1 = ut2 = 0
885
+ if date1: ut1 = PgUtil.unixtime(date1)
886
+ if date2: ut2 = PgUtil.unixtime(date2)
887
+ return round((ut1 - ut2)/86400) # 24*60*60
888
+
889
+ # Return: the number of seconds between time1 and time2
890
+ @staticmethod
891
+ def difftime(time1, time2):
892
+ ut1 = ut2 = 0
893
+ if time1: ut1 = PgUtil.unixtime(time1)
894
+ if time2: ut2 = PgUtil.unixtime(time2)
895
+ return round(ut1 - ut2)
896
+ diffdatetime = difftime
897
+
898
+ # Return: the number of days between date and '1970-01-01 00:00:00'
899
@staticmethod
def get_days(cdate):
    """Return the number of days between cdate and '1970-01-01'.

    cdate is converted with str() and handed to PgUtil.diffdate().
    """
    epoch = '1970-01-01'
    return PgUtil.diffdate(str(cdate), epoch)
902
+
903
+ # Function: get_month_days(date)
904
+ # Return: the number of days in given month
905
+ @staticmethod
906
+ def get_month_days(cdate):
907
+ ms = re.match(r'^(\d+)-(\d+)', str(cdate))
908
+ if ms:
909
+ yr = int(ms.group(1))
910
+ mn = int(ms.group(2))
911
+ return calendar.monthrange(yr, mn)[1]
912
+ else:
913
+ return 0
914
+
915
+ # Function: validate_date(date)
916
+ # Return: a date in format of YYYY-MM-DD in which year, month and day are all validated
917
+ @staticmethod
918
+ def validate_date(cdate):
919
+ ms = re.match(r'^(\d+)-(\d+)-(\d+)', str(cdate))
920
+ if ms:
921
+ (yr, mn, dy) = (int(m) for m in ms.groups())
922
+ if yr < 1000:
923
+ yr += 2000
924
+ elif yr > 9999:
925
+ yr %= 10000
926
+ if mn < 1:
927
+ mn = 1
928
+ elif mn > 12:
929
+ mn = 12
930
+ md = calendar.monthrange(yr, mn)[1]
931
+ if dy < 1:
932
+ dy = 1
933
+ elif dy > md:
934
+ dy = md
935
+ cdate = '{}-{:02d}-{:02d}'.format(yr, mn, dy)
936
+ return cdate
937
+
938
+ # Function: get_date(days)
939
+ # Return: the date in format of "YYYY-MM-DD" for given number of days
940
+ # from '1970-01-01 00:00:00'
941
def get_date(self, days):
    """Return the date that lies int(days) days after '1970-01-01'.

    The arithmetic and the output format are those of self.adddate().
    """
    epoch = '1970-01-01'
    day_offset = int(days)
    return self.adddate(epoch, 0, 0, day_offset)
943
+
944
+ # compare date/hour and return the different hours
945
@staticmethod
def diffdatehour(date1, hour1, date2, hour2):
    """Return the number of hours from date2/hour2 to date1/hour1.

    A None hour is taken as 23.  The day difference comes from
    PgUtil.diffdate() and is weighted by 24.
    """
    last_hour = 23
    first = last_hour if hour1 is None else hour1
    second = last_hour if hour2 is None else hour2
    return (first - second) + 24*PgUtil.diffdate(date1, date2)
950
+
951
+ # hour difference between GMT and local time
952
def diffgmthour(self):
    """Return the current hour difference, GMT minus local time.

    Both clocks are sampled now; year/month/day are formatted with
    self.fmtdate() and compared together with the hour field through
    self.diffdatehour().
    """
    gmt_now = time.gmtime()
    local_now = time.localtime()
    gmt_date = self.fmtdate(gmt_now.tm_year, gmt_now.tm_mon, gmt_now.tm_mday)
    local_date = self.fmtdate(local_now.tm_year, local_now.tm_mon, local_now.tm_mday)
    return self.diffdatehour(gmt_date, gmt_now.tm_hour, local_date, local_now.tm_hour)
960
+
961
+ # compare date and time (if given) and return 1, 0 and -1
962
@staticmethod
def cmptime(date1, time1, date2, time2):
    """Compare two date/time pairs and return the PgUtil.pgcmp() result.

    Each pair is first merged with PgUtil.join_datetime(); the two merged
    values are then compared (1, 0 or -1).
    """
    return PgUtil.pgcmp(
        PgUtil.join_datetime(date1, time1),
        PgUtil.join_datetime(date2, time2),
    )
967
+
968
+ # date: the original date in format of 'YYYY-MM-DD',
969
+ # mf: the number of month fractions to add
970
+ # nf: number of fractions of a month
971
+ # Return: new date
972
def addmonth(self, cdate, mf, nf = 1):
    """Add mf month fractions to cdate, a month being split in nf parts.

    cdate: date string 'YYYY-MM-DD'
    mf:    number of fractions to add (negative to subtract)
    nf:    fractions per month; below 2 (or empty) means whole months,
           which are delegated to self.adddate()

    A month is treated as 30 days and each fraction as int(30/nf) days.
    cdate is returned unchanged when mf is empty or, for nf >= 2, when it
    is not exactly 'digits-digits-digits'.  The final year/month/day is
    handed to self.fmtdate() (assumed to normalize out-of-range values --
    TODO confirm).
    """
    if not mf:
        return cdate
    if not nf or nf < 2:
        return self.adddate(cdate, 0, mf, 0)
    parsed = re.match(r'^(\d+)-(\d+)-(\d+)$', cdate)
    if not parsed:
        return cdate
    year, month, remainder = (int(part) for part in parsed.groups())
    step = int(30/nf)   # number of days in each fraction
    offset = 0          # whole fractions counted from the end of the previous month
    while remainder > step:
        offset += step
        remainder -= step
    offset += mf*step
    if mf > 0:
        while offset >= 30:
            offset -= 30
            month += 1
    else:
        while offset < 0:
            offset += 30
            month -= 1
    return self.fmtdate(year, month, offset + remainder)
998
+
999
+ # add yr years & mn months to yearmonth ym in format YYYYMM
1000
+ @staticmethod
1001
+ def addyearmonth(ym, yr, mn):
1002
+ if yr == None: yr = 0
1003
+ if mn == None: mn = 0
1004
+ ms =re.match(r'^(\d\d\d\d)(\d\d)$', ym)
1005
+ if ms:
1006
+ (syr, smn) = ms.groups()
1007
+ yr = int(syr)
1008
+ mn = int(smn)
1009
+ if mn < 0:
1010
+ while mn < 0:
1011
+ yr -= 1
1012
+ mn += 12
1013
+ else:
1014
+ while mn > 12:
1015
+ yr += 1
1016
+ mn -= 12
1017
+ ym = "{:04}{:02}".format(yr, mn)
1018
+ return ym
1019
+
1020
+ # set number of days in February for leap year according to PGLOG['NOLEAP']
1021
def set_leap_mdays(self, year):
    """Set the year and February lengths in self.MDAYS for the given year.

    MDAYS[0] becomes 366 and MDAYS[2] 29 when year is a leap year and
    self.PGLOG['NOLEAP'] is not set; otherwise they become 365 and 28.
    Returns 1 when the leap values were applied, else 0.
    """
    leap = not self.PGLOG['NOLEAP'] and calendar.isleap(year)
    self.MDAYS[0], self.MDAYS[2] = (366, 29) if leap else (365, 28)
    return 1 if leap else 0
1031
+
1032
+ # wrap on calendar.isleap()
1033
# calendar.isleap is a plain Python function; stored directly as a class
# attribute it would be bound as a method, so self.is_leapyear(year) would
# pass the instance as the year.  staticmethod() makes the call work both
# through the class and through an instance.
is_leapyear = staticmethod(calendar.isleap)
1034
+
1035
+ # return 1 if the given day is the end of the month
1036
def is_end_month(self, yr, mn, dy):
    """Return 1 when dy is the last day of month mn in year yr, else 0.

    self.MDAYS is refreshed for yr through self.set_leap_mdays() before
    the month length is looked up.
    """
    self.set_leap_mdays(yr)
    month_length = self.MDAYS[mn]
    if dy == month_length:
        return 1
    return 0
1039
+
1040
+ # adjust the year, month and day values that are out of range
1041
def adjust_ymd(self, yr, mn, dy):
    """Bring an out-of-range year/month/day triple back into range.

    None values default to 1970, 1 and 1.  Months outside 1..12 are
    folded into the year first; then days below 1 or beyond the month
    length are folded into neighbouring months (or whole years when the
    excess is larger than the year length in self.MDAYS[0]).  The lengths
    are refreshed with self.set_leap_mdays() on every pass.

    Returns the list [year, month, day].
    """
    year = 1970 if yr is None else yr
    month = 1 if mn is None else mn
    day = 1 if dy is None else dy
    while True:
        if month > 12:
            year += 1
            month -= 12
        elif month < 1:
            year -= 1
            month += 12
        else:
            self.set_leap_mdays(year)
            if day < 1:
                if day < -self.MDAYS[0]:
                    # more than a year short: step back a whole year
                    year -= 1
                    day += self.MDAYS[0]
                else:
                    # borrow the days of the previous month
                    month -= 1
                    if month < 1:
                        year -= 1
                        month += 12
                    day += self.MDAYS[month]
            elif day > self.MDAYS[month]:
                if day > self.MDAYS[0]:
                    # more than a year over: step forward a whole year
                    day -= self.MDAYS[0]
                    year += 1
                else:
                    # carry the excess into the next month
                    day -= self.MDAYS[month]
                    month += 1
            else:
                break
    return [year, month, day]
1076
+
1077
+ # date: the original date in format of 'YYYY-MM-DD',
1078
+ # yr: the number of years to add/subtract from the odate for positive/negative value,
1079
+ # mn: the number of months to add/subtract from the odate for positive/negative value,
1080
+ # dy: the number of days to add/subtract from the odate for positive/negative value)
1081
+ # Return: new date
1082
def adddate(self, cdate, yr, mn = 0, dy = 0, tofmt = None):
    """Add yr years, mn months and dy days to cdate.

    cdate: date containing 'YYYY-MM-DD' (non-strings go through str())
    yr, mn, dy: offsets, negative to subtract; None counts as 0 and
           strings are converted with int()
    tofmt: passed through to self.fmtdate()

    An empty cdate is returned as is, and a cdate without a Y-M-D part is
    returned as a string without change.  When months are added and the
    start day is a month end (checked with self.is_end_month()), the
    result stays on the last day of the target month.  Also available as
    addNoLeapDate.
    """
    def _offset(value):
        # None means "no offset"; numeric strings are accepted
        if value is None:
            return 0
        return int(value) if isinstance(value, str) else value

    if not cdate:
        return cdate
    if not isinstance(cdate, str):
        cdate = str(cdate)
    yr, mn, dy = _offset(yr), _offset(mn), _offset(dy)
    parsed = re.search(r'(\d+)-(\d+)-(\d+)', cdate)
    if not parsed:
        return cdate   # non-standard date format
    year, month, day = (int(part) for part in parsed.groups())
    at_month_end = self.is_end_month(year, month, day) if mn and day > 27 else 0
    if yr:
        year += yr
    if mn:
        # day 0 of the following month resolves to the last day of the target month
        year, month, last_day = self.adjust_ymd(year, month + mn + 1, 0)
        if at_month_end:
            day = last_day
    if dy:
        day += dy
    return self.fmtdate(year, month, day, tofmt)
addNoLeapDate = adddate
1109
+
1110
+ # add given hours to the initial date and time
1111
def addhour(self, sdate, stime, nhour):
    """Add nhour hours to the date sdate and the time stime.

    sdate: date, converted with str() when not already a string
    stime: time starting with the hour digits, e.g. 'HH:MM:SS'
    nhour: hours to add (negative to subtract); numeric strings accepted

    Returns the list [sdate, stime].  Only the leading hour field of stime
    is rewritten (two digits); day overflow is applied to sdate through
    self.adddate().  When nhour is empty, or stime is empty or does not
    start with digits, both values are returned without hour arithmetic.
    """
    if nhour and isinstance(nhour, str): nhour = int(nhour)
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    if stime and not isinstance(stime, str): stime = str(stime)
    if not nhour:
        return [sdate, stime]
    matched = re.match(r'^(\d+)', stime) if stime else None
    if not matched:
        # no hour field to adjust; avoid failing on an unbound hour value
        return [sdate, stime]
    old_hour = matched.group(1)
    hour = int(old_hour) + nhour
    days = 0
    if hour < 0:
        while hour < 0:
            days -= 1
            hour += 24
    else:
        while hour > 23:
            days += 1
            hour -= 24
    new_hour = "{:02}".format(hour)
    if old_hour != new_hour:
        # the match is anchored at the start, so swap the prefix directly
        stime = new_hour + stime[len(old_hour):]
    if days:
        sdate = self.adddate(sdate, 0, 0, days)
    return [sdate, stime]
1133
+
1134
+ # add given years, months, days and hours to the initial date and hour
1135
def adddatehour(self, sdate, nhour, yr, mn, dy, hr = 0):
    """Add years, months, days and hours to a date and an hour value.

    sdate: date, converted with str() when not already a string
    nhour: starting hour (int or numeric string) or None
    yr, mn, dy: offsets handed to self.adddate()
    hr:    hours to add; when set it is combined with nhour (if given)
           and any overflow beyond 0..23 is moved into dy

    Returns the list [sdate, nhour]; nhour is only replaced when it was
    not None and hr is set.
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)
    if hr:
        has_hour = nhour is not None
        if has_hour:
            if isinstance(nhour, str):
                nhour = int(nhour)
            hr += nhour
        if hr < 0:
            while hr < 0:
                dy -= 1
                hr += 24
        else:
            while hr > 23:
                dy += 1
                hr -= 24
        if has_hour:
            nhour = hr
    if yr or mn or dy:
        sdate = self.adddate(sdate, yr, mn, dy)
    return [sdate, nhour]
1152
+
1153
+ # add given yyyy, mm, dd, hh, nn, ss to sdatetime
1154
+ # if nf, add fraction of month only
1155
def adddatetime(self, sdatetime, yy, mm, dd, hh, nn, ss, nf = 0):
    """Add years, months, days, hours, minutes and seconds to a datetime.

    sdatetime: 'date time' separated by exactly one space (non-strings
               are converted with str())
    yy, mm, dd: year/month/day offsets
    hh, nn, ss: hour/minute/second offsets, applied first via self.addtime()
    nf:        when set, mm is a count of month fractions (nf per month)
               and is applied through self.addmonth() instead of adddate()

    Returns the new 'date time' string.  Raises ValueError when sdatetime
    does not split into exactly two parts.

    NOTE: this method used to be defined twice with identical bodies; the
    redundant copy has been removed.
    """
    if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
    (sdate, stime) = re.split(' ', sdatetime)
    if hh or nn or ss: (sdate, stime) = self.addtime(sdate, stime, hh, nn, ss)
    if nf:
        sdate = self.addmonth(sdate, mm, nf)
        mm = 0   # months already applied as fractions
    if yy or mm or dd: sdate = self.adddate(sdate, yy, mm, dd)
    return "{} {}".format(sdate, stime)
1176
+
1177
+ # add given hours, minutes and seconds to the initial date and time
1178
def addtime(self, sdate, stime, h, m, s):
    """Add h hours, m minutes and s seconds to the date sdate and time stime.

    sdate: date, converted with str() when not already a string
    stime: time 'HH:MM:SS' (non-strings are converted with str()); a value
           that does not match that form counts as 00:00:00
    h, m, s: offsets, negative to subtract; empty values count as 0

    Returns the list [sdate, stime] with stime as 'HH:MM:SS'; day overflow
    is applied to sdate through self.adddate().
    """
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    # convert the time itself (previously the result overwrote sdate)
    if stime and not isinstance(stime, str): stime = str(stime)
    ups = (60, 60, 24)
    tms = [s or 0, m or 0, h or 0, 0]   # (sec, min, hour, day)
    if stime:
        ms = re.match(r'^(\d+):(\d+):(\d+)$', stime)
        if ms:
            tms[2] += int(ms.group(1))
            tms[1] += int(ms.group(2))
            tms[0] += int(ms.group(3))
    # carry or borrow from seconds up to days
    for i in range(3):
        while tms[i] < 0:
            tms[i] += ups[i]
            tms[i+1] -= 1
        while tms[i] >= ups[i]:
            tms[i] -= ups[i]
            tms[i+1] += 1
    stime = "{:02}:{:02}:{:02}".format(tms[2], tms[1], tms[0])
    if tms[3]: sdate = self.adddate(sdate, 0, 0, tms[3])
    return [sdate, stime]
1204
+
1205
+ # add time interval array to datetime
1206
+ # opt = -1 - minus, 0 - begin time, 1 - add (default)
1207
def addintervals(self, sdatetime, intv, opt = 1):
    """Apply an interval list to a datetime through self.adddatetime().

    sdatetime: datetime, converted with str() when not already a string
    intv: up to seven values, passed in order as the yy, mm, dd, hh, nn,
          ss and nf arguments of adddatetime(); missing ones count as 0
    opt:  1 adds the intervals; any value below 1 negates the first six
          before adding; 0 additionally adds one second first, treating
          the given datetime as the end of the current interval

    Returns sdatetime unchanged (as a string) when intv is empty.
    """
    if not isinstance(sdatetime, str):
        sdatetime = str(sdatetime)
    if not intv:
        return sdatetime
    parts = [0]*7
    for slot, amount in enumerate(intv):
        parts[slot] = amount
    if opt == 0:
        # move from the end of this interval to the beginning of the next one
        sdatetime = self.adddatetime(sdatetime, 0, 0, 0, 0, 0, 1)
    if opt < 1:
        # the seventh value (nf) is a divisor, not an offset, and keeps its sign
        parts[:6] = [-amount if amount else amount for amount in parts[:6]]
    return self.adddatetime(sdatetime, *parts)
1222
+
1223
+ # adjust end date to the specified day days for frequency of year/month/week
1224
+ # end of period if days == 0
1225
+ # nf - number of fractions of a month, for unit of 'M' only
1226
def enddate(self, sdate, days, unit, nf = 0):
    """Move sdate to a given day of its year, month or week period.

    sdate: date string (non-strings are converted with str())
    days:  target day inside the period; empty means end of the period
    unit:  'Y', 'M' or 'W'; any other value returns sdate unchanged
    nf:    number of fractions of a month, used for unit 'M' only; each
           fraction spans int(30/nf) days

    Dates are rebuilt with self.fmtdate(); for 'W' the weekday comes from
    self.get_weekday() and the shift is done by self.adddate().  A date
    that cannot be parsed for the unit is returned unchanged.
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)
    if days and isinstance(days, str):
        days = int(days)
    if not unit or unit not in 'YMW':
        return sdate
    if unit == 'Y':
        parsed = re.match(r'^(\d+)', sdate)
        if parsed:
            year = int(parsed.group(1))
            # day count from January 1st, or December 31st for end of year
            month, day = (1, days) if days else (12, 31)
            sdate = self.fmtdate(year, month, day)
    elif unit == 'M':
        full = re.match(r'^(\d+)-(\d+)-(\d+)', sdate)
        if full:
            year, month, day = (int(part) for part in full.groups())
        else:
            partial = re.match(r'^(\d+)-(\d+)', sdate)
            if not partial:
                return sdate
            year, month = (int(part) for part in partial.groups())
            day = 1
        if not nf or nf == 1:
            target = days if days else calendar.monthrange(year, month)[1]
            if target != day:
                sdate = self.fmtdate(year, month, target)
        else:
            span = int(30/nf)
            if day >= 28:
                fraction = nf
            else:
                fraction = int(day/span)
                if fraction*span < day:
                    fraction += 1
            if days:
                day = (fraction - 1)*span + days
            elif fraction < nf:
                day = fraction*span
            else:
                # last fraction: day 0 of the next month
                month += 1
                day = 0
            sdate = self.fmtdate(year, month, day)
    elif unit == 'W':
        weekday = self.get_weekday(sdate)
        if days != weekday:
            sdate = self.adddate(sdate, 0, 0, days - weekday)
    return sdate
1274
+
1275
+ # adjust end time to the specified h/n/s for frequency of hour/minute/second
1276
def endtime(self, stime, unit):
    """Move stime to the end of its hour or minute period.

    stime: time string (non-strings are converted with str()); an empty
           value counts as 00:00:00
    unit:  'H' sets minutes and seconds to 59, 'N' sets seconds to 59,
           'S' leaves the time as parsed; a value outside 'HNS' returns
           stime unchanged

    Returns the time formatted as 'HH:MM:SS'.  The parts come from
    self.split_datetime(stime, 'T'), assumed to yield numeric
    [hour, minute, second] -- TODO confirm against that helper.
    """
    if stime and not isinstance(stime, str): stime = str(stime)
    if not (unit and unit in 'HNS'): return stime
    if stime:
        # copy into a list so the parts can be reassigned below
        tm = list(self.split_datetime(stime, 'T'))
    else:
        tm = [0, 0, 0]
    if unit == 'H':
        tm[1] = tm[2] = 59
    elif unit == 'N':
        tm[2] = 59
    elif unit != 'S':
        # multi-letter units such as 'HN' pass the membership test above
        tm[0] = 23
        tm[1] = tm[2] = 59
    # the separator between tm[1] and tm[2] used to be '.', an attribute
    # access on an int that raised AttributeError on every call
    return "{:02}:{:02}:{:02}".format(tm[0], tm[1], tm[2])
1291
+
1292
+ # adjust end time to the specified h/n/s for frequency of year/month/week/day/hour/minute/second
1293
+ def enddatetime(self, sdatetime, unit, days = 0, nf = 0):
1294
+ if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
1295
+ if not (unit and unit in 'YMWDHNS'): return sdatetime
1296
+ (sdate, stime) = re.split(' ', sdatetime)
1297
+ if unit in 'HNS':
1298
+ stime = self.endtime(stime, unit)
1299
+ else:
1300
+ sdate = self.enddate(sdate, days, unit, nf)
1301
+ return "{} {}".format(sdate, stime)
1302
+
1303
+ # get the string length dynamically
1304
+ @staticmethod
1305
+ def get_column_length(colname, values):
1306
+ clen = len(colname) if colname else 2 # initial column length as the length of column title
1307
+ for val in values:
1308
+ if val is None: continue
1309
+ sval = str(val)
1310
+ if sval and not re.search(r'\n', sval):
1311
+ slen = len(sval)
1312
+ if slen > clen: clen = slen
1313
+ return clen
1314
+
1315
+ # Function: hour2time()
1316
+ # Return: time string in format of date HH:MM:SS
1317
+ @staticmethod
1318
+ def hour2time(sdate, nhour, endtime = 0):
1319
+ if sdate and not isinstance(sdate, str): sdate = str(sdate)
1320
+ stime = "{:02}:".format(nhour)
1321
+ if endtime:
1322
+ stime += "59:59"
1323
+ else:
1324
+ stime += "00:00"
1325
+ if sdate:
1326
+ return "{} {}".format(sdate, stime)
1327
+ else:
1328
+ return stime
1329
+
1330
+ # Function: time2hour()
1331
+ # Return: list of date and hour
1332
+ @staticmethod
1333
+ def time2hour(stime):
1334
+ sdate = nhour = None
1335
+ times = stime.split(' ')
1336
+ if len(times) == 2:
1337
+ sdate = times[0]
1338
+ stime = times[1]
1339
+ ms = re.match(r'^(\d+)', stime)
1340
+ if ms: nhour = int(ms.group(1))
1341
+ return [sdate, nhour]
1342
+
1343
+ # get the all column widths
1344
+ @staticmethod
1345
+ def all_column_widths(pgrecs, flds, tdict):
1346
+ colcnt = len(flds)
1347
+ lens = [0]*colcnt
1348
+ for i in range(colcnt):
1349
+ fld = flds[i]
1350
+ if fld not in tdict: continue
1351
+ field = PgUtil.strip_field(tdict[fld][1])
1352
+ lens[i] = PgUtil.get_column_length(None, pgrecs[field])
1353
+ return lens
1354
+
1355
+ # check a given value, return 1 if numeric, 0 otherwise
1356
+ @staticmethod
1357
+ def pgnum(val):
1358
+ if not isinstance(val, str): val = str(val)
1359
+ ms = re.match(r'^\-{0,1}(\d+|\d+\.\d*|d*\.\d+)([eE]\-{0,1}\d+)*$', val)
1360
+ return 1 if ms else 0
1361
+
1362
+ # Function: pgcmp(val1, val2)
1363
+ # Return: 0 if both empty or two values are identical; -1 if val1 < val2; otherwise 1
1364
+ @staticmethod
1365
+ def pgcmp(val1, val2, ignorecase = 0, num = 0):
1366
+ if val1 is None:
1367
+ if val2 is None:
1368
+ return 0
1369
+ else:
1370
+ return -1
1371
+ elif val2 is None:
1372
+ return 1
1373
+ typ1 = type(val1)
1374
+ typ2 = type(val2)
1375
+ if typ1 != typ2:
1376
+ if num:
1377
+ if typ1 is str:
1378
+ typ1 = int
1379
+ val1 = int(val1)
1380
+ if typ2 is str:
1381
+ typ2 = int
1382
+ val2 = int(val2)
1383
+ else:
1384
+ if typ1 != str:
1385
+ typ1 = str
1386
+ val1 = str(val1)
1387
+ if typ2 != str:
1388
+ typ2 = str
1389
+ val2 = str(val2)
1390
+ if typ1 is str:
1391
+ if num:
1392
+ if typ1 is str and PgUtil.pgnum(val1) and PgUtil.pgnum(val2):
1393
+ val1 = int(val1)
1394
+ val2 = int(val2)
1395
+ elif ignorecase:
1396
+ val1 = val1.lower()
1397
+ val2 = val2.lower()
1398
+ if val1 > val2:
1399
+ return 1
1400
+ elif val1 < val2:
1401
+ return -1
1402
+ else:
1403
+ return 0
1404
+
1405
+ # infiles: initial file list
1406
+ # Return: final file list with all the subdirectories expanded
1407
+ @staticmethod
1408
+ def recursive_files(infiles):
1409
+ ofiles = []
1410
+ for file in infiles:
1411
+ if op.isdir(file):
1412
+ ofiles.extend(PgUtil.recursive_files(glob.glob(file + "/*")))
1413
+ else:
1414
+ ofiles.append(file)
1415
+ return ofiles
1416
+
1417
+ # lidx: lower index limit (including)
1418
+ # hidx: higher index limit (excluding)
1419
+ # key: string value to be searched,
1420
+ # list: reference to a sorted list where the key is searched)
1421
+ # Return: index if found; -1 otherwise
1422
+ @staticmethod
1423
+ def asearch(lidx, hidx, key, list):
1424
+ ret = -1
1425
+ if (hidx - lidx) < 11: # use linear search for less than 11 items
1426
+ for midx in range(lidx, hidx):
1427
+ if key == list[midx]:
1428
+ ret = midx
1429
+ break
1430
+ else:
1431
+ midx = (lidx + hidx)/2
1432
+ if key == list[midx]:
1433
+ ret = midx
1434
+ elif key < list[midx]:
1435
+ ret = PgUtil.asearch(lidx, midx, key, list)
1436
+ else:
1437
+ ret = PgUtil.asearch(midx + 1, hidx, key, list)
1438
+ return ret
1439
+
1440
+ # lidx: lower index limit (including)
1441
+ # hidx: higher index limit (excluding)
1442
+ # key: string value to be searched,
1443
+ # list: reference to a sorted list where the key is searched)
1444
+ # Return: index if found; -1 otherwise
1445
+ @staticmethod
1446
+ def psearch(lidx, hidx, key, list):
1447
+ ret = -1
1448
+ if (hidx - lidx) < 11: # use linear search for less than 11 items
1449
+ for midx in range(lidx, hidx):
1450
+ if re.search(list[midx], key):
1451
+ ret = midx
1452
+ break
1453
+ else:
1454
+ midx = int((lidx + hidx)/2)
1455
+ if re.search(list[midx], key):
1456
+ ret = midx
1457
+ elif key < list[midx]:
1458
+ ret = PgUtil.psearch(lidx, midx, key, list)
1459
+ else:
1460
+ ret = PgUtil.psearch(midx + 1, hidx, key, list)
1461
+ return ret
1462
+
1463
+ # quicksort for pattern
1464
+ @staticmethod
1465
+ def quicksort(srecs, lo, hi, desc, cnt, nums = None):
1466
+ i = lo
1467
+ j = hi
1468
+ mrec = srecs[int((lo+hi)/2)]
1469
+ while True:
1470
+ while PgUtil.cmp_records(srecs[i], mrec, desc, cnt, nums) < 0: i += 1
1471
+ while PgUtil.cmp_records(srecs[j], mrec, desc, cnt, nums) > 0: j -= 1
1472
+ if i <= j:
1473
+ if i < j:
1474
+ tmp = srecs[i]
1475
+ srecs[i] = srecs[j]
1476
+ srecs[j] = tmp
1477
+ i += 1
1478
+ j -= 1
1479
+ if i > j: break
1480
+ #recursion
1481
+ if lo < j: srecs = PgUtil.quicksort(srecs, lo, j, desc, cnt, nums)
1482
+ if i < hi: srecs = PgUtil.quicksort(srecs, i, hi, desc, cnt, nums)
1483
+ return srecs
1484
+
1485
+ # compare two arrays
1486
+ @staticmethod
1487
+ def cmp_records(arec, brec, desc, cnt, nums):
1488
+ for i in range(cnt):
1489
+ num = nums[i] if nums else 0
1490
+ ret = PgUtil.pgcmp(arec[i], brec[i], 0, num)
1491
+ if ret != 0:
1492
+ return (ret*desc[i])
1493
+ return 0 # identical records
1494
+
1495
+ # format one floating point value
1496
+ @staticmethod
1497
+ def format_float_value(val, precision = 2):
1498
+ units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
1499
+ if val is None:
1500
+ return ''
1501
+ elif not isinstance(val, int):
1502
+ val = int(val)
1503
+ idx = 0
1504
+ while val >= 1000 and idx < 5:
1505
+ val /= 1000
1506
+ idx += 1
1507
+ return "{:.{}f}{}".format(val, precision, units[idx])
1508
+
1509
+ # check a file is a ASCII text one
1510
+ # return 1 if yes, 0 if not; or -1 if file not checkable
1511
+ @staticmethod
1512
+ def is_text_file(fname, blocksize = 256, threshhold = 0.1):
1513
+ # File doesn't exist or is not a regular file
1514
+ if not op.exists(fname) or not op.isfile(fname): return -1
1515
+ if op.getsize(fname) == 0: return 1 # Empty files are considered text
1516
+ try:
1517
+ buffer = None
1518
+ with open(fname, 'rb') as f:
1519
+ buffer = f.read(blocksize)
1520
+ # Check for null bytes (a strong indicator of a binary file)
1521
+ if not buffer or b'\0' in buffer: return 0
1522
+ text_characters = (
1523
+ b'\t\n\r\f\v' + # Whitespace characters
1524
+ bytes(range(32, 127)) # Printable ASCII characters
1525
+ )
1526
+ non_text_count = 0
1527
+ for byte in buffer:
1528
+ if byte not in text_characters:
1529
+ non_text_count += 1 # Count non-text characters
1530
+ # If a significant portion of the buffer consists of non-text characters,
1531
+ # it's likely a binary file.
1532
+ return 1 if((non_text_count/len(buffer)) < threshhold) else 0
1533
+ except IOError:
1534
+ return -1 # Handle cases where the file cannot be opened or read