rda-python-common 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rda-python-common might be problematic. Click here for more details.

@@ -0,0 +1,1853 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : PgUtil.py -- module for misc utilities.
5
+ # Author : Zaihua Ji, zji@ucar.edu
6
+ # Date : 07/27/2020
7
+ # Purpose : python library module for global misc utilities
8
+ #
9
+ # Work File : $DSSHOME/lib/python/PgUtil.py
10
+ # Github : https://github.com/NCAR/rda-shared-libraries.git
11
+ #
12
+ ###############################################################################
13
+ #
14
+ import os
15
+ import re
16
+ import time
17
+ import datetime
18
+ import calendar
19
+ import glob
20
+ from os import path as op
21
+ import PgLOG
22
+
23
# Regular-expression alternations for the temporal tokens recognised in
# date/time format strings, keyed by a one-letter unit code.
DATEFMTS = {
    'C': '(CC|C)',                        # century
    'Y': '(YYYY|YY00|YYY|YY|YEAR|YR|Y)',  # year; YYY means decade
    'Q': '(QQ|Q)',                        # quarter
    'M': '(Month|Mon|MM|M)',              # numeric or string month
    'W': '(Week|Www|W)',                  # string or numeric weekday
    'D': '(DDD|DD|D)',                    # days in year or month
    'H': '(HHH|HH|H)',                    # hours in month or day
    'N': '(NNNN|NN|N)',                   # minutes in day or hour
    'S': '(SSSS|SS|S)',                   # seconds in hour or minute
}

# Lower-case full month names, January first.
MONTHS = [
    "january", "february", "march", "april",
    "may", "june", "july", "august",
    "september", "october", "november", "december",
]
# Three-letter month abbreviations, in the same order as MONTHS.
MNS = [name[:3] for name in MONTHS]

# Lower-case full weekday names, Sunday first.
WDAYS = [
    "sunday", "monday", "tuesday", "wednesday",
    "thursday", "friday", "saturday",
]
# Three-letter weekday abbreviations, in the same order as WDAYS.
WDS = [name[:3] for name in WDAYS]

# Day counts: index 0 is the total for a non-leap year, indexes 1-12 are
# the months January through December (February counted as 28 days).
MDAYS = [365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
44
+
45
+ #
46
+ # date: optional given date in format of "YYYY-MM-DD"
47
+ # return weekday: 0 - Sunday, 1 - Monday, ..., 6 - Saturday
48
+ #
49
def get_weekday(date = None):
    """Return the weekday number of a date, counting from Sunday.

    date: optional date whose str() has the form "YYYY-MM-DD"; when omitted
          the current date is used, taken in GMT if PgLOG.PGLOG['GMTZ'] is
          set and in local time otherwise.
    Return: 0 - Sunday, 1 - Monday, ..., 6 - Saturday
    """
    if date is not None:
        parsed = time.strptime(str(date), "%Y-%m-%d")
    elif PgLOG.PGLOG['GMTZ']:
        parsed = time.gmtime()
    else:
        parsed = time.localtime()

    # tm_wday counts from Monday (0); shift it so that Sunday becomes 0
    return (parsed.tm_wday + 1) % 7
57
+
58
+ #
59
+ # mn: given month string like "Jan" or "January", or numeric number 1 to 12
60
+ # Return: numeric month if no fmt is given (default); three-character or full month name for a given fmt
61
+ #
62
def get_month(mn, fmt = None):
    """Convert a month given by name or number to a number or a formatted string.

    mn:  month as an int, a string of digits, or a (possibly abbreviated)
         month name such as "Jan" or "January"; a name is used as a
         case-insensitive regular expression matched against the start of
         each entry of MONTHS, and the first hit wins.
    fmt: optional output format: a two-character format gives a zero-padded
         number, "mon"/"Mon"/"MON" a three-letter name, a longer format
         starting with "mon" (any case) the full name, anything else the
         plain number as a string.  The case of the first three letters
         selects lower, capitalized or upper case names.
    Return: the numeric month when fmt is not given or the month is not in
            1..12; otherwise the formatted month string.  A name that
            matches no month is returned unchanged.
    """
    if not isinstance(mn, int):
        if re.match(r'^\d+$', mn):
            mn = int(mn)
        else:
            for idx, name in enumerate(MONTHS):
                if re.match(mn, name, re.I):
                    mn = idx + 1
                    break

    # An unrecognised name is still a string here; check the type first so
    # the range comparison cannot raise TypeError.
    if not (fmt and isinstance(mn, int) and 0 < mn < 13):
        return mn

    slen = len(fmt)
    if slen == 2:
        return "{:02}".format(mn)
    if re.match(r'^mon', fmt, re.I):
        smn = MNS[mn-1] if slen == 3 else MONTHS[mn-1]
        if re.match(r'^Mon', fmt):
            return smn.capitalize()
        if re.match(r'^MON', fmt):
            return smn.upper()
        return smn
    return str(mn)
88
+
89
+ #
90
+ # wday: given weekday string like "Sun" or "Sunday", or numeric number 0 to 6
91
+ # Return: numeric weekday if no fmt is given (default); three-character or full weekday name for a given fmt
92
+ #
93
def get_wday(wday, fmt = None):
    """Convert a weekday given by name or number to a number or a formatted string.

    wday: weekday as an int (0 - Sunday ... 6 - Saturday), a string of
          digits, or a (possibly abbreviated) name such as "Sun" or
          "Sunday"; a name is used as a case-insensitive regular expression
          matched against the start of each entry of WDAYS.
    fmt:  optional output format: four characters give the full name, three
          characters the three-letter name, anything else the plain number
          as a string.  The case of the first two letters ("We"/"WE",
          "Ww"/"WW") selects capitalized or upper case names.
    Return: the numeric weekday when fmt is not given or the weekday is not
            in 0..6; otherwise the formatted weekday string.  A name that
            matches no weekday is returned unchanged.
    """
    if not isinstance(wday, int):
        if re.match(r'^\d+$', wday):
            wday = int(wday)
        else:
            for idx, name in enumerate(WDAYS):
                if re.match(wday, name, re.I):
                    wday = idx
                    break

    # An unrecognised name is still a string here; check the type first so
    # the range comparison cannot raise TypeError.
    if not (fmt and isinstance(wday, int) and 0 <= wday <= 6):
        return wday

    slen = len(fmt)
    if slen == 4:
        # index with the resolved weekday, not with the name-search loop
        # variable, which is undefined for numeric input
        swday = WDAYS[wday]
        if re.match(r'^We', fmt):
            swday = swday.capitalize()
        elif re.match(r'^WE', fmt):
            swday = swday.upper()
    elif slen == 3:
        swday = WDS[wday]
        if re.match(r'^Ww', fmt):
            swday = swday.capitalize()
        elif re.match(r'^WW', fmt):
            swday = swday.upper()
    else:
        swday = str(wday)
    return swday
123
+
124
+ #
125
+ # file: given file name
126
+ # Return: type if given file name is a valid online file; '' otherwise
127
+ #
128
def valid_online_file(file, type = None, exists = None):
    """Classify a file name as a valid online file.

    file:   file name or path
    type:   optional file type supplied by the caller
    exists: when None or true the file must exist on disk; pass a false,
            non-None value to skip the existence check
    Return: '' when the file is missing, hidden, an index page, or (for type
            'D' or no type) carries a doc/php/html/shtml extension; the
            given type when it is set and is not 'D'; 'D' otherwise.
    """
    if (exists is None or exists) and not op.exists(file):
        return ''     # file does not exist

    bname = op.basename(file)

    # Hidden files start with a dot. The original pattern tested for a
    # leading comma, which is kept here so such names are still rejected.
    if re.match(r'^[.,]', bname): return ''

    if re.search(r'index\.(htm|html|shtml)$', bname, re.I): return ''   # index file

    if type and type != 'D': return type

    if re.search(r'\.(doc|php|html|shtml)(\.|$)', bname, re.I): return ''   # file with special extension

    return 'D'
143
+
144
+ #
145
+ # Return: current time string in format of HH:MM:SS
146
+ #
147
def curtime(getdate = False):
    """Return the current time as a string.

    getdate: when true the date is prepended ("YYYY-MM-DD HH:MM:SS");
             otherwise only "HH:MM:SS" is returned.
    The clock is read in GMT if PgLOG.PGLOG['GMTZ'] is set, else in local time.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if getdate:
        return time.strftime("%Y-%m-%d %H:%M:%S", now)
    return time.strftime("%H:%M:%S", now)
154
+
155
+ #
156
+ # wrapper function of curtime(True) to get datetime in form of YYYY-MM-DD HH:NN:SS
157
+ #
158
def curdatetime():
    """Return the current date and time as "YYYY-MM-DD HH:MM:SS" (see curtime)."""
    return curtime(getdate = True)
160
+
161
+ #
162
+ # fmt: optional date format, defaults to YYYY-MM-DD
163
+ # Return: current (date, hour)
164
+ #
165
def curdatehour(fmt = None):
    """Return the current date string and hour as a two-element list.

    fmt: optional date format passed to fmtdate(); defaults to YYYY-MM-DD
    Return: [date string, hour as int]
    The clock is read in GMT if PgLOG.PGLOG['GMTZ'] is set, else in local time.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if fmt:
        today = fmtdate(now.tm_year, now.tm_mon, now.tm_mday, fmt)
    else:
        today = time.strftime("%Y-%m-%d", now)

    return [today, now.tm_hour]
172
+
173
+ #
174
+ # tm: optional time in seconds since the Epoch
175
+ # Return: current date and time strings
176
+ #
177
def get_date_time(tm = None):
    """Split a time value into date and time strings.

    tm: None for the current time (GMT if PgLOG.PGLOG['GMTZ'] is set, else
        local), a "date time" string (split on single spaces), seconds since
        the Epoch, or a datetime.datetime / datetime.date / datetime.time.
    Return: [date, time]; a datetime.date gets time '00:00:00', a
            datetime.time gets date None, a string is returned as its
            space-separated pieces; None for any other type.
    NOTE(review): Epoch seconds are always converted with time.localtime(),
    regardless of PgLOG.PGLOG['GMTZ'] -- confirm that this is intended.
    """
    if tm is None:
        ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()
    elif isinstance(tm, (int, float)) and not isinstance(tm, str):
        ct = time.localtime(tm)
    else:
        if isinstance(tm, str):
            parts = tm.split(' ')
        elif isinstance(tm, datetime.datetime):   # must be tested before datetime.date
            parts = str(tm).split(' ')
        elif isinstance(tm, datetime.date):
            parts = [str(tm), '00:00:00']
        elif isinstance(tm, datetime.time):
            parts = [None, str(tm)]
        else:
            parts = None
        return parts if parts else None

    return [time.strftime("%Y-%m-%d", ct), time.strftime("%H:%M:%S", ct)]
197
+
198
+ #
199
+ # tm: optional time in seconds since the Epoch
200
+ # Return: current datetime strings
201
+ #
202
def get_datetime(tm = None):
    """Return a time value as a single "YYYY-MM-DD HH:MM:SS" string.

    tm: None for the current time (GMT if PgLOG.PGLOG['GMTZ'] is set, else
        local), a string (returned unchanged), seconds since the Epoch
        (converted with time.localtime), a datetime.datetime (its str()),
        or a datetime.date (midnight is appended).
    Return: the datetime string; any other value is returned unchanged.
    """
    if tm is None:
        ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()
        return time.strftime("%Y-%m-%d %H:%M:%S", ct)
    if isinstance(tm, str):
        return tm
    if isinstance(tm, (int, float)):
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(tm))
    if isinstance(tm, datetime.datetime):   # must be tested before datetime.date
        return str(tm)
    if isinstance(tm, datetime.date):
        return str(tm) + ' 00:00:00'

    return tm
218
+
219
+
220
+ #
221
+ # file: file name; the current timestamp is returned if omitted
222
+ # Return: timestamp string in format of 'YYYYMMDDHHMMSS'
223
+ #
224
def timestamp(file = None):
    """Return a compact 'YYYYMMDDHHMMSS' timestamp.

    file: optional file name; when given, the file's last modification time
          is used instead of the current time.
    The time is expressed in GMT if PgLOG.PGLOG['GMTZ'] is set, else in
    local time.
    """
    convert = time.gmtime if PgLOG.PGLOG['GMTZ'] else time.localtime

    if file is None:
        moment = convert()
    else:
        moment = convert(op.getmtime(file))   # file last modified time

    return time.strftime("%Y%m%d%H%M%S", moment)
233
+
234
+ #
235
+ # dt: datetime string
236
+ # check date/time and set to default one if empty date
237
+ #
238
def check_datetime(date, default):
    """Return the given date/time as a string, or a default for an empty one.

    date:    date/time value; non-strings are converted with str()
    default: value returned when date is false or starts with '0000'
    """
    if not date:
        return default

    text = date if isinstance(date, str) else str(date)
    return default if text.startswith('0000') else text
245
+
246
+ #
247
+ # fmt: date format, default to "YYYY-MM-DD"
248
+ # Return: new formated current date string
249
+ #
250
def curdate(fmt = None):
    """Return the current date as a string.

    fmt: optional date format passed to fmtdate(); defaults to "YYYY-MM-DD"
    The clock is read in GMT if PgLOG.PGLOG['GMTZ'] is set, else in local time.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if not fmt:
        return time.strftime("%Y-%m-%d", now)
    return fmtdate(now.tm_year, now.tm_mon, now.tm_mday, fmt)
255
+
256
+ #
257
+ # check given string to identify temporal pattern and their units
258
+ # defined in (keys DATEFMTS)
259
+ #
260
def temporal_pattern_units(string, seps):
    """Find the temporal units used by the delimited patterns in a string.

    string: text holding patterns enclosed between seps[0] and seps[1]
    seps:   opening and closing delimiters; they are inserted into a regular
            expression as they are
    Return: dict keyed by the unit codes of DATEFMTS found in the patterns,
            with value 3 for 'Q', 100 for 'C' and 1 for every other unit
    """
    units = {}
    finder = seps[0] + "([^" + seps[1] + "]+)" + seps[1]

    for token in re.findall(finder, string):
        # skip generic pattern and current time
        if re.match(r'^(P\d*|C.+C)$', token, re.I): continue

        for unit in ('D', 'Q', 'M', 'C', 'Y', 'H', 'N', 'S'):
            found = re.findall(DATEFMTS[unit], token, re.I)
            if not found: continue
            units[unit] = {'Q': 3, 'C': 100}.get(unit, 1)
            # drop what was matched so later units cannot match it again
            for piece in found:
                token = token.replace(piece, '', 1)

    return units
285
+
286
+ #
287
+ # format output for given date and hour
288
+ #
289
def format_datehour(date, hour, tofmt = None, fromfmt = None):
    """Format a date and an hour into one string.

    date:    date value; formatted with format_date(str(date), tofmt, fromfmt).
             When it is empty, tofmt itself (or '') is the starting string.
    hour:    hour value, or None to leave the hour out
    tofmt:   output format; every hour token found in the formatted string
             (DATEFMTS['H'], case-insensitive) is replaced by the hour,
             zero-padded to two digits for tokens longer than one character
    fromfmt: format of the given date, passed to format_date()
    Return: the formatted string; without tofmt the hour is appended as
            " HH" to the date.
    """
    if date:
        datehour = format_date(str(date), tofmt, fromfmt)
    elif tofmt:
        datehour = tofmt
    else:
        datehour = ''

    if hour is None:
        return datehour

    if not tofmt:
        return datehour + " {:02}".format(int(hour))

    for fmt in re.findall(DATEFMTS['H'], datehour, re.I):
        shr = "{:02}".format(int(hour)) if len(fmt) > 1 else str(hour)
        # count must be passed by keyword; the positional form is deprecated
        datehour = re.sub(fmt, shr, datehour, count=1)

    return datehour
311
+
312
+ #
313
+ # split a date, time or datetime into an array according to
314
+ # the sep value; str to int for digital values
315
+ #
316
def split_datetime(sdt, sep = r'\D'):
    """Split a date, time or datetime into its components.

    sdt: value to split; non-strings are converted with str()
    sep: regular expression to split on; defaults to any non-digit character
    Return: list of the pieces, with all-digit pieces converted to int
    """
    if not isinstance(sdt, str): sdt = str(sdt)

    # raw strings keep \D and \d from being read as (invalid) string escapes
    return [int(piece) if re.match(r'^\d+$', piece) else piece
            for piece in re.split(sep, sdt)]
324
+
325
+ #
326
+ # date: given date in format of fromfmt
327
+ # tofmt: date formats; ex. "Month D, YYYY"
328
+ # fromfmt: date formats, default to YYYY-MM-DD
329
+ # Return: new formated date string according to tofmt
330
+ #
331
def format_date(cdate, tofmt = None, fromfmt = None):
    """Reformat a date string from one format to another.

    cdate:   date in the layout described by fromfmt; non-strings are
             converted with str(); an empty value is returned unchanged
    tofmt:   output format, ex. "Month D, YYYY"; passed to fmtdate() or,
             when the input carries an hour, to fmtdatehour()
    fromfmt: input format built from the tokens of DATEFMTS; when omitted
             the date must look like <digits><sep><digits><sep><digits> and
             is read as year, month, day
    Return: the newly formatted date string
    """
    if not cdate: return cdate
    if not isinstance(cdate, str): cdate = str(cdate)
    dates = [None, None, None]      # year, month, day (an hour is appended if found)
    mkeys = ['D', 'M', 'Q', 'Y', 'C', 'H']
    # capture patterns, indexed by the adjusted token length computed below
    patterns = [r'(\d\d\d\d)', r'(\d+)', r'(\d\d)', r'(\d\d\d)',
                '(' + '|'.join(MNS) + ')', '(' + '|'.join(MONTHS) + ')']

    if not fromfmt:
        if not tofmt and re.match(r'^\d\d\d\d-\d\d-\d\d$', cdate):
            return cdate   # no need formatting
        ms = re.match(r'^\d+(\W)\d+(\W)\d+', cdate)
        if ms:
            fromfmt = "Y" + ms.group(1) + "M" + ms.group(2) + "D"
        else:
            PgLOG.pglog(cdate + ": Invalid date, should be in format YYYY-MM-DD", PgLOG.LGEREX)

    # find the token used for each unit, removing it from a scratch copy so
    # that later units cannot match the same text
    pattern = fromfmt
    fmts = {}
    for mkey in mkeys:
        ms = re.search(DATEFMTS[mkey], pattern, re.I)
        if ms:
            fmts[mkey] = ms.group(1)
            pattern = re.sub(fmts[mkey], '', pattern)

    # turn fromfmt into a regular expression, remembering where each token
    # started so the captured groups can be mapped back to units
    formats = {}
    for mkey, fmt in fmts.items():
        i = len(fmt)
        if mkey == 'D':
            if i == 4: i = 1
        elif mkey == 'M':
            if i == 3: i = 4     # three-letter month name
        elif mkey == 'Y':
            if i == 4: i = 0     # four-digit year
        formats[fromfmt.find(fmt)] = fmt
        fromfmt = fromfmt.replace(fmt, patterns[i])

    # month names in the patterns are lower case; match case-insensitively
    # so that names such as "Jan" or "JANUARY" in cdate are recognised
    ms = re.findall(fromfmt, cdate, re.I)
    mcnt = len(ms[0]) if ms else 0
    i = 0
    for k in sorted(formats):
        if i >= mcnt: break
        fmt = formats[k]
        val = ms[0][i]
        if re.match(r'^Y', fmt, re.I):
            dates[0] = int(val)
            if len(fmt) == 3: dates[0] *= 10     # decade
        elif re.match(r'^C', fmt, re.I):
            dates[0] = 100 * int(val)            # year at end of century
        elif re.match(r'^M', fmt, re.I):
            if re.match(r'^Mon', fmt, re.I):
                dates[1] = get_month(val)
            else:
                dates[1] = int(val)
        elif re.match(r'^Q', fmt, re.I):
            dates[1] = 3 * int(val)              # month at end of quarter
        elif re.match(r'^H', fmt, re.I):         # hour
            dates.append(int(val))
        else:                                    # day
            dates[2] = int(val)
        i += 1

    if len(dates) > 3:
        cdate = fmtdatehour(dates[0], dates[1], dates[2], dates[3], tofmt)
    else:
        cdate = fmtdate(dates[0], dates[1], dates[2], tofmt)

    return cdate
406
+
407
+ #
408
+ # yr: year value
409
+ # mn: month value, 1-12
410
+ # dy: day of the month
411
+ # hr: hour of the day
412
+ # nn: minute of the hour
413
+ # ss: second of the minute
414
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS"
415
+ # Return: new formated datehour string
416
+ #
417
def fmtdatetime(yr, mn, dy, hr = None, nn = None, ss = None, tofmt = None):
    """Format date and time components into a datetime string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    hr:    hour of the day
    nn:    minute of the hour
    ss:    second of the minute
    tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS"
    Return: new formatted datetime string
    Out-of-range second/minute/hour values are carried into the next larger
    unit (hours carry into the day) before formatting.
    NOTE(review): the S/N/H tokens are searched case-insensitively in the
    string returned by fmtdate(), so letters of a spelled-out month name
    (e.g. the 'h' in "March") can be taken for a token.
    """
    if not tofmt: tofmt = "YYYY-MM-DD HH:NN:SS"

    tms = [ss, nn, hr, dy]
    fks = ['S', 'N', 'H']
    ups = [60, 60, 24]

    # adjust second/minute/hour values out of range
    for i in range(3):
        if tms[i] is not None and tms[i+1] is not None:
            carry, tms[i] = divmod(tms[i], ups[i])
            tms[i+1] += int(carry)

    # use the adjusted day so that an hour overflow reaches the date
    sdt = fmtdate(yr, mn, tms[3], tofmt)

    # format second/minute/hour values
    for i in range(3):
        if tms[i] is None: continue
        ms = re.search(DATEFMTS[fks[i]], sdt, re.I)
        if ms:
            if len(ms.group(1)) == 2:
                sval = "{:02}".format(tms[i])
            else:
                sval = str(tms[i])
            sdt = sdt[:ms.start()] + sval + sdt[ms.end():]

    return sdt
452
+
453
+ #
454
+ # yr: year value
455
+ # mn: month value, 1-12
456
+ # dy: day of the month
457
+ # hr: hour of the day
458
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD:HH"
459
+ # Return: new formated datehour string
460
+ #
461
def fmtdatehour(yr, mn, dy, hr, tofmt = None):
    """Format date components and an hour into a datehour string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    hr:    hour of the day; a value outside 0-23 is carried into the day
    tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD:HH"
    Return: new formatted datehour string
    """
    if not tofmt: tofmt = "YYYY-MM-DD:HH"

    if hr is not None and dy is not None:   # adjust hour value out of range
        carry, hr = divmod(hr, 24)
        dy += int(carry)

    datehour = fmtdate(yr, mn, dy, tofmt)

    if hr is not None:
        ms = re.search(DATEFMTS['H'], datehour, re.I)
        if ms:
            shr = "{:02}".format(hr) if len(ms.group(1)) == 2 else str(hr)
            datehour = datehour[:ms.start()] + shr + datehour[ms.end():]

    return datehour
488
+
489
+ #
490
+ # yr: year value
491
+ # mn: month value, 1-12
492
+ # dy: day of the month
493
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD"
494
+ # Return: new formated date string
495
+ #
496
def fmtdate(yr, mn, dy, tofmt = None):
    """Format year, month and day into a date string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD"
    Return: new formatted date string
    The values are first normalised with adjust_ymd().  For each unit the
    first matching token of DATEFMTS (case-insensitive) is replaced: day,
    then year (or century when there is no year token), then month (or
    quarter when there is no month token).  The month is substituted last
    because a spelled-out month name contains letters such as 'y' or 'c'
    that would otherwise be taken for a year or century token.
    """
    (y, m, d) = adjust_ymd(yr, mn, dy)
    if not tofmt or tofmt == 'YYYY-MM-DD': return "{}-{:02}-{:02}".format(y, m, d)

    if dy is not None:
        md = re.search(DATEFMTS['D'], tofmt, re.I)
        if md:
            slen = len(md.group(1))
            if slen > 2:     # day of the year
                d += sum(MDAYS[1:m])
                # MDAYS counts February as 28 days; add the leap day
                if m > 2 and y is not None and calendar.isleap(y): d += 1
                sdy = "{:03}".format(d)
            elif slen == 2:
                sdy = "{:02}".format(d)
            else:
                sdy = str(d)
            tofmt = tofmt[:md.start()] + sdy + tofmt[md.end():]

    if yr is not None:
        md = re.search(DATEFMTS['Y'], tofmt, re.I)
        if md:
            fmt = md.group(1)     # year
            slen = len(fmt)
            if slen == 2:
                syr = "{:02}".format(y%100)
            elif slen == 3:       # decade
                if y > 999: y = int(y/10)
                syr = "{:03}".format(y)
            else:
                if re.search(r'^YY00', fmt, re.I): y = 100*int(y/100)   # hundred years
                syr = "{:04}".format(y)
            tofmt = tofmt[:md.start()] + syr + tofmt[md.end():]
        else:
            md = re.search(DATEFMTS['C'], tofmt, re.I)
            if md:                # century
                if y > 999:
                    y = 1 + int(y/100)
                elif y > 99:
                    y = 1 + int(yr/10)
                syr = "{:02}".format(y)
                tofmt = tofmt[:md.start()] + syr + tofmt[md.end():]

    if mn is not None:
        md = re.search(DATEFMTS['M'], tofmt, re.I)
        if md:
            fmt = md.group(1)     # month
            slen = len(fmt)
            if slen == 2:
                smn = "{:02}".format(m)
            elif re.match(r'^mon', fmt, re.I):
                smn = MNS[m-1] if slen == 3 else MONTHS[m-1]
                if re.match(r'^Mo', fmt):
                    smn = smn.capitalize()
                elif re.match(r'^MO', fmt):
                    smn = smn.upper()
            else:
                smn = str(m)
            tofmt = tofmt[:md.start()] + smn + tofmt[md.end():]
        else:
            md = re.search(DATEFMTS['Q'], tofmt, re.I)
            if md:                # quarter
                m = int((m+2)/3)
                smn = "{:02}".format(m) if len(md.group(1)) == 2 else str(m)
                tofmt = tofmt[:md.start()] + smn + tofmt[md.end():]

    return tofmt
566
+
567
+ #
568
+ # format given date and time into standard timestamp
569
+ #
570
def join_datetime(sdate, stime):
    """Join a date and a time into a "date time" timestamp string.

    sdate: date value; None is returned when it is empty
    stime: time value; "00:00:00" is used when it is empty, and a
           single-digit hour is zero-padded
    Non-string values are converted with str().
    """
    if not sdate:
        return None

    date_text = sdate if isinstance(sdate, str) else str(sdate)
    time_text = stime if stime else "00:00:00"
    if not isinstance(time_text, str):
        time_text = str(time_text)
    if re.match(r'^\d:', time_text):
        time_text = '0' + time_text

    return date_text + " " + time_text

fmttime = join_datetime
581
+
582
+ #
583
+ # split a date or datetime into an array of [date, time]
584
+ #
585
def date_and_time(sdt):
    """Split a date or datetime into a [date, time] list.

    sdt: value to split on single spaces; non-strings are converted with str()
    Return: [None, None] for an empty value; otherwise the space-separated
            pieces, with '00:00:00' appended when there is only one piece
    """
    if not sdt:
        return [None, None]

    text = sdt if isinstance(sdt, str) else str(sdt)
    pieces = text.split(' ')
    if len(pieces) == 1:
        pieces.append('00:00:00')
    return pieces
593
+
594
+ #
595
+ # convert given date/time to unix epoch time; -1 if cannot
596
+ #
597
def unixtime(stime):
    """Convert a date, time or datetime to unix epoch seconds (local time).

    stime: value holding "YYYY-MM-DD" at its start and/or "HH:MM:SS"
           anywhere; non-strings are converted with str().  Parts that are
           not found are left at 0.
    Return: the value of time.mktime() for the parsed fields, with the
            daylight-saving flag left for mktime to decide (-1)
    """
    pt = [0]*9
    if not isinstance(stime, str): stime = str(stime)

    ms = re.match(r'^(\d+)-(\d+)-(\d+)', stime)
    if ms:
        pt[0:3] = [int(val) for val in ms.groups()]

    # not anchored at the start: in a datetime the time follows the date
    ms = re.search(r'(\d+):(\d+):(\d+)', stime)
    if ms:
        pt[3:6] = [int(val) for val in ms.groups()]

    pt[8] = -1
    return time.mktime(time.struct_time(pt))
612
+
613
+ #
614
+ # sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
615
+ # edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
616
+ # Return: list of start and end dates in format of YYYY-MM-DD
617
+ #
618
def daterange(sdate, edate):
    """Expand partial start and end dates to full dates.

    sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
    edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
    Return: [sdate, edate]; a start date is completed with the first month
            and/or day, an end year with -12-31, and an end month with the
            result of adddate(<first of month>, 0, 1, -1).  Non-word
            characters around a partial date are kept.  Empty values and
            values already holding a full date are passed through.
    """
    year_month = r'^(\W*)(\d+)-(\d+)(\W*)$'
    year_only = r'^(\W*)(\d+)(\W*)$'
    full_date = r'\d+-\d+-\d+'

    if sdate:
        if not isinstance(sdate, str): sdate = str(sdate)
        if re.search(full_date, sdate) is None:
            found = re.match(year_month, sdate)
            if found:
                sdate = "{}{}-{}-01{}".format(*found.groups())
            else:
                found = re.match(year_only, sdate)
                if found:
                    sdate = "{}{}-01-01{}".format(*found.groups())

    if edate:
        if not isinstance(edate, str): edate = str(edate)
        if re.search(full_date, edate) is None:
            found = re.match(year_month, edate)
            if found:
                edate = "{}{}-{}-01{}".format(*found.groups())
                edate = adddate(edate, 0, 1, -1)
            else:
                found = re.match(year_only, edate)
                if found:
                    edate = "{}{}-12-31{}".format(*found.groups())

    return [sdate, edate]
644
+
645
+ #
646
+ # date to datetime range
647
+ #
648
def dtrange(dates):
    """Turn a [start date, end date] pair into a datetime range, in place.

    dates: two-element list; ' 00:00:00' is appended to a non-empty first
           element and ' 23:59:59' to a non-empty second element
           (non-strings are converted with str() first)
    Return: the same list object
    """
    for pos, clock in ((0, ' 00:00:00'), (1, ' 23:59:59')):
        value = dates[pos]
        if value:
            dates[pos] = (value if isinstance(value, str) else str(value)) + clock

    return dates
660
+
661
+ #
662
+ # sdate: starting date in format of 'YYYY-MM-DD'
663
+ # edate: ending date
664
+ # fmt: period format, ex. "YYYYMon-YYYMon", default to "YYYYMM-YYYYMM"
665
+ # Return: a string of formated period
666
+ #
667
+ def format_period(sdate, edate, fmt = None):
668
+
669
+ period = ''
670
+
671
+ if not fmt:
672
+ sfmt = efmt = "YYYYMM"
673
+ sep = '-'
674
+ else:
675
+ ms = re.match(r'^(.*)(\s*-\s*)(.*)$', fmt)
676
+ if ms:
677
+ (sfmt, sep, efmt) = ms.groups()
678
+ else:
679
+ sfmt = fmt
680
+ efmt = None
681
+ sep = ''
682
+
683
+ if sdate:
684
+ if not isinstance(sdate, str): sdate = str(sdate)
685
+ ms = re.search(r'(\d+)-(\d+)-(\d+)', sdate)
686
+ if ms:
687
+ (yr, mn, dy) = ms.groups()
688
+ period = fmtdate(int(yr), int(mn), int(dy), sfmt)
689
+
690
+ if sep: period += sep
691
+
692
+ if efmt:
693
+ if re.search(r'current', efmt, re.I):
694
+ period += efmt
695
+ elif edate:
696
+ if not isinstance(edate, str): edate = str(edate)
697
+ ms = re.search(r'(\d+)-(\d+)-(\d+)', edate)
698
+ if ms:
699
+ (yr, mn, dy) = ms.groups()
700
+ period += fmtdate(int(yr), int(mn), int(dy), efmt)
701
+
702
+ return period
703
+
704
+ #
705
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
706
+ # newid: True to format a new dsid; defaults to False for now
707
+ # returns a new or old dsid according to the newid option
708
+ #
709
def format_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX):
    """Convert a dataset id to the new or the old format.

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN
    newid:  true to return the new form [a-z]NNNNNN, false for the old form
            dsNNN.N; defaults to PgLOG.PGLOG['NEWDSID'] as read when the
            module is loaded
    logact: action passed to PgLOG.pglog() for an invalid or unconvertible
            id; a false value suppresses the message
    Return: the converted id, or str(dsid) unchanged when it is invalid or
            cannot be converted
    """
    dsid = str(dsid)

    new_form = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if new_form:
        lead, major, minor = new_form.groups()
        if lead not in PgLOG.PGLOG['DSIDCHRS']:
            if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact)
            return dsid
        if newid: return dsid
        # only ids whose last three digits are 00N map onto an old id
        if minor[:2] != '00':
            if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact)
            return dsid
        return 'ds{}.{}'.format(major, minor[2])

    old_form = (re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I) or
                re.match(r'^(\d\d\d)(\.)(\d)$', dsid))
    if old_form:
        template = "d{}00{}" if newid else 'ds{}.{}'
        return template.format(old_form.group(1), old_form.group(3))

    if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact)
    return dsid
734
+
735
+ #
736
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
737
+ # newid: True to format a new dsid; defaults to False for now
738
+ # returns a new or old metadata dsid according to the newid option
739
+ #
740
def metadata_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX):
    """Convert a dataset id to the new or the old metadata format.

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN
    newid:  true to return the new form [a-z]NNNNNN, false for the old
            metadata form NNN.N (no "ds" prefix); defaults to
            PgLOG.PGLOG['NEWDSID'] as read when the module is loaded
    logact: action passed to PgLOG.pglog() for an invalid or unconvertible
            id; a false value suppresses the message
    Return: the converted id, or str(dsid) unchanged when it is invalid or
            cannot be converted
    """
    # accept non-string ids and an upper-case "DS" prefix, as
    # format_dataset_id() does
    dsid = str(dsid)

    ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if ms:
        lead, major, minor = ms.groups()
        if lead not in PgLOG.PGLOG['DSIDCHRS']:
            if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact)
            return dsid
        if newid: return dsid
        # only ids whose last three digits are 00N map onto an old id
        if minor[:2] != '00':
            if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact)
            return dsid
        return '{}.{}'.format(major, minor[2])

    ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I)
    if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
    if ms:
        if newid:
            return "d{}00{}".format(ms.group(1), ms.group(3))
        return '{}.{}'.format(ms.group(1), ms.group(3))

    if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact)
    return dsid
764
+
765
+
766
+ #
767
+ # idstr: string holding a dsid in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
768
+ # and find it according to the flag value O (Old), N (New) or B (Both) formats
769
+ # returns dsid if found in given id string; None otherwise
770
+ #
771
def find_dataset_id(idstr, flag = 'B', logact = 0):
    """Find the first dataset id in a string.

    idstr:  string that may hold a dsid in form of dsNNN(.|)N, NNN.N or
            [a-z]NNNNNN
    flag:   formats to look for: O (Old), N (New) or B (Both); the new
            format is tried first and its leading letter must be one of
            PgLOG.PGLOG['DSIDCHRS']
    logact: action passed to PgLOG.pglog() when nothing is found; a false
            value suppresses the message
    Return: the dsid if found in the given string; None otherwise
    """
    if flag in 'NB':
        found = re.search(r'(^|\W)(([a-z])\d{6})($|\D)', idstr)
        if found and found.group(3) in PgLOG.PGLOG['DSIDCHRS']:
            return found.group(2)

    if flag in 'OB':
        found = (re.search(r'(^|\W)(ds\d\d\d(\.|)\d)($|\D)', idstr) or
                 re.search(r'(^|\W)(\d\d\d\.\d)($|\D)', idstr))
        if found:
            return found.group(2)

    if logact: PgLOG.pglog("{} : No valid dsid found for flag {}".format(idstr, flag), logact)
    return None
783
+
784
+ #
785
+ # find and convert all found dsids according to old/new dsids
786
+ # for newid = False/True
787
+ #
788
def convert_dataset_ids(idstr, newid = PgLOG.PGLOG['NEWDSID'], logact = 0):
    """Convert the dataset ids found in a string to the new or old format.

    idstr:  string holding dataset ids
    newid:  true to convert old ids to the new format, false to convert new
            ids to the old format; defaults to PgLOG.PGLOG['NEWDSID'] as
            read when the module is loaded
    logact: action passed on to format_dataset_id()
    Return: (converted string, number of distinct ids converted)
    An id that format_dataset_id() returns unchanged is left as it is and
    the search continues behind it; previously such an id was found again
    on every pass and the loop never ended.
    """
    flag = 'O' if newid else 'N'
    cnt = 0
    if not idstr: return (idstr, cnt)

    done = ''        # text that needs no further searching
    rest = idstr     # text still to be searched
    while True:
        dsid = find_dataset_id(rest, flag = flag)
        if not dsid: break
        ndsid = format_dataset_id(dsid, newid = newid, logact = logact)
        if ndsid != dsid:
            rest = rest.replace(dsid, ndsid)
            cnt += 1
        else:
            # Move past the unconvertible id. Its last character stays in
            # rest so an id glued directly to it is not seen as starting
            # at the beginning of the string.
            cut = rest.find(dsid) + len(dsid) - 1
            done += rest[:cut]
            rest = rest[cut:]

    return (done + rest, cnt)
801
+
802
+ #
803
+ # records: dict of mutiple records,
804
+ # idx: index of the records to return
805
+ # Return: a dict to the idx record out of records
806
+ #
807
def onerecord(records, idx):
    """Extract one record from a dict of column lists.

    records: dict of multiple records, one list per field
    idx:     index of the record to return
    Return: a dict mapping every field to its value at index idx
    """
    return {fld: records[fld][idx] for fld in records}
815
+
816
+ #
817
+ # records: dict of mutiple records,
818
+ # record: record to add
819
+ # idx: index of the record to add
820
+ # Return: add a record to a dict of lists
821
+ #
822
def addrecord(records, record, idx):
    """Store one record at a given index of a dict of column lists.

    records: dict of multiple records, one list per field; None or an empty
             dict starts a new structure
    record:  dict of the record to add
    idx:     index of the record to add; an existing entry is overwritten,
             and a column shorter than idx is padded with None first
    Return: the dict of lists (the given object unless it was None)
    """
    if records is None: records = {}   # initialize dict of lists structure

    for key, value in record.items():
        # setdefault also covers a field that is new to a non-empty records
        column = records.setdefault(key, [])
        if idx < len(column):
            column[idx] = value
        else:
            column.extend([None] * (idx - len(column)))
            column.append(value)

    return records
840
+
841
+ #
842
+ # convert a hash with multiple rows from pgmget() to an array of hashes
843
+ #
844
def hash2array(hrecs, hkeys = None):
    """Convert a dict of column lists to a list of row dicts.

    hrecs: dict with one list per field (multiple rows); None or empty
           gives an empty list
    hkeys: optional list of the fields to copy; defaults to all fields of
           hrecs.  The row count is taken from the first of these fields,
           and is 0 when that field is not in hrecs.
    Return: list with one dict per row
    """
    if not hrecs: return []     # also covers hrecs being None
    if not hkeys: hkeys = list(hrecs)
    if hkeys[0] not in hrecs: return []

    return [{hkey: hrecs[hkey][i] for hkey in hkeys}
            for i in range(len(hrecs[hkeys[0]]))]
855
+
856
+ #
857
+ # convert an array of hashes to a hash with multiple rows for pgmget()
858
+ #
859
def array2hash(arecs, hkeys = None):
    """Convert a list of row dicts to a dict of column lists.

    arecs: list with one dict per row; None or empty gives an empty dict
    hkeys: optional list of the fields to copy; defaults to the fields of
           the first row
    Return: dict with one list per field
    """
    if not arecs:
        return {}

    columns = hkeys if hkeys else list(arecs[0])
    return {col: [row[col] for row in arecs] for col in columns}
870
+
871
+ #
872
+ # records: dict of mutiple records,
873
+ # opt: 0 - column count,
874
+ # 1 - row count,
875
+ # 2 - both
876
+ # Return: a single number or list of two dependend on given opt
877
+ #
878
def hashcount(records, opt = 0):
    """Count the columns and/or rows of a dict of column lists.

    records: dict of multiple records, one list per field
    opt:     0 - column count,
             1 - row count (length of the first column),
             2 - both
    Return: a single number, or the list [columns, rows] for opt 2; all
            counts are 0 for empty records
    """
    ncols = nrows = 0

    if records:
        if opt in (0, 2):
            ncols = len(records)
        if opt in (1, 2):
            nrows = len(next(iter(records.values())))

    counts = [ncols, nrows]
    return counts if opt == 2 else counts[opt]
890
+
891
+ #
892
+ # adict: dict a
893
+ # bdict: dict b
894
+ # default: default values if missed
895
+ # unique: unique join if set
896
+ # Return: the joined dict records with default value for missing ones
897
+ # For unique join, a record in bdict must not be contained in adict already
898
+ #
899
def joinhash(adict, bdict, default = None, unique = None):
    """Append the records of one dict of column lists to another.

    adict:   dict a; it is extended in place and returned
    bdict:   dict b; fields it lacks are added to it, filled with default
    default: value used for a field missing from either dict
    unique:  when set, a record of bdict is appended only if no original
             record of adict agrees with it on all fields common to both
             (compared with pgcmp(), where a false result means equal)
    Return: the joined dict; the other dict when one of them is empty
    """
    if not bdict: return adict
    if not adict: return bdict

    acols = list(adict.keys())
    bcols = list(bdict.keys())
    arows = len(adict[acols[0]])
    brows = len(bdict[bcols[0]])

    # give both dicts the same set of fields
    for col in bcols:
        if col not in acols:
            adict[col] = [default]*arows
    for col in acols:
        if col not in bcols:
            bdict[col] = [default]*brows

    if not unique:
        for col in adict:
            adict[col].extend(bdict[col])
        return adict

    shared = [col for col in bcols if col in acols]   # common keys for unique joins
    for i in range(brows):
        duplicate = False
        for j in range(arows):
            # a row is a duplicate when none of the shared fields differs
            if not any(pgcmp(adict[col][j], bdict[col][i]) for col in shared):
                duplicate = True
                break
        if not duplicate:
            for col in adict:
                adict[col].append(bdict[col][i])

    return adict
944
+
945
+ #
946
+ # lst1: list 1
947
+ # lst2: list 2
948
+ # unique: unique join if set
949
+ # Return: the joined list
950
+ #
951
def joinarray(lst1, lst2, unique = None):
    """Append the items of one list to another.

    lst1:   list 1; it is extended in place and returned
    lst2:   list 2
    unique: when set, an item of lst2 is appended only if it equals none of
            the original items of lst1 (pgcmp() returning 0 means equal)
    Return: the joined list; the other list when one of them is empty
    """
    if not lst2: return lst1
    if not lst1: return lst2

    if not unique:
        lst1.extend(lst2)
        return lst1

    cnt1 = len(lst1)    # compare against the original items only
    for item in lst2:
        if not any(pgcmp(lst1[j], item) == 0 for j in range(cnt1)):
            lst1.append(item)

    return lst1
969
+
970
+ #
971
+ # Function: crosshash(ahash, bhash)
972
+ # Return: a reference to the cross-joined hash records
973
+ #
974
def crosshash(ahash, bhash):
    """Cross-join two dicts of column lists.

    ahash: first dict of records, one list per field
    bhash: second dict of records
    Return: a new dict holding every combination of a record of ahash with
            a record of bhash (records of bhash vary fastest); the other
            dict when one of them is empty
    """
    if not bhash: return ahash
    if not ahash: return bhash

    left = list(ahash.keys())
    right = list(bhash.keys())
    nleft = len(ahash[left[0]])
    nright = len(bhash[right[0]])

    rets = {key: [] for key in left + right}
    for i in range(nleft):
        for j in range(nright):
            for key in left:
                rets[key].append(ahash[key][i])
            for key in right:
                rets[key].append(bhash[key][j])

    return rets
992
+
993
+ #
994
+ # strip database and table names for a field name
995
+ #
996
def strip_field(field):
    """Strip database and table names from a field name.

    Return: the text after the last dot of field, or field unchanged when
            it has no dot or nothing follows the last dot
    """
    _, dot, tail = field.rpartition('.')
    return tail if dot and tail else field
1001
+
1002
+ #
1003
+ # pgrecs: dict obterned from pgmget()
1004
+ # flds: list of single letter fields to be sorted on
1005
+ # hash: table dict for pre-defined fields
1006
+ # patterns: optional list of temporal patterns for order fields
1007
+ # Return: a sorted dict list
1008
+ #
1009
def sorthash(pgrecs, flds, hash, patterns = None):
    """Sort a dict of column lists on one or more fields.

    pgrecs:   dict obtained from pgmget(), one list per field
    flds:     list of single-letter fields to be sorted on; a lower-case
              letter sorts that field in descending order
    hash:     table dict for pre-defined fields; the field name is read
              from element 1 of the entry for the upper-case letter
    patterns: optional list of temporal patterns for the sort fields; a
              value with a pattern is sorted on its "YYYYMMDDHH" form as
              produced by format_date()
    Return: a new sorted dict of lists; pgrecs itself when it has fewer
            than two rows
    """
    fcnt = len(flds)     # count of fields to be sorted on

    # sorting order per field, descending (-1) or ascending (1), and the
    # full names of the fields to be sorted on
    desc = [1]*fcnt
    fields = []
    nums = [1]*fcnt      # start by treating every column as numerical
    for i, letter in enumerate(flds):
        if letter.islower(): desc[i] = -1
        fields.append(strip_field(hash[letter.upper()][1]))

    count = len(pgrecs[fields[0]])    # row count of pgrecs
    if count < 2: return pgrecs       # no need of sorting
    pcnt = len(patterns) if patterns else 0

    # build one sortable row per record: the sort values plus the row index
    srecs = []
    for i in range(count):
        pgrec = onerecord(pgrecs, i)
        rec = []
        for j, field in enumerate(fields):
            val = pgrec[field]
            if j < pcnt and patterns[j]:
                # sort on the temporal part of the value matching the pattern
                val = format_date(val, "YYYYMMDDHH", patterns[j])
            if nums[j]: nums[j] = pgnum(val)
            rec.append(val)
        rec.append(i)    # cache the original row index
        srecs.append(rec)

    srecs = quicksort(srecs, 0, count-1, desc, fcnt, nums)

    # rebuild the columns in the order of the cached row indexes
    rets = {fld: [] for fld in pgrecs}
    for i in range(count):
        pgrec = onerecord(pgrecs, srecs[i][fcnt])
        for fld in pgrecs:
            rets[fld].append(pgrec[fld])

    return rets
1059
+
1060
+ #
1061
+ # Return: the number of days between date1 and date2
1062
+ #
1063
def diffdate(date1, date2):
    """Return the number of days from date2 to date1, rounded.

    Each date is converted with unixtime(); an empty date counts as 0
    seconds.
    """
    secs1 = unixtime(date1) if date1 else 0
    secs2 = unixtime(date2) if date2 else 0
    return round((secs1 - secs2)/86400)    # 24*60*60
1069
+
1070
+ #
1071
+ # Return: the number of seconds between time1 and time2
1072
+ #
1073
def difftime(time1, time2):
    """Return the rounded number of seconds from time2 to time1.

    Each value is converted with unixtime(); an empty/None value counts as 0.
    """
    seconds1 = unixtime(time1) if time1 else 0
    seconds2 = unixtime(time2) if time2 else 0
    return round(seconds1 - seconds2)

# alias kept for callers using the longer name
diffdatetime = difftime
1081
+
1082
+ #
1083
+ # Return: the number of days between date and '1970-01-01 00:00:00'
1084
+ #
1085
def get_days(cdate):
    """Return diffdate() between str(cdate) and '1970-01-01'."""
    epoch = '1970-01-01'
    return diffdate(str(cdate), epoch)
1088
+
1089
+ #
1090
+ # Function: get_month_days(date)
1091
+ #
1092
+ # Return: the number of days in given month
1093
+ #
1094
def get_month_days(cdate):
    """Return the number of days in the month of cdate.

    cdate is converted to a string and must start with 'YYYY-MM';
    0 is returned when it does not.
    """
    ms = re.match(r'^(\d+)-(\d+)', str(cdate))
    if not ms:
        return 0
    year, month = int(ms.group(1)), int(ms.group(2))
    return calendar.monthrange(year, month)[1]
1103
+
1104
+ #
1105
+ # Function: validate_date(date)
1106
+ #
1107
+ # Return: a date in format of YYYY-MM-DD in which year, month and day are all validated
1108
+ #
1109
def validate_date(cdate):
    """Clamp the year/month/day of a 'Y-M-D' date into valid ranges.

    Years below 1000 get 2000 added and years above 9999 are reduced modulo
    10000; month is clamped to 1..12 and day to 1..last day of that month.
    Return: 'YYYY-MM-DD' string, or cdate unchanged if it does not start
            with three dash-separated numbers.
    """
    ms = re.match(r'^(\d+)-(\d+)-(\d+)', str(cdate))
    if ms is None:
        return cdate

    yr, mn, dy = map(int, ms.groups())
    if yr < 1000:
        yr += 2000
    elif yr > 9999:
        yr %= 10000
    mn = min(max(mn, 1), 12)
    dy = min(max(dy, 1), calendar.monthrange(yr, mn)[1])

    return '{}-{:02d}-{:02d}'.format(yr, mn, dy)
1130
+
1131
+ #
1132
+ # Function: get_date(days)
1133
+ #
1134
+ # Return: the date in format of "YYYY-MM-DD" for given number of days
1135
+ # from '1970-01-01 00:00:00'
1136
+ #
1137
def get_date(days):
    """Return adddate() of int(days) days onto '1970-01-01'."""
    offset = int(days)
    return adddate('1970-01-01', 0, 0, offset)
1140
+
1141
+ #
1142
+ # compare date/hour and return the different hours
1143
+ #
1144
def diffdatehour(date1, hour1, date2, hour2):
    """Return the hour difference between (date1, hour1) and (date2, hour2).

    A None hour is treated as 23; the date part comes from diffdate().
    """
    h1 = 23 if hour1 is None else hour1
    h2 = 23 if hour2 is None else hour2
    return (h1 - h2) + 24*diffdate(date1, date2)
1149
+
1150
+ #
1151
+ # hour difference between GMT and local time
1152
+ #
1153
def diffgmthour():
    """Return diffdatehour() of the current GMT date/hour against local date/hour."""
    gmt = time.gmtime()
    local = time.localtime()
    gmt_date = fmtdate(gmt.tm_year, gmt.tm_mon, gmt.tm_mday)
    local_date = fmtdate(local.tm_year, local.tm_mon, local.tm_mday)

    return diffdatehour(gmt_date, gmt.tm_hour, local_date, local.tm_hour)
1163
+
1164
+ #
1165
+ # compare date and time (if given) and return 1, 0 and -1
1166
+ #
1167
def cmptime(date1, time1, date2, time2):
    """Compare two date/time pairs.

    Each pair is combined with join_datetime() and the results are compared
    with pgcmp(), whose return value (1, 0 or -1) is passed through.
    """
    return pgcmp(join_datetime(date1, time1), join_datetime(date2, time2))
1173
+
1174
+ #
1175
+ # date: the original date in format of 'YYYY-MM-DD',
1176
+ # mf: the number of month fractions to add
1177
+ # nf: number of fractions of a month
1178
+ # Return: new date
1179
+ #
1180
def addmonth(cdate, mf, nf = 1):
    """Add mf month fractions to a 'YYYY-MM-DD' date.

    cdate: original date string
    mf:    number of month fractions to add (negative to subtract)
    nf:    number of fractions per month; below 2 whole months are added
           through adddate()
    Return: new date from fmtdate(), or cdate unchanged when mf is empty
            or cdate is not exactly 'Y-M-D'
    """
    if not mf:
        return cdate
    if not nf or nf < 2:
        return adddate(cdate, 0, mf, 0)

    ms = re.match(r'^(\d+)-(\d+)-(\d+)$', cdate)
    if ms is None:
        return cdate
    yr, mn, rest = map(int, ms.groups())

    step = int(30/nf)   # number of days in each fraction, on a 30-day month
    dy = 0              # start of the fraction that holds the original day
    while rest > step:
        dy += step
        rest -= step

    dy += mf * step
    if mf > 0:
        while dy >= 30:
            dy -= 30
            mn += 1
    else:
        while dy < 0:
            dy += 30
            mn -= 1

    # put back the day offset inside the fraction
    return fmtdate(yr, mn, dy + rest)
1211
+
1212
+ # add yr years & mn months to yearmonth ym in format YYYYMM
1213
def addyearmonth(ym, yr, mn):
    """Add yr years and mn months to a year-month value in format YYYYMM.

    ym: year-month as 'YYYYMM' (non-strings are converted with str())
    yr: number of years to add; None counts as 0
    mn: number of months to add; None counts as 0
    Return: the new 'YYYYMM' string, or ym unchanged when it is not in
            YYYYMM format
    """
    if yr is None: yr = 0
    if mn is None: mn = 0
    if not isinstance(ym, str): ym = str(ym)

    ms = re.match(r'^(\d\d\d\d)(\d\d)$', ym)
    if ms:
        # work on a zero-based month count so month 0 or 13+ rolls into years;
        # the offsets are added to the parsed values instead of being replaced
        months = (int(ms.group(1)) + yr)*12 + (int(ms.group(2)) - 1) + mn
        nyr, nmn = divmod(months, 12)
        ym = "{:04}{:02}".format(nyr, nmn + 1)

    return ym
1235
+
1236
+ #
1237
+ # a wrapper to adddate()
1238
+ #
1239
def addNoLeapDate(cdate, yr, mn, dy):
    """Pass all arguments straight to adddate() and return its result."""
    return adddate(cdate, yr, mn, dy)
1240
+
1241
+ #
1242
+ # set number of days in February for leap year according to PgLOG.PGLOG['NOLEAP']
1243
+ #
1244
def set_leap_mdays(year):
    """Update MDAYS[0] (days in year) and MDAYS[2] (days in February).

    The leap values 366/29 are used only when PgLOG.PGLOG['NOLEAP'] is not
    set and year is a leap year; otherwise 365/28.
    Return: 1 if the leap values were set, 0 otherwise
    """
    leap = 1 if (not PgLOG.PGLOG['NOLEAP'] and calendar.isleap(year)) else 0
    MDAYS[0] = 365 + leap
    MDAYS[2] = 28 + leap
    return leap
1255
+
1256
+ #
1257
+ # wrap on calendar.isleap()
1258
+ #
1259
def is_leapyear(year):
    """Return calendar.isleap(year)."""
    return calendar.isleap(year)
1260
+
1261
+ #
1262
+ # return 1 if it is the end of the month
1263
+ #
1264
def is_end_month(yr, mn, dy):
    """Return 1 if dy equals MDAYS[mn] after set_leap_mdays(yr), else 0."""
    set_leap_mdays(yr)   # refresh MDAYS for the given year first
    if dy == MDAYS[mn]:
        return 1
    return 0
1268
+
1269
+ #
1270
+ # adjust the year, month and day values that are out of ranges
1271
+ #
1272
def adjust_ymd(yr, mn, dy):
    """Roll out-of-range month and day values into neighbouring months/years.

    None arguments default to yr=1970, mn=1, dy=1.  Month lengths are read
    from MDAYS, which set_leap_mdays() refreshes for the current year on
    every pass.
    Return: list [yr, mn, dy]
    """
    yr = 1970 if yr is None else yr
    mn = 1 if mn is None else mn
    dy = 1 if dy is None else dy

    while True:
        # bring the month into 1..12 first, one year per pass
        if mn > 12:
            yr, mn = yr + 1, mn - 12
            continue
        if mn < 1:
            yr, mn = yr - 1, mn + 12
            continue

        set_leap_mdays(yr)
        ydays = MDAYS[0]   # days in the current year

        if dy < 1:
            if dy < -ydays:
                # more than a year back: step a whole year
                yr -= 1
                dy += ydays
            else:
                # borrow the days of the previous month
                mn -= 1
                if mn < 1:
                    yr -= 1
                    mn += 12
                dy += MDAYS[mn]
            continue

        if dy > MDAYS[mn]:
            if dy > ydays:
                # more than a year ahead: step a whole year
                dy -= ydays
                yr += 1
            else:
                # move into the next month
                dy -= MDAYS[mn]
                mn += 1
            continue

        return [yr, mn, dy]
1313
+
1314
+ #
1315
+ # date: the original date in format of 'YYYY-MM-DD',
1316
+ # yr: the number of years to add/subtract from the odate for positive/negative value,
1317
+ # mn: the number of months to add/subtract from the odate for positive/negative value,
1318
+ # dy: the number of days to add/subtract from the odate for positive/negative value)
1319
+ #
1320
+ # Return: new date
1321
+ #
1322
def adddate(cdate, yr, mn = 0, dy = 0, tofmt = None):
    """Add years, months and days to a date.

    cdate: original date containing 'Y-M-D' (non-strings are converted
           with str())
    yr, mn, dy: numbers to add (negative to subtract); None counts as 0
           and strings are converted with int()
    tofmt: passed through to fmtdate()
    Return: result of fmtdate(), or cdate unchanged when it is empty or
            holds no 'Y-M-D' part
    """
    def offset(value):
        # normalise one offset argument: None -> 0, numeric string -> int
        if value is None:
            return 0
        return int(value) if isinstance(value, str) else value

    if not cdate:
        return cdate
    if not isinstance(cdate, str):
        cdate = str(cdate)
    yr, mn, dy = offset(yr), offset(mn), offset(dy)

    ms = re.search(r'(\d+)-(\d+)-(\d+)', cdate)
    if ms is None:
        return cdate   # non-standard date format
    nyr, nmn, ndy = map(int, ms.groups())

    # remember whether the start date is a month end before shifting months
    at_month_end = is_end_month(nyr, nmn, ndy) if (mn and ndy > 27) else 0
    if yr:
        nyr += yr
    if mn:
        # day 0 of the following month resolves to the last day of the target month
        nyr, nmn, last_day = adjust_ymd(nyr, nmn + mn + 1, 0)
        if at_month_end:
            ndy = last_day
    if dy:
        ndy += dy

    return fmtdate(nyr, nmn, ndy, tofmt)
1351
+
1352
+ #
1353
+ # add given hours to the initial date and time
1354
+ #
1355
def addhour(sdate, stime, nhour):
    """Add a number of hours to a date and time.

    sdate: date, converted with str() when not a string
    stime: time string starting with the hour digits
    nhour: hours to add (negative to subtract); numeric strings accepted
    Return: list [sdate, stime]; both are returned without hour arithmetic
            when nhour is empty/0 or stime has no leading hour digits
    """
    if nhour and isinstance(nhour, str): nhour = int(nhour)
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    if stime and not isinstance(stime, str): stime = str(stime)
    if not nhour: return [sdate, stime]

    # without a leading hour there is nothing to shift; previously this
    # path failed on an unbound variable (or on a None stime)
    ms = re.match(r'^(\d+)', stime) if stime else None
    if ms is None: return [sdate, stime]

    # dy is the number of whole days carried over, hr ends up in 0..23
    dy, hr = divmod(int(ms.group(1)) + nhour, 24)
    stime = "{:02}".format(hr) + stime[ms.end():]
    if dy: sdate = adddate(sdate, 0, 0, dy)

    return [sdate, stime]
1381
+
1382
+ #
1383
+ # add given years, months, days and hours to the initial date and hour
1384
+ #
1385
def adddatehour(sdate, nhour, yr, mn, dy, hr = 0):
    """Add years, months, days and hours to a date and hour.

    sdate: date, converted with str() when not a string
    nhour: initial hour (number or numeric string), or None for no hour
    yr, mn, dy: offsets handed to adddate()
    hr:    hours to add; whole days rolled over are added to dy
    Return: list [sdate, nhour]; nhour stays None when it was given as None
    """
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    if hr:
        if nhour is not None:
            if isinstance(nhour, str): nhour = int(nhour)
            hr += nhour
        carry, hr = divmod(hr, 24)   # carry: whole days rolled over, hr in 0..23
        if carry:
            # adddate() accepts None or a numeric string for dy, so do the same
            # here instead of failing on the arithmetic
            if dy is None:
                dy = 0
            elif isinstance(dy, str):
                dy = int(dy)
            dy += carry
        if nhour is not None: nhour = hr

    if yr or mn or dy: sdate = adddate(sdate, yr, mn, dy)

    return [sdate, nhour]
1405
+
1406
+ #
1407
+ # add given yyyy, mm, dd, hh, nn, ss to sdatetime
1408
+ # if nf, add fraction of month only
1409
+ #
1410
def adddatetime(sdatetime, yy, mm, dd, hh, nn, ss, nf = 0):
    """Add years, months, days, hours, minutes and seconds to a datetime.

    sdatetime: 'date time' value, converted with str() when not a string;
               a date without a time part is accepted
    yy, mm, dd, hh, nn, ss: amounts to add
    nf:        if set, mm counts fractions of a month (nf fractions per
               month) and is applied through addmonth()
    Return: 'date time' string, only the date when no time part exists,
            or sdatetime unchanged when it is empty

    This function was defined twice with identical bodies; the single
    definition here replaces both.
    """
    if not sdatetime: return sdatetime
    if not isinstance(sdatetime, str): sdatetime = str(sdatetime)

    # split on the first run of whitespace so a date-only value or repeated
    # blanks no longer break the two-way unpacking
    parts = sdatetime.split(None, 1)
    if not parts: return sdatetime
    sdate = parts[0]
    stime = parts[1] if len(parts) > 1 else None

    if hh or nn or ss: (sdate, stime) = addtime(sdate, stime, hh, nn, ss)
    if nf:
        sdate = addmonth(sdate, mm, nf)
        mm = 0   # months are already applied as fractions
    if yy or mm or dd: sdate = adddate(sdate, yy, mm, dd)

    return "{} {}".format(sdate, stime) if stime else sdate
1439
+
1440
+ #
1441
+ # add given hours, minutes and seconds to the initial date and time
1442
+ #
1443
def addtime(sdate, stime, h, m, s):
    """Add hours, minutes and seconds to a date and time.

    sdate: date, converted with str() when not a string
    stime: time as 'HH:MM:SS' (converted with str() when not a string);
           a value in any other format contributes nothing to the sum
    h, m, s: hours, minutes and seconds to add (negative to subtract)
    Return: list [sdate, stime] with stime as 'HH:MM:SS'; sdate is moved
            with adddate() when the time rolls over a day boundary
    """
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    # convert the time itself; this used to overwrite sdate with the time
    if stime and not isinstance(stime, str): stime = str(stime)
    ups = (60, 60, 24)
    tms = [s or 0, m or 0, h or 0, 0]   # (sec, min, hour, day)

    if stime:
        ms = re.match(r'^(\d+):(\d+):(\d+)$', stime)
        if ms:
            tms[2] += int(ms.group(1))
            tms[1] += int(ms.group(2))
            tms[0] += int(ms.group(3))

    # carry overflow/underflow from seconds up to days
    for i, up in enumerate(ups):
        carry, tms[i] = divmod(tms[i], up)
        tms[i+1] += carry

    stime = "{:02}:{:02}:{:02}".format(tms[2], tms[1], tms[0])
    if tms[3]: sdate = adddate(sdate, 0, 0, tms[3])

    return [sdate, stime]
1474
+
1475
+ #
1476
+ # add time interval array to datetime
1477
+ # opt = -1 - minus, 0 - begin time, 1 - add (default)
1478
+ #
1479
def addintervals(sdatetime, intv, opt = 1):
    """Add a time interval list to a datetime through adddatetime().

    sdatetime: datetime, converted with str() when not a string
    intv: up to seven values, handed in order to adddatetime() as
          yy, mm, dd, hh, nn, ss, nf; missing trailing values are 0
    opt:  -1 - minus, 0 - begin time, 1 - add (default)
    Return: sdatetime unchanged when intv is empty, otherwise the result
            of adddatetime()
    """
    if not isinstance(sdatetime, str):
        sdatetime = str(sdatetime)
    if not intv:
        return sdatetime

    tv = [0] * 7
    for pos, amount in enumerate(intv):
        tv[pos] = amount

    # assume the given datetime is end of the current interval;
    # add one second to set it to beginning of the next one
    if opt == 0:
        sdatetime = adddatetime(sdatetime, 0, 0, 0, 0, 0, 1)

    if opt < 1:
        # negate the six amounts; the seventh (nf) is left as given
        tv[:6] = [-amount if amount else amount for amount in tv[:6]]

    return adddatetime(sdatetime, *tv)
1498
+
1499
+ #
1500
+ # adjust end date to the specified day days for frequency of year/month/week
1501
+ # end of period if days == 0
1502
+ # nf - number of fractions of a month, for unit of 'M' only
1503
+ #
1504
def enddate(sdate, days, unit, nf = 0):
    """Adjust a date to the end day of its year, month or week.

    sdate: date, converted with str() when not a string
    days:  day to move to inside the period (numeric strings accepted);
           0/empty means the end of the period for units 'Y' and 'M'
    unit:  'Y', 'M' or 'W'; any other value returns sdate unchanged
    nf:    number of fractions of a month, for unit 'M' only
    Return: the adjusted date
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)
    if days and isinstance(days, str):
        days = int(days)
    if not unit or unit not in 'YMW':
        return sdate

    if unit == 'Y':
        ms = re.match(r'^(\d+)', sdate)
        if ms:
            # given day counts from January; otherwise use December 31
            mn, dy = (1, days) if days else (12, 31)
            sdate = fmtdate(int(ms.group(1)), mn, dy)
        return sdate

    if unit == 'W':
        wday = get_weekday(sdate)
        if days != wday:
            sdate = adddate(sdate, 0, 0, days - wday)
        return sdate

    if unit != 'M':
        # a multi-letter value such as 'YM' passes the substring guard above
        return sdate

    # unit 'M': accept 'Y-M-D', or 'Y-M' with the day taken as 1
    ms = re.match(r'^(\d+)-(\d+)-(\d+)', sdate)
    if ms:
        yr, mn, dy = map(int, ms.groups())
    else:
        ms = re.match(r'^(\d+)-(\d+)', sdate)
        if not ms:
            return sdate
        yr, mn = map(int, ms.groups())
        dy = 1

    if not nf or nf == 1:
        # whole month: given day, or the last day of the month
        nd = days if days else calendar.monthrange(yr, mn)[1]
        return sdate if nd == dy else fmtdate(yr, mn, nd)

    val = int(30/nf)   # days per fraction, on a 30-day month
    if dy >= 28:
        mf = nf        # treated as the last fraction of the month
    else:
        mf = int(dy/val)
        if (mf*val) < dy:
            mf += 1
    if days:
        dy = (mf - 1)*val + days
    elif mf < nf:
        dy = mf*val
    else:
        # day 0 of the next month is handed to fmtdate()
        mn += 1
        dy = 0
    return fmtdate(yr, mn, dy)
1556
+
1557
+ #
1558
+ # adjust end time to the specified h/n/s for frequency of hour/minute/second
1559
+ #
1560
def endtime(stime, unit):
    """Adjust a time to the end of its hour ('H'), minute ('N') or second ('S').

    stime: time, converted with str() when not a string and split with
           split_datetime(stime, 'T'); an empty value counts as 00:00:00
    unit:  'H', 'N' or 'S'; any other value returns stime unchanged
    Return: the adjusted time as 'HH:MM:SS'
    """
    if stime and not isinstance(stime, str): stime = str(stime)
    if not (unit and unit in 'HNS'): return stime

    if stime:
        tm = split_datetime(stime, 'T')
    else:
        tm = [0, 0, 0]

    if unit == 'H':
        tm[1] = tm[2] = 59
    elif unit == 'N':
        tm[2] = 59
    elif unit != 'S':
        # only reached for multi-letter values that pass the substring guard
        tm[0] = 23
        tm[1] = tm[2] = 59

    # the three fields are separate arguments; a '.' in place of the second
    # comma used to turn this into an attribute lookup on an int
    return "{:02}:{:02}:{:02}".format(tm[0], tm[1], tm[2])
1579
+
1580
+ #
1581
+ # adjust end time to the specified h/n/s for frequency of year/month/week/day/hour/minute/second
1582
+ #
1583
def enddatetime(sdatetime, unit, days = 0, nf = 0):
    """Adjust a 'date time' value to the end of the period given by unit.

    sdatetime: datetime, converted with str() when not a string; it must
               split into exactly a date and a time on a single blank
    unit:  one of 'YMWDHNS'; 'H', 'N' and 'S' are handled by endtime(),
           the others by enddate(); any other value returns sdatetime
    days, nf: passed on to enddate()
    Return: the adjusted 'date time' string
    """
    if sdatetime and not isinstance(sdatetime, str):
        sdatetime = str(sdatetime)
    if not unit or unit not in 'YMWDHNS':
        return sdatetime

    sdate, stime = re.split(' ', sdatetime)
    if unit in 'HNS':
        stime = endtime(stime, unit)
    else:
        sdate = enddate(sdate, days, unit, nf)

    return "{} {}".format(sdate, stime)
1594
+
1595
+ #
1596
+ # get the string length dynamically
1597
+ #
1598
def get_column_length(colname, values):
    """Return the display width needed for a column.

    The width starts at len(colname), or 2 without a column name, and
    grows to the longest str() of the values.  None values, empty strings
    and values containing a newline are not measured.
    """
    widths = [len(colname) if colname else 2]
    for val in values:
        if val is not None:
            text = str(val)
            if text and '\n' not in text:
                widths.append(len(text))

    return max(widths)
1610
+
1611
+ #
1612
+ # Function: hour2time()
1613
+ # Return: time string in format of date HH:MM:SS
1614
+ #
1615
def hour2time(sdate, nhour, endtime = 0):
    """Build a time string 'HH:MM:SS' from an hour, optionally with a date.

    sdate:   date to prefix (converted with str() when not a string);
             empty for a time-only result
    nhour:   the hour value
    endtime: if set, minutes and seconds are 59:59 instead of 00:00
    Return: 'date HH:MM:SS' when sdate is given, else 'HH:MM:SS'
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)

    tail = "59:59" if endtime else "00:00"
    stime = "{:02}:".format(nhour) + tail

    return "{} {}".format(sdate, stime) if sdate else stime
1628
+
1629
+ #
1630
+ # Function: time2hour()
1631
+ # Return: list of date and hour
1632
+ #
1633
def time2hour(stime):
    """Split a 'date time' or plain time string into date and hour.

    Return: list [sdate, nhour]; sdate is None unless stime consists of
            exactly two blank-separated parts, and nhour is None when the
            time part does not start with digits
    """
    sdate = None
    parts = stime.split(' ')
    if len(parts) == 2:
        sdate, stime = parts

    ms = re.match(r'^(\d+)', stime)
    return [sdate, int(ms.group(1)) if ms else None]
1646
+
1647
+ #
1648
+ # get the all column widths
1649
+ #
1650
def all_column_widths(pgrecs, flds, tdict):
    """Return the list of column widths for the given field codes.

    pgrecs: dict mapping field names to lists of column values
    flds:   field codes; a code missing from tdict keeps width 0
    tdict:  table dict; element [1] of each entry is passed to
            strip_field() to get the field name
    """
    lens = [0] * len(flds)
    for idx, fld in enumerate(flds):
        if fld in tdict:
            field = strip_field(tdict[fld][1])
            lens[idx] = get_column_length(None, pgrecs[field])

    return lens
1661
+
1662
+ #
1663
+ # check a given value, return 1 if numeric, 0 otherwise
1664
+ #
1665
def pgnum(val):
    """Check whether a value looks like a decimal number.

    val is converted with str() when not a string.  Accepted forms are an
    optional leading '-', digits with an optional decimal point ('12',
    '12.', '1.5', '.5') and one optional exponent ('e5', 'E-3', 'e+3').
    Return: 1 if numeric, 0 otherwise
    """
    if not isinstance(val, str): val = str(val)
    # '\d*' before the decimal point was written as 'd*', which accepted
    # literal 'd' characters; the exponent is limited to a single
    # occurrence and may carry either sign
    ms = re.match(r'^-?(\d+|\d+\.\d*|\d*\.\d+)([eE][-+]?\d+)?$', val)
    return 1 if ms else 0
1670
+
1671
+ #
1672
+ # Function: pgcmp(val1, val2)
1673
+ # Return: 0 if both empty or two values are identical; -1 if val1 < val2; otherwise 1
1674
+ #
1675
def _pgcmp_number(sval):
    """Convert a numeric string to int when possible, otherwise to float."""
    try:
        return int(sval)
    except ValueError:
        return float(sval)

def pgcmp(val1, val2, ignorecase = 0, num = 0):
    """Compare two values.

    val1, val2: values to compare; None sorts before any other value
    ignorecase: if set, strings are compared in lower case
    num:        if set, strings are compared as numbers: both strings must
                pass pgnum() when both values are strings, and the string
                side is always converted when the types differ
    Return: 0 if both are None or identical; -1 if val1 < val2; otherwise 1
    Raises: ValueError when num is set, the types differ and the string
            side is not a number
    """
    if val1 is None:
        return 0 if val2 is None else -1
    if val2 is None:
        return 1

    typ1 = type(val1)
    typ2 = type(val2)
    if typ1 != typ2:
        if num:
            # decimal strings such as '1.5' are converted to float rather
            # than failing in int()
            if typ1 is str:
                val1 = _pgcmp_number(val1)
                typ1 = type(val1)
            if typ2 is str:
                val2 = _pgcmp_number(val2)
                typ2 = type(val2)
        else:
            # no numeric request: compare both as strings
            if typ1 != str:
                typ1 = str
                val1 = str(val1)
            if typ2 != str:
                typ2 = str
                val2 = str(val2)

    if typ1 is str:
        if num:
            if pgnum(val1) and pgnum(val2):
                val1 = _pgcmp_number(val1)
                val2 = _pgcmp_number(val2)
        elif ignorecase:
            val1 = val1.lower()
            val2 = val2.lower()

    if val1 > val2:
        return 1
    elif val1 < val2:
        return -1
    else:
        return 0
1717
+
1718
+ #
1719
+ # infiles: initial file list
1720
+ # Return: final file list with all the subdirectories expanded
1721
+ #
1722
def recursive_files(infiles):
    """Expand every directory in a file list into the files below it.

    infiles: initial list of file and directory names
    Return: list with each directory replaced, in place, by the recursive
            expansion of its glob '<dir>/*'; other names are kept as given
    """
    ofiles = []

    for entry in infiles:
        if not op.isdir(entry):
            ofiles.append(entry)
            continue
        ofiles += recursive_files(glob.glob(entry + "/*"))

    return ofiles
1733
+
1734
+ #
1735
+ # lidx: lower index limit (including)
1736
+ # hidx: higher index limit (excluding)
1737
+ # key: string value to be searched,
1738
+ # list: reference to a sorted list where the key is searched)
1739
+ # Return: index if found; -1 otherwise
1740
+ #
1741
def asearch(lidx, hidx, key, list):
    """Search a sorted list for an exact key.

    lidx: lower index limit (including)
    hidx: higher index limit (excluding)
    key:  value to be searched
    list: sorted list where the key is searched
    Return: index if found; -1 otherwise
    """
    # halve the range while it holds 11 or more items
    while (hidx - lidx) >= 11:
        midx = (lidx + hidx) // 2   # integer division: '/' gives a float, which cannot index a list
        if key == list[midx]:
            return midx
        if key < list[midx]:
            hidx = midx
        else:
            lidx = midx + 1

    # use linear search for less than 11 items
    for midx in range(lidx, hidx):
        if key == list[midx]:
            return midx

    return -1
1759
+
1760
+ #
1761
+ # lidx: lower index limit (including)
1762
+ # hidx: higher index limit (excluding)
1763
+ # key: string value to be searched,
1764
+ # list: reference to a sorted list where the key is searched)
1765
+ # Return: index if found; -1 otherwise
1766
+ #
1767
def psearch(lidx, hidx, key, list):
    """Search a sorted list of regular expression patterns for one matching key.

    lidx: lower index limit (including)
    hidx: higher index limit (excluding)
    key:  string value to be matched with re.search()
    list: sorted list of patterns
    Return: index of a matching pattern if found; -1 otherwise
    """
    # halve the range while it holds 11 or more items; the direction is
    # chosen by comparing key with the pattern text
    while (hidx - lidx) >= 11:
        midx = int((lidx + hidx)/2)
        if re.search(list[midx], key):
            return midx
        if key < list[midx]:
            hidx = midx
        else:
            lidx = midx + 1

    # use linear search for less than 11 items
    for midx in range(lidx, hidx):
        if re.search(list[midx], key):
            return midx

    return -1
1785
+
1786
+ #
1787
+ # quicksort for pattern
1788
+ #
1789
def quicksort(srecs, lo, hi, desc, cnt, nums = None):
    """Sort the records srecs[lo..hi] in place with a recursive quicksort.

    srecs: list of records (lists of column values)
    lo, hi: first and last index of the range to sort (both inclusive)
    desc, cnt, nums: passed through to cmp_records() for each comparison
    Return: srecs
    """
    left, right = lo, hi
    pivot = srecs[int((lo+hi)/2)]   # the middle record serves as pivot

    while True:
        # skip records already on the correct side of the pivot
        while cmp_records(srecs[left], pivot, desc, cnt, nums) < 0:
            left += 1
        while cmp_records(srecs[right], pivot, desc, cnt, nums) > 0:
            right -= 1
        if left <= right:
            if left < right:
                srecs[left], srecs[right] = srecs[right], srecs[left]
            left += 1
            right -= 1
        if left > right:
            break

    # sort both partitions
    if lo < right:
        srecs = quicksort(srecs, lo, right, desc, cnt, nums)
    if left < hi:
        srecs = quicksort(srecs, left, hi, desc, cnt, nums)

    return srecs
1812
+
1813
def cmp_records(arec, brec, desc, cnt, nums):
    """Compare two records on their first cnt columns.

    arec, brec: records to compare
    desc: per-column multiplier applied to the pgcmp() result
    cnt:  number of leading columns to compare
    nums: per-column numeric flags handed to pgcmp(), or None
    Return: result for the first differing column multiplied by its desc
            value; 0 if all compared columns are identical
    """
    for col in range(cnt):
        isnum = nums[col] if nums else 0
        order = pgcmp(arec[col], brec[col], 0, isnum)
        if order:
            return order*desc[col]

    return 0   # identical records
1822
+
1823
+ #
1824
+ # format one floating point value
1825
+ #
1826
def format_float_value(val, precision = 2):
    """Format a value as a number with a unit from B to PB.

    val: the value; None gives an empty string and non-int values are
         converted with int() first
    precision: number of digits after the decimal point
    Return: the value divided by 1000 until it is below 1000 (at most five
            times), followed by the matching unit, e.g. '1.50KB'
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')

    if val is None:
        return ''
    if not isinstance(val, int):
        val = int(val)

    idx = 0
    for _ in range(5):   # at most five steps, ending at 'PB'
        if val < 1000:
            break
        val /= 1000
        idx += 1

    return "{:.{}f}{}".format(val, precision, units[idx])
1841
+
1842
+ #
1843
+ # check a file is a ASCII text one
1844
+ # return 1 if yes, 0 if not; or -1 if file not exists
1845
+ #
1846
def is_text_file(fname):
    """Check with the 'file -b' command whether a file is a text one.

    Return: -1 if fname is not an existing file; 1 if the command output
            contains the word text, script or data at a word start; 0 otherwise
    """
    if not op.isfile(fname):
        return -1

    # NOTE(review): fname is appended to the command string unquoted -
    # confirm how PgLOG.pgsystem() runs it if names can hold blanks or
    # shell characters
    buf = PgLOG.pgsystem("file -b " + fname, PgLOG.LOGWRN, 20)
    if buf and re.search(r'(^|\s)(text|script|data)', buf):
        return 1
    return 0