rda-python-common 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1854 @@
1
+ #
2
+ ###############################################################################
3
+ #
4
+ # Title : PgUtil.py -- module for misc utilities.
5
+ # Author : Zaihua Ji, zji@ucar.edu
6
+ # Date : 07/27/2020
7
+ # 2025-01-10 transferred to package rda_python_common from
8
+ # https://github.com/NCAR/rda-shared-libraries.git
9
+ # Purpose : python library module for global misc utilities
10
+ #
11
+ # Github : https://github.com/NCAR/rda-python-common.git
12
+ #
13
+ ###############################################################################
14
+ #
15
+ import os
16
+ import re
17
+ import time
18
+ import datetime
19
+ import calendar
20
+ import glob
21
+ from os import path as op
22
+ from . import PgLOG
23
+
24
# Regular-expression alternations (one capture group each) for the temporal
# format tokens recognised by this module, keyed by a one-letter unit code.
DATEFMTS = dict(
    C = '(CC|C)',                          # century
    Y = '(YYYY|YY00|YYY|YY|YEAR|YR|Y)',    # year; YYY means decade
    Q = '(QQ|Q)',                          # quarter
    M = '(Month|Mon|MM|M)',                # numeric or string month
    W = '(Week|Www|W)',                    # string or numeric weekday
    D = '(DDD|DD|D)',                      # days in year or month
    H = '(HHH|HH|H)',                      # hours in month or day
    N = '(NNNN|NN|N)',                     # minutes in day or hour
    S = '(SSSS|SS|S)',                     # seconds in hour or minute
)

# Lower-case full month names, January first.
MONTHS = [
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
]
# Three-letter month abbreviations, derived from the full names.
MNS = [name[:3] for name in MONTHS]

# Lower-case full weekday names, Sunday first.
WDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"]
# Three-letter weekday abbreviations, derived from the full names.
WDS = [name[:3] for name in WDAYS]

# Index 0 holds the day count of a non-leap year; indices 1-12 hold the
# day counts of the months of a non-leap year.
MDAYS = [365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
45
+
46
+ #
47
+ # date: optional given date in format of "YYYY-MM-DD"
48
+ # return weekday: 0 - Sunday, 1 - Monday, ..., 6 - Saturday
49
+ #
50
def get_weekday(date = None):
    """Return the weekday number (0 = Sunday ... 6 = Saturday).

    date: optional value whose str() form is "YYYY-MM-DD"; when omitted the
          current time is used (GMT if PgLOG.PGLOG['GMTZ'] is truthy,
          local time otherwise).
    """
    if date is not None:
        moment = time.strptime(str(date), "%Y-%m-%d")
    elif PgLOG.PGLOG['GMTZ']:
        moment = time.gmtime()
    else:
        moment = time.localtime()

    # struct_time numbers Monday as 0; rotate by one so Sunday becomes 0
    return (moment.tm_wday + 1) % 7
58
+
59
+ #
60
+ # mn: given month string like "Jan" or "January", or numeric number 1 to 12
61
+ # Return: numeric Month if not fmt (default); three-character or full month names for given fmt
62
+ #
63
def get_month(mn, fmt = None):
    """Convert a month name or number to a month number or formatted string.

    mn:  month as an int, a digit string, or a (possibly abbreviated) month
         name such as "Jan", "Sept." or "January"; matching is a
         case-insensitive prefix match against MONTHS.
    fmt: optional output format; a two-character format gives a zero-padded
         number, "Mon"/"Month" style formats give the abbreviated/full name
         (capitalised for a leading "Mon", upper-cased for "MON",
         lower-case otherwise), any other format gives the plain number.

    Return: the numeric month when fmt is not given; the formatted string
    when fmt is given and the month is within 1-12. A value that cannot be
    resolved to a month is returned unchanged.
    """
    if not isinstance(mn, int):
        text = str(mn).strip()
        if re.match(r'^\d+$', text):
            mn = int(text)
        else:
            # plain prefix comparison: the input is user text, not a regular
            # expression, so metacharacters must not be interpreted
            key = text.rstrip('.').lower()
            if key:
                for idx, name in enumerate(MONTHS):
                    if name.startswith(key):
                        mn = idx + 1
                        break

    # an unresolved name stays a string; never compare it against ints
    if not fmt or not isinstance(mn, int) or not 1 <= mn <= 12:
        return mn

    slen = len(fmt)
    if slen == 2:
        return "{:02}".format(mn)
    if re.match(r'^mon', fmt, re.I):
        smn = MNS[mn-1] if slen == 3 else MONTHS[mn-1]
        if re.match(r'^Mon', fmt):
            smn = smn.capitalize()
        elif re.match(r'^MON', fmt):
            smn = smn.upper()
        return smn
    return str(mn)
89
+
90
+ #
91
+ # wday: given weekday string like "Sun" or "Sunday", or numeric number 0 to 6
92
+ # Return: numeric Weekday if not fmt (default); three-character or full week name for given fmt
93
+ #
94
def get_wday(wday, fmt = None):
    """Convert a weekday name or number to a weekday number or formatted string.

    wday: weekday as an int (0 = Sunday ... 6 = Saturday), a digit string,
          or a (possibly abbreviated) weekday name such as "Sun" or
          "Sunday"; matching is a case-insensitive prefix match against
          WDAYS.
    fmt:  optional output format; a four-character format ("Week") gives the
          full name, a three-character format ("Www") the abbreviated name,
          anything else the plain number. A leading "We"/"Ww" capitalises
          the name and a leading "WE"/"WW" upper-cases it.

    Return: the numeric weekday when fmt is not given; the formatted string
    when fmt is given and the weekday is within 0-6. A value that cannot be
    resolved to a weekday is returned unchanged.
    """
    if not isinstance(wday, int):
        text = str(wday).strip()
        if re.match(r'^\d+$', text):
            wday = int(text)
        else:
            # plain prefix comparison: the input is user text, not a regex
            key = text.rstrip('.').lower()
            if key:
                for idx, name in enumerate(WDAYS):
                    if name.startswith(key):
                        wday = idx
                        break

    # an unresolved name stays a string; never compare it against ints
    if not fmt or not isinstance(wday, int) or not 0 <= wday <= 6:
        return wday

    slen = len(fmt)
    if slen == 4:
        # index with the resolved weekday, not with a loop variable that is
        # undefined for numeric input
        swday = WDAYS[wday]
        if re.match(r'^We', fmt):
            swday = swday.capitalize()
        elif re.match(r'^WE', fmt):
            swday = swday.upper()
    elif slen == 3:
        swday = WDS[wday]
        if re.match(r'^Ww', fmt):
            swday = swday.capitalize()
        elif re.match(r'^WW', fmt):
            swday = swday.upper()
    else:
        swday = str(wday)
    return swday
124
+
125
+ #
126
+ # file: given file name
127
+ # Return: type if given file name is a valid online file; '' otherwise
128
+ #
129
def valid_online_file(file, type = None, exists = None):
    """Classify a file name as a valid online file.

    file:   file name or path to check
    type:   optional type code; any truthy value other than 'D' is returned
            as-is once the name passes the basic checks
    exists: the on-disk existence check is performed unless this is a falsy
            value other than None

    Return: the type code ('D' by default) for a valid file; '' otherwise.
    """
    must_exist = exists is None or exists
    if must_exist and not op.exists(file):
        return ''    # file does not exist

    bname = op.basename(file)

    # NOTE(review): described as the hidden-file check, yet the pattern
    # tests for a leading comma rather than a leading dot -- confirm intent.
    if re.match(r'^,.*', bname):
        return ''

    if re.search(r'index\.(htm|html|shtml)$', bname, re.I):
        return ''    # index file

    if type and type != 'D':
        return type

    if re.search(r'\.(doc|php|html|shtml)(\.|$)', bname, re.I):
        return ''    # file with special extension

    return 'D'
144
+
145
+ #
146
+ # Return: current time string in format of HH:MM:SS
147
+ #
148
def curtime(getdate = False):
    """Return the current time as "HH:MM:SS".

    getdate: when truthy the date is prepended, giving "YYYY-MM-DD HH:MM:SS".

    GMT is used if PgLOG.PGLOG['GMTZ'] is truthy, local time otherwise.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if getdate:
        return time.strftime("%Y-%m-%d %H:%M:%S", now)
    return time.strftime("%H:%M:%S", now)
155
+
156
+ #
157
+ # wrapper function of curtime(True) to get datetime in form of YYYY-MM-DD HH:NN:SS
158
+ #
159
def curdatetime():
    """Return the current date and time as "YYYY-MM-DD HH:MM:SS" (via curtime)."""
    return curtime(getdate = True)
161
+
162
+ #
163
+ # fmt: optional date format, defaults to YYYY-MM-DD
164
+ # Return: current (date, hour)
165
+ #
166
def curdatehour(fmt = None):
    """Return [date, hour] for the current time.

    fmt: optional date format handed to fmtdate(); without it the date is
         rendered as "YYYY-MM-DD".

    GMT is used if PgLOG.PGLOG['GMTZ'] is truthy, local time otherwise.
    The hour is returned as an int.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if fmt:
        datestr = fmtdate(now.tm_year, now.tm_mon, now.tm_mday, fmt)
    else:
        datestr = time.strftime("%Y-%m-%d", now)

    return [datestr, now.tm_hour]
173
+
174
+ #
175
+ # tm: optional time in seconds since the Epoch
176
+ # Return: current date and time strings
177
+ #
178
def get_date_time(tm = None):
    """Split a time value into [date string, time string].

    tm: None (current time), seconds since the Epoch (int/float), a
        "date time" string, or a datetime.datetime / datetime.date /
        datetime.time object.

    Return: a two-item list for numbers, None, dates and times (a date
    gets '00:00:00', a time gets None as its date); the result of splitting
    on single spaces for strings and datetimes; None for any other type.
    """
    if tm is None:
        ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()
    elif isinstance(tm, (int, float)):
        # numeric Epoch values are always rendered in local time here
        ct = time.localtime(tm)
    else:
        if isinstance(tm, str):
            parts = tm.split(' ')
        elif isinstance(tm, datetime.datetime):
            # checked before datetime.date, which datetime.datetime subclasses
            parts = str(tm).split(' ')
        elif isinstance(tm, datetime.date):
            parts = [str(tm), '00:00:00']
        elif isinstance(tm, datetime.time):
            parts = [None, str(tm)]
        else:
            parts = None
        return parts if parts else None

    return [time.strftime("%Y-%m-%d", ct), time.strftime("%H:%M:%S", ct)]
198
+
199
+ #
200
+ # tm: optional time in seconds since the Epoch
201
+ # Return: current datetime strings
202
+ #
203
def get_datetime(tm = None):
    """Render a time value as a "YYYY-MM-DD HH:MM:SS" string.

    tm: None (current time), seconds since the Epoch (int/float), or a
        datetime.datetime / datetime.date object. Strings and values of any
        other type are returned unchanged.
    """
    stamp = "%Y-%m-%d %H:%M:%S"

    if tm is None:
        now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()
        return time.strftime(stamp, now)
    if isinstance(tm, (int, float)):
        # numeric Epoch values are always rendered in local time here
        return time.strftime(stamp, time.localtime(tm))
    if isinstance(tm, datetime.datetime):
        return str(tm)
    if isinstance(tm, datetime.date):
        return str(tm) + ' 00:00:00'

    return tm
219
+
220
+
221
+ #
222
+ # file: file name, get curent timestamp if missed
223
+ # Return: timestamp string in format of 'YYYYMMDDHHMMSS'
224
+ #
225
def timestamp(file = None):
    """Return a 'YYYYMMDDHHMMSS' timestamp string.

    file: optional file name; when given, the file's last-modified time is
          used instead of the current time.

    GMT is used if PgLOG.PGLOG['GMTZ'] is truthy, local time otherwise.
    """
    breakdown = time.gmtime if PgLOG.PGLOG['GMTZ'] else time.localtime

    if file is None:
        ct = breakdown()
    else:
        ct = breakdown(os.stat(file).st_mtime)    # file last modified time

    return time.strftime("%Y%m%d%H%M%S", ct)
234
+
235
+ #
236
+ # dt: datetime string
237
+ # check date/time and set to default one if empty date
238
+ #
239
def check_datetime(date, default):
    """Return date as a string, or default when date is empty.

    A falsy date, or one whose string form starts with '0000', is treated
    as empty.
    """
    if not date:
        return default

    text = date if isinstance(date, str) else str(date)
    return default if text.startswith('0000') else text
246
+
247
+ #
248
+ # fmt: date format, default to "YYYY-MM-DD"
249
+ # Return: new formated current date string
250
+ #
251
def curdate(fmt = None):
    """Return the current date as a string.

    fmt: optional date format handed to fmtdate(); without it the date is
         rendered as "YYYY-MM-DD".

    GMT is used if PgLOG.PGLOG['GMTZ'] is truthy, local time otherwise.
    """
    now = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()

    if not fmt:
        return time.strftime("%Y-%m-%d", now)
    return fmtdate(now.tm_year, now.tm_mon, now.tm_mday, fmt)
256
+
257
+ #
258
+ # check given string to identify temporal pattern and their units
259
+ # defined in (keys DATEFMTS)
260
+ #
261
def temporal_pattern_units(string, seps):
    """Find the temporal units used by the delimited patterns in a string.

    string: text that may hold patterns enclosed by the two delimiters
    seps:   two-item sequence (opening delimiter, closing delimiter); both
            are inserted into a regular expression unescaped

    Return: dict keyed by the DATEFMTS unit letters found, with value 3 for
    'Q', 100 for 'C' and 1 for every other unit.
    """
    opener, closer = seps[0], seps[1]
    finder = opener + "([^" + closer + "]+)" + closer
    weights = {'Q': 3, 'C': 100}
    units = {}

    for pattern in re.findall(finder, string):
        # skip generic pattern and current time
        if re.match(r'^(P\d*|C.+C)$', pattern, re.I):
            continue

        for mkey in ('D', 'Q', 'M', 'C', 'Y', 'H', 'N', 'S'):
            tokens = re.findall(DATEFMTS[mkey], pattern, re.I)
            if not tokens:
                continue
            units[mkey] = weights.get(mkey, 1)
            # remove the matched tokens so later units cannot re-match them
            for token in tokens:
                pattern = pattern.replace(token, '', 1)

    return units
286
+
287
+ #
288
+ # format output for given date and hour
289
+ #
290
def format_datehour(date, hour, tofmt = None, fromfmt = None):
    """Format a date and an hour into one string.

    date:    date value; formatted through format_date() when truthy
    hour:    hour value, or None to leave the hour out
    tofmt:   optional output format; its hour tokens (DATEFMTS['H']) are
             replaced by the hour, zero-padded to two digits for tokens
             longer than one character
    fromfmt: optional format of the given date, passed to format_date()

    Return: the formatted string. Without tofmt the hour is appended as
    " HH".
    """
    if date:
        datehour = format_date(str(date), tofmt, fromfmt)
    else:
        datehour = tofmt if tofmt else ''

    if hour is None:
        return datehour

    if not tofmt:
        return datehour + " {:02}".format(int(hour))

    for fmt in re.findall(DATEFMTS['H'], datehour, re.I):
        shr = "{:02}".format(int(hour)) if len(fmt) > 1 else str(hour)
        datehour = re.sub(fmt, shr, datehour, count = 1)

    return datehour
312
+
313
+ #
314
+ # split a date, time or datetime into an array according to
315
+ # the sep value; str to int for digital values
316
+ #
317
def split_datetime(sdt, sep = r'\D'):
    """Split a date, time or datetime value into a list of its parts.

    sdt: value to split; converted with str() when not already a string
    sep: regular expression to split on (default: any non-digit)

    Return: list of the parts, with all-digit parts converted to int.
    """
    text = sdt if isinstance(sdt, str) else str(sdt)
    return [
        int(part) if re.match(r'^\d+$', part) else part
        for part in re.split(sep, text)
    ]
325
+
326
+ #
327
+ # date: given date in format of fromfmt
328
+ # tofmt: date formats; ex. "Month D, YYYY"
329
+ # fromfmt: date formats, default to YYYY-MM-DD
330
+ # Return: new formated date string according to tofmt
331
+ #
332
def format_date(cdate, tofmt = None, fromfmt = None):
    """Re-format a date string from one format into another.

    cdate:   date value given in the layout described by fromfmt
    tofmt:   output format built from DATEFMTS tokens, ex. "Month D, YYYY";
             the fmtdate()/fmtdatehour() default is used when omitted
    fromfmt: format of cdate; when omitted cdate must look like
             year<sep>month<sep>day

    Return: the newly formatted date string. A falsy cdate is returned
    unchanged, and so is a "YYYY-MM-DD" string when neither format is given.
    """
    if not cdate: return cdate
    if not isinstance(cdate, str): cdate = str(cdate)
    dates = [None, None, None]    # year, month, day; an hour may be appended
    sep = '|'
    mns = sep.join(MNS)
    months = sep.join(MONTHS)
    mkeys = ['D', 'M', 'Q', 'Y', 'C', 'H']
    # capture patterns, looked up below by (adjusted) format-token length
    PATTERNS = [r'(\d\d\d\d)', r'(\d+)', r'(\d\d)',
                r'(\d\d\d)', '(' + mns + ')', '(' + months + ')']

    if not fromfmt:
        if not tofmt:
            if re.match(r'^\d\d\d\d-\d\d-\d\d$', cdate): return cdate    # no need formatting
        ms = re.match(r'^\d+(\W)\d+(\W)\d+', cdate)
        if ms:
            # derive a year/month/day format from the separators in use
            fromfmt = "Y" + ms.group(1) + "M" + ms.group(2) + "D"
        else:
            # NOTE(review): presumably PgLOG.LGEREX makes pglog() abort;
            # otherwise fromfmt stays unset for the code below -- confirm.
            PgLOG.pglog(cdate + ": Invalid date, should be in format YYYY-MM-DD", PgLOG.LGEREX)

    # find the format token used for each unit; a found token is removed
    # from the scratch copy so that later units cannot match it again
    pattern = fromfmt
    fmts = {}
    formats = {}
    for mkey in mkeys:
        ms = re.search(DATEFMTS[mkey], pattern, re.I)
        if ms:
            fmts[mkey] = ms.group(1)
            pattern = re.sub(fmts[mkey], '', pattern)

    # turn fromfmt into a regular expression with one group per token,
    # remembering the position of each token to restore the group order
    for mkey in fmts:
        fmt = fmts[mkey]
        i = len(fmt)
        if mkey == 'D':
            if i == 4: i = 1
        elif mkey == 'M':
            if i == 3: i = 4      # "Mon": abbreviated month names
        elif mkey == 'Y':
            if i == 4: i = 0      # four-character year tokens: four digits
        formats[fromfmt.find(fmt)] = fmt
        fromfmt = fromfmt.replace(fmt, PATTERNS[i])

    # Use the groups of the first match. re.findall() yields plain strings
    # instead of tuples when the pattern holds a single group, which made
    # indexing pick single characters for one-token formats.
    ms = re.search(fromfmt, cdate)
    vals = ms.groups() if ms else ()
    mcnt = len(vals)
    i = 0
    for k in sorted(formats):
        if i >= mcnt: break
        fmt = formats[k]
        val = vals[i]
        if re.match(r'^Y', fmt, re.I):
            dates[0] = int(val)
            if len(fmt) == 3: dates[0] *= 10    # decade to year
        elif re.match(r'^C', fmt, re.I):
            dates[0] = 100 * int(val)           # year at end of century
        elif re.match(r'^M', fmt, re.I):
            if re.match(r'^Mon', fmt, re.I):
                dates[1] = get_month(val)
            else:
                dates[1] = int(val)
        elif re.match(r'^Q', fmt, re.I):
            dates[1] = 3 * int(val)             # month at end of quarter
        elif re.match(r'^H', fmt, re.I):        # hour
            dates.append(int(val))
        else:                                   # day
            dates[2] = int(val)
        i += 1

    if len(dates) > 3:
        cdate = fmtdatehour(dates[0], dates[1], dates[2], dates[3], tofmt)
    else:
        cdate = fmtdate(dates[0], dates[1], dates[2], tofmt)

    return cdate
407
+
408
+ #
409
+ # yr: year value
410
+ # mn: month value, 1-12
411
+ # dy: day of the month
412
+ # hr: hour of the day
413
+ # nn: minute of the hour
414
+ # ss: second of the minute
415
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS"
416
+ # Return: new formated datehour string
417
+ #
418
def fmtdatetime(yr, mn, dy, hr = None, nn = None, ss = None, tofmt = None):
    """Format date and time values into a datetime string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    hr:    hour of the day
    nn:    minute of the hour
    ss:    second of the minute
    tofmt: output format, ex. "Month D, YYYY"; defaults to
           "YYYY-MM-DD HH:NN:SS"

    Second, minute and hour values outside their range are normalised and
    the overflow is carried into the next larger unit, up to the day.

    Return: the newly formatted datetime string.
    """
    if not tofmt: tofmt = "YYYY-MM-DD HH:NN:SS"

    tms = [ss, nn, hr, dy]
    fks = ['S', 'N', 'H']
    ups = [60, 60, 24]

    # adjust second/minute/hour values out of range, carrying upward
    for i in range(3):
        if tms[i] is not None and tms[i+1] is not None:
            carry, tms[i] = divmod(tms[i], ups[i])
            tms[i+1] += carry

    # use the carried day value, not the day originally passed in
    sdt = fmtdate(yr, mn, tms[3], tofmt)

    # format second/minute/hour values
    for i in range(3):
        if tms[i] is None:
            continue
        ms = re.search(DATEFMTS[fks[i]], sdt, re.I)
        if ms:
            fmt = ms.group(1)
            # a local name other than "str" keeps the builtin callable
            sval = "{:02}".format(tms[i]) if len(fmt) == 2 else str(tms[i])
            sdt = re.sub(fmt, sval, sdt, count = 1)

    return sdt
453
+
454
+ #
455
+ # yr: year value
456
+ # mn: month value, 1-12
457
+ # dy: day of the month
458
+ # hr: hour of the day
459
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD:HH"
460
+ # Return: new formated datehour string
461
+ #
462
def fmtdatehour(yr, mn, dy, hr, tofmt = None):
    """Format date and hour values into a date-hour string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    hr:    hour of the day; values outside 0-23 are carried into the day
    tofmt: output format, ex. "Month D, YYYY"; defaults to "YYYY-MM-DD:HH"

    Return: the newly formatted date-hour string.
    """
    if not tofmt: tofmt = "YYYY-MM-DD:HH"

    have_hour = hr is not None
    if have_hour and dy is not None:    # adjust hour value out of range
        if hr < 0:
            while hr < 0:
                hr, dy = hr + 24, dy - 1
        elif hr > 23:
            while hr > 23:
                hr, dy = hr - 24, dy + 1

    datehour = fmtdate(yr, mn, dy, tofmt)
    if not have_hour:
        return datehour

    found = re.search(DATEFMTS['H'], datehour, re.I)
    if found:
        token = found.group(1)
        shr = "{:02}".format(hr) if len(token) == 2 else str(hr)
        datehour = re.sub(token, shr, datehour, count = 1)

    return datehour
489
+
490
+ #
491
+ # yr: year value
492
+ # mn: month value, 1-12
493
+ # dy: day of the month
494
+ # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD"
495
+ # Return: new formated date string
496
+ #
497
def fmtdate(yr, mn, dy, tofmt = None):
    """Format year, month and day values into a date string.

    yr:    year value
    mn:    month value, 1-12
    dy:    day of the month
    tofmt: output format built from DATEFMTS tokens, ex. "Month D, YYYY";
           defaults to "YYYY-MM-DD"

    The values are first passed through adjust_ymd().
    NOTE(review): adjust_ymd() is defined elsewhere in this module;
    presumably it normalises out-of-range values -- confirm.

    Return: the newly formatted date string.
    """
    (y, m, d) = adjust_ymd(yr, mn, dy)
    if not tofmt or tofmt == 'YYYY-MM-DD': return "{}-{:02}-{:02}".format(y, m, d)

    if dy is not None:
        md = re.search(DATEFMTS['D'], tofmt, re.I)
        if md:
            fmt = md.group(1)    # day
            slen = len(fmt)
            if slen > 2:         # days of the year
                for i in range(1, m): d += MDAYS[i]
                # MDAYS lists 28 days for February; add the leap day once
                # the date lies past February of a leap year
                if m > 2 and isinstance(y, int) and calendar.isleap(y): d += 1
                sdy = "{:03}".format(d)
            elif slen == 2:
                sdy = "{:02}".format(d)
            else:
                sdy = str(d)
            tofmt = re.sub(fmt, sdy, tofmt, count = 1)

    if mn is not None:
        md = re.search(DATEFMTS['M'], tofmt, re.I)
        if md:
            fmt = md.group(1)    # month
            slen = len(fmt)
            if slen == 2:
                smn = "{:02}".format(m)
            elif re.match(r'^mon', fmt, re.I):
                smn = MNS[m-1] if slen == 3 else MONTHS[m-1]
                if re.match(r'^Mo', fmt):
                    smn = smn.capitalize()
                elif re.match(r'^MO', fmt):
                    smn = smn.upper()
            else:
                smn = str(m)
            tofmt = re.sub(fmt, smn, tofmt, count = 1)
        else:
            # no month token: fall back to a quarter token
            md = re.search(DATEFMTS['Q'], tofmt, re.I)
            if md:
                fmt = md.group(1)    # quarter
                m = int((m+2)/3)
                smn = "{:02}".format(m) if len(fmt) == 2 else str(m)
                tofmt = re.sub(fmt, smn, tofmt, count = 1)

    if yr is not None:
        md = re.search(DATEFMTS['Y'], tofmt, re.I)
        if md:
            fmt = md.group(1)    # year
            slen = len(fmt)
            if slen == 2:
                syr = "{:02}".format(y%100)
            elif slen == 3:      # decade
                if y > 999: y = int(y/10)
                syr = "{:03}".format(y)
            else:
                if re.search(r'^YY00', fmt, re.I): y = 100*int(y/100)    # hundred years
                syr = "{:04}".format(y)
            tofmt = re.sub(fmt, syr, tofmt, count = 1)
        else:
            # no year token: fall back to a century token
            md = re.search(DATEFMTS['C'], tofmt, re.I)
            if md:
                fmt = md.group(1)    # century
                if y > 999:
                    y = 1 + int(y/100)
                elif y > 99:
                    # NOTE(review): divides the unadjusted yr by 10,
                    # presumably treating a three-digit value as a decade
                    # -- confirm.
                    y = 1 + int(yr/10)
                syr = "{:02}".format(y)
                tofmt = re.sub(fmt, syr, tofmt, count = 1)

    return tofmt
567
+
568
+ #
569
+ # format given date and time into standard timestamp
570
+ #
571
def join_datetime(sdate, stime):
    """Join a date and a time into one "date time" string.

    sdate: date value; None is returned when it is falsy
    stime: time value; "00:00:00" is used when it is falsy, and a leading
           single-digit hour is zero-padded

    Return: "<date> <time>", or None without a date.
    """
    if not sdate:
        return None

    datestr = sdate if isinstance(sdate, str) else str(sdate)
    if not stime:
        timestr = "00:00:00"
    elif isinstance(stime, str):
        timestr = stime
    else:
        timestr = str(stime)

    if re.match(r'^\d:', timestr):
        timestr = '0' + timestr

    return "{} {}".format(datestr, timestr)

# alternative name for join_datetime
fmttime = join_datetime
582
+
583
+ #
584
+ # split a date or datetime into an array of [date, time]
585
+ #
586
def date_and_time(sdt):
    """Split a date or datetime value into a list of [date, time].

    sdt: value to split on single spaces; converted with str() when needed

    Return: [None, None] for a falsy value; otherwise the list of parts,
    with '00:00:00' appended when the value holds no space.
    """
    if not sdt:
        return [None, None]

    text = sdt if isinstance(sdt, str) else str(sdt)
    parts = text.split(' ')
    if len(parts) == 1:
        parts.append('00:00:00')
    return parts
594
+
595
+ #
596
+ # convert given date/time to unix epoch time; -1 if cannot
597
+ #
598
def unixtime(stime):
    """Convert a date, time or datetime value to Unix epoch seconds.

    stime: value whose string form starts with "Y-M-D" and/or holds an
           "H:M:S" part; fields that are absent are left at 0

    Return: the epoch time computed by time.mktime() in local time;
    -1 if it cannot be computed.
    """
    pt = [0]*9
    if not isinstance(stime, str): stime = str(stime)

    ms = re.match(r'^(\d+)-(\d+)-(\d+)', stime)
    if ms:
        pt[0:3] = [int(val) for val in ms.groups()]

    # Not anchored: the time normally follows the date inside the same
    # string, where a pattern anchored at the start can never match.
    ms = re.search(r'(\d+):(\d+):(\d+)', stime)
    if ms:
        pt[3:6] = [int(val) for val in ms.groups()]

    pt[8] = -1    # let mktime() determine whether DST is in effect
    try:
        return time.mktime(time.struct_time(pt))
    except (OverflowError, ValueError):
        return -1
613
+
614
+ #
615
+ # sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
616
+ # edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD'
617
+ # Return: list of start and end dates in format of YYYY-MM-DD
618
+ #
619
def daterange(sdate, edate):
    """Expand partial start and end dates into full dates.

    sdate: start date in form of 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'
    edate: end date in form of 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'

    A partial start date is completed to the first day of its month or
    year, a partial end date to the last day (the month end is computed
    through adddate()). Leading and trailing non-word characters are kept.

    Return: [sdate, edate]; falsy values are passed through unchanged.
    """
    def complete(value, month_end, year_fmt):
        # expand one partial date; leave anything unrecognised untouched
        if not value:
            return value
        if not isinstance(value, str): value = str(value)
        if re.search(r'\d+-\d+-\d+', value):
            return value

        found = re.match(r'^(\W*)(\d+)-(\d+)(\W*)$', value)
        if found:
            value = "{}{}-{}-01{}".format(found.group(1), found.group(2), found.group(3), found.group(4))
            # one month later minus one day: the last day of the month
            return adddate(value, 0, 1, -1) if month_end else value

        found = re.match(r'^(\W*)(\d+)(\W*)$', value)
        if found:
            return year_fmt.format(found.group(1), found.group(2), found.group(3))
        return value

    first = complete(sdate, False, "{}{}-01-01{}")
    last = complete(edate, True, "{}{}-12-31{}")
    return [first, last]
645
+
646
+ #
647
+ # date to datetime range
648
+ #
649
def dtrange(dates):
    """Turn a [start date, end date] pair into a datetime range, in place.

    dates: two-item list; a truthy start gets ' 00:00:00' appended and a
           truthy end gets ' 23:59:59' appended

    Return: the same list object, modified.
    """
    for idx, clock in ((0, ' 00:00:00'), (1, ' 23:59:59')):
        value = dates[idx]
        if not value:
            continue
        if not isinstance(value, str): value = str(value)
        dates[idx] = value + clock

    return dates
661
+
662
+ #
663
+ # sdate: starting date in format of 'YYYY-MM-DD'
664
+ # edate: ending date
665
+ # fmt: period format, ex. "YYYYMon-YYYMon", default to "YYYYMM-YYYYMM"
666
+ # Return: a string of formated period
667
+ #
668
def format_period(sdate, edate, fmt = None):
    """Format a start and an end date into a period string.

    sdate: starting date holding 'YYYY-MM-DD'
    edate: ending date
    fmt:   period format "<start format>-<end format>", ex.
           "YYYYMon-YYYYMon"; defaults to "YYYYMM-YYYYMM". A format without
           '-' formats the start date only. An end format containing
           "current" (any case) is copied into the result verbatim.

    Return: the formatted period string.
    """
    if fmt:
        found = re.match(r'^(.*)(\s*-\s*)(.*)$', fmt)
        if found:
            (sfmt, sep, efmt) = found.groups()
        else:
            (sfmt, sep, efmt) = (fmt, '', None)
    else:
        (sfmt, sep, efmt) = ("YYYYMM", '-', "YYYYMM")

    period = ''
    if sdate:
        if not isinstance(sdate, str): sdate = str(sdate)
        found = re.search(r'(\d+)-(\d+)-(\d+)', sdate)
        if found:
            (yr, mn, dy) = found.groups()
            period = fmtdate(int(yr), int(mn), int(dy), sfmt)

    if sep:
        period += sep

    if not efmt:
        return period

    if re.search(r'current', efmt, re.I):
        period += efmt
    elif edate:
        if not isinstance(edate, str): edate = str(edate)
        found = re.search(r'(\d+)-(\d+)-(\d+)', edate)
        if found:
            (yr, mn, dy) = found.groups()
            period += fmtdate(int(yr), int(mn), int(dy), efmt)

    return period
704
+
705
+ #
706
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
707
+ # newid: True to format a new dsid; defaults to False for now
708
+ # returns a new or old dsid according to the newid option
709
+ #
710
def format_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX):
    """Format a dataset id in the new or the old style.

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN
    newid:  truthy for the new style ([a-z]NNNNNN), falsy for the old style
            (dsNNN.N); the default is read from PgLOG.PGLOG['NEWDSID'] when
            this function is defined
    logact: action flags for PgLOG.pglog() on failure; falsy for no logging

    Return: the formatted id; the id is returned as given (after str())
    when it is invalid or cannot be converted.
    """
    dsid = str(dsid)

    found = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if found:
        (lead, head, tail) = found.groups()
        if lead not in PgLOG.PGLOG['DSIDCHRS']:
            if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact)
            return dsid
        if newid:
            return dsid
        # only ids whose last three digits start with '00' have an old form
        if tail[:2] != '00':
            if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact)
            return dsid
        return 'ds{}.{}'.format(head, tail[2])

    found = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I)
    if not found:
        found = re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
    if not found:
        if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact)
        return dsid

    if newid:
        return "d{}00{}".format(found.group(1), found.group(3))
    return 'ds{}.{}'.format(found.group(1), found.group(3))
735
+
736
+ #
737
+ # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
738
+ # newid: True to format a new dsid; defaults to False for now
739
+ # returns a new or old metadata dsid according to the newid option
740
+ #
741
def metadata_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX):
    """Format a metadata dataset id in the new or the old style.

    dsid:   dataset id in form of dsNNN(.|)N, NNN.N or [a-z]NNNNNN
    newid:  truthy for the new style ([a-z]NNNNNN), falsy for the old
            metadata style (NNN.N, without the "ds" prefix); the default is
            read from PgLOG.PGLOG['NEWDSID'] when this function is defined
    logact: action flags for PgLOG.pglog() on failure; falsy for no logging

    Return: the formatted id; the id is returned as given (after str())
    when it is invalid or cannot be converted.
    """
    # same input handling as format_dataset_id(): accept non-string ids and
    # a "ds" prefix in any letter case
    dsid = str(dsid)

    ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
    if ms:
        ids = list(ms.groups())
        if ids[0] not in PgLOG.PGLOG['DSIDCHRS']:
            if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact)
            return dsid
        if newid: return dsid
        # only ids whose last three digits start with '00' have an old form
        if ids[2][:2] != '00':
            if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact)
            return dsid
        return '{}.{}'.format(ids[1], ids[2][2])

    ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I)
    if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
    if ms:
        if newid:
            return "d{}00{}".format(ms.group(1), ms.group(3))
        else:
            return '{}.{}'.format(ms.group(1), ms.group(3))

    if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact)
    return dsid
765
+
766
+
767
+ #
768
+ # idstr: string holding a dsid in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
769
+ # and find it according to the flag value O (Old), N (New) or B (Both) formats
770
+ # returns dsid if found in given id string; None otherwise
771
+ #
772
+ def find_dataset_id(idstr, flag = 'B', logact = 0):
773
+
774
+ if flag in 'NB':
775
+ ms = re.search(r'(^|\W)(([a-z])\d{6})($|\D)', idstr)
776
+ if ms and ms.group(3) in PgLOG.PGLOG['DSIDCHRS']: return ms.group(2)
777
+ if flag in 'OB':
778
+ ms = re.search(r'(^|\W)(ds\d\d\d(\.|)\d)($|\D)', idstr)
779
+ if not ms: ms = re.search(r'(^|\W)(\d\d\d\.\d)($|\D)', idstr)
780
+ if ms: return ms.group(2)
781
+
782
+ if logact: PgLOG.pglog("{} : No valid dsid found for flag {}".format(idstr, flag), logact)
783
+ return None
784
+
785
+ #
786
+ # find and convert all found dsids according to old/new dsids
787
+ # for newid = False/True
788
+ #
789
def convert_dataset_ids(idstr, newid = PgLOG.PGLOG['NEWDSID'], logact = 0):
    """Convert the dataset ids found in a string to the new or old style.

    idstr:  string that may hold dataset ids
    newid:  truthy to convert old-style ids to the new style, falsy for the
            reverse; the default is read from PgLOG.PGLOG['NEWDSID'] when
            this function is defined
    logact: action flags passed on to format_dataset_id()

    Return: a tuple (converted string, count of ids converted). An id that
    cannot be converted is left in place and is not counted.
    """
    flag = 'O' if newid else 'N'
    cnt = 0
    if not idstr:
        return (idstr, cnt)

    pos = 0    # start of the part of idstr that is still searched
    while True:
        dsid = find_dataset_id(idstr[pos:], flag = flag)
        if not dsid: break
        ndsid = format_dataset_id(dsid, newid = newid, logact = logact)
        if ndsid != dsid:
            idstr = idstr[:pos] + idstr[pos:].replace(dsid, ndsid)
            cnt += 1
        else:
            # Unconvertible id: step over it, otherwise the same id is found
            # again forever. The last character of the id stays inside the
            # searched part so a directly following id is still seen as
            # being preceded by a word character.
            pos = idstr.index(dsid, pos) + len(dsid) - 1

    return (idstr, cnt)
802
+
803
+ #
804
+ # records: dict of mutiple records,
805
+ # idx: index of the records to return
806
+ # Return: a dict to the idx record out of records
807
+ #
808
def onerecord(records, idx):
    """Extract one record from a dict of multiple records.

    records: dict mapping each field name to a list of values
    idx:     index of the record to return

    Return: a dict mapping each field name to its value at idx.
    """
    return {fld: records[fld][idx] for fld in records}
816
+
817
+ #
818
+ # records: dict of mutiple records,
819
+ # record: record to add
820
+ # idx: index of the record to add
821
+ # Return: add a record to a dict of lists
822
+ #
823
def addrecord(records, record, idx):
    """Store one record at a given index of a dict of lists.

    records: dict mapping each field name to a list of values; None or an
             empty dict is initialised from the keys of record
    record:  dict of the field values to store
    idx:     index to store the record at; lists shorter than idx are
             padded with None

    Return: the updated records dict.
    """
    if records is None:
        records = {}    # initialize dict of lists structure
    if not records:
        records.update((key, []) for key in record)

    for key in record:
        column = records[key]
        gap = idx - len(column)
        if gap < 0:
            column[idx] = record[key]
        else:
            # pad up to idx, then add the value at position idx
            column.extend([None] * gap)
            column.append(record[key])

    return records
841
+
842
+ #
843
+ # convert a hash with multiple rows from pgmget() to an array of hashes
844
+ #
845
def hash2array(hrecs, hkeys = None):
    """Convert a dict of lists (multiple rows) into a list of dicts.

    hrecs: dict mapping each key to a list of row values
    hkeys: optional list of the keys to include; defaults to all keys

    Return: a list holding one dict per row. The row count is taken from
    the list of the first key.
    """
    if not hkeys: hkeys = list(hrecs)

    if hrecs and hkeys[0] in hrecs:
        rows = len(hrecs[hkeys[0]])
    else:
        rows = 0

    return [{hkey: hrecs[hkey][row] for hkey in hkeys} for row in range(rows)]
856
+
857
+ #
858
+ # convert an array of hashes to a hash with multiple rows for pgmget()
859
+ #
860
def array2hash(arecs, hkeys = None):
    """Convert a list of dicts into a dict of lists (multiple rows).

    arecs: list of dicts, one per row
    hkeys: optional list of the keys to include; defaults to the keys of
           the first row

    Return: a dict mapping each key to the list of its row values; an
    empty dict when there are no rows.
    """
    if not arecs:
        return {}

    if not hkeys: hkeys = list(arecs[0])
    return {hkey: [arec[hkey] for arec in arecs] for hkey in hkeys}
871
+
872
+ #
873
+ # records: dict of mutiple records,
874
+ # opt: 0 - column count,
875
+ # 1 - row count,
876
+ # 2 - both
877
+ # Return: a single number or list of two dependend on given opt
878
+ #
879
def hashcount(records, opt = 0):
    """Count the columns and/or rows of a dict of lists.

    records: dict mapping each field name to a list of values
    opt:     0 - column count,
             1 - row count (length of the first list),
             2 - both

    Return: a single number for opt 0 or 1; the list [columns, rows] for
    opt 2. The counts are 0 for empty records.
    """
    cols = rows = 0

    if records:
        if opt in (0, 2):
            cols = len(records)
        if opt in (1, 2):
            rows = len(next(iter(records.values())))

    counts = [cols, rows]
    return counts if opt == 2 else counts[opt]
891
+
892
+ #
893
+ # adict: dict a
894
+ # bdict: dict b
895
+ # default: default values if missed
896
+ # unique: unique join if set
897
+ # Return: the joined dict records with default value for missing ones
898
+ # For unique join, a record in bdict must not be contained in adict already
899
+ #
900
def joinhash(adict, bdict, default = None, unique = None):
    """Append the records of one dict of lists to another.

    adict:   dict a (modified in place and returned)
    bdict:   dict b (keys it is missing are added to it with default values)
    default: value used for keys missing from either dict
    unique:  truthy for a unique join; a record of bdict is then appended
             only if no record originally in adict agrees with it on all
             keys the two dicts have in common (compared with pgcmp())

    Return: the joined dict; the other dict as given when one is empty.
    """
    if not bdict: return adict
    if not adict: return bdict

    akeys = list(adict)
    bkeys = list(bdict)
    acnt = len(adict[akeys[0]])
    bcnt = len(bdict[bkeys[0]])
    ckeys = []    # common keys for unique joins

    # check and assign default value for missing keys in adict
    for bkey in bkeys:
        if bkey not in akeys:
            adict[bkey] = [default]*acnt
        elif unique and bkey not in ckeys:
            ckeys.append(bkey)

    # check and assign default value for missing keys in bdict
    for akey in akeys:
        if akey not in bkeys:
            bdict[akey] = [default]*bcnt
        elif unique and akey not in ckeys:
            ckeys.append(akey)

    if not unique:
        for key in adict:
            adict[key].extend(bdict[key])
        return adict

    for i in range(bcnt):
        # a truthy pgcmp() result marks two values as different
        duplicate = any(
            all(not pgcmp(adict[ckey][j], bdict[ckey][i]) for ckey in ckeys)
            for j in range(acnt)
        )
        if not duplicate:
            for key in adict:
                adict[key].append(bdict[key][i])

    return adict
945
+
946
+ #
947
+ # lst1: list 1
948
+ # lst2: list 2
949
+ # unique: unique join if set
950
+ # Return: the joined list
951
+ #
952
def joinarray(lst1, lst2, unique = None):
    """Append the items of one list to another.

    lst1:   list 1 (modified in place and returned)
    lst2:   list 2
    unique: truthy for a unique join; an item of lst2 is then appended
            only if pgcmp() reports it equal (result 0) to none of the
            items originally in lst1

    Return: the joined list; the other list as given when one is empty.
    """
    if not lst2: return lst1
    if not lst1: return lst2

    if not unique:
        lst1.extend(lst2)
        return lst1

    # compare against the original items only, as joinhash() does
    cnt1 = len(lst1)
    for item in lst2:
        if not any(pgcmp(lst1[j], item) == 0 for j in range(cnt1)):
            lst1.append(item)

    return lst1
970
+
971
+ #
972
+ # Function: crosshash(ahash, bhash)
973
+ # Return: a reference to the cross-joined hash records
974
+ #
975
def crosshash(ahash, bhash):
    """Cross-join two dicts of lists.

    ahash: dict mapping each key to a list of row values
    bhash: dict mapping each key to a list of row values

    Return: a new dict pairing every row of ahash with every row of bhash;
    the other dict as given when one is empty.
    """
    if not bhash: return ahash
    if not ahash: return bhash

    akeys = list(ahash)
    bkeys = list(bhash)
    arows = len(ahash[akeys[0]])
    brows = len(bhash[bkeys[0]])

    rets = {key: [] for key in akeys + bkeys}
    for arow in range(arows):
        for brow in range(brows):
            for key in akeys:
                rets[key].append(ahash[key][arow])
            for key in bkeys:
                rets[key].append(bhash[key][brow])

    return rets
993
+
994
+ #
995
+ # strip database and table names for a field name
996
+ #
997
def strip_field(field):
    """Strip database and table names from a field name.

    Return: the part after the last dot; the field as given when it holds
    no dot or ends with one.
    """
    found = re.search(r'\.([^\.]+)$', field)
    return found.group(1) if found else field
1002
+
1003
+ #
1004
+ # pgrecs: dict obtained from pgmget()
1005
+ # flds: list of single letter fields to be sorted on
1006
+ # hash: table dict for pre-defined fields
1007
+ # patterns: optional list of temporal patterns for order fields
1008
+ # Return: a sorted dict list
1009
+ #
1010
def sorthash(pgrecs, flds, hash, patterns = None):
    """Sort the rows of a dict of lists on the given fields.

    pgrecs:   dict mapping each field name to a list of row values
    flds:     single-letter field codes to sort on; a lower-case letter
              sorts that field in descending order
    hash:     table dict keyed by upper-case field code; item [1] of each
              entry holds the (possibly qualified) field name
    patterns: optional list of temporal patterns, one per sort field; a
              value with a pattern is sorted on its "YYYYMMDDHH" form

    Return: a new dict of lists in sorted row order; pgrecs itself when it
    holds fewer than two rows.
    """
    fcnt = len(flds)    # count of fields to be sorted on

    # sorting order per field: descending (-1) or ascending (1)
    desc = [-1 if fld.islower() else 1 for fld in flds]
    # full names of the fields to be sorted on
    fields = [strip_field(hash[fld.upper()][1]) for fld in flds]
    nums = [1]*fcnt     # treat each column as numerical until shown otherwise

    count = len(pgrecs[fields[0]])    # row count of pgrecs
    if count < 2: return pgrecs       # no need of sorting
    pcnt = len(patterns) if patterns else 0

    # build the sort keys, one list per row
    srecs = []
    for i in range(count):
        pgrec = onerecord(pgrecs, i)
        rec = []
        for j, field in enumerate(fields):
            val = pgrec[field]
            if j < pcnt and patterns[j]:
                # sort on the temporal part of the value matching the pattern
                val = format_date(val, "YYYYMMDDHH", patterns[j])
            if nums[j]: nums[j] = pgnum(val)
            rec.append(val)
        rec.append(i)    # extra column caching the original row index
        srecs.append(rec)

    srecs = quicksort(srecs, 0, count-1, desc, fcnt, nums)

    # rebuild the rows following the cached row indices of the sorted keys
    rets = {fld: [] for fld in pgrecs}
    for i in range(count):
        pgrec = onerecord(pgrecs, srecs[i][fcnt])
        for fld in pgrecs:
            rets[fld].append(pgrec[fld])

    return rets
1060
+
1061
+ #
1062
+ # Return: the number of days between date1 and date2
1063
+ #
1064
def diffdate(date1, date2):
    """Return the number of days between date1 and date2 (date1 - date2).

    Each date is converted with unixtime(); a falsy date counts as epoch
    time 0. The result is rounded to whole days.
    """
    secs1 = unixtime(date1) if date1 else 0
    secs2 = unixtime(date2) if date2 else 0
    return round((secs1 - secs2)/86400)    # 24*60*60
1070
+
1071
+ #
1072
+ # Return: the number of seconds between time1 and time2
1073
+ #
1074
def difftime(time1, time2):
    """Return time1 minus time2 in seconds, rounded.

    A falsy time counts as unix time 0; other values go through unixtime().
    """
    secs1 = unixtime(time1) if time1 else 0
    secs2 = unixtime(time2) if time2 else 0
    return round(secs1 - secs2)

# alias kept for callers that use the longer name
diffdatetime = difftime
1082
+
1083
+ #
1084
+ # Return: the number of days between date and '1970-01-01 00:00:00'
1085
+ #
1086
def get_days(cdate):
    """Return the number of days between cdate and 1970-01-01."""
    epoch = '1970-01-01'
    return diffdate(str(cdate), epoch)
1089
+
1090
+ #
1091
+ # Function: get_month_days(date)
1092
+ #
1093
+ # Return: the number of days in given month
1094
+ #
1095
def get_month_days(cdate):
    """Return the number of days in the month of cdate.

    cdate is converted to a string and must start with 'YYYY-MM';
    0 is returned when it does not.
    """
    found = re.match(r'^(\d+)-(\d+)', str(cdate))
    if not found:
        return 0
    year, month = (int(part) for part in found.groups())
    return calendar.monthrange(year, month)[1]
1104
+
1105
+ #
1106
+ # Function: validate_date(date)
1107
+ #
1108
+ # Return: a date in format of YYYY-MM-DD in which year, month and day are all validated
1109
+ #
1110
def validate_date(cdate):
    """Clamp the year/month/day of cdate into valid ranges.

    Years below 1000 get 2000 added, years above 9999 are reduced modulo
    10000, the month is clamped to 1..12 and the day to 1..last day of that
    month. The input is returned untouched when its string form does not
    start with 'Y-M-D'.
    Return: the date formatted as 'YYYY-MM-DD', or the original cdate
    """
    found = re.match(r'^(\d+)-(\d+)-(\d+)', str(cdate))
    if not found:
        return cdate

    year, month, day = map(int, found.groups())
    if year < 1000:
        year += 2000
    elif year > 9999:
        year %= 10000

    month = min(max(month, 1), 12)
    last_day = calendar.monthrange(year, month)[1]
    day = min(max(day, 1), last_day)

    return '{}-{:02d}-{:02d}'.format(year, month, day)
1131
+
1132
+ #
1133
+ # Function: get_date(days)
1134
+ #
1135
+ # Return: the date in format of "YYYY-MM-DD" for given number of days
1136
+ # from '1970-01-01 00:00:00'
1137
+ #
1138
def get_date(days):
    """Return the date that lies int(days) days after 1970-01-01."""
    offset = int(days)
    return adddate('1970-01-01', 0, 0, offset)
1141
+
1142
+ #
1143
+ # compare date/hour and return the different hours
1144
+ #
1145
def diffdatehour(date1, hour1, date2, hour2):
    """Return (date1, hour1) minus (date2, hour2) in hours.

    An hour given as None is taken as 23.
    """
    first = 23 if hour1 is None else hour1
    second = 23 if hour2 is None else hour2
    return (first - second) + 24 * diffdate(date1, date2)
1150
+
1151
+ #
1152
+ # hour difference between GMT and local time
1153
+ #
1154
def diffgmthour():
    """Return the hour difference of the current GMT time minus local time."""
    gmt = time.gmtime()
    local = time.localtime()
    # indices 0..2 are year, month, day; index 3 is the hour
    gmt_date = fmtdate(gmt[0], gmt[1], gmt[2])
    local_date = fmtdate(local[0], local[1], local[2])

    return diffdatehour(gmt_date, gmt[3], local_date, local[3])
1164
+
1165
+ #
1166
+ # compare date and time (if given) and return 1, 0 and -1
1167
+ #
1168
def cmptime(date1, time1, date2, time2):
    """Compare two date/time pairs.

    Each pair is merged with join_datetime() and the results are compared
    with pgcmp(), whose 1 / 0 / -1 result is returned.
    """
    return pgcmp(join_datetime(date1, time1), join_datetime(date2, time2))
1174
+
1175
+ #
1176
+ # date: the original date in format of 'YYYY-MM-DD',
1177
+ # mf: the number of month fractions to add
1178
+ # nf: number of fractions of a month
1179
+ # Return: new date
1180
+ #
1181
def addmonth(cdate, mf, nf = 1):
    """Add month fractions to a date.

    cdate:  the original date in format 'YYYY-MM-DD'
    mf:     the number of month fractions to add (negative to subtract)
    nf:     the number of fractions in a month; with nf below 2 (or falsy)
            mf is added as whole months through adddate()
    Return: the new date; cdate unchanged if mf is falsy or cdate does not
            match 'YYYY-MM-DD'
    """
    if not mf:
        return cdate
    if not nf or nf < 2:
        return adddate(cdate, 0, mf, 0)

    found = re.match(r'^(\d+)-(\d+)-(\d+)$', cdate)
    if not found:
        return cdate

    year, month, rest = (int(part) for part in found.groups())
    span = int(30 / nf)    # days in each fraction of a nominal 30-day month

    # split the day into whole fractions (offset) and the remainder (rest)
    offset = 0
    while rest > span:
        offset += span
        rest -= span

    # shift by the requested fractions, carrying whole 30-day months
    offset += mf * span
    if mf > 0:
        while offset >= 30:
            offset -= 30
            month += 1
    else:
        while offset < 0:
            offset += 30
            month -= 1

    return fmtdate(year, month, offset + rest)
1212
+
1213
+ # add yr years & mn months to yearmonth ym in format YYYYMM
1214
def addyearmonth(ym, yr, mn):
    """Add yr years and mn months to a year-month value.

    ym:     year-month in format 'YYYYMM'
    yr:     number of years to add (negative to subtract); None counts as 0
    mn:     number of months to add (negative to subtract); None counts as 0
    Return: the new year-month as 'YYYYMM'; ym unchanged if it does not
            match the 'YYYYMM' format
    """
    if yr is None: yr = 0
    if mn is None: mn = 0

    found = re.match(r'^(\d\d\d\d)(\d\d)$', ym if isinstance(ym, str) else str(ym))
    if not found:
        return ym

    # the increments are added to the parsed values, not replaced by them
    year = int(found.group(1)) + yr
    # a zero-based month lets one divmod carry whole years in either
    # direction, including the month-0 case that must become December
    carry, month0 = divmod(int(found.group(2)) - 1 + mn, 12)

    return "{:04}{:02}".format(year + carry, month0 + 1)
1236
+
1237
+ #
1238
+ # a wrapper to adddate()
1239
+ #
1240
def addNoLeapDate(cdate, yr, mn, dy):
    """Thin wrapper that forwards all arguments to adddate()."""
    result = adddate(cdate, yr, mn, dy)
    return result
1241
+
1242
+ #
1243
+ # set number of days in February (and in the year) for a leap year according to PgLOG.PGLOG['NOLEAP']
1244
+ #
1245
def set_leap_mdays(year):
    """Update the module-level MDAYS table for the given year.

    MDAYS[0] (days in the year) and MDAYS[2] (days in February) are set to
    366/29 when year is a leap year and PgLOG.PGLOG['NOLEAP'] is falsy,
    otherwise to 365/28.
    Return: 1 if the leap values were set, 0 otherwise
    """
    leap = 1 if (not PgLOG.PGLOG['NOLEAP'] and calendar.isleap(year)) else 0
    MDAYS[0] = 365 + leap
    MDAYS[2] = 28 + leap
    return leap
1256
+
1257
+ #
1258
+ # wrap on calendar.isleap()
1259
+ #
1260
def is_leapyear(year):
    """Return calendar.isleap(year)."""
    return calendar.isleap(year)
1261
+
1262
+ #
1263
+ # return 1 if the given day is the end of the month, 0 otherwise
1264
+ #
1265
def is_end_month(yr, mn, dy):
    """Return 1 if dy is the last day of month mn in year yr, else 0.

    Refreshes the shared MDAYS table through set_leap_mdays(yr) first.
    """
    set_leap_mdays(yr)
    return int(dy == MDAYS[mn])
1269
+
1270
+ #
1271
+ # adjust the year, month and day values that are out of range
1272
+ #
1273
def adjust_ymd(yr, mn, dy):
    """Normalize out-of-range month and day values.

    None defaults: yr -> 1970, mn -> 1, dy -> 1. Months outside 1..12 are
    carried into the year; days outside the month are carried into months
    (or whole years when the excess is larger than a year), one step per
    loop pass, using the MDAYS table refreshed by set_leap_mdays().
    Return: [yr, mn, dy] after adjustment
    """
    yr = 1970 if yr is None else yr
    mn = 1 if mn is None else mn
    dy = 1 if dy is None else dy

    settled = False
    while not settled:
        if mn > 12:
            yr, mn = yr + 1, mn - 12
        elif mn < 1:
            yr, mn = yr - 1, mn + 12
        else:
            # month is valid here; refresh the table for the current year
            set_leap_mdays(yr)
            year_days = MDAYS[0]

            if dy < 1:
                if dy < -year_days:
                    # more than a year short: step back a whole year
                    yr -= 1
                    dy += year_days
                else:
                    # borrow the days of the previous month
                    mn -= 1
                    if mn < 1:
                        yr -= 1
                        mn += 12
                    dy += MDAYS[mn]
            elif dy > MDAYS[mn]:
                if dy > year_days:
                    # more than a year over: step forward a whole year
                    dy -= year_days
                    yr += 1
                else:
                    # spill into the next month
                    dy -= MDAYS[mn]
                    mn += 1
            else:
                settled = True

    return [yr, mn, dy]
1314
+
1315
+ #
1316
+ # date: the original date in format of 'YYYY-MM-DD',
1317
+ # yr: the number of years to add/subtract from the odate for positive/negative value,
1318
+ # mn: the number of months to add/subtract from the odate for positive/negative value,
1319
+ # dy: the number of days to add/subtract from the odate for positive/negative value)
1320
+ #
1321
+ # Return: new date
1322
+ #
1323
def adddate(cdate, yr, mn = 0, dy = 0, tofmt = None):
    """Add years, months and days to a date.

    cdate:  the original date containing 'YYYY-MM-DD'; non-strings are
            converted with str()
    yr:     number of years to add (negative to subtract)
    mn:     number of months to add (negative to subtract)
    dy:     number of days to add (negative to subtract)
    tofmt:  passed through to fmtdate()
    yr/mn/dy may be None (treated as 0) or numeric strings.
    Return: the new date from fmtdate(); cdate unchanged if it is falsy or
            holds no 'Y-M-D' pattern
    """
    if not cdate:
        return cdate
    if not isinstance(cdate, str):
        cdate = str(cdate)

    def as_count(val):
        # None means "nothing to add"; strings are parsed as integers
        if val is None:
            return 0
        return int(val) if isinstance(val, str) else val

    yr = as_count(yr)
    mn = as_count(mn)
    dy = as_count(dy)

    found = re.search(r'(\d+)-(\d+)-(\d+)', cdate)
    if found is None:
        return cdate    # non-standard date format

    nyr, nmn, ndy = map(int, found.groups())

    # remember whether we start on a month end, so a month shift lands on
    # the end of the target month as well
    at_month_end = is_end_month(nyr, nmn, ndy) if (mn and ndy > 27) else 0

    if yr:
        nyr += yr
    if mn:
        # day 0 of the month after the target is the target's last day
        nyr, nmn, last_day = adjust_ymd(nyr, nmn + mn + 1, 0)
        if at_month_end:
            ndy = last_day
    if dy:
        ndy += dy

    return fmtdate(nyr, nmn, ndy, tofmt)
1352
+
1353
+ #
1354
+ # add given hours to the initial date and time
1355
+ #
1356
def addhour(sdate, stime, nhour):
    """Add hours to a date and time.

    sdate:  the initial date; non-strings are converted with str()
    stime:  the initial time, starting with the hour digits; non-strings
            are converted with str()
    nhour:  number of hours to add (negative to subtract); may be a
            numeric string
    Return: [sdate, stime] after the addition. The pair is returned
            unchanged when nhour is falsy or when stime is empty or does
            not start with an hour, since there is nothing to add to.
    """
    if nhour and isinstance(nhour, str): nhour = int(nhour)
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    if stime and not isinstance(stime, str): stime = str(stime)
    if not nhour: return [sdate, stime]

    found = re.match(r'\d+', stime) if stime else None
    if found is None:
        return [sdate, stime]

    old_hour = found.group()
    # floor division carries whole days in both directions
    days, hour = divmod(int(old_hour) + nhour, 24)

    new_hour = "{:02}".format(hour)
    if new_hour != old_hour:
        # the hour sits at the very start of stime, so splice it in place
        stime = new_hour + stime[found.end():]
    if days:
        sdate = adddate(sdate, 0, 0, int(days))

    return [sdate, stime]
1382
+
1383
+ #
1384
+ # add given years, months, days and hours to the initial date and hour
1385
+ #
1386
def adddatehour(sdate, nhour, yr, mn, dy, hr = 0):
    """Add years, months, days and hours to a date and hour.

    sdate:  the initial date; non-strings are converted with str()
    nhour:  the initial hour, or None if there is none; may be a numeric
            string when hr is given
    yr/mn/dy: amounts handed to adddate()
    hr:     number of hours to add; overflow past 0..23 is carried into dy
    Return: [sdate, nhour] after the addition; nhour stays None if it was
            given as None
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)

    if hr:
        has_hour = nhour is not None
        if has_hour:
            if isinstance(nhour, str):
                nhour = int(nhour)
            hr += nhour

        # bring the hour back into 0..23, carrying whole days into dy
        while hr < 0:
            dy -= 1
            hr += 24
        while hr > 23:
            dy += 1
            hr -= 24

        if has_hour:
            nhour = hr

    if yr or mn or dy:
        sdate = adddate(sdate, yr, mn, dy)

    return [sdate, nhour]
1406
+
1407
+ #
1408
+ # add given yyyy, mm, dd, hh, nn, ss to sdatetime
1409
+ # if nf, add fraction of month only
1410
+ #
1411
def adddatetime(sdatetime, yy, mm, dd, hh, nn, ss, nf = 0):
    """Add years, months, days, hours, minutes and seconds to a datetime.

    sdatetime: 'YYYY-MM-DD HH:MM:SS'; non-strings are converted with str().
               A value without a time part is taken as midnight.
    yy, mm, dd, hh, nn, ss: amounts to add (negative to subtract)
    nf:        if set, mm counts fractions of a month (nf fractions per
               month) and is applied through addmonth()
    Return:    the new datetime as 'date time'; a falsy sdatetime is
               returned unchanged
    """
    if not sdatetime:
        return sdatetime
    if not isinstance(sdatetime, str): sdatetime = str(sdatetime)

    parts = sdatetime.split(None, 1)
    if not parts:
        return sdatetime    # whitespace only: nothing to add to
    sdate = parts[0]
    stime = parts[1] if len(parts) > 1 else "00:00:00"

    if hh or nn or ss: (sdate, stime) = addtime(sdate, stime, hh, nn, ss)
    if nf:
        sdate = addmonth(sdate, mm, nf)
        mm = 0    # months already applied as fractions
    if yy or mm or dd: sdate = adddate(sdate, yy, mm, dd)

    return "{} {}".format(sdate, stime)
1440
+
1441
+ #
1442
+ # add given hours, minutes and seconds to the initial date and time
1443
+ #
1444
def addtime(sdate, stime, h, m, s):
    """Add hours, minutes and seconds to a date and time.

    sdate:  the initial date; non-strings are converted with str()
    stime:  the initial time 'HH:MM:SS'; non-strings are converted with
            str(). A value not matching that format counts as 00:00:00.
    h, m, s: hours, minutes and seconds to add (negative to subtract);
            falsy values count as 0, numeric strings are parsed
    Return: [sdate, stime]; day overflow is applied through adddate()
    """
    if sdate and not isinstance(sdate, str): sdate = str(sdate)
    # convert the time itself (not the date) when it is not a string
    if stime and not isinstance(stime, str): stime = str(stime)

    def as_count(val):
        if not val:
            return 0
        return int(val) if isinstance(val, str) else val

    hours, mins, secs = as_count(h), as_count(m), as_count(s)

    if stime:
        found = re.match(r'^(\d+):(\d+):(\d+)$', stime)
        if found:
            hours += int(found.group(1))
            mins += int(found.group(2))
            secs += int(found.group(3))

    # floor division carries seconds -> minutes -> hours -> days,
    # for both positive and negative totals
    carry, secs = divmod(secs, 60)
    carry, mins = divmod(mins + carry, 60)
    days, hours = divmod(hours + carry, 24)

    stime = "{:02}:{:02}:{:02}".format(hours, mins, secs)
    if days: sdate = adddate(sdate, 0, 0, days)

    return [sdate, stime]
1475
+
1476
+ #
1477
+ # add time interval array to datetime
1478
+ # opt = -1 - minus, 0 - begin time, 1 - add (default)
1479
+ #
1480
def addintervals(sdatetime, intv, opt = 1):
    """Add or subtract an interval list to/from a datetime.

    sdatetime: the initial datetime; non-strings are converted with str()
    intv:      up to 7 values in the argument order of adddatetime():
               years, months, days, hours, minutes, seconds, month fractions
    opt:       1 - add (default); -1 - subtract; 0 - begin time: one second
               is added first, then the interval is subtracted
    Return:    the new datetime string; sdatetime as a string if intv is empty
    """
    if not isinstance(sdatetime, str):
        sdatetime = str(sdatetime)
    if not intv:
        return sdatetime

    slots = [0] * 7
    for pos, amount in enumerate(intv):
        slots[pos] = amount

    if opt == 0:
        # the given datetime is assumed to be the end of the current
        # interval; one more second moves it to the start of the next one
        sdatetime = adddatetime(sdatetime, 0, 0, 0, 0, 0, 1)

    if opt < 1:
        # negate the six amounts for subtraction; slot 6 (fractions) stays
        slots[:6] = [-amount if amount else amount for amount in slots[:6]]

    return adddatetime(sdatetime, *slots)
1499
+
1500
+ #
1501
+ # adjust end date to the specified day days for frequency of year/month/week
1502
+ # end of period if days == 0
1503
+ # nf - number of fractions of a month, for unit of 'M' only
1504
+ #
1505
def enddate(sdate, days, unit, nf = 0):
    """Move a date to the end (or a given day) of its year, month or week.

    sdate:  the date to adjust; non-strings are converted with str()
    days:   target day within the period, or 0/None for the period end;
            may be a numeric string
    unit:   'Y', 'M' or 'W'; any other value returns sdate unchanged
    nf:     number of fractions of a month, used for unit 'M' only
    Return: the adjusted date
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)
    if days and isinstance(days, str):
        days = int(days)
    if not unit or unit not in 'YMW':
        return sdate

    if unit == 'Y':
        found = re.match(r'^(\d+)', sdate)
        if found:
            # day 'days' of January, or December 31 for the year end
            month, day = (1, days) if days else (12, 31)
            sdate = fmtdate(int(found.group(1)), month, day)
        return sdate

    if unit == 'W':
        wday = get_weekday(sdate)
        if days != wday:
            sdate = adddate(sdate, 0, 0, days - wday)
        return sdate

    if unit != 'M':
        return sdate

    full = re.match(r'^(\d+)-(\d+)-(\d+)', sdate)
    if full:
        yr, mn, dy = map(int, full.groups())
    else:
        partial = re.match(r'^(\d+)-(\d+)', sdate)
        if not partial:
            return sdate
        yr, mn = map(int, partial.groups())
        dy = 1

    if not nf or nf == 1:
        # whole month: the requested day, or the last day of the month
        target = days if days else calendar.monthrange(yr, mn)[1]
        if target != dy:
            sdate = fmtdate(yr, mn, target)
        return sdate

    span = int(30 / nf)    # days in each fraction of a nominal 30-day month
    if dy >= 28:
        frac = nf          # late days belong to the last fraction
    else:
        frac = int(dy / span)
        if frac * span < dy:
            frac += 1      # round up to the fraction containing dy

    if days:
        dy = (frac - 1) * span + days
    elif frac < nf:
        dy = frac * span
    else:
        # end of the last fraction: day 0 of the following month
        mn += 1
        dy = 0

    return fmtdate(yr, mn, dy)
1557
+
1558
+ #
1559
+ # adjust end time to the specified h/n/s for frequency of hour/minute/second
1560
+ #
1561
def endtime(stime, unit):
    """Move a time to the end of its hour, minute or second.

    stime:  the time to adjust; non-strings are converted with str().
            A falsy value is taken as 00:00:00.
    unit:   'H' sets minutes and seconds to 59; 'N' sets seconds to 59;
            'S' changes nothing. A unit not found in 'HNS' returns stime
            unchanged.
    Return: the adjusted time formatted as 'HH:MM:SS'
    """
    if stime and not isinstance(stime, str): stime = str(stime)
    if not (unit and unit in 'HNS'): return stime

    # copy into a list so the parts can be assigned by index
    tm = list(split_datetime(stime, 'T')) if stime else [0, 0, 0]

    if unit == 'H':
        tm[1] = tm[2] = 59
    elif unit == 'N':
        tm[2] = 59
    elif unit != 'S':
        # reached only by multi-character units such as 'HN' that pass
        # the substring test above
        tm[0] = 23
        tm[1] = tm[2] = 59

    return "{:02}:{:02}:{:02}".format(tm[0], tm[1], tm[2])
1580
+
1581
+ #
1582
+ # adjust end time to the specified h/n/s for frequency of year/month/week/day/hour/mimute/second
1583
+ #
1584
def enddatetime(sdatetime, unit, days = 0, nf = 0):
    """Move a datetime to the end of the period given by unit.

    sdatetime: 'date time' separated by one space; non-strings are
               converted with str()
    unit:      'H', 'N' or 'S' adjust the time part through endtime();
               the other units found in 'YMWD' adjust the date part
               through enddate(); anything else returns sdatetime unchanged
    days, nf:  passed to enddate()
    Return:    the adjusted datetime as 'date time'
    """
    if sdatetime and not isinstance(sdatetime, str):
        sdatetime = str(sdatetime)
    if not unit or unit not in 'YMWDHNS':
        return sdatetime

    sdate, stime = re.split(' ', sdatetime)
    if unit in 'HNS':
        stime = endtime(stime, unit)
    else:
        sdate = enddate(sdate, days, unit, nf)

    return "{} {}".format(sdate, stime)
1595
+
1596
+ #
1597
+ # get the string length dynamically
1598
+ #
1599
def get_column_length(colname, values):
    """Return the display width needed for a column.

    The width starts at the length of colname (2 if colname is empty) and
    grows to the longest string form among values. None values, empty
    strings and values containing a newline are ignored.
    """
    widest = len(colname) if colname else 2

    for val in values:
        if val is None:
            continue
        text = str(val)
        if text and '\n' not in text:
            widest = max(widest, len(text))

    return widest
1611
+
1612
+ #
1613
+ # Function: hour2time()
1614
+ # Return: time string in format of date HH:MM:SS
1615
+ #
1616
def hour2time(sdate, nhour, endtime = 0):
    """Build a time string from an hour.

    sdate:   optional date; non-strings are converted with str()
    nhour:   the hour, formatted with at least two digits
    endtime: if truthy the time ends in ':59:59', otherwise in ':00:00'
    Return:  'date HH:MM:SS' when sdate is given, otherwise 'HH:MM:SS'
    """
    if sdate and not isinstance(sdate, str):
        sdate = str(sdate)

    stime = "{:02}:{}".format(nhour, "59:59" if endtime else "00:00")

    return "{} {}".format(sdate, stime) if sdate else stime
1629
+
1630
+ #
1631
+ # Function: time2hour()
1632
+ # Return: list of date and hour
1633
+ #
1634
def time2hour(stime):
    """Split a time string into its date and hour.

    stime:  'date time' or just 'time'; it is treated as date plus time
            only when a split on single spaces gives exactly two parts
    Return: [sdate, nhour]; sdate is None without a date part and nhour
            is None when the time does not start with digits
    """
    sdate = nhour = None

    parts = stime.split(' ')
    if len(parts) == 2:
        sdate, stime = parts

    found = re.match(r'^(\d+)', stime)
    if found:
        nhour = int(found.group(1))

    return [sdate, nhour]
1647
+
1648
+ #
1649
+ # get the all column widths
1650
+ #
1651
def all_column_widths(pgrecs, flds, tdict):
    """Return the display width of every requested column.

    pgrecs: dict mapping field name -> list of column values
    flds:   sequence of field keys
    tdict:  table dict; tdict[key][1] is passed to strip_field() to get
            the column name in pgrecs
    Return: list of widths in the order of flds; 0 for keys not in tdict
    """
    widths = []
    for fld in flds:
        if fld in tdict:
            field = strip_field(tdict[fld][1])
            widths.append(get_column_length(None, pgrecs[field]))
        else:
            widths.append(0)

    return widths
1662
+
1663
+ #
1664
+ # check a given value, return 1 if numeric, 0 otherwise
1665
+ #
1666
def pgnum(val):
    """Return 1 if val looks like a number, 0 otherwise.

    val is converted with str() when it is not a string. Accepted forms:
    an optional sign, then digits, 'digits.' with optional decimals or
    '.decimals', then at most one exponent such as 'e5', 'E-3' or 'e+20'
    (the last is how str() renders large floats).
    """
    if not isinstance(val, str): val = str(val)
    found = re.match(r'^[-+]?(\d+|\d+\.\d*|\d*\.\d+)([eE][-+]?\d+)?$', val)
    return 1 if found else 0
1671
+
1672
+ #
1673
+ # Function: pgcmp(val1, val2)
1674
+ # Return: 0 if both empty or two values are identical; -1 if val1 < val2; otherwise 1
1675
+ #
1676
def _pgcmp_number(val):
    """Convert a numeric string to int when possible, otherwise to float."""
    try:
        return int(val)
    except ValueError:
        return float(val)


def pgcmp(val1, val2, ignorecase = 0, num = 0):
    """Compare two values.

    val1, val2: the values to compare; None sorts before anything else
    ignorecase: if truthy, strings are compared case-insensitively
    num:        if truthy, strings that hold numbers are compared by value
                (integers and decimals alike)
    Values of different types are compared as numbers when num is set and
    the string side converts cleanly, otherwise by their string forms.
    Return: 0 if both are None or equal; -1 if val1 < val2; otherwise 1
    """
    if val1 is None:
        return 0 if val2 is None else -1
    if val2 is None:
        return 1

    if type(val1) is not type(val2):
        converted = None
        if num:
            try:
                converted = (
                    _pgcmp_number(val1) if isinstance(val1, str) else val1,
                    _pgcmp_number(val2) if isinstance(val2, str) else val2,
                )
            except ValueError:
                converted = None    # non-numeric text on one side
        if converted is None:
            # compare by string form; numeric handling no longer applies
            if not isinstance(val1, str): val1 = str(val1)
            if not isinstance(val2, str): val2 = str(val2)
            num = 0
        else:
            val1, val2 = converted

    if isinstance(val1, str) and isinstance(val2, str):
        if num:
            if pgnum(val1) and pgnum(val2):
                try:
                    val1, val2 = _pgcmp_number(val1), _pgcmp_number(val2)
                except ValueError:
                    pass    # keep comparing as strings
        elif ignorecase:
            val1 = val1.lower()
            val2 = val2.lower()

    if val1 > val2:
        return 1
    if val1 < val2:
        return -1
    return 0
1718
+
1719
+ #
1720
+ # infiles: initial file list
1721
+ # Return: final file list with all the subdirectories expanded
1722
+ #
1723
def recursive_files(infiles):
    """Expand directories in a file list.

    infiles: initial list of file and directory names
    Return:  list in which every directory is replaced, recursively, by
             what glob finds under 'directory/*'; other names are kept
    """
    expanded = []

    for entry in infiles:
        if not op.isdir(entry):
            expanded.append(entry)
            continue
        expanded += recursive_files(glob.glob(entry + "/*"))

    return expanded
1734
+
1735
+ #
1736
+ # lidx: lower index limit (including)
1737
+ # hidx: higher index limit (excluding)
1738
+ # key: string value to be searched,
1739
+ # list: reference to a sorted list where the key is searched)
1740
+ # Return: index if found; -1 otherwise
1741
+ #
1742
def asearch(lidx, hidx, key, list):
    """Search a sorted list for an exact key.

    lidx:   lower index limit (inclusive)
    hidx:   higher index limit (exclusive)
    key:    value to be searched
    list:   sorted list where the key is searched (the parameter name
            shadows the builtin; it is kept so existing callers still work)
    Return: index of a matching item; -1 if not found
    """
    # halve the range until fewer than 11 items remain
    while (hidx - lidx) >= 11:
        midx = (lidx + hidx) // 2    # integer division: indices must be ints
        if key == list[midx]:
            return midx
        if key < list[midx]:
            hidx = midx
        else:
            lidx = midx + 1

    # linear search over the remaining items
    for midx in range(lidx, hidx):
        if key == list[midx]:
            return midx

    return -1
1760
+
1761
+ #
1762
+ # lidx: lower index limit (including)
1763
+ # hidx: higher index limit (excluding)
1764
+ # key: string value to be searched,
1765
+ # list: reference to a sorted list where the key is searched)
1766
+ # Return: index if found; -1 otherwise
1767
+ #
1768
def psearch(lidx, hidx, key, list):
    """Search a sorted list of patterns for one that matches a key.

    lidx:   lower index limit (inclusive)
    hidx:   higher index limit (exclusive)
    key:    string to be matched
    list:   sorted list of regular expressions; an item matches when
            re.search(item, key) succeeds
    Return: index of a matching pattern; -1 if not found
    """
    # halve the range until fewer than 11 items remain
    while (hidx - lidx) >= 11:
        midx = (lidx + hidx) // 2
        if re.search(list[midx], key):
            return midx
        if key < list[midx]:
            hidx = midx
        else:
            lidx = midx + 1

    # linear search over the remaining items
    for midx in range(lidx, hidx):
        if re.search(list[midx], key):
            return midx

    return -1
1786
+
1787
+ #
1788
+ # quicksort for pattern
1789
+ #
1790
def quicksort(srecs, lo, hi, desc, cnt, nums = None):
    """Sort srecs[lo..hi] in place with quicksort.

    srecs:  list of records (lists) to sort
    lo, hi: first and last index (both inclusive) of the range to sort
    desc, cnt, nums: passed to cmp_records() for every comparison
    Return: srecs, the same list object that was passed in
    """
    left = lo
    right = hi
    pivot = srecs[(lo + hi) // 2]

    while True:
        # find the next pair standing on the wrong side of the pivot
        while cmp_records(srecs[left], pivot, desc, cnt, nums) < 0:
            left += 1
        while cmp_records(srecs[right], pivot, desc, cnt, nums) > 0:
            right -= 1
        if left <= right:
            if left < right:
                srecs[left], srecs[right] = srecs[right], srecs[left]
            left += 1
            right -= 1
        if left > right:
            break

    # sort both partitions; the list is modified in place
    if lo < right:
        quicksort(srecs, lo, right, desc, cnt, nums)
    if left < hi:
        quicksort(srecs, left, hi, desc, cnt, nums)

    return srecs
1813
+
1814
def cmp_records(arec, brec, desc, cnt, nums):
    """Compare two records on their first cnt columns.

    arec, brec: the records to compare
    desc:       per-column multiplier applied to the pgcmp() result
    cnt:        number of leading columns to compare
    nums:       per-column numeric flags for pgcmp(), or a falsy value
                to compare every column non-numerically
    Return:     the first non-zero column result times desc[column];
                0 if all compared columns are equal
    """
    for col in range(cnt):
        outcome = pgcmp(arec[col], brec[col], 0, nums[col] if nums else 0)
        if outcome:
            return outcome * desc[col]

    return 0    # identical records
1823
+
1824
+ #
1825
+ # format a value as a size string with a unit suffix (B, KB, MB, GB, TB, PB)
1826
+ #
1827
def format_float_value(val, precision = 2):
    """Format a value with a unit suffix from B up to PB.

    val:       the value; None gives '', non-integers are truncated with
               int() before scaling
    precision: number of decimals in the result
    Return:    the value divided by 1000 until it is below 1000 (at most
               five times), followed by the matching unit
    """
    if val is None:
        return ''
    if not isinstance(val, int):
        val = int(val)

    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    step = 0
    while step < 5 and val >= 1000:
        val /= 1000
        step += 1

    return "{:.{}f}{}".format(val, precision, units[step])
1842
+
1843
+ #
1844
+ # check a file is a ASCII text one
1845
+ # return 1 if yes, 0 if not; or -1 if file not exists
1846
+ #
1847
def is_text_file(fname):
    """Check whether a file is reported as text by the 'file' command.

    Return: -1 if fname is not an existing file; 1 if the output of
            'file -b' has a word starting with text, script or data;
            0 otherwise
    """
    if not op.isfile(fname):
        return -1

    # NOTE(review): fname is appended to the command unquoted; whether
    # names with spaces or shell characters are safe depends on how
    # PgLOG.pgsystem() runs the command - confirm
    buf = PgLOG.pgsystem("file -b " + fname, PgLOG.LOGWRN, 20)
    if buf and re.search(r'(^|\s)(text|script|data)', buf):
        return 1
    return 0