cdxcore 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

cdxcore/util.py ADDED
@@ -0,0 +1,1041 @@
1
+ """
2
+ Basic utilities for Python such as type management, formatting, some trivial timers
3
+ """
4
+
5
+ import datetime as datetime
6
+ import types as types
7
+ import psutil as psutil
8
+ from collections.abc import Mapping, Collection
9
+ import sys as sys
10
+ import time as time
11
+ from collections import OrderedDict
12
+ from sortedcontainers import SortedDict
13
+ import numpy as np
14
+ import pandas as pd
15
+ import os as os
16
+ import warnings as warnings
17
+ from collections.abc import Callable, Collection
18
+
19
+ # =============================================================================
20
+ # basic identification shortcuts
21
+ # =============================================================================
22
+
23
__types_functions = None

def types_functions():
    """
    Returns a tuple of all types.* types which isFunction() treats as function-like.

    The tuple is computed on first use and cached in the module-level
    '__types_functions'; subsequent calls return the very same tuple object.
    Attributes of 'types' which do not exist in the running Python version
    are skipped.

    Returns
    -------
        Tuple of types, suitable as second argument to isinstance().
    """
    global __types_functions
    if __types_functions is None:
        # looked up by name so that a missing attribute is skipped rather than
        # raising; this replaces one try/bare-except per type
        names = ( "FunctionType",
                  "LambdaType",
                  "CodeType",
                  "GeneratorType",
                  "CoroutineType",
                  "AsyncGeneratorType",
                  "MethodType",
                  "BuiltinFunctionType",
                  "BuiltinMethodType",
                  "WrapperDescriptorType",
                  "MethodWrapperType",
                  "MethodDescriptorType",
                  "ClassMethodDescriptorType",
                  "GetSetDescriptorType",
                  "MemberDescriptorType",
                  "DynamicClassAttribute" )
        found = []
        for name in names:
            tp = getattr(types, name, None)
            # some names are aliases of each other: keep every type only once
            if tp is not None and tp not in found:
                found.append(tp)
        __types_functions = tuple(found)
    return __types_functions
69
+
70
def isFunction(f) -> bool:
    """
    Tests whether 'f' is a function in an extended sense.

    'f' qualifies if it is an instance of any of the types returned by
    types_functions(). Properties are not among those types and therefore
    do not test positive.
    """
    function_types = types_functions()
    return isinstance(f, function_types)
77
+
78
def isAtomic( o ):
    """
    Returns True if 'o' is a str, int, bool, float, datetime.date, or a numpy generic (scalar).

    The Python types are matched on the exact type of 'o': instances of
    derived classes (for example datetime.datetime) are not atomic.
    """
    exact_match = type(o) in (str, int, bool, float, datetime.date)
    return exact_match or isinstance(o, np.generic)
85
+
86
def isFloat( o ):
    """
    Returns True if 'o' is exactly a Python float or any numpy floating point scalar.
    """
    return type(o) is float or isinstance(o, np.floating)
93
+
94
+ # =============================================================================
95
+ # exceptions
96
+ # =============================================================================
97
+
98
def _verify( cond : bool, msgf : Callable, exception : Exception = Exception, **msg_kwargs ):
    """
    Raises an exception if 'cond' is not met. The error message is only built in that case.

    'msgf' is either a callable or a str.format() pattern:

        Callable: delays generation of the message until it is needed
            x=1
            _verify(x==1, lambda : f"Error: x is {x}")

        Format string: str.format(msgf, **msg_kwargs) is called; do *not* pass an f-string
            x=1
            _verify(x==1, "Error: x is {x}", x=x )

    In other words
        _verify(cond, msgf, exception, **msg_kwargs)
    behaves like
        if not cond: raise exception(msgf())
    or, if 'msgf' is a string, like
        if not cond: raise exception(str.format(msgf,**msg_kwargs))

    Parameters
    ----------
        cond:
            Condition to be tested; it is converted with bool().
            Nothing happens if it is true.
        msgf:
            Callable without arguments which returns the error message, or a
            str.format() pattern using {} [this is *not* an f-string].
            If the callable returns an Exception object, that object is raised
            as it is. The callable must not return None.
        exception:
            Exception type to raise; it is constructed with the message as sole argument.
        msg_kwargs :
            Keywords for str.format() if 'msgf' is a string; must be empty otherwise.
    """
    if bool(cond):
        return

    if isinstance( msgf, str ):
        msg = str.format(msgf,**msg_kwargs)
    else:
        assert len(msg_kwargs) == 0, ("Superflous arguments passed", str(msg_kwargs)[:100] )
        msg = msgf()
        assert not msg is None, ("'msgf' returned None")
        if isinstance(msg, Exception):
            # the callable produced a ready-made exception
            raise msg
    raise exception( msg )
152
+
153
_warn_skips = (os.path.dirname(__file__),)

def _warn( message : str, category : Warning = RuntimeWarning, stack_level : int = 1 ):
    """
    Issues a standard warning via warnings.warn().

    Frames of files located in the directory of this module ('_warn_skips')
    are skipped when the warning is attributed to a source line, provided the
    running Python supports this.

    Parameters
    ----------
        message :
            Warning message; it is converted with str().
        category :
            Warning class.
        stack_level :
            Passed on as 'stacklevel' to warnings.warn().
    """
    if sys.version_info >= (3, 12):
        warnings.warn( message=str(message), category=category, stacklevel=stack_level, skip_file_prefixes=_warn_skips )
    else:
        # 'skip_file_prefixes' was only added in Python 3.12; passing it to an older
        # interpreter raises a TypeError. Skip at least the frame of _warn() itself instead.
        warnings.warn( message=str(message), category=category, stacklevel=stack_level+1 )
158
+
159
def _warn_if( cond : bool, msgf : Callable, *, category : Warning = RuntimeWarning, stack_level : int = 1, **msg_kwargs ):
    """
    Issues a warning if 'cond' is met. The warning message is only built in that case.

    'msgf' is either a callable or a str.format() pattern:

        Callable: delays generation of the message until it is needed
            x=1
            _warn_if(x!=0, lambda : f"Warn: x is {x}")

        Format string: str.format(msgf, **msg_kwargs) is called; do *not* pass an f-string
            x=1
            _warn_if(x!=0, "Warn: x is {x}", x=x )

    In other words
        _warn_if(cond, msgf, **msg_kwargs)
    behaves like
        if cond: _warn(msgf())
    or, if 'msgf' is a string, like
        if cond: _warn(str.format(msgf,**msg_kwargs))

    Parameters
    ----------
        cond:
            Condition to be tested; it is converted with bool().
            Nothing happens if it is false.
        msgf:
            Callable without arguments which returns the warning message, or a
            str.format() pattern using {} [this is *not* an f-string].
        category:
            Warning class, passed on to _warn().
        stack_level:
            Passed on to _warn().
        msg_kwargs :
            Keywords for str.format() if 'msgf' is a string; must be empty otherwise.
    """
    if not bool(cond):
        return

    if isinstance( msgf, str ):
        msg = str.format(msgf,**msg_kwargs)
    else:
        assert len(msg_kwargs) == 0, ("Superflous arguments passed?", str(msg_kwargs)[:100] )
        msg = msgf()
    _warn( msg, category=category, stack_level=stack_level )
205
+
206
+ # =============================================================================
207
+ # python basics
208
+ # =============================================================================
209
+
210
def _get_recursive_size(obj, seen=None):
    """
    Recursive helper for getsizeof().

    Parameters
    ----------
        obj :
            Object to measure.
        seen :
            Set of id()s of the objects counted so far. An object whose id is
            in this set contributes 0. A new set is created if None.

    Returns
    -------
        sys.getsizeof(obj) plus the recursive sizes of its elements, dictionary
        entries, instance attributes, and slot values.
    """
    if seen is None:
        seen = set()

    # count every object only once, even if it is referenced several times
    if id(obj) in seen:
        return 0
    seen.add(id(obj))

    size = sys.getsizeof(obj)

    if isinstance( obj, (str, bytes, bytearray) ):
        # These are Collections, but sys.getsizeof() above already covers their content.
        # Iterating a string would count every character again, and would recurse without
        # end whenever indexing yields a fresh one-character string on each access.
        return size

    if isinstance( obj, np.ndarray ):
        return size + obj.nbytes

    if isinstance(obj, Mapping):
        for key, value in obj.items():
            size += _get_recursive_size(key, seen)
            size += _get_recursive_size(value, seen)
        return size

    if isinstance(obj, Collection):
        for item in obj:
            size += _get_recursive_size(item, seen)
        return size

    # generic objects: instance dictionary ...
    members = getattr(obj, "__dict__", None)
    if members is not None:
        size += _get_recursive_size( members, seen )

    # ... and the values stored in slots (not the slot names) of all classes of the object
    for klass in type(obj).__mro__:
        slots = vars(klass).get("__slots__", ())
        if isinstance(slots, str):
            slots = (slots,)
        for name in slots:
            if name in ("__dict__", "__weakref__"):
                continue
            if name.startswith("__") and not name.endswith("__"):
                # private slot names are stored under their mangled name
                name = "_" + klass.__name__.lstrip("_") + name
            try:
                value = getattr(obj, name)
            except AttributeError:
                continue    # slot has not been assigned
            size += _get_recursive_size( value, seen )
    return size

def getsizeof(obj):
    """
    Approximates the size of 'obj' in bytes.
    In addition to sys.getsizeof() this function also walks through embedded
    containers, instance attributes, and slot values. Objects referenced more
    than once are counted once.
    """
    return _get_recursive_size(obj,None)
251
+
252
+ # =============================================================================
253
+ # string formatting
254
+ # =============================================================================
255
+
256
def _fmt( text : str, args = None, kwargs = None ) -> str:
    """
    Applies %-formatting to 'text'. Utility function for fmt().

    'text' is returned unchanged if it contains no '%', or if neither
    positional nor keyword arguments are provided. Otherwise it is formatted
    with either tuple(args) or the dictionary 'kwargs'; providing both is an error.
    """
    if '%' not in text:
        return text

    has_args   = args is not None and len(args) > 0
    has_kwargs = kwargs is not None and len(kwargs) > 0

    if has_args:
        assert not has_kwargs, "Cannot specify both 'args' and 'kwargs'"
        return text % tuple(args)
    if has_kwargs:
        return text % kwargs
    return text
266
+
267
def fmt(text : str,*args,**kwargs) -> str:
    """
    String formatting made easy: applies %-formatting to the pattern 'text'.

    Use either positional or keyword arguments, not both.

    Examples
        fmt("The is one = %ld", 1)
        fmt("The is text = %s", 1.3)
        fmt("Using keywords: one=%(one)d, two=%(two)d", two=2, one=1)
    """
    formatted = _fmt(text, args, kwargs)
    return formatted
277
+
278
def prnt(text : str,*args,**kwargs) -> None:
    """
    Prints 'text', formatted with 'args' or 'kwargs' as in fmt(), followed by a newline.
    Nothing is returned.
    """
    print(_fmt(text,args,kwargs))
281
+
282
def write(text : str,*args,**kwargs) -> None:
    """
    Prints 'text', formatted with 'args' or 'kwargs' as in fmt(), without EOL,
    e.g. uses print(fmt(..),end=''). Nothing is returned.
    """
    print(_fmt(text,args,kwargs),end='')
285
+
286
def fmt_seconds( seconds : float, *, eps : float = 1E-8 ) -> str:
    """
    Returns a compact string for a number of seconds, e.g. '23s' for seconds=23, or '1:10' for seconds=70.

    Parameters
    ----------
        seconds :
            Number of seconds. Negative values yield the string for the
            absolute value with a leading '-'.
        eps :
            Values with absolute value of at most 'eps' are shown as '0s'.
            Must not be negative.

    Returns
    -------
        Milliseconds below 0.01s, fractional seconds below 2s, whole seconds below
        one minute, M:SS below one hour, H:MM:SS otherwise.
    """
    assert eps>=0., ("'eps' must not be negative")
    if seconds < -eps:
        return "-" + fmt_seconds(-seconds, eps=eps)
    if seconds <= eps:
        return "0s"
    if seconds < 0.01:
        return "%.3gms" % (seconds*1000.)
    if seconds < 2.:
        return "%.2gs" % seconds

    # from here on fractions of a second are cut off
    whole = int(seconds)
    if whole < 60:
        return "%lds" % whole
    minutes, secs = divmod(whole, 60)
    if minutes < 60:
        return "%ld:%02ld" % (minutes, secs)
    hours, minutes = divmod(minutes, 60)
    return "%ld:%02ld:%02ld" % (hours, minutes, secs)
304
+
305
def fmt_list( lst : list, *, none : str = "-", link : str = "and", sort : bool = False ) -> str:
    """
    Returns a nicely formatted string of the elements of a list, separated by commas.

    Parameters
    ----------
        lst  : list. The list() operator is applied to it, so it will resolve dictionaries and generators.
        none : returned, converted with str(), if 'lst' is None or empty.
        link : string used to connect the last item. Default is 'and':
               if the list is [1,2,3] then the function will return "1, 2 and 3".
               Use ',' or '' to connect the last item with a comma as well.
               A link starting with a comma, such as ', or', yields "1, 2, or 3".
        sort : whether to sort the list.

    Returns
    -------
        String of the list.
    """
    if lst is None:
        return str(none)
    lst = list(lst)
    if len(lst) == 0:
        # str() for consistency with the None case above: always return a string
        return str(none)
    if len(lst) == 1:
        return str(lst[0])
    if sort:
        lst = sorted(lst)

    if link=="," or link=="":
        link = ", "
    elif link == "and":  # make the default fast
        link = " and "
    elif link[:1] == ",":
        link = ", " + link[1:].strip() + " "
    else:
        link = " " + link.strip() + " "

    head = ", ".join( str(k) for k in lst[:-1] )
    return head + link + str(lst[-1])
343
+
344
def fmt_dict( dct : dict, *, sort : bool = False, none : str = "-", link : str = "and" ) -> str:
    """
    Returns a readable representation of a dictionary.
    This assumes that keys and values of the dictionary can be formatted well with 'str()'.

    For a dictionary dict(a=1,b=2,c=3) this function will return "a: 1, b: 2 and c: 3".

    Parameters
    ----------
        dct  : dictionary
        sort : whether to sort the keys
        none : string to be used if the dictionary is empty
        link : string to be used to link the last element to the previous string; see fmt_list()

    Returns
    -------
        String
    """
    if len(dct) == 0:
        return str(none)
    keys  = sorted(dct) if sort else list(dct)
    items = [ f"{k!s}: {dct[k]!s}" for k in keys ]
    # the entries are already in their final order
    return fmt_list( items, none=none, link=link, sort=False )
370
+
371
def fmt_digits( uint : int, sep : str = "," ):
    """
    String representation of 'uint' with 1000 separators.
    So 10000 becomes "10,000".

    Parameters
    ----------
        uint : integer
            The number. The function will int() the input which allows
            for processing of a number of inputs (such as strings).
            Python floats are rejected. Negative numbers are shown with a
            leading '-'.
        sep : str
            Separator, "," by default

    Returns
    -------
        String

    Raises
    ------
        ValueError if 'uint' is a float.
    """
    if isinstance( uint, float ):
        raise ValueError("float value provided", uint)
    value = int(uint)
    # group by thousands with ',' and then swap in the requested separator
    return f"{value:,}".replace(",", sep)
398
+
399
def fmt_big_number( number : int ) -> str:
    """
    Returns a formatted big number string, e.g. 12.35M instead of all digits.
    Uses decimal system and "B" for billions.
    Use fmt_big_byte_number for byte sizes ie 1024 units.

    A unit is used once the number has more than four digits in front of it,
    e.g. "K" from 10,000 and "M" from 10,000,000. Numbers are rounded to two
    decimals. More than 1000 trillions are shown with 1000 separators and
    exactly two decimals, e.g. "2,500.00T".

    Parameters
    ----------
        number : int
            Negative numbers are shown with a leading '-'.

    Returns
    -------
        String number

    Raises
    ------
        ValueError if 'number' is a float.
    """
    if isinstance( number, float ):
        raise ValueError("float value provided", number)
    if number < 0:
        return "-" + fmt_big_number(-number)

    if number >= 10**13:
        number = number/(10**12)
        if number > 10**3:
            # The format takes care of both the 1000 separators and the rounding,
            # including a carry into the integer part (e.g. x.999 becomes x+1.00).
            return f"{number:,.2f}T"
        return "%gT" % round(number,2)

    # (smallest number using the unit, divisor, unit)
    units = ( (10**10, 10**9, "B"),
              (10**7,  10**6, "M"),
              (10**4,  10**3, "K") )
    for threshold, divisor, unit in units:
        if number >= threshold:
            return "%g%s" % ( round(number/divisor,2), unit )
    return str(number)
441
+
442
def fmt_big_byte_number( byte_cnt : int, str_B = True ) -> str:
    """
    Returns a formatted big byte count string, e.g. 12.35MB instead of all digits.
    Units are powers of 1024.

    Parameters
    ----------
        byte_cnt : int
            Negative numbers are shown with a leading '-'.
        str_B : bool
            If true, return TB, GB, MB and KB. If False, return T, G, M, K.
            If 'byte_cnt' is less than 10KB and 'str_B' is True, then this
            will add 'bytes', e.g. '1024 bytes'.

    Returns
    -------
        String number

    Raises
    ------
        ValueError if 'byte_cnt' is a float.
    """
    if isinstance( byte_cnt, float ):
        raise ValueError("float value provided", byte_cnt)
    if byte_cnt < 0:
        return "-" + fmt_big_byte_number(-byte_cnt,str_B=str_B)

    KB = 1024
    MB = KB*1024
    GB = MB*1024
    TB = GB*1024

    if byte_cnt >= 10*TB:
        scaled = byte_cnt/TB
        if scaled > 1024:
            # The format takes care of both the 1000 separators and the rounding,
            # including a carry into the integer part (e.g. x.999 becomes x+1.00).
            s = f"{scaled:,.2f}T"
        else:
            s = "%gT" % round(scaled,2)
    elif byte_cnt >= 10*GB:
        s = "%gG" % round(byte_cnt/GB,2)
    elif byte_cnt >= 10*MB:
        s = "%gM" % round(byte_cnt/MB,2)
    elif byte_cnt >= 10*KB:
        s = "%gK" % round(byte_cnt/KB,2)
    else:
        # small numbers are shown in full
        if byte_cnt==1:
            return "1" if not str_B else "1 byte"
        return str(byte_cnt) if not str_B else f"{byte_cnt} bytes"
    return s if not str_B else s+"B"
489
+
490
def fmt_datetime(dt        : datetime.datetime, *,
                 sep       : str = ':',
                 ignore_ms : bool = False,
                 ignore_tz : bool = True
                 ) -> str:
    """
    Returns string for 'dt' of the form "YYYY-MM-DD HH:MM:SS" if 'dt' is a datetime,
    or the respective version for a time or a date.

    Microseconds are added as digits:
        "YYYY-MM-DD HH:MM:SS,MICROSECONDS"

    Optionally a non-zero UTC offset is added, with hours only if the offset
    has no minutes:
        "YYYY-MM-DD HH:MM:SS+H"
        "YYYY-MM-DD HH:MM:SS+H:MM"
    The hours of the offset are not zero-padded, and 'sep' is used between
    its hours and minutes.

    Parameters
    ----------
        dt : datetime, date, or time
            String represent this. Dates are passed on to fmt_date(), and
            times to fmt_time().
        sep : str
            Separator for hours, minutes, seconds. The default ':' looks better
            but is not suitable for filenames
        ignore_ms : bool
            Whether to ignore microseconds. Default False
        ignore_tz : bool
            Whether to ignore the time zone. Default True

    Returns
    -------
        String, see above.
    """
    if not isinstance(dt, datetime.datetime):
        if isinstance(dt, datetime.date):
            return fmt_date(dt)
        assert isinstance(dt, datetime.time), "'dt' must be datetime.datetime, datetime.date, or datetime.time. Found %s" % type(dt)
        return fmt_time(dt,sep=sep,ignore_ms=ignore_ms)

    date_part = fmt_date(dt.date())
    time_part = fmt_time(dt.timetz(),sep=sep,ignore_ms=ignore_ms)
    text      = date_part + " " + time_part

    if ignore_tz or dt.tzinfo is None:
        return text

    # time zone handling
    # pretty obscure: https://docs.python.org/3/library/datetime.html#tzinfo-objects
    offset = dt.tzinfo.utcoffset(dt)
    assert not offset is None, ("tzinfo.utcoffset() returned None")
    assert offset.microseconds == 0, ("Timezone date offset with microseconds found", offset )
    total = offset.days * 24*60*60 + offset.seconds
    if total==0:
        return text

    sign = "+" if total >= 0 else "-"
    # seconds of the offset, if any, are not shown
    hours, remainder = divmod(abs(total), 60*60)
    minutes = remainder // 60
    if minutes == 0:
        return text + sign + str(hours)
    return text + f"{sign}{hours}{sep}{minutes:02d}"
552
+
553
def fmt_date(dt : datetime.date) -> str:
    """
    Returns the string representation YYYY-MM-DD for the date 'dt'.
    If passed a datetime, its date() is used.
    """
    day = dt.date() if isinstance(dt, datetime.datetime) else dt
    assert isinstance(day, datetime.date), "'dt' must be datetime.date. Found %s" % type(dt)
    return "%04d-%02d-%02d" % (day.year, day.month, day.day)
562
+
563
def fmt_time(dt        : datetime.time, *,
             sep       : str = ':',
             ignore_ms : bool = False
             ) -> str:
    """
    Returns string for the time 'dt' of the form "HH:MM:SS".

    Non-zero microseconds are added as digits, without leading zeros:
        "HH:MM:SS,MICROSECONDS"

    If passed a datetime, its timetz() is used.
    Time zones are not shown by this function: while datetime.time objects
    may carry a tzinfo object, the corresponding utcoffset() function returns
    None without providing a 'dt' parameter,
    see https://docs.python.org/3/library/datetime.html#tzinfo-objects
    We bypass this inconsistency by only allowing fmt_datetime() to process time zones.

    Parameters
    ----------
        dt : time
            String represent this.
        sep : str
            Separator for hours, minutes, seconds. The default ':' looks better
            but is not suitable for filenames
        ignore_ms : bool
            Whether to ignore microseconds. Default False

    Returns
    -------
        String, see above.
    """
    if isinstance(dt, datetime.datetime):
        dt = dt.timetz()
    assert isinstance(dt, datetime.time), "'dt' must be datetime.time. Found %s" % type(dt)

    hms = f"{dt.hour:02d}{sep}{dt.minute:02d}{sep}{dt.second:02d}"
    if ignore_ms or dt.microsecond == 0:
        return hms
    return f"{hms},{dt.microsecond}"
605
+
606
def fmt_timedelta(dt  : datetime.timedelta, *,
                  sep : str = "" ) -> str:
    """
    Returns string representation for a time delta with units, e.g. "+1d2h3m4s".

    Only non-zero parts are shown. The units are 'd' for days, 'h' for hours,
    'm' for minutes, 's' for seconds, and 'ms' for the microseconds of the time delta.

    Parameters
    ----------
        dt : timedelta
            Timedelta.
        sep :
            Identify the three separators: between days and hours (0), between
            hours, minutes and seconds (1), and in front of the microseconds (2):
                DD*HH*MM*SS*MS
                  0  1  1  2
            'sep' can be a string, in which case:
                * If it is an empty string, all separators are ''
                * A single character will be reused for all separators
                * If the string has length 2, then the first character is used
                  for '0' and '1' and the last character is used for '2'
                * If the string has length 3, then the characters are used accordingly
            'sep' can also be a collection ie a tuple or list of length 3. In this case
            each element is used accordingly, with None meaning no separator.

    Returns
    -------
        String with leading sign. Returns "" if timedelta is 0.

    Raises
    ------
        ValueError if 'sep' is a string of more than three characters, a collection
        of a length other than three, or neither a string nor a collection.
    """
    assert isinstance(dt, datetime.timedelta), "'dt' must be datetime.timedelta. Found %s" % type(dt)

    if isinstance(sep, str):
        if len(sep) == 0:
            sepd = sephms = sepms = ''
        elif len(sep) == 1:
            sepd = sephms = sepms = sep
        elif len(sep) == 2:
            sepd = sephms = sep[0]
            sepms = sep[-1]
        else:
            if len(sep) != 3: raise ValueError(f"'sep': if a string is provided, its length must not exceed 3. Found '{sep}'")
            sepd   = sep[0]
            sephms = sep[1]
            sepms  = sep[2]
    elif isinstance(sep, Collection):
        if len(sep) != 3: raise ValueError("'sep': if a collection is provided, it must be of length 3")
        sepd   = str( sep[0] ) if not sep[0] is None else ""
        sephms = str( sep[1] ) if not sep[1] is None else ""
        sepms  = str( sep[2] ) if not sep[2] is None else ""
    else:
        # without this branch the separators would be undefined further below
        raise ValueError("'sep' must be a string or a collection of length 3", type(sep))

    microseconds = (dt.seconds + dt.days*24*60*60)*1000000+dt.microseconds
    if microseconds==0:
        return ""

    sign = "+" if microseconds >= 0 else "-"
    microseconds = abs(microseconds)

    if microseconds < 1000000:
        return f"{sign}{microseconds}ms"

    # 'rest' collects the text of the smaller units while moving up to the larger ones
    seconds, microseconds = divmod(microseconds, 1000000)
    rest = "" if microseconds == 0 else f"{sepms}{microseconds}ms"
    if seconds < 60:
        return f"{sign}{seconds}s{rest}"

    minutes, seconds = divmod(seconds, 60)
    rest = rest if seconds==0 else f"{sephms}{seconds}s{rest}"
    if minutes < 60:
        return f"{sign}{minutes}m{rest}"

    hours, minutes = divmod(minutes, 60)
    rest = rest if minutes==0 else f"{sephms}{minutes}m{rest}"
    if hours < 24:   # 24 hours and more are shown in days, just as 60 minutes are shown in hours
        return f"{sign}{hours}h{rest}"

    days, hours = divmod(hours, 24)
    rest = rest if hours==0 else f"{sepd}{hours}h{rest}"
    return f"{sign}{days}d{rest}"
691
+
692
def fmt_now() -> str:
    """ Returns the string for the current local date and time, formatted with fmt_datetime() """
    now = datetime.datetime.now()
    return fmt_datetime(now)
695
+
696
# default replacements for characters which are not valid in file names
DEF_FILE_NAME_MAP = {
                     '/' : "_",
                     '\\': "_",
                     '|' : "_",
                     ':' : ";",
                     '>' : ")",
                     '<' : "(",
                     '?' : "!",
                     '*' : "@",
                     }
INVALID_FILE_NAME_CHARCTERS = set(DEF_FILE_NAME_MAP)

def fmt_filename( s : str , by : "str | Mapping" = DEF_FILE_NAME_MAP ):
    """
    Replaces invalid filename characters by a different character.
    The characters replaced are those in INVALID_FILE_NAME_CHARCTERS.

    This does not prevent the result from being a reserved name, for example "." or "..".

    Parameters
    ----------
        s : str
            Input string
        by :
            Either a single replacement string used for all invalid characters,
            or a mapping from each invalid character to its replacement.
            A mapping must contain all of INVALID_FILE_NAME_CHARCTERS.

    Returns
    -------
        The string with all invalid characters replaced.

    Raises
    ------
        KeyError if 'by' is a mapping which lacks one of the invalid characters.
    """
    if isinstance(by, Mapping):
        table = { ord(c) : by[c] for c in INVALID_FILE_NAME_CHARCTERS }
    else:
        assert isinstance(by, str), ("by: 'str' or mapping expected", type(by))
        table = { ord(c) : by for c in INVALID_FILE_NAME_CHARCTERS }
    # A single pass: replacements are never replaced again, so the result does not depend
    # on the iteration order of the set when a replacement is itself an invalid character.
    return s.translate(table)
731
+
732
class WriteLine(object):
    r"""
    Class to manage the current text output line on sys.stdout.
    It keeps track of the length of the current line in order to ensure that the
    line is cleared correctly when it is replaced with a shorter line via '\r'.

    Example 1 (how to use \r and \n)
        write = WriteLine("Initializing...")
        import time
        for i in range(10):
            time.sleep(1)
            write("\rRunning %g%% ...", round(float(i+1)/float(10)*100,0))
        write(" done.\nProcess finished.\n")

    Example 2 (line length is getting shorter)
        write = WriteLine("Initializing...")
        import time
        for i in range(10):
            time.sleep(1)
            write("\r" + ("#" * (9-i)))
        write("\rProcess finished.\n")
    """

    def __init__(self, text : str = "", *kargs, **kwargs):
        r"""
        Creates a new WriteLine object which manages the current print output line.
        A non-empty 'text' is written right away as if passed to __call__().

        Parameters
        ----------
            text : str
                Classic %-formatting text.
            kargs, kwargs:
                Formatting arguments.
        """
        self._last_len = 0
        if text != "":
            self(text,*kargs,**kwargs)

    def __call__(self, text : str, *kargs, **kwargs ):
        r"""
        Writes 'text', which may consist of several lines separated by '\n'.
        The last line of 'text' becomes the current line; it will be overwritten
        by subsequent text which contains '\r'.

        Parameters
        ----------
            text : str
                Classic %-formatting text.
            kargs, kwargs:
                Formatting arguments.
        """
        text = _fmt(text,kargs,kwargs)
        *complete, tail = text.split("\n")

        # every line which was followed by a newline is finished right away
        for line in complete:
            self._write_line(line)
            self.cr()
        if len(tail) > 0:
            self._write_line(tail)
        sys.stdout.flush()

    def cr(self):
        """ Starts a new, empty line. """
        sys.stdout.write("\n")
        sys.stdout.flush()
        self._last_len = 0

    def _write_line(self, line):
        r"""
        Writes text without newlines to the current line.
        If 'line' contains '\r', only the text after the last '\r' is written,
        and it replaces the current line.
        """
        assert not '\n' in line, "Error: found newline in '%s'" % line
        if line == "":
            return

        _, found, fresh = line.rpartition('\r')
        if found == "":
            # no '\r': append text to current line
            sys.stdout.write(line)
            self._last_len += len(line)
            return

        # found '\r': blank out the current line if the new text would not cover it
        if len(fresh) < self._last_len:
            sys.stdout.write("\r" + (" " * self._last_len))
        sys.stdout.write("\r" + fresh)
        self._last_len = len(fresh)
817
+
818
+ # =============================================================================
819
+ # Conversion of arbitrary python elements into re-usable versions
820
+ # =============================================================================
821
+
822
+ # deprecated
823
def plain( inn, *, sorted_dicts : bool = False,
                   native_np    : bool = False,
                   dt_to_str    : bool = False):
    """
    Converts a python structure into a simple atomic/list/dictionary collection such
    that it can be read without the specific imports used inside this program.
    For example, objects are converted into dictionaries of their data fields.

    Parameters
    ----------
        inn : some object
        sorted_dicts : Use SortedDicts instead of dicts.
        native_np    : convert numpy arrays to lists, and numpy integer and floating
                       point scalars to Python int and float.
        dt_to_str    : convert dates, times and date times to strings using fmt_datetime().

    Returns
    -------
        None, atomic values (see isAtomic) and dates/times are returned as they are,
        subject to 'native_np' and 'dt_to_str'.
        Functions and properties are returned as None, and are dropped from mappings.
        Mappings are returned as dictionaries.
        Data frames are returned as a dictionary with the entries 'columns',
        'index' and 'values'.
        Other collections are returned as lists.
        Any other object is returned as the dictionary of its __dict__.

    Raises
    ------
        TypeError if an object cannot be handled.

    Hans Buehler, Dec 2013
    """
    def rec_plain( x ):
        return plain( x, sorted_dicts=sorted_dicts, native_np=native_np, dt_to_str=dt_to_str )

    if inn is None:
        return inn
    # Dates and times must be handled before the isAtomic() test: isAtomic() regards
    # datetime.date objects as atomic, so 'dt_to_str' would never apply to them.
    if isinstance(inn,(datetime.time,datetime.date,datetime.datetime)):
        return fmt_datetime(inn) if dt_to_str else inn
    # The same holds for numpy scalars, all of which are atomic as per isAtomic().
    # NOTE(review): the conversion is only applied if 'native_np' is set, which keeps the
    # default output unchanged - confirm whether it was meant to be unconditional.
    if native_np:
        if isinstance(inn, np.integer):
            return int(inn)
        if isinstance(inn, np.floating):
            return float(inn)
    # basics
    if isAtomic(inn):
        return inn
    if isinstance(inn,np.ndarray):
        return inn if not native_np else rec_plain( inn.tolist() )
    # can't handle functions --> return None
    if isFunction(inn) or isinstance(inn,property):
        return None
    # dictionaries
    if isinstance(inn,Mapping):
        r = { k: rec_plain(v) for k, v in inn.items() if not isFunction(v) and not isinstance(v,property) }
        return r if not sorted_dicts else SortedDict(r)
    # pandas
    if isinstance(inn,pd.DataFrame):
        # return the converted parts instead of discarding them
        # NOTE(review): the key names of this dictionary are a choice made here - confirm
        return rec_plain( dict( columns=inn.columns, index=inn.index, values=inn.to_numpy() ) )
    # lists, tuples and everything which looks like it --> lists
    if isinstance(inn,Collection):
        return [ rec_plain(k) for k in inn ]
    # handle objects as dictionaries, removing all functions
    if not getattr(inn,"__dict__",None) is None:
        return rec_plain(inn.__dict__)
    # nothing we can do
    raise TypeError(fmt("Cannot handle type %s", type(inn)))
875
+
876
+ # =============================================================================
877
+ # Misc Jupyter
878
+ # =============================================================================
879
+
880
def is_jupyter():
    """
    Whether we operate in a jupyter session.
    Somewhat unreliable function. Use with care.

    The test checks whether the last element of the command line of the parent
    process contains 'jupyter'. False is returned if there is no parent process,
    if its command line is empty, or if psutil cannot access it.
    """
    try:
        parent  = psutil.Process().parent()
        cmdline = parent.cmdline() if parent is not None else []
    except psutil.Error:
        # e.g. access denied, or the parent process is gone: we cannot tell
        return False
    return len(cmdline) > 0 and 'jupyter' in cmdline[-1]
887
+
888
+ # =============================================================================
889
+ # Misc
890
+ # =============================================================================
891
+
892
class TrackTiming(object):
    """
    Simplistic class to track the time it takes to run sequential tasks.
    Usage:

        timer = TrackTiming()   # clock starts

        # do job 1
        timer += "Job 1 done"

        # do job 2
        timer += "Job 2 done"

        print( timer.summary() )
    """

    def __init__(self):
        """ Initialize a new tracked timer; the clock starts now """
        self.reset_all()

    def reset_all(self):
        """ Reset timer, and clear all tracked items """
        self._tracked = OrderedDict()   # text -> accumulated seconds, in order of first use
        self._current = time.time()     # start of the task currently running

    def reset_timer(self):
        """ Reset the timer to current time, keeping all tracked items """
        self._current = time.time()

    def track(self, text, *args, **kwargs ):
        """ Track 'text', formatted with 'args' and 'kwargs' as in fmt() """
        text = _fmt(text,args,kwargs)
        self += text

    def __iadd__(self, text : str):
        """
        Track 'text': records the seconds passed since the timer was last reset
        or last tracked a text, and restarts the clock.
        Tracking the same text again adds to its previous time.
        """
        text = str(text)
        now  = time.time()
        dt   = now - self._current
        if text in self._tracked:
            self._tracked[text] += dt
        else:
            self._tracked[text] = dt
        self._current = now
        return self

    def __str__(self):
        """ Returns summary() """
        return self.summary()

    @property
    def tracked(self) -> OrderedDict:
        """ Returns the ordered dictionary of tracked texts and their seconds. This is not a copy. """
        return self._tracked

    def summary(self, frmat : str = "%(text)s: %(fmt_seconds)s", jn_fmt : str = ", " ) -> str:
        """
        Generate summary string by applying some formatting

        Parameters
        ----------
            frmat : str
                %-format string applied to each tracked text. Arguments are 'text',
                'seconds' (as float) and 'fmt_seconds' (as text, see fmt_seconds())
            jn_fmt : str
                String to be used between two texts

        Returns
        -------
            The combined summary string
        """
        s = ""
        for text, seconds in self._tracked.items():
            tr_txt = frmat % dict( text=text, seconds=seconds, fmt_seconds=fmt_seconds(seconds))
            s = tr_txt if s=="" else s+jn_fmt+tr_txt
        return s
966
+
967
+ # =============================================================================
968
+ # Misc
969
+ # =============================================================================
970
+
971
class Timer(object):
    """
    Micro utility which allows keeping track of time using 'with'

    with Timer() as t:
        .... do something ...
        print(f"This took {t}.")
    """

    def __init__(self):
        """ Starts the timer """
        self.time = time.time()   # start time of the timer
        self.intv = None          # seconds at which interval_test() last returned True

    def reset(self):
        """ Restarts the timer, and resets the state of interval_test() """
        self.time = time.time()
        self.intv = None

    def __enter__(self):
        """ Restarts the timer upon entering the 'with' block """
        self.reset()
        return self

    def __exit__(self, *kargs, **wargs):
        """ Exceptions raised inside the 'with' block are not suppressed """
        return False

    def __str__(self):
        """ Returns the time passed, formatted as text """
        return self.fmt_seconds

    def interval_test( self, interval : float ):
        r"""
        Tests if more than 'interval' seconds have passed since this function last returned True.
        If yes, remember the current time for the next test and return True. Otherwise return False.
        The function always returns True when called the first time after the timer
        was (re)started, or if 'interval' is None.

        Usage:
        ------
            tme = Timer()
            for i in range(n):
                if tme.interval_test(2.): print(f"\r{i+1}/{n} done. Time taken so far {tme}.", end='', flush=True)
            print(f"\rDone. This took {tme}.")
        """
        if interval is None or self.intv is None or self.seconds - self.intv > interval:
            self.intv = self.seconds
            return True
        return False

    @property
    def seconds(self):
        """ Seconds passed since the timer was started or last reset """
        return time.time() - self.time

    @property
    def minutes(self):
        """ Minutes passed since the timer was started or last reset """
        return self.seconds / 60.

    @property
    def hours(self):
        """ Hours passed since the timer was started or last reset """
        return self.minutes / 60.

    @property
    def fmt_seconds(self):
        """ Time passed since the timer was started or last reset, formatted with fmt_seconds() """
        return fmt_seconds(self.seconds)
1036
+
1037
+
1038
+
1039
+
1040
+
1041
+