kkpyutil 1.40.0__tar.gz → 1.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kkpyutil
3
- Version: 1.40.0
3
+ Version: 1.41.0
4
4
  Summary: Building blocks for sysadmin and DevOps
5
5
  Home-page: https://github.com/kakyoism/kkpyutil/
6
6
  License: MIT
@@ -58,9 +58,8 @@ import uuid
58
58
  import warnings
59
59
  from types import SimpleNamespace
60
60
 
61
- #
62
- # Globals
63
- #
61
+ # region globals
62
+
64
63
  _script_dir = osp.abspath(osp.dirname(__file__))
65
64
  TXT_CODEC = 'utf-8' # Importable.
66
65
  LOCALE_CODEC = locale.getpreferredencoding()
@@ -70,23 +69,30 @@ PLATFORM = platform.system()
70
69
  if PLATFORM == 'Windows':
71
70
  import winreg
72
71
 
72
+ # endregion
73
+
74
+
75
+ # region classes
73
76
 
74
- class SingletonDecorator:
77
class ClassicSingleton:
    """
    - Classic singleton: every instantiation of a class returns the same object
    - each subclass owns its own instance; the lookup goes through cls.__dict__
      so a child never receives the instance created for its parent
    - constructor arguments are accepted and left for __init__ to consume
    - CAUTION: python still calls __init__ on every instantiation,
      so __init__ of a subclass runs again on the shared instance
    """

    def __new__(cls, *args, **kwargs):
        # hasattr() would follow inheritance and find the parent's instance
        if 'instance' not in cls.__dict__:
            # object.__new__ must not receive the constructor arguments
            cls.instance = super().__new__(cls)
        return cls.instance
82
+
83
+
84
class BorgSingleton:
    """
    - Borg pattern: all instances share the same state, but not the same identity
    - override _shared_borg_state to avoid child polluting states of parent instances
    - constructor arguments are accepted and left for __init__ of the subclass
    - ref: https://www.geeksforgeeks.org/singleton-pattern-in-python-a-complete-guide/
    """
    # state dict shared by every instance of a class that does not override it
    _shared_borg_state = {}

    def __new__(cls, *args, **kwargs):
        # object.__new__ rejects extra arguments once __new__ is overridden,
        # so the constructor arguments must not be forwarded
        obj = super().__new__(cls)
        # rebinding __dict__ makes attribute reads/writes hit the shared state
        obj.__dict__ = cls._shared_borg_state
        return obj
90
96
 
91
97
 
92
98
  class LowPassLogFilter(object):
@@ -127,6 +133,288 @@ class BandPassLogFilter(object):
127
133
  return self.__levelbounds[0] <= log.levelno <= self.__levelbounds[1]
128
134
 
129
135
 
136
class OfflineJSON:
    """
    - thin wrapper around a json file on disk
    - load() tolerates a missing file; merge() updates top-level keys only
    """

    def __init__(self, file_path):
        self.path = file_path

    def exists(self):
        """
        - True when the backing file is present
        """
        return osp.isfile(self.path)

    def load(self):
        """
        - return parsed content, or None when the file does not exist
        """
        return load_json(self.path) if self.exists() else None

    def save(self, data: dict):
        """
        - overwrite the backing file with data
        """
        save_json(self.path, data)

    def merge(self, props: dict):
        """
        - shallow-merge props into the stored content and save the result
        - when nothing usable is stored yet, props alone are saved
        - always return the saved content, including on the first save
        """
        data = self.load()
        if not data:
            # nothing to merge into: props become the whole content
            self.save(props)
            return props
        data.update(props)
        self.save(data)
        return data
156
+
157
+
158
def get_platform_tmp_dir():
    """
    - return the per-platform temp/cache root used by this module
    - Windows: %LOCALAPPDATA%/Temp; Darwin: ~/Library/Caches; Linux: /tmp
    - any other platform, or Windows without LOCALAPPDATA, falls back to
      tempfile.gettempdir() so callers always receive a usable path
    """
    if PLATFORM == 'Windows':
        local_appdata = os.getenv('LOCALAPPDATA')
        if local_appdata:
            return osp.join(local_appdata, 'Temp')
    elif PLATFORM == 'Darwin':
        return osp.expanduser('~/Library/Caches')
    elif PLATFORM == 'Linux':
        return '/tmp'
    # local import keeps this block independent of the file-level import list
    import tempfile
    return tempfile.gettempdir()
165
+
166
+
167
class RerunLock:
    """
    - Lock process from reentering when seeing lock file on disk
    - use semaphore-like behaviour with an instance limit
    - Because lockfile is created by pyutil, we also save the occupier pid and .py path (name) in it
    - lockfile is named lock_<extract_path_stem(name)>.<pid>.lock.json
    """

    def __init__(self, name, folder=None, logger=None, max_instances=1):
        folder = folder or osp.join(get_platform_tmp_dir(), '_util')
        filename = f'lock_{extract_path_stem(name)}.{os.getpid()}.lock.json'
        self.name = name
        self.lockFile = osp.join(folder, filename)
        self.nMaxInstances = max_instances
        self.logger = logger or glogger
        # CAUTION:
        # - windows grpc server crashes with signals:
        #   - ValueError: signal only works in main thread of the main interpreter
        # - so handlers are only installed from the main thread
        if threading.current_thread() is threading.main_thread():
            common_sigs = [
                signal.SIGABRT,
                signal.SIGFPE,
                signal.SIGILL,
                signal.SIGINT,
                signal.SIGSEGV,
                signal.SIGTERM,
            ]
            plat_sigs = [
                signal.SIGBREAK,
                # CAUTION
                # - CTRL_C_EVENT, CTRL_BREAK_EVENT not working on Windows
                # signal.CTRL_C_EVENT,
                # signal.CTRL_BREAK_EVENT,
            ] if PLATFORM == 'Windows' else [
                # CAUTION:
                # - SIGCHLD as an alias is safe to ignore
                # - SIGKILL must be handled by os.kill()
                signal.SIGALRM,
                signal.SIGBUS,
                # signal.SIGCHLD,
                # - SIGCONT: CTRL+Z is allowed for bg process
                # signal.SIGCONT,
                signal.SIGHUP,
                # signal.SIGKILL,
                signal.SIGPIPE,
            ]
            for sig in common_sigs + plat_sigs:
                signal.signal(sig, self.handle_signal)
        # cleanup zombie locks due to runtime exceptions
        for lock_path in self._find_lock_files():
            if not is_pid_running(self._pid_of(lock_path)):
                safe_remove(lock_path)

    def _find_lock_files(self):
        """
        - list full paths of all lock files sharing this lock's name, any pid
        """
        pattern = f'lock_{extract_path_stem(self.name)}.*.lock.json'
        return glob.glob(osp.join(osp.dirname(self.lockFile), pattern))

    @staticmethod
    def _pid_of(lock_path):
        """
        - extract the pid from lock_<stem>.<pid>.lock.json
        - split from the right so that dots inside <stem> cannot shift the pid field
        """
        return int(osp.basename(lock_path).rsplit('.', 3)[-3])

    def lock(self):
        """
        - return False when the instance limit is already reached
        - otherwise write this process's lock file and return True
        """
        locks = self._find_lock_files()
        if len(locks) >= self.nMaxInstances:
            locker_pids = [self._pid_of(lock_path) for lock_path in locks]
            self.logger.warning(f'{self.name} is locked by processes: {locker_pids}. Will block new instances until unlocked.')
            return False
        save_json(self.lockFile, {
            'pid': os.getpid(),
            'name': self.name,
        })
        # CAUTION: race condition: saving needs a sec, it's up to application to await lockfile
        return True

    def unlock(self):
        """
        - remove this process's lock file
        - return True on removal, False when it was already gone or could not be removed
        """
        try:
            os.remove(self.lockFile)
        except FileNotFoundError:
            self.logger.warning(f'{self.name} already unlocked. Safely ignored.')
            return False
        except Exception:
            failure = traceback.format_exc()
            self.logger.error(
                f'Failed to unlock {self.name}:\n'
                f'Details:\n'
                f'{failure}\n'
                f'\n'
                f'Advice:\n'
                f'- Delete the lock by hand: {self.lockFile}'
            )
            return False
        return True

    def unlock_all(self):
        """
        - remove the lock files of every process holding this lock name
        - uses the same name pattern as lock(), so locks created from a path-like name are found
        """
        for lock_path in self._find_lock_files():
            try:
                os.remove(lock_path)
            except FileNotFoundError:
                # another process released it in the meantime: already unlocked
                continue
        return True

    def is_locked(self):
        """
        - True when this process's own lock file exists
        """
        return osp.isfile(self.lockFile)

    def handle_signal(self, sig, frame):
        """
        - signal handler: log, release the lock, then abort via RuntimeError
        """
        msg = f'Terminated due to signal: {signal.Signals(sig).name}; Will unlock'
        self.logger.warning(msg)
        self.unlock()
        raise RuntimeError(msg)
268
+
269
+
270
class Tracer:
    """
    - custom module-ignore rules
    - trace calls and returns
    - exclude first, then include
    - usage: use in source code
      - tracer = util.Tracer(exclude_funcname_pattern='stop')
      - tracer.start()
      - # add traceable code here
      - tracer.stop()
    """

    def __init__(self,
                 excluded_modules: set[str] = None,
                 exclude_filename_pattern: str = None,
                 include_filename_pattern: str = None,
                 exclude_funcname_pattern: str = None,
                 include_funcname_pattern: str = None,
                 trace_func=None,
                 exclude_builtins=True):
        # copy, so that ignore_stdlibs() never grows the caller's own set
        self.exclMods = {'builtins'} if excluded_modules is None else set(excluded_modules)
        self.exclFilePatt = re.compile(exclude_filename_pattern) if exclude_filename_pattern else None
        self.inclFilePatt = re.compile(include_filename_pattern) if include_filename_pattern else None
        self.exclFuncPatt = re.compile(exclude_funcname_pattern) if exclude_funcname_pattern else None
        self.inclFuncPatt = re.compile(include_funcname_pattern) if include_funcname_pattern else None
        self.traceFunc = trace_func
        if exclude_builtins:
            self.ignore_stdlibs()

    def start(self):
        """
        - install the custom trace function if given, else the built-in call/return tracer
        """
        sys.settrace(self.traceFunc or self._trace_calls_and_returns)

    @staticmethod
    def stop():
        """
        - uninstall whatever trace function is active
        """
        sys.settrace(None)

    def ignore_stdlibs(self):
        """
        - add all standard-library module names to the excluded modules
        """
        def _get_stdlib_module_names():
            # fallback for interpreters without sys.stdlib_module_names
            import distutils.sysconfig
            stdlib_dir = distutils.sysconfig.get_python_lib(standard_lib=True)
            return {f.replace(".py", "") for f in os.listdir(stdlib_dir)}

        # tuple comparison stays correct across major versions
        std_libs = set(sys.stdlib_module_names) if sys.version_info >= (3, 10) else _get_stdlib_module_names()
        self.exclMods.update(std_libs)

    def _trace_calls_and_returns(self, frame, event, arg):
        """
        trace hook for function calls and returns. Usage:
        sys.settrace(tracer._trace_calls_and_returns)
        - filters: excluded modules, then filename patterns, then function-name patterns
        - returns itself on 'call' so that the frame's 'return' event is traced too
        """
        if event not in ('call', 'return'):
            return
        module_name = frame.f_globals.get('__name__')
        if module_name is not None and module_name in self.exclMods:
            return
        filename = frame.f_code.co_filename
        # the filename must be matched against the filename pattern, not the function-name one
        if self.exclFilePatt and self.exclFilePatt.search(filename):
            return
        if self.inclFilePatt and not self.inclFilePatt.search(filename):
            return
        func_name = frame.f_code.co_name
        if self.exclFuncPatt and self.exclFuncPatt.search(func_name):
            return
        if self.inclFuncPatt and not self.inclFuncPatt.search(func_name):
            return
        line_number = frame.f_lineno
        line = linecache.getline(filename, line_number).strip()
        if event == 'call':
            code = frame.f_code
            args = ', '.join(f'{name}={repr(frame.f_locals[name])}' for name in code.co_varnames[:code.co_argcount])
            print(f'Call: {module_name}.{func_name}({args}) - {line}')
            return self._trace_calls_and_returns
        # 'return' event: arg carries the value being returned
        print(f'Return: {module_name}.{func_name} => {arg} - {line}')
343
+
344
+
345
class Cache:
    """
    cross-session caching: using temp-file to retrieve data based on hash changes
    - constraints:
      - data retrieval/parsing is expensive
      - one cache per data-source
    - cache is a mediator b/w app and data-source as a retriever only, cuz user's saving intent is always towards source, no need to cache a saving action
    - for cross-session caching, save hash into cache, then when instantiate cache object, always load hash from cache to compare with incoming hash
    - app must provide retriever function: retriever(src) -> json_data
      - because it'd cost the same to retrieve data from a json-file source as from cache, so no json default is provided
    - e.g., loading a complex tree-structure from a file:
      - tree_cache = Cache('/path/to/file.tree', lambda src: load_data(src), '/tmp/my_app')
      - # ... later
      - cached_tree_data = tree_cache.retrieve()
    """

    def __init__(self, data_source, data_retriever, cache_dir=get_platform_tmp_dir(), cache_type='cache', algo='checksum', source_seed='6ba7b810-9dad-11d1-80b4-00c04fd430c8'):
        assert algo in ['checksum', 'mtime']
        self.srcURL = data_source
        self.retriever = data_retriever
        # use a fixed namespace for each data-source to ensure inter-session consistency
        namespace = uuid.UUID(str(source_seed))
        uid = str(uuid.uuid5(namespace, self.srcURL))
        self.cacheFile = osp.join(cache_dir, f'{uid}.{cache_type}.json')
        self.hashAlgo = algo
        # first comparison needs the hash stored by a previous session, if any
        self.prevSrcHash = load_json(self.cacheFile).get('hash') if osp.isfile(self.cacheFile) else None

    def retrieve(self):
        """
        - return data from cache when the source hash is unchanged
        - re-run the retriever when the source changed, or when the cache file
          is no longer on disk (e.g., temp folder was purged after construction)
        """
        source_changed = self._compare_hash()
        if source_changed or not osp.isfile(self.cacheFile):
            return self.update()
        return load_json(self.cacheFile)['data']

    def update(self):
        """
        - update cache directly
        - useful when app needs to force update cache
        - stores the retrieved data together with the last known source hash
        """
        data = self.retriever(self.srcURL)
        container = {
            'data': data,
            'hash': self.prevSrcHash,
        }
        save_json(self.cacheFile, container)
        return data

    def _compare_hash(self):
        """
        - return True when the source hash differs from the last known one, or none is known yet
        - on change, remember the new hash as the last known one
        """
        in_src_hash = self._compute_hash()
        changed = self.prevSrcHash is None or in_src_hash != self.prevSrcHash
        if changed:
            self.prevSrcHash = in_src_hash
        return changed

    def _compute_hash(self):
        """
        - dispatch to the hash function selected by algo
        """
        hash_algo_map = {
            'checksum': self._compute_hash_as_checksum,
            'mtime': self._compute_hash_as_modified_time,
        }
        return hash_algo_map[self.hashAlgo]()

    def _compute_hash_as_checksum(self):
        return get_md5_checksum(self.srcURL)

    def _compute_hash_as_modified_time(self):
        # a missing source yields None, which _compare_hash treats as changed while no hash is known
        try:
            return osp.getmtime(self.srcURL)
        except FileNotFoundError:
            return None
412
+
413
+ # endregion
414
+
415
+
416
+ # region functions
417
+
130
418
  def get_platform_home_dir():
131
419
  home_envvar = 'USERPROFILE' if PLATFORM == 'Windows' else 'HOME'
132
420
  return os.getenv(home_envvar)
@@ -141,15 +429,6 @@ def get_platform_appdata_dir(winroam=True):
141
429
  return plat_dir_map.get(PLATFORM)
142
430
 
143
431
 
144
- def get_platform_tmp_dir():
145
- plat_dir_map = {
146
- 'Windows': osp.join(str(os.getenv('LOCALAPPDATA')), 'Temp'),
147
- 'Darwin': osp.expanduser('~/Library/Caches'),
148
- 'Linux': '/tmp'
149
- }
150
- return plat_dir_map.get(PLATFORM)
151
-
152
-
153
432
  def get_posix_shell_cfgfile():
154
433
  return os.path.expanduser('~/.bash_profile' if os.getenv('SHELL') == '/bin/bash' else '~/.zshrc')
155
434
 
@@ -232,6 +511,10 @@ def build_default_logger(logdir, name=None, verbose=False):
232
511
  return logging.getLogger(name or 'default')
233
512
 
234
513
 
514
def find_log_path(logger):
    """
    - return baseFilename of the first logging.FileHandler attached to logger
    - return None when logger has no file handler
    """
    for handler in logger.handlers:
        if isinstance(handler, logging.FileHandler):
            return handler.baseFilename
    return None
516
+
517
+
235
518
  glogger = build_default_logger(logdir=osp.join(get_platform_tmp_dir(), '_util'), name='util', verbose=True)
236
519
  glogger.setLevel(logging.DEBUG)
237
520
 
@@ -332,107 +615,32 @@ def throw(err_cls, detail, advice):
332
615
 
333
616
 
334
617
  def is_python3():
335
- return sys.version_info[0] > 2
336
-
337
-
338
- def load_json(path, as_namespace=False, encoding=TXT_CODEC):
339
- """
340
- - Load Json configuration file.
341
- - supports UTF-8 only, due to no way to support mixed encodings
342
- - most usecases involve either utf-8 or mixed encodings
343
- - windows users must fix their region and localization setup via control panel
344
- """
345
- with open(path, 'r', encoding=encoding, errors='backslashreplace', newline=None) as f:
346
- text = f.read()
347
- return json.loads(text) if not as_namespace else json.loads(text, object_hook=lambda d: SimpleNamespace(**d))
348
-
349
-
350
- def save_json(path, config, encoding=TXT_CODEC):
351
- """
352
- Use io.open(), aka open() with py3 to produce a file object that encodes
353
- Unicode as you write, then use json.dump() to write to that file.
354
- Validate keys to avoid JSON and program out-of-sync.
355
- """
356
- dict_config = vars(config) if isinstance(config, types.SimpleNamespace) else config
357
- par_dir = osp.split(path)[0]
358
- os.makedirs(par_dir, exist_ok=True)
359
- with open(path, 'w', encoding=encoding) as f:
360
- return json.dump(dict_config, f, ensure_ascii=False, indent=4)
361
-
362
-
363
- class Tracer:
364
- """
365
- - custom module-ignore rules
366
- - trace calls and returns
367
- - exclude first, then include
368
- - usage: use in source code
369
- - tracer = util.Tracer(exclude_funcname_pattern='stop')
370
- - tracer.start()
371
- - # add traceable code here
372
- - tracer.stop()
373
- """
374
-
375
- def __init__(self,
376
- excluded_modules: set[str] = None,
377
- exclude_filename_pattern: str = None,
378
- include_filename_pattern: str = None,
379
- exclude_funcname_pattern: str = None,
380
- include_funcname_pattern: str = None,
381
- trace_func=None,
382
- exclude_builtins=True):
383
- self.exclMods = {'builtins'} if excluded_modules is None else excluded_modules
384
- self.exclFilePatt = re.compile(exclude_filename_pattern) if exclude_filename_pattern else None
385
- self.inclFilePatt = re.compile(include_filename_pattern) if include_filename_pattern else None
386
- self.exclFuncPatt = re.compile(exclude_funcname_pattern) if exclude_funcname_pattern else None
387
- self.inclFuncPatt = re.compile(include_funcname_pattern) if include_funcname_pattern else None
388
- self.traceFunc = trace_func
389
- if exclude_builtins:
390
- self.ignore_stdlibs()
391
-
392
- def start(self):
393
- sys.settrace(self.traceFunc or self._trace_calls_and_returns)
394
-
395
- @staticmethod
396
- def stop():
397
- sys.settrace(None)
618
+ return sys.version_info[0] > 2
398
619
 
399
- def ignore_stdlibs(self):
400
- def _get_stdlib_module_names():
401
- import distutils.sysconfig
402
- stdlib_dir = distutils.sysconfig.get_python_lib(standard_lib=True)
403
- return {f.replace(".py", "") for f in os.listdir(stdlib_dir)}
404
620
 
405
- py_ver = sys.version_info
406
- std_libs = set(sys.stdlib_module_names) if py_ver.major >= 3 and py_ver.minor >= 10 else _get_stdlib_module_names()
407
- self.exclMods.update(std_libs)
621
def load_json(path, as_namespace=False, encoding=TXT_CODEC):
    """
    - Load Json configuration file.
    - supports UTF-8 only, due to no way to support mixed encodings
    - most usecases involve either utf-8 or mixed encodings
    - windows users must fix their region and localization setup via control panel
    - as_namespace: convert every json object into a SimpleNamespace instead of a dict
    """
    with open(path, 'r', encoding=encoding, errors='backslashreplace', newline=None) as stream:
        raw = stream.read()
    if as_namespace:
        # object_hook runs for every decoded json object, nested ones included
        return json.loads(raw, object_hook=lambda d: SimpleNamespace(**d))
    return json.loads(raw)
408
631
 
409
- def _trace_calls_and_returns(self, frame, event, arg):
410
- """
411
- track hook for function calls. Usage:
412
- sys.settrace(trace_calls_and_returns)
413
- """
414
- if event not in ('call', 'return'):
415
- return
416
- module_name = frame.f_globals.get('__name__')
417
- if module_name is not None and module_name in self.exclMods:
418
- return
419
- filename = frame.f_code.co_filename
420
- if self.exclFilePatt and self.exclFuncPatt.search(filename):
421
- return
422
- if self.inclFilePatt and not self.inclFilePatt.search(filename):
423
- return
424
- func_name = frame.f_code.co_name
425
- if self.exclFuncPatt and self.exclFuncPatt.search(func_name):
426
- return
427
- if self.inclFuncPatt and not self.inclFuncPatt.search(func_name):
428
- return
429
- line_number = frame.f_lineno
430
- line = linecache.getline(filename, line_number).strip()
431
- if event == 'call':
432
- args = ', '.join(f'{arg}={repr(frame.f_locals[arg])}' for arg in frame.f_code.co_varnames[:frame.f_code.co_argcount])
433
- print(f'Call: {module_name}.{func_name}({args}) - {line}')
434
- return self._trace_calls_and_returns
435
- print(f'Call: {module_name}.{func_name} => {arg} - {line}')
632
+
633
def save_json(path, config, encoding=TXT_CODEC):
    """
    - write config to path as indented json, keeping non-ascii characters as-is
    - config may be a SimpleNamespace; its attributes are saved as a json object
    - parent folders are created on demand; a bare filename saves into the current folder
    """
    dict_config = vars(config) if isinstance(config, SimpleNamespace) else config
    par_dir = osp.dirname(path)
    # os.makedirs('') raises, so only create the folder when path names one
    if par_dir:
        os.makedirs(par_dir, exist_ok=True)
    with open(path, 'w', encoding=encoding) as f:
        return json.dump(dict_config, f, ensure_ascii=False, indent=4)
436
644
 
437
645
 
438
646
  def get_md5_checksum(file):
@@ -776,109 +984,6 @@ def match_files_except_lines(file1, file2, excluded=None):
776
984
  return content1 == content2
777
985
 
778
986
 
779
- class RerunLock:
780
- """
781
- - Lock process from reentering when seeing lock file on disk
782
- - use semaphore-like behaviour with an instance limit
783
- - Because lockfile is created by pyutil, we also save the occupier pid and .py path (name) in it
784
- - if name is a path, e.g., __file__, then lockfile will be named after its basename
785
- """
786
-
787
- def __init__(self, name, folder=None, logger=None, max_instances=1):
788
- folder = folder or osp.join(get_platform_tmp_dir(), '_util')
789
- filename = f'lock_{extract_path_stem(name)}.{os.getpid()}.lock.json'
790
- self.name = name
791
- self.lockFile = osp.join(folder, filename)
792
- self.nMaxInstances = max_instances
793
- self.logger = logger or glogger
794
- # CAUTION:
795
- # - windows grpc server crashes with signals:
796
- # - ValueError: signal only works in main thread of the main interpreter
797
- # - signals are disabled for windows
798
- if threading.current_thread() is threading.main_thread():
799
- common_sigs = [
800
- signal.SIGABRT,
801
- signal.SIGFPE,
802
- signal.SIGILL,
803
- signal.SIGINT,
804
- signal.SIGSEGV,
805
- signal.SIGTERM,
806
- ]
807
- plat_sigs = [
808
- signal.SIGBREAK,
809
- # CAUTION
810
- # - CTRL_C_EVENT, CTRL_BREAK_EVENT not working on Windows
811
- # signal.CTRL_C_EVENT,
812
- # signal.CTRL_BREAK_EVENT,
813
- ] if PLATFORM == 'Windows' else [
814
- # CAUTION:
815
- # - SIGCHLD as an alias is safe to ignore
816
- # - SIGKILL must be handled by os.kill()
817
- signal.SIGALRM,
818
- signal.SIGBUS,
819
- # signal.SIGCHLD,
820
- # - SIGCONT: CTRL+Z is allowed for bg process
821
- # signal.SIGCONT,
822
- signal.SIGHUP,
823
- # signal.SIGKILL,
824
- signal.SIGPIPE,
825
- ]
826
- for sig in common_sigs + plat_sigs:
827
- signal.signal(sig, self.handle_signal)
828
- # cleanup zombie locks due to runtime exceptions
829
- locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
830
- zombie_locks = [lock for lock in locks if not is_pid_running(int(lock.split(".")[1]))]
831
- for lock in zombie_locks:
832
- safe_remove(osp.join(osp.dirname(self.lockFile), lock))
833
-
834
- def lock(self):
835
- locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
836
- is_locked = len(locks) >= self.nMaxInstances
837
- if is_locked:
838
- locker_pids = [int(lock.split(".")[1]) for lock in locks]
839
- self.logger.warning(f'{self.name} is locked by processes: {locker_pids}. Will block new instances until unlocked.')
840
- return False
841
- save_json(self.lockFile, {
842
- 'pid': os.getpid(),
843
- 'name': self.name,
844
- })
845
- # CAUTION: race condition: saving needs a sec, it's up to application to await lockfile
846
- return True
847
-
848
- def unlock(self):
849
- try:
850
- os.remove(self.lockFile)
851
- except FileNotFoundError:
852
- self.logger.warning(f'{self.name} already unlocked. Safely ignored.')
853
- return False
854
- except Exception:
855
- failure = traceback.format_exc()
856
- self.logger.error(f""""\
857
- Failed to unlock {self.name}:
858
- Details:
859
- {failure}
860
-
861
- Advice:
862
- - Delete the lock by hand: {self.lockFile}""")
863
- return False
864
- return True
865
-
866
- def unlock_all(self):
867
- locks = glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{osp.basename(self.name)}.*.lock.json'))
868
- for lock in locks:
869
- os.remove(lock)
870
- return True
871
-
872
- def is_locked(self):
873
- return osp.isfile(self.lockFile)
874
-
875
- def handle_signal(self, sig, frame):
876
- msg = f'Terminated due to signal: {signal.Signals(sig).name}; Will unlock'
877
- self.logger.warning(msg)
878
- self.unlock()
879
- raise RuntimeError(msg)
880
-
881
-
882
987
  def rerun_lock(name, folder=None, logger=glogger, max_instances=1):
883
988
  """Decorator for reentrance locking on functions"""
884
989
 
@@ -2534,75 +2639,6 @@ def inspect_obj(obj):
2534
2639
  return {'type': type_name, 'attrs': attrs, 'repr': repr(obj), 'details': details}
2535
2640
 
2536
2641
 
2537
- class Cache:
2538
- """
2539
- cross-session caching: using temp-file to retrieve data based on hash changes
2540
- - constraints:
2541
- - data retrieval/parsing is expensive
2542
- - one cache per data-source
2543
- - cache is a mediator b/w app and data-source as a retriever only, cuz user's saving intent is always towards source, no need to cache a saving action
2544
- - for cross-session caching, save hash into cache, then when instantiate cache object, always load hash from cache to compare with incoming hash
2545
- - app must provide retriever function: retriever(src) -> json_data
2546
- - because it'd cost the same to retrieve data from a json-file source as from cache, so no json default is provided
2547
- - e.g., loading a complex tree-structure from a file:
2548
- - tree_cache = Cache('/path/to/file.tree', lambda: src: load_data(src), '/tmp/my_app')
2549
- - # ... later
2550
- - cached_tree_data = tree_cache.retrieve()
2551
- """
2552
-
2553
- def __init__(self, data_source, data_retriever, cache_dir=get_platform_tmp_dir(), cache_type='cache', algo='checksum', source_seed='6ba7b810-9dad-11d1-80b4-00c04fd430c8'):
2554
- assert algo in ['checksum', 'mtime']
2555
- self.srcURL = data_source
2556
- self.retriever = data_retriever
2557
- # use a fixed namespace for each data-source to ensure inter-session consistency
2558
- namespace = uuid.UUID(str(source_seed))
2559
- uid = str(uuid.uuid5(namespace, self.srcURL))
2560
- self.cacheFile = osp.join(cache_dir, f'{uid}.{cache_type}.json')
2561
- self.hashAlgo = algo
2562
- # first comparison needs
2563
- self.prevSrcHash = load_json(self.cacheFile).get('hash') if osp.isfile(self.cacheFile) else None
2564
-
2565
- def retrieve(self):
2566
- if self._compare_hash():
2567
- return self.update()
2568
- return load_json(self.cacheFile)['data']
2569
-
2570
- def update(self):
2571
- """
2572
- - update cache directly
2573
- - useful when app needs to force update cache
2574
- """
2575
- data = self.retriever(self.srcURL)
2576
- container = {
2577
- 'data': data,
2578
- 'hash': self.prevSrcHash,
2579
- }
2580
- save_json(self.cacheFile, container)
2581
- return data
2582
-
2583
- def _compare_hash(self):
2584
- in_src_hash = self._compute_hash()
2585
- if changed := in_src_hash != self.prevSrcHash or self.prevSrcHash is None:
2586
- self.prevSrcHash = in_src_hash
2587
- return changed
2588
-
2589
- def _compute_hash(self):
2590
- hash_algo_map = {
2591
- 'checksum': self._compute_hash_as_checksum,
2592
- 'mtime': self._compute_hash_as_modified_time,
2593
- }
2594
- return hash_algo_map[self.hashAlgo]()
2595
-
2596
- def _compute_hash_as_checksum(self):
2597
- return get_md5_checksum(self.srcURL)
2598
-
2599
- def _compute_hash_as_modified_time(self):
2600
- try:
2601
- return osp.getmtime(self.srcURL)
2602
- except FileNotFoundError:
2603
- return None
2604
-
2605
-
2606
2642
  def mem_caching(maxsize=None):
2607
2643
  """
2608
2644
  - per-process lru caching for multiple data sources
@@ -2844,18 +2880,6 @@ def indent(code_or_lines, spaces_per_indent=4):
2844
2880
  return '\n'.join(indented) if isinstance(code_or_lines, str) else indented
2845
2881
 
2846
2882
 
2847
- def find_log_path(logger):
2848
- """
2849
- - logger must be a python logger
2850
- """
2851
- for handler in logger.handlers:
2852
- if isinstance(handler, logging.FileHandler):
2853
- return handler.baseFilename
2854
- # use next() to get the first handler
2855
- # if not found, raise StopIteration
2856
- return next(filter(lambda h: isinstance(h, logging.FileHandler), logger.handlers), None)
2857
-
2858
-
2859
2883
  def collect_file_tree(root):
2860
2884
  return [file for file in glob.glob(osp.join(root, '**'), recursive=True) if osp.isfile(file)]
2861
2885
 
@@ -2951,27 +2975,7 @@ def json_from_text(json_str):
2951
2975
  except json.JSONDecodeError as e:
2952
2976
  return None, e
2953
2977
 
2954
-
2955
- class OfflineJSON:
2956
- def __init__(self, file_path):
2957
- self.path = file_path
2958
-
2959
- def exists(self):
2960
- return osp.isfile(self.path)
2961
-
2962
- def load(self):
2963
- return load_json(self.path) if self.exists() else None
2964
-
2965
- def save(self, data: dict):
2966
- save_json(self.path, data)
2967
-
2968
- def merge(self, props: dict):
2969
- data = self.load()
2970
- if not data:
2971
- return self.save(props)
2972
- data.update(props)
2973
- self.save(data)
2974
- return data
2978
+ # endregion
2975
2979
 
2976
2980
 
2977
2981
  def _test():
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "kkpyutil"
3
- version = "1.40.0"
3
+ version = "1.41.0"
4
4
  description = "Building blocks for sysadmin and DevOps"
5
5
  authors = ["Beinan Li <li.beinan@gmail.com>"]
6
6
  maintainers = ["Beinan Li <li.beinan@gmail.com>"]
File without changes
File without changes