kkpyutil 1.40.0__tar.gz → 1.41.0__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/PKG-INFO +1 -1
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/kkpyutil.py +332 -328
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/pyproject.toml +1 -1
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/LICENSE +0 -0
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/README.md +0 -0
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/kkpyutil_helper/windows/kkttssave.ps1 +0 -0
- {kkpyutil-1.40.0 → kkpyutil-1.41.0}/kkpyutil_helper/windows/kkttsspeak.ps1 +0 -0
@@ -58,9 +58,8 @@ import uuid
 import warnings
 from types import SimpleNamespace
 
-#
-
-#
+# region globals
+
 _script_dir = osp.abspath(osp.dirname(__file__))
 TXT_CODEC = 'utf-8' # Importable.
 LOCALE_CODEC = locale.getpreferredencoding()
@@ -70,23 +69,30 @@ PLATFORM = platform.system()
 if PLATFORM == 'Windows':
     import winreg
 
+# endregion
+
+
+# region classes
 
-class
+class ClassicSingleton:
+    def __new__(cls):
+        if not hasattr(cls, 'instance'):
+            cls.instance = super(ClassicSingleton, cls).__new__(cls)
+        return cls.instance
+
+
+class BorgSingleton:
     """
-
-
-
-    myobj = SingletonDecorator(MyClass, args, kwargs)
+    - Borg pattern: all instances share the same state, but not the same identity
+    - override _shared_borg_state to avoid child polluting states of parent instances
+    - ref: https://www.geeksforgeeks.org/singleton-pattern-in-python-a-complete-guide/
     """
+    _shared_borg_state = {}
 
-    def
-
-
-
-    def __call__(self, *args, **kwargs):
-        if self.instance is None:
-            self.instance = self.klass(*args, **kwargs)
-        return self.instance
+    def __new__(cls, *args, **kwargs):
+        obj = super(BorgSingleton, cls).__new__(cls, *args, **kwargs)
+        obj.__dict__ = cls._shared_borg_state
+        return obj
 
 
 class LowPassLogFilter(object):
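Note: the new classes region opens with two singleton flavors, apparently replacing the old SingletonDecorator shown removed above. They differ in what is shared: ClassicSingleton reuses one instance, while BorgSingleton hands out distinct instances that share one `__dict__`. A minimal standalone sketch (a copy of the added code plus assertions):

```python
# Contrast of the two singleton flavors added in this hunk.
class ClassicSingleton:
    def __new__(cls):
        if not hasattr(cls, 'instance'):
            cls.instance = super(ClassicSingleton, cls).__new__(cls)
        return cls.instance


class BorgSingleton:
    _shared_borg_state = {}

    def __new__(cls, *args, **kwargs):
        obj = super(BorgSingleton, cls).__new__(cls, *args, **kwargs)
        obj.__dict__ = cls._shared_borg_state
        return obj


a, b = ClassicSingleton(), ClassicSingleton()
assert a is b                          # one shared identity
c, d = BorgSingleton(), BorgSingleton()
c.answer = 42
assert c is not d and d.answer == 42   # distinct objects, one shared state
```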
@@ -127,6 +133,288 @@ class BandPassLogFilter(object):
         return self.__levelbounds[0] <= log.levelno <= self.__levelbounds[1]
 
 
+class OfflineJSON:
+    def __init__(self, file_path):
+        self.path = file_path
+
+    def exists(self):
+        return osp.isfile(self.path)
+
+    def load(self):
+        return load_json(self.path) if self.exists() else None
+
+    def save(self, data: dict):
+        save_json(self.path, data)
+
+    def merge(self, props: dict):
+        data = self.load()
+        if not data:
+            return self.save(props)
+        data.update(props)
+        self.save(data)
+        return data
+
+
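Note: OfflineJSON is a thin persistence wrapper over the module's load_json/save_json (it moves here from the functions region; the matching removal appears at the end of this diff). A usage sketch, assuming kkpyutil 1.41.0 is installed and the temp dir is writable; the file name is illustrative:

```python
import os.path as osp
import kkpyutil as util

cfg = util.OfflineJSON(osp.join(util.get_platform_tmp_dir(), 'demo_cfg.json'))
cfg.save({'theme': 'dark', 'volume': 8})          # writes the JSON file
merged = cfg.merge({'volume': 5, 'mute': False})  # load + update + save
assert merged['theme'] == 'dark' and merged['volume'] == 5
assert cfg.exists() and cfg.load() == merged
```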
+def get_platform_tmp_dir():
+    plat_dir_map = {
+        'Windows': osp.join(str(os.getenv('LOCALAPPDATA')), 'Temp'),
+        'Darwin': osp.expanduser('~/Library/Caches'),
+        'Linux': '/tmp'
+    }
+    return plat_dir_map.get(PLATFORM)
+
+
+class RerunLock:
+    """
+    - Lock process from reentering when seeing lock file on disk
+    - use semaphore-like behaviour with an instance limit
+    - Because lockfile is created by pyutil, we also save the occupier pid and .py path (name) in it
+    - if name is a path, e.g., __file__, then lockfile will be named after its basename
+    """
+
+    def __init__(self, name, folder=None, logger=None, max_instances=1):
+        folder = folder or osp.join(get_platform_tmp_dir(), '_util')
+        filename = f'lock_{extract_path_stem(name)}.{os.getpid()}.lock.json'
+        self.name = name
+        self.lockFile = osp.join(folder, filename)
+        self.nMaxInstances = max_instances
+        self.logger = logger or glogger
+        # CAUTION:
+        # - windows grpc server crashes with signals:
+        # - ValueError: signal only works in main thread of the main interpreter
+        # - signals are disabled for windows
+        if threading.current_thread() is threading.main_thread():
+            common_sigs = [
+                signal.SIGABRT,
+                signal.SIGFPE,
+                signal.SIGILL,
+                signal.SIGINT,
+                signal.SIGSEGV,
+                signal.SIGTERM,
+            ]
+            plat_sigs = [
+                signal.SIGBREAK,
+                # CAUTION
+                # - CTRL_C_EVENT, CTRL_BREAK_EVENT not working on Windows
+                # signal.CTRL_C_EVENT,
+                # signal.CTRL_BREAK_EVENT,
+            ] if PLATFORM == 'Windows' else [
+                # CAUTION:
+                # - SIGCHLD as an alias is safe to ignore
+                # - SIGKILL must be handled by os.kill()
+                signal.SIGALRM,
+                signal.SIGBUS,
+                # signal.SIGCHLD,
+                # - SIGCONT: CTRL+Z is allowed for bg process
+                # signal.SIGCONT,
+                signal.SIGHUP,
+                # signal.SIGKILL,
+                signal.SIGPIPE,
+            ]
+            for sig in common_sigs + plat_sigs:
+                signal.signal(sig, self.handle_signal)
+        # cleanup zombie locks due to runtime exceptions
+        locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
+        zombie_locks = [lock for lock in locks if not is_pid_running(int(lock.split(".")[1]))]
+        for lock in zombie_locks:
+            safe_remove(osp.join(osp.dirname(self.lockFile), lock))
+
+    def lock(self):
+        locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
+        is_locked = len(locks) >= self.nMaxInstances
+        if is_locked:
+            locker_pids = [int(lock.split(".")[1]) for lock in locks]
+            self.logger.warning(f'{self.name} is locked by processes: {locker_pids}. Will block new instances until unlocked.')
+            return False
+        save_json(self.lockFile, {
+            'pid': os.getpid(),
+            'name': self.name,
+        })
+        # CAUTION: race condition: saving needs a sec, it's up to application to await lockfile
+        return True
+
+    def unlock(self):
+        try:
+            os.remove(self.lockFile)
+        except FileNotFoundError:
+            self.logger.warning(f'{self.name} already unlocked. Safely ignored.')
+            return False
+        except Exception:
+            failure = traceback.format_exc()
+            self.logger.error(f"""\
+Failed to unlock {self.name}:
+Details:
+{failure}
+
+Advice:
+- Delete the lock by hand: {self.lockFile}""")
+            return False
+        return True
+
+    def unlock_all(self):
+        locks = glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{osp.basename(self.name)}.*.lock.json'))
+        for lock in locks:
+            os.remove(lock)
+        return True
+
+    def is_locked(self):
+        return osp.isfile(self.lockFile)
+
+    def handle_signal(self, sig, frame):
+        msg = f'Terminated due to signal: {signal.Signals(sig).name}; Will unlock'
+        self.logger.warning(msg)
+        self.unlock()
+        raise RuntimeError(msg)
+
+
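Note: RerunLock guards against re-entry via a pid-stamped lock file, as its docstring says; the rerun_lock decorator later in the diff wraps the same class. A usage sketch with an illustrative task name:

```python
import kkpyutil as util

lock = util.RerunLock(name='demo_task', max_instances=1)
if lock.lock():
    try:
        ...  # exclusive work goes here
    finally:
        lock.unlock()
else:
    print('another instance holds the lock; exiting')
```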
+class Tracer:
+    """
+    - custom module-ignore rules
+    - trace calls and returns
+    - exclude first, then include
+    - usage: use in source code
+    - tracer = util.Tracer(exclude_funcname_pattern='stop')
+    - tracer.start()
+    - # add traceable code here
+    - tracer.stop()
+    """
+
+    def __init__(self,
+                 excluded_modules: set[str] = None,
+                 exclude_filename_pattern: str = None,
+                 include_filename_pattern: str = None,
+                 exclude_funcname_pattern: str = None,
+                 include_funcname_pattern: str = None,
+                 trace_func=None,
+                 exclude_builtins=True):
+        self.exclMods = {'builtins'} if excluded_modules is None else excluded_modules
+        self.exclFilePatt = re.compile(exclude_filename_pattern) if exclude_filename_pattern else None
+        self.inclFilePatt = re.compile(include_filename_pattern) if include_filename_pattern else None
+        self.exclFuncPatt = re.compile(exclude_funcname_pattern) if exclude_funcname_pattern else None
+        self.inclFuncPatt = re.compile(include_funcname_pattern) if include_funcname_pattern else None
+        self.traceFunc = trace_func
+        if exclude_builtins:
+            self.ignore_stdlibs()
+
+    def start(self):
+        sys.settrace(self.traceFunc or self._trace_calls_and_returns)
+
+    @staticmethod
+    def stop():
+        sys.settrace(None)
+
+    def ignore_stdlibs(self):
+        def _get_stdlib_module_names():
+            import distutils.sysconfig
+            stdlib_dir = distutils.sysconfig.get_python_lib(standard_lib=True)
+            return {f.replace(".py", "") for f in os.listdir(stdlib_dir)}
+
+        py_ver = sys.version_info
+        std_libs = set(sys.stdlib_module_names) if py_ver.major >= 3 and py_ver.minor >= 10 else _get_stdlib_module_names()
+        self.exclMods.update(std_libs)
+
+    def _trace_calls_and_returns(self, frame, event, arg):
+        """
+        track hook for function calls. Usage:
+        sys.settrace(trace_calls_and_returns)
+        """
+        if event not in ('call', 'return'):
+            return
+        module_name = frame.f_globals.get('__name__')
+        if module_name is not None and module_name in self.exclMods:
+            return
+        filename = frame.f_code.co_filename
+        if self.exclFilePatt and self.exclFilePatt.search(filename):
+            return
+        if self.inclFilePatt and not self.inclFilePatt.search(filename):
+            return
+        func_name = frame.f_code.co_name
+        if self.exclFuncPatt and self.exclFuncPatt.search(func_name):
+            return
+        if self.inclFuncPatt and not self.inclFuncPatt.search(func_name):
+            return
+        line_number = frame.f_lineno
+        line = linecache.getline(filename, line_number).strip()
+        if event == 'call':
+            args = ', '.join(f'{arg}={repr(frame.f_locals[arg])}' for arg in frame.f_code.co_varnames[:frame.f_code.co_argcount])
+            print(f'Call: {module_name}.{func_name}({args}) - {line}')
+            return self._trace_calls_and_returns
+        print(f'Return: {module_name}.{func_name} => {arg} - {line}')
+
+
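Note: per the Tracer docstring, filtering is exclude-first, then include, against module, filename, and function-name patterns. A sketch that traces only functions whose names match an illustrative pattern; the printed format follows the print calls above:

```python
import kkpyutil as util

def process_order(order_id, qty=1):
    return order_id * qty

tracer = util.Tracer(include_funcname_pattern='process')
tracer.start()
process_order(7, qty=3)  # prints roughly: Call: __main__.process_order(order_id=7, qty=3) - ...
tracer.stop()
```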
+class Cache:
+    """
+    cross-session caching: using temp-file to retrieve data based on hash changes
+    - constraints:
+    - data retrieval/parsing is expensive
+    - one cache per data-source
+    - cache is a mediator b/w app and data-source as a retriever only, cuz user's saving intent is always towards source, no need to cache a saving action
+    - for cross-session caching, save hash into cache, then when instantiating the cache object, always load hash from cache to compare with incoming hash
+    - app must provide retriever function: retriever(src) -> json_data
+    - because it'd cost the same to retrieve data from a json-file source as from cache, no json default is provided
+    - e.g., loading a complex tree-structure from a file:
+    - tree_cache = Cache('/path/to/file.tree', lambda src: load_data(src), '/tmp/my_app')
+    - # ... later
+    - cached_tree_data = tree_cache.retrieve()
+    """
+
+    def __init__(self, data_source, data_retriever, cache_dir=get_platform_tmp_dir(), cache_type='cache', algo='checksum', source_seed='6ba7b810-9dad-11d1-80b4-00c04fd430c8'):
+        assert algo in ['checksum', 'mtime']
+        self.srcURL = data_source
+        self.retriever = data_retriever
+        # use a fixed namespace for each data-source to ensure inter-session consistency
+        namespace = uuid.UUID(str(source_seed))
+        uid = str(uuid.uuid5(namespace, self.srcURL))
+        self.cacheFile = osp.join(cache_dir, f'{uid}.{cache_type}.json')
+        self.hashAlgo = algo
+        # first comparison needs the hash saved by the previous session, if any
+        self.prevSrcHash = load_json(self.cacheFile).get('hash') if osp.isfile(self.cacheFile) else None
+
+    def retrieve(self):
+        if self._compare_hash():
+            return self.update()
+        return load_json(self.cacheFile)['data']
+
+    def update(self):
+        """
+        - update cache directly
+        - useful when app needs to force update cache
+        """
+        data = self.retriever(self.srcURL)
+        container = {
+            'data': data,
+            'hash': self.prevSrcHash,
+        }
+        save_json(self.cacheFile, container)
+        return data
+
+    def _compare_hash(self):
+        in_src_hash = self._compute_hash()
+        if changed := in_src_hash != self.prevSrcHash or self.prevSrcHash is None:
+            self.prevSrcHash = in_src_hash
+        return changed
+
+    def _compute_hash(self):
+        hash_algo_map = {
+            'checksum': self._compute_hash_as_checksum,
+            'mtime': self._compute_hash_as_modified_time,
+        }
+        return hash_algo_map[self.hashAlgo]()
+
+    def _compute_hash_as_checksum(self):
+        return get_md5_checksum(self.srcURL)
+
+    def _compute_hash_as_modified_time(self):
+        try:
+            return osp.getmtime(self.srcURL)
+        except FileNotFoundError:
+            return None
+
+# endregion
+
+
+# region functions
+
 def get_platform_home_dir():
     home_envvar = 'USERPROFILE' if PLATFORM == 'Windows' else 'HOME'
     return os.getenv(home_envvar)
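Note: Cache.retrieve() re-runs the retriever only when the source hash (md5 checksum or mtime) differs from the hash persisted in the cache file. A sketch following the docstring's own example, with an illustrative source file and parser:

```python
import json
import os.path as osp
import tempfile
import kkpyutil as util

src = osp.join(tempfile.gettempdir(), 'demo_tree.json')  # illustrative source
with open(src, 'w') as f:
    json.dump({'root': [1, 2, 3]}, f)

def parse_tree(path):  # stand-in for an expensive parse
    with open(path) as f:
        return json.load(f)

tree_cache = util.Cache(src, parse_tree, algo='mtime')
data = tree_cache.retrieve()  # first call: runs parse_tree and fills the cache
data = tree_cache.retrieve()  # unchanged mtime: served from the cache file
```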
@@ -141,15 +429,6 @@ def get_platform_appdata_dir(winroam=True):
     return plat_dir_map.get(PLATFORM)
 
 
-def get_platform_tmp_dir():
-    plat_dir_map = {
-        'Windows': osp.join(str(os.getenv('LOCALAPPDATA')), 'Temp'),
-        'Darwin': osp.expanduser('~/Library/Caches'),
-        'Linux': '/tmp'
-    }
-    return plat_dir_map.get(PLATFORM)
-
-
 def get_posix_shell_cfgfile():
     return os.path.expanduser('~/.bash_profile' if os.getenv('SHELL') == '/bin/bash' else '~/.zshrc')
 
@@ -232,6 +511,10 @@ def build_default_logger(logdir, name=None, verbose=False):
     return logging.getLogger(name or 'default')
 
 
+def find_log_path(logger):
+    return next((handler.baseFilename for handler in logger.handlers if isinstance(handler, logging.FileHandler)), None)
+
+
 glogger = build_default_logger(logdir=osp.join(get_platform_tmp_dir(), '_util'), name='util', verbose=True)
 glogger.setLevel(logging.DEBUG)
 
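Note: the new find_log_path is a one-liner that returns the first FileHandler's baseFilename, or None if the logger writes nowhere on disk. E.g.:

```python
import kkpyutil as util

logger = util.build_default_logger(logdir='/tmp/demo_logs', name='demo')
print(util.find_log_path(logger))  # the attached FileHandler's path, or None
```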
@@ -332,107 +615,32 @@ def throw(err_cls, detail, advice):
 
 
 def is_python3():
-    return sys.version_info[0] > 2
-
-
-def load_json(path, as_namespace=False, encoding=TXT_CODEC):
-    """
-    - Load Json configuration file.
-    - supports UTF-8 only, due to no way to support mixed encodings
-    - most usecases involve either utf-8 or mixed encodings
-    - windows users must fix their region and localization setup via control panel
-    """
-    with open(path, 'r', encoding=encoding, errors='backslashreplace', newline=None) as f:
-        text = f.read()
-    return json.loads(text) if not as_namespace else json.loads(text, object_hook=lambda d: SimpleNamespace(**d))
-
-
-def save_json(path, config, encoding=TXT_CODEC):
-    """
-    Use io.open(), aka open() with py3 to produce a file object that encodes
-    Unicode as you write, then use json.dump() to write to that file.
-    Validate keys to avoid JSON and program out-of-sync.
-    """
-    dict_config = vars(config) if isinstance(config, types.SimpleNamespace) else config
-    par_dir = osp.split(path)[0]
-    os.makedirs(par_dir, exist_ok=True)
-    with open(path, 'w', encoding=encoding) as f:
-        return json.dump(dict_config, f, ensure_ascii=False, indent=4)
-
-
-class Tracer:
-    """
-    - custom module-ignore rules
-    - trace calls and returns
-    - exclude first, then include
-    - usage: use in source code
-    - tracer = util.Tracer(exclude_funcname_pattern='stop')
-    - tracer.start()
-    - # add traceable code here
-    - tracer.stop()
-    """
-
-    def __init__(self,
-                 excluded_modules: set[str] = None,
-                 exclude_filename_pattern: str = None,
-                 include_filename_pattern: str = None,
-                 exclude_funcname_pattern: str = None,
-                 include_funcname_pattern: str = None,
-                 trace_func=None,
-                 exclude_builtins=True):
-        self.exclMods = {'builtins'} if excluded_modules is None else excluded_modules
-        self.exclFilePatt = re.compile(exclude_filename_pattern) if exclude_filename_pattern else None
-        self.inclFilePatt = re.compile(include_filename_pattern) if include_filename_pattern else None
-        self.exclFuncPatt = re.compile(exclude_funcname_pattern) if exclude_funcname_pattern else None
-        self.inclFuncPatt = re.compile(include_funcname_pattern) if include_funcname_pattern else None
-        self.traceFunc = trace_func
-        if exclude_builtins:
-            self.ignore_stdlibs()
-
-    def start(self):
-        sys.settrace(self.traceFunc or self._trace_calls_and_returns)
-
-    @staticmethod
-    def stop():
-        sys.settrace(None)
+    return sys.version_info[0] > 2
 
-    def ignore_stdlibs(self):
-        def _get_stdlib_module_names():
-            import distutils.sysconfig
-            stdlib_dir = distutils.sysconfig.get_python_lib(standard_lib=True)
-            return {f.replace(".py", "") for f in os.listdir(stdlib_dir)}
 
-        py_ver = sys.version_info
-        std_libs = set(sys.stdlib_module_names) if py_ver.major >= 3 and py_ver.minor >= 10 else _get_stdlib_module_names()
-        self.exclMods.update(std_libs)
+def load_json(path, as_namespace=False, encoding=TXT_CODEC):
+    """
+    - Load Json configuration file.
+    - supports UTF-8 only, due to no way to support mixed encodings
+    - most usecases involve either utf-8 or mixed encodings
+    - windows users must fix their region and localization setup via control panel
+    """
+    with open(path, 'r', encoding=encoding, errors='backslashreplace', newline=None) as f:
+        text = f.read()
+    return json.loads(text) if not as_namespace else json.loads(text, object_hook=lambda d: SimpleNamespace(**d))
 
-    def _trace_calls_and_returns(self, frame, event, arg):
-        """
-        track hook for function calls. Usage:
-        sys.settrace(trace_calls_and_returns)
-        """
-        if event not in ('call', 'return'):
-            return
-        module_name = frame.f_globals.get('__name__')
-        if module_name is not None and module_name in self.exclMods:
-            return
-        filename = frame.f_code.co_filename
-        if self.exclFilePatt and self.exclFilePatt.search(filename):
-            return
-        if self.inclFilePatt and not self.inclFilePatt.search(filename):
-            return
-        func_name = frame.f_code.co_name
-        if self.exclFuncPatt and self.exclFuncPatt.search(func_name):
-            return
-        if self.inclFuncPatt and not self.inclFuncPatt.search(func_name):
-            return
-        line_number = frame.f_lineno
-        line = linecache.getline(filename, line_number).strip()
-        if event == 'call':
-            args = ', '.join(f'{arg}={repr(frame.f_locals[arg])}' for arg in frame.f_code.co_varnames[:frame.f_code.co_argcount])
-            print(f'Call: {module_name}.{func_name}({args}) - {line}')
-            return self._trace_calls_and_returns
-        print(f'Return: {module_name}.{func_name} => {arg} - {line}')
+
+def save_json(path, config, encoding=TXT_CODEC):
+    """
+    Use io.open(), aka open() with py3 to produce a file object that encodes
+    Unicode as you write, then use json.dump() to write to that file.
+    Validate keys to avoid JSON and program out-of-sync.
+    """
+    dict_config = vars(config) if isinstance(config, types.SimpleNamespace) else config
+    par_dir = osp.split(path)[0]
+    os.makedirs(par_dir, exist_ok=True)
+    with open(path, 'w', encoding=encoding) as f:
+        return json.dump(dict_config, f, ensure_ascii=False, indent=4)
 
 
 def get_md5_checksum(file):
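Note: load_json/save_json land here unchanged apart from the move. A round-trip sketch; the path is illustrative:

```python
import kkpyutil as util

path = '/tmp/demo_cfg.json'  # illustrative
util.save_json(path, {'retries': 3, 'hosts': ['a', 'b']})
cfg = util.load_json(path, as_namespace=True)  # attribute-style access
assert cfg.retries == 3 and cfg.hosts == ['a', 'b']
```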
@@ -776,109 +984,6 @@ def match_files_except_lines(file1, file2, excluded=None):
     return content1 == content2
 
 
-class RerunLock:
-    """
-    - Lock process from reentering when seeing lock file on disk
-    - use semaphore-like behaviour with an instance limit
-    - Because lockfile is created by pyutil, we also save the occupier pid and .py path (name) in it
-    - if name is a path, e.g., __file__, then lockfile will be named after its basename
-    """
-
-    def __init__(self, name, folder=None, logger=None, max_instances=1):
-        folder = folder or osp.join(get_platform_tmp_dir(), '_util')
-        filename = f'lock_{extract_path_stem(name)}.{os.getpid()}.lock.json'
-        self.name = name
-        self.lockFile = osp.join(folder, filename)
-        self.nMaxInstances = max_instances
-        self.logger = logger or glogger
-        # CAUTION:
-        # - windows grpc server crashes with signals:
-        # - ValueError: signal only works in main thread of the main interpreter
-        # - signals are disabled for windows
-        if threading.current_thread() is threading.main_thread():
-            common_sigs = [
-                signal.SIGABRT,
-                signal.SIGFPE,
-                signal.SIGILL,
-                signal.SIGINT,
-                signal.SIGSEGV,
-                signal.SIGTERM,
-            ]
-            plat_sigs = [
-                signal.SIGBREAK,
-                # CAUTION
-                # - CTRL_C_EVENT, CTRL_BREAK_EVENT not working on Windows
-                # signal.CTRL_C_EVENT,
-                # signal.CTRL_BREAK_EVENT,
-            ] if PLATFORM == 'Windows' else [
-                # CAUTION:
-                # - SIGCHLD as an alias is safe to ignore
-                # - SIGKILL must be handled by os.kill()
-                signal.SIGALRM,
-                signal.SIGBUS,
-                # signal.SIGCHLD,
-                # - SIGCONT: CTRL+Z is allowed for bg process
-                # signal.SIGCONT,
-                signal.SIGHUP,
-                # signal.SIGKILL,
-                signal.SIGPIPE,
-            ]
-            for sig in common_sigs + plat_sigs:
-                signal.signal(sig, self.handle_signal)
-        # cleanup zombie locks due to runtime exceptions
-        locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
-        zombie_locks = [lock for lock in locks if not is_pid_running(int(lock.split(".")[1]))]
-        for lock in zombie_locks:
-            safe_remove(osp.join(osp.dirname(self.lockFile), lock))
-
-    def lock(self):
-        locks = [osp.basename(lock) for lock in glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{extract_path_stem(self.name)}.*.lock.json'))]
-        is_locked = len(locks) >= self.nMaxInstances
-        if is_locked:
-            locker_pids = [int(lock.split(".")[1]) for lock in locks]
-            self.logger.warning(f'{self.name} is locked by processes: {locker_pids}. Will block new instances until unlocked.')
-            return False
-        save_json(self.lockFile, {
-            'pid': os.getpid(),
-            'name': self.name,
-        })
-        # CAUTION: race condition: saving needs a sec, it's up to application to await lockfile
-        return True
-
-    def unlock(self):
-        try:
-            os.remove(self.lockFile)
-        except FileNotFoundError:
-            self.logger.warning(f'{self.name} already unlocked. Safely ignored.')
-            return False
-        except Exception:
-            failure = traceback.format_exc()
-            self.logger.error(f"""\
-Failed to unlock {self.name}:
-Details:
-{failure}
-
-Advice:
-- Delete the lock by hand: {self.lockFile}""")
-            return False
-        return True
-
-    def unlock_all(self):
-        locks = glob.glob(osp.join(osp.dirname(self.lockFile), f'lock_{osp.basename(self.name)}.*.lock.json'))
-        for lock in locks:
-            os.remove(lock)
-        return True
-
-    def is_locked(self):
-        return osp.isfile(self.lockFile)
-
-    def handle_signal(self, sig, frame):
-        msg = f'Terminated due to signal: {signal.Signals(sig).name}; Will unlock'
-        self.logger.warning(msg)
-        self.unlock()
-        raise RuntimeError(msg)
-
-
 def rerun_lock(name, folder=None, logger=glogger, max_instances=1):
     """Decorator for reentrance locking on functions"""
 
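Note: only rerun_lock's signature and docstring are visible in this hunk; assuming it wraps RerunLock.lock()/unlock() around the decorated call (an assumption, not confirmed by this diff), usage would look like:

```python
import kkpyutil as util

@util.rerun_lock(name='nightly_job', max_instances=1)  # names illustrative
def nightly_job():
    ...  # exclusive work

nightly_job()
```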
@@ -2534,75 +2639,6 @@ def inspect_obj(obj):
     return {'type': type_name, 'attrs': attrs, 'repr': repr(obj), 'details': details}
 
 
-class Cache:
-    """
-    cross-session caching: using temp-file to retrieve data based on hash changes
-    - constraints:
-    - data retrieval/parsing is expensive
-    - one cache per data-source
-    - cache is a mediator b/w app and data-source as a retriever only, cuz user's saving intent is always towards source, no need to cache a saving action
-    - for cross-session caching, save hash into cache, then when instantiating the cache object, always load hash from cache to compare with incoming hash
-    - app must provide retriever function: retriever(src) -> json_data
-    - because it'd cost the same to retrieve data from a json-file source as from cache, no json default is provided
-    - e.g., loading a complex tree-structure from a file:
-    - tree_cache = Cache('/path/to/file.tree', lambda src: load_data(src), '/tmp/my_app')
-    - # ... later
-    - cached_tree_data = tree_cache.retrieve()
-    """
-
-    def __init__(self, data_source, data_retriever, cache_dir=get_platform_tmp_dir(), cache_type='cache', algo='checksum', source_seed='6ba7b810-9dad-11d1-80b4-00c04fd430c8'):
-        assert algo in ['checksum', 'mtime']
-        self.srcURL = data_source
-        self.retriever = data_retriever
-        # use a fixed namespace for each data-source to ensure inter-session consistency
-        namespace = uuid.UUID(str(source_seed))
-        uid = str(uuid.uuid5(namespace, self.srcURL))
-        self.cacheFile = osp.join(cache_dir, f'{uid}.{cache_type}.json')
-        self.hashAlgo = algo
-        # first comparison needs the hash saved by the previous session, if any
-        self.prevSrcHash = load_json(self.cacheFile).get('hash') if osp.isfile(self.cacheFile) else None
-
-    def retrieve(self):
-        if self._compare_hash():
-            return self.update()
-        return load_json(self.cacheFile)['data']
-
-    def update(self):
-        """
-        - update cache directly
-        - useful when app needs to force update cache
-        """
-        data = self.retriever(self.srcURL)
-        container = {
-            'data': data,
-            'hash': self.prevSrcHash,
-        }
-        save_json(self.cacheFile, container)
-        return data
-
-    def _compare_hash(self):
-        in_src_hash = self._compute_hash()
-        if changed := in_src_hash != self.prevSrcHash or self.prevSrcHash is None:
-            self.prevSrcHash = in_src_hash
-        return changed
-
-    def _compute_hash(self):
-        hash_algo_map = {
-            'checksum': self._compute_hash_as_checksum,
-            'mtime': self._compute_hash_as_modified_time,
-        }
-        return hash_algo_map[self.hashAlgo]()
-
-    def _compute_hash_as_checksum(self):
-        return get_md5_checksum(self.srcURL)
-
-    def _compute_hash_as_modified_time(self):
-        try:
-            return osp.getmtime(self.srcURL)
-        except FileNotFoundError:
-            return None
-
-
 def mem_caching(maxsize=None):
     """
     - per-process lru caching for multiple data sources
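Note: only mem_caching's signature and first docstring line are visible here; assuming it returns an lru_cache-style decorator keyed per function (an assumption based on the maxsize parameter), usage might look like:

```python
import kkpyutil as util

@util.mem_caching(maxsize=128)  # assumed decorator-factory behaviour
def fetch_config(path):
    return util.load_json(path)  # cached per process after the first call
```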
@@ -2844,18 +2880,6 @@ def indent(code_or_lines, spaces_per_indent=4):
     return '\n'.join(indented) if isinstance(code_or_lines, str) else indented
 
 
-def find_log_path(logger):
-    """
-    - logger must be a python logger
-    """
-    for handler in logger.handlers:
-        if isinstance(handler, logging.FileHandler):
-            return handler.baseFilename
-    # use next() to get the first handler
-    # if not found, raise StopIteration
-    return next(filter(lambda h: isinstance(h, logging.FileHandler), logger.handlers), None)
-
-
 def collect_file_tree(root):
     return [file for file in glob.glob(osp.join(root, '**'), recursive=True) if osp.isfile(file)]
 
@@ -2951,27 +2975,7 @@ def json_from_text(json_str):
     except json.JSONDecodeError as e:
         return None, e
 
-
-class OfflineJSON:
-    def __init__(self, file_path):
-        self.path = file_path
-
-    def exists(self):
-        return osp.isfile(self.path)
-
-    def load(self):
-        return load_json(self.path) if self.exists() else None
-
-    def save(self, data: dict):
-        save_json(self.path, data)
-
-    def merge(self, props: dict):
-        data = self.load()
-        if not data:
-            return self.save(props)
-        data.update(props)
-        self.save(data)
-        return data
+# endregion
 
 
 def _test():
The remaining files (LICENSE, README.md, kkpyutil_helper/windows/kkttssave.ps1, and kkpyutil_helper/windows/kkttsspeak.ps1) are unchanged.