python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,507 @@
1
+ import collections
2
+ import datetime
3
+ import functools
4
+ import hashlib
5
+ import os
6
+ import re
7
+ import shutil
8
+ import time
9
+ import yaml
10
+
11
+ from . import alog
12
+ from . import assert_checks as tas
13
+ from . import core_utils as cu
14
+ from . import file_overwrite as fow
15
+ from . import fin_wrap as fw
16
+ from . import fs_utils as fsu
17
+ from . import lockfile as lockf
18
+ from . import no_except as nox
19
+ from . import obj
20
+ from . import osfd
21
+ from . import tempdir as tmpd
22
+
23
+
24
# Record describing a block file whose content ID no longer matches the
# current metadata (collected by CachedBlockFile.purge_blocks()).
_DroppedBlock = collections.namedtuple(
    'DroppedBlock',
    ['name', 'sres', 'cid', 'offset'],
)
25
+
26
+
27
class Meta(obj.Obj):
    """Metadata record of a cached file; all behavior comes from obj.Obj."""
29
+
30
+
31
class CachedBlockFile:
    """On-disk cache of a remote file, stored as block files in a directory.

    The cache directory holds a YAML ``META`` file, a ``blocks`` directory with
    the cached block files, and a ``links`` directory with hard links exposing
    a fully cached file under the base name of its URL.
    """

    METAFILE = 'META'
    BLOCKSDIR = 'blocks'
    LINKSDIR = 'links'
    # Pseudo offset naming the block file which stores the whole content.
    WHOLE_OFFSET = -1
    # Number of hex digits of the tag hash used as content ID (cid).
    CID_SIZE = 16
    # Default block size (32 MiB).
    BLOCKSIZE = 32 * 1024**2

    def __init__(self, path, reader, meta=None, close_fn=None):
        """Bind the cache directory `path` to `reader`.

        Args:
          path: Cache directory of this file.
          reader: Object providing `support_blocks()` and
            `read_block(path, offset, size)`.
          meta: Metadata to use; loaded from `path` when None.
          close_fn: Optional callable invoked once by close().
        """
        self._path = path
        self._reader = reader
        self._close_fn = close_fn
        self.meta = self.load_meta(path) if meta is None else meta

    @classmethod
    def default_meta(cls):
        """Return a Meta with no URL, no size and the default block size."""
        return Meta(url=None, size=None, block_size=cls.BLOCKSIZE)

    @classmethod
    def prepare_meta(cls, meta, **kwargs):
        """Merge `meta` and `kwargs` over the defaults and compute the cid.

        The cid is the first CID_SIZE hex digits of the SHA1 of the `tag`
        attribute, so the merged metadata must provide a `tag`.
        """
        cmeta = cls.default_meta()
        cmeta.update_from(meta)
        cmeta.update(**kwargs)

        cid = hashlib.sha1(cmeta.tag.encode()).hexdigest()[: cls.CID_SIZE]
        cmeta.update(cid=cid)

        return cmeta

    @classmethod
    def remove(cls, path):
        """Remove the cache directory; return True on success, False on error."""
        try:
            fsu.safe_rmtree(path, ignore_errors=True)

            return True
        except Exception:
            # Best effort: report the failure to the caller instead of raising,
            # but do not swallow KeyboardInterrupt/SystemExit.
            return False

    @classmethod
    def create(cls, path, meta):
        """Create the cache directory layout at `path` and store `meta`.

        The layout is built in a temporary path and renamed into place, so a
        partially created cache directory is never visible at `path`.
        """
        tpath = fsu.temp_path(nspath=path)
        try:
            os.makedirs(tpath, exist_ok=True)
            os.mkdir(cls.blocks_dir(tpath))
            os.mkdir(cls.links_dir(tpath))

            cls.save_meta(tpath, meta)

            os.rename(tpath, path)
        except BaseException:
            # Cleanup on any failure (including interrupts), then re-raise.
            shutil.rmtree(tpath, ignore_errors=True)
            raise

    def _fblock_path(self, offset):
        """Return the path of the block file for `offset` of this cached file."""
        return self.fblock_path(self._path, self.meta.cid, offset)

    def _fetch_block(self, offset):
        """Make sure the block at `offset` is cached, fetching it if missing.

        Returns:
          A `(size, block_path)` tuple, where size is the size of the block
          file (or the value returned by the reader when just fetched).
        """
        bpath = self._fblock_path(offset)
        with lockf.LockFile(bpath):
            if (sres := fsu.stat(bpath)) is None:
                # Fetch into a temporary path and move it in place only when
                # some data has been read.
                tpath = fsu.temp_path(nspath=bpath)
                try:
                    rsize = self._reader.read_block(tpath, offset,
                                                    self.meta.block_size)
                    if rsize > 0:
                        os.replace(tpath, bpath)
                        if offset == self.WHOLE_OFFSET:
                            self._make_link(bpath)
                except BaseException:
                    fsu.maybe_remove(tpath)
                    raise
            else:
                rsize = sres.st_size

        return rsize, bpath

    def _make_link(self, bpath):
        """Hard-link the whole-content block to its read-only local link path.

        Failures are logged (with the error) and otherwise ignored.
        """
        lpath = self.local_link()
        if not os.path.exists(lpath):
            try:
                os.makedirs(os.path.dirname(lpath), exist_ok=True)
                os.link(bpath, lpath)
                os.chmod(lpath, 0o444)
            except Exception as ex:
                alog.warning(f'Unable to create link: {bpath} -> {lpath}: {ex}')

    def _try_block(self, boffset, offset):
        """Read up to one block of data at `offset` of the `boffset` block file.

        Returns:
          The bytes read, or None if the block file does not exist or is
          shorter than `offset`.
        """
        bpath = self._fblock_path(boffset)
        try:
            with osfd.OsFd(bpath, os.O_RDONLY) as fd:
                sres = os.stat(fd)
                if sres.st_size >= offset:
                    os.lseek(fd, offset, os.SEEK_SET)
                    size = min(self.meta.block_size, sres.st_size - offset)

                    return os.read(fd, size)
        except FileNotFoundError:
            pass

        return None

    def _translate_offset(self, offset):
        """Map a file offset to a `(block_offset, offset_within_block_file)` pair."""
        has_whole_content = True
        if self._reader.support_blocks():
            # Even if the reader supports blocks, we might have cached the whole
            # content at once, so make sure we do not waste the cached whole
            # content.
            bpath = self._fblock_path(self.WHOLE_OFFSET)
            has_whole_content = os.path.exists(bpath)

        if has_whole_content:
            boffset = self.WHOLE_OFFSET
        else:
            boffset, offset = offset, 0

        return boffset, offset

    def close(self):
        """Invoke the close function (if any) exactly once."""
        if self._close_fn is not None:
            self._close_fn()
            self._close_fn = None

    def cacheall(self):
        """Cache the whole content; return the local link path, or None if empty."""
        size, bpath = self._fetch_block(self.WHOLE_OFFSET)

        return self.local_link() if size > 0 else None

    def read_block(self, offset):
        """Return the data of the block starting at `offset`.

        `offset` must be a multiple of the metadata block size. Returns None
        when the block cannot be read.
        """
        tas.check_eq(offset % self.meta.block_size, 0,
                     msg=f'Block offset ({offset}) must be multiple of {self.meta.block_size}')

        boffset, offset = self._translate_offset(offset)

        data = self._try_block(boffset, offset)
        if data is None:
            read_size, _ = self._fetch_block(boffset)
            if read_size > 0:
                data = self._try_block(boffset, offset)

        return data

    def size(self):
        """Return the file size, caching the whole content if it is unknown.

        When the size is missing from the metadata, it is taken from the whole
        content block and saved back into the metadata file.
        """
        size = self.meta.size
        if size is None:
            size, _ = self._fetch_block(self.WHOLE_OFFSET)
            meta = self.meta.clone(size=size)
            self.save_meta(self._path, meta)
            self.meta = meta

        return size

    def locked(self):
        """Return a LockFile on the cache directory path."""
        return lockf.LockFile(self._path)

    def local_link(self):
        """Return the path of the local link of this cached file."""
        return self.flink_path(self._path, self.meta.cid, self.meta.url)

    @classmethod
    def blocks_dir(cls, path):
        """Return the blocks directory of the cache directory `path`."""
        return os.path.join(path, cls.BLOCKSDIR)

    @classmethod
    def links_dir(cls, path):
        """Return the links directory of the cache directory `path`."""
        return os.path.join(path, cls.LINKSDIR)

    @classmethod
    def fblock_path(cls, path, cid, offset):
        """Return the block file path for `cid` and `offset`.

        Negative offsets (WHOLE_OFFSET) name the whole-content block, which
        carries no offset suffix.
        """
        block_id = f'block-{cid}-{offset}' if offset >= 0 else f'block-{cid}'

        return os.path.join(cls.blocks_dir(path), block_id)

    @classmethod
    def parse_block_file(cls, fname):
        """Parse a block file name into `(cid, offset)`, or None if not a block."""
        m = re.match(r'block\-([^\-]+)(\-(\d+))?$', fname)
        if m:
            offset = m.group(3)
            offset = int(offset) if offset is not None else cls.WHOLE_OFFSET

            return m.group(1), offset

        return None

    @classmethod
    def flink_path(cls, path, cid, url):
        """Return the link path for `cid`, named after the base name of `url`."""
        lpath = os.path.join(cls.links_dir(path), cid)

        return os.path.join(lpath, os.path.basename(url))

    @classmethod
    def purge_blocks(cls, path, max_age=None):
        """Remove block files (and links) of content IDs other than the current one.

        Args:
          path: Cache directory to purge.
          max_age: Minimum age in seconds (by mtime) for a stale block to be
            removed. Falsy values select the GFS_CACHE_MAXAGE environment
            variable (default 300).

        Returns:
          The metadata loaded from `path`.
        """
        meta = cls.load_meta(path)

        bpath = cls.blocks_dir(path)
        dropped = []
        with os.scandir(bpath) as sdit:
            for dentry in sdit:
                if dentry.is_file():
                    pbf = cls.parse_block_file(dentry.name)
                    if pbf is not None:
                        cid, offset = pbf
                        if cid != meta.cid:
                            dropped.append(_DroppedBlock(name=dentry.name,
                                                         sres=dentry.stat(),
                                                         cid=cid,
                                                         offset=offset))

        max_age = max_age or int(os.getenv('GFS_CACHE_MAXAGE', 300))
        for dblock in dropped:
            if (time.time() - dblock.sres.st_mtime) > max_age:
                try:
                    alog.info(f'Removing block file {dblock.name} from {path} ({meta})')
                    os.remove(os.path.join(bpath, dblock.name))
                except Exception as ex:
                    alog.warning(f'Unable to purge block file from {dblock.name} from {path}: {ex}')

                # Drop the links directory of the stale content ID as well.
                lpath = cls.flink_path(path, dblock.cid, meta.url)
                nox.qno_except(fsu.safe_rmtree, os.path.dirname(lpath), ignore_errors=True)

        return meta

    @classmethod
    def fmeta_path(cls, path):
        """Return the path of the metadata file of the cache directory `path`."""
        return os.path.join(path, cls.METAFILE)

    @classmethod
    def save_meta(cls, path, meta):
        """Write `meta` as YAML into the metadata file of `path`."""
        mpath = cls.fmeta_path(path)
        with fow.FileOverwrite(mpath) as fd:
            yaml.dump(meta.as_dict(), fd, default_flow_style=False)

    @classmethod
    def load_meta(cls, path):
        """Load and return the metadata stored in the cache directory `path`."""
        mpath = cls.fmeta_path(path)
        with open(mpath, mode='r') as fd:
            meta = yaml.safe_load(fd)

        return Meta(**meta)

    @classmethod
    def validate(cls, path):
        """Return the metadata of `path`, or None if it cannot be loaded."""
        try:
            return cls.load_meta(path)
        except Exception:
            # A missing or unreadable META file marks an invalid cache entry.
            return None
270
+
271
+
272
class CachedFile:
    """Read-only, seekable, binary file-like view over a CachedBlockFile.

    Data is read one block at a time from the underlying CachedBlockFile and
    the most recently used block is kept in memory.
    """

    def __init__(self, cbf, block_size=None):
        """Wrap `cbf`, reading in chunks of `block_size` bytes.

        Args:
          cbf: The CachedBlockFile providing `size()`, `read_block()` and
            `close()`.
          block_size: Read granularity; defaults to the cbf metadata block
            size.
        """
        # Binds cbf as self.cbf (presumably arranging for cbf.close to be
        # called on finalization -- see fin_wrap).
        fw.fin_wrap(self, 'cbf', cbf, finfn=cbf.close)
        self._block_size = block_size or cbf.meta.block_size
        self._offset = 0
        self._block_start = 0
        self._block = None

    def close(self):
        """Release the underlying CachedBlockFile, if still attached."""
        cbf = self.cbf
        if cbf is not None:
            fw.fin_wrap(self, 'cbf', None, cleanup=True)

    @property
    def closed(self):
        """True once the underlying CachedBlockFile has been released."""
        return self.cbf is None

    def seek(self, pos, whence=os.SEEK_SET):
        """Move the read position and return the new absolute offset.

        The resulting offset must lie within `[0, size]`.
        """
        if whence == os.SEEK_SET:
            offset = pos
        elif whence == os.SEEK_CUR:
            offset = self._offset + pos
        elif whence == os.SEEK_END:
            offset = self.cbf.size() + pos
        else:
            alog.xraise(ValueError, f'Invalid seek mode: {whence}')

        tas.check_le(offset, self.cbf.size(), msg='Offset out of range')
        tas.check_ge(offset, 0, msg='Offset out of range')

        self._offset = offset

        return offset

    def tell(self):
        """Return the current read position."""
        return self._offset

    def _ensure_buffer(self, offset):
        """Load the block containing `offset` (if not already buffered).

        Returns:
          The position of `offset` within the buffered block.
        """
        boffset = offset - self._block_start
        if self._block is None or boffset < 0 or boffset >= len(self._block):
            block_offset = (offset // self._block_size) * self._block_size

            self._block = memoryview(self.cbf.read_block(block_offset))
            self._block_start = block_offset
            boffset = offset - block_offset

        return boffset

    def _max_size(self, size):
        """Clamp `size` to the bytes left in the file (negative means all)."""
        available = self.cbf.size() - self._offset

        return available if size < 0 else min(size, available)

    def read(self, size=-1):
        """Read up to `size` bytes (everything up to EOF when negative)."""
        rsize = self._max_size(size)

        parts = []
        while rsize > 0:
            boffset = self._ensure_buffer(self._offset)

            csize = min(rsize, len(self._block) - boffset)
            if csize <= 0:
                # The block holds no data at this offset (shorter than the
                # recorded size): stop instead of looping forever.
                break
            parts.append(self._block[boffset: boffset + csize])
            self._offset += csize
            rsize -= csize

        return b''.join(parts)

    def read1(self, size=-1):
        """Same as read()."""
        return self.read(size=size)

    def peek(self, size=0):
        """Return up to `size` bytes from the current block without advancing."""
        if size > 0:
            boffset = self._ensure_buffer(self._offset)
            csize = min(size, len(self._block) - boffset)

            return self._block[boffset: boffset + csize].tobytes()

        return b''

    def readline(self, size=-1):
        """Read up to and including the next newline, EOF, or `size` bytes."""
        rsize = self._max_size(size)

        parts = []
        while rsize > 0:
            boffset = self._ensure_buffer(self._offset)

            csize = min(rsize, len(self._block) - boffset)
            if csize <= 0:
                # No data available at this offset: avoid spinning forever.
                break
            cdata = self._block[boffset: boffset + csize]

            pos = cu.vfind(cdata, b'\n')
            if pos >= 0:
                parts.append(cdata[: pos + 1])
                self._offset += pos + 1
                break

            # No newline in this chunk: keep its data, since the line
            # continues in the next block (or ends at EOF / size limit).
            parts.append(cdata)
            self._offset += csize
            rsize -= csize

        return b''.join(parts)

    def flush(self):
        """No-op; the file is read-only."""
        pass

    def readable(self):
        """Readable until closed."""
        return not self.closed

    def seekable(self):
        """Seekable until closed."""
        return not self.closed

    def writable(self):
        """Never writable."""
        return False

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

        return False
392
+
393
+
394
class CacheInterface:
    """Entry point for opening URLs through the block cache at `cache_dir`."""

    def __init__(self, cache_dir):
        self._cache_dir = cache_dir

    def _open(self, cfpath, url, meta, reader, close_fn=None, **kwargs):
        """Create or refresh the cache entry at `cfpath` and open it.

        Runs under the lock of `cfpath`; extra keyword arguments are accepted
        and ignored.
        """
        with lockf.LockFile(cfpath):
            meta = CachedBlockFile.prepare_meta(meta, url=url)
            stored = CachedBlockFile.validate(cfpath)
            if stored is None:
                # No valid entry yet: build the directory layout from scratch.
                CachedBlockFile.create(cfpath, meta)
            elif stored.cid != meta.cid:
                # The content ID changed: record the new metadata.
                alog.debug(f'Updating meta of {cfpath}: {stored} -> {meta}')
                CachedBlockFile.save_meta(cfpath, meta)

            block_file = CachedBlockFile(cfpath, reader, meta=meta, close_fn=close_fn)

            return CachedFile(block_file)

    def open(self, url, meta, reader, **kwargs):
        """Open `url` as a CachedFile.

        With `uncached=True` the entry lives in a fresh temporary directory
        which is removed when the file is closed.
        """
        if kwargs.pop('uncached', False):
            scratch_dir = tmpd.create()
            cfpath = _get_cache_path(scratch_dir, url)
            close_fn = functools.partial(fsu.safe_rmtree, scratch_dir, ignore_errors=True)
        else:
            cfpath, close_fn = _get_cache_path(self._cache_dir, url), None

        return self._open(cfpath, url, meta, reader, close_fn=close_fn, **kwargs)

    def as_local(self, url, meta, reader, **kwargs):
        """Cache the whole content of `url` and return its local link path."""
        cached = self.open(url, meta, reader, **kwargs)
        local_path = cached.cbf.cacheall()
        tas.check_is_not_none(local_path, msg=f'Unable to materialize a local path: {url}')

        return local_path
430
+
431
+
432
def _get_cache_path(cache_dir, url):
    """Return the cache entry path of `url`: `cache_dir` joined with SHA1(url)."""
    digest = hashlib.sha1(url.encode())

    return os.path.join(cache_dir, digest.hexdigest())
436
+
437
+
438
# Per cache entry statistics collected by cleanup_cache().
_CacheFileStats = collections.namedtuple(
    'CacheFileStats',
    ['path', 'mtime', 'size', 'meta'],
)
441
+
442
def cleanup_cache(cache_dir, max_age=None, max_size=None):
    """Purge stale blocks and trim the cache under `cache_dir`.

    Every entry directory gets its stale blocks purged. Entries are then
    visited from most to least recently modified metadata, and each entry
    which brings the running total to `max_size` or beyond is removed.

    Args:
      cache_dir: Root directory of the cache; nothing happens if missing.
      max_age: Passed through to CachedBlockFile.purge_blocks().
      max_size: Size budget in bytes. Falsy values select the
        GFS_CACHE_MAXSIZE environment variable (default 16 GiB).
    """
    alog.verbose(f'Cache cleanup running: {cache_dir}')

    if not os.path.isdir(cache_dir):
        return

    entries = []
    with os.scandir(cache_dir) as scanner:
        for dentry in scanner:
            if not dentry.is_dir():
                continue

            cfpath = os.path.join(cache_dir, dentry.name)
            with lockf.LockFile(cfpath):
                try:
                    meta = CachedBlockFile.purge_blocks(cfpath, max_age=max_age)

                    cfsize = fsu.du(cfpath)
                    meta_stat = os.stat(CachedBlockFile.fmeta_path(cfpath))
                    stats = _CacheFileStats(path=cfpath,
                                            mtime=meta_stat.st_mtime,
                                            size=cfsize,
                                            meta=meta)
                    entries.append(stats)
                except Exception as ex:
                    alog.warning(f'Unable to purge blocks from {cfpath}: {ex}')

    # Newest entries first, so that they are the ones kept within the budget.
    entries.sort(key=lambda cfs: cfs.mtime, reverse=True)
    size_limit = max_size or int(os.getenv('GFS_CACHE_MAXSIZE', 16 * 1024**3))

    total_size = 0
    for cfs in entries:
        total_size += cfs.size
        if total_size < size_limit:
            continue

        alog.info(f'Dropping cache for {cfs.meta.url} stored at {cfs.path}')
        with lockf.LockFile(cfs.path):
            CachedBlockFile.remove(cfs.path)

    alog.debug0(f'Cache size was {cu.size_str(total_size)} (size will be trimmed '
                f'to {cu.size_str(size_limit)})')
477
+
478
+
479
def make_tag(**kwargs):
    """Return the SHA1 hex digest of the `key=value` pairs joined by commas.

    Pairs are rendered in keyword argument order.
    """
    fields = [f'{key}={value}' for key, value in kwargs.items()]
    joined = ','.join(fields)

    return hashlib.sha1(joined.encode()).hexdigest()
483
+
484
+
485
# Minimum number of seconds between two cache cleanups (default 8 hours).
_CLEANUP_PERIOD = int(os.getenv('GFS_CACHE_CLEANUP_PERIOD', 8 * 3600))


def _cleanup_check(path):
    """Run a cache cleanup on `path` if one is due, and return `path`.

    The time of the last cleanup is tracked with the mtime of the
    `.last_cleanup` file stored inside `path`. Without such file, a cleanup is
    due whenever `path` is an existing directory.
    """
    marker = os.path.join(path, '.last_cleanup')
    marker_stat = fsu.stat(marker)
    if marker_stat is not None:
        due = time.time() > marker_stat.st_mtime + _CLEANUP_PERIOD
    else:
        due = os.path.isdir(path)

    if due:
        alog.debug(f'Triggering cache cleanup: {path}')
        cleanup_cache(path)
        # Rewriting the marker refreshes its mtime, restarting the period.
        with open(marker, mode='w') as marker_fd:
            stamp = datetime.datetime.now().isoformat(timespec='microseconds')
            marker_fd.write(stamp)

    return path
501
+
502
+
503
def get_cache_dir(path):
    """Return the `gfs` cache directory under `path`, cleaning it up if due."""
    base_path = fsu.normpath(path)
    cache_path = os.path.join(base_path, 'gfs')

    return _cleanup_check(cache_path)
507
+
@@ -0,0 +1,26 @@
1
+ import threading
2
+
3
+ from . import traceback as tb
4
+
5
+
6
# Guards every access to _TB.
_LOCK = threading.Lock()
# Call counters, keyed by the (filename, line number) of the calling frame.
_TB = dict()
8
+
9
+
10
def trigger(filename, count):
    """Tell whether the calling site has been hit fewer than `count` times.

    The calling site is the (filename, line number) of the frame returned by
    `tb.get_frame_after(filename)` (presumably the first frame outside of
    `filename` -- see the traceback module). Every call bumps the counter of
    the site.

    Args:
      filename: File name handed to `tb.get_frame_after()`.
      count: Maximum number of times the site is allowed to trigger.

    Returns:
      True if the site triggered fewer than `count` times before this call,
      or if no frame could be found.
    """
    frame = tb.get_frame_after(filename)
    if frame is None:
        return True

    # Do not name this local `tb`: doing so shadows the imported module for
    # the whole function and makes the lookup above raise UnboundLocalError.
    site = (frame.f_code.co_filename, frame.f_lineno)
    with _LOCK:
        hits = _TB.get(site, 0)
        _TB[site] = hits + 1

    return count > hits
21
+
22
+
23
def limit_call(count, fn, *args, _filename=None, **kwargs):
    """Call `fn(*args, **kwargs)` unless its call site already hit `count` calls.

    Returns the result of `fn`, or None when the call is suppressed.
    `_filename` overrides the file name handed to trigger() (this module's
    file by default).
    """
    source_file = _filename or __file__
    if not trigger(source_file, count):
        return None

    return fn(*args, **kwargs)
26
+
@@ -0,0 +1,13 @@
1
+ import functools
2
+
3
+
4
def select(fn, idx):
    """Wrap `fn` so that calls return only item `idx` of its result."""

    @functools.wraps(fn)
    def selector(*args, **kwargs):
        return fn(*args, **kwargs)[idx]

    return selector
13
+
@@ -0,0 +1,85 @@
1
+ import atexit
2
+ import collections
3
+ import threading
4
+
5
+ from . import alog
6
+ from . import global_namespace as gns
7
+
8
+
9
# A registered cleanup: the callable plus the arguments to call it with.
_Cleaner = collections.namedtuple('Cleaner', 'fn, args, kwargs')


class _Cleanups:
    """Registry of cleanup callables, run in reverse registration order."""

    def __init__(self):
        self._lock = threading.Lock()
        self._nextid = 0
        self._cleaners = dict()

        # The run() API is called from a "finally" clause of the multiprocessing
        # module, which is the preferred path since we know everything is up at
        # that time. But we also register an atexit callback for cases (like
        # child processes) which do not end up going out the multiprocessing
        # path (although every child process using this library should be
        # created with the multiprocessing.create_process() API).
        atexit.register(self.run)

    def register(self, fn, *args, **kwargs):
        """Register `fn(*args, **kwargs)` as cleanup and return its ID."""
        with self._lock:
            cid = self._nextid
            self._cleaners[cid] = _Cleaner(fn=fn, args=args, kwargs=kwargs)
            self._nextid += 1

        return cid

    def unregister(self, cid, run=False):
        """Remove the cleanup with ID `cid`, optionally running it.

        Returns:
          The removed _Cleaner, or None if `cid` is not registered.
        """
        with self._lock:
            cleaner = self._cleaners.pop(cid, None)

        # Run outside of the lock, as the cleaner is free to use this registry.
        if cleaner is not None and run:
            self._run_cleaner(cleaner)

        return cleaner

    def _run_cleaner(self, cleaner):
        """Call `cleaner`, logging (instead of propagating) its exceptions."""
        try:
            cleaner.fn(*cleaner.args, **cleaner.kwargs)
        except Exception as ex:
            alog.exception(ex, exmsg='Exception while running cleanups')

    def run(self):
        """Run and drop all the registered cleanups."""
        with self._lock:
            cleaners = self._cleaners
            self._cleaners = dict()

        # Sort by reverse ID, which is reverse register order.
        cids = sorted(cleaners.keys(), reverse=True)

        for cleaner in (cleaners[cid] for cid in cids):
            self._run_cleaner(cleaner)
58
+
59
+
60
# Global namespace variable holding the _Cleanups registry. The default value
# is built on demand by the lambda (NOTE(review): fork_init=True presumably
# re-creates the registry in forked children -- confirm in global_namespace).
_CLEANUPS = gns.Var(f'{__name__}.CLEANUPS',
                    fork_init=True,
                    defval=lambda: _Cleanups())

def _cleanups():
    """Return the _Cleanups registry stored in the global namespace."""
    return gns.get(_CLEANUPS)
66
+
67
+
68
def register(fn, *args, **kwargs):
    """Register `fn(*args, **kwargs)` as cleanup and return its ID."""
    registry = _cleanups()

    return registry.register(fn, *args, **kwargs)
70
+
71
+
72
# Decorator style registration.
def reg(fn):
    """Register `fn` (called with no arguments) as cleanup and return it."""
    _cleanups().register(fn)

    return fn
77
+
78
+
79
def unregister(cid, run=False):
    """Remove the cleanup with ID `cid`, running it first when `run` is true.

    Returns whatever the registry returns for the removed cleanup.
    """
    registry = _cleanups()

    return registry.unregister(cid, run=run)
81
+
82
+
83
def run():
    """Run all the cleanups currently registered."""
    registry = _cleanups()
    registry.run()
85
+