python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,239 @@
1
+ import collections
2
+ import heapq
3
+ import os
4
+ import threading
5
+ import time
6
+ import weakref
7
+
8
+ from . import alog
9
+ from . import cond_waiter as cwait
10
+ from . import utils as ut
11
+
12
+
13
+ _ExceptionWrapper = collections.namedtuple('ExceptionWrapper', 'exception')
14
+
15
+
16
class Task:
    """Callable bundle of a function, its arguments and an optional result sink.

    Invoking the task runs ``fn(*args, **kwargs)``. When ``aresult`` was given,
    its ``set()`` method receives either the function's return value or, if the
    function raised an ``Exception``, an ``_ExceptionWrapper`` carrying it.
    """

    def __init__(self, fn, args=None, kwargs=None, aresult=None):
        self._fn = fn
        self._args = args if args else ()
        self._kwargs = kwargs if kwargs else {}
        self._aresult = aresult

    def __call__(self):
        try:
            outcome = self._fn(*self._args, **self._kwargs)
        except Exception as ex:
            # The failure is logged and converted into a value, so it never
            # propagates into the thread that runs the task.
            alog.exception(ex, exmsg='Exception while running task')
            outcome = _ExceptionWrapper(exception=ex)

        sink = self._aresult
        if sink is not None:
            sink.set(outcome)
33
+
34
+
35
class _Void:
    """Marker type; its only instance, ``VOID``, stands for "no value set"."""
    pass

# Module-level sentinel compared by identity (``is VOID``).
VOID = _Void()
39
+
40
class AsyncResult:
    """Single-slot holder that lets one thread wait for a value set by another.

    The slot starts out as the ``VOID`` sentinel. ``set()`` stores a value and
    wakes every waiter; ``wait()`` returns the stored value, re-raising the
    original exception when the value is an ``_ExceptionWrapper``.
    """

    def __init__(self):
        self._result = VOID
        self._cond = threading.Condition(lock=threading.Lock())

    def set(self, result):
        """Store ``result`` and notify all threads blocked in ``wait()``."""
        cond = self._cond
        with cond:
            self._result = result
            cond.notify_all()

    def wait(self, timeout=None):
        """Return the stored value, waiting up to ``timeout`` seconds for it.

        If the timeout expires before a value is set, ``VOID`` is returned.
        """
        with self._cond:
            # A single wait (no loop) is used because the condition is only
            # notified by set(), after the result has been stored.
            if self._result is VOID:
                self._cond.wait(timeout=timeout)
            outcome = self._result

        if isinstance(outcome, _ExceptionWrapper):
            raise outcome.exception

        return outcome
62
+
63
+
64
class Queue:
    """FIFO task queue with a stop counter, guarded by one lock/condition pair.

    ``stop()`` and ``start()`` increment and decrement a counter; while it is
    positive, ``get()`` does not block on an empty queue and returns ``None``.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._cond = threading.Condition(lock=self._lock)
        self._queue = collections.deque()
        self._stopped = 0

    def put(self, task):
        """Append ``task``, wake one consumer and return the queue length."""
        with self._cond:
            self._queue.append(task)
            self._cond.notify()

        # The length is sampled after the lock has been released.
        return len(self._queue)

    def get(self, timeout=None):
        """Pop the oldest item, or return ``None`` on stop or wait timeout."""
        with self._cond:
            # Pending items are always served, even when the queue is stopped.
            while not self._queue:
                if self._stopped > 0:
                    return None
                if not self._cond.wait(timeout=timeout):
                    return None

            return self._queue.popleft()

    def start(self):
        """Undo one previous ``stop()`` call."""
        with self._cond:
            self._stopped -= 1

    def stop(self):
        """Mark the queue as stopped and wake every blocked consumer."""
        with self._cond:
            self._stopped += 1
            self._cond.notify_all()

    def __len__(self):
        with self._cond:
            return len(self._queue)
100
+
101
+
102
class _Worker:
    """Daemon thread that runs tasks fetched from a queue until it yields None.

    ``executor`` is a callable (the executor passes a weak reference) that
    returns the owning executor, or ``None`` once that executor is gone.
    """

    def __init__(self, executor, queue, name, idle_timeout=None):
        self.executor = executor
        self.queue = queue
        self.idle_timeout = idle_timeout
        runner = threading.Thread(target=self._run, name=name, daemon=True)
        # The attribute is assigned before start() so that `ident` is usable
        # from inside the running thread.
        self.thread = runner
        runner.start()

    def _unregister(self):
        owner = self.executor()
        if owner is None:
            return
        owner._unregister_worker(self)

    def _run(self):
        task = self.queue.get(timeout=self.idle_timeout)
        while task is not None:
            task()
            # Drop the reference before blocking again, so a finished task is
            # not kept alive while the worker sits idle.
            del task
            task = self.queue.get(timeout=self.idle_timeout)

        self._unregister()

    @property
    def ident(self):
        return self.thread.ident

    def join(self):
        self.thread.join()
134
+
135
+
136
def _compute_num_threads(min_threads, max_threads):
    """Fill in defaults for the executor thread limits.

    Args:
        min_threads: Requested minimum, or ``None`` to derive it as
            ``max(2, max_threads // 4)``.
        max_threads: Requested maximum, or ``None`` to derive it as
            ``max(8, int(cpu_count * 1.5))``.

    Returns:
        The ``(min_threads, max_threads)`` tuple with defaults applied.
    """
    if max_threads is None:
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to 1 instead of failing on `None * 1.5`.
        max_threads = max(8, int((os.cpu_count() or 1) * 1.5))
    if min_threads is None:
        min_threads = max(2, max_threads // 4)

    return min_threads, max_threads
143
+
144
+
145
class Executor:
    """Thread pool whose workers are created lazily as tasks are submitted.

    Tasks are pushed onto a shared ``Queue``. A new ``_Worker`` is started on
    submission while fewer than ``min_threads`` workers exist, or while there
    is a backlog and fewer than ``max_threads`` workers exist. The first
    ``min_threads`` workers block on the queue without a timeout; any worker
    beyond that exits after ``idle_timeout`` seconds without a task.

    Args:
        max_threads: Upper bound on workers (defaulted by
            ``_compute_num_threads`` when ``None``).
        min_threads: Number of workers that never time out (defaulted by
            ``_compute_num_threads`` when ``None``).
        name_prefix: Prefix for worker thread names; ``'Executor'`` if falsy.
        idle_timeout: Idle timeout for the extra workers; when falsy the value
            comes from ``ut.getenv('EXECUTOR_IDLE_TIMEOUT', dtype=int,
            defval=5)``.
    """

    def __init__(self, max_threads=None, min_threads=None, name_prefix=None,
                 idle_timeout=None):
        self._min_threads, self._max_threads = _compute_num_threads(min_threads, max_threads)
        self._name_prefix = name_prefix or 'Executor'
        self._idle_timeout = idle_timeout or ut.getenv('EXECUTOR_IDLE_TIMEOUT', dtype=int, defval=5)
        self._lock = threading.Lock()
        self._queue = Queue()
        # Maps thread ident -> _Worker; guarded by self._lock.
        self._workers = dict()
        self._thread_counter = 0
        # Notified (under self._lock) when the last worker unregisters.
        self._idle_cond = threading.Condition(lock=self._lock)

    def _unregister_worker(self, worker):
        """Forget ``worker``; called by the worker thread right before it exits."""
        alog.spam(f'Unregistering worker thread {worker.ident}')
        with self._lock:
            self._workers.pop(worker.ident, None)
            if not self._workers:
                self._idle_cond.notify_all()

    def _new_name(self):
        """Return the next worker thread name; the caller holds ``self._lock``."""
        self._thread_counter += 1

        return f'{self._name_prefix}-{self._thread_counter}'

    def _maybe_add_worker(self, queued):
        """Start one more worker if the pool limits and backlog call for it.

        Must be called with ``self._lock`` held. ``queued`` is the queue length
        reported by the ``put()`` that triggered this call.
        """
        num_threads = len(self._workers)
        if ((queued > 1 and num_threads < self._max_threads) or num_threads < self._min_threads):
            # Up to min_threads the workers should never quit, so they get None
            # as timeout, while the ones after that get _idle_timeout, which
            # makes them quit if no task is fetched within such timeout.
            # `num_threads` counts the workers that already exist, so the one
            # created here is beyond the permanent set as soon as
            # num_threads == min_threads (hence `>=`, not `>`).
            idle_timeout = self._idle_timeout if num_threads >= self._min_threads else None

            worker = _Worker(weakref.ref(self), self._queue, self._new_name(),
                             idle_timeout=idle_timeout)

            self._workers[worker.ident] = worker

            alog.spam(f'New thread #{num_threads} with ID {worker.ident}')

    def _submit_task(self, task):
        with self._lock:
            queued = self._queue.put(task)
            self._maybe_add_worker(queued)

    def submit(self, fn, *args, **kwargs):
        """Queue ``fn(*args, **kwargs)`` for execution, discarding its result."""
        self._submit_task(Task(fn, args=args, kwargs=kwargs))

    def submit_result(self, fn, *args, **kwargs):
        """Queue ``fn(*args, **kwargs)`` and return an ``AsyncResult`` for it."""
        aresult = AsyncResult()

        self._submit_task(Task(fn, args=args, kwargs=kwargs, aresult=aresult))

        return aresult

    def shutdown(self):
        """Stop the queue and block until every worker has unregistered."""
        alog.debug0(f'Stopping executor')
        self._queue.stop()
        with self._lock:
            alog.debug0(f'Waiting executor workers exit')
            while self._workers:
                self._idle_cond.wait()

    def wait_for_idle(self, timeout=None, timegen=None, waiter=None):
        """Wait until all workers have drained the queue and exited.

        The queue is stopped for the duration of the wait and restarted
        afterwards, so the executor stays usable.

        Returns:
            ``True`` once no workers remain, ``False`` if ``waiter.wait()``
            reports a falsy result first (presumably a timeout; see
            ``cond_waiter.CondWaiter``).
        """
        alog.debug0(f'Waiting for idle ...')

        waiter = waiter or cwait.CondWaiter(timeout=timeout, timegen=timegen)
        self._queue.stop()
        try:
            with self._lock:
                while self._workers:
                    if not waiter.wait(self._idle_cond):
                        return False
        finally:
            self._queue.start()
            alog.debug0(f'Waiting for idle ... done')

        return True
223
+
224
+
225
# Guards the lazy creation of the shared executor below.
_LOCK = threading.Lock()
_EXECUTOR = None

def common_executor():
    """Return the process-wide ``Executor``, creating it on first use.

    The thread limit is read through ``ut.getenv('EXECUTOR_WORKERS',
    dtype=int)`` and the thread name prefix from the ``EXECUTOR_NAME``
    environment variable (default ``'CommonExecutor'``).
    """
    global _EXECUTOR

    with _LOCK:
        executor = _EXECUTOR
        if executor is None:
            executor = Executor(
                max_threads=ut.getenv('EXECUTOR_WORKERS', dtype=int),
                name_prefix=os.getenv('EXECUTOR_NAME', 'CommonExecutor'),
            )
            _EXECUTOR = executor

    return executor
239
+
@@ -0,0 +1,29 @@
1
+ from . import gfs
2
+
3
+
4
class FileOverwrite:
    """Context manager that writes to a temporary file, then replaces ``dest``.

    ``gfs.path_of(dest)`` decides the mode of operation: when it returns a
    path, the ``with`` body receives a stream opened on a ``gfs.TempFile``
    created next to that path, and the temporary file replaces the path on
    exit. When it returns ``None`` (presumably ``dest`` is already a stream;
    confirm against ``gfs.path_of``), ``dest`` itself is handed to the body and
    nothing happens on exit.

    Args:
        dest: Destination path or object, as understood by ``gfs.path_of``.
        mode: Open mode forwarded to ``gfs.TempFile``.
        **kwargs: Extra keyword arguments forwarded to ``gfs.TempFile``.
    """

    def __init__(self, dest, mode='w', **kwargs):
        self._dest = dest
        self._path = gfs.path_of(dest)
        self._mode = mode
        self._kwargs = kwargs
        self._tmpfile = None

    def __enter__(self):
        if self._path is None:
            return self._dest

        self._tmpfile = gfs.TempFile(nspath=self._path, mode=self._mode, **self._kwargs)

        return self._tmpfile.open()

    def __exit__(self, *exc):
        # Clear the attribute first so the instance never keeps a reference to
        # a temporary file that has already been closed.
        tmpfile, self._tmpfile = self._tmpfile, None
        if tmpfile is not None:
            try:
                # Commit only when the body finished without raising;
                # otherwise the destination would be replaced with partial
                # output.
                if not exc or exc[0] is None:
                    tmpfile.replace(self._path)
            finally:
                # NOTE(review): close() is expected to discard a temporary
                # file that was not moved into place; confirm in gfs.TempFile.
                tmpfile.close()

        return False
29
+
@@ -0,0 +1,77 @@
1
+ import weakref
2
+
3
+ from . import assert_checks as tas
4
+
5
+
6
def _finalizer_name(name):
    """Return the attribute name under which the finalizer for ``name`` is kept."""
    return '__{}_finalizer'.format(name)
8
+
9
+
10
class fin_wrap:
    """Attach ``obj`` to ``parent.<name>`` together with a finalizer.

    With a non-None ``obj``, this instance is stored on ``parent`` under the
    finalizer attribute name and a ``weakref.finalize`` on this instance is
    registered to call ``finfn``. With ``obj`` set to ``None``, a previously
    stored finalizer attribute (if any) is removed from ``parent`` and its
    finalizer is either run (``cleanup=True``) or detached.
    """

    def __init__(self, parent, name, obj, finfn=None, cleanup=False):
        setattr(parent, name, obj)
        fname = _finalizer_name(name)

        if obj is None:
            previous = getattr(parent, fname, None)
            if previous is None:
                return

            delattr(parent, fname)
            if cleanup:
                previous._finalizer()
            else:
                previous._finalizer.detach()
            return

        tas.check_is_not_none(finfn, msg='Missing finalize function argument')

        setattr(parent, fname, self)
        self._finalizer = weakref.finalize(self, finfn)
28
+
29
+
30
class _Wrapper:
    """Empty attribute holder used by ``fin_wrap_np()`` as the parent object."""
    pass

def fin_wrap_np(obj, finfn, name='v'):
    """Return a fresh holder carrying ``obj`` with ``finfn`` as its finalizer.

    ``obj`` is stored through ``fin_wrap`` on a new ``_Wrapper`` instance under
    the attribute ``name``; that holder is returned to the caller.
    """
    wrapper = _Wrapper()
    fin_wrap(wrapper, name, obj, finfn=finfn)

    return wrapper
38
+
39
+
40
# Attribute names kept in the wrapper's own __dict__; everything else is
# forwarded to the wrapped object.
_OBJ_NAME = 'wrapped_obj'
_RESERVED_NAMES = {_OBJ_NAME, _finalizer_name(_OBJ_NAME)}

class FinWrapper:
    """Proxy that forwards attribute access to a wrapped object.

    The wrapped object and its finalizer are installed through ``fin_wrap`` and
    live in this instance's ``__dict__`` under the reserved names. Reads,
    writes and deletions of any other attribute go to the wrapped object. The
    instance ``__dict__`` is always fetched through ``super()`` because
    ``__getattribute__`` itself is overridden.
    """

    def __init__(self, obj, finfn):
        fin_wrap(self, _OBJ_NAME, obj, finfn=finfn)

    def __getattribute__(self, name):
        target = super().__getattribute__('__dict__')[_OBJ_NAME]

        return getattr(target, name)

    def __getattr__(self, name):
        # Only reached when __getattribute__ raised AttributeError; repeats the
        # lookup on the wrapped object.
        target = super().__getattribute__('__dict__')[_OBJ_NAME]

        return getattr(target, name)

    def __setattr__(self, name, value):
        own = super().__getattribute__('__dict__')
        if name not in _RESERVED_NAMES:
            setattr(own[_OBJ_NAME], name, value)
            return

        own[name] = value

    def __delattr__(self, name):
        own = super().__getattribute__('__dict__')
        if name not in _RESERVED_NAMES:
            delattr(own[_OBJ_NAME], name)
            return

        del own[name]
77
+
@@ -0,0 +1,47 @@
1
+ import math
2
+
3
+
4
def exp_bias(nx):
    """Return the exponent bias for an ``nx``-bit exponent field (127 for 8)."""
    return (1 << (nx - 1)) - 1


def real_to_bits(v, nx, nm):
    """Split ``v`` into ``(sign, exponent, mantissa)`` integers.

    Args:
        v: Finite real value to convert.
        nx: Number of exponent bits.
        nm: Number of stored mantissa bits.

    Returns:
        ``(s, e, m)`` where ``s`` is 1 for negative values, ``e`` is the biased
        exponent (0 for zero) and ``m`` is the mantissa truncated to ``nm + 1``
        bits. For non-zero values ``m`` still includes the implicit leading
        one at bit ``nm``; ``pack_bits`` strips it.

    Values whose exponent does not fit in ``nx`` bits (including subnormals)
    and non-finite values are not handled here.
    """
    xm, xe = math.frexp(math.fabs(v))

    # frexp() yields a mantissa in [0.5, 1), i.e. half of the 1.f form, hence
    # the "- 1" on the exponent and the "+ 1" on the mantissa scale.
    e = (xe + exp_bias(nx) - 1) if xm != 0 else 0
    m = int(xm * (1 << (nm + 1)))

    return 1 if v < 0 else 0, e, m


def pack_bits(s, e, m, nx, nm):
    """Pack sign, exponent and mantissa into one ``1 + nx + nm`` bit integer.

    Only the low ``nm`` bits of ``m`` are stored. This drops the implicit
    leading one returned by ``real_to_bits``, which would otherwise be OR-ed
    into the lowest exponent bit and corrupt every even exponent.
    """
    return (s << (nx + nm)) | (e << nm) | (m & ((1 << nm) - 1))


def real_to_packedbits(v, nx, nm):
    """Encode ``v`` as a packed integer with ``nx`` exponent and ``nm`` mantissa bits."""
    s, e, m = real_to_bits(v, nx, nm)

    return pack_bits(s, e, m, nx, nm)


def _bits(v, pos, n):
    """Return the ``n``-bit field of ``v`` starting at bit ``pos``."""
    return (v >> pos) & ((1 << n) - 1)


def packedbits_to_real(v, nx, nm):
    """Decode a packed integer produced by ``pack_bits`` back into a float.

    Handles signed zero, subnormals (exponent field 0), normal values and
    signed infinity; an all-ones exponent with a non-zero mantissa yields NaN.
    """
    s, e, m = _bits(v, nx + nm, 1), _bits(v, nm, nx), _bits(v, 0, nm)

    if e == ((1 << nx) - 1):
        rm = math.inf if m == 0 else math.nan
    elif e == 0:
        # Zero or subnormal: there is no implicit leading one and the exponent
        # is fixed at 1 - bias.
        rm = math.ldexp(float(m), 1 - exp_bias(nx) - nm)
    else:
        # value = 1.f * 2**(e - bias), with 1.f = (m | 1 << nm) / 2**nm.
        # ldexp() avoids converting a huge power of two to float.
        rm = math.ldexp(float(m | (1 << nm)), e - exp_bias(nx) - nm)

    return rm if s == 0 else -rm
47
+
File without changes
@@ -0,0 +1,127 @@
1
+ import hashlib
2
+ import os
3
+ import shutil
4
+
5
+ from .. import alog
6
+ from .. import assert_checks as tas
7
+ from .. import cached_file as chf
8
+ from .. import fs_base as fsb
9
+ from .. import fs_utils as fsu
10
+ from .. import osfd
11
+
12
+
13
class FileReader:
    """Copies a local file, whole or one block at a time, into a block file."""

    def __init__(self, path):
        self._path = path

    @classmethod
    def tag(cls, sres):
        """Build a tag from the size and mtime of the stat result ``sres``."""
        return chf.make_tag(size=sres.st_size, mtime=sres.st_mtime)

    def support_blocks(self):
        return True

    def read_block(self, bpath, offset, size):
        """Copy data from the source file into ``bpath`` and return its length.

        When ``offset`` is ``chf.CachedBlockFile.WHOLE_OFFSET`` the entire file
        is copied; otherwise a single ``os.read`` of up to ``size`` bytes at
        ``offset`` is written out. ``bpath`` is created with mode ``0o440``.
        """
        create_flags = os.O_CREAT | os.O_TRUNC | os.O_WRONLY

        if offset == chf.CachedBlockFile.WHOLE_OFFSET:
            with open(self._path, mode='rb') as src:
                dest_fd = os.open(bpath, create_flags, mode=0o440)
                with open(dest_fd, mode='wb') as dest:
                    shutil.copyfileobj(src, dest)

            return os.path.getsize(bpath)

        with osfd.OsFd(self._path, os.O_RDONLY) as rfd:
            with osfd.OsFd(bpath, create_flags, mode=0o440) as wfd:
                if os.lseek(rfd, offset, os.SEEK_SET) != offset:
                    alog.xraise(RuntimeError, f'Unable to seek {self._path} at offset {offset}')
                data = os.read(rfd, size)
                os.write(wfd, data)

        return len(data)
42
+
43
+
44
class FileFs(fsb.FsBase):
    """``fsb.FsBase`` implementation that maps each operation onto local files.

    URLs are used directly as local paths and every method delegates to the
    matching ``os`` / builtin call.
    """

    ID = 'file'
    IDS = (ID,)

    def __init__(self, cache_iface=None, **kwargs):
        super().__init__(cache_iface=cache_iface, **kwargs)

    def norm_url(self, url):
        return fsu.normpath(url)

    def _create_tag(self, sres):
        return FileReader.tag(sres)

    def stat(self, url):
        """Return a ``fsb.DirEntry`` describing ``url``."""
        info = os.stat(url)
        entry_name = os.path.basename(url)
        entry_tag = self._create_tag(info)

        return fsb.DirEntry(name=entry_name,
                            path=url,
                            etag=entry_tag,
                            st_mode=info.st_mode,
                            st_size=info.st_size,
                            st_ctime=info.st_ctime,
                            st_mtime=info.st_mtime)

    def open(self, url, mode, **kwargs):
        # Extra keyword arguments are accepted but not forwarded.
        return open(url, mode)

    def remove(self, url):
        os.remove(url)

    def rename(self, src_url, dest_url):
        os.rename(src_url, dest_url)

    def replace(self, src_url, dest_url):
        os.replace(src_url, dest_url)

    def mkdir(self, url, mode=None):
        perms = mode if mode else 0o777
        os.mkdir(url, mode=perms)

    def makedirs(self, url, mode=None, exist_ok=None):
        perms = mode if mode else 0o777
        tolerate = exist_ok if exist_ok else False
        os.makedirs(url, mode=perms, exist_ok=tolerate)

    def rmdir(self, url):
        os.rmdir(url)

    def rmtree(self, url, ignore_errors=None):
        quiet = ignore_errors if ignore_errors else False
        fsu.safe_rmtree(url, ignore_errors=quiet)

    def list(self, url):
        """Yield one ``fsb.DirEntry`` per entry found in directory ``url``."""
        with os.scandir(url) as entries:
            for entry in entries:
                info = entry.stat()
                entry_path = os.path.join(url, entry.name)
                entry_tag = self._create_tag(info)

                yield fsb.DirEntry(name=entry.name,
                                   path=entry_path,
                                   etag=entry_tag,
                                   st_mode=info.st_mode,
                                   st_size=info.st_size,
                                   st_ctime=info.st_ctime,
                                   st_mtime=info.st_mtime)

    def put_file(self, url, data_gen):
        """Write every chunk produced by ``data_gen`` to ``url``."""
        with open(url, mode='wb') as fd:
            fd.writelines(data_gen)

    def get_file(self, url):
        """Yield the content of ``url`` in the chunks from ``fsu.enum_chunks``."""
        with open(url, mode='rb') as fd:
            yield from fsu.enum_chunks(fd)

    def as_local(self, url, **kwargs):
        # Already a local path, nothing to fetch.
        return url

    def link(self, src_url, dest_url):
        os.link(src_url, dest_url)

    def symlink(self, src_url, dest_url):
        os.symlink(src_url, dest_url)


# File system classes exported by this module.
FILE_SYSTEMS = (FileFs,)
127
+