python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,118 @@
1
+ import inspect
2
+ import logging
3
+ import os
4
+
5
+ # Note: This is a core module, which cannot have local imports!
6
+ # Using the logging module above to not add the alog module dependency here,
7
+ # since that requires more dependencies to be pulled.
8
+ # At the end, alog.log(...) is exactly the same as logging.log(...) in substance.
9
+
10
+
11
def _get_loc(path, lineno):
    """Return the text of line ``lineno`` (1-based) of ``path``, or None.

    None is returned when ``path`` is not a regular file, when it cannot be
    read or decoded, or when ``lineno`` is outside the file.
    """
    if not os.path.isfile(path):
        return None

    try:
        with open(path, mode='r') as fd:
            lines = fd.read().splitlines()
    except (OSError, UnicodeDecodeError):
        # This helper only decorates failure messages: an unreadable source
        # file must not replace the assertion error being reported.
        return None

    # Line numbers are 1-based, so the last valid one equals len(lines), and
    # values below 1 must not wrap around through negative indexing.
    return lines[lineno - 1] if 1 <= lineno <= len(lines) else None
17
+
18
+
19
def _get_caller_info(n_back):
    """Describe the call site ``n_back`` frames above this function's caller.

    The result is "<file>:<line>: <source text>" when ``_get_loc()`` can
    provide the source line, and "<file>:<line>" otherwise.
    """
    # Entry 0 of inspect.stack() is this function itself, hence the +1.
    frame_record = inspect.stack()[n_back + 1]
    info = inspect.getframeinfo(frame_record[0])
    source_line = _get_loc(info.filename, info.lineno)
    if not source_line:
        return f'{info.filename}:{info.lineno}'

    return f'{info.filename}:{info.lineno}: {source_line.lstrip()}'
27
+
28
+
29
def _report_fail(level, op, *args, **kwargs):
    """Log a failed check at ``level`` and raise ``AssertionError``.

    Args:
        level: logging level used for the failure message.
        op: None/empty for a plain check, a callable for function based
            checks, or the textual operator (e.g. '==') of a binary check.
        *args: the callable arguments, or the two operands of a binary check.
        **kwargs: ``msg`` (optional user message), plus ``res`` and ``res_op``
            (expected result and comparison operator) for checks on the
            result of a callable.

    Raises:
        AssertionError: always, carrying the same text that was logged.
    """
    fmsg = kwargs.get('msg')
    # Must be called directly from here: 2 frames back skips this function's
    # caller (the public check_*() wrapper) and lands on the user call site.
    cinfo = _get_caller_info(2)
    if fmsg:
        cinfo = f'{cinfo}; {fmsg}'

    if not op:
        msg = f'Check failed from {cinfo}'
    elif callable(op):
        fname = getattr(op, '__name__', str(op))
        arg_list = ', '.join(str(arg) for arg in args)
        # Key off the presence of the operator instead of the truthiness of
        # the expected result, so that falsy expectations (0, None, '', ...)
        # still show up in the message.
        if 'res_op' in kwargs:
            res_op = kwargs['res_op']
            res = kwargs.get('res')
            msg = f'{fname}({arg_list}) {res_op} {res} failed from {cinfo}'
        else:
            msg = f'{fname}({arg_list}) failed from {cinfo}'
    else:
        assert len(args) == 2, len(args)
        msg = f'{args[0]} {op} {args[1]} failed from {cinfo}'

    logging.log(level, msg)

    raise AssertionError(msg)
53
+
54
+
55
def check(a, level=logging.ERROR, msg=None):
    """Fail (log at ``level`` and raise ``AssertionError``) when ``a`` is falsy."""
    if a:
        return

    _report_fail(level, None, msg=msg)
58
+
59
+
60
def check_fn(fn, *args, level=logging.ERROR, msg=None):
    """Fail when ``fn(*args)`` returns a falsy value."""
    if fn(*args):
        return

    _report_fail(level, fn, *args, msg=msg)
63
+
64
+
65
def check_fnres_eq(res, fn, *args, level=logging.ERROR, msg=None):
    """Fail unless ``fn(*args)`` compares equal to ``res``."""
    outcome = fn(*args)
    if outcome == res:
        return

    _report_fail(level, fn, *args, res=res, res_op='==', msg=msg)
68
+
69
+
70
def check_fnres_ne(res, fn, *args, level=logging.ERROR, msg=None):
    """Fail when ``fn(*args)`` compares equal to ``res``."""
    outcome = fn(*args)
    if outcome == res:
        _report_fail(level, fn, *args, res=res, res_op='!=', msg=msg)
73
+
74
+
75
def check_is_none(a, level=logging.ERROR, msg=None):
    """Fail unless ``a`` is None."""
    if a is None:
        return

    _report_fail(level, '==', a, 'None', msg=msg)
78
+
79
+
80
def check_is_not_none(a, level=logging.ERROR, msg=None):
    """Fail when ``a`` is None."""
    if a is not None:
        return

    _report_fail(level, '!=', a, 'None', msg=msg)
83
+
84
+
85
def check_in(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a`` is contained in ``b``."""
    if a in b:
        return

    _report_fail(level, 'in', a, b, msg=msg)
88
+
89
+
90
def check_eq(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a == b``."""
    if a == b:
        return

    _report_fail(level, '==', a, b, msg=msg)
93
+
94
+
95
def check_ne(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a != b``."""
    if a != b:
        return

    _report_fail(level, '!=', a, b, msg=msg)
98
+
99
+
100
def check_le(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a <= b``."""
    if a <= b:
        return

    _report_fail(level, '<=', a, b, msg=msg)
103
+
104
+
105
def check_ge(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a >= b``."""
    if a >= b:
        return

    _report_fail(level, '>=', a, b, msg=msg)
108
+
109
+
110
def check_lt(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a < b``."""
    if a < b:
        return

    _report_fail(level, '<', a, b, msg=msg)
113
+
114
+
115
def check_gt(a, b, level=logging.ERROR, msg=None):
    """Fail unless ``a > b``."""
    if a > b:
        return

    _report_fail(level, '>', a, b, msg=msg)
118
+
@@ -0,0 +1,121 @@
1
+ import ast
2
+ import logging
3
+ import os
4
+
5
+ from . import alog
6
+
7
+
8
def _ends_with_return(slist):
    """Tell whether the statement list ``slist`` ends with a ``return``.

    A falsy ``slist`` (empty list or None) is handed back unchanged, so the
    result is only meaningful in a boolean context.
    """
    if not slist:
        return slist

    return isinstance(slist[-1], ast.Return)
10
+
11
+
12
def _ifize_stmt_list(slist):
    """Fold the statements following the first IF of ``slist`` into that IF.

    Only the first ``ast.If`` of the list is considered, and only when exactly
    one of its branches ends with a RETURN. The statements after the IF are
    then (recursively processed and) appended to the branch which does not
    return, and a new, shortened list ending with the IF is returned.
    In every other case ``slist`` itself is returned, and nothing is modified.
    """
    first_if = next(
        (pos for pos, stmt in enumerate(slist) if isinstance(stmt, ast.If)),
        None,
    )
    # Nothing to fold without an IF, or when the IF is already the last statement.
    if first_if is None or first_if + 1 >= len(slist):
        return slist

    ifnode = slist[first_if]
    else_branch = ifnode.orelse or []

    # If one branch of an IF ends with RETURN, and the other does not,
    # move the remaining statements after the IF into the branch which
    # does not have the RETURN.
    # Example (open 'else'), turns:
    #
    #   op1
    #   if cond:
    #     op2
    #     return x
    #   else:
    #     op3
    #   op4
    #   return y
    #
    # Into:
    #   op1
    #   if cond:
    #     op2
    #     return x
    #   else:
    #     op3
    #     op4
    #     return y
    #
    # Example (open 'if'), turns:
    #
    #   op1
    #   if cond:
    #     op2
    #   else:
    #     op3
    #     return x
    #   op4
    #   return y
    #
    # Into:
    #   op1
    #   if cond:
    #     op2
    #     op4
    #     return y
    #   else:
    #     op3
    #     return x
    #
    # IOW, when a fold happens, the first IF of the AST instruction list
    # becomes the last instruction of the list.
    if _ends_with_return(ifnode.body):
        if not _ends_with_return(else_branch):
            else_branch.extend(_ifize_stmt_list(slist[first_if + 1:]))
            ifnode.orelse = else_branch
            slist = slist[: first_if + 1]
    elif _ends_with_return(else_branch):
        ifnode.body.extend(_ifize_stmt_list(slist[first_if + 1:]))
        slist = slist[: first_if + 1]

    return slist
83
+
84
+
85
def ifize(node):
    """Apply the IF folding of ``_ifize_stmt_list()`` to the whole ``node`` tree.

    The tree is modified in place, and nothing is returned.
    """
    for field, value in ast.iter_fields(node):
        if isinstance(value, ast.AST):
            ifize(value)
        elif isinstance(value, list):
            # Children go first, so that statements moved into an IF branch
            # by the fold below have already been processed.
            for child in value:
                if isinstance(child, ast.AST):
                    ifize(child)

            folded = _ifize_stmt_list(value)
            # The very same list object comes back when nothing was folded.
            if folded is not value:
                setattr(node, field, folded)
97
+
98
+
99
def dump(node, indent=None):
    """Return ``ast.dump()`` of ``node``.

    When ``indent`` is None, the AST_INDENT environment variable (if set) is
    converted to int and used as indentation.
    """
    if indent is None:
        env_indent = os.getenv('AST_INDENT')
        indent = None if env_indent is None else int(env_indent)

    return ast.dump(node, indent=indent)
106
+
107
+
108
def static_eval(node, eval_globals, eval_locals, filename=None):
    """Compile and run a single AST node.

    Args:
        node: an ``ast.stmt`` (executed) or an ``ast.expr`` (evaluated).
        eval_globals: globals dictionary handed to exec()/eval().
        eval_locals: locals mapping handed to exec()/eval().
        filename: name reported for the compiled code ('<ast_eval>' if unset).

    Returns:
        The value of the expression for ``ast.expr`` nodes, None for
        statements.

    Raises:
        ValueError: (through alog.xraise) if ``node`` is neither a statement
            nor an expression.

    NOTE: this runs exec()/eval() on the compiled node, so it must never be
    fed with AST built from untrusted input.
    """
    source_name = filename or '<ast_eval>'

    if isinstance(node, ast.stmt):
        # Programmatically built nodes may lack lineno/col_offset, which
        # compile() requires; fix_missing_locations() only fills the gaps.
        mod = ast.fix_missing_locations(ast.Module(body=[node], type_ignores=[]))
        cmod = compile(mod, filename=source_name, mode='exec')
        exec(cmod, eval_globals, eval_locals)

        return None

    if isinstance(node, ast.expr):
        expr = ast.fix_missing_locations(ast.Expression(body=node))
        cexpr = compile(expr, filename=source_name, mode='eval')

        return eval(cexpr, eval_globals, eval_locals)

    alog.xraise(ValueError, f'Invalid AST node: {dump(node)}')
121
+
@@ -0,0 +1,189 @@
1
+ import asyncio
2
+ import collections
3
+ import multiprocessing
4
+ import os
5
+ import queue
6
+ import threading
7
+
8
+ import numpy as np
9
+
10
+ from . import cleanups
11
+ from . import global_namespace as gns
12
+ from . import multiprocessing as mp
13
+ from . import work_results as wres
14
+
15
+
16
class AsyncContext:
    """Registry of named async context managers which have been entered.

    Contexts are entered when added, and exited either one by one with
    remove(), or all together with close() (which is also what leaving the
    ``async with`` block of this object does).
    """

    def __init__(self):
        # name -> (context manager, value returned by its __aenter__())
        self._contexts = dict()

    async def add(self, name, context):
        """Enter ``context``, register it as ``name``, return its enter value."""
        result = await context.__aenter__()
        self._contexts[name] = (context, result)

        return result

    async def remove(self, name):
        """Unregister and exit the context named ``name`` (KeyError if unknown)."""
        context, _ = self._contexts.pop(name)
        await context.__aexit__(None, None, None)

    async def get(self, name, context_ctor):
        """Return the enter value of ``name``, creating it with ``context_ctor()``
        when it is not registered yet."""
        context_result = self._contexts.get(name)
        if context_result is None:
            return await self.add(name, context_ctor())

        return context_result[1]

    async def close(self, *exc):
        """Exit every registered context, and empty the registry.

        ``exc`` is the optional (type, value, traceback) triple forwarded to
        each ``__aexit__()``. Every context gets exited even if some exit
        fails. The first exception raised by an ``__aexit__()`` is re-raised
        once all of them ran; otherwise the exception in ``exc`` (if any) is
        re-raised when at least one context did not suppress it.
        """
        rexc = exc or (None, None, None)
        # Detach the registry up front, so that it ends up empty even when
        # an exit handler fails.
        contexts, self._contexts = self._contexts, dict()

        needs_raise = False
        exit_error = None
        for context, _ in contexts.values():
            try:
                exitres = await context.__aexit__(*rexc)
            except Exception as ex:
                # Keep going, to avoid leaking the remaining contexts.
                if exit_error is None:
                    exit_error = ex
                continue

            needs_raise = needs_raise or not exitres

        if exit_error is not None:
            raise exit_error
        if needs_raise and rexc[1] is not None:
            raise rexc[1]

    async def __aenter__(self):
        return self

    async def __aexit__(self, *exc):
        await self.close(*exc)

        return False
58
+
59
+
60
# A unit of work: caller chosen ID, plus the callable building the awaitable.
Work = collections.namedtuple('Work', 'id, ctor')


class _Worker:
    """Process running an asyncio loop which executes the work queued to it.

    Results are posted on the shared ``out_queue`` as (worker ID, work ID,
    result) tuples.
    """

    def __init__(self, mpctx, wid, out_queue):
        self._wid = wid
        self._out_queue = out_queue
        self._in_queue = mpctx.Queue()
        self._proc = mp.create_process(self._run, context=mpctx)
        self._proc.start()

    def _run(self):
        """Process body: run the loop until the feeder thread shuts it down."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        feeder = threading.Thread(target=self._work_feeder, args=(loop,), daemon=True)
        feeder.start()

        loop.run_forever()
        feeder.join()

    async def _task_runner(self, context, work):
        """Await the task built by ``work.ctor(context=...)`` and post its result.

        An exception raised while building or awaiting the task is posted as
        a ``wres.WorkException`` result instead.
        """
        try:
            result = await work.ctor(context=context)
        except Exception as ex:
            result = wres.WorkException(ex, workid=work.id)

        self._out_queue.put((self._wid, work.id, result))

    def _work_feeder(self, loop):
        """Thread body: hand queued work to ``loop`` until a None shows up."""
        context = AsyncContext()

        work = self._in_queue.get()
        while work is not None:
            asyncio.run_coroutine_threadsafe(self._task_runner(context, work), loop)
            work = self._in_queue.get()

        asyncio.run_coroutine_threadsafe(self._shutdown(context, loop), loop)

    @classmethod
    async def _shutdown(cls, context, loop):
        """Close the shared async context, then stop the loop."""
        await context.close()
        loop.stop()

    def stop(self):
        """Queue the termination marker and wait for the process to exit."""
        self._in_queue.put(None)
        self._proc.join()

    def enqueue_work(self, work_id, work_ctor):
        """Queue a new ``Work`` item for this worker."""
        item = Work(id=work_id, ctor=work_ctor)
        self._in_queue.put(item)
114
+
115
+
116
class AsyncManager:
    """Spread async work over a pool of ``_Worker`` processes.

    Every new work item goes to the worker which currently has the lowest
    number of items queued and not yet fetched.
    """

    def __init__(self, num_workers=None, mpctx=multiprocessing):
        num_workers = num_workers or os.cpu_count()

        self._out_queue = mpctx.Queue()
        self._workers = [
            _Worker(mpctx, wid, self._out_queue) for wid in range(num_workers)
        ]
        self._lock = threading.Lock()
        # Per worker count of the items queued whose result was not fetched yet.
        self._queued = np.zeros(num_workers, dtype=np.int64)

    def close(self):
        """Stop all the workers."""
        for worker in self._workers:
            worker.stop()

    def enqueue_work(self, work_id, work_ctor):
        """Queue the work on the least loaded worker."""
        with self._lock:
            wid = np.argmin(self._queued)
            self._queued[wid] += 1
            target = self._workers[wid]

        target.enqueue_work(work_id, work_ctor)

    def fetch_result(self, block=True, timeout=None):
        """Return the next (work ID, result) pair.

        None is returned when no result became available (queue empty in non
        blocking mode, or ``timeout`` expired).
        """
        try:
            wid, work_id, result = self._out_queue.get(block=block, timeout=timeout)
        except queue.Empty:
            return None

        with self._lock:
            self._queued[wid] -= 1

        return work_id, result

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

        return False
156
+
157
+
158
class AsyncRunner:
    """Event loop running within a daemon thread, accepting coroutines."""

    def __init__(self):
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self._async_runner, daemon=True)
        self._thread.start()
        # stop() is registered with the cleanups module; the returned ID is
        # kept on the instance.
        self._cid = cleanups.register(self.stop)

    def _async_runner(self):
        """Thread body: bind the loop to this thread and run it until stopped."""
        loop = self._loop
        asyncio.set_event_loop(loop)
        loop.run_forever()

    def stop(self):
        """Ask the loop to stop, and wait for its thread to finish."""
        loop = self._loop
        loop.call_soon_threadsafe(loop.stop)
        self._thread.join()

    def run(self, coro):
        """Schedule ``coro`` on the loop, returning its concurrent future."""
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)

        return future
176
+
177
+
178
+
179
# Global namespace variable holding the shared AsyncRunner, built on demand
# through the ``defval`` factory.
# NOTE(review): fork_init=True presumably makes forked children build their
# own runner instead of inheriting the parent one -- confirm against
# global_namespace.
_ASYNC_RUNNER = gns.Var(
    f'{__name__}.ASYNC_RUNNER',
    fork_init=True,
    defval=lambda: AsyncRunner(),
)


def _async_runner():
    """Return the shared ``AsyncRunner`` stored in the global namespace."""
    runner = gns.get(_ASYNC_RUNNER)

    return runner


def run_async(coro):
    """Schedule ``coro`` on the shared runner, returning what its run() returns."""
    runner = _async_runner()

    return runner.run(coro)
189
+
@@ -0,0 +1,63 @@
1
+ import signal
2
+ import threading
3
+
4
+ from . import fin_wrap as fw
5
+ from . import signal as sgn
6
+
7
+
8
# Guards _HANDLERS. It must be re-entrant: Python level signal handlers run
# in the main thread, in between bytecodes, so a SIGINT delivered while that
# same thread holds the lock (inside BreakControl.open()/close()) would
# deadlock forever on a plain Lock.
_LOCK = threading.RLock()
# The BreakControl objects currently open.
_HANDLERS = set()


def _handler(sig, frame):
    """SIGINT handler: flag every open BreakControl with the interrupted frame.

    Returns ``sgn.HANDLED``.
    NOTE(review): presumably this tells the signal module that the signal has
    been consumed -- confirm against its dispatcher.
    """
    with _LOCK:
        for h in _HANDLERS:
            h.trigger(frame)

    return sgn.HANDLED
18
+
19
+
20
class BreakControl:
    """Records whether a SIGINT arrived while the object was open.

    The module SIGINT handler gets installed when the first object is opened,
    and removed when the last one is closed.
    """

    def __init__(self):
        self._hit = False
        self._frame = None

    def open(self):
        """Register this object for SIGINT notifications, returning itself."""
        with _LOCK:
            first = not _HANDLERS
            if first:
                sgn.signal(signal.SIGINT, _handler)
            _HANDLERS.add(self)

        return self

    def close(self):
        """Unregister this object (KeyError if it is not currently open)."""
        with _LOCK:
            _HANDLERS.remove(self)
            last = not _HANDLERS
            if last:
                sgn.unsignal(signal.SIGINT, _handler)

    def __enter__(self):
        return self.open()

    def __exit__(self, *exc):
        self.close()

        return False

    def trigger(self, frame):
        """Record the break, together with the frame it was delivered with."""
        self._frame = frame
        self._hit = True

    def hit(self):
        """Tell whether trigger() has been called."""
        return self._hit

    def frame(self):
        """Return the frame passed to the last trigger(), or None."""
        return self._frame
57
+
58
+
59
def create():
    """Open a new ``BreakControl`` and wrap it with ``fw.fin_wrap_np()``.

    The object's close() is handed over as second argument of the wrap.
    NOTE(review): presumably it runs as finalizer once the wrapper goes
    away -- confirm against fin_wrap.
    """
    control = BreakControl().open()

    return fw.fin_wrap_np(control, control.close)
63
+
@@ -0,0 +1,35 @@
1
+ import collections
2
+
3
+
4
# n: running counter, left: items still buffered, data: the item itself.
IterData = collections.namedtuple('IterData', 'n, left, data')


class BufferedIterator:
    """Iterate ``data`` in order, through a buffer of ``buffer_size`` items.

    Items are held in a circular buffer, so each one is yielded only once
    ``buffer_size`` further items have been pulled from the source (or the
    source is exhausted). ``IterData.left`` tells how many items remain
    buffered after the yielded one.
    """

    def __init__(self, data, buffer_size):
        self._data = data
        self._buffer_size = buffer_size

    def generate(self):
        """Generator yielding one ``IterData`` for every source item."""
        ring = []
        n = 0
        for item in self._data:
            if len(ring) >= self._buffer_size:
                # Buffer full: swap the oldest entry with the incoming item.
                slot = n % len(ring)
                oldest, ring[slot] = ring[slot], item
                yield IterData(n=n, left=len(ring), data=oldest)
            else:
                ring.append(item)

            # Counts every item pulled from the source.
            n += 1

        # Source exhausted: flush the buffer, oldest entry first.
        for left in reversed(range(len(ring))):
            yield IterData(n=n, left=left, data=ring[n % len(ring)])
            n += 1

    def __iter__(self):
        return self.generate()
34
+
35
+