python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,98 @@
1
+ from . import assert_checks as tas
2
+ from . import core_utils as cu
3
+
4
+
5
class Pipeline:
  """A callable chain of processing elements.

  Each element is a callable; calling the pipeline feeds the input through
  every element in order. Iterator inputs and IterElement members get
  special dispatch (see _apply()).
  """

  def __init__(self, *elems):
    # Stored as a list so elements can be appended/popped after construction.
    self._elems = list(elems)

  def __len__(self):
    return len(self._elems)

  def __getitem__(self, i):
    return self._elems[i]

  def __iter__(self):
    return iter(self._elems)

  def append(self, elem):
    # Appends a callable element and returns the index it was inserted at.
    tas.check(callable(elem), msg=f'Pipeline elements must be callable: {type(elem)}')
    self._elems.append(elem)

    return len(self._elems) - 1

  def extend(self, elems):
    # Appends every element (each one goes through the append() checks).
    for elem in elems:
      self.append(elem)

  def pop(self, i=None):
    # Pops the element at index i, or the last element when i is None.
    return self._elems.pop(i) if i is not None else self._elems.pop()

  def elems(self):
    # Immutable snapshot of the current elements.
    return tuple(self._elems)

  def _apply(self, elem, data):
    # Dispatch matrix over (is data an iterator?, is elem an IterElement?).
    if cu.is_iterator(data):
      if isinstance(elem, IterElement):
        # IterElement consumes the whole iterator itself.
        return elem(data)
      else:
        # Lazily map a plain element over the iterator stream.
        return _iter_process(elem, data)
    elif isinstance(elem, IterElement):
      # Wrap a scalar into a one-element iterable for an IterElement.
      return elem([data])
    else:
      return elem(data)

  def __call__(self, x):
    y = x
    for elem in self._elems:
      y = self._apply(elem, y)

    return y

  def __repr__(self):
    return '\n'.join(f'[{i}] {repr(elem)}' for i, elem in enumerate(self._elems))

  def clone(self):
    # If a Pipeline elements has a state, it must implement the clone() API.
    elems = [cu.maybe_call_dv(elem, 'clone', elem) for elem in self._elems]

    return Pipeline(*elems)

  def flush(self):
    # Pushes a final stream through the elements, giving elements with a
    # flush() method the chance to emit whatever they have buffered.
    y = None
    for elem in self._elems:
      flush_fn = getattr(elem, 'flush', None)
      if callable(flush_fn):
        # NOTE(review): `y or ()` also replaces falsy non-None values
        # (e.g. exhausted/empty sequences) with () — presumably intentional.
        y = flush_fn(y or ())
      elif y is not None:
        y = self._apply(elem, y)

    return y
72
+
73
+
74
class HaltedPipeline(Exception):
  """Raised by members of iterative pipelines when they want to stop the
  stream of data, signaling that nothing more will be allowed through it.

  Returning an empty iterator/generator would not suffice, as that is a
  legal return value in case of batching elements. Think about a pipeline
  element which is a filter which should pass through the first N samples,
  for example.
  """
  pass
82
+
83
+
84
class IterElement:
  """Marker base class for pipeline elements which operate on iterators.

  The Pipeline can also be used with data which is returned as iterators,
  where there is not a 1:1 mapping between input and output. Think about a
  pipeline element which absorbs data and returns batches of it (or absorbs
  text and emits token indices). The non-iterator approach would not work
  there, as for many inputs there are no outputs at all (till the batch
  size is reached). When used in such fashion, pipeline elements should
  inherit from IterElement.
  """
  pass
93
+
94
+
95
+ def _iter_process(elem, data):
96
+ for x in data:
97
+ yield elem(x)
98
+
@@ -0,0 +1,50 @@
1
+ import functools
2
+ import io
3
+ import pickle
4
+
5
+ from . import alog
6
+ from . import module_utils as mu
7
+
8
+
9
class Unpickler(pickle.Unpickler):
  """A pickle.Unpickler supporting class remapping and safe-reference checks.

  Args:
    remaps: Optional dict mapping fully qualified names ('module.name') to
      the fully qualified names to be loaded in their place.
    safe_refs: Optional iterable of fully qualified names which are allowed
      to be unpickled; any other non-remapped reference raises RuntimeError.
  """

  def __init__(self, *args,
               remaps=None,
               safe_refs=None,
               **kwargs):
    super().__init__(*args, **kwargs)
    self._remaps = remaps or dict()
    self._safe_refs = set(safe_refs) if safe_refs is not None else None

  def find_class(self, module, name):
    # Resolve the reference through the remap table first; a remapped name
    # bypasses the safe-reference check (the remap entry itself is trusted).
    fqname = f'{module}.{name}'
    remap = self._remaps.get(fqname, fqname)
    if remap != fqname:
      alog.debug(f'Unpickle remapping: {fqname} -> {remap}')
    elif self._safe_refs is not None and fqname not in self._safe_refs:
      alog.xraise(RuntimeError, f'Unsafe reference: {fqname}')

    return mu.import_module_names(remap)[0]
28
+
29
+
30
def load(*args, **kwargs):
  """Build an Unpickler with the given arguments and run a single load."""
  return Unpickler(*args, **kwargs).load()
34
+
35
+
36
def loads(data, *args, **kwargs):
  """Deserialize the `data` bytes through an in-memory buffer."""
  with io.BytesIO(data) as memfd:
    return Unpickler(memfd, *args, **kwargs).load()
41
+
42
+
43
def make_module(**kwargs):
  """Return a pickle-like module whose load/loads APIs use this module's
  Unpickler, with the given configuration bound in."""
  module = mu.clone_module('pickle')
  for api_name, api_fn in (('load', load), ('loads', loads)):
    setattr(module, api_name, functools.partial(api_fn, **kwargs))

  return module
50
+
@@ -0,0 +1,155 @@
1
+ import collections
2
+ import functools
3
+ import logging
4
+ import multiprocessing
5
+ import multiprocessing.managers as mpmgr
6
+ import os
7
+ import queue
8
+ import tempfile
9
+ import threading
10
+ import time
11
+ import weakref
12
+
13
+ from . import alog
14
+ from . import daemon_process as dp
15
+
16
+
17
class ResourceManager:
  """Thread-safe registry of named, weakly referenced resources.

  Resources are grouped by class (an arbitrary hashable key) and by name,
  and are kept via weak references so they disappear once the last strong
  reference is dropped.
  """

  def __init__(self):
    self._lock = threading.Lock()
    # cls -> {name -> weakref.ref(resource)}
    self._resources = collections.defaultdict(dict)

  def get(self, cls, ctor, name, *args, **kwargs):
    """Return the live resource `cls`.`name`, creating it when missing/dead.

    Args:
      cls: Resource class key.
      ctor: Callable used to build the resource when not present.
      name: Resource name within the class.
      *args, **kwargs: Forwarded to `ctor` on creation.
    """
    alog.debug(f'Get resource {cls}.{name}')
    with self._lock:
      cdict = self._resources[cls]
      res = cdict.get(name)
      if res is not None:
        # Dereference the weakref; None means the resource has been GCed.
        res = res()
      if res is None:
        alog.debug(f'Creating resource {cls}.{name}')
        res = ctor(*args, **kwargs)
        cdict[name] = weakref.ref(res)

    return res

  def delete(self, cls, name):
    """Drop the registry entry for `cls`.`name` (the resource itself goes
    away once its last strong reference is released)."""
    alog.debug(f'Remove resource {cls}.{name}')
    with self._lock:
      # Use .get() instead of indexing: the defaultdict would otherwise
      # materialize an empty per-class dict as a side effect of deleting
      # from a class which was never registered.
      cdict = self._resources.get(cls)
      if cdict is not None:
        cdict.pop(name, None)
42
+
43
+
44
# Process-wide singleton instance handed out by get_resource_manager().
_RESMGR = ResourceManager()

def get_resource_manager():
  # Accessor for the module-level ResourceManager singleton.
  return _RESMGR
48
+
49
+
50
def _create_manager(*args, register_fn=None, **kwargs):
  """Build a SyncManager exposing get_*/rm_* APIs backed by the resource
  manager singleton.

  For every supported primitive kind a `get_<kind>` API (fetch or lazily
  create a named resource) and an `rm_<kind>` API (drop the registry entry)
  are registered on the manager.

  Args:
    register_fn: Optional callable invoked as register_fn(manager, resmgr)
      to register extra APIs on the manager.

  Returns:
    The configured (but not started/connected) SyncManager.
  """
  # https://github.com/python/cpython/blob/2f56c68dec97002fdd8563a0e4977b75eb191ab9/Lib/multiprocessing/managers.py#L1043
  # https://github.com/python/cpython/blob/4cba0e66c29b46afbb1eee1d0428f5a2f5b891bb/Lib/multiprocessing/managers.py#L189
  manager = mpmgr.SyncManager(*args, **kwargs)

  resmgr = get_resource_manager()

  # (api kind, resource class tag, factory, proxy type or None for AutoProxy)
  specs = (
      ('lock', 'LOCKS', threading.Lock, mpmgr.AcquirerProxy),
      ('event', 'EVENTS', threading.Event, mpmgr.EventProxy),
      ('condition', 'CONDITIONS', threading.Condition, mpmgr.ConditionProxy),
      ('barrier', 'BARRIERS', threading.Barrier, mpmgr.BarrierProxy),
      ('queue', 'QUEUES', queue.Queue, None),
      ('lifo', 'LIFOS', queue.LifoQueue, None),
      ('namespace', 'NAMESPACES', mpmgr.Namespace, mpmgr.NamespaceProxy),
  )
  for kind, tag, factory, proxy in specs:
    if proxy is not None:
      manager.register(f'get_{kind}',
                       functools.partial(resmgr.get, tag, factory),
                       proxy)
    else:
      # No explicit proxy: let the manager pick its default AutoProxy.
      manager.register(f'get_{kind}',
                       functools.partial(resmgr.get, tag, factory))
    manager.register(f'rm_{kind}',
                     functools.partial(resmgr.delete, tag))

  if register_fn is not None:
    register_fn(manager, resmgr)

  return manager
101
+
102
+
103
+ def _get_logdir():
104
+ logdir = os.path.join(tempfile.gettempdir(), 'log')
105
+ os.makedirs(logdir, exist_ok=True)
106
+
107
+ return logdir
108
+
109
+
110
def _server_runner(name, *args, **kwargs):
  """Daemon entry point: set up logging, build the manager, then serve
  requests until the process is terminated."""
  log_file = os.path.join(_get_logdir(), f'{name}.log')
  alog.basic_setup(log_level=os.getenv('RESMGR_LOG_LEVEL', 'INFO'),
                   log_file=log_file)

  alog.info(f'[{name}] server starting')
  manager = _create_manager(*args, **kwargs)

  try:
    manager.get_server().serve_forever()
  except Exception as ex:
    alog.error(f'[{name}] server start failed: {ex}')
  finally:
    alog.info(f'[{name}] server gone!')
124
+
125
+
126
def get_manager(name, *args, **kwargs):
  """Ensure the named manager server daemon is running, then connect to it.

  The daemon is (re)started until it reports a PID, then the client-side
  connection is retried until it succeeds. Extra arguments are forwarded to
  the manager constructor and must match the ones used server side.
  """
  daemon = dp.Daemon(name)
  while daemon.getpid() is None:
    alog.info(f'[{name}] Starting server daemon')
    try:
      daemon.start(functools.partial(_server_runner, name, *args, **kwargs))
    except FileExistsError as ex:
      # Another process won the race creating the daemon; keep polling for
      # its PID to show up.
      pass
    time.sleep(0.5)

  alog.info(f'[{name}] Connecting to server')

  manager = _create_manager(*args, **kwargs)
  while True:
    try:
      manager.connect()
      alog.info(f'[{name}] Connected to the manager')
      break
    except Exception as ex:
      # The server may still be initializing; retry until it accepts.
      alog.debug(f'[{name}] Connection failed, retrying ...: {ex}')
      time.sleep(0.5)

  return manager
149
+
150
+
151
def stop_manager(name):
  """Stop the daemon process backing the named manager."""
  daemon = dp.Daemon(name)
  result = daemon.stop()

  return result
155
+
@@ -0,0 +1,56 @@
1
+ import binascii
2
+ import random
3
+ import string
4
+ import struct
5
+
6
+ import numpy as np
7
+
8
+ from . import assert_checks as tas
9
+
10
+
11
def compute_seed(seed):
  """Reduce an arbitrary seed value to a 32-bit unsigned integer seed.

  int, float, bytes and str inputs are handled directly; any other
  (hashable) object falls back to its hash(). The value is folded through
  CRC32 so the result always fits the 32-bit range expected by RNG APIs.

  Args:
    seed: The seed source value.

  Returns:
    An unsigned 32-bit integer seed.
  """
  if isinstance(seed, int):
    seed = binascii.crc32(struct.pack('=q', seed))
  elif isinstance(seed, float):
    seed = binascii.crc32(struct.pack('=d', seed))
  elif isinstance(seed, bytes):
    seed = binascii.crc32(seed)
  elif isinstance(seed, str):
    seed = binascii.crc32(seed.encode())
  else:
    # hash() can return negative values, so pack as signed ('=q'); the
    # previous unsigned '=Q' raised struct.error on negative hashes.
    seed = binascii.crc32(struct.pack('=q', hash(seed)))

  return seed
24
+
25
+
26
def manual_seed(seed):
  """Seed both numpy and the stdlib random module from `seed`, returning
  the computed 32-bit seed actually used."""
  cseed = compute_seed(seed)

  for seed_fn in (np.random.seed, random.seed):
    seed_fn(cseed)

  return cseed
33
+
34
+
35
def choices(weights, n):
  """Draw `n` indices in [0, len(weights)) with the given relative weights."""
  indices = range(len(weights))

  return random.choices(indices, weights=weights, k=n)
37
+
38
+
39
def shuffle(args):
  """Return a new list with the elements of `args` in random order, leaving
  the input untouched."""
  return random.sample(args, len(args))
41
+
42
+
43
def uniform(center, delta=None, pct=None):
  """Sample uniformly from [center - delta, center + delta], with delta
  either given directly or derived as a percentage (`pct`) of `center`."""
  if pct is not None:
    delta = abs(center * pct)

  tas.check_is_not_none(delta, msg=f'Either delta or pct must be provided')

  lo, hi = center - delta, center + delta

  return random.uniform(lo, hi)
50
+
51
+
52
def rand_string(n):
  """Return a random string of `n` lowercase letters and digits, drawn from
  a cryptographically strong source."""
  alphabet = string.ascii_lowercase + string.digits
  sysrand = random.SystemRandom()

  return ''.join(sysrand.choices(alphabet, k=n))
56
+
@@ -0,0 +1,19 @@
1
+ import functools
2
+ import threading
3
+
4
+
5
def run_once(fn):
  """Decorator letting `fn` execute at most once.

  The first call runs `fn` (holding an internal lock, so concurrent first
  calls serialize) and returns its result; every later call is a no-op
  returning None.
  """

  @functools.wraps(fn)
  def wrapper(*args, **kwargs):
    with wrapper._lock:
      if wrapper._has_run:
        return None
      wrapper._has_run = True

      return fn(*args, **kwargs)

  wrapper._lock = threading.Lock()
  wrapper._has_run = False

  return wrapper
19
+
@@ -0,0 +1,135 @@
1
+ import collections
2
+ import heapq
3
+ import os
4
+ import threading
5
+ import time
6
+ import uuid
7
+
8
+ from . import alog
9
+ from . import executor as xe
10
+ from . import fin_wrap as fw
11
+ from . import timegen as tg
12
+ from . import utils as ut
13
+
14
+
15
# Scheduled entry kept in the Scheduler priority queue. Heap ordering is by
# `time` first, then by the monotonically increasing `sequence` (which is
# unique, so comparisons never fall through to the remaining fields).
Event = collections.namedtuple(
    'Event',
    'time, sequence, ref, action, argument, kwargs')
18
+
19
+
20
class Scheduler:
  """Thread-based event scheduler dispatching actions on an executor.

  Events live in a heap ordered by (time, sequence); a background thread
  waits for the earliest deadline and submits due actions to the executor,
  so actions never block the scheduling loop.
  """

  def __init__(self, timegen=None, executor=None, max_workers=None, name='Scheduler'):
    self._queue = []
    self._sequence = 0
    self._lock = threading.Lock()
    self._cond = threading.Condition(lock=self._lock)
    # The time generator abstracts time (now() + wait()) for testability.
    self.timegen = tg.TimeGen() if timegen is None else timegen

    if executor is not None:
      self.executor = executor
    else:
      # Own the executor: tie its shutdown to this object's finalization.
      executor = xe.Executor(max_threads=max_workers, name_prefix=name)
      fw.fin_wrap(self, 'executor', executor, finfn=executor.shutdown)

    self._runner = threading.Thread(target=self._run, daemon=True)
    self._runner.start()

  def _run_event(self, event):
    # Runs on the executor; exceptions are logged and never propagated.
    try:
      event.action(*event.argument, **event.kwargs)
    except Exception as ex:
      alog.exception(ex, exmsg=f'Exception while running scheduled action')

  def _run(self):
    # Scheduling loop: wait until the heap head is due, then pop it and
    # hand it to the executor (outside the lock).
    while True:
      now, event = self.timegen.now(), None
      with self._lock:
        timeout = (self._queue[0].time - now) if self._queue else None
        if timeout is None or timeout > 0:
          self.timegen.wait(self._cond, timeout=timeout)
        else:
          event = heapq.heappop(self._queue)

      if event is not None:
        self.executor.submit(self._run_event, event)

  def gen_unique_ref(self):
    """Return a unique reference string usable as an event `ref`."""
    return str(uuid.uuid4())

  def enterabs(self, ts, action, ref=None, argument=(), kwargs=None):
    """Schedule `action` at absolute time `ts`.

    Args:
      ts: Absolute timestamp (in timegen units) at which to run.
      action: Callable to invoke.
      ref: Optional user reference for ref_cancel().
      argument: Positional arguments for `action`.
      kwargs: Keyword arguments for `action` (None means no kwargs; the
        previous shared mutable `{}` default has been removed).

    Returns:
      The Event handle, usable with cancel().
    """
    with self._lock:
      event = Event(time=ts,
                    sequence=self._sequence,
                    ref=ref,
                    action=action,
                    argument=argument,
                    kwargs=kwargs if kwargs is not None else {})
      self._sequence += 1

      heapq.heappush(self._queue, event)
      if id(event) == id(self._queue[0]):
        # The new event is now the earliest one: wake the runner so it can
        # recompute its wait timeout.
        self._cond.notify()

    return event

  def enter(self, delay, action, ref=None, argument=(), kwargs=None):
    """Schedule `action` after `delay` time units from now."""
    return self.enterabs(self.timegen.now() + delay, action,
                         ref=ref,
                         argument=argument,
                         kwargs=kwargs)

  def _cancel_fn(self, fn):
    # Remove (and return) every queued event matching the predicate `fn`.
    events = []
    with self._lock:
      pos = []
      for i, qe in enumerate(self._queue):
        if fn(qe):
          events.append(qe)
          pos.append(i)
      if pos:
        # Positions are added in ascending order above, here we pop them in
        # descending order to avoid invalidating positions.
        for i in range(len(pos) - 1, -1, -1):
          self._queue.pop(pos[i])

        heapq.heapify(self._queue)

    return events

  def cancel(self, event):
    """Cancel an event (or list/tuple of events); returns the removed ones."""
    if isinstance(event, (list, tuple)):
      ids = set([id(e) for e in event])
      return self._cancel_fn(lambda qe: id(qe) in ids)

    return self._cancel_fn(lambda qe: id(qe) == id(event))

  def ref_cancel(self, ref):
    """Cancel every event carrying `ref` (or any of a list/tuple of refs)."""
    if isinstance(ref, (list, tuple)):
      refs = set(ref)
      return self._cancel_fn(lambda qe: qe.ref in refs)

    return self._cancel_fn(lambda qe: qe.ref == ref)

  def get_events(self, fn):
    """Return the currently queued events matching the predicate `fn`."""
    events = []
    with self._lock:
      for qe in self._queue:
        if fn(qe):
          events.append(qe)

    return events
+
123
+
124
+ _LOCK = threading.Lock()
125
+ _SCHEDULER = None
126
+
127
+ def common_scheduler():
128
+ global _SCHEDULER
129
+
130
+ with _LOCK:
131
+ if _SCHEDULER is None:
132
+ _SCHEDULER = Scheduler(executor=xe.common_executor())
133
+
134
+ return _SCHEDULER
135
+