python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,269 @@
1
+ import bisect
2
+ import collections
3
+ import os
4
+ import pickle
5
+ import re
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.utils.data as data_utils
10
+
11
+ from . import alog
12
+ from . import assert_checks as tas
13
+ from . import core_utils as cu
14
+ from . import utils as ut
15
+
16
+
17
+ _STATE_FILE = 'state.pkl'
18
+
19
+
20
+ def _check_shapes(prev_shape, new_shape):
21
+ if tuple(prev_shape[1:]) != tuple(new_shape[1:]):
22
+ alog.xraise(RuntimeError, f'Shapes are not compatible: {new_shape} vs {prev_shape}')
23
+
24
+
25
+ def _load_stream_tensors(path):
26
+ stream_tensors = []
27
+ for tname in os.listdir(path):
28
+ # File names within the stream tensors folder is ID.npy.
29
+ tid, ext = os.path.splitext(tname)
30
+ tas.check_eq(ext, '.npy')
31
+
32
+ tid = int(tid)
33
+ stream_tensors = cu.idx_expand(stream_tensors, tid)
34
+
35
+ tpath = os.path.join(path, tname)
36
+ stream_tensors[tid] = np.lib.format.open_memmap(tpath, mode='r')
37
+
38
+ return tuple(stream_tensors)
39
+
40
+
41
+ def _load_tensors(path):
42
+ tensors = []
43
+ for name in os.listdir(path):
44
+ spath = os.path.join(path, name)
45
+ if re.match(r'\d+$', name) and os.path.isdir(spath):
46
+ streamno = int(name)
47
+ tensors = cu.idx_expand(tensors, streamno, filler=())
48
+ tensors[streamno] = _load_stream_tensors(spath)
49
+
50
+ return tuple(tensors)
51
+
52
+
53
+ def _get_sizes(tensors):
54
+ sizes = []
55
+ for stream_tensors in tensors:
56
+ stream_sizes = [0]
57
+ for tensor in stream_tensors:
58
+ stream_sizes.append(stream_sizes[-1] + len(tensor))
59
+
60
+ sizes.append(tuple(stream_sizes))
61
+
62
+ return tuple(sizes)
63
+
64
+
65
+ def _get_shapes(tensors):
66
+ shapes = []
67
+ for stream_tensors in tensors:
68
+ shape = None
69
+ for tensor in stream_tensors:
70
+ if shape is None:
71
+ shape = list(tensor.shape)
72
+ else:
73
+ _check_shapes(shape, tensor.shape)
74
+ shape[0] += len(tensor)
75
+
76
+ if shapes and shapes[0][0] != shape[0]:
77
+ alog.xraise(RuntimeError, f'All the tensor streams must have the same major dimension: {shapes[0][0]} vs {shape[0]}')
78
+ shapes.append(tuple(shape))
79
+
80
+ return tuple(shapes)
81
+
82
+
83
+ class _ChunkList:
84
+
85
+ def __init__(self, init=None):
86
+ self._data = [init] if init is not None else []
87
+ self._size = init.nbytes if init is not None else 0
88
+
89
+ def size(self):
90
+ return self._size
91
+
92
+ def append(self, t):
93
+ self._data.append(t)
94
+ self._size += t.nbytes
95
+
96
+ def coalesce(self):
97
+ return np.concatenate(self._data)
98
+
99
+
100
class Writer:
  """Writes one or more aligned tensor streams to a folder, chunked as .npy files.

  Each stream i is stored under PATH/i/ as numbered chunk files (0.npy,
  1.npy, ...). Writes are buffered in memory and flushed to disk once a
  buffer reaches chunk_size bytes (or unconditionally on a final flush).
  """

  def __init__(self, path, chunk_size=100 * 1024 * 1024):
    if os.path.exists(path):
      alog.xraise(RuntimeError, f'Tensor stream folder must not exist: {path}')
    os.mkdir(path)
    self._path = path
    self._chunk_size = chunk_size
    self._chunks = []
    self._shapes = []
    self._indices = []

  # Note that the tensors handed over to the write() API will become owned by
  # the Writer object, and cannot be written over after the write operation.
  def write(self, *args):
    size = len(args[0]) if args else 0
    if not self._chunks:
      # First write defines the number of streams and their trailing shapes.
      self._chunks = [_ChunkList(init=t) for t in args]
      self._shapes = [t.shape for t in args]
      self._indices = [0] * len(args)
      for i, t in enumerate(args):
        if size != len(t):
          alog.xraise(RuntimeError, f'The major dimension of a write operation must match: {size} vs {len(t)}')
        os.mkdir(os.path.join(self._path, str(i)))
    else:
      if len(args) != len(self._chunks):
        alog.xraise(RuntimeError, f'Written streams count must match: {len(args)} vs {len(self._chunks)}')
      for i, t in enumerate(args):
        if size != len(t):
          alog.xraise(RuntimeError, f'The major dimension of a write operation must match: {size} vs {len(t)}')
        _check_shapes(self._shapes[i], t.shape)
        self._chunks[i].append(t)

    self.flush(final=False)

  def flush(self, final=True, state=None):
    """Flush buffered chunks to disk; optionally persist a state object.

    Args:
      final: When True, write out every non-empty buffer regardless of size;
        otherwise only buffers that reached the chunk size threshold.
      state: Optional picklable object saved to the stream folder's state file.
    """
    for i, chunk in enumerate(self._chunks):
      if chunk is not None and chunk.size() > 0 and (final or chunk.size() >= self._chunk_size):
        path = os.path.join(self._path, str(i), str(self._indices[i]) + '.npy')
        np.save(path, chunk.coalesce())

        self._indices[i] += 1
        self._chunks[i] = _ChunkList()

    if state is not None:
      with open(os.path.join(self._path, _STATE_FILE), mode='wb') as f:
        pickle.dump(state, f, protocol=ut.pickle_proto())
147
+
148
+
149
class Reader:
  """Reads tensor streams produced by Writer, exposing slice-based access.

  Chunk files are memory-mapped, so slices are materialized lazily and the
  whole stream never needs to fit in memory.
  """

  def __init__(self, path, transforms=None):
    if not os.path.isdir(path):
      alog.xraise(RuntimeError, f'Tensor stream folder does not exist: {path}')
    self._path = path
    self._tensors = _load_tensors(path)
    self._sizes = _get_sizes(self._tensors)
    self.shape = _get_shapes(self._tensors)
    self.num_streams = len(self._tensors)
    self.state = dict()
    self._transforms = list(transforms) if transforms else None

    # If the writer persisted a state object, load it back.
    state_path = os.path.join(path, _STATE_FILE)
    if os.path.exists(state_path):
      with open(state_path, mode='rb') as f:
        self.state = pickle.load(f)

  @property
  def dtype(self):
    # The dtype of each stream, taken from its first chunk.
    return tuple(self._tensors[n][0].dtype for n in range(self.num_streams))

  def __len__(self):
    lens = [stream_shape[0] for stream_shape in self.shape]
    tas.check(all(lens[0] == l for l in lens), msg=f'Mismatching sizes: {lens}')

    return lens[0] if lens else 0

  def tensor_sequence(self, streamno):
    """Return the tuple of memory-mapped chunk tensors of a stream."""
    if streamno < 0 or streamno >= self.num_streams:
      alog.xraise(RuntimeError, f'Bad stream number {streamno}, must be >= 0 and < {self.num_streams}')

    return self._tensors[streamno]

  def get_slice(self, streamno, start, size=None):
    """Return rows [start, start + size) of a stream as a single array.

    The slice may span multiple chunk files, in which case the pieces are
    concatenated. The stream's transform (if any) is applied to the result.
    """
    if streamno < 0 or streamno >= self.num_streams:
      alog.xraise(RuntimeError, f'Bad stream number {streamno}, must be >= 0 and < {self.num_streams}')

    stream_tensors = self._tensors[streamno]
    stream_sizes = self._sizes[streamno]
    stream_shape = self.shape[streamno]

    if start < 0 or start >= stream_shape[0]:
      alog.xraise(IndexError, f'Invalid slice start index {start}, must be >= 0 and < {stream_shape[0]}')

    # Clamp the size to the number of rows left in the stream.
    size = stream_shape[0] - start if size is None else min(size, stream_shape[0] - start)

    # Locate the chunk holding the first requested row.
    pos = bisect.bisect_right(stream_sizes, start) - 1
    tensor = stream_tensors[pos]
    tpos = start - stream_sizes[pos]
    tas.check_ge(tpos, 0)

    # Take what is available within the first chunk, then walk the following
    # chunks until the requested number of rows has been gathered.
    tsize = min(size, len(tensor) - tpos)
    slices = [tensor[tpos: tpos + tsize]]
    rsize = size - tsize
    while rsize > 0:
      pos += 1
      tensor = stream_tensors[pos]
      tsize = min(rsize, len(tensor))
      rsize -= tsize
      slices.append(tensor[: tsize])

    sliced_tensor = slices[0] if len(slices) == 1 else np.concatenate(slices)
    if self._transforms:
      sliced_tensor = self._transforms[streamno](sliced_tensor)

    return sliced_tensor

  def get_slices(self, start, size=None):
    """Return the [start, start + size) slice from every stream."""
    return [self.get_slice(x, start, size=size) for x in range(self.num_streams)]
222
+
223
+
224
class StreamArray(collections.abc.Sequence):
  """Sequence view over a single stream of a Reader.

  Supports integer indexing (including negative indices, as required by the
  Sequence protocol), slice indexing, and numpy conversion via __array__.
  """

  def __init__(self, reader, streamno):
    super().__init__()
    self.reader = reader
    self.streamno = streamno
    self.shape = reader.shape[streamno]

  def __getitem__(self, i):
    if isinstance(i, slice):
      start, end, step = i.indices(len(self))
      if step != 1:
        # Non-unit steps cannot map to one contiguous read, so gather one
        # row at a time and stack the parts.
        parts = [self.reader.get_slice(self.streamno, x, size=1)
                 for x in range(start, end, step)]
        return np.concatenate(parts)

      return self.reader.get_slice(self.streamno, start, size=end - start)

    # Normalize negative indices as the Sequence protocol expects; the reader
    # itself only accepts non-negative start positions.
    if i < 0:
      i += len(self)

    return np.squeeze(self.reader.get_slice(self.streamno, i, size=1), axis=0)

  def __len__(self):
    return self.shape[0]

  def to_numpy(self, dtype=None):
    """Materialize the whole stream as a numpy array (optionally cast)."""
    npa = self.reader.get_slice(self.streamno, 0)

    return npa.astype(dtype) if dtype is not None else npa

  def __array__(self, dtype=None):
    return self.to_numpy(dtype=dtype)
255
+
256
+
257
class Dataset(data_utils.Dataset):
  """torch Dataset adapter over a tensor stream Reader."""

  def __init__(self, path, transforms=None):
    super().__init__()
    self.reader = Reader(path, transforms=transforms)

  def __len__(self):
    # Every stream shares the same major dimension, so stream 0 suffices.
    shapes = self.reader.shape
    return shapes[0][0] if shapes else 0

  def __getitem__(self, i):
    # A sample is a tuple of per-stream rows, converted to torch tensors.
    return tuple(torch.from_numpy(x) for x in self.reader.get_slices(i, size=1))
269
+
@@ -0,0 +1,33 @@
1
+ import threading
2
+
3
+
4
+ _TLS = threading.local()
5
+
6
class Context:
  """Context manager pushing an object onto a per-thread, per-name stack.

  The innermost object pushed for a given name can be retrieved with
  get_context() while the `with` block is active.
  """

  def __init__(self, name, obj):
    self._name = name
    self._obj = obj

  def __enter__(self):
    # Lazily create the thread-local stack for this context name.
    stack = getattr(_TLS, self._name, None)
    if stack is None:
      stack = []
      setattr(_TLS, self._name, stack)

    stack.append(self._obj)

    return self._obj

  def __exit__(self, *exc):
    # Pop this context's object; exceptions are never suppressed.
    getattr(_TLS, self._name).pop()

    return False
27
+
28
+
29
def get_context(name):
  """Return the innermost object pushed for name on this thread, or None."""
  stack = getattr(_TLS, name, None)
  if stack:
    return stack[-1]

  return None
33
+
@@ -0,0 +1,30 @@
1
+ import threading
2
+ import time
3
+
4
+
5
class Throttle:
  """Rate limiter allowing at most xsec_limit trigger() calls per second."""

  def __init__(self, xsec_limit):
    # Seconds between allowed events; None disables throttling entirely.
    self._secsx = 1.0 / xsec_limit if xsec_limit > 0 else None
    self._last = None
    self._lock = threading.Lock()

  def _wait_time(self):
    # Compute how long the caller should sleep to honor the rate limit, and
    # advance the internal schedule under the lock.
    if self._secsx is None:
      return 0
    with self._lock:
      now = time.time()
      # The first ever trigger goes through immediately.
      last = self._last if self._last is not None else now - self._secsx
      horizon = last + self._secsx
      self._last = max(horizon, now)

      return horizon - now

  def trigger(self):
    """Block until the next event is allowed; returns the internal lock."""
    wait_secs = self._wait_time()
    if wait_secs > 0:
      time.sleep(wait_secs)

    return self._lock
30
+
@@ -0,0 +1,18 @@
1
+ import time
2
+
3
+
4
class TimeTrigger:
  """Truthy at most once per interval: bool(trigger) is True when it elapsed."""

  def __init__(self, interval):
    self._interval = interval
    # Absolute time of the next firing.
    self.next = time.time() + interval

  def __bool__(self):
    # A falsy interval disables the trigger entirely.
    if not self._interval:
      return False
    now = time.time()
    if now < self.next:
      return False
    # Fire, and schedule the next firing relative to now.
    self.next = now + self._interval

    return True
18
+
@@ -0,0 +1,11 @@
1
+ import time
2
+
3
+
4
class TimeGen:
  """Pluggable time source: wall-clock now() and condition-variable wait()."""

  def now(self):
    # Wall-clock seconds since the epoch.
    return time.time()

  def wait(self, cond, timeout=None):
    # Delegate to the condition variable's own wait().
    return cond.wait(timeout=timeout)
11
+
@@ -0,0 +1,49 @@
1
+ import sys
2
+
3
+
4
+ # Copied from logging module ...
5
+ if hasattr(sys, '_getframe'):
6
+
7
+ def _get_frame():
8
+ return sys._getframe(1)
9
+
10
+ else:
11
+
12
+ def _get_frame():
13
+ try:
14
+ raise Exception
15
+ except Exception as exc:
16
+ return exc.__traceback__.tb_frame.f_back
17
+
18
+
19
def get_frame(n=0):
  """Return the caller's frame, skipping n additional levels up the stack."""
  frame = _get_frame().f_back
  for _ in range(n):
    if frame is None:
      break
    frame = frame.f_back

  return frame
26
+
27
+
28
def get_frame_after(filename):
  """Return the first frame past the run of frames belonging to filename.

  Walks up the stack until frames from filename are seen, then keeps walking
  until the first frame from a different file (or None if the stack ends).
  """
  frame, inscope = _get_frame(), False
  while frame is not None:
    in_file = filename == frame.f_code.co_filename
    if not inscope:
      inscope = in_file
    elif not in_file:
      break

    frame = frame.f_back

  return frame
40
+
41
+
42
def walk_stack(frame=None):
  """Yield frames from frame (default: the caller's frame) down to the root."""
  current = get_frame(1) if frame is None else frame
  while current is not None:
    yield current
    current = current.f_back
49
+
@@ -0,0 +1,91 @@
1
+ import threading
2
+ import weakref
3
+
4
+ from . import alog
5
+ from . import cond_waiter as cwait
6
+ from . import executor as xe
7
+
8
+
9
+ def _wrap_task(executor, tid, fn, *args, **kwargs):
10
+ eref = weakref.ref(executor)
11
+ del executor
12
+
13
+ def wfn():
14
+ try:
15
+ return fn(*args, **kwargs)
16
+ finally:
17
+ xtor = eref()
18
+ if xtor is not None:
19
+ xtor._report_done(tid)
20
+
21
+ return wfn
22
+
23
+
24
class TrackingExecutor:
  """Executor wrapper which tracks submitted tasks and allows waiting on them.

  Every submitted function gets a task ID; completion (normal or by
  exception) is recorded, so callers can wait for specific tasks or for all
  pending work to drain.
  """

  def __init__(self, executor=None):
    self.executor = executor if executor is not None else xe.common_executor()
    self._lock = threading.Lock()
    self._task_id = 0
    self._pending = set()
    self._pending_cv = threading.Condition(lock=self._lock)

  def _report_done(self, tid):
    # Called (possibly from worker threads) when task tid completes.
    with self._lock:
      self._pending.remove(tid)
      self._pending_cv.notify_all()

  def _wrap(self, fn, *args, **kwargs):
    # Assign a fresh task ID and register it as pending.
    with self._lock:
      tid = self._task_id
      self._task_id += 1
      wfn = _wrap_task(self, tid, fn, *args, **kwargs)
      self._pending.add(tid)

    return wfn, tid

  def submit(self, fn, *args, **kwargs):
    """Submit fn for execution; returns its task ID."""
    wfn, tid = self._wrap(fn, *args, **kwargs)
    try:
      self.executor.submit(wfn)
    except Exception:
      # The task never got queued; unregister it before propagating.
      self._report_done(tid)
      raise

    return tid

  def submit_result(self, fn, *args, **kwargs):
    """Submit fn and return the underlying executor's result handle."""
    wfn, tid = self._wrap(fn, *args, **kwargs)
    try:
      return self.executor.submit_result(wfn)
    except Exception:
      self._report_done(tid)
      raise

  def shutdown(self):
    """Shut down the underlying executor and wait for pending tasks."""
    self.executor.shutdown()
    self.wait()

  def wait(self, tids=None, timeout=None, timegen=None, waiter=None):
    """Wait for the given task IDs (or all pending tasks) to complete.

    Returns True when everything waited for completed, False on timeout.
    """
    waiter = waiter or cwait.CondWaiter(timeout=timeout, timegen=timegen)
    if not tids:
      with self._lock:
        while self._pending:
          if not waiter.wait(self._pending_cv):
            break

      return not self._pending
    else:
      stids = set(tids)
      with self._lock:
        while True:
          # Tasks we are waiting for which are still in flight.
          rem = stids & self._pending
          if not (rem and waiter.wait(self._pending_cv)):
            break

      return not rem

  def wait_for_idle(self, timeout=None, timegen=None):
    """Wait until both tracked tasks and the executor itself are idle."""
    waiter = cwait.CondWaiter(timeout=timeout, timegen=timegen)

    return self.wait(waiter=waiter) and self.executor.wait_for_idle(waiter=waiter)
91
+
@@ -0,0 +1,42 @@
1
+ import collections
2
+
3
+ import numpy as np
4
+
5
+ from . import core_utils as cu
6
+ from . import np_utils as npu
7
+
8
+
9
+ class TransformArray(collections.abc.Sequence):
10
+
11
+ def __init__(self, data, pipeline):
12
+ super().__init__()
13
+ self.data = data
14
+ self._pipeline = pipeline
15
+ self.shape = cu.compute_shape(data)
16
+
17
+ def __getitem__(self, idx):
18
+ if isinstance(idx, slice):
19
+ start, end, step = idx.indices(len(self))
20
+ slices = [self.data[i] for i in range(start, end, step)]
21
+
22
+ return __class__(npu.maybe_stack_slices(slices), self._pipeline)
23
+
24
+ return self._pipeline(self.data[idx])
25
+
26
+ def __len__(self):
27
+ return len(self.data)
28
+
29
+ def to_numpy(self, dtype=None):
30
+ slices = [self[i] for i in range(len(self))]
31
+ if not slices:
32
+ return np.empty((0,))
33
+ if not isinstance(slices[0], np.ndarray):
34
+ slices = [np.array(x) for x in slices]
35
+
36
+ npa = np.stack(slices, axis=0)
37
+
38
+ return npa.astype(dtype) if dtype is not None else npa
39
+
40
+ def __array__(self, dtype=None):
41
+ return self.to_numpy(dtype=dtype)
42
+
@@ -0,0 +1,35 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+ from . import alog
6
+ from . import compression as comp
7
+
8
+
9
+ class Uncompress:
10
+
11
+ def __init__(self, path):
12
+ self._path = path
13
+ self._tempdir = None
14
+
15
+ def __enter__(self):
16
+ bpath, ext = os.path.splitext(self._path)
17
+
18
+ decomp = comp.decompressor(ext)
19
+ if decomp is not None:
20
+ self._tempdir = tempfile.mkdtemp()
21
+ rpath = os.path.join(self._tempdir, os.path.basename(bpath))
22
+
23
+ decomp(self._path, rpath)
24
+ shutil.copystat(self._path, rpath)
25
+ else:
26
+ rpath = self._path
27
+
28
+ return rpath
29
+
30
+ def __exit__(self, *exc):
31
+ if self._tempdir is not None:
32
+ shutil.rmtree(self._tempdir, ignore_errors=True)
33
+
34
+ return False
35
+