python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,269 @@
1
+ import bisect
2
+ import collections
3
+ import os
4
+ import pickle
5
+ import re
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.utils.data as data_utils
10
+
11
+ from . import alog
12
+ from . import assert_checks as tas
13
+ from . import core_utils as cu
14
+ from . import utils as ut
15
+
16
+
17
+ _STATE_FILE = 'state.pkl'
18
+
19
+
20
+ def _check_shapes(prev_shape, new_shape):
21
+ if tuple(prev_shape[1:]) != tuple(new_shape[1:]):
22
+ alog.xraise(RuntimeError, f'Shapes are not compatible: {new_shape} vs {prev_shape}')
23
+
24
+
25
+ def _load_stream_tensors(path):
26
+ stream_tensors = []
27
+ for tname in os.listdir(path):
28
+ # File names within the stream tensors folder is ID.npy.
29
+ tid, ext = os.path.splitext(tname)
30
+ tas.check_eq(ext, '.npy')
31
+
32
+ tid = int(tid)
33
+ stream_tensors = cu.idx_expand(stream_tensors, tid)
34
+
35
+ tpath = os.path.join(path, tname)
36
+ stream_tensors[tid] = np.lib.format.open_memmap(tpath, mode='r')
37
+
38
+ return tuple(stream_tensors)
39
+
40
+
41
+ def _load_tensors(path):
42
+ tensors = []
43
+ for name in os.listdir(path):
44
+ spath = os.path.join(path, name)
45
+ if re.match(r'\d+$', name) and os.path.isdir(spath):
46
+ streamno = int(name)
47
+ tensors = cu.idx_expand(tensors, streamno, filler=())
48
+ tensors[streamno] = _load_stream_tensors(spath)
49
+
50
+ return tuple(tensors)
51
+
52
+
53
+ def _get_sizes(tensors):
54
+ sizes = []
55
+ for stream_tensors in tensors:
56
+ stream_sizes = [0]
57
+ for tensor in stream_tensors:
58
+ stream_sizes.append(stream_sizes[-1] + len(tensor))
59
+
60
+ sizes.append(tuple(stream_sizes))
61
+
62
+ return tuple(sizes)
63
+
64
+
65
+ def _get_shapes(tensors):
66
+ shapes = []
67
+ for stream_tensors in tensors:
68
+ shape = None
69
+ for tensor in stream_tensors:
70
+ if shape is None:
71
+ shape = list(tensor.shape)
72
+ else:
73
+ _check_shapes(shape, tensor.shape)
74
+ shape[0] += len(tensor)
75
+
76
+ if shapes and shapes[0][0] != shape[0]:
77
+ alog.xraise(RuntimeError, f'All the tensor streams must have the same major dimension: {shapes[0][0]} vs {shape[0]}')
78
+ shapes.append(tuple(shape))
79
+
80
+ return tuple(shapes)
81
+
82
+
83
+ class _ChunkList:
84
+
85
+ def __init__(self, init=None):
86
+ self._data = [init] if init is not None else []
87
+ self._size = init.nbytes if init is not None else 0
88
+
89
+ def size(self):
90
+ return self._size
91
+
92
+ def append(self, t):
93
+ self._data.append(t)
94
+ self._size += t.nbytes
95
+
96
+ def coalesce(self):
97
+ return np.concatenate(self._data)
98
+
99
+
100
class Writer:
  """Writes one or more aligned tensor streams to a folder, chunked as .npy files.

  Each stream i is stored under PATH/i/ as numbered chunk files (0.npy,
  1.npy, ...). Writes are buffered in memory and flushed to disk once a
  buffer reaches chunk_size bytes (or unconditionally on a final flush).
  """

  def __init__(self, path, chunk_size=100 * 1024 * 1024):
    if os.path.exists(path):
      alog.xraise(RuntimeError, f'Tensor stream folder must not exist: {path}')
    os.mkdir(path)
    self._path = path
    self._chunk_size = chunk_size
    self._chunks = []
    self._shapes = []
    self._indices = []

  # Note that the tensors handed over to the write() API will become owned by
  # the Writer object, and cannot be written over after the write operation.
  def write(self, *args):
    size = len(args[0]) if args else 0
    if not self._chunks:
      # First write defines the number of streams and their trailing shapes.
      self._chunks = [_ChunkList(init=t) for t in args]
      self._shapes = [t.shape for t in args]
      self._indices = [0] * len(args)
      for i, t in enumerate(args):
        if size != len(t):
          alog.xraise(RuntimeError, f'The major dimension of a write operation must match: {size} vs {len(t)}')
        os.mkdir(os.path.join(self._path, str(i)))
    else:
      if len(args) != len(self._chunks):
        alog.xraise(RuntimeError, f'Written streams count must match: {len(args)} vs {len(self._chunks)}')
      for i, t in enumerate(args):
        if size != len(t):
          alog.xraise(RuntimeError, f'The major dimension of a write operation must match: {size} vs {len(t)}')
        _check_shapes(self._shapes[i], t.shape)
        self._chunks[i].append(t)

    self.flush(final=False)

  def flush(self, final=True, state=None):
    """Flush buffered chunks to disk; optionally persist a state object.

    Args:
      final: When True, write out every non-empty buffer regardless of size;
        otherwise only buffers that reached the chunk size threshold.
      state: Optional picklable object saved to the stream folder's state file.
    """
    for i, chunk in enumerate(self._chunks):
      if chunk is not None and chunk.size() > 0 and (final or chunk.size() >= self._chunk_size):
        path = os.path.join(self._path, str(i), str(self._indices[i]) + '.npy')
        np.save(path, chunk.coalesce())

        self._indices[i] += 1
        self._chunks[i] = _ChunkList()

    if state is not None:
      with open(os.path.join(self._path, _STATE_FILE), mode='wb') as f:
        pickle.dump(state, f, protocol=ut.pickle_proto())
147
+
148
+
149
class Reader:
  """Reads tensor streams produced by Writer, exposing slice-based access.

  Chunk files are memory-mapped, so slices are materialized lazily and the
  whole stream never needs to fit in memory.
  """

  def __init__(self, path, transforms=None):
    if not os.path.isdir(path):
      alog.xraise(RuntimeError, f'Tensor stream folder does not exist: {path}')
    self._path = path
    self._tensors = _load_tensors(path)
    self._sizes = _get_sizes(self._tensors)
    self.shape = _get_shapes(self._tensors)
    self.num_streams = len(self._tensors)
    self.state = dict()
    self._transforms = list(transforms) if transforms else None

    # If the writer persisted a state object, load it back.
    state_path = os.path.join(path, _STATE_FILE)
    if os.path.exists(state_path):
      with open(state_path, mode='rb') as f:
        self.state = pickle.load(f)

  @property
  def dtype(self):
    # The dtype of each stream, taken from its first chunk.
    return tuple(self._tensors[n][0].dtype for n in range(self.num_streams))

  def __len__(self):
    lens = [stream_shape[0] for stream_shape in self.shape]
    tas.check(all(lens[0] == l for l in lens), msg=f'Mismatching sizes: {lens}')

    return lens[0] if lens else 0

  def tensor_sequence(self, streamno):
    """Return the tuple of memory-mapped chunk tensors of a stream."""
    if streamno < 0 or streamno >= self.num_streams:
      alog.xraise(RuntimeError, f'Bad stream number {streamno}, must be >= 0 and < {self.num_streams}')

    return self._tensors[streamno]

  def get_slice(self, streamno, start, size=None):
    """Return rows [start, start + size) of a stream as a single array.

    The slice may span multiple chunk files, in which case the pieces are
    concatenated. The stream's transform (if any) is applied to the result.
    """
    if streamno < 0 or streamno >= self.num_streams:
      alog.xraise(RuntimeError, f'Bad stream number {streamno}, must be >= 0 and < {self.num_streams}')

    stream_tensors = self._tensors[streamno]
    stream_sizes = self._sizes[streamno]
    stream_shape = self.shape[streamno]

    if start < 0 or start >= stream_shape[0]:
      alog.xraise(IndexError, f'Invalid slice start index {start}, must be >= 0 and < {stream_shape[0]}')

    # Clamp the size to the number of rows left in the stream.
    size = stream_shape[0] - start if size is None else min(size, stream_shape[0] - start)

    # Locate the chunk holding the first requested row.
    pos = bisect.bisect_right(stream_sizes, start) - 1
    tensor = stream_tensors[pos]
    tpos = start - stream_sizes[pos]
    tas.check_ge(tpos, 0)

    # Take what is available within the first chunk, then walk the following
    # chunks until the requested number of rows has been gathered.
    tsize = min(size, len(tensor) - tpos)
    slices = [tensor[tpos: tpos + tsize]]
    rsize = size - tsize
    while rsize > 0:
      pos += 1
      tensor = stream_tensors[pos]
      tsize = min(rsize, len(tensor))
      rsize -= tsize
      slices.append(tensor[: tsize])

    sliced_tensor = slices[0] if len(slices) == 1 else np.concatenate(slices)
    if self._transforms:
      sliced_tensor = self._transforms[streamno](sliced_tensor)

    return sliced_tensor

  def get_slices(self, start, size=None):
    """Return the [start, start + size) slice from every stream."""
    return [self.get_slice(x, start, size=size) for x in range(self.num_streams)]
222
+
223
+
224
class StreamArray(collections.abc.Sequence):
  """Sequence view over a single stream of a Reader.

  Supports integer indexing (including negative indices, as required by the
  Sequence protocol), slice indexing, and numpy conversion via __array__.
  """

  def __init__(self, reader, streamno):
    super().__init__()
    self.reader = reader
    self.streamno = streamno
    self.shape = reader.shape[streamno]

  def __getitem__(self, i):
    if isinstance(i, slice):
      start, end, step = i.indices(len(self))
      if step != 1:
        # Non-unit steps cannot map to one contiguous read, so gather one
        # row at a time and stack the parts.
        parts = [self.reader.get_slice(self.streamno, x, size=1)
                 for x in range(start, end, step)]
        return np.concatenate(parts)

      return self.reader.get_slice(self.streamno, start, size=end - start)

    # Normalize negative indices as the Sequence protocol expects; the reader
    # itself only accepts non-negative start positions.
    if i < 0:
      i += len(self)

    return np.squeeze(self.reader.get_slice(self.streamno, i, size=1), axis=0)

  def __len__(self):
    return self.shape[0]

  def to_numpy(self, dtype=None):
    """Materialize the whole stream as a numpy array (optionally cast)."""
    npa = self.reader.get_slice(self.streamno, 0)

    return npa.astype(dtype) if dtype is not None else npa

  def __array__(self, dtype=None):
    return self.to_numpy(dtype=dtype)
255
+
256
+
257
class Dataset(data_utils.Dataset):
  """torch Dataset adapter over a tensor stream Reader."""

  def __init__(self, path, transforms=None):
    super().__init__()
    self.reader = Reader(path, transforms=transforms)

  def __len__(self):
    # Every stream shares the same major dimension, so stream 0 suffices.
    shapes = self.reader.shape
    return shapes[0][0] if shapes else 0

  def __getitem__(self, i):
    # A sample is a tuple of per-stream rows, converted to torch tensors.
    return tuple(torch.from_numpy(x) for x in self.reader.get_slices(i, size=1))
269
+
@@ -0,0 +1,33 @@
1
+ import threading
2
+
3
+
4
+ _TLS = threading.local()
5
+
6
class Context:
  """Context manager pushing an object onto a per-thread, per-name stack.

  The innermost object pushed for a given name can be retrieved with
  get_context() while the `with` block is active.
  """

  def __init__(self, name, obj):
    self._name = name
    self._obj = obj

  def __enter__(self):
    # Lazily create the thread-local stack for this context name.
    stack = getattr(_TLS, self._name, None)
    if stack is None:
      stack = []
      setattr(_TLS, self._name, stack)

    stack.append(self._obj)

    return self._obj

  def __exit__(self, *exc):
    # Pop this context's object; exceptions are never suppressed.
    getattr(_TLS, self._name).pop()

    return False
27
+
28
+
29
def get_context(name):
  """Return the innermost object pushed for name on this thread, or None."""
  stack = getattr(_TLS, name, None)
  if stack:
    return stack[-1]

  return None
33
+
@@ -0,0 +1,30 @@
1
+ import threading
2
+ import time
3
+
4
+
5
class Throttle:
  """Rate limiter allowing at most xsec_limit trigger() calls per second."""

  def __init__(self, xsec_limit):
    # Seconds between allowed events; None disables throttling entirely.
    self._secsx = 1.0 / xsec_limit if xsec_limit > 0 else None
    self._last = None
    self._lock = threading.Lock()

  def _wait_time(self):
    # Compute how long the caller should sleep to honor the rate limit, and
    # advance the internal schedule under the lock.
    if self._secsx is None:
      return 0
    with self._lock:
      now = time.time()
      # The first ever trigger goes through immediately.
      last = self._last if self._last is not None else now - self._secsx
      horizon = last + self._secsx
      self._last = max(horizon, now)

      return horizon - now

  def trigger(self):
    """Block until the next event is allowed; returns the internal lock."""
    wait_secs = self._wait_time()
    if wait_secs > 0:
      time.sleep(wait_secs)

    return self._lock
30
+
@@ -0,0 +1,18 @@
1
+ import time
2
+
3
+
4
class TimeTrigger:
  """Truthy at most once per interval: bool(trigger) is True when it elapsed."""

  def __init__(self, interval):
    self._interval = interval
    # Absolute time of the next firing.
    self.next = time.time() + interval

  def __bool__(self):
    # A falsy interval disables the trigger entirely.
    if not self._interval:
      return False
    now = time.time()
    if now < self.next:
      return False
    # Fire, and schedule the next firing relative to now.
    self.next = now + self._interval

    return True
18
+
@@ -0,0 +1,11 @@
1
+ import time
2
+
3
+
4
class TimeGen:
  """Pluggable time source: wall-clock now() and condition-variable wait()."""

  def now(self):
    # Wall-clock seconds since the epoch.
    return time.time()

  def wait(self, cond, timeout=None):
    # Delegate to the condition variable's own wait().
    return cond.wait(timeout=timeout)
11
+
@@ -0,0 +1,49 @@
1
+ import sys
2
+
3
+
4
+ # Copied from logging module ...
5
+ if hasattr(sys, '_getframe'):
6
+
7
+ def _get_frame():
8
+ return sys._getframe(1)
9
+
10
+ else:
11
+
12
+ def _get_frame():
13
+ try:
14
+ raise Exception
15
+ except Exception as exc:
16
+ return exc.__traceback__.tb_frame.f_back
17
+
18
+
19
def get_frame(n=0):
  """Return the caller's frame, skipping n additional levels up the stack."""
  frame = _get_frame().f_back
  for _ in range(n):
    if frame is None:
      break
    frame = frame.f_back

  return frame
26
+
27
+
28
def get_frame_after(filename):
  """Return the first frame past the run of frames belonging to filename.

  Walks up the stack until frames from filename are seen, then keeps walking
  until the first frame from a different file (or None if the stack ends).
  """
  frame, inscope = _get_frame(), False
  while frame is not None:
    in_file = filename == frame.f_code.co_filename
    if not inscope:
      inscope = in_file
    elif not in_file:
      break

    frame = frame.f_back

  return frame
40
+
41
+
42
def walk_stack(frame=None):
  """Yield frames from frame (default: the caller's frame) down to the root."""
  current = get_frame(1) if frame is None else frame
  while current is not None:
    yield current
    current = current.f_back
49
+
@@ -0,0 +1,91 @@
1
+ import threading
2
+ import weakref
3
+
4
+ from . import alog
5
+ from . import cond_waiter as cwait
6
+ from . import executor as xe
7
+
8
+
9
+ def _wrap_task(executor, tid, fn, *args, **kwargs):
10
+ eref = weakref.ref(executor)
11
+ del executor
12
+
13
+ def wfn():
14
+ try:
15
+ return fn(*args, **kwargs)
16
+ finally:
17
+ xtor = eref()
18
+ if xtor is not None:
19
+ xtor._report_done(tid)
20
+
21
+ return wfn
22
+
23
+
24
class TrackingExecutor:
  """Executor wrapper which tracks submitted tasks and allows waiting on them.

  Every submitted function gets a task ID; completion (normal or by
  exception) is recorded, so callers can wait for specific tasks or for all
  pending work to drain.
  """

  def __init__(self, executor=None):
    self.executor = executor if executor is not None else xe.common_executor()
    self._lock = threading.Lock()
    self._task_id = 0
    self._pending = set()
    self._pending_cv = threading.Condition(lock=self._lock)

  def _report_done(self, tid):
    # Called (possibly from worker threads) when task tid completes.
    with self._lock:
      self._pending.remove(tid)
      self._pending_cv.notify_all()

  def _wrap(self, fn, *args, **kwargs):
    # Assign a fresh task ID and register it as pending.
    with self._lock:
      tid = self._task_id
      self._task_id += 1
      wfn = _wrap_task(self, tid, fn, *args, **kwargs)
      self._pending.add(tid)

    return wfn, tid

  def submit(self, fn, *args, **kwargs):
    """Submit fn for execution; returns its task ID."""
    wfn, tid = self._wrap(fn, *args, **kwargs)
    try:
      self.executor.submit(wfn)
    except Exception:
      # The task never got queued; unregister it before propagating.
      self._report_done(tid)
      raise

    return tid

  def submit_result(self, fn, *args, **kwargs):
    """Submit fn and return the underlying executor's result handle."""
    wfn, tid = self._wrap(fn, *args, **kwargs)
    try:
      return self.executor.submit_result(wfn)
    except Exception:
      self._report_done(tid)
      raise

  def shutdown(self):
    """Shut down the underlying executor and wait for pending tasks."""
    self.executor.shutdown()
    self.wait()

  def wait(self, tids=None, timeout=None, timegen=None, waiter=None):
    """Wait for the given task IDs (or all pending tasks) to complete.

    Returns True when everything waited for completed, False on timeout.
    """
    waiter = waiter or cwait.CondWaiter(timeout=timeout, timegen=timegen)
    if not tids:
      with self._lock:
        while self._pending:
          if not waiter.wait(self._pending_cv):
            break

      return not self._pending
    else:
      stids = set(tids)
      with self._lock:
        while True:
          # Tasks we are waiting for which are still in flight.
          rem = stids & self._pending
          if not (rem and waiter.wait(self._pending_cv)):
            break

      return not rem

  def wait_for_idle(self, timeout=None, timegen=None):
    """Wait until both tracked tasks and the executor itself are idle."""
    waiter = cwait.CondWaiter(timeout=timeout, timegen=timegen)

    return self.wait(waiter=waiter) and self.executor.wait_for_idle(waiter=waiter)
91
+
@@ -0,0 +1,42 @@
1
+ import collections
2
+
3
+ import numpy as np
4
+
5
+ from . import core_utils as cu
6
+ from . import np_utils as npu
7
+
8
+
9
+ class TransformArray(collections.abc.Sequence):
10
+
11
+ def __init__(self, data, pipeline):
12
+ super().__init__()
13
+ self.data = data
14
+ self._pipeline = pipeline
15
+ self.shape = cu.compute_shape(data)
16
+
17
+ def __getitem__(self, idx):
18
+ if isinstance(idx, slice):
19
+ start, end, step = idx.indices(len(self))
20
+ slices = [self.data[i] for i in range(start, end, step)]
21
+
22
+ return __class__(npu.maybe_stack_slices(slices), self._pipeline)
23
+
24
+ return self._pipeline(self.data[idx])
25
+
26
+ def __len__(self):
27
+ return len(self.data)
28
+
29
+ def to_numpy(self, dtype=None):
30
+ slices = [self[i] for i in range(len(self))]
31
+ if not slices:
32
+ return np.empty((0,))
33
+ if not isinstance(slices[0], np.ndarray):
34
+ slices = [np.array(x) for x in slices]
35
+
36
+ npa = np.stack(slices, axis=0)
37
+
38
+ return npa.astype(dtype) if dtype is not None else npa
39
+
40
+ def __array__(self, dtype=None):
41
+ return self.to_numpy(dtype=dtype)
42
+
@@ -0,0 +1,35 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+ from . import alog
6
+ from . import compression as comp
7
+
8
+
9
+ class Uncompress:
10
+
11
+ def __init__(self, path):
12
+ self._path = path
13
+ self._tempdir = None
14
+
15
+ def __enter__(self):
16
+ bpath, ext = os.path.splitext(self._path)
17
+
18
+ decomp = comp.decompressor(ext)
19
+ if decomp is not None:
20
+ self._tempdir = tempfile.mkdtemp()
21
+ rpath = os.path.join(self._tempdir, os.path.basename(bpath))
22
+
23
+ decomp(self._path, rpath)
24
+ shutil.copystat(self._path, rpath)
25
+ else:
26
+ rpath = self._path
27
+
28
+ return rpath
29
+
30
+ def __exit__(self, *exc):
31
+ if self._tempdir is not None:
32
+ shutil.rmtree(self._tempdir, ignore_errors=True)
33
+
34
+ return False
35
+