python_misc_utils-0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_misc_utils/__init__.py +0 -0
- py_misc_utils/abs_timeout.py +12 -0
- py_misc_utils/alog.py +311 -0
- py_misc_utils/app_main.py +179 -0
- py_misc_utils/archive_streamer.py +112 -0
- py_misc_utils/assert_checks.py +118 -0
- py_misc_utils/ast_utils.py +121 -0
- py_misc_utils/async_manager.py +189 -0
- py_misc_utils/break_control.py +63 -0
- py_misc_utils/buffered_iterator.py +35 -0
- py_misc_utils/cached_file.py +507 -0
- py_misc_utils/call_limiter.py +26 -0
- py_misc_utils/call_result_selector.py +13 -0
- py_misc_utils/cleanups.py +85 -0
- py_misc_utils/cmd.py +97 -0
- py_misc_utils/compression.py +116 -0
- py_misc_utils/cond_waiter.py +13 -0
- py_misc_utils/context_base.py +18 -0
- py_misc_utils/context_managers.py +67 -0
- py_misc_utils/core_utils.py +577 -0
- py_misc_utils/daemon_process.py +252 -0
- py_misc_utils/data_cache.py +46 -0
- py_misc_utils/date_utils.py +90 -0
- py_misc_utils/debug.py +24 -0
- py_misc_utils/dyn_modules.py +50 -0
- py_misc_utils/dynamod.py +103 -0
- py_misc_utils/env_config.py +35 -0
- py_misc_utils/executor.py +239 -0
- py_misc_utils/file_overwrite.py +29 -0
- py_misc_utils/fin_wrap.py +77 -0
- py_misc_utils/fp_utils.py +47 -0
- py_misc_utils/fs/__init__.py +0 -0
- py_misc_utils/fs/file_fs.py +127 -0
- py_misc_utils/fs/ftp_fs.py +242 -0
- py_misc_utils/fs/gcs_fs.py +196 -0
- py_misc_utils/fs/http_fs.py +241 -0
- py_misc_utils/fs/s3_fs.py +417 -0
- py_misc_utils/fs_base.py +133 -0
- py_misc_utils/fs_utils.py +207 -0
- py_misc_utils/gcs_fs.py +169 -0
- py_misc_utils/gen_indices.py +54 -0
- py_misc_utils/gfs.py +371 -0
- py_misc_utils/git_repo.py +77 -0
- py_misc_utils/global_namespace.py +110 -0
- py_misc_utils/http_async_fetcher.py +139 -0
- py_misc_utils/http_server.py +196 -0
- py_misc_utils/http_utils.py +143 -0
- py_misc_utils/img_utils.py +20 -0
- py_misc_utils/infix_op.py +20 -0
- py_misc_utils/inspect_utils.py +205 -0
- py_misc_utils/iostream.py +21 -0
- py_misc_utils/iter_file.py +117 -0
- py_misc_utils/key_wrap.py +46 -0
- py_misc_utils/lazy_import.py +25 -0
- py_misc_utils/lockfile.py +164 -0
- py_misc_utils/mem_size.py +64 -0
- py_misc_utils/mirror_from.py +72 -0
- py_misc_utils/mmap.py +16 -0
- py_misc_utils/module_utils.py +196 -0
- py_misc_utils/moving_average.py +19 -0
- py_misc_utils/msgpack_streamer.py +26 -0
- py_misc_utils/multi_wait.py +24 -0
- py_misc_utils/multiprocessing.py +102 -0
- py_misc_utils/named_array.py +224 -0
- py_misc_utils/no_break.py +46 -0
- py_misc_utils/no_except.py +32 -0
- py_misc_utils/np_ml_framework.py +184 -0
- py_misc_utils/np_utils.py +346 -0
- py_misc_utils/ntuple_utils.py +38 -0
- py_misc_utils/num_utils.py +54 -0
- py_misc_utils/obj.py +73 -0
- py_misc_utils/object_cache.py +100 -0
- py_misc_utils/object_tracker.py +88 -0
- py_misc_utils/ordered_set.py +71 -0
- py_misc_utils/osfd.py +27 -0
- py_misc_utils/packet.py +22 -0
- py_misc_utils/parquet_streamer.py +69 -0
- py_misc_utils/pd_utils.py +254 -0
- py_misc_utils/periodic_task.py +61 -0
- py_misc_utils/pickle_wrap.py +121 -0
- py_misc_utils/pipeline.py +98 -0
- py_misc_utils/remap_pickle.py +50 -0
- py_misc_utils/resource_manager.py +155 -0
- py_misc_utils/rnd_utils.py +56 -0
- py_misc_utils/run_once.py +19 -0
- py_misc_utils/scheduler.py +135 -0
- py_misc_utils/select_params.py +300 -0
- py_misc_utils/signal.py +141 -0
- py_misc_utils/skl_utils.py +270 -0
- py_misc_utils/split.py +147 -0
- py_misc_utils/state.py +53 -0
- py_misc_utils/std_module.py +56 -0
- py_misc_utils/stream_dataframe.py +176 -0
- py_misc_utils/streamed_file.py +144 -0
- py_misc_utils/tempdir.py +79 -0
- py_misc_utils/template_replace.py +51 -0
- py_misc_utils/tensor_stream.py +269 -0
- py_misc_utils/thread_context.py +33 -0
- py_misc_utils/throttle.py +30 -0
- py_misc_utils/time_trigger.py +18 -0
- py_misc_utils/timegen.py +11 -0
- py_misc_utils/traceback.py +49 -0
- py_misc_utils/tracking_executor.py +91 -0
- py_misc_utils/transform_array.py +42 -0
- py_misc_utils/uncompress.py +35 -0
- py_misc_utils/url_fetcher.py +157 -0
- py_misc_utils/utils.py +538 -0
- py_misc_utils/varint.py +50 -0
- py_misc_utils/virt_array.py +52 -0
- py_misc_utils/weak_call.py +33 -0
- py_misc_utils/work_results.py +100 -0
- py_misc_utils/writeback_file.py +43 -0
- python_misc_utils-0.2.dist-info/METADATA +36 -0
- python_misc_utils-0.2.dist-info/RECORD +117 -0
- python_misc_utils-0.2.dist-info/WHEEL +5 -0
- python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
- python_misc_utils-0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from . import alog
|
|
4
|
+
from . import assert_checks as tas
|
|
5
|
+
from . import core_utils as cu
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def diff_split(data, mask_fn, sort=True):
  """Split the index space of `data` wherever `mask_fn` flags the element-wise diff.

  When `sort` is true, the data is examined in sorted order, and the returned
  index arrays refer back to positions in the original, unsorted `data`.
  Callers fetch the i-th chunk as data[result[i]].
  """
  if sort:
    order = np.argsort(data)
    ordered = np.take_along_axis(data, order, axis=None)
  else:
    order, ordered = None, data

  step_mask = np.asarray(mask_fn(np.diff(ordered)))
  cut_points = np.flatnonzero(step_mask) + 1

  chunks = np.split(np.arange(len(data)), cut_points)

  if order is None:
    return chunks

  # Map positions in the sorted view back to original-data indices.
  return [order[chunk] for chunk in chunks]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def group_splits(data, mask_fn):
  """Return positions (into np.diff(data)) where `mask_fn` flags a boundary."""
  boundary_mask = np.asarray(mask_fn(np.diff(data)))

  return np.flatnonzero(boundary_mask)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def group_by_delta(data, mask_fn):
  """Partition `data` into consecutive runs, cutting after every element-wise
  delta that `mask_fn` flags."""
  cut_points = group_splits(data, mask_fn) + 1

  return np.split(data, cut_points)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def fillna(data, copy=False, defval=0):
  """Replace each run of NaNs with the nearest following value (or the nearest
  preceding value at the tail; `defval` if the whole array is NaN).

  NOTE(review): `flatten()` always returns a copy, so the input array is never
  modified in place and the result is always a new array; the `copy` flag only
  adds one extra copy.  Confirm whether in-place filling was intended.
  """
  if copy:
    data = data.copy()
  # Flat copy of the data; positions of the NaN entries within it.
  fdata = data.flatten()
  mask = np.where(np.isnan(fdata))[0]
  # Consecutive NaN indices form one run (a delta != 1 splits runs).
  for r in group_by_delta(mask, lambda x: x != 1):
    if r.size == 0:
      continue
    # Prefer the first valid value right after the run ...
    vi = r[-1] + 1
    if vi < len(fdata):
      rv = fdata[vi]
    else:
      # ... else the last valid value right before it ...
      vi = r[0] - 1
      if vi >= 0:
        rv = fdata[vi]
      else:
        # ... else everything is NaN: fall back to the caller's default.
        rv = defval

    fdata[r] = rv

  return fdata.reshape(data.shape)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def infer_np_dtype(dtypes):
  """Infer a single dtype wide enough to represent values of all `dtypes`.

  Expects numpy dtype objects (`.itemsize` is accessed).  float64 always wins;
  float32 mixed with a wider non-float dtype promotes to float64; otherwise
  the widest dtype seen is kept.  Returns float32 for an empty input.
  """
  dtype = None
  for t in dtypes:
    if dtype is None or t == np.float64:
      # First dtype seen, or float64: nothing visible here can be wider.
      dtype = t
    elif t == np.float32:
      if dtype.itemsize > t.itemsize:
        # A wider (e.g. 64-bit integer) dtype mixed with float32: need
        # float64 to keep both range and fractional values.
        dtype = np.float64
      else:
        dtype = t
    elif dtype != np.float64:
      if dtype == np.float32 and t.itemsize > dtype.itemsize:
        # Mirror case: current float32 meets a wider dtype.
        dtype = np.float64
      elif t.itemsize > dtype.itemsize:
        dtype = t

  return dtype if dtype is not None else np.float32
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def maybe_stack_slices(slices, axis=0):
  """Stack a non-empty sequence of ndarrays along `axis`; anything else is
  returned unchanged."""
  if not slices or not isinstance(slices[0], np.ndarray):
    return slices

  return np.stack(slices, axis=axis)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def to_numpy(data):
  """Coerce `data` to a numpy ndarray.

  Tries, in order: pass-through for ndarrays, a `to_numpy()` method (pandas
  style), a `numpy()` method (PyTorch style, with force=True to handle
  device/grad copies), and finally np.array().
  """
  if isinstance(data, np.ndarray):
    return data

  converter = getattr(data, 'to_numpy', None)
  if callable(converter):
    return converter()

  # This is PyTorch ...
  converter = getattr(data, 'numpy', None)
  if callable(converter):
    return converter(force=True)

  return np.array(data)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def is_numeric(dtype):
  """True when `dtype` is any numpy numeric dtype (integer or floating)."""
  return np.issubdtype(dtype, np.number)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def is_integer(dtype):
  """True when `dtype` is a numpy integer dtype."""
  return np.issubdtype(dtype, np.integer)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def is_floating(dtype):
  """True when `dtype` is a numpy floating-point dtype."""
  return np.issubdtype(dtype, np.floating)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def is_numpy(v):
  """True when `v` is an instance of a type defined in the numpy module
  (ndarray or numpy scalar)."""
  return type(v).__module__ == np.__name__
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def is_sorted(data, descending=False):
  """True when `data` is monotonically non-decreasing (non-increasing when
  `descending` is set)."""
  arr = data if isinstance(data, np.ndarray) else np.array(data)

  head, tail = arr[:-1], arr[1:]

  return np.all(head >= tail) if descending else np.all(head <= tail)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def astype(data, col, dtype):
  """Cast `data` for column `col` to `dtype`.

  A dict `dtype` maps column names to target dtypes (missing column: no
  cast); a scalar `dtype` applies only when `data` is already numeric.
  """
  if cu.isdict(dtype):
    target = dtype.get(col)
  else:
    target = dtype if is_numeric(data.dtype) else None

  return data.astype(target) if target is not None else data
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def softmax(x):
  """Numerically stable softmax over the last axis of `x`."""
  # Subtracting the row max keeps exp() from overflowing.
  shifted = x - np.max(x, axis=-1, keepdims=True)
  exps = np.exp(shifted)

  return exps / np.sum(exps, axis=-1, keepdims=True)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def categorical(un_probs, n=None):
  """Sample category indices from unnormalized logits `un_probs`.

  For a 1-D input, returns a scalar sample (or `n` samples when `n` is set).
  For N-D input, samples independently along the last axis and returns an
  array shaped like the leading dims plus a trailing sample dim.
  """
  probs = softmax(un_probs)

  if probs.ndim == 1:
    draws = np.random.choice(len(probs), size=n or 1, p=probs)

    return draws if n is not None else draws[0]

  flat = np.reshape(probs, (-1, probs.shape[-1]))
  draws = [np.random.choice(len(p), size=n or 1, p=p) for p in flat]
  stacked = np.vstack(draws)

  return np.reshape(stacked, tuple(probs.shape[: -1]) + (stacked.shape[-1],))
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def onehot(values, num_categories=None):
  """One-hot encode integer `values`.

  When `num_categories` is omitted it is inferred as max(values) + 1;
  otherwise all values must be strictly below it.
  """
  if num_categories is not None:
    tas.check_lt(np.max(values), num_categories)
  else:
    num_categories = np.max(values) + 1

  return np.eye(num_categories)[values]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def normalize(data, axis=None):
  """Z-score `data`: subtract the mean and divide by the standard deviation.

  Zero standard deviations are replaced by 1.0 so constant slices map to
  zeros instead of NaNs.
  """
  mean = np.mean(data, axis=axis)
  std = np.std(data, axis=axis)

  if std.ndim > 0:
    std = np.where(std == 0.0, 1.0, std)
  elif std == 0.0:
    std = 1.0

  return (data - mean) / std
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def moving_average(data, window, include_current=True):
  """Rolling mean of `data` over `window` samples, with edge padding.

  NOTE: the averaging kernel uses data.dtype, so integer inputs truncate.
  """
  kernel = np.ones(window, dtype=data.dtype) / window
  padded = np.pad(data, (window, window), mode='edge')
  smoothed = np.convolve(padded, kernel, mode='valid')
  # include_current shifts the window by one so each output includes (or
  # excludes) the sample at its own position.
  start = 1 if include_current else 0

  return smoothed[start: start + len(data)]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def rolling_window(a, window):
  """Zero-copy sliding-window view over the last axis of `a`.

  The returned array shares memory with `a`; treat it as read-only.
  """
  out_shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
  out_strides = a.strides + (a.strides[-1],)

  return np.lib.stride_tricks.as_strided(a, shape=out_shape, strides=out_strides)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def shift(data, pos=1):
  """Shift `data` by `pos` positions (positive: right, negative: left),
  replicating the edge value into the vacated slots."""
  shifted = np.empty_like(data)

  if pos == 0:
    shifted[:] = data
  elif pos > 0:
    shifted[: pos] = data[0]
    shifted[pos:] = data[: -pos]
  else:
    shifted[: pos] = data[-pos:]
    shifted[pos:] = data[-1]

  return shifted
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def complement_indices(indices, size):
  """Return the indices in [0, size) that are NOT present in `indices`."""
  keep = np.ones(size, dtype=np.int8)
  keep[indices] = 0

  return np.flatnonzero(keep)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def polyfit_std(yv, xv=None, deg=1):
|
|
221
|
+
xv = np.arange(len(yv), dtype=np.float32) if xv is None else np.array(xv)
|
|
222
|
+
yv = yv if isinstance(yv, np.ndarray) else np.array(yv)
|
|
223
|
+
|
|
224
|
+
yfit = np.polynomial.Polynomial.fit(xv, yv, deg)
|
|
225
|
+
fyv = yfit(xv)
|
|
226
|
+
|
|
227
|
+
return np.std(yv - fyv), fyv, yfit
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def npdict_clone(npd):
  """Clone a dict of array-likes: same mapping type, each value copied via
  np.array so the clone shares no buffers with the original."""
  # Build empty-then-assign so mapping types with constructor arguments
  # (e.g. defaultdict) are reproduced correctly.
  cloned = type(npd)()
  for key, value in npd.items():
    cloned[key] = np.array(value)

  return cloned
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def is_ordered(v, reverse=False):
  """True when `v` is monotonically non-decreasing (non-increasing when
  `reverse`).  Accepts anything to_numpy() can convert."""
  arr = to_numpy(v)
  head, tail = arr[:-1], arr[1:]

  if reverse:
    return np.all(head >= tail)

  return np.all(head <= tail)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class RingBuffer:
  """Fixed-capacity circular buffer backed by a numpy array.

  Stores up to `capacity` items of shape `vshape`; once full, new writes
  overwrite the oldest entries.  `_count` is the total number of items ever
  written (monotonic), so `_count % capacity` is the next write slot.
  """

  def __init__(self, capacity, dtype, vshape):
    self.capacity = capacity
    self.dtype = dtype
    self._vshape = tuple(vshape)
    # Monotonic write counter (not clamped to capacity).
    self._count = 0
    self._data = np.zeros((capacity,) + self._vshape, dtype=dtype)

  @property
  def shape(self):
    # Logical shape: number of live items plus the per-item shape.
    return (len(self),) + self._vshape

  def resize(self, capacity):
    """Grow/shrink the backing storage, clamping the live count.

    NOTE(review): np.resize repeats/truncates the raw storage; after a wrap
    the retained items may not be the logically oldest ones — confirm this
    is acceptable for callers.
    """
    self._data = np.resize(self._data, (capacity,) + self._vshape)
    self._count = min(self._count, self.capacity, capacity)
    self.capacity = capacity

  def select(self, indices):
    """Keep only the items at `indices` (raw storage positions), compacting
    them to the front and resetting the counter."""
    indices = indices[indices < len(self)]
    self._count = len(indices)
    self._data[: self._count] = self._data[indices]

  def append(self, v):
    """Write one item at the next slot, overwriting the oldest when full."""
    self._data[self._count % self.capacity] = v
    self._count += 1

  def extend(self, v):
    """Bulk-append `v` (reshaped to items of `vshape`) with wraparound.

    NOTE(review): if len(v) exceeds the free span (front + pos), the excess
    items are silently dropped and the counter advances only by what was
    written — confirm callers never extend by more than `capacity`.
    """
    arr = np.asarray(v, dtype=self.dtype)
    if self._vshape:
      arr = arr.reshape((-1,) + self._vshape)

    pos = self._count % self.capacity
    # First chunk: from the write position to the end of storage.
    front = min(self.capacity - pos, len(arr))

    self._data[pos: pos + front] = arr[: front]

    # Second chunk: wrap around to the beginning.
    back = min(pos, len(arr) - front)
    if back > 0:
      self._data[: back] = arr[front: front + back]

    self._count += front + back

  def to_numpy(self):
    """Materialize the live items in logical (oldest-first) order."""
    return np.concatenate(tuple(self.iter_views()))

  def data(self, dtype=None):
    """Copy of the first len(self) raw storage slots (storage order, which
    differs from logical order once the buffer has wrapped)."""
    return np.array(self._data[: len(self)], dtype=dtype)

  def iter_views(self):
    """Yield up to two zero-copy views covering the live items, oldest first."""
    if self._count <= self.capacity:
      yield self._data[: self._count]
    else:
      pos = self._count % self.capacity

      # Oldest items live from the write cursor to the end ...
      yield self._data[pos:]

      # ... then from the start up to the cursor.
      if pos > 0:
        yield self._data[: pos]

  def iter_indices(self):
    """Raw storage indices of the live items, in logical (oldest-first) order."""
    if self._count <= self.capacity:
      return np.arange(0, self._count)

    return np.arange(self._count, self._count + self.capacity) % self.capacity

  def __len__(self):
    # Number of live items (never more than capacity).
    return min(self.capacity, self._count)

  def __getitem__(self, i):
    """Item access; integer `i` is interpreted relative to the ring.

    NOTE(review): for an integer index the offset math assumes the buffer
    has wrapped (or that i < len(self) when it has not); out-of-range
    integers read stale/zero slots rather than raising — confirm intended.
    """
    if isinstance(i, int):
      idx = (max(self._count, self.capacity) + i) % self.capacity
    else:
      # Allow seamless slicing in case of non-integer indexing.
      idx = i

    return self._data[idx]

  def __array__(self, dtype=None):
    # numpy protocol hook: np.asarray(ring) yields the logical contents.
    arr = self.to_numpy()

    return arr if dtype is None else arr.astype(dtype)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Mapping from numpy scalar types (np.dtype(...).type) to the closest
# stdlib array.array typecode.
# NOTE(review): some entries are lossy or platform-dependent — bool maps to
# unsigned byte 'B', float16 widens to float32 'f', and the C sizes of
# 'l'/'L' vary by platform; confirm these match array_typecode() consumers.
_NP_ARRAY_TYPECODES = {
  bool: 'B',
  int: 'q',
  np.int8: 'b',
  np.uint8: 'B',
  np.int16: 'h',
  np.uint16: 'H',
  np.int32: 'l',
  np.uint32: 'L',
  np.int64: 'q',
  np.uint64: 'Q',
  np.float16: 'f',
  np.float32: 'f',
  np.float64: 'd',
}

def array_typecode(dtype):
  """Return the array.array typecode for a numpy dtype, or None if unmapped."""
  return _NP_ARRAY_TYPECODES.get(dtype.type)
|
|
346
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from . import assert_checks as tas
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def extend(base_nt, name, fields, defaults=None):
  """Create a new namedtuple type extending `base_nt` with extra `fields`.

  `fields` may be a sequence of names or a comma/space separated string.
  `defaults` (if given) apply to the trailing entries of `fields`, mirroring
  the collections.namedtuple convention.  Fields without defaults are placed
  before all defaulted ones, as namedtuple requires.

  Raises via tas.check() when an added field collides with an existing one.
  NOTE(review): a *non-defaulted* new field is only checked against the
  non-defaulted group, so colliding with a base defaulted field slips
  through to namedtuple itself — confirm whether that is intentional.
  """
  if isinstance(fields, str):
    # Accept 'a, b c' style specs: split on one comma or space, swallowing
    # surrounding whitespace.
    fields = re.split(r'\s*[ ,]\s*', fields)

  ext_fields, ext_def_fields, ext_defaults = [], [], []
  missing = object()
  # Partition the base fields into no-default / defaulted groups, keeping
  # their original relative order and default values.
  for field in base_nt._fields:
    defval = base_nt._field_defaults.get(field, missing)
    if defval is missing:
      ext_fields.append(field)
    else:
      ext_def_fields.append(field)
      ext_defaults.append(defval)

  defaults = defaults or ()
  for i, field in enumerate(fields):
    # Index into `defaults` for this field; negative means "no default".
    defidx = i - (len(fields) - len(defaults))
    if defidx >= 0:
      tas.check(field not in ext_def_fields,
                msg=f'Field already exists: {field} in {ext_def_fields}')
      tas.check(field not in ext_fields,
                msg=f'Field already exists: {field} in {ext_fields}')
      ext_def_fields.append(field)
      ext_defaults.append(defaults[defidx])
    else:
      tas.check(field not in ext_fields,
                msg=f'Field already exists: {field} in {ext_fields}')
      ext_fields.append(field)

  return collections.namedtuple(name, tuple(ext_fields + ext_def_fields),
                                defaults=tuple(ext_defaults))
|
|
38
|
+
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
|
|
2
|
+
def prime_factors(n):
  """Yield the prime factors of `n` in non-decreasing order, with multiplicity."""
  candidate = 2
  while candidate * candidate <= n:
    quotient, remainder = divmod(n, candidate)
    if remainder:
      candidate += 1
    else:
      n = quotient
      yield candidate

  # Whatever remains above the trial-division bound is itself prime.
  if n > 1:
    yield n
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def nearest_divisor(value, n):
  """Return the divisor of `value` closest to `n` (ties go to the larger one).

  Falls back to the nearest divisor at or below `n` when no divisor is found
  scanning upward within the symmetric search window.
  """
  # Scan downward for the nearest divisor at or below n (1 always divides).
  lower = n
  while lower > 1 and value % lower != 0:
    lower -= 1

  down_dist = n - lower
  # No point scanning upward farther than the downward match, nor past
  # value // 2 (the largest proper divisor).
  limit = min(n + down_dist, value // 2)

  upper = n
  while upper <= limit and value % upper != 0:
    upper += 1

  if value % upper != 0:
    return lower

  return lower if down_dist < (upper - n) else upper
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def sign_extend(value, nbits):
  """Interpret the low `nbits` of `value` as a two's-complement signed integer."""
  sign_bit = 1 << (nbits - 1)
  magnitude = value & (sign_bit - 1)

  return magnitude - (value & sign_bit)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def round_up(v, step):
  """Round `v` up to the nearest multiple of `step`."""
  quotient = (v + step - 1) // step

  return quotient * step
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def round_down(v, step):
  """Round `v` down to the nearest multiple of `step`."""
  quotient = v // step

  return quotient * step
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def mix(a, b, gamma):
  """Blend `a` and `b`: full `a` at gamma=1.0, full `b` at gamma=0.0."""
  # Same two-product form as the original so float results are bit-identical.
  return a * gamma + b * (1.0 - gamma)
|
|
54
|
+
|
py_misc_utils/obj.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
|
|
3
|
+
from . import core_utils as cu
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Obj:
  """Lightweight attribute bag with chaining mutators, structural equality,
  and recursive conversion to plain dicts."""

  def __init__(self, **kwargs):
    self.update(**kwargs)

  def update(self, **kwargs):
    """Set each keyword argument as an attribute; returns self for chaining."""
    for key, value in kwargs.items():
      setattr(self, key, value)

    return self

  def update_from(self, obj):
    """Copy every attribute of `obj` onto self; returns self for chaining."""
    return self.update(**vars(obj))

  def clone(self, **kwargs):
    """Shallow-copy self, then apply the keyword overrides to the copy."""
    duplicate = copy.copy(self)

    return duplicate.update(**kwargs)

  def as_dict(self):
    """Recursively convert to plain containers: Obj values become dicts;
    Obj elements inside lists/tuples/dicts are converted, one level deep."""
    ad = dict()
    for name, value in vars(self).items():
      if isinstance(value, Obj):
        value = value.as_dict()
      elif isinstance(value, (list, tuple)):
        value = type(value)(x.as_dict() if isinstance(x, Obj) else x
                            for x in value)
      elif cu.isdict(value):
        # Empty-then-assign reproduces mapping types whose constructor takes
        # non-item arguments (e.g. defaultdict).
        converted = type(value)()
        for k, x in value.items():
          converted[k] = x.as_dict() if isinstance(x, Obj) else x
        value = converted

      ad[name] = value

    return ad

  def __eq__(self, other):
    """Attribute-wise equality: both sides must expose the same attribute
    names with equal values."""
    sentinel = object()
    for name, value in vars(self).items():
      other_value = getattr(other, name, sentinel)
      if other_value is sentinel or value != other_value:
        return False
    for name in vars(other).keys():
      if not hasattr(self, name):
        return False

    return True

  def __repr__(self):
    parts = ', '.join(f'{name}={str_value(value)}'
                      for name, value in vars(self).items())

    return f'{type(self).__name__}({parts})'
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def str_value(v):
  """Render `v` for repr output: strings are double-quoted with embedded
  double quotes escaped (backslashes are left untouched); everything else
  goes through str()."""
  if isinstance(v, str):
    escaped = v.replace('"', '\\"')

    return f'"{escaped}"'

  return str(v)
|
|
73
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import collections
|
|
3
|
+
import functools
|
|
4
|
+
import os
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
from . import alog
|
|
9
|
+
from . import fin_wrap as fw
|
|
10
|
+
from . import periodic_task as ptsk
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# One pooled cache object: its cache key, the object itself, its Handler,
# and the time.time() stamp of when it was (re)inserted into the pool.
_Entry = collections.namedtuple('Entry', 'name, obj, handler, time')
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Handler(abc.ABC):
  """Factory and lifecycle policy for objects managed by Cache.

  Subclasses must implement create(); the remaining hooks have
  conservative defaults.
  """

  @abc.abstractmethod
  def create(self):
    """Build and return a new cacheable object."""
    ...

  def max_age(self):
    """Seconds an idle cached object may live before being closed."""
    return 60

  def is_alive(self, obj):
    """Return True if a cached object is still usable (default: always)."""
    return True

  def close(self, obj):
    """Release resources held by `obj` (default: nothing to do)."""
    pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Cache:
  """Pool of reusable named objects with periodic age-based expiry.

  Objects are checked out with get() — wrapped in a FinWrapper whose
  finalizer returns them to the pool — and a background PeriodicTask closes
  entries that sit idle longer than their handler's max_age().
  """

  def __init__(self, clean_timeo=None):
    self._lock = threading.Lock()
    self._cond = threading.Condition(lock=self._lock)
    # name -> deque of _Entry, oldest first.
    self._cache = collections.defaultdict(collections.deque)
    self._cleaner = ptsk.PeriodicTask(
      'CacheCleaner',
      self._try_cleanup,
      clean_timeo or int(os.getenv('CACHE_CLEAN_TIMEO', 8)),
      stop_on_error=False,
    )
    self._cleaner.start()

  def _try_cleanup(self):
    """Drop and close every cached entry older than its handler's max_age()."""
    alog.verbose('Object cache cleanup running')
    cleans = []
    with self._lock:
      new_cache = collections.defaultdict(collections.deque)
      for name, cqueue in self._cache.items():
        for entry in cqueue:
          age = time.time() - entry.time
          if age > entry.handler.max_age():
            cleans.append(entry)
          else:
            new_cache[name].append(entry)

      self._cache = new_cache

    # Close outside the lock: handler.close() may be slow or re-enter the cache.
    for entry in cleans:
      alog.debug(f'Cache Clean: name={entry.name} obj={entry.obj}')
      entry.handler.close(entry.obj)

  def shutdown(self):
    """Stop the background cleaner task."""
    self._cleaner.stop()

  def _release(self, name, handler, obj):
    """Return `obj` to the pool (FinWrapper finalizer)."""
    alog.debug(f'Cache Release: name={name} obj={obj}')
    with self._lock:
      self._cache[name].append(_Entry(name=name,
                                      obj=obj,
                                      handler=handler,
                                      time=time.time()))

  def get(self, name, handler):
    """Check out an object for `name`, creating one via `handler` if needed.

    Returns a FinWrapper around the object; when the wrapper is finalized
    the object is put back into the pool.
    """
    with self._lock:
      cqueue, obj = self._cache[name], None
      if cqueue:
        entry = cqueue.popleft()
        if not entry.handler.is_alive(entry.obj):
          # BUGFIX: previously called close(obj), but `obj` is always None
          # at this point; close the dead cached object instead so its
          # resources are actually released.
          entry.handler.close(entry.obj)
          obj = None
        else:
          obj = entry.obj
          alog.debug(f'Cache Hit: name={name} obj={obj}')

    if obj is None:
      obj = handler.create()

    finfn = functools.partial(self._release, name, handler, obj)

    return fw.FinWrapper(obj, finfn)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Process-wide shared Cache instance, created at module import time.
_CACHE = Cache()

def cache():
  """Return the process-wide shared Cache instance."""
  return _CACHE
|
|
100
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import gc
|
|
3
|
+
|
|
4
|
+
from . import alog
|
|
5
|
+
from . import core_utils as cu
|
|
6
|
+
from . import inspect_utils as iu
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _get_referred(obj):
  """Enumerate the (name, value) pairs directly referenced by `obj`.

  Dicts yield their items, sequences yield ('[i]', element) pairs, and
  objects with a __dict__ yield their attributes; anything else yields
  nothing.
  """
  if cu.isdict(obj):
    return tuple(obj.items())
  if isinstance(obj, (list, tuple)):
    return tuple((f'[{i}]', value) for i, value in enumerate(obj))
  if hasattr(obj, '__dict__'):
    return tuple(vars(obj).items())

  return ()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_tracking_references(obj, tracked_by, max_references=None):
  """Walk the reverse-reference map upward from `obj` to its root referrers.

  Returns up to `max_references` (default 8) tuples of
  (qualified-type-of-root, attribute-path) describing who ultimately keeps
  `obj` alive.
  """
  limit = max_references or 8

  roots = []
  pending = [(obj, None)]
  while pending:
    node, path = pending.pop()

    referrers = tracked_by.get(id(node), ())
    for ref_name, ref_obj in referrers:
      if path is None:
        suffix = ''
      else:
        # Index segments ('[i]') attach directly; attribute names need a dot.
        sep = '' if path.startswith('[') else '.'
        suffix = f'{sep}{path}'

      pending.append((ref_obj, f'{ref_name}{suffix}'))

    # No referrers: `node` is a root of the reference chain.
    if not referrers:
      roots.append((iu.qual_name(node), path))
      if len(roots) >= limit:
        break

  return tuple(roots)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def track_objects(tracker, max_references=None):
  """Scan the live object graph and build a report of tracked objects.

  `tracker.track(obj)` decides whether an object is reported; it returns
  None to skip, or a (priority, info) pair.  For each reported object, up
  to `max_references` root-referrer paths are appended.  The report is a
  newline-joined string sorted by priority.
  """
  # Drop garbage first so only genuinely live objects are scanned.
  gc.collect()

  gc_objs = gc.get_objects()

  all_objs = dict()
  # NOTE(review): `tracking` (forward references) is built but never read
  # afterwards — possibly left for debugging; confirm before removing.
  tracking = collections.defaultdict(list)
  tracked_by = collections.defaultdict(list)
  for obj in gc_objs:
    all_objs[id(obj)] = obj

    referred = _get_referred(obj)
    for rname, robj in referred:
      tracking[id(obj)].append((rname, robj))
      # Reverse map: who refers to robj, and under which name.
      tracked_by[id(robj)].append((rname, obj))
      all_objs[id(robj)] = robj

  report = []
  for obj in all_objs.values():
    try:
      if (trackres := tracker.track(obj)) is not None:
        prio, info = trackres

        refs = _get_tracking_references(obj, tracked_by,
                                        max_references=max_references)

        treport = [info]
        for r in refs:
          treport.append(f' refby = {r[0]} ({r[1]})')

        report.append((prio, treport))
    except Exception as ex:
      # Best effort: a misbehaving tracker/object must not abort the scan.
      alog.warning(f'Exception while tracking objects: {ex}')

  # Flatten the per-object sections in priority order.
  sreport = []
  for r in sorted(report, key=lambda r: r[0]):
    sreport.extend(r[1])

  return '\n'.join(sreport)
|
|
88
|
+
|