python-misc-utils 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_misc_utils/__init__.py +0 -0
- py_misc_utils/abs_timeout.py +12 -0
- py_misc_utils/alog.py +311 -0
- py_misc_utils/app_main.py +179 -0
- py_misc_utils/archive_streamer.py +112 -0
- py_misc_utils/assert_checks.py +118 -0
- py_misc_utils/ast_utils.py +121 -0
- py_misc_utils/async_manager.py +189 -0
- py_misc_utils/break_control.py +63 -0
- py_misc_utils/buffered_iterator.py +35 -0
- py_misc_utils/cached_file.py +507 -0
- py_misc_utils/call_limiter.py +26 -0
- py_misc_utils/call_result_selector.py +13 -0
- py_misc_utils/cleanups.py +85 -0
- py_misc_utils/cmd.py +97 -0
- py_misc_utils/compression.py +116 -0
- py_misc_utils/cond_waiter.py +13 -0
- py_misc_utils/context_base.py +18 -0
- py_misc_utils/context_managers.py +67 -0
- py_misc_utils/core_utils.py +577 -0
- py_misc_utils/daemon_process.py +252 -0
- py_misc_utils/data_cache.py +46 -0
- py_misc_utils/date_utils.py +90 -0
- py_misc_utils/debug.py +24 -0
- py_misc_utils/dyn_modules.py +50 -0
- py_misc_utils/dynamod.py +103 -0
- py_misc_utils/env_config.py +35 -0
- py_misc_utils/executor.py +239 -0
- py_misc_utils/file_overwrite.py +29 -0
- py_misc_utils/fin_wrap.py +77 -0
- py_misc_utils/fp_utils.py +47 -0
- py_misc_utils/fs/__init__.py +0 -0
- py_misc_utils/fs/file_fs.py +127 -0
- py_misc_utils/fs/ftp_fs.py +242 -0
- py_misc_utils/fs/gcs_fs.py +196 -0
- py_misc_utils/fs/http_fs.py +241 -0
- py_misc_utils/fs/s3_fs.py +417 -0
- py_misc_utils/fs_base.py +133 -0
- py_misc_utils/fs_utils.py +207 -0
- py_misc_utils/gcs_fs.py +169 -0
- py_misc_utils/gen_indices.py +54 -0
- py_misc_utils/gfs.py +371 -0
- py_misc_utils/git_repo.py +77 -0
- py_misc_utils/global_namespace.py +110 -0
- py_misc_utils/http_async_fetcher.py +139 -0
- py_misc_utils/http_server.py +196 -0
- py_misc_utils/http_utils.py +143 -0
- py_misc_utils/img_utils.py +20 -0
- py_misc_utils/infix_op.py +20 -0
- py_misc_utils/inspect_utils.py +205 -0
- py_misc_utils/iostream.py +21 -0
- py_misc_utils/iter_file.py +117 -0
- py_misc_utils/key_wrap.py +46 -0
- py_misc_utils/lazy_import.py +25 -0
- py_misc_utils/lockfile.py +164 -0
- py_misc_utils/mem_size.py +64 -0
- py_misc_utils/mirror_from.py +72 -0
- py_misc_utils/mmap.py +16 -0
- py_misc_utils/module_utils.py +196 -0
- py_misc_utils/moving_average.py +19 -0
- py_misc_utils/msgpack_streamer.py +26 -0
- py_misc_utils/multi_wait.py +24 -0
- py_misc_utils/multiprocessing.py +102 -0
- py_misc_utils/named_array.py +224 -0
- py_misc_utils/no_break.py +46 -0
- py_misc_utils/no_except.py +32 -0
- py_misc_utils/np_ml_framework.py +184 -0
- py_misc_utils/np_utils.py +346 -0
- py_misc_utils/ntuple_utils.py +38 -0
- py_misc_utils/num_utils.py +54 -0
- py_misc_utils/obj.py +73 -0
- py_misc_utils/object_cache.py +100 -0
- py_misc_utils/object_tracker.py +88 -0
- py_misc_utils/ordered_set.py +71 -0
- py_misc_utils/osfd.py +27 -0
- py_misc_utils/packet.py +22 -0
- py_misc_utils/parquet_streamer.py +69 -0
- py_misc_utils/pd_utils.py +254 -0
- py_misc_utils/periodic_task.py +61 -0
- py_misc_utils/pickle_wrap.py +121 -0
- py_misc_utils/pipeline.py +98 -0
- py_misc_utils/remap_pickle.py +50 -0
- py_misc_utils/resource_manager.py +155 -0
- py_misc_utils/rnd_utils.py +56 -0
- py_misc_utils/run_once.py +19 -0
- py_misc_utils/scheduler.py +135 -0
- py_misc_utils/select_params.py +300 -0
- py_misc_utils/signal.py +141 -0
- py_misc_utils/skl_utils.py +270 -0
- py_misc_utils/split.py +147 -0
- py_misc_utils/state.py +53 -0
- py_misc_utils/std_module.py +56 -0
- py_misc_utils/stream_dataframe.py +176 -0
- py_misc_utils/streamed_file.py +144 -0
- py_misc_utils/tempdir.py +79 -0
- py_misc_utils/template_replace.py +51 -0
- py_misc_utils/tensor_stream.py +269 -0
- py_misc_utils/thread_context.py +33 -0
- py_misc_utils/throttle.py +30 -0
- py_misc_utils/time_trigger.py +18 -0
- py_misc_utils/timegen.py +11 -0
- py_misc_utils/traceback.py +49 -0
- py_misc_utils/tracking_executor.py +91 -0
- py_misc_utils/transform_array.py +42 -0
- py_misc_utils/uncompress.py +35 -0
- py_misc_utils/url_fetcher.py +157 -0
- py_misc_utils/utils.py +538 -0
- py_misc_utils/varint.py +50 -0
- py_misc_utils/virt_array.py +52 -0
- py_misc_utils/weak_call.py +33 -0
- py_misc_utils/work_results.py +100 -0
- py_misc_utils/writeback_file.py +43 -0
- python_misc_utils-0.2.dist-info/METADATA +36 -0
- python_misc_utils-0.2.dist-info/RECORD +117 -0
- python_misc_utils-0.2.dist-info/WHEEL +5 -0
- python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
- python_misc_utils-0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import queue
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
from . import alog
|
|
6
|
+
from . import assert_checks as tas
|
|
7
|
+
from . import file_overwrite as fow
|
|
8
|
+
from . import gfs
|
|
9
|
+
from . import tempdir as tmpd
|
|
10
|
+
from . import utils as ut
|
|
11
|
+
from . import work_results as wres
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def resolve_url(fss, url, fs_kwargs):
  """Return (filesystem, normalized path) for url, using fss as a proto->fs cache."""
  fs = fss.get(gfs.get_proto(url))
  if fs is not None:
    fpath = fs.norm_url(url)
  else:
    fs, fpath = gfs.resolve_fs(url, **fs_kwargs)
    # Register the new filesystem under every protocol ID it serves.
    for fsid in fs.IDS:
      fss[fsid] = fs

  return fs, fpath
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def fetcher(path, fs_kwargs, uqueue, rqueue):
  """Worker loop: pull URLs from uqueue, fetch each into a work file under
  path, and report completion (success or error) by pushing the URL onto
  rqueue.

  An empty/falsy URL is the shutdown sentinel and terminates the loop.
  """
  fss = dict()  # per-worker protocol -> filesystem cache
  while True:
    url = uqueue.get()
    if not url:
      break

    wpath = wres.work_path(path, url)

    alog.verbose(f'Fetching "{url}"')
    try:
      fs, fpath = resolve_url(fss, url, fs_kwargs)

      with wres.write_result(wpath) as fd:
        for data in fs.get_file(fpath):
          fd.write(data)
    except Exception as ex:
      # Record the failure in the work file so the consumer sees the error.
      wres.write_error(wpath, ex, workid=url)
    finally:
      # Always retire the URL, success or failure, so waiters unblock.
      rqueue.put(url)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class UrlFetcher:
  """Parallel URL downloader backed by a pool of daemon threads.

  Fetched content is written as "work files" (see work_results) under a
  local directory; callers enqueue() URLs and then either wait() on a
  single URL or drain completions via iter_results(). Usable as a context
  manager (start on enter, shutdown on exit).
  """

  def __init__(self, path=None, num_workers=None, fs_kwargs=None):
    fs_kwargs = fs_kwargs or dict()
    # Default the filesystem timeout from $FETCHER_TIMEO (seconds).
    fs_kwargs = ut.dict_setmissing(
      fs_kwargs,
      timeout=ut.getenv('FETCHER_TIMEO', dtype=float, defval=10.0),
    )

    self._ctor_path = path  # None means "create (and later remove) a temp dir"
    self._path = None
    # Fetching is I/O bound, so heavily oversubscribe the CPU count.
    self._num_workers = num_workers or max(os.cpu_count() * 4, 128)
    self._fs_kwargs = fs_kwargs
    self._uqueue = self._rqueue = None  # URLs to fetch / retired URLs
    self._workers = []
    self._pending = set()  # URLs enqueued but not yet retired

  def start(self):
    """Create the work directory and spawn the fetcher threads."""
    if self._ctor_path is None:
      self._path = tmpd.fastfs_dir()
    else:
      self._path = self._ctor_path

    self._uqueue = queue.Queue()
    self._rqueue = queue.Queue()
    for i in range(self._num_workers):
      worker = threading.Thread(
        target=fetcher,
        args=(self._path, self._fs_kwargs, self._uqueue, self._rqueue),
        daemon=True,
      )
      worker.start()
      self._workers.append(worker)

  def shutdown(self):
    """Stop all workers, drop state, and remove the work dir if we own it."""
    alog.verbose(f'Sending shutdowns down the queue')
    # An empty URL is the per-worker shutdown sentinel (see fetcher()).
    for _ in range(len(self._workers)):
      self._uqueue.put('')

    alog.verbose(f'Joining fetcher workers')
    for worker in self._workers:
      worker.join()

    self._uqueue = self._rqueue = None
    self._workers = []

    # Only delete the directory when it was auto-created in start().
    if self._path != self._ctor_path:
      gfs.rmtree(self._path, ignore_errors=True)

    self._path = None
    self._pending = set()

  def enqueue(self, *urls):
    """Queue URLs for fetching; returns {url: work_hash}. Falsy URLs are
    skipped (empty string is the shutdown sentinel)."""
    wmap = dict()
    for url in urls:
      if url:
        self._uqueue.put(url)
        self._pending.add(url)
        wmap[url] = wres.work_hash(url)

    return wmap

  def wait(self, url):
    """Block until url is fetched, then return (and delete) its work result."""
    wpath = wres.work_path(self._path, url)
    if not os.path.isfile(wpath):
      tas.check(url in self._pending, msg=f'URL already retired: {url}')

      # Drain retirements until our URL shows up.
      while self._pending:
        rurl = self._rqueue.get()
        self._pending.discard(rurl)
        if rurl == url:
          break

    try:
      # NOTE(review): wait() uses wres.get_work() while iter_results() uses
      # wres.load_work() -- confirm the two accessors are equivalent.
      return wres.get_work(wpath)
    finally:
      os.remove(wpath)

  def iter_results(self, max_results=None, block=True, timeout=None):
    """Yield (url, work_data) for completed fetches, consuming (deleting)
    their work files. Stops on queue timeout/empty, when all pending URLs
    are retired, or after max_results items."""
    count = 0
    while self._pending:
      try:
        rurl = self._rqueue.get(block=block, timeout=timeout)

        self._pending.discard(rurl)
        wpath = wres.work_path(self._path, rurl)

        wdata = wres.load_work(wpath)

        os.remove(wpath)

        yield rurl, wdata
      except queue.Empty:
        break

      count += 1
      if max_results is not None and count >= max_results:
        break

  def __enter__(self):
    self.start()

    return self

  def __exit__(self, *exc):
    self.shutdown()

    # Never suppress exceptions raised inside the with-block.
    return False
|
|
157
|
+
|
py_misc_utils/utils.py
ADDED
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
import array
|
|
2
|
+
import collections
|
|
3
|
+
import datetime
|
|
4
|
+
import inspect
|
|
5
|
+
import json
|
|
6
|
+
import math
|
|
7
|
+
import os
|
|
8
|
+
import pickle
|
|
9
|
+
import re
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
import types
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from . import alog
|
|
18
|
+
from . import assert_checks as tas
|
|
19
|
+
from . import core_utils as cu
|
|
20
|
+
from . import file_overwrite as fow
|
|
21
|
+
from . import gfs
|
|
22
|
+
from . import inspect_utils as iu
|
|
23
|
+
from . import mmap as mm
|
|
24
|
+
from . import obj
|
|
25
|
+
from . import split as sp
|
|
26
|
+
from . import template_replace as tr
|
|
27
|
+
from . import traceback as tb
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _None:
  """Sentinel type used to tell "key missing" apart from a stored None."""

  def __repr__(self):
    return 'NONE'


# Single module-wide sentinel instance.
_NONE = _None()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def pickle_proto():
  """Pickle protocol to use, overridable via the PICKLE_PROTO env variable."""
  return getenv('PICKLE_PROTO', dtype=int, defval=pickle.HIGHEST_PROTOCOL)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def fname():
  """Return the name of the calling function (one frame up the stack)."""
  return tb.get_frame(1).f_code.co_name
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _stri(obj, seen, ffmt, dexp):
  """Recursive worker for stri(): render obj to a compact one-line string.

  seen maps id(obj) -> rendered string, or None while the object is being
  rendered, so reference cycles print as '...'.
  ffmt is the float format spec; dexp enables Classname(...) expansion of
  objects with a __dict__.
  """
  oid = id(obj)
  sres = seen.get(oid, _NONE)
  if sres is None:
    return '...'  # cycle: this object is currently being rendered
  elif sres is not _NONE:
    return sres  # already rendered; reuse the cached string

  seen[oid] = None  # mark in-progress for cycle detection
  if isinstance(obj, str):
    obj_str = obj.replace('"', '\\"')
    result = f'"{obj_str}"'
  elif isinstance(obj, float):
    result = f'{obj:{ffmt}}'
  elif isinstance(obj, bytes):
    # assumes the bytes are valid UTF-8 -- decode() raises otherwise
    result = obj.decode()
  elif cu.is_namedtuple(obj):
    result = str(obj)
  elif cu.is_sequence(obj):
    sl = ', '.join(_stri(x, seen, ffmt, dexp) for x in obj)

    result = '[' + sl + ']' if isinstance(obj, list) else '(' + sl + ')'
  elif cu.isdict(obj):
    result = '{' + ', '.join(f'{k}={_stri(v, seen, ffmt, dexp)}' for k, v in obj.items()) + '}'
  elif dexp and hasattr(obj, '__dict__'):
    # Drop the braces around the __dict__ output, and use the "Classname(...)" format.
    drepr = _stri(obj.__dict__, seen, ffmt, dexp)
    result = f'{iu.cname(obj)}({drepr[1: -1]})'
  else:
    result = str(obj)

  seen[oid] = result

  return result
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def stri(l, float_fmt=None, dict_expand=False):
  """Render an arbitrary Python value to a compact one-line string."""
  ffmt = float_fmt or '.3e'

  return _stri(l, dict(), ffmt, dict_expand)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def repr_fmt(obj, *fields, repr_none=False, sep=', '):
  """Build a repr-style "name=value" string from attributes of obj.

  Each field spec is a name, optionally prefixed with '!' (emit even when
  the value is None) and optionally suffixed with "=label"; a bare '='
  (empty label) emits the value without the "label=" prefix.
  NOTE(review): when the spec regex matches (it matches any \\w+ name),
  rnone is reassigned from the '!' flag alone, so the repr_none kwarg is
  effectively ignored -- confirm whether that is intended.
  """
  parts = []
  # NOTE(review): local name shadows the module-level fname() helper.
  for fname in expand_strings(*fields):
    rnone, xname = repr_none, fname

    m = re.match(r'([!])?(\w+)\s*(=\s*(\w*))?', fname)
    if m:
      fname = m.group(2)
      rnone = m.group(1) == '!'
      xname = m.group(4)
      if xname is None:
        xname = fname

    data = getattr(obj, fname, None)
    if data is not None or rnone:
      parts.append(f'{xname}={data}' if xname else str(data))

  return sep.join(parts)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def mget(d, *args, as_dict=False):
  """Fetch several keys from d; args may be comma-joined name strings.

  Returns a dict when as_dict is true, otherwise a tuple in key order;
  missing keys yield None.
  """
  keys = expand_strings(*args)
  if as_dict:
    return {key: d.get(key) for key in keys}

  return tuple(d.get(key) for key in keys)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def getvar(obj, name, defval=None):
  """Uniform lookup: dict-like objects use get(), everything else getattr()."""
  if cu.isdict(obj):
    return obj.get(name, defval)

  return getattr(obj, name, defval)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def dict_subset(d, *keys):
  """Return {k: d[k]} for the requested keys that are actually present in d."""
  result = dict()
  for key in expand_strings(*keys):
    value = d.get(key, _NONE)
    # The _NONE sentinel lets stored None values survive into the subset.
    if value is not _NONE:
      result[key] = value

  return result
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def dict_setmissing(d, **kwargs):
  """Return a new dict using kwargs as defaults, overridden by entries of d."""
  merged = dict(kwargs)
  merged.update(d)

  return merged
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def pop_kwargs(kwargs, names, args_key=None):
  """Pop the given names out of kwargs, returning their values as a tuple.

  If kwargs holds an "args dict" under args_key (default '_'), values are
  read from that dict instead, and kwargs itself is left untouched apart
  from removing the args dict. Missing names yield None.
  """
  xargs = kwargs.pop(args_key or '_', None)
  if xargs is not None:
    args = [xargs.get(name) for name in expand_strings(names)]
  else:
    args = [kwargs.pop(name, None) for name in expand_strings(names)]

  return tuple(args)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def resplit(csstr, sep):
  """Split csstr on sep (regex text), swallowing surrounding whitespace."""
  return sp.split(csstr, r'\s*' + sep + r'\s*')
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def comma_split(csstr):
  """Split a comma-separated string, trimming whitespace around commas."""
  return sp.split(csstr, r'\s*,\s*')
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def ws_split(data):
  """Split data on runs of whitespace."""
  return sp.split(data, r'\s+')
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def expand_strings(*args):
  """Flatten args into a tuple of strings, comma-splitting any plain string."""
  expanded = []
  for arg in args:
    expanded.extend(arg if cu.is_sequence(arg) else comma_split(arg))

  return tuple(expanded)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def name_values(base_name, values):
  """Pair base_name with values; multi-element sequences get ".N" suffixes."""
  if not isinstance(values, (list, tuple)):
    return ((base_name, values),)
  if len(values) == 1:
    return ((base_name, values[0]),)

  return tuple((f'{base_name}.{i}', v) for i, v in enumerate(values))
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def write_config(cfg, dest, **kwargs):
  """Write cfg to dest as YAML, atomically via FileOverwrite.

  Defaults to block style; extra kwargs are forwarded to yaml.dump().
  """
  # setdefault instead of get + explicit keyword: the original passed
  # default_flow_style both explicitly and via **kwargs, which raised
  # TypeError whenever callers supplied it themselves.
  kwargs.setdefault('default_flow_style', False)

  with fow.FileOverwrite(dest, mode='wt') as df:
    yaml.dump(cfg, df, **kwargs)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def config_to_string(cfg, **kwargs):
  """Serialize cfg to a YAML string (block style unless overridden).

  Extra kwargs are forwarded to yaml.dump().
  """
  # setdefault avoids the duplicate-keyword TypeError the original hit when
  # callers passed default_flow_style explicitly.
  kwargs.setdefault('default_flow_style', False)

  return yaml.dump(cfg, **kwargs)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def parse_config(cfg):
  """Parse configuration from an inline YAML string or from a file path.

  Strings starting with '[' or '{' are treated as inline YAML/JSON data;
  anything else is assumed to be a path readable through gfs.
  """
  if not re.match(r'[\[\{]', cfg):
    # It must be either a dictionary in YAML format, or a valid path.
    with gfs.open(cfg, mode='r') as fd:
      data = fd.read()
  else:
    data = cfg

  return yaml.safe_load(data)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def load_config(path, extra=None):
  """Load a config dict from path, overlaying the non-None entries of extra."""
  cfgd = parse_config(path)

  if extra:
    cfgd.update({k: v for k, v in extra.items() if v is not None})

  return cfgd
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def fatal(msg, exc=RuntimeError):
  """Raise exc(msg) via alog, attributing the error to the caller's frame."""
  alog.xraise(exc, msg, stacklevel=2)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def assert_instance(msg, t, ta):
  """Raise ValueError (via alog) unless t is an instance of ta.

  ta may be a single type or a list/tuple of acceptable types; the error
  message enumerates the acceptable class names.
  """
  if not isinstance(t, ta):
    parts = [msg, f': {iu.cname(t)} is not ']
    if isinstance(ta, (list, tuple)):
      parts.append('one of (')
      parts.append(', '.join(iu.cname(x) for x in ta))
      parts.append(')')
    else:
      parts.append(f'a {iu.cname(ta)}')

    alog.xraise(ValueError, ''.join(parts))
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def make_object(**kwargs):
  """Create an obj.Obj whose attributes are the given keyword arguments."""
  return obj.Obj(**kwargs)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def make_object_recursive(**kwargs):
  """Build an obj.Obj, recursively converting nested dict values to objects."""
  converted = {k: make_object_recursive(**v) if cu.isdict(v) else v
               for k, v in kwargs.items()}

  return make_object(**converted)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def locals_capture(locs, exclude='self'):
  """Snapshot a locals() dict into an object; 'self' is always excluded."""
  exclude = set(expand_strings(exclude, 'self'))

  return make_object(**{k: v for k, v in locs.items() if k not in exclude})
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def sreplace(rex, data, mapfn, nmapfn=None, join=True):
  """Regex-driven string rewrite.

  Every match of rex (which must define a group 1) is replaced with
  mapfn(group 1); the text between matches is passed through nmapfn
  (identity when omitted). Returns the joined string, or the raw parts
  list when join is False.
  """
  # The original referenced an undefined name "ident" here, raising
  # NameError whenever nmapfn was omitted; default to identity instead.
  if nmapfn is None:
    nmapfn = lambda x: x

  lastpos, parts = 0, []
  for m in re.finditer(rex, data):
    start, end = m.span()
    if start > lastpos:
      parts.append(nmapfn(data[lastpos: start]))

    lastpos = end
    parts.append(mapfn(m.group(1)))

  if lastpos < len(data):
    parts.append(nmapfn(data[lastpos:]))

  return ''.join(parts) if join else parts
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def as_sequence(v, t=tuple):
  """Coerce v into sequence type t (or the first of several allowed types)."""
  if isinstance(t, (list, tuple)):
    # Multiple acceptable container types: return v untouched if it already
    # is one of them, otherwise build the first type.
    for cls in t:
      if isinstance(v, cls):
        return v

    return t[0](v) if isinstance(v, types.GeneratorType) else t[0]([v])

  if isinstance(v, t):
    return v

  return t(v) if cu.is_sequence(v) else t([v])
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def format(seq, fmt):
  """Format every element of seq with fmt, preserving the sequence type.

  NOTE: shadows the format() builtin within this module.
  """
  return type(seq)(f'{x:{fmt}}' for x in seq)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def value_or(v, defval):
  """Return v unless it is None, in which case return defval."""
  return defval if v is None else v
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def dict_rget(sdict, path, defval=None, sep='/'):
  """Nested dict lookup along a key path ('a/b/c' string or key sequence)."""
  keys = path if isinstance(path, (list, tuple)) else path.strip(sep).split(sep)

  node = sdict
  for key in keys:
    if not cu.isdict(node):
      return defval
    node = node.get(key, defval)

  return node
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def make_index_dict(vals):
  """Map each value to its positional index within vals."""
  return {value: index for index, value in enumerate(vals)}
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def append_index_dict(xlist, xdict, value):
  """Append value to xlist and record its index in xdict."""
  xdict[value] = len(xlist)
  xlist.append(value)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def compile(code, syms, env=None, vals=None, lookup_fn=None, delim=None):
  """Exec code (after optional template substitution) and return the
  requested symbols from its namespace as a tuple.

  NOTE: shadows the compile() builtin within this module.
  SECURITY: exec() runs arbitrary code -- never pass untrusted input.
  """
  # Note that objects compiled with this API cannot be pickled.
  # If that is a requirement, use the dynamod module.
  env = value_or(env, dict())
  if vals is not None or lookup_fn is not None:
    code = tr.template_replace(code, vals=vals, lookup_fn=lookup_fn, delim=delim)

  exec(code, env)

  return tuple(env.get(s) for s in expand_strings(syms))
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def run(path, fnname, *args, **kwargs):
  """Load the source file at path, compile it, and call fnname(*args, **kwargs).

  A 'compile_args' kwarg, if present, is popped and forwarded to compile().
  """
  compile_args, = pop_kwargs(kwargs, 'compile_args')

  fn, = compile(mm.file_view(path), fnname, **(compile_args or dict()))

  return fn(*args, **kwargs)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def unpack_n(l, n, defval=None):
  """Return exactly n elements from l, truncating or right-padding with defval.

  Single (non-sequence) values are treated as one-element sequences.
  """
  # Coerce to a list: the original coerced to a tuple and then attempted
  # tuple + list concatenation for padding, raising TypeError whenever
  # len(l) < n.
  l = list(as_sequence(l))

  return tuple(l[:n] if len(l) >= n else l + [defval] * (n - len(l)))
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def getenv(name, dtype=None, defval=None):
  """Read environment variable name with optional default and type coercion.

  defval may be a callable producing the default lazily. Returns None when
  neither the variable nor a default yields a value. (os.getenv expects a
  string default, so the default is applied here instead.)
  """
  value = os.getenv(name)
  if value is None:
    value = defval() if callable(defval) else defval
  if value is None:
    return None

  return cu.to_type(value, dtype) if dtype is not None else value
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def env(name, defval, vtype=None):
  """getenv() variant with a required default (legacy argument order)."""
  return getenv(name, dtype=vtype, defval=defval)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def envs(*args, as_dict=False):
  """Fetch several environment variables at once via mget()."""
  return mget(os.environ, *args, as_dict=as_dict)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def import_env(dest, *args):
  """Copy the named environment variables into dest, inferring value types."""
  for name, value in envs(*args, as_dict=True).items():
    dest[name] = cu.infer_value(value)

  return dest
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def map_env(g, prefix=''):
  """Override entries of g in place from same-named (prefixed) env variables.

  Each environment value is coerced to the type of the current entry.
  NOTE(review): entries whose current value is None coerce via type(None)
  -- confirm core_utils.to_type handles NoneType.
  """
  ovr = dict()
  for k, v in g.items():
    ev = getenv(f'{prefix}{k}', dtype=type(v))
    if ev is not None:
      ovr[k] = ev

  g.update(ovr)

  return g
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
MAJOR = 1
|
|
382
|
+
MINOR = -1
|
|
383
|
+
|
|
384
|
+
def squeeze(shape, keep_dims=0, sdir=MAJOR):
|
|
385
|
+
sshape = list(shape)
|
|
386
|
+
if sdir == MAJOR:
|
|
387
|
+
while len(sshape) > keep_dims and sshape[0] == 1:
|
|
388
|
+
sshape = sshape[1:]
|
|
389
|
+
elif sdir == MINOR:
|
|
390
|
+
while len(sshape) > keep_dims and sshape[-1] == 1:
|
|
391
|
+
sshape = sshape[: -1]
|
|
392
|
+
else:
|
|
393
|
+
alog.xraise(ValueError, f'Unknown squeeze direction: {sdir}')
|
|
394
|
+
|
|
395
|
+
return type(shape)(sshape)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def flat2shape(data, shape):
  """Reshape a flat sequence into nested lists according to shape.

  For an Mx...xK input shape, returns an M-element (nested) list.
  """
  tas.check_eq(len(data), np.prod(shape),
               msg=f'Shape {shape} is unsuitable for a {len(data)} long array')

  # Chunk from the innermost dimension outwards. The original iterated
  # range(len(data), n) -- an empty range -- so the data was never nested.
  for n in reversed(shape[1:]):
    data = [data[i: i + n] for i in range(0, len(data), n)]

  return data
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def shape2flat(data, shape):
  """Flatten a nested sequence of the given shape into a flat tuple."""
  for _ in range(len(shape) - 1):
    tas.check(hasattr(data, '__iter__'), msg=f'Wrong data type: {type(data)}')
    ndata = []
    for av in data:
      # Report the offending element's type; the original printed the
      # enclosing container's type, making the diagnostic misleading.
      tas.check(hasattr(av, '__iter__'), msg=f'Wrong data type: {type(av)}')
      ndata.extend(av)

    data = ndata

  tas.check_eq(len(data), np.prod(shape),
               msg=f'Shape {shape} is unsuitable for a {len(data)} long array')

  return tuple(data)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def binary_reduce(parts, reduce_fn):
  """Tree-reduce parts pairwise with reduce_fn until a single value remains."""
  while len(parts) > 1:
    merged = []
    start = len(parts) % 2  # odd count: carry the first element over as-is
    if start:
      merged.append(parts[0])
    for i in range(start, len(parts) - 1, 2):
      merged.append(reduce_fn(parts[i], parts[i + 1]))

    parts = merged

  return parts[0]
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def stringify(s):
  """Rewrite s with every non-container leaf converted to its str() form.

  rwfn returns None for lists/tuples/dicts; presumably cu.data_rewrite
  treats a None result as "recurse into the container" -- confirm against
  core_utils.data_rewrite.
  """
  def rwfn(v):
    if not (isinstance(v, (list, tuple)) or cu.isdict(v)):
      return str(v)

  return cu.data_rewrite(s, rwfn)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def seq_rewrite(seq, sd):
  """Map each element through the sd dict, keeping unmapped elements as-is."""
  return type(seq)(map(lambda item: sd.get(item, item), seq))
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def dfetch(d, *args):
  """Fetch the given keys from d as a tuple; missing keys raise KeyError."""
  return tuple(map(d.__getitem__, args))
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def numel(t):
  """Number of elements in t: product of its shape if it has one, else len()."""
  # Local renamed from "sp", which shadowed this module's "split" import alias.
  shape = cu.get_property(t, 'shape')

  return np.prod(shape) if shape is not None else len(t)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def scale_data(data, base_data, scale):
  """Relative change of data w.r.t. base_data, expressed in units of scale."""
  relative = (data - base_data) / base_data

  return relative * scale
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
# Unsigned array.array typecodes paired with their item sizes, smallest first.
_ARRAY_SIZES = tuple((array.array(c).itemsize, c) for c in 'B,H,I,L,Q'.split(','))

def array_code(size):
  """Smallest unsigned array.array typecode whose items can count up to size."""
  nbytes = math.ceil(math.log2(size)) / 8
  for itemsize, code in _ARRAY_SIZES:
    if itemsize >= nbytes:
      return code

  alog.xraise(ValueError,
              f'Size {size} too big to fit inside any array integer types')
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def checked_remove(l, o):
  """Remove o from l if present; True on success, False when absent."""
  try:
    l.remove(o)
    return True
  except ValueError:
    return False
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def sleep_until(date, msg=None):
  """Block until the given datetime; no-op if it is already in the past.

  msg, when given, is logged just before going to sleep.
  """
  now = datetime.datetime.now(tz=date.tzinfo)
  if date <= now:
    return
  if msg:
    alog.info(msg)
  time.sleep(date.timestamp() - now.timestamp())
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def parse_dict(data):
  """Parse a YAML/JSON string into Python data using the safe loader."""
  return yaml.safe_load(data)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def parse_args(in_args):
  """Parse 'a,b=1,c' style argument strings into (args, kwargs).

  Each comma-separated token is either 'name=value' (goes into kwargs) or
  a bare value (appended to args); values are YAML-parsed so numbers and
  booleans come out typed.
  """
  seq_args = comma_split(in_args) if isinstance(in_args, str) else in_args

  args, kwargs = [], dict()
  for arg in seq_args:
    parts = cu.separate(arg, '=')
    if len(parts) == 2:
      kwargs[parts[0]] = yaml.safe_load(parts[1])
    elif len(parts) == 1:
      args.append(yaml.safe_load(parts[0]))
    else:
      alog.xraise(ValueError, f'Syntax error: {arg}')

  return args, kwargs
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def state_update(path, **kwargs):
  """Merge kwargs into the pickled state dict stored at path and return it.

  A missing file starts from an empty dict; the file is only rewritten
  (atomically, via FileOverwrite) when kwargs is non-empty.
  SECURITY: unpickles the existing file -- path must be trusted.
  """
  if sfile := gfs.maybe_open(path, mode='rb'):
    with sfile as fd:
      state = pickle.load(fd)
  else:
    state = dict()

  if kwargs:
    state.update(kwargs)
    with fow.FileOverwrite(path, mode='wb') as f:
      pickle.dump(state, f, protocol=pickle_proto())

  return state
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def copy_inplace(dest, src):
  """Copy src's attributes onto dest in place and return dest.

  Uses __dict__.update() when available, otherwise copies slot by slot.
  """
  dest_dict = getattr(dest, '__dict__', None)
  if dest_dict is None:
    # Slots-only object: transfer each declared slot individually.
    for slot in iu.class_slots(dest):
      setattr(dest, slot, getattr(src, slot, None))
  else:
    dest_dict.update(src.__dict__)

  return dest
|
|
538
|
+
|