relib 1.2.11__tar.gz → 1.3.0__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: relib
- Version: 1.2.11
+ Version: 1.3.0
  Project-URL: Repository, https://github.com/Reddan/relib.git
  Author: Hampus Hallman
  License: Copyright 2018-2025 Hampus Hallman
@@ -1,6 +1,6 @@
  [project]
  name = "relib"
- version = "1.2.11"
+ version = "1.3.0"
  requires-python = ">=3.12"
  dependencies = []
  authors = [
@@ -0,0 +1,6 @@
+ from .dict_utils import *
+ from .io_utils import *
+ from .iter_utils import *
+ from .processing_utils import *
+ from .runtime_tools import *
+ from .type_utils import *
@@ -0,0 +1,97 @@
+ from typing import Any, Callable, Iterable, overload
+ from .type_utils import as_any
+
+ __all__ = [
+   "deepen_dict", "dict_by", "dict_firsts",
+   "flatten_dict_inner", "flatten_dict",
+   "get_at", "group",
+   "key_of",
+   "map_dict", "merge_dicts",
+   "omit",
+   "pick",
+   "tuple_by",
+ ]
+
+ def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
+   if len(dicts) == 1:
+     return dicts[0]
+   result = {}
+   for d in dicts:
+     result |= d
+   return result
+
+ def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+   if keys:
+     d = dict(d)
+     for key in keys:
+       del d[key]
+   return d
+
+ def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+   return {key: d[key] for key in keys}
+
+ def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
+   return dict(zip(keys, values))
+
+ def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
+   return tuple(d[key] for key in keys)
+
+ def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
+   return {key: fn(value) for key, value in d.items()}
+
+ def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
+   return [d[key] for d in dicts]
+
+ def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
+   try:
+     for key in keys:
+       d = d[key]
+   except KeyError:
+     return default
+   return as_any(d)
+
+ def dict_firsts[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, T]:
+   result: dict[K, T] = {}
+   for key, item in pairs:
+     if key not in result:
+       result[key] = item
+   return result
+
+ def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
+   values_by_key = {}
+   for key, value in pairs:
+     values_by_key.setdefault(key, []).append(value)
+   return values_by_key
+
+ @overload
+ def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
+ @overload
+ def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
+ @overload
+ def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
+ @overload
+ def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
+ @overload
+ def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
+ @overload
+ def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
+ def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
+   output = {}
+   if () in d:
+     return d[()]
+   for (*tail, head), value in d.items():
+     curr = output
+     for key in tail:
+       curr = curr.setdefault(key, {})
+     curr[head] = value
+   return output
+
+ def flatten_dict_inner(d, prefix=()):
+   for key, value in d.items():
+     if not isinstance(value, dict) or value == {}:
+       yield prefix + (key,), value
+     else:
+       yield from flatten_dict_inner(value, prefix + (key,))
+
+ def flatten_dict(deep_dict: dict, prefix=()) -> dict:
+   return dict(flatten_dict_inner(deep_dict, prefix))
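The hunk above adds a set of dictionary helpers. A minimal usage sketch (illustrative only, not part of the diff; it assumes the names are re-exported at the package top level via the star imports in the new __init__.py shown earlier):

from relib import deepen_dict, flatten_dict, merge_dicts

flat = {("a", "x"): 1, ("a", "y"): 2, ("b", "x"): 3}
nested = deepen_dict(flat)             # {"a": {"x": 1, "y": 2}, "b": {"x": 3}}
assert flatten_dict(nested) == flat    # flatten_dict inverts deepen_dict for this shape
merged = merge_dicts({"a": 1}, {"b": 2}, {"a": 3})  # later dicts win: {"a": 3, "b": 2}
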
@@ -0,0 +1,21 @@
+ import json
+ from pathlib import Path
+ from typing import Any
+
+ __all__ = [
+   "read_json",
+   "write_json",
+ ]
+
+ default_sentinel = object()
+
+ def read_json(path: Path, default=default_sentinel) -> Any:
+   if default is not default_sentinel and not path.exists():
+     return default
+   with path.open("r") as f:
+     return json.load(f)
+
+ def write_json(path: Path, obj: object, indent: None | int = None) -> None:
+   with path.open("w") as f:
+     separators = (",", ":") if indent is None else None
+     return json.dump(obj, f, indent=indent, separators=separators)
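A minimal sketch of the JSON helpers added above (illustrative only; per the code shown, output is compact when no indent is given, and read_json only falls back to the default when the file is missing):

from pathlib import Path
from relib import read_json, write_json

write_json(Path("settings.json"), {"retries": 3}, indent=2)  # pretty-printed
config = read_json(Path("settings.json"))                    # -> {"retries": 3}
fallback = read_json(Path("missing.json"), default={})       # -> {} instead of raising
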
@@ -0,0 +1,94 @@
+ from itertools import chain
+ from typing import Any, Iterable, Literal, overload
+ from .dict_utils import dict_firsts
+
+ __all__ = [
+   "distinct_by", "distinct", "drop_none",
+   "first", "flatten",
+   "intersect",
+   "list_split",
+   "move_value",
+   "num_partitions",
+   "reversed_enumerate",
+   "sized_partitions", "sort_by",
+   "transpose",
+ ]
+
+ def first[T](iterable: Iterable[T]) -> T | None:
+   return next(iter(iterable), None)
+
+ def drop_none[T](iterable: Iterable[T | None]) -> list[T]:
+   return [x for x in iterable if x is not None]
+
+ def distinct[T](iterable: Iterable[T]) -> list[T]:
+   return list(dict.fromkeys(iterable))
+
+ def distinct_by[T](pairs: Iterable[tuple[object, T]]) -> list[T]:
+   return list(dict_firsts(pairs).values())
+
+ def sort_by[T](pairs: Iterable[tuple[Any, T]]) -> list[T]:
+   pairs = sorted(pairs, key=lambda p: p[0])
+   return [v for _, v in pairs]
+
+ def move_value[T](iterable: Iterable[T], from_i: int, to_i: int) -> list[T]:
+   values = list(iterable)
+   values.insert(to_i, values.pop(from_i))
+   return values
+
+ def reversed_enumerate[T](values: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
+   return zip(reversed(range(len(values))), reversed(values))
+
+ def intersect[T](*iterables: Iterable[T]) -> list[T]:
+   return list(set.intersection(*map(set, iterables)))
+
+ @overload
+ def flatten[T](iterable: Iterable[T], depth: Literal[0]) -> list[T]: ...
+ @overload
+ def flatten[T](iterable: Iterable[Iterable[T]], depth: Literal[1] = 1) -> list[T]: ...
+ @overload
+ def flatten[T](iterable: Iterable[Iterable[Iterable[T]]], depth: Literal[2]) -> list[T]: ...
+ @overload
+ def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[T]]]], depth: Literal[3]) -> list[T]: ...
+ @overload
+ def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[Iterable[T]]]]], depth: Literal[4]) -> list[T]: ...
+ @overload
+ def flatten(iterable: Iterable, depth: int) -> list: ...
+ def flatten(iterable: Iterable, depth: int = 1) -> list:
+   for _ in range(depth):
+     iterable = chain.from_iterable(iterable)
+   return list(iterable)
+
+ def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
+   values = [sep, *iterable, sep]
+   split_at = [i for i, x in enumerate(values) if x is sep]
+   ranges = list(zip(split_at[0:-1], split_at[1:]))
+   return [values[start + 1:end] for start, end in ranges]
+
+ def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
+   # "chunk"
+   if not isinstance(values, list):
+     values = list(values)
+   num_parts = (len(values) / part_size).__ceil__()
+   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
+
+ def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
+   if not isinstance(values, list):
+     values = list(values)
+   part_size = (len(values) / num_parts).__ceil__()
+   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
+
+ @overload
+ def transpose[T1, T2](tuples: Iterable[tuple[T1, T2]], default_num_returns=0) -> tuple[list[T1], list[T2]]: ...
+ @overload
+ def transpose[T1, T2, T3](tuples: Iterable[tuple[T1, T2, T3]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3]]: ...
+ @overload
+ def transpose[T1, T2, T3, T4](tuples: Iterable[tuple[T1, T2, T3, T4]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...
+ @overload
+ def transpose[T1, T2, T3, T4, T5](tuples: Iterable[tuple[T1, T2, T3, T4, T5]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...
+ @overload
+ def transpose[T](tuples: Iterable[tuple[T, ...]], default_num_returns=0) -> tuple[list[T], ...]: ...
+ def transpose(tuples: Iterable[tuple], default_num_returns=0) -> tuple[list, ...]:
+   output = tuple(zip(*tuples))
+   if not output:
+     return ([],) * default_num_returns
+   return tuple(map(list, output))
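A short sketch of the iteration helpers added above (illustrative only, based on the definitions shown in the hunk):

from relib import flatten, sized_partitions, transpose

flatten([[1, 2], [3]])                  # [1, 2, 3]
flatten([[[1], [2]], [[3]]], depth=2)   # [1, 2, 3]
sized_partitions(range(5), 2)           # [[0, 1], [2, 3], [4]]
names, ages = transpose([("ada", 36), ("alan", 41)])  # ["ada", "alan"], [36, 41]
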
@@ -0,0 +1,66 @@
+ import re
+ from typing import Any, Callable, Iterable, overload
+
+ __all__ = [
+   "clamp",
+   "df_from_array",
+   "for_each",
+   "noop",
+   "str_filterer", "StrFilter",
+ ]
+
+ def noop() -> None:
+   pass
+
+ def for_each[T](func: Callable[[T], Any], iterable: Iterable[T]) -> None:
+   for item in iterable:
+     func(item)
+
+ @overload
+ def clamp(value: int, low: int, high: int) -> int: ...
+ @overload
+ def clamp(value: float, low: float, high: float) -> float: ...
+ def clamp(value: float, low: float, high: float) -> float:
+   return max(low, min(value, high))
+
+ def _cat_tile(cats, n_tile):
+   import numpy as np
+   return cats[np.tile(np.arange(len(cats)), n_tile)]
+
+ def df_from_array(
+   value_cols: dict[str, Any],
+   dim_labels: list[tuple[str, list[str | int | float]]],
+   indexed=False,
+ ):
+   import numpy as np
+   import pandas as pd
+   dim_sizes = np.array([len(labels) for _, labels in dim_labels])
+   assert all(array.shape == tuple(dim_sizes) for array in value_cols.values())
+   array_offsets = [
+     (dim_sizes[i + 1:].prod(), dim_sizes[:i].prod())
+     for i in range(len(dim_sizes))
+   ]
+   category_cols = {
+     dim: _cat_tile(pd.Categorical(labels).repeat(repeats), tiles)
+     for (dim, labels), (repeats, tiles) in zip(dim_labels, array_offsets)
+   }
+   value_cols = {name: array.reshape(-1) for name, array in value_cols.items()}
+   df = pd.DataFrame({**category_cols, **value_cols}, copy=False)
+   if indexed:
+     df = df.set_index([name for name, _ in dim_labels])
+   return df
+
+ StrFilter = Callable[[str], bool]
+
+ def str_filterer(
+   include_patterns: list[re.Pattern[str]] = [],
+   exclude_patterns: list[re.Pattern[str]] = [],
+ ) -> StrFilter:
+   def str_filter(string: str) -> bool:
+     if any(pattern.search(string) for pattern in exclude_patterns):
+       return False
+     if not include_patterns:
+       return True
+     return any(pattern.search(string) for pattern in include_patterns)
+
+   return str_filter
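A sketch of the clamp and str_filterer helpers added above (illustrative only; the pattern arguments are compiled re.Pattern objects, and exclusion takes precedence over inclusion per the code shown):

import re
from relib import clamp, str_filterer

clamp(150, 0, 100)  # 100

keep = str_filterer(
  include_patterns=[re.compile(r"\.py$")],
  exclude_patterns=[re.compile(r"^test_")],
)
keep("relib/io_utils.py")  # True: matches an include pattern, not excluded
keep("test_io_utils.py")   # False: matches an exclude pattern
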
@@ -1,38 +1,23 @@
  import asyncio
  import contextvars
- import functools
- import json
  import os
  from concurrent.futures import ThreadPoolExecutor
- from pathlib import Path
- from typing import Any, Awaitable, Callable, Iterable, ParamSpec, TypeVar
- from .utils import noop
+ from functools import partial, wraps
+ from time import time
+ from typing import Awaitable, Callable, Iterable, ParamSpec, TypeVar
+ from .processing_utils import noop

  __all__ = [
-   "read_json",
-   "write_json",
-   "clear_console",
-   "console_link",
+   "as_async", "async_limit",
+   "clear_console", "console_link",
    "roll_tasks",
-   "as_async",
-   "async_limit",
+   "measure_duration",
  ]

  P = ParamSpec("P")
  R = TypeVar("R")
- default_workers = min(32, (os.cpu_count() or 1) + 4)
- default_sentinel = object()
-
- def read_json(path: Path, default=default_sentinel) -> Any:
-   if default is not default_sentinel and not path.exists():
-     return default
-   with path.open("r") as f:
-     return json.load(f)

- def write_json(path: Path, obj: object, indent: None | int = None) -> None:
-   with path.open("w") as f:
-     separators = (",", ":") if indent is None else None
-     return json.dump(obj, f, indent=indent, separators=separators)
+ default_workers = min(32, (os.cpu_count() or 1) + 4)

  def clear_console() -> None:
    os.system("cls" if os.name == "nt" else "clear")
@@ -54,18 +39,18 @@ async def roll_tasks[T](tasks: Iterable[Awaitable[T]], workers=default_workers,
    from tqdm import tqdm
    tasks = tasks if isinstance(tasks, list) else list(tasks)
    with tqdm(total=len(tasks)) as pbar:
-     update = functools.partial(pbar.update, 1)
+     update = partial(pbar.update, 1)
      return await asyncio.gather(*[worker(task, semaphore, update) for task in tasks])

  def as_async(workers=default_workers) -> Callable[[Callable[P, R]], Callable[P, Awaitable[R]]]:
    executor = ThreadPoolExecutor(max_workers=workers)

    def on_fn(func: Callable[P, R]) -> Callable[P, Awaitable[R]]:
-     @functools.wraps(func)
+     @wraps(func)
      async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        loop = asyncio.get_running_loop()
        ctx = contextvars.copy_context()
-       fn_call = functools.partial(ctx.run, func, *args, **kwargs)
+       fn_call = partial(ctx.run, func, *args, **kwargs)
        return await loop.run_in_executor(executor, fn_call)
      return wrapper
    return on_fn
@@ -74,9 +59,27 @@ def async_limit(workers=default_workers) -> Callable[[Callable[P, Awaitable[R]]]
    semaphore = asyncio.Semaphore(workers)

    def on_fn(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[R]]:
-     @functools.wraps(func)
+     @wraps(func)
      async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        async with semaphore:
          return await func(*args, **kwargs)
      return wrapper
    return on_fn
+
+ active_mds = []
+
+ class measure_duration:
+   def __init__(self, name):
+     self.name = name
+     active_mds.append(self)
+
+   def __enter__(self):
+     self.start = time()
+
+   def __exit__(self, *_):
+     duration = round(time() - self.start, 4)
+     depth = len(active_mds) - 1
+     indent = ('──' * depth) + (' ' * (depth > 0))
+     text = '{}: {} seconds'.format(self.name, duration)
+     print(indent + text)
+     active_mds.remove(self)
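The modified module above keeps the async helpers and gains measure_duration, which previously lived in its own module (removed later in this diff). A minimal sketch combining as_async and measure_duration (illustrative only, not part of the diff):

import asyncio
from relib import as_async, measure_duration

@as_async(workers=4)
def slow_square(n: int) -> int:
  return n * n  # runs in the decorator's thread pool, awaitable from async code

async def main() -> list[int]:
  with measure_duration("squares"):  # prints "squares: <elapsed> seconds" on exit
    return await asyncio.gather(*[slow_square(n) for n in range(8)])

print(asyncio.run(main()))
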
@@ -0,0 +1,17 @@
+ from typing import Any
+
+ __all__ = [
+   "as_any",
+   "ensure_tuple",
+   "non_none",
+ ]
+
+ def as_any(obj: Any) -> Any:
+   return obj
+
+ def non_none[T](obj: T | None) -> T:
+   assert obj is not None
+   return obj
+
+ def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
+   return value if isinstance(value, tuple) else (value,)
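A small sketch of the typing helpers added above (illustrative only):

from relib import ensure_tuple, non_none

ensure_tuple("a")         # ("a",)
ensure_tuple(("a", "b"))  # returned unchanged
value: int | None = 3
total: int = non_none(value) + 1  # asserts value is not None and narrows the type
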
@@ -106,7 +106,7 @@ wheels = [

  [[package]]
  name = "relib"
- version = "1.2.11"
+ version = "1.3.0"
  source = { editable = "." }

  [package.dev-dependencies]
@@ -1,4 +0,0 @@
- from .utils import *
- from .system import *
- from .hashing import hash, hash_obj
- from .measure_duration import measure_duration
@@ -1,179 +0,0 @@
- # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
- # Copyright (c) 2009 Gael Varoquaux
- # License: BSD Style, 3 clauses.
-
- import pickle
- import hashlib
- import sys
- import types
- import io
- import decimal
-
- try:
-   import numpy
- except:
-   has_numpy = False
- else:
-   has_numpy = True
-
- Pickler = pickle._Pickler
-
-
- class _ConsistentSet(object):
-   def __init__(self, set_sequence):
-     try:
-       self._sequence = sorted(set_sequence)
-     except (TypeError, decimal.InvalidOperation):
-       self._sequence = sorted(map(hash_obj, set_sequence))
-
-
- class _MyHash(object):
-   """ Class used to hash objects that won't normally pickle """
-
-   def __init__(self, *args):
-     self.args = args
-
-
- class Hasher(Pickler):
-   """ A subclass of pickler, to do cryptographic hashing, rather than pickling. """
-
-   def __init__(self, hash_name="md5"):
-     self.stream = io.BytesIO()
-     # We want a pickle protocol that only changes with major Python versions
-     protocol = pickle.HIGHEST_PROTOCOL
-     Pickler.__init__(self, self.stream, protocol=protocol)
-     self._hash = hashlib.new(hash_name)
-
-   def hash(self, obj) -> str:
-     try:
-       self.dump(obj)
-     except pickle.PicklingError as e:
-       e.args += ("PicklingError while hashing %r: %r" % (obj, e),)
-       raise
-     dumps = self.stream.getvalue()
-     self._hash.update(dumps)
-     return self._hash.hexdigest()
-
-   def save(self, obj):
-     if isinstance(obj, (types.MethodType, type({}.pop))):
-       # the Pickler cannot pickle instance methods; here we decompose
-       # them into components that make them uniquely identifiable
-       if hasattr(obj, "__func__"):
-         func_name = obj.__func__.__name__
-       else:
-         func_name = obj.__name__
-       inst = obj.__self__
-       if type(inst) == type(pickle):
-         obj = _MyHash(func_name, inst.__name__)
-       elif inst is None:
-         # type(None) or type(module) do not pickle
-         obj = _MyHash(func_name, inst)
-       else:
-         cls = obj.__self__.__class__
-         obj = _MyHash(func_name, inst, cls)
-     Pickler.save(self, obj)
-
-   def memoize(self, obj):
-     # We want hashing to be sensitive to value instead of reference.
-     # For example we want ["aa", "aa"] and ["aa", "aaZ"[:2]]
-     # to hash to the same value and that's why we disable memoization
-     # for strings
-     if isinstance(obj, (bytes, str)):
-       return
-     Pickler.memoize(self, obj)
-
-   # The dispatch table of the pickler is not accessible in Python
-   # 3, as these lines are only bugware for IPython, we skip them.
-   def save_global(self, obj, name=None):
-     # We have to override this method in order to deal with objects
-     # defined interactively in IPython that are not injected in
-     # __main__
-     try:
-       Pickler.save_global(self, obj, name=name)
-     except pickle.PicklingError:
-       Pickler.save_global(self, obj, name=name)
-       module = getattr(obj, "__module__", None)
-       if module == "__main__":
-         my_name = name
-         if my_name is None:
-           my_name = obj.__name__
-         mod = sys.modules[module]
-         if not hasattr(mod, my_name):
-           # IPython doesn't inject the variables define
-           # interactively in __main__
-           setattr(mod, my_name, obj)
-
-   def _batch_setitems(self, items):
-     try:
-       Pickler._batch_setitems(self, iter(sorted(items)))
-     except TypeError:
-       Pickler._batch_setitems(self, iter(sorted((hash_obj(k), v) for k, v in items)))
-
-   def save_set(self, set_items):
-     Pickler.save(self, _ConsistentSet(set_items))
-
-   dispatch = Pickler.dispatch.copy()
-   dispatch[type(len)] = save_global # builtin
-   dispatch[type(object)] = save_global # type
-   dispatch[type(Pickler)] = save_global # classobj
-   dispatch[type(pickle.dump)] = save_global # function
-   dispatch[type(set())] = save_set
-
-
- class NumpyHasher(Hasher):
-   def __init__(self, hash_name="md5"):
-     Hasher.__init__(self, hash_name=hash_name)
-
-   def save(self, obj):
-     """ Subclass the save method, to hash ndarray subclass, rather
-     than pickling them. Off course, this is a total abuse of
-     the Pickler class.
-     """
-     import numpy as np
-
-     if isinstance(obj, np.ndarray) and not obj.dtype.hasobject:
-       # Compute a hash of the object
-       # The update function of the hash requires a c_contiguous buffer.
-       if obj.shape == ():
-         # 0d arrays need to be flattened because viewing them as bytes
-         # raises a ValueError exception.
-         obj_c_contiguous = obj.flatten()
-       elif obj.flags.c_contiguous:
-         obj_c_contiguous = obj
-       elif obj.flags.f_contiguous:
-         obj_c_contiguous = obj.T
-       else:
-         # Cater for non-single-segment arrays: this creates a
-         # copy, and thus aleviates this issue.
-         # XXX: There might be a more efficient way of doing this
-         obj_c_contiguous = obj.flatten()
-
-       # View the array as bytes to support dtypes like datetime64
-       self._hash.update(memoryview(obj_c_contiguous.view(np.uint8)))
-
-       # The object will be pickled by the pickler hashed at the end.
-       obj = (obj.__class__, ("HASHED", obj.dtype, obj.shape, obj.strides))
-     elif isinstance(obj, np.dtype):
-       # Atomic dtype objects are interned by their default constructor:
-       # np.dtype("f8") is np.dtype("f8")
-       # This interning is not maintained by a
-       # pickle.loads + pickle.dumps cycle, because __reduce__
-       # uses copy=True in the dtype constructor. This
-       # non-deterministic behavior causes the internal memoizer
-       # of the hasher to generate different hash values
-       # depending on the history of the dtype object.
-       # To prevent the hash from being sensitive to this, we use
-       # .descr which is a full (and never interned) description of
-       # the array dtype according to the numpy doc.
-       obj = (obj.__class__, ("HASHED", obj.descr))
-
-     Hasher.save(self, obj)
-
-
- def hash_obj(obj, hash_name="md5") -> str:
-   if has_numpy:
-     return NumpyHasher(hash_name=hash_name).hash(obj)
-   else:
-     return Hasher(hash_name=hash_name).hash(obj)
-
- hash = hash_obj
@@ -1,19 +0,0 @@
- from time import time
-
- active_mds = []
-
- class measure_duration:
-   def __init__(self, name):
-     self.name = name
-     active_mds.append(self)
-
-   def __enter__(self):
-     self.start = time()
-
-   def __exit__(self, *_):
-     duration = round(time() - self.start, 4)
-     depth = len(active_mds) - 1
-     indent = ('──' * depth) + (' ' * (depth > 0))
-     text = '{}: {} seconds'.format(self.name, duration)
-     print(indent + text)
-     active_mds.remove(self)
@@ -1,306 +0,0 @@
- import re
- from itertools import chain
- from typing import Any, Callable, Iterable, Literal, overload
-
- __all__ = [
-   "noop",
-   "clamp",
-   "non_none",
-   "as_any",
-   "list_split",
-   "drop_none",
-   "distinct",
-   "dict_firsts",
-   "distinct_by",
-   "sort_by",
-   "first",
-   "move_value",
-   "transpose_dict",
-   "make_combinations_by_dict",
-   "merge_dicts",
-   "intersect",
-   "ensure_tuple",
-   "key_of",
-   "omit",
-   "pick",
-   "dict_by",
-   "tuple_by",
-   "flatten",
-   "transpose",
-   "map_dict",
-   "deepen_dict",
-   "flatten_dict_inner",
-   "flatten_dict",
-   "group",
-   "reversed_enumerate",
-   "get_at",
-   "for_each",
-   "sized_partitions",
-   "num_partitions",
-   "df_from_array",
-   "StrFilter",
-   "str_filterer",
- ]
-
- def noop() -> None:
-   pass
-
- @overload
- def clamp(value: int, low: int, high: int) -> int: ...
- @overload
- def clamp(value: float, low: float, high: float) -> float: ...
- def clamp(value: float, low: float, high: float) -> float:
-   return max(low, min(value, high))
-
- def non_none[T](obj: T | None) -> T:
-   assert obj is not None
-   return obj
-
- def as_any(obj: Any) -> Any:
-   return obj
-
- def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
-   values = [sep, *iterable, sep]
-   split_at = [i for i, x in enumerate(values) if x is sep]
-   ranges = list(zip(split_at[0:-1], split_at[1:]))
-   return [
-     values[start + 1:end]
-     for start, end in ranges
-   ]
-
- def drop_none[T](iterable: Iterable[T | None]) -> list[T]:
-   return [x for x in iterable if x is not None]
-
- def distinct[T](iterable: Iterable[T]) -> list[T]:
-   return list(dict.fromkeys(iterable))
-
- def dict_firsts[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, T]:
-   result: dict[K, T] = {}
-   for key, item in pairs:
-     if key not in result:
-       result[key] = item
-   return result
-
- def distinct_by[T](pairs: Iterable[tuple[object, T]]) -> list[T]:
-   return list(dict_firsts(pairs).values())
-
- def sort_by[T](pairs: Iterable[tuple[Any, T]]) -> list[T]:
-   pairs = sorted(pairs, key=lambda p: p[0])
-   return [v for _, v in pairs]
-
- def first[T](iterable: Iterable[T]) -> T | None:
-   return next(iter(iterable), None)
-
- def move_value[T](iterable: Iterable[T], from_i: int, to_i: int) -> list[T]:
-   values = list(iterable)
-   values.insert(to_i, values.pop(from_i))
-   return values
-
- def transpose_dict(des):
-   if isinstance(des, list):
-     keys = list(des[0].keys()) if des else []
-     length = len(des)
-     return {
-       key: [des[i][key] for i in range(length)]
-       for key in keys
-     }
-   elif isinstance(des, dict):
-     keys = list(des.keys())
-     length = len(des[keys[0]]) if keys else 0
-     return [
-       {key: des[key][i] for key in keys}
-       for i in range(length)
-     ]
-   raise ValueError("transpose_dict only accepts dict or list")
-
- def make_combinations_by_dict(des, keys=None, pairs=[]):
-   keys = sorted(des.keys()) if keys is None else keys
-   if len(keys) == 0:
-     return [dict(pairs)]
-   key = keys[0]
-   remaining_keys = keys[1:]
-   new_pairs = [(key, val) for val in des[key]]
-   return flatten([
-     make_combinations_by_dict(des, remaining_keys, [pair] + pairs)
-     for pair in new_pairs
-   ])
-
- def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
-   if len(dicts) == 1:
-     return dicts[0]
-   result = {}
-   for d in dicts:
-     result.update(d)
-   return result
-
- def intersect[T](*iterables: Iterable[T]) -> list[T]:
-   return list(set.intersection(*map(set, iterables)))
-
- def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
-   return value if isinstance(value, tuple) else (value,)
-
- def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
-   return [d[key] for d in dicts]
-
- def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
-   if keys:
-     d = dict(d)
-     for key in keys:
-       del d[key]
-   return d
-
- def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
-   return {key: d[key] for key in keys}
-
- def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
-   return dict(zip(keys, values))
-
- def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
-   return tuple(d[key] for key in keys)
-
- @overload
- def flatten[T](iterable: Iterable[T], depth: Literal[0]) -> list[T]: ...
- @overload
- def flatten[T](iterable: Iterable[Iterable[T]], depth: Literal[1] = 1) -> list[T]: ...
- @overload
- def flatten[T](iterable: Iterable[Iterable[Iterable[T]]], depth: Literal[2]) -> list[T]: ...
- @overload
- def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[T]]]], depth: Literal[3]) -> list[T]: ...
- @overload
- def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[Iterable[T]]]]], depth: Literal[4]) -> list[T]: ...
- @overload
- def flatten(iterable: Iterable, depth: int) -> list: ...
-
- def flatten(iterable: Iterable, depth: int = 1) -> list:
-   for _ in range(depth):
-     iterable = chain.from_iterable(iterable)
-   return list(iterable)
-
- @overload
- def transpose[T1, T2](tuples: Iterable[tuple[T1, T2]], default_num_returns: int = 0) -> tuple[list[T1], list[T2]]: ...
- @overload
- def transpose[T1, T2, T3](tuples: Iterable[tuple[T1, T2, T3]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3]]: ...
- @overload
- def transpose[T1, T2, T3, T4](tuples: Iterable[tuple[T1, T2, T3, T4]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...
- @overload
- def transpose[T1, T2, T3, T4, T5](tuples: Iterable[tuple[T1, T2, T3, T4, T5]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...
- @overload
- def transpose(tuples: Iterable[tuple], default_num_returns: int = 0) -> tuple[list, ...]: ...
-
- def transpose(tuples: Iterable[tuple], default_num_returns=0) -> tuple[list, ...]:
-   output = tuple(zip(*tuples))
-   if not output:
-     return ([],) * default_num_returns
-   return tuple(map(list, output))
-
- def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
-   return {key: fn(value) for key, value in d.items()}
-
- @overload
- def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
- @overload
- def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
- @overload
- def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
- @overload
- def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
- @overload
- def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
- @overload
- def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
- def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
-   output = {}
-   if () in d:
-     return d[()]
-   for (*tail, head), value in d.items():
-     curr = output
-     for key in tail:
-       curr = curr.setdefault(key, {})
-     curr[head] = value
-   return output
-
- def flatten_dict_inner(d, prefix=()):
-   for key, value in d.items():
-     if not isinstance(value, dict) or value == {}:
-       yield prefix + (key,), value
-     else:
-       yield from flatten_dict_inner(value, prefix + (key,))
-
- def flatten_dict(deep_dict: dict, prefix=()) -> dict:
-   return dict(flatten_dict_inner(deep_dict, prefix))
-
- def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
-   values_by_key = {}
-   for key, value in pairs:
-     values_by_key.setdefault(key, []).append(value)
-   return values_by_key
-
- def reversed_enumerate[T](values: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
-   return zip(reversed(range(len(values))), reversed(values))
-
- def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
-   try:
-     for key in keys:
-       d = d[key]
-   except KeyError:
-     return default
-   return as_any(d)
-
- def for_each[T](func: Callable[[T], Any], iterable: Iterable[T]) -> None:
-   for item in iterable:
-     func(item)
-
- def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
-   # "chunk"
-   if not isinstance(values, list):
-     values = list(values)
-   num_parts = (len(values) / part_size).__ceil__()
-   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
-
- def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
-   if not isinstance(values, list):
-     values = list(values)
-   part_size = (len(values) / num_parts).__ceil__()
-   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
-
- def _cat_tile(cats, n_tile):
-   import numpy as np
-   return cats[np.tile(np.arange(len(cats)), n_tile)]
-
- def df_from_array(
-   value_cols: dict[str, Any],
-   dim_labels: list[tuple[str, list[str | int | float]]],
-   indexed=False,
- ):
-   import numpy as np
-   import pandas as pd
-   dim_sizes = np.array([len(labels) for _, labels in dim_labels])
-   assert all(array.shape == tuple(dim_sizes) for array in value_cols.values())
-   array_offsets = [
-     (dim_sizes[i + 1:].prod(), dim_sizes[:i].prod())
-     for i in range(len(dim_sizes))
-   ]
-   category_cols = {
-     dim: _cat_tile(pd.Categorical(labels).repeat(repeats), tiles)
-     for (dim, labels), (repeats, tiles) in zip(dim_labels, array_offsets)
-   }
-   value_cols = {name: array.reshape(-1) for name, array in value_cols.items()}
-   df = pd.DataFrame({**category_cols, **value_cols}, copy=False)
-   if indexed:
-     df = df.set_index([name for name, _ in dim_labels])
-   return df
-
- StrFilter = Callable[[str], bool]
-
- def str_filterer(
-   include_patterns: list[re.Pattern[str]] = [],
-   exclude_patterns: list[re.Pattern[str]] = [],
- ) -> StrFilter:
-   def str_filter(string: str) -> bool:
-     if any(pattern.search(string) for pattern in exclude_patterns):
-       return False
-     if not include_patterns:
-       return True
-     return any(pattern.search(string) for pattern in include_patterns)
-
-   return str_filter