relib 1.2.11__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relib/__init__.py +6 -4
- relib/dict_utils.py +96 -0
- relib/io_utils.py +21 -0
- relib/iter_utils.py +97 -0
- relib/processing_utils.py +66 -0
- relib/{system.py → runtime_tools.py} +34 -29
- relib/type_utils.py +17 -0
- {relib-1.2.11.dist-info → relib-1.3.1.dist-info}/METADATA +1 -1
- relib-1.3.1.dist-info/RECORD +11 -0
- relib/hashing.py +0 -179
- relib/measure_duration.py +0 -19
- relib/utils.py +0 -306
- relib-1.2.11.dist-info/RECORD +0 -9
- {relib-1.2.11.dist-info → relib-1.3.1.dist-info}/WHEEL +0 -0
- {relib-1.2.11.dist-info → relib-1.3.1.dist-info}/licenses/LICENSE +0 -0
relib/__init__.py
CHANGED
@@ -1,4 +1,6 @@
-from .
-from .
-from .
-from .
+from .dict_utils import *
+from .io_utils import *
+from .iter_utils import *
+from .processing_utils import *
+from .runtime_tools import *
+from .type_utils import *
relib/dict_utils.py
ADDED
@@ -0,0 +1,96 @@
+from typing import Any, Callable, Iterable, overload
+from .type_utils import as_any
+
+__all__ = [
+  "deepen_dict", "dict_by", "dict_firsts",
+  "flatten_dict_inner", "flatten_dict",
+  "get_at", "group",
+  "key_of",
+  "map_dict", "merge_dicts",
+  "omit",
+  "pick",
+  "tuple_by",
+]
+
+def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
+  if len(dicts) == 1:
+    return dicts[0]
+  result = {}
+  for d in dicts:
+    result |= d
+  return result
+
+def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+  if keys:
+    d = dict(d)
+    for key in keys:
+      del d[key]
+  return d
+
+def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+  return {key: d[key] for key in keys}
+
+def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
+  return dict(zip(keys, values))
+
+def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
+  return tuple(d[key] for key in keys)
+
+def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
+  return {key: fn(value) for key, value in d.items()}
+
+def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
+  return [d[key] for d in dicts]
+
+def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
+  try:
+    for key in keys:
+      d = d[key]
+  except KeyError:
+    return default
+  return as_any(d)
+
+def dict_firsts[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, T]:
+  result: dict[K, T] = {}
+  for key, value in pairs:
+    result.setdefault(key, value)
+  return result
+
+def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
+  values_by_key = {}
+  for key, value in pairs:
+    values_by_key.setdefault(key, []).append(value)
+  return values_by_key
+
+def flatten_dict_inner(d, prefix=()):
+  for key, value in d.items():
+    if not isinstance(value, dict) or value == {}:
+      yield prefix + (key,), value
+    else:
+      yield from flatten_dict_inner(value, prefix + (key,))
+
+def flatten_dict(deep_dict: dict, prefix=()) -> dict:
+  return dict(flatten_dict_inner(deep_dict, prefix))
+
+@overload
+def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
+@overload
+def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
+@overload
+def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
+@overload
+def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
+@overload
+def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
+@overload
+def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
+def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
+  output = {}
+  if () in d:
+    return d[()]
+  for (*tail, head), value in d.items():
+    curr = output
+    for key in tail:
+      curr = curr.setdefault(key, {})
+    curr[head] = value
+  return output
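Since the new __init__.py star-imports every module, the dict helpers above are reachable from the package root. A brief usage sketch (not part of the diff; values are illustrative):

from relib import deepen_dict, flatten_dict, group, merge_dicts, pick

flat = {("a", "x"): 1, ("a", "y"): 2, ("b", "x"): 3}
nested = deepen_dict(flat)             # {"a": {"x": 1, "y": 2}, "b": {"x": 3}}
assert flatten_dict(nested) == flat    # round-trips back to tuple keys
assert group((n % 2, n) for n in range(6)) == {0: [0, 2, 4], 1: [1, 3, 5]}
assert merge_dicts({"a": 1}, {"b": 2}, {"a": 3}) == {"a": 3, "b": 2}  # later dicts win
assert pick({"a": 3, "b": 2}, ["a"]) == {"a": 3}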
relib/io_utils.py
ADDED
@@ -0,0 +1,21 @@
+import json
+from pathlib import Path
+from typing import Any
+
+__all__ = [
+  "read_json",
+  "write_json",
+]
+
+default_sentinel = object()
+
+def read_json(path: Path, default=default_sentinel) -> Any:
+  if default is not default_sentinel and not path.exists():
+    return default
+  with path.open("r") as f:
+    return json.load(f)
+
+def write_json(path: Path, obj: object, indent: None | int = None) -> None:
+  with path.open("w") as f:
+    separators = (",", ":") if indent is None else None
+    return json.dump(obj, f, indent=indent, separators=separators)
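A similar sketch for the new JSON helpers (not part of the diff; the file name is hypothetical):

from pathlib import Path
from relib import read_json, write_json

path = Path("example_settings.json")  # hypothetical scratch file
write_json(path, {"retries": 3, "debug": False})           # compact separators when indent is None
assert read_json(path) == {"retries": 3, "debug": False}
assert read_json(Path("missing.json"), default={}) == {}   # default short-circuits missing files
path.unlink()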
relib/iter_utils.py
ADDED
@@ -0,0 +1,97 @@
+from itertools import chain
+from typing import Any, Iterable, Literal, overload
+from .dict_utils import dict_firsts
+
+__all__ = [
+  "distinct_by", "distinct", "drop_none",
+  "first", "flatten",
+  "interleave", "intersect",
+  "list_split",
+  "move_value",
+  "num_partitions",
+  "reversed_enumerate",
+  "sized_partitions", "sort_by",
+  "transpose",
+]
+
+def first[T](iterable: Iterable[T]) -> T | None:
+  return next(iter(iterable), None)
+
+def drop_none[T](iterable: Iterable[T | None]) -> list[T]:
+  return [x for x in iterable if x is not None]
+
+def distinct[T](iterable: Iterable[T]) -> list[T]:
+  return list(dict.fromkeys(iterable))
+
+def distinct_by[T](pairs: Iterable[tuple[object, T]]) -> list[T]:
+  return list(dict_firsts(pairs).values())
+
+def sort_by[T](pairs: Iterable[tuple[Any, T]]) -> list[T]:
+  pairs = sorted(pairs, key=lambda p: p[0])
+  return [v for _, v in pairs]
+
+def move_value[T](iterable: Iterable[T], from_i: int, to_i: int) -> list[T]:
+  values = list(iterable)
+  values.insert(to_i, values.pop(from_i))
+  return values
+
+def reversed_enumerate[T](values: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
+  return zip(range(len(values))[::-1], reversed(values))
+
+def intersect[T](*iterables: Iterable[T]) -> list[T]:
+  return list(set.intersection(*map(set, iterables)))
+
+def interleave[T](*iterables: Iterable[T]) -> list[T]:
+  return flatten(zip(*iterables))
+
+def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
+  values = [sep, *iterable, sep]
+  split_at = [i for i, x in enumerate(values) if x is sep]
+  ranges = list(zip(split_at[0:-1], split_at[1:]))
+  return [values[start + 1:end] for start, end in ranges]
+
+def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
+  # "chunk"
+  if not isinstance(values, list):
+    values = list(values)
+  num_parts = (len(values) / part_size).__ceil__()
+  return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
+
+def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
+  if not isinstance(values, list):
+    values = list(values)
+  part_size = (len(values) / num_parts).__ceil__()
+  return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
+
+@overload
+def flatten[T](iterable: Iterable[T], depth: Literal[0]) -> list[T]: ...
+@overload
+def flatten[T](iterable: Iterable[Iterable[T]], depth: Literal[1] = 1) -> list[T]: ...
+@overload
+def flatten[T](iterable: Iterable[Iterable[Iterable[T]]], depth: Literal[2]) -> list[T]: ...
+@overload
+def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[T]]]], depth: Literal[3]) -> list[T]: ...
+@overload
+def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[Iterable[T]]]]], depth: Literal[4]) -> list[T]: ...
+@overload
+def flatten(iterable: Iterable, depth: int) -> list: ...
+def flatten(iterable: Iterable, depth: int = 1) -> list:
+  for _ in range(depth):
+    iterable = chain.from_iterable(iterable)
+  return list(iterable)
+
+@overload
+def transpose[T1, T2](tuples: Iterable[tuple[T1, T2]], default_num_returns=0) -> tuple[list[T1], list[T2]]: ...
+@overload
+def transpose[T1, T2, T3](tuples: Iterable[tuple[T1, T2, T3]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3]]: ...
+@overload
+def transpose[T1, T2, T3, T4](tuples: Iterable[tuple[T1, T2, T3, T4]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...
+@overload
+def transpose[T1, T2, T3, T4, T5](tuples: Iterable[tuple[T1, T2, T3, T4, T5]], default_num_returns=0) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...
+@overload
+def transpose[T](tuples: Iterable[tuple[T, ...]], default_num_returns=0) -> tuple[list[T], ...]: ...
+def transpose(tuples: Iterable[tuple], default_num_returns=0) -> tuple[list, ...]:
+  output = tuple(zip(*tuples))
+  if not output:
+    return ([],) * default_num_returns
+  return tuple(map(list, output))
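A short sketch of the iteration helpers above (not part of the diff; values are illustrative):

from relib import flatten, list_split, sized_partitions, transpose

assert flatten([[1, 2], [3]]) == [1, 2, 3]                  # default depth=1
assert flatten([[[1], [2]], [[3]]], depth=2) == [1, 2, 3]
assert sized_partitions(range(5), 2) == [[0, 1], [2, 3], [4]]
names, ages = transpose([("ada", 36), ("alan", 41)])
assert names == ["ada", "alan"] and ages == [36, 41]
# list_split matches the separator by identity (`is`), so a singleton like None works well
assert list_split([1, 2, None, 3], None) == [[1, 2], [3]]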
relib/processing_utils.py
ADDED
@@ -0,0 +1,66 @@
+import re
+from typing import Any, Callable, Iterable, overload
+
+__all__ = [
+  "clamp",
+  "df_from_array",
+  "for_each",
+  "noop",
+  "str_filterer", "StrFilter",
+]
+
+def noop() -> None:
+  pass
+
+def for_each[T](func: Callable[[T], Any], iterable: Iterable[T]) -> None:
+  for item in iterable:
+    func(item)
+
+@overload
+def clamp(value: int, low: int, high: int) -> int: ...
+@overload
+def clamp(value: float, low: float, high: float) -> float: ...
+def clamp(value: float, low: float, high: float) -> float:
+  return max(low, min(value, high))
+
+def _cat_tile(cats, n_tile):
+  import numpy as np
+  return cats[np.tile(np.arange(len(cats)), n_tile)]
+
+def df_from_array(
+  value_cols: dict[str, Any],
+  dim_labels: list[tuple[str, list[str | int | float]]],
+  indexed=False,
+):
+  import numpy as np
+  import pandas as pd
+  dim_sizes = np.array([len(labels) for _, labels in dim_labels])
+  assert all(array.shape == tuple(dim_sizes) for array in value_cols.values())
+  array_offsets = [
+    (dim_sizes[i + 1:].prod(), dim_sizes[:i].prod())
+    for i in range(len(dim_sizes))
+  ]
+  category_cols = {
+    dim: _cat_tile(pd.Categorical(labels).repeat(repeats), tiles)
+    for (dim, labels), (repeats, tiles) in zip(dim_labels, array_offsets)
+  }
+  value_cols = {name: array.reshape(-1) for name, array in value_cols.items()}
+  df = pd.DataFrame({**category_cols, **value_cols}, copy=False)
+  if indexed:
+    df = df.set_index([name for name, _ in dim_labels])
+  return df
+
+StrFilter = Callable[[str], bool]
+
+def str_filterer(
+  include_patterns: list[re.Pattern[str]] = [],
+  exclude_patterns: list[re.Pattern[str]] = [],
+) -> StrFilter:
+  def str_filter(string: str) -> bool:
+    if any(pattern.search(string) for pattern in exclude_patterns):
+      return False
+    if not include_patterns:
+      return True
+    return any(pattern.search(string) for pattern in include_patterns)
+
+  return str_filter
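A sketch of the processing helpers (not part of the diff). df_from_array needs numpy and pandas, which are imported lazily, so only the dependency-free helpers are shown:

import re
from relib import clamp, for_each, str_filterer

assert clamp(15, 0, 10) == 10
assert clamp(-3.5, 0.0, 10.0) == 0.0

keep = str_filterer(
  include_patterns=[re.compile(r"\.py$")],
  exclude_patterns=[re.compile(r"^test_")],
)
assert keep("dict_utils.py") is True
assert keep("test_dict_utils.py") is False

for_each(print, ["a", "b"])  # applies a side-effecting function, returns None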
relib/{system.py → runtime_tools.py}
RENAMED
@@ -1,38 +1,25 @@
 import asyncio
 import contextvars
-import functools
-import json
 import os
 from concurrent.futures import ThreadPoolExecutor
-from
-from
-from
+from functools import partial, wraps
+from time import time
+from typing import Awaitable, Callable, Iterable, ParamSpec, TypeVar
+from .processing_utils import noop

 __all__ = [
-  "
-  "
-  "
-  "console_link",
+  "as_async", "async_limit",
+  "clear_console", "console_link",
+  "default_executor", "default_workers",
   "roll_tasks",
-  "
-  "async_limit",
+  "measure_duration",
 ]

 P = ParamSpec("P")
 R = TypeVar("R")
-default_workers = min(32, (os.cpu_count() or 1) + 4)
-default_sentinel = object()
-
-def read_json(path: Path, default=default_sentinel) -> Any:
-  if default is not default_sentinel and not path.exists():
-    return default
-  with path.open("r") as f:
-    return json.load(f)

-
-
-    separators = (",", ":") if indent is None else None
-    return json.dump(obj, f, indent=indent, separators=separators)
+default_workers = min(32, (os.cpu_count() or 1) + 4)
+default_executor = ThreadPoolExecutor(max_workers=default_workers)

 def clear_console() -> None:
   os.system("cls" if os.name == "nt" else "clear")
@@ -54,18 +41,18 @@ async def roll_tasks[T](tasks: Iterable[Awaitable[T]], workers=default_workers,
   from tqdm import tqdm
   tasks = tasks if isinstance(tasks, list) else list(tasks)
   with tqdm(total=len(tasks)) as pbar:
-    update =
+    update = partial(pbar.update, 1)
     return await asyncio.gather(*[worker(task, semaphore, update) for task in tasks])

-def as_async(workers=
-  executor = ThreadPoolExecutor(max_workers=workers)
+def as_async(workers: int | ThreadPoolExecutor = default_executor) -> Callable[[Callable[P, R]], Callable[P, Awaitable[R]]]:
+  executor = ThreadPoolExecutor(max_workers=workers) if isinstance(workers, int) else workers

   def on_fn(func: Callable[P, R]) -> Callable[P, Awaitable[R]]:
-    @
+    @wraps(func)
     async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
       loop = asyncio.get_running_loop()
       ctx = contextvars.copy_context()
-      fn_call =
+      fn_call = partial(ctx.run, func, *args, **kwargs)
       return await loop.run_in_executor(executor, fn_call)
     return wrapper
   return on_fn
@@ -74,9 +61,27 @@ def async_limit(workers=default_workers) -> Callable[[Callable[P, Awaitable[R]]]
   semaphore = asyncio.Semaphore(workers)

   def on_fn(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[R]]:
-    @
+    @wraps(func)
     async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
       async with semaphore:
         return await func(*args, **kwargs)
     return wrapper
   return on_fn
+
+active_mds = []
+
+class measure_duration:
+  def __init__(self, name):
+    self.name = name
+    active_mds.append(self)
+
+  def __enter__(self):
+    self.start = time()
+
+  def __exit__(self, *_):
+    duration = round(time() - self.start, 4)
+    depth = len(active_mds) - 1
+    indent = "──" * depth + " " * (depth > 0)
+    text = f"{self.name}: {duration} seconds"
+    print(indent + text)
+    active_mds.remove(self)
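A sketch of how the renamed runtime_tools module fits together (not part of the diff). The full roll_tasks signature is only partly visible in the hunk above and its body uses tqdm, so the keyword usage here is an assumption and tqdm must be installed:

import asyncio
import time
from relib import as_async, async_limit, measure_duration, roll_tasks

@as_async()              # run a blocking function on the shared default thread pool
def blocking_square(x: int) -> int:
  time.sleep(0.1)
  return x * x

@async_limit(workers=2)  # allow at most two concurrent calls
async def fetch(x: int) -> int:
  return await blocking_square(x)

async def main() -> None:
  with measure_duration("squares"):  # prints "squares: <seconds> seconds" on exit
    results = await roll_tasks([fetch(x) for x in range(4)], workers=4)
  print(results)  # [0, 1, 4, 9]

asyncio.run(main())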
relib/type_utils.py
ADDED
@@ -0,0 +1,17 @@
+from typing import Any
+
+__all__ = [
+  "as_any",
+  "ensure_tuple",
+  "non_none",
+]
+
+def as_any(obj: Any) -> Any:
+  return obj
+
+def non_none[T](obj: T | None) -> T:
+  assert obj is not None
+  return obj
+
+def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
+  return value if isinstance(value, tuple) else (value,)
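A one-liner sketch of the typing helpers (not part of the diff):

from relib import ensure_tuple, non_none

assert ensure_tuple(1) == (1,)
assert ensure_tuple((1, 2)) == (1, 2)
maybe_name: str | None = "relib"
assert non_none(maybe_name) == "relib"  # raises AssertionError if the value is None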
relib-1.3.1.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+relib/__init__.py,sha256=WerjUaM_sNvudjXFudLRtXB7viZWEW1RSinkDjrh4nE,163
+relib/dict_utils.py,sha256=jqW6bYSaQMt2AC2KFzDJKyl88idyMttWxXDu3t-fA5I,2980
+relib/io_utils.py,sha256=EtnIGQmLXjoHUPFteB5yPXDD3wGLvH4O3CahlCebXDQ,555
+relib/iter_utils.py,sha256=5N5WVx2oZmZSxv8EnMbKeDKsAOlmPJkEARscwC4zToo,3854
+relib/processing_utils.py,sha256=eMzjlxsEmfvtKafDITBWSp9D5RwegSWsUsvj1FpmBM0,1893
+relib/runtime_tools.py,sha256=l7B-C3Dz8o3ffRXHC2ysyf59QfbDrzOd-KCxPgyohnE,2842
+relib/type_utils.py,sha256=oY96cAAux1JwhXgWFFyqEv_f-wwyPc_Hm6I9Yeisu_M,323
+relib-1.3.1.dist-info/METADATA,sha256=J_X6XPYfnT_MNGLel5k8UHThx2N7eV6YK9SzQ9xcs-M,1295
+relib-1.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+relib-1.3.1.dist-info/licenses/LICENSE,sha256=9xVsdtv_-uSyY9Xl9yujwAPm4-mjcCLeVy-ljwXEWbo,1059
+relib-1.3.1.dist-info/RECORD,,
relib/hashing.py
DELETED
@@ -1,179 +0,0 @@
-# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-import pickle
-import hashlib
-import sys
-import types
-import io
-import decimal
-
-try:
-  import numpy
-except:
-  has_numpy = False
-else:
-  has_numpy = True
-
-Pickler = pickle._Pickler
-
-
-class _ConsistentSet(object):
-  def __init__(self, set_sequence):
-    try:
-      self._sequence = sorted(set_sequence)
-    except (TypeError, decimal.InvalidOperation):
-      self._sequence = sorted(map(hash_obj, set_sequence))
-
-
-class _MyHash(object):
-  """ Class used to hash objects that won't normally pickle """
-
-  def __init__(self, *args):
-    self.args = args
-
-
-class Hasher(Pickler):
-  """ A subclass of pickler, to do cryptographic hashing, rather than pickling. """
-
-  def __init__(self, hash_name="md5"):
-    self.stream = io.BytesIO()
-    # We want a pickle protocol that only changes with major Python versions
-    protocol = pickle.HIGHEST_PROTOCOL
-    Pickler.__init__(self, self.stream, protocol=protocol)
-    self._hash = hashlib.new(hash_name)
-
-  def hash(self, obj) -> str:
-    try:
-      self.dump(obj)
-    except pickle.PicklingError as e:
-      e.args += ("PicklingError while hashing %r: %r" % (obj, e),)
-      raise
-    dumps = self.stream.getvalue()
-    self._hash.update(dumps)
-    return self._hash.hexdigest()
-
-  def save(self, obj):
-    if isinstance(obj, (types.MethodType, type({}.pop))):
-      # the Pickler cannot pickle instance methods; here we decompose
-      # them into components that make them uniquely identifiable
-      if hasattr(obj, "__func__"):
-        func_name = obj.__func__.__name__
-      else:
-        func_name = obj.__name__
-      inst = obj.__self__
-      if type(inst) == type(pickle):
-        obj = _MyHash(func_name, inst.__name__)
-      elif inst is None:
-        # type(None) or type(module) do not pickle
-        obj = _MyHash(func_name, inst)
-      else:
-        cls = obj.__self__.__class__
-        obj = _MyHash(func_name, inst, cls)
-    Pickler.save(self, obj)
-
-  def memoize(self, obj):
-    # We want hashing to be sensitive to value instead of reference.
-    # For example we want ["aa", "aa"] and ["aa", "aaZ"[:2]]
-    # to hash to the same value and that's why we disable memoization
-    # for strings
-    if isinstance(obj, (bytes, str)):
-      return
-    Pickler.memoize(self, obj)
-
-  # The dispatch table of the pickler is not accessible in Python
-  # 3, as these lines are only bugware for IPython, we skip them.
-  def save_global(self, obj, name=None):
-    # We have to override this method in order to deal with objects
-    # defined interactively in IPython that are not injected in
-    # __main__
-    try:
-      Pickler.save_global(self, obj, name=name)
-    except pickle.PicklingError:
-      Pickler.save_global(self, obj, name=name)
-      module = getattr(obj, "__module__", None)
-      if module == "__main__":
-        my_name = name
-        if my_name is None:
-          my_name = obj.__name__
-        mod = sys.modules[module]
-        if not hasattr(mod, my_name):
-          # IPython doesn't inject the variables define
-          # interactively in __main__
-          setattr(mod, my_name, obj)
-
-  def _batch_setitems(self, items):
-    try:
-      Pickler._batch_setitems(self, iter(sorted(items)))
-    except TypeError:
-      Pickler._batch_setitems(self, iter(sorted((hash_obj(k), v) for k, v in items)))
-
-  def save_set(self, set_items):
-    Pickler.save(self, _ConsistentSet(set_items))
-
-  dispatch = Pickler.dispatch.copy()
-  dispatch[type(len)] = save_global # builtin
-  dispatch[type(object)] = save_global # type
-  dispatch[type(Pickler)] = save_global # classobj
-  dispatch[type(pickle.dump)] = save_global # function
-  dispatch[type(set())] = save_set
-
-
-class NumpyHasher(Hasher):
-  def __init__(self, hash_name="md5"):
-    Hasher.__init__(self, hash_name=hash_name)
-
-  def save(self, obj):
-    """ Subclass the save method, to hash ndarray subclass, rather
-    than pickling them. Off course, this is a total abuse of
-    the Pickler class.
-    """
-    import numpy as np
-
-    if isinstance(obj, np.ndarray) and not obj.dtype.hasobject:
-      # Compute a hash of the object
-      # The update function of the hash requires a c_contiguous buffer.
-      if obj.shape == ():
-        # 0d arrays need to be flattened because viewing them as bytes
-        # raises a ValueError exception.
-        obj_c_contiguous = obj.flatten()
-      elif obj.flags.c_contiguous:
-        obj_c_contiguous = obj
-      elif obj.flags.f_contiguous:
-        obj_c_contiguous = obj.T
-      else:
-        # Cater for non-single-segment arrays: this creates a
-        # copy, and thus aleviates this issue.
-        # XXX: There might be a more efficient way of doing this
-        obj_c_contiguous = obj.flatten()
-
-      # View the array as bytes to support dtypes like datetime64
-      self._hash.update(memoryview(obj_c_contiguous.view(np.uint8)))
-
-      # The object will be pickled by the pickler hashed at the end.
-      obj = (obj.__class__, ("HASHED", obj.dtype, obj.shape, obj.strides))
-    elif isinstance(obj, np.dtype):
-      # Atomic dtype objects are interned by their default constructor:
-      # np.dtype("f8") is np.dtype("f8")
-      # This interning is not maintained by a
-      # pickle.loads + pickle.dumps cycle, because __reduce__
-      # uses copy=True in the dtype constructor. This
-      # non-deterministic behavior causes the internal memoizer
-      # of the hasher to generate different hash values
-      # depending on the history of the dtype object.
-      # To prevent the hash from being sensitive to this, we use
-      # .descr which is a full (and never interned) description of
-      # the array dtype according to the numpy doc.
-      obj = (obj.__class__, ("HASHED", obj.descr))
-
-    Hasher.save(self, obj)
-
-
-def hash_obj(obj, hash_name="md5") -> str:
-  if has_numpy:
-    return NumpyHasher(hash_name=hash_name).hash(obj)
-  else:
-    return Hasher(hash_name=hash_name).hash(obj)
-
-hash = hash_obj
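For reference, a sketch of how the removed hashing module was typically used on 1.2.x (not part of the diff); callers upgrading to 1.3.1 need another content-hashing strategy, e.g. joblib's hashing, from which this module was adapted:

# relib <= 1.2.11 only; relib.hashing is gone in 1.3.1
from relib.hashing import hash_obj

digest = hash_obj({"params": [1, 2, 3], "seed": 42})          # md5 hex digest by default
assert digest == hash_obj({"seed": 42, "params": [1, 2, 3]})  # dict ordering does not affect the hash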
relib/measure_duration.py
DELETED
@@ -1,19 +0,0 @@
-from time import time
-
-active_mds = []
-
-class measure_duration:
-  def __init__(self, name):
-    self.name = name
-    active_mds.append(self)
-
-  def __enter__(self):
-    self.start = time()
-
-  def __exit__(self, *_):
-    duration = round(time() - self.start, 4)
-    depth = len(active_mds) - 1
-    indent = ('──' * depth) + (' ' * (depth > 0))
-    text = '{}: {} seconds'.format(self.name, duration)
-    print(indent + text)
-    active_mds.remove(self)
relib/utils.py
DELETED
@@ -1,306 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from itertools import chain
|
3
|
-
from typing import Any, Callable, Iterable, Literal, overload
|
4
|
-
|
5
|
-
__all__ = [
|
6
|
-
"noop",
|
7
|
-
"clamp",
|
8
|
-
"non_none",
|
9
|
-
"as_any",
|
10
|
-
"list_split",
|
11
|
-
"drop_none",
|
12
|
-
"distinct",
|
13
|
-
"dict_firsts",
|
14
|
-
"distinct_by",
|
15
|
-
"sort_by",
|
16
|
-
"first",
|
17
|
-
"move_value",
|
18
|
-
"transpose_dict",
|
19
|
-
"make_combinations_by_dict",
|
20
|
-
"merge_dicts",
|
21
|
-
"intersect",
|
22
|
-
"ensure_tuple",
|
23
|
-
"key_of",
|
24
|
-
"omit",
|
25
|
-
"pick",
|
26
|
-
"dict_by",
|
27
|
-
"tuple_by",
|
28
|
-
"flatten",
|
29
|
-
"transpose",
|
30
|
-
"map_dict",
|
31
|
-
"deepen_dict",
|
32
|
-
"flatten_dict_inner",
|
33
|
-
"flatten_dict",
|
34
|
-
"group",
|
35
|
-
"reversed_enumerate",
|
36
|
-
"get_at",
|
37
|
-
"for_each",
|
38
|
-
"sized_partitions",
|
39
|
-
"num_partitions",
|
40
|
-
"df_from_array",
|
41
|
-
"StrFilter",
|
42
|
-
"str_filterer",
|
43
|
-
]
|
44
|
-
|
45
|
-
def noop() -> None:
|
46
|
-
pass
|
47
|
-
|
48
|
-
@overload
|
49
|
-
def clamp(value: int, low: int, high: int) -> int: ...
|
50
|
-
@overload
|
51
|
-
def clamp(value: float, low: float, high: float) -> float: ...
|
52
|
-
def clamp(value: float, low: float, high: float) -> float:
|
53
|
-
return max(low, min(value, high))
|
54
|
-
|
55
|
-
def non_none[T](obj: T | None) -> T:
|
56
|
-
assert obj is not None
|
57
|
-
return obj
|
58
|
-
|
59
|
-
def as_any(obj: Any) -> Any:
|
60
|
-
return obj
|
61
|
-
|
62
|
-
def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
|
63
|
-
values = [sep, *iterable, sep]
|
64
|
-
split_at = [i for i, x in enumerate(values) if x is sep]
|
65
|
-
ranges = list(zip(split_at[0:-1], split_at[1:]))
|
66
|
-
return [
|
67
|
-
values[start + 1:end]
|
68
|
-
for start, end in ranges
|
69
|
-
]
|
70
|
-
|
71
|
-
def drop_none[T](iterable: Iterable[T | None]) -> list[T]:
|
72
|
-
return [x for x in iterable if x is not None]
|
73
|
-
|
74
|
-
def distinct[T](iterable: Iterable[T]) -> list[T]:
|
75
|
-
return list(dict.fromkeys(iterable))
|
76
|
-
|
77
|
-
def dict_firsts[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, T]:
|
78
|
-
result: dict[K, T] = {}
|
79
|
-
for key, item in pairs:
|
80
|
-
if key not in result:
|
81
|
-
result[key] = item
|
82
|
-
return result
|
83
|
-
|
84
|
-
def distinct_by[T](pairs: Iterable[tuple[object, T]]) -> list[T]:
|
85
|
-
return list(dict_firsts(pairs).values())
|
86
|
-
|
87
|
-
def sort_by[T](pairs: Iterable[tuple[Any, T]]) -> list[T]:
|
88
|
-
pairs = sorted(pairs, key=lambda p: p[0])
|
89
|
-
return [v for _, v in pairs]
|
90
|
-
|
91
|
-
def first[T](iterable: Iterable[T]) -> T | None:
|
92
|
-
return next(iter(iterable), None)
|
93
|
-
|
94
|
-
def move_value[T](iterable: Iterable[T], from_i: int, to_i: int) -> list[T]:
|
95
|
-
values = list(iterable)
|
96
|
-
values.insert(to_i, values.pop(from_i))
|
97
|
-
return values
|
98
|
-
|
99
|
-
def transpose_dict(des):
|
100
|
-
if isinstance(des, list):
|
101
|
-
keys = list(des[0].keys()) if des else []
|
102
|
-
length = len(des)
|
103
|
-
return {
|
104
|
-
key: [des[i][key] for i in range(length)]
|
105
|
-
for key in keys
|
106
|
-
}
|
107
|
-
elif isinstance(des, dict):
|
108
|
-
keys = list(des.keys())
|
109
|
-
length = len(des[keys[0]]) if keys else 0
|
110
|
-
return [
|
111
|
-
{key: des[key][i] for key in keys}
|
112
|
-
for i in range(length)
|
113
|
-
]
|
114
|
-
raise ValueError("transpose_dict only accepts dict or list")
|
115
|
-
|
116
|
-
def make_combinations_by_dict(des, keys=None, pairs=[]):
|
117
|
-
keys = sorted(des.keys()) if keys is None else keys
|
118
|
-
if len(keys) == 0:
|
119
|
-
return [dict(pairs)]
|
120
|
-
key = keys[0]
|
121
|
-
remaining_keys = keys[1:]
|
122
|
-
new_pairs = [(key, val) for val in des[key]]
|
123
|
-
return flatten([
|
124
|
-
make_combinations_by_dict(des, remaining_keys, [pair] + pairs)
|
125
|
-
for pair in new_pairs
|
126
|
-
])
|
127
|
-
|
128
|
-
def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
|
129
|
-
if len(dicts) == 1:
|
130
|
-
return dicts[0]
|
131
|
-
result = {}
|
132
|
-
for d in dicts:
|
133
|
-
result.update(d)
|
134
|
-
return result
|
135
|
-
|
136
|
-
def intersect[T](*iterables: Iterable[T]) -> list[T]:
|
137
|
-
return list(set.intersection(*map(set, iterables)))
|
138
|
-
|
139
|
-
def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
|
140
|
-
return value if isinstance(value, tuple) else (value,)
|
141
|
-
|
142
|
-
def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
|
143
|
-
return [d[key] for d in dicts]
|
144
|
-
|
145
|
-
def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
|
146
|
-
if keys:
|
147
|
-
d = dict(d)
|
148
|
-
for key in keys:
|
149
|
-
del d[key]
|
150
|
-
return d
|
151
|
-
|
152
|
-
def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
|
153
|
-
return {key: d[key] for key in keys}
|
154
|
-
|
155
|
-
def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
|
156
|
-
return dict(zip(keys, values))
|
157
|
-
|
158
|
-
def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
|
159
|
-
return tuple(d[key] for key in keys)
|
160
|
-
|
161
|
-
@overload
|
162
|
-
def flatten[T](iterable: Iterable[T], depth: Literal[0]) -> list[T]: ...
|
163
|
-
@overload
|
164
|
-
def flatten[T](iterable: Iterable[Iterable[T]], depth: Literal[1] = 1) -> list[T]: ...
|
165
|
-
@overload
|
166
|
-
def flatten[T](iterable: Iterable[Iterable[Iterable[T]]], depth: Literal[2]) -> list[T]: ...
|
167
|
-
@overload
|
168
|
-
def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[T]]]], depth: Literal[3]) -> list[T]: ...
|
169
|
-
@overload
|
170
|
-
def flatten[T](iterable: Iterable[Iterable[Iterable[Iterable[Iterable[T]]]]], depth: Literal[4]) -> list[T]: ...
|
171
|
-
@overload
|
172
|
-
def flatten(iterable: Iterable, depth: int) -> list: ...
|
173
|
-
|
174
|
-
def flatten(iterable: Iterable, depth: int = 1) -> list:
|
175
|
-
for _ in range(depth):
|
176
|
-
iterable = chain.from_iterable(iterable)
|
177
|
-
return list(iterable)
|
178
|
-
|
179
|
-
@overload
|
180
|
-
def transpose[T1, T2](tuples: Iterable[tuple[T1, T2]], default_num_returns: int = 0) -> tuple[list[T1], list[T2]]: ...
|
181
|
-
@overload
|
182
|
-
def transpose[T1, T2, T3](tuples: Iterable[tuple[T1, T2, T3]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3]]: ...
|
183
|
-
@overload
|
184
|
-
def transpose[T1, T2, T3, T4](tuples: Iterable[tuple[T1, T2, T3, T4]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...
|
185
|
-
@overload
|
186
|
-
def transpose[T1, T2, T3, T4, T5](tuples: Iterable[tuple[T1, T2, T3, T4, T5]], default_num_returns: int = 0) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...
|
187
|
-
@overload
|
188
|
-
def transpose(tuples: Iterable[tuple], default_num_returns: int = 0) -> tuple[list, ...]: ...
|
189
|
-
|
190
|
-
def transpose(tuples: Iterable[tuple], default_num_returns=0) -> tuple[list, ...]:
|
191
|
-
output = tuple(zip(*tuples))
|
192
|
-
if not output:
|
193
|
-
return ([],) * default_num_returns
|
194
|
-
return tuple(map(list, output))
|
195
|
-
|
196
|
-
def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
|
197
|
-
return {key: fn(value) for key, value in d.items()}
|
198
|
-
|
199
|
-
@overload
|
200
|
-
def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
|
201
|
-
@overload
|
202
|
-
def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
|
203
|
-
@overload
|
204
|
-
def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
|
205
|
-
@overload
|
206
|
-
def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
|
207
|
-
@overload
|
208
|
-
def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
|
209
|
-
@overload
|
210
|
-
def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
|
211
|
-
def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
|
212
|
-
output = {}
|
213
|
-
if () in d:
|
214
|
-
return d[()]
|
215
|
-
for (*tail, head), value in d.items():
|
216
|
-
curr = output
|
217
|
-
for key in tail:
|
218
|
-
curr = curr.setdefault(key, {})
|
219
|
-
curr[head] = value
|
220
|
-
return output
|
221
|
-
|
222
|
-
def flatten_dict_inner(d, prefix=()):
|
223
|
-
for key, value in d.items():
|
224
|
-
if not isinstance(value, dict) or value == {}:
|
225
|
-
yield prefix + (key,), value
|
226
|
-
else:
|
227
|
-
yield from flatten_dict_inner(value, prefix + (key,))
|
228
|
-
|
229
|
-
def flatten_dict(deep_dict: dict, prefix=()) -> dict:
|
230
|
-
return dict(flatten_dict_inner(deep_dict, prefix))
|
231
|
-
|
232
|
-
def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
|
233
|
-
values_by_key = {}
|
234
|
-
for key, value in pairs:
|
235
|
-
values_by_key.setdefault(key, []).append(value)
|
236
|
-
return values_by_key
|
237
|
-
|
238
|
-
def reversed_enumerate[T](values: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
|
239
|
-
return zip(reversed(range(len(values))), reversed(values))
|
240
|
-
|
241
|
-
def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
|
242
|
-
try:
|
243
|
-
for key in keys:
|
244
|
-
d = d[key]
|
245
|
-
except KeyError:
|
246
|
-
return default
|
247
|
-
return as_any(d)
|
248
|
-
|
249
|
-
def for_each[T](func: Callable[[T], Any], iterable: Iterable[T]) -> None:
|
250
|
-
for item in iterable:
|
251
|
-
func(item)
|
252
|
-
|
253
|
-
def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
|
254
|
-
# "chunk"
|
255
|
-
if not isinstance(values, list):
|
256
|
-
values = list(values)
|
257
|
-
num_parts = (len(values) / part_size).__ceil__()
|
258
|
-
return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
|
259
|
-
|
260
|
-
def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
|
261
|
-
if not isinstance(values, list):
|
262
|
-
values = list(values)
|
263
|
-
part_size = (len(values) / num_parts).__ceil__()
|
264
|
-
return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
|
265
|
-
|
266
|
-
def _cat_tile(cats, n_tile):
|
267
|
-
import numpy as np
|
268
|
-
return cats[np.tile(np.arange(len(cats)), n_tile)]
|
269
|
-
|
270
|
-
def df_from_array(
|
271
|
-
value_cols: dict[str, Any],
|
272
|
-
dim_labels: list[tuple[str, list[str | int | float]]],
|
273
|
-
indexed=False,
|
274
|
-
):
|
275
|
-
import numpy as np
|
276
|
-
import pandas as pd
|
277
|
-
dim_sizes = np.array([len(labels) for _, labels in dim_labels])
|
278
|
-
assert all(array.shape == tuple(dim_sizes) for array in value_cols.values())
|
279
|
-
array_offsets = [
|
280
|
-
(dim_sizes[i + 1:].prod(), dim_sizes[:i].prod())
|
281
|
-
for i in range(len(dim_sizes))
|
282
|
-
]
|
283
|
-
category_cols = {
|
284
|
-
dim: _cat_tile(pd.Categorical(labels).repeat(repeats), tiles)
|
285
|
-
for (dim, labels), (repeats, tiles) in zip(dim_labels, array_offsets)
|
286
|
-
}
|
287
|
-
value_cols = {name: array.reshape(-1) for name, array in value_cols.items()}
|
288
|
-
df = pd.DataFrame({**category_cols, **value_cols}, copy=False)
|
289
|
-
if indexed:
|
290
|
-
df = df.set_index([name for name, _ in dim_labels])
|
291
|
-
return df
|
292
|
-
|
293
|
-
StrFilter = Callable[[str], bool]
|
294
|
-
|
295
|
-
def str_filterer(
|
296
|
-
include_patterns: list[re.Pattern[str]] = [],
|
297
|
-
exclude_patterns: list[re.Pattern[str]] = [],
|
298
|
-
) -> StrFilter:
|
299
|
-
def str_filter(string: str) -> bool:
|
300
|
-
if any(pattern.search(string) for pattern in exclude_patterns):
|
301
|
-
return False
|
302
|
-
if not include_patterns:
|
303
|
-
return True
|
304
|
-
return any(pattern.search(string) for pattern in include_patterns)
|
305
|
-
|
306
|
-
return str_filter
|
relib-1.2.11.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-relib/__init__.py,sha256=4_nmex7mRhCwdtLF8k0XLbxxPs-UeN2sP-EEImm5JGs,126
-relib/hashing.py,sha256=DB_fnkj0ls01FgZbf4nPFHl4EBU8X_0OrmDvty4HlRE,6020
-relib/measure_duration.py,sha256=LCTo_D_qReNprD3fhtJ0daeWycS6xQE_cwxeg2_h0xo,456
-relib/system.py,sha256=3RWmSweTCQtB1wzsgpUqcAsMo6TIhVRq2oSt28Ul_1E,2733
-relib/utils.py,sha256=bwOJXfsNx5nZf1fZxWtT_hLwjTbWo2cP9qfsqca1aHI,9680
-relib-1.2.11.dist-info/METADATA,sha256=WxEfPUA2Ox2t1ulJZFIXX5ynIOf1jw1u6iWbvtcx9HI,1296
-relib-1.2.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-relib-1.2.11.dist-info/licenses/LICENSE,sha256=9xVsdtv_-uSyY9Xl9yujwAPm4-mjcCLeVy-ljwXEWbo,1059
-relib-1.2.11.dist-info/RECORD,,
{relib-1.2.11.dist-info → relib-1.3.1.dist-info}/WHEEL
File without changes
{relib-1.2.11.dist-info → relib-1.3.1.dist-info}/licenses/LICENSE
File without changes