relib 1.3.0__tar.gz → 1.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {relib-1.3.0 → relib-1.3.2}/PKG-INFO +1 -1
- {relib-1.3.0 → relib-1.3.2}/pyproject.toml +1 -1
- {relib-1.3.0 → relib-1.3.2}/relib/dict_utils.py +12 -13
- {relib-1.3.0 → relib-1.3.2}/relib/iter_utils.py +79 -25
- {relib-1.3.0 → relib-1.3.2}/relib/runtime_tools.py +13 -10
- {relib-1.3.0 → relib-1.3.2}/uv.lock +1 -1
- {relib-1.3.0 → relib-1.3.2}/.gitignore +0 -0
- {relib-1.3.0 → relib-1.3.2}/.python-version +0 -0
- {relib-1.3.0 → relib-1.3.2}/LICENSE +0 -0
- {relib-1.3.0 → relib-1.3.2}/README.md +0 -0
- {relib-1.3.0 → relib-1.3.2}/relib/__init__.py +0 -0
- {relib-1.3.0 → relib-1.3.2}/relib/io_utils.py +0 -0
- {relib-1.3.0 → relib-1.3.2}/relib/processing_utils.py +0 -0
- {relib-1.3.0 → relib-1.3.2}/relib/type_utils.py +0 -0
relib/dict_utils.py

```diff
@@ -52,9 +52,8 @@ def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
 
 def dict_firsts[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, T]:
   result: dict[K, T] = {}
-  for key, item in pairs:
-    if key not in result:
-      result[key] = item
+  for key, value in pairs:
+    result.setdefault(key, value)
   return result
 
 def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
```
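The rewritten loop is behavior-preserving: `dict.setdefault` only stores a value for a key that is not yet present, so the first pair per key still wins. A quick sketch of the semantics (not from the package's docs):

```python
from relib.dict_utils import dict_firsts

pairs = [("a", 1), ("b", 2), ("a", 3)]
assert dict_firsts(pairs) == {"a": 1, "b": 2}  # first value per key wins
```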
```diff
@@ -63,6 +62,16 @@ def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
     values_by_key.setdefault(key, []).append(value)
   return values_by_key
 
+def flatten_dict_inner(d, prefix=()):
+  for key, value in d.items():
+    if not isinstance(value, dict) or value == {}:
+      yield prefix + (key,), value
+    else:
+      yield from flatten_dict_inner(value, prefix + (key,))
+
+def flatten_dict(deep_dict: dict, prefix=()) -> dict:
+  return dict(flatten_dict_inner(deep_dict, prefix))
+
 @overload
 def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
 @overload
```
```diff
@@ -85,13 +94,3 @@ def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
       curr = curr.setdefault(key, {})
     curr[head] = value
   return output
-
-def flatten_dict_inner(d, prefix=()):
-  for key, value in d.items():
-    if not isinstance(value, dict) or value == {}:
-      yield prefix + (key,), value
-    else:
-      yield from flatten_dict_inner(value, prefix + (key,))
-
-def flatten_dict(deep_dict: dict, prefix=()) -> dict:
-  return dict(flatten_dict_inner(deep_dict, prefix))
```
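`flatten_dict` and `flatten_dict_inner` are moved above `deepen_dict` rather than changed: flattening still turns nesting into tuple paths, and `deepen_dict` still inverts it. A round-trip sketch:

```python
from relib.dict_utils import deepen_dict, flatten_dict

deep = {"a": {"b": 1, "c": {"d": 2}}}
flat = flatten_dict(deep)
assert flat == {("a", "b"): 1, ("a", "c", "d"): 2}
assert deepen_dict(flat) == deep
```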
relib/iter_utils.py

```diff
@@ -1,16 +1,17 @@
-from itertools import chain
-from typing import Any, Iterable, Literal, overload
+from contextlib import contextmanager
+from itertools import chain, islice
+from typing import Any, Iterable, Literal, Self, overload
 from .dict_utils import dict_firsts
 
 __all__ = [
+  "chunked",
   "distinct_by", "distinct", "drop_none",
   "first", "flatten",
-  "intersect",
+  "interleave", "intersect",
   "list_split",
   "move_value",
-  "num_partitions",
   "reversed_enumerate",
-  "sized_partitions", "sort_by",
+  "seekable", "sort_by",
   "transpose",
 ]
 
```
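The export list gains `chunked`, `interleave`, and `seekable` (all defined in the next hunk) and drops `num_partitions` and `sized_partitions`, which `chunked` replaces. `interleave` is zip-then-flatten; a sketch:

```python
from relib.iter_utils import interleave

assert interleave([1, 3, 5], [2, 4, 6]) == [1, 2, 3, 4, 5, 6]
```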
```diff
@@ -36,11 +37,83 @@ def move_value[T](iterable: Iterable[T], from_i: int, to_i: int) -> list[T]:
   return values
 
 def reversed_enumerate[T](values: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
-  return zip(
+  return zip(range(len(values))[::-1], reversed(values))
 
 def intersect[T](*iterables: Iterable[T]) -> list[T]:
   return list(set.intersection(*map(set, iterables)))
 
+def interleave[T](*iterables: Iterable[T]) -> list[T]:
+  return flatten(zip(*iterables))
+
+def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
+  values = [sep, *iterable, sep]
+  split_at = [i for i, x in enumerate(values) if x is sep]
+  ranges = list(zip(split_at[0:-1], split_at[1:]))
+  return [values[start + 1:end] for start, end in ranges]
+
+class seekable[T]:
+  def __init__(self, iterable: Iterable[T]):
+    self.index = 0
+    self.source = iter(iterable)
+    self.sink: list[T] = []
+
+  def __iter__(self):
+    return self
+
+  def __next__(self) -> T:
+    if len(self.sink) > self.index:
+      item = self.sink[self.index]
+    else:
+      item = next(self.source)
+      self.sink.append(item)
+    self.index += 1
+    return item
+
+  def __bool__(self):
+    return bool(self.lookahead(1))
+
+  def clear(self):
+    self.sink[:self.index] = []
+    self.index = 0
+
+  def seek(self, index: int) -> Self:
+    remainder = index - len(self.sink)
+    if remainder > 0:
+      next(islice(self, remainder, remainder), None)
+    self.index = max(0, min(index, len(self.sink)))
+    return self
+
+  def step(self, count: int) -> Self:
+    return self.seek(self.index + count)
+
+  @contextmanager
+  def freeze(self):
+    def commit(offset: int = 0):
+      nonlocal initial_index
+      initial_index = self.index + offset
+    initial_index = self.index
+    try:
+      yield commit
+    finally:
+      self.seek(initial_index)
+
+  def lookahead(self, count: int) -> list[T]:
+    with self.freeze():
+      return list(islice(self, count))
+
+@overload
+def chunked[T](values: Iterable[T], *, num_chunks: int, chunk_size=None) -> list[list[T]]: ...
+@overload
+def chunked[T](values: Iterable[T], *, num_chunks=None, chunk_size: int) -> list[list[T]]: ...
+def chunked(values, *, num_chunks=None, chunk_size=None):
+  values = values if isinstance(values, list) else list(values)
+  if isinstance(num_chunks, int):
+    chunk_size = (len(values) / num_chunks).__ceil__()
+  elif isinstance(chunk_size, int):
+    num_chunks = (len(values) / chunk_size).__ceil__()
+  assert isinstance(num_chunks, int) and isinstance(chunk_size, int)
+  return [values[i * chunk_size:(i + 1) * chunk_size] for i in range(num_chunks)]
+
 @overload
 def flatten[T](iterable: Iterable[T], depth: Literal[0]) -> list[T]: ...
 @overload
```
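The new `seekable` buffers everything it yields into `sink`, so the cursor can be rewound, peeked past, or frozen. A usage sketch based on the code above:

```python
from relib.iter_utils import seekable

it = seekable(range(5))
assert next(it) == 0
assert it.lookahead(2) == [1, 2]  # peeks ahead, then restores the cursor
assert next(it) == 1              # unaffected by the lookahead
it.seek(0)                        # rewind into the buffered sink
assert next(it) == 0
assert bool(it)                   # truthy while items remain
```

`freeze()` generalizes `lookahead`: it restores the cursor on exit unless the yielded `commit` callback is called to keep the new position.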
```diff
@@ -58,25 +131,6 @@ def flatten(iterable: Iterable, depth: int = 1) -> list:
     iterable = chain.from_iterable(iterable)
   return list(iterable)
 
-def list_split[T](iterable: Iterable[T], sep: T) -> list[list[T]]:
-  values = [sep, *iterable, sep]
-  split_at = [i for i, x in enumerate(values) if x is sep]
-  ranges = list(zip(split_at[0:-1], split_at[1:]))
-  return [values[start + 1:end] for start, end in ranges]
-
-def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
-  # "chunk"
-  if not isinstance(values, list):
-    values = list(values)
-  num_parts = (len(values) / part_size).__ceil__()
-  return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
-
-def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
-  if not isinstance(values, list):
-    values = list(values)
-  part_size = (len(values) / num_parts).__ceil__()
-  return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
-
 @overload
 def transpose[T1, T2](tuples: Iterable[tuple[T1, T2]], default_num_returns=0) -> tuple[list[T1], list[T2]]: ...
 @overload
```
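`list_split` moved up unchanged, while `sized_partitions` and `num_partitions` are folded into the single overloaded `chunked`, with keyword-only arguments selecting the mode. A migration sketch:

```python
from relib.iter_utils import chunked

values = list(range(10))

# previously sized_partitions(values, 3)
assert chunked(values, chunk_size=3) == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

# previously num_partitions(values, 2)
assert chunked(values, num_chunks=2) == [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
```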
relib/runtime_tools.py

```diff
@@ -4,20 +4,23 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial, wraps
 from time import time
-from typing import Awaitable, Callable, Iterable, ParamSpec, TypeVar
+from typing import Callable, Coroutine, Iterable, ParamSpec, TypeVar
 from .processing_utils import noop
 
 __all__ = [
   "as_async", "async_limit",
   "clear_console", "console_link",
+  "default_executor", "default_workers",
   "roll_tasks",
   "measure_duration",
 ]
 
 P = ParamSpec("P")
 R = TypeVar("R")
+Coro = Coroutine[object, object, R]
 
 default_workers = min(32, (os.cpu_count() or 1) + 4)
+default_executor = ThreadPoolExecutor(max_workers=default_workers)
 
 def clear_console() -> None:
   os.system("cls" if os.name == "nt" else "clear")
```
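`Coro[R]` abbreviates `Coroutine[object, object, R]`, the type of the object returned by calling an `async def` function, and `default_executor` is now one shared pool built at import time rather than a pool per decorated function. A sketch of what the alias denotes (`Coro` itself is not in `__all__`, so the expansion is spelled out here):

```python
import asyncio
from typing import Coroutine

async def answer() -> int:
  return 42

coro: Coroutine[object, object, int] = answer()  # what Coro[int] expands to
assert asyncio.run(coro) == 42
```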
```diff
@@ -25,13 +28,13 @@ def clear_console() -> None:
 def console_link(text: str, url: str) -> str:
   return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"
 
-async def worker[T](task: Awaitable[T], semaphore: asyncio.Semaphore, update=noop) -> T:
+async def worker[T](task: Coro[T], semaphore: asyncio.Semaphore, update=noop) -> T:
   async with semaphore:
     result = await task
     update()
     return result
 
-async def roll_tasks[T](tasks: Iterable[Awaitable[T]], workers=default_workers, progress=False) -> list[T]:
+async def roll_tasks[T](tasks: Iterable[Coro[T]], workers=default_workers, progress=False) -> list[T]:
   semaphore = asyncio.Semaphore(workers)
   if not progress:
     return await asyncio.gather(*[worker(task, semaphore) for task in tasks])
```
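`roll_tasks` still gathers with a semaphore capped at `workers`; only the annotations change from `Awaitable` to the stricter `Coro`. A usage sketch:

```python
import asyncio
from relib.runtime_tools import roll_tasks

async def fetch(i: int) -> int:
  await asyncio.sleep(0.01)  # stand-in for real I/O
  return i * i

async def main() -> None:
  results = await roll_tasks([fetch(i) for i in range(10)], workers=4)
  assert results == [i * i for i in range(10)]  # order is preserved

asyncio.run(main())
```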
```diff
@@ -42,10 +45,10 @@ async def roll_tasks[T](tasks: Iterable[Awaitable[T]], workers=default_workers, progress=False) -> list[T]:
     update = partial(pbar.update, 1)
     return await asyncio.gather(*[worker(task, semaphore, update) for task in tasks])
 
-def as_async(workers=default_workers) -> Callable[[Callable[P, R]], Callable[P, Awaitable[R]]]:
-  executor = ThreadPoolExecutor(max_workers=workers)
+def as_async(workers: int | ThreadPoolExecutor = default_executor) -> Callable[[Callable[P, R]], Callable[P, Coro[R]]]:
+  executor = ThreadPoolExecutor(max_workers=workers) if isinstance(workers, int) else workers
 
-  def on_fn(func: Callable[P, R]) -> Callable[P, Awaitable[R]]:
+  def on_fn(func: Callable[P, R]) -> Callable[P, Coro[R]]:
     @wraps(func)
     async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
       loop = asyncio.get_running_loop()
```
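`as_async` now accepts either a worker count or an existing `ThreadPoolExecutor`, and defaults to the shared `default_executor` instead of constructing a fresh pool per decorated function. A sketch:

```python
import asyncio
from relib.runtime_tools import as_async

@as_async()  # runs on the shared default_executor
def slow_square(x: int) -> int:
  return x * x  # stand-in for blocking work

@as_async(workers=2)  # or pass a ThreadPoolExecutor directly
def slow_cube(x: int) -> int:
  return x ** 3

async def main() -> None:
  assert await slow_square(3) == 9
  assert await slow_cube(2) == 8

asyncio.run(main())
```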
```diff
@@ -55,10 +58,10 @@ def as_async(workers=default_workers) -> Callable[[Callable[P, R]], Callable[P, Awaitable[R]]]:
     return wrapper
   return on_fn
 
-def async_limit(workers=default_workers) -> Callable[[Callable[P, Awaitable[R]]], Callable[P, Awaitable[R]]]:
+def async_limit(workers=default_workers) -> Callable[[Callable[P, Coro[R]]], Callable[P, Coro[R]]]:
   semaphore = asyncio.Semaphore(workers)
 
-  def on_fn(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[R]]:
+  def on_fn(func: Callable[P, Coro[R]]) -> Callable[P, Coro[R]]:
     @wraps(func)
     async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
       async with semaphore:
```
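`async_limit` changes only in spelling (`Awaitable` becomes `Coro`); it still caps how many calls of the wrapped coroutine function run concurrently. A sketch:

```python
import asyncio
from relib.runtime_tools import async_limit

@async_limit(workers=2)  # at most two concurrent calls
async def fetch(i: int) -> int:
  await asyncio.sleep(0.01)
  return i

async def main() -> None:
  assert await asyncio.gather(*[fetch(i) for i in range(6)]) == list(range(6))

asyncio.run(main())
```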
```diff
@@ -79,7 +82,7 @@ class measure_duration:
   def __exit__(self, *_):
     duration = round(time() - self.start, 4)
     depth = len(active_mds) - 1
-    indent =
-    text =
+    indent = "──" * depth + " " * (depth > 0)
+    text = f"{self.name}: {duration} seconds"
     print(indent + text)
     active_mds.remove(self)
```
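`measure_duration`'s change is cosmetic: nested timers now print with a `──` tree indent. A usage sketch, assuming the constructor takes the label stored as `self.name`:

```python
from time import sleep
from relib.runtime_tools import measure_duration

with measure_duration("outer"):
  with measure_duration("inner"):
    sleep(0.1)

# prints roughly:
# ── inner: 0.1001 seconds
# outer: 0.1003 seconds
```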
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|