lkj 0.1.32__tar.gz → 0.1.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lkj-0.1.32 → lkj-0.1.34}/PKG-INFO +1 -1
- {lkj-0.1.32 → lkj-0.1.34}/lkj/__init__.py +1 -19
- lkj-0.1.34/lkj/chunking.py +112 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/strings.py +24 -12
- {lkj-0.1.32 → lkj-0.1.34}/lkj.egg-info/PKG-INFO +1 -1
- {lkj-0.1.32 → lkj-0.1.34}/lkj.egg-info/SOURCES.txt +1 -0
- {lkj-0.1.32 → lkj-0.1.34}/setup.cfg +1 -1
- {lkj-0.1.32 → lkj-0.1.34}/LICENSE +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/README.md +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/dicts.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/filesys.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/funcs.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/importing.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/iterables.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/loggers.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj/misc.py +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj.egg-info/dependency_links.txt +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj.egg-info/not-zip-safe +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/lkj.egg-info/top_level.txt +0 -0
- {lkj-0.1.32 → lkj-0.1.34}/setup.py +0 -0
|
@@ -31,30 +31,12 @@ from lkj.loggers import (
|
|
|
31
31
|
wrapped_print,
|
|
32
32
|
)
|
|
33
33
|
from lkj.importing import import_object, register_namespace_forwarding
|
|
34
|
+
from lkj.chunking import chunk_iterable, chunker
|
|
34
35
|
from lkj.misc import identity, value_in_interval
|
|
35
36
|
|
|
36
37
|
ddir = lambda obj: list(filter(lambda x: not x.startswith("_"), dir(obj)))
|
|
37
38
|
|
|
38
39
|
|
|
39
|
-
def chunker(a, chk_size, *, include_tail=True):
|
|
40
|
-
"""Chunks an iterable into non-overlapping chunks of size chk_size.
|
|
41
|
-
|
|
42
|
-
>>> list(chunker(range(8), 3))
|
|
43
|
-
[(0, 1, 2), (3, 4, 5), (6, 7)]
|
|
44
|
-
>>> list(chunker(range(8), 3, include_tail=False))
|
|
45
|
-
[(0, 1, 2), (3, 4, 5)]
|
|
46
|
-
"""
|
|
47
|
-
from itertools import zip_longest
|
|
48
|
-
|
|
49
|
-
it = iter(a)
|
|
50
|
-
if include_tail:
|
|
51
|
-
sentinel = object()
|
|
52
|
-
for chunk in zip_longest(*([it] * chk_size), fillvalue=sentinel):
|
|
53
|
-
yield tuple(item for item in chunk if item is not sentinel)
|
|
54
|
-
else:
|
|
55
|
-
yield from zip(*([it] * chk_size))
|
|
56
|
-
|
|
57
|
-
|
|
58
40
|
def user_machine_id():
|
|
59
41
|
"""Get an ID for the current computer/user that calls this function."""
|
|
60
42
|
return __import__("platform").node()
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Tools for chunking (segumentation, batching, slicing, etc.)"""
|
|
2
|
+
|
|
3
|
+
from itertools import zip_longest, chain, islice
|
|
4
|
+
|
|
5
|
+
from typing import (
|
|
6
|
+
Iterable,
|
|
7
|
+
Union,
|
|
8
|
+
Dict,
|
|
9
|
+
List,
|
|
10
|
+
Tuple,
|
|
11
|
+
Mapping,
|
|
12
|
+
TypeVar,
|
|
13
|
+
Iterator,
|
|
14
|
+
Callable,
|
|
15
|
+
Optional,
|
|
16
|
+
T,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
KT = TypeVar("KT") # there's a typing.KT, but pylance won't allow me to use it!
|
|
20
|
+
VT = TypeVar("VT") # there's a typing.VT, but pylance won't allow me to use it!
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def chunk_iterable(
    iterable: Union[Iterable[T], Mapping[KT, VT]],
    chk_size: int,
    *,
    chunk_type: Optional[Callable[..., Union[Iterable[T], Mapping[KT, VT]]]] = None,
) -> Iterator[Union[List[T], Tuple[T, ...], Dict[KT, VT]]]:
    """
    Divide an iterable into chunks/batches of a specific size.

    Handles both mappings (e.g. dicts) and non-mappings (lists, tuples, sets...)
    as you probably expect it to (if you give a dict input, it will chunk on the
    (key, value) items and return dicts of these).
    Note, though, that you can always control the type of the chunks with the
    `chunk_type` argument, for mappings as well as for other iterables.

    Args:
        iterable: The iterable or mapping to divide.
        chk_size: The size of each chunk. Must be at least 1.
        chunk_type: A callable that builds one chunk from an iterable of
            elements (list, tuple, set, dict...). For a mapping input the
            elements are (key, value) pairs. When omitted, it defaults to
            ``dict`` for mappings, to the input's own type for lists, tuples
            and sets, and to ``list`` for anything else.

    Returns:
        An iterator of chunks. Every chunk has `chk_size` elements, except
        possibly the last one, which holds whatever elements remain.

    Raises:
        ValueError: If `chk_size` is smaller than 1. Since this is a generator,
            the error surfaces when iteration starts, not at call time.

    Examples:
        >>> list(chunk_iterable([1, 2, 3, 4, 5], 2))
        [[1, 2], [3, 4], [5]]

        >>> list(chunk_iterable((1, 2, 3, 4, 5), 3, chunk_type=tuple))
        [(1, 2, 3), (4, 5)]

        >>> list(chunk_iterable({"a": 1, "b": 2, "c": 3}, 2))
        [{'a': 1, 'b': 2}, {'c': 3}]

        >>> list(chunk_iterable({"x": 1, "y": 2, "z": 3}, 1, chunk_type=dict))
        [{'x': 1}, {'y': 2}, {'z': 3}]

        >>> list(chunk_iterable({"a": 1, "b": 2, "c": 3}, 2, chunk_type=list))
        [[('a', 1), ('b', 2)], [('c', 3)]]
    """
    if chk_size < 1:
        # islice would reject the negative stop anyway, but with an opaque message.
        raise ValueError(f"chk_size must be a positive integer, got {chk_size!r}")

    if isinstance(iterable, Mapping):
        if chunk_type is None:
            chunk_type = dict
        it = iter(iterable.items())
    else:
        if chunk_type is None:
            if isinstance(iterable, (list, tuple, set)):
                chunk_type = type(iterable)
            else:
                chunk_type = list
        it = iter(iterable)

    # Pulling `first` in the for-loop is what detects exhaustion; the rest of
    # the chunk is then drawn from the same iterator, so nothing is skipped.
    for first in it:
        yield chunk_type(chain([first], islice(it, chk_size - 1)))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def chunker(
    a: Iterable[T], chk_size: int, *, include_tail: bool = True
) -> Iterator[Tuple[T, ...]]:
    """
    Chunks an iterable into non-overlapping chunks of size `chk_size`.

    Note: Unlike `chunk_iterable`, this chunker always yields tuples, and it
    has the extra `include_tail` argument.
    Though note that you can get the effect of `include_tail=False` in `chunk_iterable`
    by using `filter(lambda x: len(x) == chk_size, chunk_iterable(...))`.

    Args:
        a: The iterable to be chunked.
        chk_size: The size of each chunk. If it is smaller than 1, nothing
            is yielded.
        include_tail: If True, includes the remaining elements as the last chunk
            even if they are fewer than `chk_size`. Defaults to True.

    Returns:
        An iterator of tuples, where each tuple is a chunk of size `chk_size`
        (or fewer elements, for the last chunk, if `include_tail` is True).

    Examples:
        >>> list(chunker(range(8), 3))
        [(0, 1, 2), (3, 4, 5), (6, 7)]
        >>> list(chunker(range(8), 3, include_tail=False))
        [(0, 1, 2), (3, 4, 5)]
    """
    it = iter(a)
    if chk_size < 1:
        # Nothing to group by: yield no chunks at all, for either tail setting.
        return
    while True:
        # Build each chunk directly instead of padding with a sentinel and
        # filtering every chunk; only the final chunk can come up short.
        chunk = tuple(islice(it, chk_size))
        if len(chunk) < chk_size:
            if chunk and include_tail:
                yield chunk
            return
        yield chunk
|
|
@@ -201,29 +201,41 @@ def regex_based_substitution(replacements: dict, regex=None, s: str = None):
|
|
|
201
201
|
'I like orange and grapes.'
|
|
202
202
|
|
|
203
203
|
You have access to the ``replacements`` and ``regex`` attributes of the
|
|
204
|
-
``substitute`` function
|
|
204
|
+
``substitute`` function. See how the replacements dict has been ordered by
|
|
205
|
+
descending length of keys. This is to ensure that longer keys are replaced
|
|
206
|
+
before shorter keys, avoiding partial replacements.
|
|
205
207
|
|
|
206
208
|
>>> substitute.replacements
|
|
207
|
-
{'
|
|
209
|
+
{'banana': 'grape', 'apple': 'orange'}
|
|
208
210
|
|
|
209
211
|
"""
|
|
210
212
|
import re
|
|
211
213
|
from functools import partial
|
|
212
214
|
|
|
213
215
|
if regex is None and s is None:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
regex
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
216
|
+
# Sort keys by length while maintaining value alignment
|
|
217
|
+
sorted_replacements = sorted(
|
|
218
|
+
replacements.items(), key=lambda x: len(x[0]), reverse=True
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Create regex pattern from sorted keys (without escaping to allow regex)
|
|
222
|
+
sorted_keys = [pair[0] for pair in sorted_replacements]
|
|
223
|
+
sorted_values = [pair[1] for pair in sorted_replacements]
|
|
224
|
+
regex = re.compile("|".join(sorted_keys))
|
|
225
|
+
|
|
226
|
+
# Prepare the substitution function with aligned replacements
|
|
227
|
+
aligned_replacements = dict(zip(sorted_keys, sorted_values))
|
|
228
|
+
substitute = partial(regex_based_substitution, aligned_replacements, regex)
|
|
229
|
+
substitute.replacements = aligned_replacements
|
|
223
230
|
substitute.regex = regex
|
|
224
231
|
return substitute
|
|
225
|
-
|
|
232
|
+
elif s is not None:
|
|
233
|
+
# Perform substitution using the compiled regex and aligned replacements
|
|
226
234
|
return regex.sub(lambda m: replacements[m.group(0)], s)
|
|
235
|
+
else:
|
|
236
|
+
raise ValueError(
|
|
237
|
+
"Invalid usage: provide either `s` or let the function construct itself."
|
|
238
|
+
)
|
|
227
239
|
|
|
228
240
|
|
|
229
241
|
from typing import Callable, Iterable, Sequence
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|