lkj 0.1.32__tar.gz → 0.1.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lkj
3
- Version: 0.1.32
3
+ Version: 0.1.34
4
4
  Summary: A dump of homeless useful utils
5
5
  Home-page: https://github.com/thorwhalen/lkj
6
6
  Author: Thor Whalen
@@ -31,30 +31,12 @@ from lkj.loggers import (
31
31
  wrapped_print,
32
32
  )
33
33
  from lkj.importing import import_object, register_namespace_forwarding
34
+ from lkj.chunking import chunk_iterable, chunker
34
35
  from lkj.misc import identity, value_in_interval
35
36
 
36
37
  ddir = lambda obj: list(filter(lambda x: not x.startswith("_"), dir(obj)))
37
38
 
38
39
 
39
- def chunker(a, chk_size, *, include_tail=True):
40
- """Chunks an iterable into non-overlapping chunks of size chk_size.
41
-
42
- >>> list(chunker(range(8), 3))
43
- [(0, 1, 2), (3, 4, 5), (6, 7)]
44
- >>> list(chunker(range(8), 3, include_tail=False))
45
- [(0, 1, 2), (3, 4, 5)]
46
- """
47
- from itertools import zip_longest
48
-
49
- it = iter(a)
50
- if include_tail:
51
- sentinel = object()
52
- for chunk in zip_longest(*([it] * chk_size), fillvalue=sentinel):
53
- yield tuple(item for item in chunk if item is not sentinel)
54
- else:
55
- yield from zip(*([it] * chk_size))
56
-
57
-
58
40
  def user_machine_id():
59
41
  """Get an ID for the current computer/user that calls this function."""
60
42
  return __import__("platform").node()
@@ -0,0 +1,112 @@
1
+ """Tools for chunking (segmentation, batching, slicing, etc.)"""
2
+
3
+ from itertools import zip_longest, chain, islice
4
+
5
+ from typing import (
6
+ Iterable,
7
+ Union,
8
+ Dict,
9
+ List,
10
+ Tuple,
11
+ Mapping,
12
+ TypeVar,
13
+ Iterator,
14
+ Callable,
15
+ Optional,
16
+ T,
17
+ )
18
+
19
+ KT = TypeVar("KT") # there's a typing.KT, but pylance won't allow me to use it!
20
+ VT = TypeVar("VT") # there's a typing.VT, but pylance won't allow me to use it!
21
+
22
+
23
+ def chunk_iterable(
24
+ iterable: Union[Iterable[T], Mapping[KT, VT]],
25
+ chk_size: int,
26
+ *,
27
+ chunk_type: Optional[Callable[..., Union[Iterable[T], Mapping[KT, VT]]]] = None,
28
+ ) -> Iterator[Union[List[T], Tuple[T, ...], Dict[KT, VT]]]:
29
+ """
30
+ Divide an iterable into chunks/batches of a specific size.
31
+
32
+ Handles both mappings (e.g. dicts) and non-mappings (lists, tuples, sets...)
33
+ as you probably expect it to (if you give a dict input, it will chunk on the
34
+ (key, value) items and return dicts of these).
35
+ Though note that you can always control the type of the chunks with the
36
+ `chunk_type` argument.
37
+
38
+ Args:
39
+ iterable: The iterable or mapping to divide.
40
+ chk_size: The size of each chunk.
41
+ chunk_type: The type of the chunks (list, tuple, set, dict...).
42
+
43
+ Returns:
44
+ An iterator of dicts if the input is a Mapping, otherwise an iterator
45
+ of collections (list, tuple, set...).
46
+
47
+ Examples:
48
+ >>> list(chunk_iterable([1, 2, 3, 4, 5], 2))
49
+ [[1, 2], [3, 4], [5]]
50
+
51
+ >>> list(chunk_iterable((1, 2, 3, 4, 5), 3, chunk_type=tuple))
52
+ [(1, 2, 3), (4, 5)]
53
+
54
+ >>> list(chunk_iterable({"a": 1, "b": 2, "c": 3}, 2))
55
+ [{'a': 1, 'b': 2}, {'c': 3}]
56
+
57
+ >>> list(chunk_iterable({"x": 1, "y": 2, "z": 3}, 1, chunk_type=dict))
58
+ [{'x': 1}, {'y': 2}, {'z': 3}]
59
+ """
60
+ if isinstance(iterable, Mapping):
61
+ if chunk_type is None:
62
+ chunk_type = dict
63
+ it = iter(iterable.items())
64
+ for first in it:
65
+ yield {
66
+ key: value for key, value in chain([first], islice(it, chk_size - 1))
67
+ }
68
+ else:
69
+ if chunk_type is None:
70
+ if isinstance(iterable, (list, tuple, set)):
71
+ chunk_type = type(iterable)
72
+ else:
73
+ chunk_type = list
74
+ it = iter(iterable)
75
+ for first in it:
76
+ yield chunk_type(chain([first], islice(it, chk_size - 1)))
77
+
78
+
79
+ def chunker(
80
+ a: Iterable[T], chk_size: int, *, include_tail: bool = True
81
+ ) -> Iterator[Tuple[T, ...]]:
82
+ """
83
+ Chunks an iterable into non-overlapping chunks of size `chk_size`.
84
+
85
+ Note: This chunker is simpler, but also less efficient than `chunk_iterable`.
86
+ It does have the extra `include_tail` argument, though.
87
+ Though note that you can get the effect of `include_tail=False` in `chunk_iterable`
88
+ by using `filter(lambda x: len(x) == chk_size, chunk_iterable(...))`.
89
+
90
+ Args:
91
+ a: The iterable to be chunked.
92
+ chk_size: The size of each chunk.
93
+ include_tail: If True, includes the remaining elements as the last chunk
94
+ even if they are fewer than `chk_size`. Defaults to True.
95
+
96
+ Returns:
97
+ An iterator of tuples, where each tuple is a chunk of size `chk_size`
98
+ (or fewer elements if `include_tail` is True).
99
+
100
+ Examples:
101
+ >>> list(chunker(range(8), 3))
102
+ [(0, 1, 2), (3, 4, 5), (6, 7)]
103
+ >>> list(chunker(range(8), 3, include_tail=False))
104
+ [(0, 1, 2), (3, 4, 5)]
105
+ """
106
+ it = iter(a)
107
+ if include_tail:
108
+ sentinel = object()
109
+ for chunk in zip_longest(*([it] * chk_size), fillvalue=sentinel):
110
+ yield tuple(item for item in chunk if item is not sentinel)
111
+ else:
112
+ yield from zip(*([it] * chk_size))
@@ -201,29 +201,41 @@ def regex_based_substitution(replacements: dict, regex=None, s: str = None):
201
201
  'I like orange and grapes.'
202
202
 
203
203
  You have access to the ``replacements`` and ``regex`` attributes of the
204
- ``substitute`` function:
204
+ ``substitute`` function. See how the replacements dict has been ordered by
205
+ descending length of keys. This is to ensure that longer keys are replaced
206
+ before shorter keys, avoiding partial replacements.
205
207
 
206
208
  >>> substitute.replacements
207
- {'apple': 'orange', 'banana': 'grape'}
209
+ {'banana': 'grape', 'apple': 'orange'}
208
210
 
209
211
  """
210
212
  import re
211
213
  from functools import partial
212
214
 
213
215
  if regex is None and s is None:
214
- replacements = dict(replacements)
215
-
216
- if not replacements: # if replacements iterable is empty.
217
- return lambda s: s # return identity function
218
-
219
- regex = re.compile("|".join(re.escape(key) for key in replacements.keys()))
220
-
221
- substitute = partial(regex_based_substitution, replacements, regex)
222
- substitute.replacements = replacements
216
+ # Sort keys by length while maintaining value alignment
217
+ sorted_replacements = sorted(
218
+ replacements.items(), key=lambda x: len(x[0]), reverse=True
219
+ )
220
+
221
+ # Create regex pattern from sorted keys (without escaping to allow regex)
222
+ sorted_keys = [pair[0] for pair in sorted_replacements]
223
+ sorted_values = [pair[1] for pair in sorted_replacements]
224
+ regex = re.compile("|".join(sorted_keys))
225
+
226
+ # Prepare the substitution function with aligned replacements
227
+ aligned_replacements = dict(zip(sorted_keys, sorted_values))
228
+ substitute = partial(regex_based_substitution, aligned_replacements, regex)
229
+ substitute.replacements = aligned_replacements
223
230
  substitute.regex = regex
224
231
  return substitute
225
- else:
232
+ elif s is not None:
233
+ # Perform substitution using the compiled regex and aligned replacements
226
234
  return regex.sub(lambda m: replacements[m.group(0)], s)
235
+ else:
236
+ raise ValueError(
237
+ "Invalid usage: provide either `s` or let the function construct itself."
238
+ )
227
239
 
228
240
 
229
241
  from typing import Callable, Iterable, Sequence
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lkj
3
- Version: 0.1.32
3
+ Version: 0.1.34
4
4
  Summary: A dump of homeless useful utils
5
5
  Home-page: https://github.com/thorwhalen/lkj
6
6
  Author: Thor Whalen
@@ -3,6 +3,7 @@ README.md
3
3
  setup.cfg
4
4
  setup.py
5
5
  lkj/__init__.py
6
+ lkj/chunking.py
6
7
  lkj/dicts.py
7
8
  lkj/filesys.py
8
9
  lkj/funcs.py
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = lkj
3
- version = 0.1.32
3
+ version = 0.1.34
4
4
  url = https://github.com/thorwhalen/lkj
5
5
  platforms = any
6
6
  description_file = README.md
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes