pyochain 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ from collections.abc import Callable, Generator, Iterable
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ import more_itertools as mit
8
+
9
+ from .._core import IterWrapper
10
+
11
+ if TYPE_CHECKING:
12
+ from ._main import Iter
13
+
14
+
15
+ class BaseList[T](IterWrapper[T]):
16
+ def implode(self) -> Iter[list[T]]:
17
+ """
18
+ Wrap each element in the iterable into a list.
19
+
20
+ Syntactic sugar for `Iter.map(lambda x: [x])`.
21
+ ```python
22
+ >>> import pyochain as pc
23
+ >>> pc.Iter.from_(range(5)).implode().into(list)
24
+ [[0], [1], [2], [3], [4]]
25
+
26
+ ```
27
+ """
28
+
29
+ def _implode(data: Iterable[T]) -> Generator[list[T], None, None]:
30
+ return ([x] for x in data)
31
+
32
+ return self._lazy(_implode)
33
+
34
+ def split_at(
35
+ self,
36
+ pred: Callable[[T], bool],
37
+ maxsplit: int = -1,
38
+ keep_separator: bool = False,
39
+ ) -> Iter[list[T]]:
40
+ """
41
+ Yield lists of items from iterable, where each list is delimited by an item where callable pred returns True.
42
+
43
+ Args:
44
+ pred: Function to determine the split points.
45
+ maxsplit: Maximum number of splits to perform. Defaults to -1 (no limit).
46
+ keep_separator: Whether to include the separator in the output. Defaults to False.
47
+ Example:
48
+ ```python
49
+ >>> import pyochain as pc
50
+ >>> pc.Iter.from_("abcdcba").split_at(lambda x: x == "b").into(list)
51
+ [['a'], ['c', 'd', 'c'], ['a']]
52
+ >>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1).into(list)
53
+ [[0], [2], [4], [6], [8], []]
54
+
55
+ At most *maxsplit* splits are done.
56
+
57
+ If *maxsplit* is not specified or -1, then there is no limit on the number of splits:
58
+ ```python
59
+ >>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1, maxsplit=2).into(
60
+ ... list
61
+ ... )
62
+ [[0], [2], [4, 5, 6, 7, 8, 9]]
63
+
64
+ ```
65
+ By default, the delimiting items are not included in the output.
66
+
67
+ To include them, set *keep_separator* to `True`.
68
+ ```python
69
+ >>> def cond(x: str) -> bool:
70
+ ... return x == "b"
71
+ >>> pc.Iter.from_("abcdcba").split_at(cond, keep_separator=True).into(list)
72
+ [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
73
+
74
+ ```
75
+ """
76
+ return self._lazy(mit.split_at, pred, maxsplit, keep_separator)
77
+
78
+ def split_after(
79
+ self, predicate: Callable[[T], bool], max_split: int = -1
80
+ ) -> Iter[list[T]]:
81
+ """
82
+ Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
83
+
84
+ Args:
85
+ predicate: Function to determine the split points.
86
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
87
+ Example:
88
+ ```python
89
+ >>> import pyochain as pc
90
+ >>> pc.Iter.from_("one1two2").split_after(str.isdigit).into(list)
91
+ [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
92
+
93
+ >>> def cond(n: int) -> bool:
94
+ ... return n % 3 == 0
95
+ >>>
96
+ >>> pc.Iter.from_(range(10)).split_after(cond).into(list)
97
+ [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
98
+ >>> pc.Iter.from_(range(10)).split_after(cond, max_split=2).into(list)
99
+ [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
100
+
101
+ ```
102
+ """
103
+ return self._lazy(mit.split_after, predicate, max_split)
104
+
105
+ def split_before(
106
+ self, predicate: Callable[[T], bool], max_split: int = -1
107
+ ) -> Iter[list[T]]:
108
+ """
109
+ Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
110
+
111
+ Args:
112
+ predicate: Function to determine the split points.
113
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
114
+ Example:
115
+ ```python
116
+ >>> import pyochain as pc
117
+ >>> pc.Iter.from_("abcdcba").split_before(lambda x: x == "b").into(list)
118
+ [['a'], ['b', 'c', 'd', 'c'], ['b', 'a']]
119
+ >>>
120
+ >>> def cond(n: int) -> bool:
121
+ ... return n % 2 == 1
122
+ >>>
123
+ >>> pc.Iter.from_(range(10)).split_before(cond).into(list)
124
+ [[0], [1, 2], [3, 4], [5, 6], [7, 8], [9]]
125
+
126
+ ```
127
+ At most *max_split* splits are done.
128
+
129
+ If *max_split* is not specified or -1, then there is no limit on the number of splits:
130
+ ```python
131
+ >>> pc.Iter.from_(range(10)).split_before(cond, max_split=2).into(list)
132
+ [[0], [1, 2], [3, 4, 5, 6, 7, 8, 9]]
133
+
134
+ ```
135
+ """
136
+ return self._lazy(mit.split_before, predicate, max_split)
137
+
138
+ def split_into(self, sizes: Iterable[int | None]) -> Iter[list[T]]:
139
+ """
140
+ Yield a list of sequential items from iterable of length 'n' for each integer 'n' in sizes.
141
+
142
+ Args:
143
+ sizes: Iterable of integers specifying the sizes of each chunk. Use None for the remainder.
144
+ Example:
145
+ ```python
146
+ >>> import pyochain as pc
147
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([1, 2, 3]).into(list)
148
+ [[1], [2, 3], [4, 5, 6]]
149
+
150
+ If the sum of sizes is smaller than the length of iterable, then the remaining items of iterable will not be returned.
151
+ ```python
152
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([2, 3]).into(list)
153
+ [[1, 2], [3, 4, 5]]
154
+
155
+ ```
156
+
157
+ If the sum of sizes is larger than the length of iterable:
158
+
159
+ - fewer items will be returned in the iteration that overruns the iterable
160
+ - further lists will be empty
161
+ ```python
162
+ >>> pc.Iter.from_([1, 2, 3, 4]).split_into([1, 2, 3, 4]).into(list)
163
+ [[1], [2, 3], [4], []]
164
+
165
+ ```
166
+
167
+ When a None object is encountered in sizes, the returned list will contain items up to the end of iterable the same way that itertools.slice does:
168
+ ```python
169
+ >>> data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
170
+ >>> pc.Iter.from_(data).split_into([2, 3, None]).into(list)
171
+ [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
172
+
173
+ ```
174
+
175
+ split_into can be useful for grouping a series of items where the sizes of the groups are not uniform.
176
+
177
+ An example would be where in a row from a table:
178
+
179
+ - multiple columns represent elements of the same feature (e.g. a point represented by x,y,z)
180
+ - the format is not the same for all columns.
181
+ """
182
+ return self._lazy(mit.split_into, sizes)
183
+
184
+ def split_when(
185
+ self, predicate: Callable[[T, T], bool], max_split: int = -1
186
+ ) -> Iter[list[T]]:
187
+ """
188
+ Split iterable into pieces based on the output of a predicate function.
189
+
190
+ Args:
191
+ predicate: Function that takes successive pairs of items and returns True if the iterable should be split.
192
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
193
+
194
+ For example, to find runs of increasing numbers, split the iterable when element i is larger than element i + 1:
195
+ ```python
196
+ >>> import pyochain as pc
197
+ >>> data = pc.Seq([1, 2, 3, 3, 2, 5, 2, 4, 2])
198
+ >>> data.iter().split_when(lambda x, y: x > y).into(list)
199
+ [[1, 2, 3, 3], [2, 5], [2, 4], [2]]
200
+
201
+ ```
202
+
203
+ At most max_split splits are done.
204
+
205
+ If max_split is not specified or -1, then there is no limit on the number of splits:
206
+ ```python
207
+ >>> data.iter().split_when(lambda x, y: x > y, max_split=2).into(list)
208
+ [[1, 2, 3, 3], [2, 5], [2, 4, 2]]
209
+
210
+ ```
211
+ """
212
+ return self._lazy(mit.split_when, predicate, max_split)
213
+
214
+ def chunks(self, n: int, strict: bool = False) -> Iter[list[T]]:
215
+ """
216
+ Break iterable into lists of length n.
217
+
218
+ By default, the last yielded list will have fewer than *n* elements if the length of *iterable* is not divisible by *n*.
219
+
220
+ To use a fill-in value instead, see the :func:`grouper` recipe.
221
+
222
+ If:
223
+
224
+ - the length of *iterable* is not divisible by *n*
225
+ - *strict* is `True`
226
+
227
+ then `ValueError` will be raised before the last list is yielded.
228
+
229
+ Args:
230
+ n: Number of elements in each chunk.
231
+ strict: Whether to raise an error if the last chunk is smaller than n. Defaults to False.
232
+ Example:
233
+ ```python
234
+ >>> import pyochain as pc
235
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).chunks(3).into(list)
236
+ [[1, 2, 3], [4, 5, 6]]
237
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).chunks(3).into(list)
238
+ [[1, 2, 3], [4, 5, 6], [7, 8]]
239
+
240
+ ```
241
+ """
242
+ return self._lazy(mit.chunked, n, strict)
243
+
244
+ def chunks_even(self, n: int) -> Iter[list[T]]:
245
+ """
246
+ Break iterable into lists of approximately length n.
247
+
248
+ Items are distributed such the lengths of the lists differ by at most 1 item.
249
+
250
+ Args:
251
+ n: Approximate number of elements in each chunk.
252
+ Example:
253
+ ```python
254
+ >>> import pyochain as pc
255
+ >>> iterable = pc.Seq([1, 2, 3, 4, 5, 6, 7])
256
+ >>> iterable.iter().chunks_even(3).into(list) # List lengths: 3, 2, 2
257
+ [[1, 2, 3], [4, 5], [6, 7]]
258
+ >>> iterable.iter().chunks(3).into(list) # List lengths: 3, 3, 1
259
+ [[1, 2, 3], [4, 5, 6], [7]]
260
+
261
+ ```
262
+ """
263
+ return self._lazy(mit.chunked_even, n)
264
+
265
+ def unique_to_each[U: Iterable[Any]](self: IterWrapper[U]) -> Iter[list[U]]:
266
+ """
267
+ Return the elements from each of the iterables that aren't in the other iterables.
268
+
269
+ For example, suppose you have a set of packages, each with a set of dependencies:
270
+
271
+ **{'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}**
272
+
273
+ If you remove one package, which dependencies can also be removed?
274
+
275
+ If pkg_1 is removed, then A is no longer necessary - it is not associated with pkg_2 or pkg_3.
276
+
277
+ Similarly, C is only needed for pkg_2, and D is only needed for pkg_3:
278
+ ```python
279
+ >>> import pyochain as pc
280
+ >>> data = ({"A", "B"}, {"B", "C"}, {"B", "D"})
281
+ >>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
282
+ [['A'], ['C'], ['D']]
283
+
284
+ ```
285
+
286
+ If there are duplicates in one input iterable that aren't in the others they will be duplicated in the output.
287
+
288
+ Input order is preserved:
289
+ ```python
290
+ >>> data = ("mississippi", "missouri")
291
+ >>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
292
+ [['p', 'p'], ['o', 'u', 'r']]
293
+
294
+ ```
295
+
296
+ It is assumed that the elements of each iterable are hashable.
297
+ """
298
+
299
+ from collections import Counter
300
+
301
+ def _unique_to_each(data: Iterable[U]) -> Generator[list[U], None, None]:
302
+ """from more_itertools.unique_to_each"""
303
+ pool: list[Iterable[U]] = [it for it in data]
304
+ counts: Counter[U] = Counter(itertools.chain.from_iterable(map(set, pool)))
305
+ uniques: set[U] = {element for element in counts if counts[element] == 1}
306
+ return ((list(filter(uniques.__contains__, it))) for it in pool)
307
+
308
+ return self._lazy(_unique_to_each)