pyochain 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyochain/__init__.py +5 -0
- pyochain/_core/__init__.py +23 -0
- pyochain/_core/_format.py +34 -0
- pyochain/_core/_main.py +205 -0
- pyochain/_core/_protocols.py +38 -0
- pyochain/_dict/__init__.py +3 -0
- pyochain/_dict/_filters.py +268 -0
- pyochain/_dict/_groups.py +175 -0
- pyochain/_dict/_iter.py +135 -0
- pyochain/_dict/_joins.py +139 -0
- pyochain/_dict/_main.py +113 -0
- pyochain/_dict/_maps.py +142 -0
- pyochain/_dict/_nested.py +272 -0
- pyochain/_dict/_process.py +204 -0
- pyochain/_iter/__init__.py +3 -0
- pyochain/_iter/_aggregations.py +324 -0
- pyochain/_iter/_booleans.py +227 -0
- pyochain/_iter/_dicts.py +243 -0
- pyochain/_iter/_eager.py +233 -0
- pyochain/_iter/_filters.py +510 -0
- pyochain/_iter/_joins.py +404 -0
- pyochain/_iter/_lists.py +308 -0
- pyochain/_iter/_main.py +466 -0
- pyochain/_iter/_maps.py +360 -0
- pyochain/_iter/_partitions.py +145 -0
- pyochain/_iter/_process.py +366 -0
- pyochain/_iter/_rolling.py +241 -0
- pyochain/_iter/_tuples.py +326 -0
- pyochain/py.typed +0 -0
- pyochain-0.5.3.dist-info/METADATA +261 -0
- pyochain-0.5.3.dist-info/RECORD +32 -0
- pyochain-0.5.3.dist-info/WHEEL +4 -0
pyochain/_iter/_lists.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import itertools
|
|
4
|
+
from collections.abc import Callable, Generator, Iterable
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import more_itertools as mit
|
|
8
|
+
|
|
9
|
+
from .._core import IterWrapper
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ._main import Iter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseList[T](IterWrapper[T]):
|
|
16
|
+
def implode(self) -> Iter[list[T]]:
|
|
17
|
+
"""
|
|
18
|
+
Wrap each element in the iterable into a list.
|
|
19
|
+
|
|
20
|
+
Syntactic sugar for `Iter.map(lambda x: [x])`.
|
|
21
|
+
```python
|
|
22
|
+
>>> import pyochain as pc
|
|
23
|
+
>>> pc.Iter.from_(range(5)).implode().into(list)
|
|
24
|
+
[[0], [1], [2], [3], [4]]
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def _implode(data: Iterable[T]) -> Generator[list[T], None, None]:
|
|
30
|
+
return ([x] for x in data)
|
|
31
|
+
|
|
32
|
+
return self._lazy(_implode)
|
|
33
|
+
|
|
34
|
+
def split_at(
|
|
35
|
+
self,
|
|
36
|
+
pred: Callable[[T], bool],
|
|
37
|
+
maxsplit: int = -1,
|
|
38
|
+
keep_separator: bool = False,
|
|
39
|
+
) -> Iter[list[T]]:
|
|
40
|
+
"""
|
|
41
|
+
Yield lists of items from iterable, where each list is delimited by an item where callable pred returns True.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
pred: Function to determine the split points.
|
|
45
|
+
maxsplit: Maximum number of splits to perform. Defaults to -1 (no limit).
|
|
46
|
+
keep_separator: Whether to include the separator in the output. Defaults to False.
|
|
47
|
+
Example:
|
|
48
|
+
```python
|
|
49
|
+
>>> import pyochain as pc
|
|
50
|
+
>>> pc.Iter.from_("abcdcba").split_at(lambda x: x == "b").into(list)
|
|
51
|
+
[['a'], ['c', 'd', 'c'], ['a']]
|
|
52
|
+
>>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1).into(list)
|
|
53
|
+
[[0], [2], [4], [6], [8], []]
|
|
54
|
+
|
|
55
|
+
At most *maxsplit* splits are done.
|
|
56
|
+
|
|
57
|
+
If *maxsplit* is not specified or -1, then there is no limit on the number of splits:
|
|
58
|
+
```python
|
|
59
|
+
>>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1, maxsplit=2).into(
|
|
60
|
+
... list
|
|
61
|
+
... )
|
|
62
|
+
[[0], [2], [4, 5, 6, 7, 8, 9]]
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
By default, the delimiting items are not included in the output.
|
|
66
|
+
|
|
67
|
+
To include them, set *keep_separator* to `True`.
|
|
68
|
+
```python
|
|
69
|
+
>>> def cond(x: str) -> bool:
|
|
70
|
+
... return x == "b"
|
|
71
|
+
>>> pc.Iter.from_("abcdcba").split_at(cond, keep_separator=True).into(list)
|
|
72
|
+
[['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
"""
|
|
76
|
+
return self._lazy(mit.split_at, pred, maxsplit, keep_separator)
|
|
77
|
+
|
|
78
|
+
def split_after(
|
|
79
|
+
self, predicate: Callable[[T], bool], max_split: int = -1
|
|
80
|
+
) -> Iter[list[T]]:
|
|
81
|
+
"""
|
|
82
|
+
Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
predicate: Function to determine the split points.
|
|
86
|
+
max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
|
|
87
|
+
Example:
|
|
88
|
+
```python
|
|
89
|
+
>>> import pyochain as pc
|
|
90
|
+
>>> pc.Iter.from_("one1two2").split_after(str.isdigit).into(list)
|
|
91
|
+
[['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
|
|
92
|
+
|
|
93
|
+
>>> def cond(n: int) -> bool:
|
|
94
|
+
... return n % 3 == 0
|
|
95
|
+
>>>
|
|
96
|
+
>>> pc.Iter.from_(range(10)).split_after(cond).into(list)
|
|
97
|
+
[[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
|
|
98
|
+
>>> pc.Iter.from_(range(10)).split_after(cond, max_split=2).into(list)
|
|
99
|
+
[[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
"""
|
|
103
|
+
return self._lazy(mit.split_after, predicate, max_split)
|
|
104
|
+
|
|
105
|
+
def split_before(
|
|
106
|
+
self, predicate: Callable[[T], bool], max_split: int = -1
|
|
107
|
+
) -> Iter[list[T]]:
|
|
108
|
+
"""
|
|
109
|
+
Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
predicate: Function to determine the split points.
|
|
113
|
+
max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
|
|
114
|
+
Example:
|
|
115
|
+
```python
|
|
116
|
+
>>> import pyochain as pc
|
|
117
|
+
>>> pc.Iter.from_("abcdcba").split_before(lambda x: x == "b").into(list)
|
|
118
|
+
[['a'], ['b', 'c', 'd', 'c'], ['b', 'a']]
|
|
119
|
+
>>>
|
|
120
|
+
>>> def cond(n: int) -> bool:
|
|
121
|
+
... return n % 2 == 1
|
|
122
|
+
>>>
|
|
123
|
+
>>> pc.Iter.from_(range(10)).split_before(cond).into(list)
|
|
124
|
+
[[0], [1, 2], [3, 4], [5, 6], [7, 8], [9]]
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
At most *max_split* splits are done.
|
|
128
|
+
|
|
129
|
+
If *max_split* is not specified or -1, then there is no limit on the number of splits:
|
|
130
|
+
```python
|
|
131
|
+
>>> pc.Iter.from_(range(10)).split_before(cond, max_split=2).into(list)
|
|
132
|
+
[[0], [1, 2], [3, 4, 5, 6, 7, 8, 9]]
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
"""
|
|
136
|
+
return self._lazy(mit.split_before, predicate, max_split)
|
|
137
|
+
|
|
138
|
+
def split_into(self, sizes: Iterable[int | None]) -> Iter[list[T]]:
|
|
139
|
+
"""
|
|
140
|
+
Yield a list of sequential items from iterable of length 'n' for each integer 'n' in sizes.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
sizes: Iterable of integers specifying the sizes of each chunk. Use None for the remainder.
|
|
144
|
+
Example:
|
|
145
|
+
```python
|
|
146
|
+
>>> import pyochain as pc
|
|
147
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([1, 2, 3]).into(list)
|
|
148
|
+
[[1], [2, 3], [4, 5, 6]]
|
|
149
|
+
|
|
150
|
+
If the sum of sizes is smaller than the length of iterable, then the remaining items of iterable will not be returned.
|
|
151
|
+
```python
|
|
152
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([2, 3]).into(list)
|
|
153
|
+
[[1, 2], [3, 4, 5]]
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
If the sum of sizes is larger than the length of iterable:
|
|
158
|
+
|
|
159
|
+
- fewer items will be returned in the iteration that overruns the iterable
|
|
160
|
+
- further lists will be empty
|
|
161
|
+
```python
|
|
162
|
+
>>> pc.Iter.from_([1, 2, 3, 4]).split_into([1, 2, 3, 4]).into(list)
|
|
163
|
+
[[1], [2, 3], [4], []]
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
When a None object is encountered in sizes, the returned list will contain items up to the end of iterable the same way that itertools.slice does:
|
|
168
|
+
```python
|
|
169
|
+
>>> data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
|
|
170
|
+
>>> pc.Iter.from_(data).split_into([2, 3, None]).into(list)
|
|
171
|
+
[[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
split_into can be useful for grouping a series of items where the sizes of the groups are not uniform.
|
|
176
|
+
|
|
177
|
+
An example would be where in a row from a table:
|
|
178
|
+
|
|
179
|
+
- multiple columns represent elements of the same feature (e.g. a point represented by x,y,z)
|
|
180
|
+
- the format is not the same for all columns.
|
|
181
|
+
"""
|
|
182
|
+
return self._lazy(mit.split_into, sizes)
|
|
183
|
+
|
|
184
|
+
def split_when(
|
|
185
|
+
self, predicate: Callable[[T, T], bool], max_split: int = -1
|
|
186
|
+
) -> Iter[list[T]]:
|
|
187
|
+
"""
|
|
188
|
+
Split iterable into pieces based on the output of a predicate function.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
predicate: Function that takes successive pairs of items and returns True if the iterable should be split.
|
|
192
|
+
max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
|
|
193
|
+
|
|
194
|
+
For example, to find runs of increasing numbers, split the iterable when element i is larger than element i + 1:
|
|
195
|
+
```python
|
|
196
|
+
>>> import pyochain as pc
|
|
197
|
+
>>> data = pc.Seq([1, 2, 3, 3, 2, 5, 2, 4, 2])
|
|
198
|
+
>>> data.iter().split_when(lambda x, y: x > y).into(list)
|
|
199
|
+
[[1, 2, 3, 3], [2, 5], [2, 4], [2]]
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
At most max_split splits are done.
|
|
204
|
+
|
|
205
|
+
If max_split is not specified or -1, then there is no limit on the number of splits:
|
|
206
|
+
```python
|
|
207
|
+
>>> data.iter().split_when(lambda x, y: x > y, max_split=2).into(list)
|
|
208
|
+
[[1, 2, 3, 3], [2, 5], [2, 4, 2]]
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
"""
|
|
212
|
+
return self._lazy(mit.split_when, predicate, max_split)
|
|
213
|
+
|
|
214
|
+
def chunks(self, n: int, strict: bool = False) -> Iter[list[T]]:
|
|
215
|
+
"""
|
|
216
|
+
Break iterable into lists of length n.
|
|
217
|
+
|
|
218
|
+
By default, the last yielded list will have fewer than *n* elements if the length of *iterable* is not divisible by *n*.
|
|
219
|
+
|
|
220
|
+
To use a fill-in value instead, see the :func:`grouper` recipe.
|
|
221
|
+
|
|
222
|
+
If:
|
|
223
|
+
|
|
224
|
+
- the length of *iterable* is not divisible by *n*
|
|
225
|
+
- *strict* is `True`
|
|
226
|
+
|
|
227
|
+
then `ValueError` will be raised before the last list is yielded.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
n: Number of elements in each chunk.
|
|
231
|
+
strict: Whether to raise an error if the last chunk is smaller than n. Defaults to False.
|
|
232
|
+
Example:
|
|
233
|
+
```python
|
|
234
|
+
>>> import pyochain as pc
|
|
235
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).chunks(3).into(list)
|
|
236
|
+
[[1, 2, 3], [4, 5, 6]]
|
|
237
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).chunks(3).into(list)
|
|
238
|
+
[[1, 2, 3], [4, 5, 6], [7, 8]]
|
|
239
|
+
|
|
240
|
+
```
|
|
241
|
+
"""
|
|
242
|
+
return self._lazy(mit.chunked, n, strict)
|
|
243
|
+
|
|
244
|
+
def chunks_even(self, n: int) -> Iter[list[T]]:
|
|
245
|
+
"""
|
|
246
|
+
Break iterable into lists of approximately length n.
|
|
247
|
+
|
|
248
|
+
Items are distributed such the lengths of the lists differ by at most 1 item.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
n: Approximate number of elements in each chunk.
|
|
252
|
+
Example:
|
|
253
|
+
```python
|
|
254
|
+
>>> import pyochain as pc
|
|
255
|
+
>>> iterable = pc.Seq([1, 2, 3, 4, 5, 6, 7])
|
|
256
|
+
>>> iterable.iter().chunks_even(3).into(list) # List lengths: 3, 2, 2
|
|
257
|
+
[[1, 2, 3], [4, 5], [6, 7]]
|
|
258
|
+
>>> iterable.iter().chunks(3).into(list) # List lengths: 3, 3, 1
|
|
259
|
+
[[1, 2, 3], [4, 5, 6], [7]]
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
"""
|
|
263
|
+
return self._lazy(mit.chunked_even, n)
|
|
264
|
+
|
|
265
|
+
def unique_to_each[U: Iterable[Any]](self: IterWrapper[U]) -> Iter[list[U]]:
|
|
266
|
+
"""
|
|
267
|
+
Return the elements from each of the iterables that aren't in the other iterables.
|
|
268
|
+
|
|
269
|
+
For example, suppose you have a set of packages, each with a set of dependencies:
|
|
270
|
+
|
|
271
|
+
**{'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}**
|
|
272
|
+
|
|
273
|
+
If you remove one package, which dependencies can also be removed?
|
|
274
|
+
|
|
275
|
+
If pkg_1 is removed, then A is no longer necessary - it is not associated with pkg_2 or pkg_3.
|
|
276
|
+
|
|
277
|
+
Similarly, C is only needed for pkg_2, and D is only needed for pkg_3:
|
|
278
|
+
```python
|
|
279
|
+
>>> import pyochain as pc
|
|
280
|
+
>>> data = ({"A", "B"}, {"B", "C"}, {"B", "D"})
|
|
281
|
+
>>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
|
|
282
|
+
[['A'], ['C'], ['D']]
|
|
283
|
+
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
If there are duplicates in one input iterable that aren't in the others they will be duplicated in the output.
|
|
287
|
+
|
|
288
|
+
Input order is preserved:
|
|
289
|
+
```python
|
|
290
|
+
>>> data = ("mississippi", "missouri")
|
|
291
|
+
>>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
|
|
292
|
+
[['p', 'p'], ['o', 'u', 'r']]
|
|
293
|
+
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
It is assumed that the elements of each iterable are hashable.
|
|
297
|
+
"""
|
|
298
|
+
|
|
299
|
+
from collections import Counter
|
|
300
|
+
|
|
301
|
+
def _unique_to_each(data: Iterable[U]) -> Generator[list[U], None, None]:
|
|
302
|
+
"""from more_itertools.unique_to_each"""
|
|
303
|
+
pool: list[Iterable[U]] = [it for it in data]
|
|
304
|
+
counts: Counter[U] = Counter(itertools.chain.from_iterable(map(set, pool)))
|
|
305
|
+
uniques: set[U] = {element for element in counts if counts[element] == 1}
|
|
306
|
+
return ((list(filter(uniques.__contains__, it))) for it in pool)
|
|
307
|
+
|
|
308
|
+
return self._lazy(_unique_to_each)
|