pyochain 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyochain might be problematic. Click here for more details.
- pyochain/__init__.py +5 -0
- pyochain/_core/__init__.py +21 -0
- pyochain/_core/_main.py +184 -0
- pyochain/_core/_protocols.py +43 -0
- pyochain/_dict/__init__.py +4 -0
- pyochain/_dict/_exprs.py +115 -0
- pyochain/_dict/_filters.py +273 -0
- pyochain/_dict/_funcs.py +62 -0
- pyochain/_dict/_groups.py +176 -0
- pyochain/_dict/_iter.py +92 -0
- pyochain/_dict/_joins.py +137 -0
- pyochain/_dict/_main.py +307 -0
- pyochain/_dict/_nested.py +218 -0
- pyochain/_dict/_process.py +171 -0
- pyochain/_iter/__init__.py +3 -0
- pyochain/_iter/_aggregations.py +323 -0
- pyochain/_iter/_booleans.py +224 -0
- pyochain/_iter/_constructors.py +155 -0
- pyochain/_iter/_eager.py +195 -0
- pyochain/_iter/_filters.py +503 -0
- pyochain/_iter/_groups.py +264 -0
- pyochain/_iter/_joins.py +407 -0
- pyochain/_iter/_lists.py +306 -0
- pyochain/_iter/_main.py +224 -0
- pyochain/_iter/_maps.py +358 -0
- pyochain/_iter/_partitions.py +148 -0
- pyochain/_iter/_process.py +384 -0
- pyochain/_iter/_rolling.py +247 -0
- pyochain/_iter/_tuples.py +221 -0
- pyochain/py.typed +0 -0
- pyochain-0.5.0.dist-info/METADATA +295 -0
- pyochain-0.5.0.dist-info/RECORD +33 -0
- pyochain-0.5.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import TYPE_CHECKING, Any, overload
|
|
6
|
+
|
|
7
|
+
import cytoolz as cz
|
|
8
|
+
import more_itertools as mit
|
|
9
|
+
|
|
10
|
+
from .._core import IterWrapper
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from .._dict import Dict
|
|
14
|
+
from ._main import Iter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseGroups[T](IterWrapper[T]):
|
|
18
|
+
def reduce_by[K](
|
|
19
|
+
self, key: Callable[[T], K], binop: Callable[[T, T], T]
|
|
20
|
+
) -> Dict[K, T]:
|
|
21
|
+
"""
|
|
22
|
+
Perform a simultaneous groupby and reduction.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
key: Function to compute the key for grouping.
|
|
26
|
+
binop: Binary operation to reduce the grouped elements.
|
|
27
|
+
Example:
|
|
28
|
+
```python
|
|
29
|
+
>>> from collections.abc import Iterable
|
|
30
|
+
>>> import pyochain as pc
|
|
31
|
+
>>> from operator import add, mul
|
|
32
|
+
>>>
|
|
33
|
+
>>> def is_even(x: int) -> bool:
|
|
34
|
+
... return x % 2 == 0
|
|
35
|
+
>>>
|
|
36
|
+
>>> def group_reduce(data: Iterable[int]) -> int:
|
|
37
|
+
... return pc.Iter.from_(data).reduce(add)
|
|
38
|
+
>>>
|
|
39
|
+
>>> data = pc.Seq([1, 2, 3, 4, 5])
|
|
40
|
+
>>> data.iter().reduce_by(is_even, add).unwrap()
|
|
41
|
+
{False: 9, True: 6}
|
|
42
|
+
>>> data.iter().group_by(is_even).map_values(group_reduce).unwrap()
|
|
43
|
+
{False: 9, True: 6}
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
But the former does not build the intermediate groups, allowing it to operate in much less space.
|
|
47
|
+
|
|
48
|
+
This makes it suitable for larger datasets that do not fit comfortably in memory
|
|
49
|
+
|
|
50
|
+
Simple Examples:
|
|
51
|
+
```python
|
|
52
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, add).unwrap()
|
|
53
|
+
{False: 9, True: 6}
|
|
54
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, mul).unwrap()
|
|
55
|
+
{False: 15, True: 8}
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
"""
|
|
59
|
+
from .._dict import Dict
|
|
60
|
+
|
|
61
|
+
return Dict(self.into(partial(cz.itertoolz.reduceby, key, binop)))
|
|
62
|
+
|
|
63
|
+
def group_by[K](self, on: Callable[[T], K]) -> Dict[K, list[T]]:
|
|
64
|
+
"""
|
|
65
|
+
Group elements by key function and return a Dict result.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
on: Function to compute the key for grouping.
|
|
69
|
+
Example:
|
|
70
|
+
```python
|
|
71
|
+
>>> import pyochain as pc
|
|
72
|
+
>>> names = [
|
|
73
|
+
... "Alice",
|
|
74
|
+
... "Bob",
|
|
75
|
+
... "Charlie",
|
|
76
|
+
... "Dan",
|
|
77
|
+
... "Edith",
|
|
78
|
+
... "Frank",
|
|
79
|
+
... ]
|
|
80
|
+
>>> pc.Iter.from_(names).group_by(len).sort()
|
|
81
|
+
... # doctest: +NORMALIZE_WHITESPACE
|
|
82
|
+
Dict({
|
|
83
|
+
3: ['Bob', 'Dan'],
|
|
84
|
+
5: ['Alice', 'Edith', 'Frank'],
|
|
85
|
+
7: ['Charlie']
|
|
86
|
+
})
|
|
87
|
+
>>>
|
|
88
|
+
>>> iseven = lambda x: x % 2 == 0
|
|
89
|
+
>>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).group_by(iseven)
|
|
90
|
+
... # doctest: +NORMALIZE_WHITESPACE
|
|
91
|
+
Dict({
|
|
92
|
+
False: [1, 3, 5, 7],
|
|
93
|
+
True: [2, 4, 6, 8]
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
Non-callable keys imply grouping on a member.
|
|
98
|
+
```python
|
|
99
|
+
>>> data = [
|
|
100
|
+
... {"name": "Alice", "gender": "F"},
|
|
101
|
+
... {"name": "Bob", "gender": "M"},
|
|
102
|
+
... {"name": "Charlie", "gender": "M"},
|
|
103
|
+
... ]
|
|
104
|
+
>>> pc.Iter.from_(data).group_by("gender").sort()
|
|
105
|
+
... # doctest: +NORMALIZE_WHITESPACE
|
|
106
|
+
Dict({
|
|
107
|
+
'F': [
|
|
108
|
+
{'name': 'Alice', 'gender': 'F'}
|
|
109
|
+
],
|
|
110
|
+
'M': [
|
|
111
|
+
{'name': 'Bob', 'gender': 'M'},
|
|
112
|
+
{'name': 'Charlie', 'gender': 'M'}
|
|
113
|
+
]
|
|
114
|
+
})
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
"""
|
|
118
|
+
from .._dict import Dict
|
|
119
|
+
|
|
120
|
+
return Dict(self.into(partial(cz.itertoolz.groupby, on)))
|
|
121
|
+
|
|
122
|
+
def frequencies(self) -> Dict[T, int]:
|
|
123
|
+
"""
|
|
124
|
+
Find number of occurrences of each value in the iterable.
|
|
125
|
+
```python
|
|
126
|
+
>>> import pyochain as pc
|
|
127
|
+
>>> data = ["cat", "cat", "ox", "pig", "pig", "cat"]
|
|
128
|
+
>>> pc.Iter.from_(data).frequencies().unwrap()
|
|
129
|
+
{'cat': 3, 'ox': 1, 'pig': 2}
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
"""
|
|
133
|
+
from .._dict import Dict
|
|
134
|
+
|
|
135
|
+
return Dict(self.into(cz.itertoolz.frequencies))
|
|
136
|
+
|
|
137
|
+
def count_by[K](self, key: Callable[[T], K]) -> Dict[K, int]:
|
|
138
|
+
"""
|
|
139
|
+
Count elements of a collection by a key function.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
key: Function to compute the key for counting.
|
|
143
|
+
Example:
|
|
144
|
+
```python
|
|
145
|
+
>>> import pyochain as pc
|
|
146
|
+
>>> pc.Iter.from_(["cat", "mouse", "dog"]).count_by(len).unwrap()
|
|
147
|
+
{3: 2, 5: 1}
|
|
148
|
+
>>> def iseven(x):
|
|
149
|
+
... return x % 2 == 0
|
|
150
|
+
>>> pc.Iter.from_([1, 2, 3]).count_by(iseven).unwrap()
|
|
151
|
+
{False: 2, True: 1}
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
"""
|
|
155
|
+
from .._dict import Dict
|
|
156
|
+
|
|
157
|
+
return Dict(self.into(partial(cz.recipes.countby, key)))
|
|
158
|
+
|
|
159
|
+
@overload
|
|
160
|
+
def group_by_transform(
|
|
161
|
+
self,
|
|
162
|
+
keyfunc: None = None,
|
|
163
|
+
valuefunc: None = None,
|
|
164
|
+
reducefunc: None = None,
|
|
165
|
+
) -> Iter[tuple[T, Iterator[T]]]: ...
|
|
166
|
+
@overload
|
|
167
|
+
def group_by_transform[U](
|
|
168
|
+
self,
|
|
169
|
+
keyfunc: Callable[[T], U],
|
|
170
|
+
valuefunc: None,
|
|
171
|
+
reducefunc: None,
|
|
172
|
+
) -> Iter[tuple[U, Iterator[T]]]: ...
|
|
173
|
+
@overload
|
|
174
|
+
def group_by_transform[V](
|
|
175
|
+
self,
|
|
176
|
+
keyfunc: None,
|
|
177
|
+
valuefunc: Callable[[T], V],
|
|
178
|
+
reducefunc: None,
|
|
179
|
+
) -> Iter[tuple[T, Iterator[V]]]: ...
|
|
180
|
+
@overload
|
|
181
|
+
def group_by_transform[U, V](
|
|
182
|
+
self,
|
|
183
|
+
keyfunc: Callable[[T], U],
|
|
184
|
+
valuefunc: Callable[[T], V],
|
|
185
|
+
reducefunc: None,
|
|
186
|
+
) -> Iter[tuple[U, Iterator[V]]]: ...
|
|
187
|
+
@overload
|
|
188
|
+
def group_by_transform[W](
|
|
189
|
+
self,
|
|
190
|
+
keyfunc: None,
|
|
191
|
+
valuefunc: None,
|
|
192
|
+
reducefunc: Callable[[Iterator[T]], W],
|
|
193
|
+
) -> Iter[tuple[T, W]]: ...
|
|
194
|
+
@overload
|
|
195
|
+
def group_by_transform[U, W](
|
|
196
|
+
self,
|
|
197
|
+
keyfunc: Callable[[T], U],
|
|
198
|
+
valuefunc: None,
|
|
199
|
+
reducefunc: Callable[[Iterator[T]], W],
|
|
200
|
+
) -> Iter[tuple[U, W]]: ...
|
|
201
|
+
@overload
|
|
202
|
+
def group_by_transform[V, W](
|
|
203
|
+
self,
|
|
204
|
+
keyfunc: None,
|
|
205
|
+
valuefunc: Callable[[T], V],
|
|
206
|
+
reducefunc: Callable[[Iterator[V]], W],
|
|
207
|
+
) -> Iter[tuple[T, W]]: ...
|
|
208
|
+
@overload
|
|
209
|
+
def group_by_transform[U, V, W](
|
|
210
|
+
self,
|
|
211
|
+
keyfunc: Callable[[T], U],
|
|
212
|
+
valuefunc: Callable[[T], V],
|
|
213
|
+
reducefunc: Callable[[Iterator[V]], W],
|
|
214
|
+
) -> Iter[tuple[U, W]]: ...
|
|
215
|
+
def group_by_transform[U, V](
|
|
216
|
+
self,
|
|
217
|
+
keyfunc: Callable[[T], U] | None = None,
|
|
218
|
+
valuefunc: Callable[[T], V] | None = None,
|
|
219
|
+
reducefunc: Any = None,
|
|
220
|
+
) -> Iter[tuple[Any, ...]]:
|
|
221
|
+
"""
|
|
222
|
+
An extension of itertools.groupby that can apply transformations to the grouped data.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
keyfunc: Function to compute the key for grouping. Defaults to None.
|
|
226
|
+
valuefunc: Function to transform individual items after grouping. Defaults to None.
|
|
227
|
+
reducefunc: Function to transform each group of items. Defaults to None.
|
|
228
|
+
|
|
229
|
+
Example:
|
|
230
|
+
```python
|
|
231
|
+
>>> import pyochain as pc
|
|
232
|
+
>>> data = pc.Iter.from_("aAAbBBcCC")
|
|
233
|
+
>>> data.group_by_transform(
|
|
234
|
+
... lambda k: k.upper(), lambda v: v.lower(), lambda g: "".join(g)
|
|
235
|
+
... ).into(list)
|
|
236
|
+
[('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]
|
|
237
|
+
|
|
238
|
+
```
|
|
239
|
+
Each optional argument defaults to an identity function if not specified.
|
|
240
|
+
|
|
241
|
+
group_by_transform is useful when grouping elements of an iterable using a separate iterable as the key.
|
|
242
|
+
|
|
243
|
+
To do this, zip the iterables and pass a keyfunc that extracts the first element and a valuefunc that extracts the second element:
|
|
244
|
+
|
|
245
|
+
Note that the order of items in the iterable is significant.
|
|
246
|
+
|
|
247
|
+
Only adjacent items are grouped together, so if you don't want any duplicate groups, you should sort the iterable by the key function.
|
|
248
|
+
|
|
249
|
+
Example:
|
|
250
|
+
```python
|
|
251
|
+
>>> from operator import itemgetter
|
|
252
|
+
>>> data = pc.Iter.from_([0, 0, 1, 1, 1, 2, 2, 2, 3])
|
|
253
|
+
>>> data.zip("abcdefghi").group_by_transform(itemgetter(0), itemgetter(1)).map(
|
|
254
|
+
... lambda kv: (kv[0], "".join(kv[1]))
|
|
255
|
+
... ).into(list)
|
|
256
|
+
[(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]
|
|
257
|
+
|
|
258
|
+
```
|
|
259
|
+
"""
|
|
260
|
+
|
|
261
|
+
def _group_by_transform(data: Iterable[T]) -> Iterator[tuple[Any, ...]]:
|
|
262
|
+
return mit.groupby_transform(data, keyfunc, valuefunc, reducefunc)
|
|
263
|
+
|
|
264
|
+
return self.apply(_group_by_transform)
|
pyochain/_iter/_joins.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import itertools
|
|
4
|
+
from collections.abc import Callable, Generator, Iterable, Iterator
|
|
5
|
+
from typing import TYPE_CHECKING, Any, overload
|
|
6
|
+
|
|
7
|
+
import cytoolz as cz
|
|
8
|
+
import more_itertools as mit
|
|
9
|
+
|
|
10
|
+
from .._core import IterWrapper
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from ._main import Iter
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseJoins[T](IterWrapper[T]):
|
|
17
|
+
@overload
|
|
18
|
+
def zip[T1](
|
|
19
|
+
self, iter1: Iterable[T1], /, *, strict: bool = ...
|
|
20
|
+
) -> Iter[tuple[T, T1]]: ...
|
|
21
|
+
@overload
|
|
22
|
+
def zip[T1, T2](
|
|
23
|
+
self,
|
|
24
|
+
iter1: Iterable[T1],
|
|
25
|
+
iter2: Iterable[T2],
|
|
26
|
+
/,
|
|
27
|
+
*,
|
|
28
|
+
strict: bool = ...,
|
|
29
|
+
) -> Iter[tuple[T, T1, T2]]: ...
|
|
30
|
+
@overload
|
|
31
|
+
def zip[T1, T2, T3](
|
|
32
|
+
self,
|
|
33
|
+
iter1: Iterable[T1],
|
|
34
|
+
iter2: Iterable[T2],
|
|
35
|
+
iter3: Iterable[T3],
|
|
36
|
+
/,
|
|
37
|
+
*,
|
|
38
|
+
strict: bool = ...,
|
|
39
|
+
) -> Iter[tuple[T, T1, T2, T3]]: ...
|
|
40
|
+
@overload
|
|
41
|
+
def zip[T1, T2, T3, T4](
|
|
42
|
+
self,
|
|
43
|
+
iter1: Iterable[T1],
|
|
44
|
+
iter2: Iterable[T2],
|
|
45
|
+
iter3: Iterable[T3],
|
|
46
|
+
iter4: Iterable[T4],
|
|
47
|
+
/,
|
|
48
|
+
*,
|
|
49
|
+
strict: bool = ...,
|
|
50
|
+
) -> Iter[tuple[T, T1, T2, T3, T4]]: ...
|
|
51
|
+
def zip(
|
|
52
|
+
self, *others: Iterable[Any], strict: bool = False
|
|
53
|
+
) -> Iter[tuple[Any, ...]]:
|
|
54
|
+
"""
|
|
55
|
+
Zip with other iterables, optionally strict.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
*others: Other iterables to zip with.
|
|
59
|
+
strict: Whether to enforce equal lengths of iterables. Defaults to False.
|
|
60
|
+
Example:
|
|
61
|
+
```python
|
|
62
|
+
>>> import pyochain as pc
|
|
63
|
+
>>> pc.Iter.from_([1, 2]).zip([10, 20]).into(list)
|
|
64
|
+
[(1, 10), (2, 20)]
|
|
65
|
+
>>> pc.Iter.from_(["a", "b"]).zip([1, 2, 3]).into(list)
|
|
66
|
+
[('a', 1), ('b', 2)]
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
"""
|
|
70
|
+
return self.apply(zip, *others, strict=strict)
|
|
71
|
+
|
|
72
|
+
def zip_offset[U](
|
|
73
|
+
self,
|
|
74
|
+
*others: Iterable[T],
|
|
75
|
+
offsets: list[int],
|
|
76
|
+
longest: bool = False,
|
|
77
|
+
fillvalue: U = None,
|
|
78
|
+
) -> Iter[tuple[T | U, ...]]:
|
|
79
|
+
"""
|
|
80
|
+
Zip the input iterables together, but offset the i-th iterable by the i-th item in offsets.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
*others: Other iterables to zip with.
|
|
84
|
+
offsets: List of integers specifying the offsets for each iterable.
|
|
85
|
+
longest: Whether to continue until the longest iterable is exhausted. Defaults to False.
|
|
86
|
+
fillvalue: Value to use for missing elements. Defaults to None.
|
|
87
|
+
Example:
|
|
88
|
+
```python
|
|
89
|
+
>>> import pyochain as pc
|
|
90
|
+
>>> data = pc.Seq("0123")
|
|
91
|
+
>>> data.iter().zip_offset("abcdef", offsets=(0, 1)).into(list)
|
|
92
|
+
[('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
This can be used as a lightweight alternative to SciPy or pandas to analyze data sets in which some series have a lead or lag relationship.
|
|
96
|
+
|
|
97
|
+
By default, the sequence will end when the shortest iterable is exhausted.
|
|
98
|
+
|
|
99
|
+
To continue until the longest iterable is exhausted, set longest to True.
|
|
100
|
+
```python
|
|
101
|
+
>>> data.iter().zip_offset("abcdef", offsets=(0, 1), longest=True).into(list)
|
|
102
|
+
[('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
def _zip_offset(data: Iterable[T]) -> Iterator[tuple[T | U, ...]]:
|
|
108
|
+
return mit.zip_offset(
|
|
109
|
+
data,
|
|
110
|
+
*others,
|
|
111
|
+
offsets=offsets,
|
|
112
|
+
longest=longest,
|
|
113
|
+
fillvalue=fillvalue,
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
return self.apply(_zip_offset)
|
|
117
|
+
|
|
118
|
+
@overload
|
|
119
|
+
def zip_broadcast[T1](
|
|
120
|
+
self,
|
|
121
|
+
iter1: Iterable[T1],
|
|
122
|
+
/,
|
|
123
|
+
*,
|
|
124
|
+
strict: bool = False,
|
|
125
|
+
) -> Iter[tuple[T, T1]]: ...
|
|
126
|
+
@overload
|
|
127
|
+
def zip_broadcast[T1, T2](
|
|
128
|
+
self,
|
|
129
|
+
iter1: Iterable[T1],
|
|
130
|
+
iter2: Iterable[T2],
|
|
131
|
+
/,
|
|
132
|
+
*,
|
|
133
|
+
strict: bool = False,
|
|
134
|
+
) -> Iter[tuple[T, T1, T2]]: ...
|
|
135
|
+
@overload
|
|
136
|
+
def zip_broadcast[T1, T2, T3](
|
|
137
|
+
self,
|
|
138
|
+
iter1: Iterable[T1],
|
|
139
|
+
iter2: Iterable[T2],
|
|
140
|
+
iter3: Iterable[T3],
|
|
141
|
+
/,
|
|
142
|
+
*,
|
|
143
|
+
strict: bool = False,
|
|
144
|
+
) -> Iter[tuple[T, T1, T2, T3]]: ...
|
|
145
|
+
@overload
|
|
146
|
+
def zip_broadcast[T1, T2, T3, T4](
|
|
147
|
+
self,
|
|
148
|
+
iter1: Iterable[T1],
|
|
149
|
+
iter2: Iterable[T2],
|
|
150
|
+
iter3: Iterable[T3],
|
|
151
|
+
iter4: Iterable[T4],
|
|
152
|
+
/,
|
|
153
|
+
*,
|
|
154
|
+
strict: bool = False,
|
|
155
|
+
) -> Iter[tuple[T, T1, T2, T3, T4]]: ...
|
|
156
|
+
def zip_broadcast(
|
|
157
|
+
self, *others: Iterable[Any], strict: bool = False
|
|
158
|
+
) -> Iter[tuple[Any, ...]]:
|
|
159
|
+
"""
|
|
160
|
+
Version of zip that "broadcasts" any scalar (i.e., non-iterable) items into output tuples.
|
|
161
|
+
|
|
162
|
+
`str` and `bytes` are not treated as iterables.
|
|
163
|
+
|
|
164
|
+
If the strict keyword argument is True, then UnequalIterablesError will be raised if any of the iterables have different lengths.
|
|
165
|
+
Args:
|
|
166
|
+
*others: Other iterables or scalars to zip with.
|
|
167
|
+
strict: Whether to enforce equal lengths of iterables. Defaults to False.
|
|
168
|
+
Example:
|
|
169
|
+
```python
|
|
170
|
+
>>> import pyochain as pc
|
|
171
|
+
>>> data = pc.Iter.from_([1, 2, 3])
|
|
172
|
+
>>> other = ["a", "b", "c"]
|
|
173
|
+
>>> scalar = "_"
|
|
174
|
+
>>> data.zip_broadcast(other, scalar).into(list)
|
|
175
|
+
[(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]
|
|
176
|
+
|
|
177
|
+
```
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
def _zip_broadcast(
|
|
181
|
+
*objects: Iterable[Any],
|
|
182
|
+
) -> Generator[tuple[Iterable[Any], ...] | tuple[object, ...], Any, None]:
|
|
183
|
+
"""from more_itertools.zip_broadcast"""
|
|
184
|
+
|
|
185
|
+
def is_scalar(obj: Any) -> bool:
|
|
186
|
+
if isinstance(obj, (str, bytes)):
|
|
187
|
+
return True
|
|
188
|
+
try:
|
|
189
|
+
iter(obj)
|
|
190
|
+
except TypeError:
|
|
191
|
+
return True
|
|
192
|
+
else:
|
|
193
|
+
return False
|
|
194
|
+
|
|
195
|
+
size = len(objects)
|
|
196
|
+
if not size:
|
|
197
|
+
return
|
|
198
|
+
|
|
199
|
+
new_item: list[object] = [None] * size
|
|
200
|
+
iterables: list[Iterator[Any]] = []
|
|
201
|
+
iterable_positions: list[int] = []
|
|
202
|
+
for i, obj in enumerate(objects):
|
|
203
|
+
if is_scalar(obj):
|
|
204
|
+
new_item[i] = obj
|
|
205
|
+
else:
|
|
206
|
+
iterables.append(iter(obj))
|
|
207
|
+
iterable_positions.append(i)
|
|
208
|
+
|
|
209
|
+
if not iterables:
|
|
210
|
+
yield tuple(objects)
|
|
211
|
+
return
|
|
212
|
+
|
|
213
|
+
zipper = mit.zip_equal if strict else zip
|
|
214
|
+
for item in zipper(*iterables):
|
|
215
|
+
for i, new_item[i] in zip(iterable_positions, item):
|
|
216
|
+
pass
|
|
217
|
+
yield tuple(new_item)
|
|
218
|
+
|
|
219
|
+
return self.apply(_zip_broadcast, *others)
|
|
220
|
+
|
|
221
|
+
@overload
|
|
222
|
+
def zip_equal(self) -> Iter[tuple[T]]: ...
|
|
223
|
+
@overload
|
|
224
|
+
def zip_equal[T2](self, __iter2: Iterable[T2]) -> Iter[tuple[T, T2]]: ...
|
|
225
|
+
@overload
|
|
226
|
+
def zip_equal[T2, T3](
|
|
227
|
+
self, __iter2: Iterable[T2], __iter3: Iterable[T3]
|
|
228
|
+
) -> Iter[tuple[T, T2, T3]]: ...
|
|
229
|
+
@overload
|
|
230
|
+
def zip_equal[T2, T3, T4](
|
|
231
|
+
self,
|
|
232
|
+
__iter2: Iterable[T2],
|
|
233
|
+
__iter3: Iterable[T3],
|
|
234
|
+
__iter4: Iterable[T4],
|
|
235
|
+
) -> Iter[tuple[T, T2, T3, T4]]: ...
|
|
236
|
+
@overload
|
|
237
|
+
def zip_equal[T2, T3, T4, T5](
|
|
238
|
+
self,
|
|
239
|
+
__iter2: Iterable[T2],
|
|
240
|
+
__iter3: Iterable[T3],
|
|
241
|
+
__iter4: Iterable[T4],
|
|
242
|
+
__iter5: Iterable[T5],
|
|
243
|
+
) -> Iter[tuple[T, T2, T3, T4, T5]]: ...
|
|
244
|
+
def zip_equal(self, *others: Iterable[Any]) -> Iter[tuple[Any, ...]]:
|
|
245
|
+
"""
|
|
246
|
+
`zip` the input *iterables* together but raise `UnequalIterablesError` if they aren't all the same length.
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
*others: Other iterables to zip with.
|
|
250
|
+
Example:
|
|
251
|
+
```python
|
|
252
|
+
>>> import pyochain as pc
|
|
253
|
+
>>> pc.Iter.from_(range(3)).zip_equal("abc").into(list)
|
|
254
|
+
[(0, 'a'), (1, 'b'), (2, 'c')]
|
|
255
|
+
>>> pc.Iter.from_(range(3)).zip_equal("abcd").into(list)
|
|
256
|
+
... # doctest: +IGNORE_EXCEPTION_DETAIL
|
|
257
|
+
Traceback (most recent call last):
|
|
258
|
+
...
|
|
259
|
+
more_itertools.more.UnequalIterablesError: Iterables have different
|
|
260
|
+
lengths
|
|
261
|
+
|
|
262
|
+
```
|
|
263
|
+
"""
|
|
264
|
+
|
|
265
|
+
def _zip_equal(data: Iterable[T]) -> Iterator[tuple[Any, ...]]:
|
|
266
|
+
return mit.zip_equal(data, *others)
|
|
267
|
+
|
|
268
|
+
return self.apply(_zip_equal)
|
|
269
|
+
|
|
270
|
+
def zip_longest[U](
|
|
271
|
+
self, *others: Iterable[T], fill_value: U = None
|
|
272
|
+
) -> Iter[tuple[U | T, ...]]:
|
|
273
|
+
"""
|
|
274
|
+
Zip with other iterables, filling missing values.
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
*others: Other iterables to zip with.
|
|
278
|
+
fill_value: Value to use for missing elements. Defaults to None.
|
|
279
|
+
Example:
|
|
280
|
+
```python
|
|
281
|
+
>>> import pyochain as pc
|
|
282
|
+
>>> pc.Iter.from_([1, 2]).zip_longest([10], fill_value=0).into(list)
|
|
283
|
+
[(1, 10), (2, 0)]
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
"""
|
|
287
|
+
return self.apply(itertools.zip_longest, *others, fillvalue=fill_value)
|
|
288
|
+
|
|
289
|
+
@overload
|
|
290
|
+
def product(self) -> Iter[tuple[T]]: ...
|
|
291
|
+
@overload
|
|
292
|
+
def product[T1](self, iter1: Iterable[T1], /) -> Iter[tuple[T, T1]]: ...
|
|
293
|
+
@overload
|
|
294
|
+
def product[T1, T2](
|
|
295
|
+
self, iter1: Iterable[T1], iter2: Iterable[T2], /
|
|
296
|
+
) -> Iter[tuple[T, T1, T2]]: ...
|
|
297
|
+
@overload
|
|
298
|
+
def product[T1, T2, T3](
|
|
299
|
+
self, iter1: Iterable[T1], iter2: Iterable[T2], iter3: Iterable[T3], /
|
|
300
|
+
) -> Iter[tuple[T, T1, T2, T3]]: ...
|
|
301
|
+
@overload
|
|
302
|
+
def product[T1, T2, T3, T4](
|
|
303
|
+
self,
|
|
304
|
+
iter1: Iterable[T1],
|
|
305
|
+
iter2: Iterable[T2],
|
|
306
|
+
iter3: Iterable[T3],
|
|
307
|
+
iter4: Iterable[T4],
|
|
308
|
+
/,
|
|
309
|
+
) -> Iter[tuple[T, T1, T2, T3, T4]]: ...
|
|
310
|
+
|
|
311
|
+
def product(self, *others: Iterable[Any]) -> Iter[tuple[Any, ...]]:
|
|
312
|
+
"""
|
|
313
|
+
Computes the Cartesian product with another iterable.
|
|
314
|
+
This is the declarative equivalent of nested for-loops.
|
|
315
|
+
|
|
316
|
+
It pairs every element from the source iterable with every element from the
|
|
317
|
+
other iterable.
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
*others: Other iterables to compute the Cartesian product with.
|
|
321
|
+
Example:
|
|
322
|
+
```python
|
|
323
|
+
>>> import pyochain as pc
|
|
324
|
+
>>> colors = pc.Iter.from_(["blue", "red"])
|
|
325
|
+
>>> sizes = ["S", "M"]
|
|
326
|
+
>>> colors.product(sizes).into(list)
|
|
327
|
+
[('blue', 'S'), ('blue', 'M'), ('red', 'S'), ('red', 'M')]
|
|
328
|
+
|
|
329
|
+
```
|
|
330
|
+
"""
|
|
331
|
+
return self.apply(itertools.product, *others)
|
|
332
|
+
|
|
333
|
+
def diff_at(
|
|
334
|
+
self,
|
|
335
|
+
*others: Iterable[T],
|
|
336
|
+
default: T | None = None,
|
|
337
|
+
key: Callable[[T], Any] | None = None,
|
|
338
|
+
) -> Iter[tuple[T, ...]]:
|
|
339
|
+
"""
|
|
340
|
+
Return those items that differ between iterables.
|
|
341
|
+
Each output item is a tuple where the i-th element is from the i-th input iterable.
|
|
342
|
+
|
|
343
|
+
If an input iterable is exhausted before others, then the corresponding output items will be filled with *default*.
|
|
344
|
+
Args:
|
|
345
|
+
*others: Other iterables to compare with.
|
|
346
|
+
default: Value to use for missing elements. Defaults to None.
|
|
347
|
+
key: Function to apply to each item for comparison. Defaults to None.
|
|
348
|
+
Example:
|
|
349
|
+
```python
|
|
350
|
+
>>> import pyochain as pc
|
|
351
|
+
>>> data = pc.Seq([1, 2, 3])
|
|
352
|
+
>>> data.iter().diff_at([1, 2, 10, 100], default=None).into(list)
|
|
353
|
+
[(3, 10), (None, 100)]
|
|
354
|
+
>>> data.iter().diff_at([1, 2, 10, 100, 2, 6, 7], default=0).into(list)
|
|
355
|
+
[(3, 10), (0, 100), (0, 2), (0, 6), (0, 7)]
|
|
356
|
+
|
|
357
|
+
A key function may also be applied to each item to use during comparisons:
|
|
358
|
+
```python
|
|
359
|
+
>>> import pyochain as pc
|
|
360
|
+
>>> pc.Iter.from_(["apples", "bananas"]).diff_at(
|
|
361
|
+
... ["Apples", "Oranges"], key=str.lower
|
|
362
|
+
... ).into(list)
|
|
363
|
+
[('bananas', 'Oranges')]
|
|
364
|
+
|
|
365
|
+
```
|
|
366
|
+
"""
|
|
367
|
+
return self.apply(cz.itertoolz.diff, *others, default=default, key=key)
|
|
368
|
+
|
|
369
|
+
def join[R, K](
|
|
370
|
+
self,
|
|
371
|
+
other: Iterable[R],
|
|
372
|
+
left_on: Callable[[T], K],
|
|
373
|
+
right_on: Callable[[R], K],
|
|
374
|
+
left_default: T | None = None,
|
|
375
|
+
right_default: R | None = None,
|
|
376
|
+
) -> Iter[tuple[T, R]]:
|
|
377
|
+
"""
|
|
378
|
+
Perform a relational join with another iterable.
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
other: Iterable to join with.
|
|
382
|
+
left_on: Function to extract the join key from the left iterable.
|
|
383
|
+
right_on: Function to extract the join key from the right iterable.
|
|
384
|
+
left_default: Default value for missing elements in the left iterable. Defaults to None.
|
|
385
|
+
right_default: Default value for missing elements in the right iterable. Defaults to None.
|
|
386
|
+
Example:
|
|
387
|
+
```python
|
|
388
|
+
>>> import pyochain as pc
|
|
389
|
+
>>> colors = pc.Iter.from_(["blue", "red"])
|
|
390
|
+
>>> sizes = ["S", "M"]
|
|
391
|
+
>>> colors.join(sizes, left_on=lambda c: c, right_on=lambda s: s).into(list)
|
|
392
|
+
[(None, 'S'), (None, 'M'), ('blue', None), ('red', None)]
|
|
393
|
+
|
|
394
|
+
```
|
|
395
|
+
"""
|
|
396
|
+
|
|
397
|
+
def _join(data: Iterable[T]) -> Iterator[tuple[T, R]]:
|
|
398
|
+
return cz.itertoolz.join(
|
|
399
|
+
leftkey=left_on,
|
|
400
|
+
leftseq=data,
|
|
401
|
+
rightkey=right_on,
|
|
402
|
+
rightseq=other,
|
|
403
|
+
left_default=left_default,
|
|
404
|
+
right_default=right_default,
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
return self.apply(_join)
|