pyochain 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyochain might be problematic. Click here for more details.

@@ -0,0 +1,306 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ from collections.abc import Callable, Generator, Iterable
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ import more_itertools as mit
8
+
9
+ from .._core import IterWrapper
10
+
11
+ if TYPE_CHECKING:
12
+ from ._main import Iter
13
+
14
+
15
+ class BaseList[T](IterWrapper[T]):
16
+ def implode(self) -> Iter[list[T]]:
17
+ """
18
+ Wrap each element in the iterable into a list.
19
+
20
+ Syntactic sugar for `Iter.map(lambda x: [x])`.
21
+ ```python
22
+ >>> import pyochain as pc
23
+ >>> pc.Iter.from_(range(5)).implode().into(list)
24
+ [[0], [1], [2], [3], [4]]
25
+
26
+ ```
27
+ """
28
+
29
+ def _implode(data: Iterable[T]) -> Generator[list[T], None, None]:
30
+ return ([x] for x in data)
31
+
32
+ return self.apply(_implode)
33
+
34
+ def split_at(
35
+ self,
36
+ pred: Callable[[T], bool],
37
+ maxsplit: int = -1,
38
+ keep_separator: bool = False,
39
+ ) -> Iter[list[T]]:
40
+ """
41
+ Yield lists of items from iterable, where each list is delimited by an item where callable pred returns True.
42
+
43
+ Args:
44
+ pred: Function to determine the split points.
45
+ maxsplit: Maximum number of splits to perform. Defaults to -1 (no limit).
46
+ keep_separator: Whether to include the separator in the output. Defaults to False.
47
+ Example:
48
+ ```python
49
+ >>> import pyochain as pc
50
+ >>> pc.Iter.from_("abcdcba").split_at(lambda x: x == "b").into(list)
51
+ [['a'], ['c', 'd', 'c'], ['a']]
52
+ >>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1).into(list)
53
+ [[0], [2], [4], [6], [8], []]
54
+
55
+ At most *maxsplit* splits are done.
56
+
57
+ If *maxsplit* is not specified or -1, then there is no limit on the number of splits:
58
+ ```python
59
+ >>> pc.Iter.from_(range(10)).split_at(lambda n: n % 2 == 1, maxsplit=2).into(
60
+ ... list
61
+ ... )
62
+ [[0], [2], [4, 5, 6, 7, 8, 9]]
63
+
64
+ ```
65
+ By default, the delimiting items are not included in the output.
66
+
67
+ To include them, set *keep_separator* to `True`.
68
+ ```python
69
+ >>> def cond(x: str) -> bool:
70
+ ... return x == "b"
71
+ >>> pc.Iter.from_("abcdcba").split_at(cond, keep_separator=True).into(list)
72
+ [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
73
+
74
+ ```
75
+ """
76
+ return self.apply(mit.split_at, pred, maxsplit, keep_separator)
77
+
78
+ def split_after(
79
+ self, predicate: Callable[[T], bool], max_split: int = -1
80
+ ) -> Iter[list[T]]:
81
+ """
82
+ Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
83
+
84
+ Args:
85
+ predicate: Function to determine the split points.
86
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
87
+ Example:
88
+ ```python
89
+ >>> import pyochain as pc
90
+ >>> pc.Iter.from_("one1two2").split_after(str.isdigit).into(list)
91
+ [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
92
+
93
+ >>> def cond(n: int) -> bool:
94
+ ... return n % 3 == 0
95
+ >>>
96
+ >>> pc.Iter.from_(range(10)).split_after(cond).into(list)
97
+ [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
98
+ >>> pc.Iter.from_(range(10)).split_after(cond, max_split=2).into(list)
99
+ [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
100
+
101
+ ```
102
+ """
103
+ return self.apply(mit.split_after, predicate, max_split)
104
+
105
+ def split_before(
106
+ self, predicate: Callable[[T], bool], max_split: int = -1
107
+ ) -> Iter[list[T]]:
108
+ """
109
+ Yield lists of items from iterable, where each list ends with an item where callable pred returns True.
110
+
111
+ Args:
112
+ predicate: Function to determine the split points.
113
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
114
+ Example:
115
+ ```python
116
+ >>> import pyochain as pc
117
+ >>> pc.Iter.from_("abcdcba").split_before(lambda x: x == "b").into(list)
118
+ [['a'], ['b', 'c', 'd', 'c'], ['b', 'a']]
119
+ >>>
120
+ >>> def cond(n: int) -> bool:
121
+ ... return n % 2 == 1
122
+ >>>
123
+ >>> pc.Iter.from_(range(10)).split_before(cond).into(list)
124
+ [[0], [1, 2], [3, 4], [5, 6], [7, 8], [9]]
125
+
126
+ ```
127
+ At most *max_split* splits are done.
128
+
129
+ If *max_split* is not specified or -1, then there is no limit on the number of splits:
130
+ ```python
131
+ >>> pc.Iter.from_(range(10)).split_before(cond, max_split=2).into(list)
132
+ [[0], [1, 2], [3, 4, 5, 6, 7, 8, 9]]
133
+
134
+ ```
135
+ """
136
+ return self.apply(mit.split_before, predicate, max_split)
137
+
138
+ def split_into(self, sizes: Iterable[int | None]) -> Iter[list[T]]:
139
+ """
140
+ Yield a list of sequential items from iterable of length 'n' for each integer 'n' in sizes.
141
+
142
+ Args:
143
+ sizes: Iterable of integers specifying the sizes of each chunk. Use None for the remainder.
144
+ Example:
145
+ ```python
146
+ >>> import pyochain as pc
147
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([1, 2, 3]).into(list)
148
+ [[1], [2, 3], [4, 5, 6]]
149
+
150
+ If the sum of sizes is smaller than the length of iterable, then the remaining items of iterable will not be returned.
151
+ ```python
152
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).split_into([2, 3]).into(list)
153
+ [[1, 2], [3, 4, 5]]
154
+
155
+ ```
156
+
157
+ If the sum of sizes is larger than the length of iterable:
158
+
159
+ - fewer items will be returned in the iteration that overruns the iterable
160
+ - further lists will be empty
161
+ ```python
162
+ >>> pc.Iter.from_([1, 2, 3, 4]).split_into([1, 2, 3, 4]).into(list)
163
+ [[1], [2, 3], [4], []]
164
+
165
+ ```
166
+
167
+ When a None object is encountered in sizes, the returned list will contain items up to the end of iterable the same way that itertools.slice does:
168
+ ```python
169
+ >>> data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
170
+ >>> pc.Iter.from_(data).split_into([2, 3, None]).into(list)
171
+ [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
172
+
173
+ ```
174
+
175
+ split_into can be useful for grouping a series of items where the sizes of the groups are not uniform.
176
+
177
+ An example would be where in a row from a table:
178
+
179
+ - multiple columns represent elements of the same feature (e.g. a point represented by x,y,z)
180
+ - the format is not the same for all columns.
181
+ """
182
+ return self.apply(mit.split_into, sizes)
183
+
184
+ def split_when(
185
+ self, predicate: Callable[[T, T], bool], max_split: int = -1
186
+ ) -> Iter[list[T]]:
187
+ """
188
+ Split iterable into pieces based on the output of a predicate function.
189
+
190
+ Args:
191
+ predicate: Function that takes successive pairs of items and returns True if the iterable should be split.
192
+ max_split: Maximum number of splits to perform. Defaults to -1 (no limit).
193
+
194
+ For example, to find runs of increasing numbers, split the iterable when element i is larger than element i + 1:
195
+ ```python
196
+ >>> import pyochain as pc
197
+ >>> data = pc.Seq([1, 2, 3, 3, 2, 5, 2, 4, 2])
198
+ >>> data.iter().split_when(lambda x, y: x > y).into(list)
199
+ [[1, 2, 3, 3], [2, 5], [2, 4], [2]]
200
+
201
+ ```
202
+
203
+ At most max_split splits are done.
204
+
205
+ If max_split is not specified or -1, then there is no limit on the number of splits:
206
+ ```python
207
+ >>> data.iter().split_when(lambda x, y: x > y, max_split=2).into(list)
208
+ [[1, 2, 3, 3], [2, 5], [2, 4, 2]]
209
+
210
+ ```
211
+ """
212
+ return self.apply(mit.split_when, predicate, max_split)
213
+
214
+ def chunks(self, n: int, strict: bool = False) -> Iter[list[T]]:
215
+ """
216
+ Break iterable into lists of length n.
217
+
218
+ By default, the last yielded list will have fewer than *n* elements if the length of *iterable* is not divisible by *n*.
219
+
220
+ To use a fill-in value instead, see the :func:`grouper` recipe.
221
+
222
+ If:
223
+
224
+ - the length of *iterable* is not divisible by *n*
225
+ - *strict* is `True`
226
+
227
+ then `ValueError` will be raised before the last list is yielded.
228
+ Args:
229
+ n: Number of elements in each chunk.
230
+ strict: Whether to raise an error if the last chunk is smaller than n. Defaults to False.
231
+ Example:
232
+ ```python
233
+ >>> import pyochain as pc
234
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6]).chunks(3).into(list)
235
+ [[1, 2, 3], [4, 5, 6]]
236
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).chunks(3).into(list)
237
+ [[1, 2, 3], [4, 5, 6], [7, 8]]
238
+
239
+ ```
240
+ """
241
+ return self.apply(mit.chunked, n, strict)
242
+
243
+ def chunks_even(self, n: int) -> Iter[list[T]]:
244
+ """
245
+ Break iterable into lists of approximately length n.
246
+
247
+ Items are distributed such the lengths of the lists differ by at most 1 item.
248
+ Args:
249
+ n: Approximate number of elements in each chunk.
250
+ Example:
251
+ ```python
252
+ >>> import pyochain as pc
253
+ >>> iterable = pc.Seq([1, 2, 3, 4, 5, 6, 7])
254
+ >>> iterable.iter().chunks_even(3).into(list) # List lengths: 3, 2, 2
255
+ [[1, 2, 3], [4, 5], [6, 7]]
256
+ >>> iterable.iter().chunks(3).into(list) # List lengths: 3, 3, 1
257
+ [[1, 2, 3], [4, 5, 6], [7]]
258
+
259
+ ```
260
+ """
261
+ return self.apply(mit.chunked_even, n)
262
+
263
+ def unique_to_each[U: Iterable[Any]](self: IterWrapper[U]) -> Iter[list[U]]:
264
+ """
265
+ Return the elements from each of the iterables that aren't in the other iterables.
266
+
267
+ For example, suppose you have a set of packages, each with a set of dependencies:
268
+
269
+ **{'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}**
270
+
271
+ If you remove one package, which dependencies can also be removed?
272
+
273
+ If pkg_1 is removed, then A is no longer necessary - it is not associated with pkg_2 or pkg_3.
274
+
275
+ Similarly, C is only needed for pkg_2, and D is only needed for pkg_3:
276
+ ```python
277
+ >>> import pyochain as pc
278
+ >>> data = ({"A", "B"}, {"B", "C"}, {"B", "D"})
279
+ >>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
280
+ [['A'], ['C'], ['D']]
281
+
282
+ ```
283
+
284
+ If there are duplicates in one input iterable that aren't in the others they will be duplicated in the output.
285
+
286
+ Input order is preserved:
287
+ ```python
288
+ >>> data = ("mississippi", "missouri")
289
+ >>> pc.Iter.from_(data).unique_to_each().collect().unwrap()
290
+ [['p', 'p'], ['o', 'u', 'r']]
291
+
292
+ ```
293
+
294
+ It is assumed that the elements of each iterable are hashable.
295
+ """
296
+
297
+ from collections import Counter
298
+
299
+ def _unique_to_each(data: Iterable[U]) -> Generator[list[U], None, None]:
300
+ """from more_itertools.unique_to_each"""
301
+ pool: list[Iterable[U]] = [it for it in data]
302
+ counts: Counter[U] = Counter(itertools.chain.from_iterable(map(set, pool)))
303
+ uniques: set[U] = {element for element in counts if counts[element] == 1}
304
+ return ((list(filter(uniques.__contains__, it))) for it in pool)
305
+
306
+ return self.apply(_unique_to_each)
@@ -0,0 +1,224 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Collection, Generator, Iterable, Iterator
4
+ from typing import TYPE_CHECKING, Any, Concatenate
5
+
6
+ from ._aggregations import BaseAgg
7
+ from ._booleans import BaseBool
8
+ from ._constructors import IterConstructors
9
+ from ._eager import BaseEager
10
+ from ._filters import BaseFilter
11
+ from ._groups import BaseGroups
12
+ from ._joins import BaseJoins
13
+ from ._lists import BaseList
14
+ from ._maps import BaseMap
15
+ from ._partitions import BasePartitions
16
+ from ._process import BaseProcess
17
+ from ._rolling import BaseRolling
18
+ from ._tuples import BaseTuples
19
+
20
+ if TYPE_CHECKING:
21
+ from .._dict import Dict
22
+
23
+
24
+ class Iter[T](
25
+ BaseAgg[T],
26
+ BaseBool[T],
27
+ BaseFilter[T],
28
+ BaseProcess[T],
29
+ BaseMap[T],
30
+ BaseRolling[T],
31
+ BaseList[T],
32
+ BaseTuples[T],
33
+ BasePartitions[T],
34
+ BaseJoins[T],
35
+ BaseGroups[T],
36
+ BaseEager[T],
37
+ IterConstructors,
38
+ ):
39
+ """
40
+ A wrapper around Python's built-in iterable types, providing a rich set of functional programming tools.
41
+
42
+ It supports lazy evaluation, allowing for efficient processing of large datasets.
43
+
44
+ It is not a collection itself, but a wrapper that provides additional methods for working with iterables.
45
+
46
+ It can be constructed from any iterable, including `lists`, `tuples`, `sets`, and `generators`.
47
+ """
48
+
49
+ __slots__ = ("_data",)
50
+
51
+ def __init__(self, data: Iterator[T] | Generator[T, Any, Any]) -> None:
52
+ self._data = data
53
+
54
+ def __repr__(self) -> str:
55
+ return f"{self.__class__.__name__}({self.unwrap().__repr__()})"
56
+
57
+ def itr[**P, R, U: Iterable[Any]](
58
+ self: Iter[U],
59
+ func: Callable[Concatenate[Iter[U], P], R],
60
+ *args: P.args,
61
+ **kwargs: P.kwargs,
62
+ ) -> Iter[R]:
63
+ """
64
+ Apply a function to each element after wrapping it in an Iter.
65
+
66
+ This is a convenience method for the common pattern of mapping a function over an iterable of iterables.
67
+
68
+ Args:
69
+ func: Function to apply to each wrapped element.
70
+ *args: Positional arguments to pass to the function.
71
+ **kwargs: Keyword arguments to pass to the function.
72
+ Example:
73
+ ```python
74
+ >>> import pyochain as pc
75
+ >>> data = [
76
+ ... [1, 2, 3],
77
+ ... [4, 5],
78
+ ... [6, 7, 8, 9],
79
+ ... ]
80
+ >>> pc.Iter.from_(data).itr(
81
+ ... lambda x: x.repeat(2).flatten().reduce(lambda a, b: a + b)
82
+ ... ).into(list)
83
+ [12, 18, 60]
84
+
85
+ ```
86
+ """
87
+
88
+ def _itr(data: Iterable[U]) -> Generator[R, None, None]:
89
+ return (func(Iter.from_(x), *args, **kwargs) for x in data)
90
+
91
+ return self.apply(_itr)
92
+
93
+ def struct[**P, R, K, V](
94
+ self: Iter[dict[K, V]],
95
+ func: Callable[Concatenate[Dict[K, V], P], R],
96
+ *args: P.args,
97
+ **kwargs: P.kwargs,
98
+ ) -> Iter[R]:
99
+ """
100
+ Apply a function to each element after wrapping it in a Dict.
101
+
102
+ This is a convenience method for the common pattern of mapping a function over an iterable of dictionaries.
103
+ Args:
104
+ func: Function to apply to each wrapped dictionary.
105
+ *args: Positional arguments to pass to the function.
106
+ **kwargs: Keyword arguments to pass to the function.
107
+ Example:
108
+ ```python
109
+ >>> from typing import Any
110
+ >>> import pyochain as pc
111
+
112
+ >>> data: list[dict[str, Any]] = [
113
+ ... {"name": "Alice", "age": 30, "city": "New York"},
114
+ ... {"name": "Bob", "age": 25, "city": "Los Angeles"},
115
+ ... {"name": "Charlie", "age": 35, "city": "New York"},
116
+ ... {"name": "David", "age": 40, "city": "Paris"},
117
+ ... ]
118
+ >>>
119
+ >>> def to_title(d: pc.Dict[str, Any]) -> pc.Dict[str, Any]:
120
+ ... return d.map_keys(lambda k: k.title())
121
+ >>> def is_young(d: pc.Dict[str, Any]) -> bool:
122
+ ... return d.unwrap().get("Age", 0) < 30
123
+ >>> def set_continent(d: pc.Dict[str, Any], value: str) -> dict[str, Any]:
124
+ ... return d.with_key("Continent", value).unwrap()
125
+ >>>
126
+ >>> pc.Iter.from_(data).struct(to_title).filter_false(is_young).map(
127
+ ... lambda d: d.drop("Age").with_key("Continent", "NA")
128
+ ... ).map_if(
129
+ ... lambda d: d.unwrap().get("City") == "Paris",
130
+ ... lambda d: set_continent(d, "Europe"),
131
+ ... lambda d: set_continent(d, "America"),
132
+ ... ).group_by(lambda d: d.get("Continent")).map_values(
133
+ ... lambda d: pc.Iter.from_(d)
134
+ ... .struct(lambda d: d.drop("Continent").unwrap())
135
+ ... .into(list)
136
+ ... ) # doctest: +NORMALIZE_WHITESPACE
137
+ Dict({
138
+ 'America': [
139
+ {'Name': 'Alice', 'City': 'New York'},
140
+ {'Name': 'Charlie', 'City': 'New York'}
141
+ ],
142
+ 'Europe': [
143
+ {'Name': 'David', 'City': 'Paris'}
144
+ ]
145
+ })
146
+
147
+ ```
148
+ """
149
+ from .._dict import Dict
150
+
151
+ def _struct(data: Iterable[dict[K, V]]) -> Generator[R, None, None]:
152
+ return (func(Dict(x), *args, **kwargs) for x in data)
153
+
154
+ return self.apply(_struct)
155
+
156
+ def with_keys[K](self, keys: Iterable[K]) -> Dict[K, T]:
157
+ """
158
+ Create a Dict by zipping the iterable with keys.
159
+
160
+ Args:
161
+ keys: Iterable of keys to pair with the values.
162
+ Example:
163
+ ```python
164
+ >>> import pyochain as pc
165
+ >>> keys = ["a", "b", "c"]
166
+ >>> values = [1, 2, 3]
167
+ >>> pc.Iter.from_(values).with_keys(keys).unwrap()
168
+ {'a': 1, 'b': 2, 'c': 3}
169
+ >>> # This is equivalent to:
170
+ >>> pc.Iter.from_(keys).zip(values).pipe(
171
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
172
+ ... )
173
+ {'a': 1, 'b': 2, 'c': 3}
174
+
175
+ ```
176
+ """
177
+ from .._dict import Dict
178
+
179
+ return Dict(dict(zip(keys, self.unwrap())))
180
+
181
+ def with_values[V](self, values: Iterable[V]) -> Dict[T, V]:
182
+ """
183
+ Create a Dict by zipping the iterable with values.
184
+
185
+ Args:
186
+ values: Iterable of values to pair with the keys.
187
+ Example:
188
+ ```python
189
+ >>> import pyochain as pc
190
+ >>> keys = [1, 2, 3]
191
+ >>> values = ["a", "b", "c"]
192
+ >>> pc.Iter.from_(keys).with_values(values).unwrap()
193
+ {1: 'a', 2: 'b', 3: 'c'}
194
+ >>> # This is equivalent to:
195
+ >>> pc.Iter.from_(keys).zip(values).pipe(
196
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
197
+ ... )
198
+ {1: 'a', 2: 'b', 3: 'c'}
199
+
200
+ ```
201
+ """
202
+ from .._dict import Dict
203
+
204
+ return Dict(dict(zip(self.unwrap(), values)))
205
+
206
+
207
+ class Seq[T](BaseAgg[T], BaseEager[T]):
208
+ """
209
+ pyochain.Seq represent an in memory collection.
210
+
211
+ Provides a subset of pyochain.Iter methods with eager evaluation, and is the return type of pyochain.Iter.collect().
212
+ """
213
+
214
+ __slots__ = ("_data",)
215
+
216
+ def __init__(self, data: Collection[T]) -> None:
217
+ self._data = data
218
+
219
+ def iter(self) -> Iter[T]:
220
+ """
221
+ Get an iterator over the sequence.
222
+ Call this to switch to lazy evaluation.
223
+ """
224
+ return Iter.from_(self.unwrap())