pyochain 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterable, Sequence
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ import cytoolz as cz
7
+
8
+ from .._core import IterWrapper
9
+
10
+ if TYPE_CHECKING:
11
+ from .._dict import Dict
12
+
13
+
14
+ class BaseDict[T](IterWrapper[T]):
15
+ def with_keys[K](self, keys: Iterable[K]) -> Dict[K, T]:
16
+ """
17
+ Create a Dict by zipping the iterable with keys.
18
+
19
+ Args:
20
+ keys: Iterable of keys to pair with the values.
21
+ Example:
22
+ ```python
23
+ >>> import pyochain as pc
24
+ >>> keys = ["a", "b", "c"]
25
+ >>> values = [1, 2, 3]
26
+ >>> pc.Iter.from_(values).with_keys(keys).unwrap()
27
+ {'a': 1, 'b': 2, 'c': 3}
28
+ >>> # This is equivalent to:
29
+ >>> pc.Iter.from_(keys).zip(values).pipe(
30
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
31
+ ... )
32
+ {'a': 1, 'b': 2, 'c': 3}
33
+
34
+ ```
35
+ """
36
+ from .._dict import Dict
37
+
38
+ def _with_keys(data: Iterable[T]) -> Dict[K, T]:
39
+ return Dict(dict(zip(keys, data)))
40
+
41
+ return self.into(_with_keys)
42
+
43
+ def with_values[V](self, values: Iterable[V]) -> Dict[T, V]:
44
+ """
45
+ Create a Dict by zipping the iterable with values.
46
+
47
+ Args:
48
+ values: Iterable of values to pair with the keys.
49
+ Example:
50
+ ```python
51
+ >>> import pyochain as pc
52
+ >>> keys = [1, 2, 3]
53
+ >>> values = ["a", "b", "c"]
54
+ >>> pc.Iter.from_(keys).with_values(values).unwrap()
55
+ {1: 'a', 2: 'b', 3: 'c'}
56
+ >>> # This is equivalent to:
57
+ >>> pc.Iter.from_(keys).zip(values).pipe(
58
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
59
+ ... )
60
+ {1: 'a', 2: 'b', 3: 'c'}
61
+
62
+ ```
63
+ """
64
+ from .._dict import Dict
65
+
66
+ def _with_values(data: Iterable[T]) -> Dict[T, V]:
67
+ return Dict(dict(zip(data, values)))
68
+
69
+ return self.into(_with_values)
70
+
71
+ def reduce_by[K](
72
+ self, key: Callable[[T], K], binop: Callable[[T, T], T]
73
+ ) -> Dict[K, T]:
74
+ """
75
+ Perform a simultaneous groupby and reduction.
76
+
77
+ Args:
78
+ key: Function to compute the key for grouping.
79
+ binop: Binary operation to reduce the grouped elements.
80
+ Example:
81
+ ```python
82
+ >>> from collections.abc import Iterable
83
+ >>> import pyochain as pc
84
+ >>> from operator import add, mul
85
+ >>>
86
+ >>> def is_even(x: int) -> bool:
87
+ ... return x % 2 == 0
88
+ >>>
89
+ >>> def group_reduce(data: Iterable[int]) -> int:
90
+ ... return pc.Iter.from_(data).reduce(add)
91
+ >>>
92
+ >>> data = pc.Seq([1, 2, 3, 4, 5])
93
+ >>> data.iter().reduce_by(is_even, add).unwrap()
94
+ {False: 9, True: 6}
95
+ >>> data.iter().group_by(is_even).map_values(group_reduce).unwrap()
96
+ {False: 9, True: 6}
97
+
98
+ ```
99
+ But the former does not build the intermediate groups, allowing it to operate in much less space.
100
+
101
+ This makes it suitable for larger datasets that do not fit comfortably in memory
102
+
103
+ Simple Examples:
104
+ ```python
105
+ >>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, add).unwrap()
106
+ {False: 9, True: 6}
107
+ >>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, mul).unwrap()
108
+ {False: 15, True: 8}
109
+
110
+ ```
111
+ """
112
+ from .._dict import Dict
113
+
114
+ def _reduce_by(data: Iterable[T]) -> Dict[K, T]:
115
+ return Dict(cz.itertoolz.reduceby(key, binop, data))
116
+
117
+ return self.into(_reduce_by)
118
+
119
+ def group_by[K](self, on: Callable[[T], K]) -> Dict[K, list[T]]:
120
+ """
121
+ Group elements by key function and return a Dict result.
122
+
123
+ Args:
124
+ on: Function to compute the key for grouping.
125
+ Example:
126
+ ```python
127
+ >>> import pyochain as pc
128
+ >>> names = [
129
+ ... "Alice",
130
+ ... "Bob",
131
+ ... "Charlie",
132
+ ... "Dan",
133
+ ... "Edith",
134
+ ... "Frank",
135
+ ... ]
136
+ >>> pc.Iter.from_(names).group_by(len).sort()
137
+ ... # doctest: +NORMALIZE_WHITESPACE
138
+ {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
139
+ >>>
140
+ >>> iseven = lambda x: x % 2 == 0
141
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).group_by(iseven)
142
+ ... # doctest: +NORMALIZE_WHITESPACE
143
+ {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
144
+
145
+ ```
146
+ Non-callable keys imply grouping on a member.
147
+ ```python
148
+ >>> data = [
149
+ ... {"name": "Alice", "gender": "F"},
150
+ ... {"name": "Bob", "gender": "M"},
151
+ ... {"name": "Charlie", "gender": "M"},
152
+ ... ]
153
+ >>> pc.Iter.from_(data).group_by("gender").sort()
154
+ ... # doctest: +NORMALIZE_WHITESPACE
155
+ {'F': [{'gender': 'F', 'name': 'Alice'}],
156
+ 'M': [{'gender': 'M', 'name': 'Bob'}, {'gender': 'M', 'name': 'Charlie'}]}
157
+
158
+ ```
159
+ """
160
+ from .._dict import Dict
161
+
162
+ def _group_by(data: Iterable[T]) -> Dict[K, list[T]]:
163
+ return Dict(cz.itertoolz.groupby(on, data))
164
+
165
+ return self.into(_group_by)
166
+
167
+ def frequencies(self) -> Dict[T, int]:
168
+ """
169
+ Find number of occurrences of each value in the iterable.
170
+ ```python
171
+ >>> import pyochain as pc
172
+ >>> data = ["cat", "cat", "ox", "pig", "pig", "cat"]
173
+ >>> pc.Iter.from_(data).frequencies().unwrap()
174
+ {'cat': 3, 'ox': 1, 'pig': 2}
175
+
176
+ ```
177
+ """
178
+ from .._dict import Dict
179
+
180
+ def _frequencies(data: Iterable[T]) -> Dict[T, int]:
181
+ return Dict(cz.itertoolz.frequencies(data))
182
+
183
+ return self.into(_frequencies)
184
+
185
+ def count_by[K](self, key: Callable[[T], K]) -> Dict[K, int]:
186
+ """
187
+ Count elements of a collection by a key function.
188
+
189
+ Args:
190
+ key: Function to compute the key for counting.
191
+ Example:
192
+ ```python
193
+ >>> import pyochain as pc
194
+ >>> pc.Iter.from_(["cat", "mouse", "dog"]).count_by(len).unwrap()
195
+ {3: 2, 5: 1}
196
+ >>> def iseven(x):
197
+ ... return x % 2 == 0
198
+ >>> pc.Iter.from_([1, 2, 3]).count_by(iseven).unwrap()
199
+ {False: 2, True: 1}
200
+
201
+ ```
202
+ """
203
+ from .._dict import Dict
204
+
205
+ def _count_by(data: Iterable[T]) -> Dict[K, int]:
206
+ return Dict(cz.recipes.countby(key, data))
207
+
208
+ return self.into(_count_by)
209
+
210
+ def to_records[U: Sequence[Any]](self: BaseDict[U]) -> Dict[Any, Any]:
211
+ """
212
+ Transform an iterable of nested sequences into a nested dictionary.
213
+
214
+ - Each inner sequence represents a path to a value in the dictionary.
215
+ - The last element of each sequence is treated as the value
216
+ - All preceding elements are treated as keys leading to that value.
217
+
218
+ Example:
219
+ ```python
220
+ >>> import pyochain as pc
221
+ >>> arrays = [["a", "b", 1], ["a", "c", 2], ["d", 3]]
222
+ >>> pc.Seq(arrays).to_records().unwrap()
223
+ {'a': {'b': 1, 'c': 2}, 'd': 3}
224
+
225
+ ```
226
+ """
227
+ from .._dict import Dict
228
+
229
+ def _from_nested(
230
+ arrays: Iterable[Sequence[Any]], parent: dict[Any, Any] | None = None
231
+ ) -> dict[Any, Any]:
232
+ """from dictutils.pivot"""
233
+ d: dict[Any, Any] = parent or {}
234
+ for arr in arrays:
235
+ if len(arr) >= 2:
236
+ head, *tail = arr
237
+ if len(tail) == 1:
238
+ d[head] = tail[0]
239
+ else:
240
+ d[head] = _from_nested([tail], d.get(head, {}))
241
+ return d
242
+
243
+ return Dict(self.into(_from_nested))
@@ -0,0 +1,233 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterable, Sequence
4
+ from functools import partial
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ import cytoolz as cz
8
+
9
+ from .._core import IterWrapper, SupportsRichComparison
10
+
11
+ if TYPE_CHECKING:
12
+ from ._main import Seq
13
+
14
+
15
+ class BaseEager[T](IterWrapper[T]):
16
+ def sort[U: SupportsRichComparison[Any]](
17
+ self: BaseEager[U], reverse: bool = False, key: Callable[[U], Any] | None = None
18
+ ) -> Seq[U]:
19
+ """
20
+ Sort the elements of the sequence.
21
+
22
+ Note:
23
+ This method must consume the entire iterable to perform the sort.
24
+ The result is a new iterable over the sorted sequence.
25
+
26
+ Args:
27
+ reverse: Whether to sort in descending order. Defaults to False.
28
+ key: Function to extract a comparison key from each element. Defaults to None.
29
+ Example:
30
+ ```python
31
+ >>> import pyochain as pc
32
+ >>> pc.Iter.from_([3, 1, 2]).sort().into(list)
33
+ [1, 2, 3]
34
+
35
+ ```
36
+ """
37
+
38
+ def _sort(data: Iterable[U]) -> list[U]:
39
+ return sorted(data, reverse=reverse, key=key)
40
+
41
+ return self._eager(_sort)
42
+
43
+ def tail(self, n: int) -> Seq[T]:
44
+ """
45
+ Return a tuple of the last n elements.
46
+
47
+ Args:
48
+ n: Number of elements to return.
49
+ Example:
50
+ ```python
51
+ >>> import pyochain as pc
52
+ >>> pc.Iter.from_([1, 2, 3]).tail(2).unwrap()
53
+ (2, 3)
54
+
55
+ ```
56
+ """
57
+ return self._eager(partial(cz.itertoolz.tail, n))
58
+
59
+ def top_n(self, n: int, key: Callable[[T], Any] | None = None) -> Seq[T]:
60
+ """
61
+ Return a tuple of the top-n items according to key.
62
+
63
+ Args:
64
+ n: Number of top elements to return.
65
+ key: Function to extract a comparison key from each element. Defaults to None.
66
+ Example:
67
+ ```python
68
+ >>> import pyochain as pc
69
+ >>> pc.Iter.from_([1, 3, 2]).top_n(2).unwrap()
70
+ (3, 2)
71
+
72
+ ```
73
+ """
74
+ return self._eager(partial(cz.itertoolz.topk, n, key=key))
75
+
76
+ def union(self, *others: Iterable[T]) -> Seq[T]:
77
+ """
78
+ Return the union of this iterable and 'others'.
79
+
80
+ Note:
81
+ This method consumes inner data and removes duplicates.
82
+
83
+ Args:
84
+ *others: Other iterables to include in the union.
85
+ Example:
86
+ ```python
87
+ >>> import pyochain as pc
88
+ >>> pc.Iter.from_([1, 2, 2]).union([2, 3], [4]).iter().sort().unwrap()
89
+ [1, 2, 3, 4]
90
+
91
+ ```
92
+ """
93
+
94
+ def _union(data: Iterable[T]) -> list[T]:
95
+ return list(set(data).union(*others))
96
+
97
+ return self._eager(_union)
98
+
99
+ def intersection(self, *others: Iterable[T]) -> Seq[T]:
100
+ """
101
+ Return the elements common to this iterable and 'others'.
102
+
103
+ Note:
104
+ This method consumes inner data, unsorts it, and removes duplicates.
105
+
106
+ Args:
107
+ *others: Other iterables to intersect with.
108
+ Example:
109
+ ```python
110
+ >>> import pyochain as pc
111
+ >>> pc.Iter.from_([1, 2, 2]).intersection([2, 3], [2]).unwrap()
112
+ [2]
113
+
114
+ ```
115
+ """
116
+
117
+ def _intersection(data: Iterable[T]) -> list[T]:
118
+ return list(set(data).intersection(*others))
119
+
120
+ return self._eager(_intersection)
121
+
122
+ def diff_unique(self, *others: Iterable[T]) -> Seq[T]:
123
+ """
124
+ Return the difference of this iterable and 'others'.
125
+ (Elements in 'self' but not in 'others').
126
+
127
+ Note:
128
+ This method consumes inner data, unsorts it, and removes duplicates.
129
+
130
+ Args:
131
+ *others: Other iterables to subtract from this iterable.
132
+ Example:
133
+ ```python
134
+ >>> import pyochain as pc
135
+ >>> pc.Iter.from_([1, 2, 2]).diff_unique([2, 3]).unwrap()
136
+ [1]
137
+
138
+ ```
139
+ """
140
+
141
+ def _difference(data: Iterable[T]) -> list[T]:
142
+ return list(set(data).difference(*others))
143
+
144
+ return self._eager(_difference)
145
+
146
+ def diff_symmetric(self, *others: Iterable[T]) -> Seq[T]:
147
+ """
148
+ Return the symmetric difference (XOR) of this iterable and 'others'.
149
+
150
+ Note:
151
+ This method consumes inner data, unsorts it, and removes duplicates.
152
+
153
+ Args:
154
+ *others: Other iterables to compute the symmetric difference with.
155
+ Example:
156
+ ```python
157
+ >>> import pyochain as pc
158
+ >>> pc.Iter.from_([1, 2, 2]).diff_symmetric([2, 3]).iter().sort().unwrap()
159
+ [1, 3]
160
+ >>> pc.Iter.from_([1, 2, 3]).diff_symmetric([3, 4, 5]).iter().sort().unwrap()
161
+ [1, 2, 4, 5]
162
+
163
+ ```
164
+ """
165
+
166
+ def _symmetric_difference(data: Iterable[T]) -> list[T]:
167
+ return list(set(data).symmetric_difference(*others))
168
+
169
+ return self._eager(_symmetric_difference)
170
+
171
+ def most_common(self, n: int | None = None) -> Seq[tuple[T, int]]:
172
+ """
173
+ Return the n most common elements and their counts.
174
+
175
+ If n is None, then all elements are returned.
176
+
177
+ Args:
178
+ n: Number of most common elements to return. Defaults to None (all elements).
179
+ Example:
180
+ ```python
181
+ >>> import pyochain as pc
182
+ >>> pc.Iter.from_([1, 1, 2, 3, 3, 3]).most_common(2).unwrap()
183
+ [(3, 3), (1, 2)]
184
+
185
+ ```
186
+ """
187
+ from collections import Counter
188
+
189
+ def _most_common(data: Iterable[T]) -> list[tuple[T, int]]:
190
+ return Counter(data).most_common(n)
191
+
192
+ return self._eager(_most_common)
193
+
194
+ def rearrange[U: Sequence[Any]](self: BaseEager[U], *indices: int) -> Seq[list[U]]:
195
+ """
196
+ Rearrange elements in a given list of arrays by order indices.
197
+
198
+ The last element (value) always remains in place.
199
+
200
+ Args:
201
+ order: List of indices specifying new order of keys
202
+
203
+
204
+ Raises:
205
+ IndexError: If any index in order is out of range for the row
206
+
207
+ Example:
208
+ ```python
209
+ >>> import pyochain as pc
210
+ >>> data = pc.Seq([["A", "X", 1], ["A", "Y", 2], ["B", "X", 3], ["B", "Y", 4]])
211
+ >>> data.rearrange(1, 0).unwrap()
212
+ [['X', 'A', 1], ['Y', 'A', 2], ['X', 'B', 3], ['Y', 'B', 4]]
213
+
214
+ ```
215
+ """
216
+
217
+ def _rearrange(in_arrs: Iterable[U]) -> list[list[U]]:
218
+ """from dictutils.pivot"""
219
+ order = indices
220
+ out: list[list[U]] = []
221
+ for arr in in_arrs:
222
+ max_key_index: int = len(arr) - 2
223
+ for i in order:
224
+ if i < 0 or i > max_key_index:
225
+ raise IndexError(
226
+ f"order index {i} out of range for row with {max_key_index + 1} keys"
227
+ )
228
+
229
+ out.append([arr[i] for i in order] + [arr[-1]])
230
+
231
+ return out
232
+
233
+ return self._eager(_rearrange)