pyochain 0.5.1__py3-none-any.whl → 0.5.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyochain might be problematic. Click here for more details.

@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Concatenate
5
5
 
6
6
  import cytoolz as cz
7
7
 
8
- from .._core import MappingWrapper
8
+ from .._core import MappingWrapper, SupportsRichComparison
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from ._main import Dict
@@ -44,7 +44,7 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
44
44
  func(k, v, *args, **kwargs)
45
45
  return data
46
46
 
47
- return self.apply(_for_each)
47
+ return self._new(_for_each)
48
48
 
49
49
  def update_in(
50
50
  self, *keys: K, func: Callable[[V], V], default: V | None = None
@@ -80,7 +80,11 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
80
80
 
81
81
  ```
82
82
  """
83
- return self.apply(cz.dicttoolz.update_in, keys, func, default=default)
83
+
84
+ def _update_in(data: dict[K, V]) -> dict[K, V]:
85
+ return cz.dicttoolz.update_in(data, keys, func, default=default)
86
+
87
+ return self._new(_update_in)
84
88
 
85
89
  def with_key(self, key: K, value: V) -> Dict[K, V]:
86
90
  """
@@ -102,7 +106,11 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
102
106
 
103
107
  ```
104
108
  """
105
- return self.apply(cz.dicttoolz.assoc, key, value)
109
+
110
+ def _with_key(data: dict[K, V]) -> dict[K, V]:
111
+ return cz.dicttoolz.assoc(data, key, value)
112
+
113
+ return self._new(_with_key)
106
114
 
107
115
  def drop(self, *keys: K) -> Dict[K, V]:
108
116
  """
@@ -125,7 +133,11 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
125
133
 
126
134
  ```
127
135
  """
128
- return self.apply(cz.dicttoolz.dissoc, *keys)
136
+
137
+ def _drop(data: dict[K, V]) -> dict[K, V]:
138
+ return cz.dicttoolz.dissoc(data, *keys)
139
+
140
+ return self._new(_drop)
129
141
 
130
142
  def rename(self, mapping: Mapping[K, K]) -> Dict[K, V]:
131
143
  """
@@ -148,7 +160,7 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
148
160
  def _rename(data: dict[K, V]) -> dict[K, V]:
149
161
  return {mapping.get(k, k): v for k, v in data.items()}
150
162
 
151
- return self.apply(_rename)
163
+ return self._new(_rename)
152
164
 
153
165
  def sort(self, reverse: bool = False) -> Dict[K, V]:
154
166
  """
@@ -168,4 +180,25 @@ class ProcessDict[K, V](MappingWrapper[K, V]):
168
180
  def _sort(data: dict[K, V]) -> dict[K, V]:
169
181
  return dict(sorted(data.items(), reverse=reverse))
170
182
 
171
- return self.apply(_sort)
183
+ return self._new(_sort)
184
+
185
+ def sort_values[U: SupportsRichComparison[Any]](
186
+ self: ProcessDict[K, U], reverse: bool = False
187
+ ) -> Dict[K, U]:
188
+ """
189
+ Sort the dictionary by its values and return a new Dict.
190
+
191
+ Args:
192
+ reverse: Whether to sort in descending order. Defaults to False.
193
+ ```python
194
+ >>> import pyochain as pc
195
+ >>> pc.Dict({"a": 2, "b": 1}).sort_values().unwrap()
196
+ {'b': 1, 'a': 2}
197
+
198
+ ```
199
+ """
200
+
201
+ def _sort_values(data: dict[K, U]) -> dict[K, U]:
202
+ return dict(sorted(data.items(), key=lambda item: item[1], reverse=reverse))
203
+
204
+ return self._new(_sort_values)
@@ -30,6 +30,7 @@ class BaseAgg[T](IterWrapper[T]):
30
30
 
31
31
  - one from the left elements of the pairs
32
32
  - one from the right elements.
33
+
33
34
  This function is, in some sense, the opposite of zip.
34
35
  ```python
35
36
  >>> import pyochain as pc
@@ -21,6 +21,7 @@ class BaseBool[T](IterWrapper[T]):
21
21
  If any of them return false, it returns false.
22
22
 
23
23
  An empty iterator returns true.
24
+
24
25
  Args:
25
26
  predicate: Function to evaluate each item. Defaults to checking truthiness.
26
27
  Example:
@@ -57,6 +58,7 @@ class BaseBool[T](IterWrapper[T]):
57
58
  If they all return false, it returns false.
58
59
 
59
60
  An empty iterator returns false.
61
+
60
62
  Args:
61
63
  predicate: Function to evaluate each item. Defaults to checking truthiness.
62
64
  Example:
@@ -200,6 +202,7 @@ class BaseBool[T](IterWrapper[T]):
200
202
  - Returning the first element that satisfies the `predicate`.
201
203
 
202
204
  If all the elements return false, `Iter.find()` returns the default value.
205
+
203
206
  Args:
204
207
  default: Value to return if no element satisfies the predicate. Defaults to None.
205
208
  predicate: Function to evaluate each item. Defaults to checking truthiness.
@@ -0,0 +1,243 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterable, Sequence
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ import cytoolz as cz
7
+
8
+ from .._core import IterWrapper
9
+
10
+ if TYPE_CHECKING:
11
+ from .._dict import Dict
12
+
13
+
14
+ class BaseDict[T](IterWrapper[T]):
15
+ def with_keys[K](self, keys: Iterable[K]) -> Dict[K, T]:
16
+ """
17
+ Create a Dict by zipping the iterable with keys.
18
+
19
+ Args:
20
+ keys: Iterable of keys to pair with the values.
21
+ Example:
22
+ ```python
23
+ >>> import pyochain as pc
24
+ >>> keys = ["a", "b", "c"]
25
+ >>> values = [1, 2, 3]
26
+ >>> pc.Iter.from_(values).with_keys(keys).unwrap()
27
+ {'a': 1, 'b': 2, 'c': 3}
28
+ >>> # This is equivalent to:
29
+ >>> pc.Iter.from_(keys).zip(values).pipe(
30
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
31
+ ... )
32
+ {'a': 1, 'b': 2, 'c': 3}
33
+
34
+ ```
35
+ """
36
+ from .._dict import Dict
37
+
38
+ def _with_keys(data: Iterable[T]) -> Dict[K, T]:
39
+ return Dict(dict(zip(keys, data)))
40
+
41
+ return self.into(_with_keys)
42
+
43
+ def with_values[V](self, values: Iterable[V]) -> Dict[T, V]:
44
+ """
45
+ Create a Dict by zipping the iterable with values.
46
+
47
+ Args:
48
+ values: Iterable of values to pair with the keys.
49
+ Example:
50
+ ```python
51
+ >>> import pyochain as pc
52
+ >>> keys = [1, 2, 3]
53
+ >>> values = ["a", "b", "c"]
54
+ >>> pc.Iter.from_(keys).with_values(values).unwrap()
55
+ {1: 'a', 2: 'b', 3: 'c'}
56
+ >>> # This is equivalent to:
57
+ >>> pc.Iter.from_(keys).zip(values).pipe(
58
+ ... lambda x: pc.Dict(x.into(dict)).unwrap()
59
+ ... )
60
+ {1: 'a', 2: 'b', 3: 'c'}
61
+
62
+ ```
63
+ """
64
+ from .._dict import Dict
65
+
66
+ def _with_values(data: Iterable[T]) -> Dict[T, V]:
67
+ return Dict(dict(zip(data, values)))
68
+
69
+ return self.into(_with_values)
70
+
71
+ def reduce_by[K](
72
+ self, key: Callable[[T], K], binop: Callable[[T, T], T]
73
+ ) -> Dict[K, T]:
74
+ """
75
+ Perform a simultaneous groupby and reduction.
76
+
77
+ Args:
78
+ key: Function to compute the key for grouping.
79
+ binop: Binary operation to reduce the grouped elements.
80
+ Example:
81
+ ```python
82
+ >>> from collections.abc import Iterable
83
+ >>> import pyochain as pc
84
+ >>> from operator import add, mul
85
+ >>>
86
+ >>> def is_even(x: int) -> bool:
87
+ ... return x % 2 == 0
88
+ >>>
89
+ >>> def group_reduce(data: Iterable[int]) -> int:
90
+ ... return pc.Iter.from_(data).reduce(add)
91
+ >>>
92
+ >>> data = pc.Seq([1, 2, 3, 4, 5])
93
+ >>> data.iter().reduce_by(is_even, add).unwrap()
94
+ {False: 9, True: 6}
95
+ >>> data.iter().group_by(is_even).map_values(group_reduce).unwrap()
96
+ {False: 9, True: 6}
97
+
98
+ ```
99
+ But the former does not build the intermediate groups, allowing it to operate in much less space.
100
+
101
+ This makes it suitable for larger datasets that do not fit comfortably in memory
102
+
103
+ Simple Examples:
104
+ ```python
105
+ >>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, add).unwrap()
106
+ {False: 9, True: 6}
107
+ >>> pc.Iter.from_([1, 2, 3, 4, 5]).reduce_by(is_even, mul).unwrap()
108
+ {False: 15, True: 8}
109
+
110
+ ```
111
+ """
112
+ from .._dict import Dict
113
+
114
+ def _reduce_by(data: Iterable[T]) -> Dict[K, T]:
115
+ return Dict(cz.itertoolz.reduceby(key, binop, data))
116
+
117
+ return self.into(_reduce_by)
118
+
119
+ def group_by[K](self, on: Callable[[T], K]) -> Dict[K, list[T]]:
120
+ """
121
+ Group elements by key function and return a Dict result.
122
+
123
+ Args:
124
+ on: Function to compute the key for grouping.
125
+ Example:
126
+ ```python
127
+ >>> import pyochain as pc
128
+ >>> names = [
129
+ ... "Alice",
130
+ ... "Bob",
131
+ ... "Charlie",
132
+ ... "Dan",
133
+ ... "Edith",
134
+ ... "Frank",
135
+ ... ]
136
+ >>> pc.Iter.from_(names).group_by(len).sort()
137
+ ... # doctest: +NORMALIZE_WHITESPACE
138
+ {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
139
+ >>>
140
+ >>> iseven = lambda x: x % 2 == 0
141
+ >>> pc.Iter.from_([1, 2, 3, 4, 5, 6, 7, 8]).group_by(iseven)
142
+ ... # doctest: +NORMALIZE_WHITESPACE
143
+ {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
144
+
145
+ ```
146
+ Non-callable keys imply grouping on a member.
147
+ ```python
148
+ >>> data = [
149
+ ... {"name": "Alice", "gender": "F"},
150
+ ... {"name": "Bob", "gender": "M"},
151
+ ... {"name": "Charlie", "gender": "M"},
152
+ ... ]
153
+ >>> pc.Iter.from_(data).group_by("gender").sort()
154
+ ... # doctest: +NORMALIZE_WHITESPACE
155
+ {'F': [{'gender': 'F', 'name': 'Alice'}],
156
+ 'M': [{'gender': 'M', 'name': 'Bob'}, {'gender': 'M', 'name': 'Charlie'}]}
157
+
158
+ ```
159
+ """
160
+ from .._dict import Dict
161
+
162
+ def _group_by(data: Iterable[T]) -> Dict[K, list[T]]:
163
+ return Dict(cz.itertoolz.groupby(on, data))
164
+
165
+ return self.into(_group_by)
166
+
167
+ def frequencies(self) -> Dict[T, int]:
168
+ """
169
+ Find number of occurrences of each value in the iterable.
170
+ ```python
171
+ >>> import pyochain as pc
172
+ >>> data = ["cat", "cat", "ox", "pig", "pig", "cat"]
173
+ >>> pc.Iter.from_(data).frequencies().unwrap()
174
+ {'cat': 3, 'ox': 1, 'pig': 2}
175
+
176
+ ```
177
+ """
178
+ from .._dict import Dict
179
+
180
+ def _frequencies(data: Iterable[T]) -> Dict[T, int]:
181
+ return Dict(cz.itertoolz.frequencies(data))
182
+
183
+ return self.into(_frequencies)
184
+
185
+ def count_by[K](self, key: Callable[[T], K]) -> Dict[K, int]:
186
+ """
187
+ Count elements of a collection by a key function.
188
+
189
+ Args:
190
+ key: Function to compute the key for counting.
191
+ Example:
192
+ ```python
193
+ >>> import pyochain as pc
194
+ >>> pc.Iter.from_(["cat", "mouse", "dog"]).count_by(len).unwrap()
195
+ {3: 2, 5: 1}
196
+ >>> def iseven(x):
197
+ ... return x % 2 == 0
198
+ >>> pc.Iter.from_([1, 2, 3]).count_by(iseven).unwrap()
199
+ {False: 2, True: 1}
200
+
201
+ ```
202
+ """
203
+ from .._dict import Dict
204
+
205
+ def _count_by(data: Iterable[T]) -> Dict[K, int]:
206
+ return Dict(cz.recipes.countby(key, data))
207
+
208
+ return self.into(_count_by)
209
+
210
+ def to_records[U: Sequence[Any]](self: BaseDict[U]) -> Dict[Any, Any]:
211
+ """
212
+ Transform an iterable of nested sequences into a nested dictionary.
213
+
214
+ - Each inner sequence represents a path to a value in the dictionary.
215
+ - The last element of each sequence is treated as the value
216
+ - All preceding elements are treated as keys leading to that value.
217
+
218
+ Example:
219
+ ```python
220
+ >>> import pyochain as pc
221
+ >>> arrays = [["a", "b", 1], ["a", "c", 2], ["d", 3]]
222
+ >>> pc.Seq(arrays).to_records().unwrap()
223
+ {'a': {'b': 1, 'c': 2}, 'd': 3}
224
+
225
+ ```
226
+ """
227
+ from .._dict import Dict
228
+
229
+ def _from_nested(
230
+ arrays: Iterable[Sequence[Any]], parent: dict[Any, Any] | None = None
231
+ ) -> dict[Any, Any]:
232
+ """from dictutils.pivot"""
233
+ d: dict[Any, Any] = parent or {}
234
+ for arr in arrays:
235
+ if len(arr) >= 2:
236
+ head, *tail = arr
237
+ if len(tail) == 1:
238
+ d[head] = tail[0]
239
+ else:
240
+ d[head] = _from_nested([tail], d.get(head, {}))
241
+ return d
242
+
243
+ return Dict(self.into(_from_nested))
pyochain/_iter/_eager.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Callable, Iterable
3
+ from collections.abc import Callable, Iterable, Sequence
4
4
  from functools import partial
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
@@ -38,7 +38,7 @@ class BaseEager[T](IterWrapper[T]):
38
38
  def _sort(data: Iterable[U]) -> list[U]:
39
39
  return sorted(data, reverse=reverse, key=key)
40
40
 
41
- return self.collect(_sort)
41
+ return self._eager(_sort)
42
42
 
43
43
  def tail(self, n: int) -> Seq[T]:
44
44
  """
@@ -54,7 +54,7 @@ class BaseEager[T](IterWrapper[T]):
54
54
 
55
55
  ```
56
56
  """
57
- return self.collect(partial(cz.itertoolz.tail, n))
57
+ return self._eager(partial(cz.itertoolz.tail, n))
58
58
 
59
59
  def top_n(self, n: int, key: Callable[[T], Any] | None = None) -> Seq[T]:
60
60
  """
@@ -71,7 +71,7 @@ class BaseEager[T](IterWrapper[T]):
71
71
 
72
72
  ```
73
73
  """
74
- return self.collect(partial(cz.itertoolz.topk, n, key=key))
74
+ return self._eager(partial(cz.itertoolz.topk, n, key=key))
75
75
 
76
76
  def union(self, *others: Iterable[T]) -> Seq[T]:
77
77
  """
@@ -91,10 +91,10 @@ class BaseEager[T](IterWrapper[T]):
91
91
  ```
92
92
  """
93
93
 
94
- def _union(data: Iterable[T]) -> set[T]:
95
- return set(data).union(*others)
94
+ def _union(data: Iterable[T]) -> list[T]:
95
+ return list(set(data).union(*others))
96
96
 
97
- return self.collect(_union)
97
+ return self._eager(_union)
98
98
 
99
99
  def intersection(self, *others: Iterable[T]) -> Seq[T]:
100
100
  """
@@ -109,15 +109,15 @@ class BaseEager[T](IterWrapper[T]):
109
109
  ```python
110
110
  >>> import pyochain as pc
111
111
  >>> pc.Iter.from_([1, 2, 2]).intersection([2, 3], [2]).unwrap()
112
- {2}
112
+ [2]
113
113
 
114
114
  ```
115
115
  """
116
116
 
117
- def _intersection(data: Iterable[T]) -> set[T]:
118
- return set(data).intersection(*others)
117
+ def _intersection(data: Iterable[T]) -> list[T]:
118
+ return list(set(data).intersection(*others))
119
119
 
120
- return self.collect(_intersection)
120
+ return self._eager(_intersection)
121
121
 
122
122
  def diff_unique(self, *others: Iterable[T]) -> Seq[T]:
123
123
  """
@@ -133,15 +133,15 @@ class BaseEager[T](IterWrapper[T]):
133
133
  ```python
134
134
  >>> import pyochain as pc
135
135
  >>> pc.Iter.from_([1, 2, 2]).diff_unique([2, 3]).unwrap()
136
- {1}
136
+ [1]
137
137
 
138
138
  ```
139
139
  """
140
140
 
141
- def _difference(data: Iterable[T]) -> set[T]:
142
- return set(data).difference(*others)
141
+ def _difference(data: Iterable[T]) -> list[T]:
142
+ return list(set(data).difference(*others))
143
143
 
144
- return self.collect(_difference)
144
+ return self._eager(_difference)
145
145
 
146
146
  def diff_symmetric(self, *others: Iterable[T]) -> Seq[T]:
147
147
  """
@@ -163,10 +163,10 @@ class BaseEager[T](IterWrapper[T]):
163
163
  ```
164
164
  """
165
165
 
166
- def _symmetric_difference(data: Iterable[T]) -> set[T]:
167
- return set(data).symmetric_difference(*others)
166
+ def _symmetric_difference(data: Iterable[T]) -> list[T]:
167
+ return list(set(data).symmetric_difference(*others))
168
168
 
169
- return self.collect(_symmetric_difference)
169
+ return self._eager(_symmetric_difference)
170
170
 
171
171
  def most_common(self, n: int | None = None) -> Seq[tuple[T, int]]:
172
172
  """
@@ -176,7 +176,6 @@ class BaseEager[T](IterWrapper[T]):
176
176
 
177
177
  Args:
178
178
  n: Number of most common elements to return. Defaults to None (all elements).
179
-
180
179
  Example:
181
180
  ```python
182
181
  >>> import pyochain as pc
@@ -187,9 +186,48 @@ class BaseEager[T](IterWrapper[T]):
187
186
  """
188
187
  from collections import Counter
189
188
 
190
- from ._main import Seq
191
-
192
189
  def _most_common(data: Iterable[T]) -> list[tuple[T, int]]:
193
190
  return Counter(data).most_common(n)
194
191
 
195
- return Seq(self.into(_most_common))
192
+ return self._eager(_most_common)
193
+
194
+ def rearrange[U: Sequence[Any]](self: BaseEager[U], *indices: int) -> Seq[list[U]]:
195
+ """
196
+ Rearrange elements in a given list of arrays by order indices.
197
+
198
+ The last element (value) always remains in place.
199
+
200
+ Args:
201
+ order: List of indices specifying new order of keys
202
+
203
+
204
+ Raises:
205
+ IndexError: If any index in order is out of range for the row
206
+
207
+ Example:
208
+ ```python
209
+ >>> import pyochain as pc
210
+ >>> data = pc.Seq([["A", "X", 1], ["A", "Y", 2], ["B", "X", 3], ["B", "Y", 4]])
211
+ >>> data.rearrange(1, 0).unwrap()
212
+ [['X', 'A', 1], ['Y', 'A', 2], ['X', 'B', 3], ['Y', 'B', 4]]
213
+
214
+ ```
215
+ """
216
+
217
+ def _rearrange(in_arrs: Iterable[U]) -> list[list[U]]:
218
+ """from dictutils.pivot"""
219
+ order = indices
220
+ out: list[list[U]] = []
221
+ for arr in in_arrs:
222
+ max_key_index: int = len(arr) - 2
223
+ for i in order:
224
+ if i < 0 or i > max_key_index:
225
+ raise IndexError(
226
+ f"order index {i} out of range for row with {max_key_index + 1} keys"
227
+ )
228
+
229
+ out.append([arr[i] for i in order] + [arr[-1]])
230
+
231
+ return out
232
+
233
+ return self._eager(_rearrange)