pyochain 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,466 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ from collections.abc import (
5
+ Callable,
6
+ Generator,
7
+ Iterable,
8
+ Iterator,
9
+ Sequence,
10
+ )
11
+ from typing import TYPE_CHECKING, Any, Concatenate, overload, override
12
+
13
+ import cytoolz as cz
14
+
15
+ from ._aggregations import BaseAgg
16
+ from ._booleans import BaseBool
17
+ from ._dicts import BaseDict
18
+ from ._eager import BaseEager
19
+ from ._filters import BaseFilter
20
+ from ._joins import BaseJoins
21
+ from ._lists import BaseList
22
+ from ._maps import BaseMap
23
+ from ._partitions import BasePartitions
24
+ from ._process import BaseProcess
25
+ from ._rolling import BaseRolling
26
+ from ._tuples import BaseTuples
27
+
28
+ if TYPE_CHECKING:
29
+ from .._dict import Dict
30
+
31
+
32
+ class CommonMethods[T](BaseAgg[T], BaseEager[T], BaseDict[T]):
33
+ pass
34
+
35
+
36
+ class Iter[T](
37
+ BaseBool[T],
38
+ BaseFilter[T],
39
+ BaseProcess[T],
40
+ BaseMap[T],
41
+ BaseRolling[T],
42
+ BaseList[T],
43
+ BaseTuples[T],
44
+ BasePartitions[T],
45
+ BaseJoins[T],
46
+ CommonMethods[T],
47
+ ):
48
+ """
49
+ A wrapper around Python's built-in Iterators/Generators types, providing a rich set of functional programming tools.
50
+
51
+ It's designed around lazy evaluation, allowing for efficient processing of large datasets.
52
+
53
+ - To instantiate from a lazy Iterator/Generator, simply pass it to the standard constructor.
54
+ - To instantiate from an eager Sequence (like a list or set), use the `from_` class method.
55
+ """
56
+
57
+ __slots__ = ("_data",)
58
+
59
+ def __init__(self, data: Iterator[T] | Generator[T, Any, Any]) -> None:
60
+ self._data = data
61
+
62
+ @staticmethod
63
+ def from_count(start: int = 0, step: int = 1) -> Iter[int]:
64
+ """
65
+ Create an infinite iterator of evenly spaced values.
66
+
67
+ **Warning** ⚠️
68
+ This creates an infinite iterator.
69
+ Be sure to use `Iter.take()` or `Iter.slice()` to limit the number of items taken.
70
+
71
+ Args:
72
+ start: Starting value of the sequence. Defaults to 0.
73
+ step: Difference between consecutive values. Defaults to 1.
74
+ Example:
75
+ ```python
76
+ >>> import pyochain as pc
77
+ >>> pc.Iter.from_count(10, 2).take(3).into(list)
78
+ [10, 12, 14]
79
+
80
+ ```
81
+ """
82
+
83
+ return Iter(itertools.count(start, step))
84
+
85
+ @staticmethod
86
+ def from_func[U](func: Callable[[U], U], input: U) -> Iter[U]:
87
+ """
88
+ Create an infinite iterator by repeatedly applying a function on an original input.
89
+
90
+ **Warning** ⚠️
91
+ This creates an infinite iterator.
92
+ Be sure to use `Iter.take()` or `Iter.slice()` to limit the number of items taken.
93
+
94
+ Args:
95
+ func: Function to apply repeatedly.
96
+ input: Initial value to start the iteration.
97
+
98
+ Example:
99
+ ```python
100
+ >>> import pyochain as pc
101
+ >>> pc.Iter.from_func(lambda x: x + 1, 0).take(3).into(list)
102
+ [0, 1, 2]
103
+
104
+ ```
105
+ """
106
+
107
+ return Iter(cz.itertoolz.iterate(func, input))
108
+
109
+ @overload
110
+ @staticmethod
111
+ def from_[U](data: Iterable[U]) -> Iter[U]: ...
112
+ @overload
113
+ @staticmethod
114
+ def from_[U](data: U, *more_data: U) -> Iter[U]: ...
115
+ @staticmethod
116
+ def from_[U](data: Iterable[U] | U, *more_data: U) -> Iter[U]:
117
+ """
118
+ Create an iterator from any Iterable, or from unpacked values.
119
+
120
+ - An Iterable is any object capable of returning its members one at a time, permitting it to be iterated over in a for-loop.
121
+ - An Iterator is an object representing a stream of data; returned by calling `iter()` on an Iterable.
122
+ - Once an Iterator is exhausted, it cannot be reused or reset.
123
+
124
+ If you need to reuse the data, consider collecting it into a list first with `.collect()`.
125
+
126
+ In general, avoid intermediate references when dealing with lazy iterators, and prioritize method chaining instead.
127
+
128
+ Args:
129
+ data: Iterable to convert into an iterator, or a single value.
130
+ more_data: Additional values to include if 'data' is not an Iterable.
131
+ Example:
132
+ ```python
133
+ >>> import pyochain as pc
134
+ >>> data: tuple[int, ...] = (1, 2, 3)
135
+ >>> iterator = pc.Iter.from_(data)
136
+ >>> iterator.unwrap().__class__.__name__
137
+ 'tuple_iterator'
138
+ >>> mapped = iterator.map(lambda x: x * 2)
139
+ >>> mapped.unwrap().__class__.__name__
140
+ 'map'
141
+ >>> mapped.collect(tuple).unwrap()
142
+ (2, 4, 6)
143
+ >>> # iterator is now exhausted
144
+ >>> iterator.collect().unwrap()
145
+ []
146
+ >>> # Creating from unpacked values
147
+ >>> pc.Iter.from_(1, 2, 3).collect(tuple).unwrap()
148
+ (1, 2, 3)
149
+
150
+ ```
151
+ """
152
+
153
+ def _convert_data() -> Sequence[Any]:
154
+ if cz.itertoolz.isiterable(data):
155
+ return data
156
+ else:
157
+ return (data, *more_data)
158
+
159
+ return Iter(iter(_convert_data()))
160
+
161
+ @staticmethod
162
+ def unfold[S, V](seed: S, generator: Callable[[S], tuple[V, S] | None]) -> Iter[V]:
163
+ """
164
+ Create an iterator by repeatedly applying a generator function to an initial state.
165
+
166
+ The `generator` function takes the current state and must return:
167
+
168
+ - A tuple `(value, new_state)` to emit the `value` and continue with the `new_state`.
169
+ - `None` to stop the generation.
170
+
171
+ This is functionally equivalent to a state-based `while` loop.
172
+
173
+ **Warning** ⚠️
174
+ If the `generator` function never returns `None`, it creates an infinite iterator.
175
+ Be sure to use `Iter.take()` or `Iter.slice()` to limit the number of items taken if necessary.
176
+
177
+ Args:
178
+ seed: Initial state for the generator.
179
+ generator: Function that generates the next value and state.
180
+
181
+ Example:
182
+ ```python
183
+ >>> import pyochain as pc
184
+ >>> # Example 1: Simple counter up to 5
185
+ >>> def counter_generator(state: int) -> tuple[int, int] | None:
186
+ ... if state < 5:
187
+ ... return (state * 10, state + 1)
188
+ ... return None
189
+ >>> pc.Iter.unfold(seed=0, generator=counter_generator).into(list)
190
+ [0, 10, 20, 30, 40]
191
+ >>> # Example 2: Fibonacci sequence up to 100
192
+ >>> type FibState = tuple[int, int]
193
+ >>> def fib_generator(state: FibState) -> tuple[int, FibState] | None:
194
+ ... a, b = state
195
+ ... if a > 100:
196
+ ... return None
197
+ ... return (a, (b, a + b))
198
+ >>> pc.Iter.unfold(seed=(0, 1), generator=fib_generator).into(list)
199
+ [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
200
+ >>> # Example 3: Infinite iterator (requires take())
201
+ >>> pc.Iter.unfold(seed=1, generator=lambda s: (s, s * 2)).take(5).into(list)
202
+ [1, 2, 4, 8, 16]
203
+
204
+ ```
205
+ """
206
+ from ._main import Iter
207
+
208
+ def _unfold() -> Iterator[V]:
209
+ current_seed: S = seed
210
+ while True:
211
+ result: tuple[V, S] | None = generator(current_seed)
212
+ if result is None:
213
+ break
214
+ value, next_seed = result
215
+ yield value
216
+ current_seed = next_seed
217
+
218
+ return Iter(_unfold())
219
+
220
+ def itr[**P, R, U: Iterable[Any]](
221
+ self: Iter[U],
222
+ func: Callable[Concatenate[Iter[U], P], R],
223
+ *args: P.args,
224
+ **kwargs: P.kwargs,
225
+ ) -> Iter[R]:
226
+ """
227
+ Apply a function to each element after wrapping it in an Iter.
228
+
229
+ This is a convenience method for the common pattern of mapping a function over an iterable of iterables.
230
+
231
+ Args:
232
+ func: Function to apply to each wrapped element.
233
+ *args: Positional arguments to pass to the function.
234
+ **kwargs: Keyword arguments to pass to the function.
235
+ Example:
236
+ ```python
237
+ >>> import pyochain as pc
238
+ >>> data = [
239
+ ... [1, 2, 3],
240
+ ... [4, 5],
241
+ ... [6, 7, 8, 9],
242
+ ... ]
243
+ >>> pc.Iter.from_(data).itr(
244
+ ... lambda x: x.repeat(2).flatten().reduce(lambda a, b: a + b)
245
+ ... ).into(list)
246
+ [12, 18, 60]
247
+
248
+ ```
249
+ """
250
+
251
+ def _itr(data: Iterable[U]) -> Generator[R, None, None]:
252
+ return (func(Iter(iter(x)), *args, **kwargs) for x in data)
253
+
254
+ return self._lazy(_itr)
255
+
256
+ def struct[**P, R, K, V](
257
+ self: Iter[dict[K, V]],
258
+ func: Callable[Concatenate[Dict[K, V], P], R],
259
+ *args: P.args,
260
+ **kwargs: P.kwargs,
261
+ ) -> Iter[R]:
262
+ """
263
+ Apply a function to each element after wrapping it in a Dict.
264
+
265
+ This is a convenience method for the common pattern of mapping a function over an iterable of dictionaries.
266
+
267
+ Args:
268
+ func: Function to apply to each wrapped dictionary.
269
+ *args: Positional arguments to pass to the function.
270
+ **kwargs: Keyword arguments to pass to the function.
271
+ Example:
272
+ ```python
273
+ >>> from typing import Any
274
+ >>> import pyochain as pc
275
+
276
+ >>> data: list[dict[str, Any]] = [
277
+ ... {"name": "Alice", "age": 30, "city": "New York"},
278
+ ... {"name": "Bob", "age": 25, "city": "Los Angeles"},
279
+ ... {"name": "Charlie", "age": 35, "city": "New York"},
280
+ ... {"name": "David", "age": 40, "city": "Paris"},
281
+ ... ]
282
+ >>>
283
+ >>> def to_title(d: pc.Dict[str, Any]) -> pc.Dict[str, Any]:
284
+ ... return d.map_keys(lambda k: k.title())
285
+ >>> def is_young(d: pc.Dict[str, Any]) -> bool:
286
+ ... return d.unwrap().get("Age", 0) < 30
287
+ >>> def set_continent(d: pc.Dict[str, Any], value: str) -> dict[str, Any]:
288
+ ... return d.with_key("Continent", value).unwrap()
289
+ >>>
290
+ >>> pc.Iter.from_(data).struct(to_title).filter_false(is_young).map(
291
+ ... lambda d: d.drop("Age").with_key("Continent", "NA")
292
+ ... ).map_if(
293
+ ... lambda d: d.unwrap().get("City") == "Paris",
294
+ ... lambda d: set_continent(d, "Europe"),
295
+ ... lambda d: set_continent(d, "America"),
296
+ ... ).group_by(lambda d: d.get("Continent")).map_values(
297
+ ... lambda d: pc.Iter.from_(d)
298
+ ... .struct(lambda d: d.drop("Continent").unwrap())
299
+ ... .into(list)
300
+ ... ) # doctest: +NORMALIZE_WHITESPACE
301
+ {'America': [{'City': 'New York', 'Name': 'Alice'},
302
+ {'City': 'New York', 'Name': 'Charlie'}],
303
+ 'Europe': [{'City': 'Paris', 'Name': 'David'}]}
304
+
305
+ ```
306
+ """
307
+ from .._dict import Dict
308
+
309
+ def _struct(data: Iterable[dict[K, V]]) -> Generator[R, None, None]:
310
+ return (func(Dict(x), *args, **kwargs) for x in data)
311
+
312
+ return self._lazy(_struct)
313
+
314
+ def apply[**P, R](
315
+ self,
316
+ func: Callable[Concatenate[Iterable[T], P], Iterator[R]],
317
+ *args: P.args,
318
+ **kwargs: P.kwargs,
319
+ ) -> Iter[R]:
320
+ """
321
+ Apply a function to the underlying Iterator and return a new Iter instance.
322
+
323
+ Allow to pass user defined functions that transform the iterable while retaining the Iter wrapper.
324
+
325
+ Args:
326
+ func: Function to apply to the underlying iterable.
327
+ *args: Positional arguments to pass to the function.
328
+ **kwargs: Keyword arguments to pass to the function.
329
+
330
+ Example:
331
+ ```python
332
+ >>> import pyochain as pc
333
+ >>> def double(data: Iterable[int]) -> Iterator[int]:
334
+ ... return (x * 2 for x in data)
335
+ >>> pc.Iter.from_([1, 2, 3]).apply(double).into(list)
336
+ [2, 4, 6]
337
+
338
+ ```
339
+ """
340
+ return self._lazy(func, *args, **kwargs)
341
+
342
+ def collect(self, factory: Callable[[Iterable[T]], Sequence[T]] = list) -> Seq[T]:
343
+ """
344
+ Collect the elements into a sequence, using the provided factory.
345
+
346
+ Args:
347
+ factory: A callable that takes an iterable and returns a Sequence. Defaults to list.
348
+
349
+ Example:
350
+ ```python
351
+ >>> import pyochain as pc
352
+ >>> pc.Iter.from_(range(5)).collect().unwrap()
353
+ [0, 1, 2, 3, 4]
354
+
355
+ ```
356
+ """
357
+ return self._eager(factory)
358
+
359
+ @override
360
+ def unwrap(self) -> Iterator[T]:
361
+ """
362
+ Unwrap and return the underlying Iterator.
363
+
364
+ ```python
365
+ >>> import pyochain as pc
366
+ >>> iterator = pc.Iter.from_([1, 2, 3])
367
+ >>> unwrapped = iterator.unwrap()
368
+ >>> list(unwrapped)
369
+ [1, 2, 3]
370
+
371
+ ```
372
+ """
373
+ return self._data # type: ignore[return-value]
374
+
375
+
376
+ class Seq[T](CommonMethods[T]):
377
+ """
378
+ pyochain.Seq represent an in memory Sequence.
379
+
380
+ Provides a subset of pyochain.Iter methods with eager evaluation, and is the return type of pyochain.Iter.collect().
381
+ """
382
+
383
+ __slots__ = ("_data",)
384
+
385
+ def __init__(self, data: Sequence[T]) -> None:
386
+ self._data = data
387
+
388
+ @overload
389
+ @staticmethod
390
+ def from_[U](data: Sequence[U]) -> Seq[U]: ...
391
+ @overload
392
+ @staticmethod
393
+ def from_[U](data: U, *more_data: U) -> Seq[U]: ...
394
+ @staticmethod
395
+ def from_[U](data: Sequence[U] | U, *more_data: U) -> Seq[U]:
396
+ """
397
+ Create a Seq from a Sequence or unpacked values.
398
+
399
+ Args:
400
+ data: Sequence of items or a single item.
401
+ more_data: Additional item to include if 'data' is not a Sequence.
402
+
403
+ Example:
404
+ ```python
405
+ >>> import pyochain as pc
406
+ >>> pc.Seq.from_([1, 2, 3]).unwrap()
407
+ [1, 2, 3]
408
+ >>> pc.Seq.from_(1, 2).unwrap()
409
+ (1, 2)
410
+
411
+ ```
412
+
413
+ """
414
+ if cz.itertoolz.isiterable(data):
415
+ return Seq(data)
416
+ else:
417
+ return Seq((data, *more_data))
418
+
419
+ def iter(self) -> Iter[T]:
420
+ """
421
+ Get an iterator over the sequence.
422
+ Call this to switch to lazy evaluation.
423
+ """
424
+ return self._lazy(iter)
425
+
426
+ def apply[**P, R](
427
+ self,
428
+ func: Callable[Concatenate[Iterable[T], P], Sequence[R]],
429
+ *args: P.args,
430
+ **kwargs: P.kwargs,
431
+ ) -> Seq[R]:
432
+ """
433
+ Apply a function to the underlying Sequence and return a Seq instance.
434
+
435
+ Allow to pass user defined functions that transform the Sequence while retaining the Seq wrapper.
436
+
437
+ Args:
438
+ func: Function to apply to the underlying Sequence.
439
+ *args: Positional arguments to pass to the function.
440
+ **kwargs: Keyword arguments to pass to the function.
441
+
442
+ Example:
443
+ ```python
444
+ >>> import pyochain as pc
445
+ >>> def double(data: Iterable[int]) -> Sequence[int]:
446
+ ... return [x * 2 for x in data]
447
+ >>> pc.Seq([1, 2, 3]).apply(double).into(list)
448
+ [2, 4, 6]
449
+
450
+ ```
451
+ """
452
+ return self._eager(func, *args, **kwargs)
453
+
454
+ @override
455
+ def unwrap(self) -> Sequence[T]:
456
+ """
457
+ Unwrap and return the underlying Sequence.
458
+
459
+ ```python
460
+ >>> import pyochain as pc
461
+ >>> pc.Seq([1, 2, 3]).unwrap()
462
+ [1, 2, 3]
463
+
464
+ ```
465
+ """
466
+ return self._data # type: ignore[return-value]