oups 2025.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oups might be problematic. Click here for more details.

Files changed (43) hide show
  1. oups/__init__.py +40 -0
  2. oups/date_utils.py +62 -0
  3. oups/defines.py +26 -0
  4. oups/numpy_utils.py +114 -0
  5. oups/stateful_loop/__init__.py +14 -0
  6. oups/stateful_loop/loop_persistence_io.py +55 -0
  7. oups/stateful_loop/stateful_loop.py +654 -0
  8. oups/stateful_loop/validate_loop_usage.py +338 -0
  9. oups/stateful_ops/__init__.py +22 -0
  10. oups/stateful_ops/aggstream/__init__.py +12 -0
  11. oups/stateful_ops/aggstream/aggstream.py +1524 -0
  12. oups/stateful_ops/aggstream/cumsegagg.py +580 -0
  13. oups/stateful_ops/aggstream/jcumsegagg.py +416 -0
  14. oups/stateful_ops/aggstream/segmentby.py +1018 -0
  15. oups/stateful_ops/aggstream/utils.py +71 -0
  16. oups/stateful_ops/asof_merger/__init__.py +11 -0
  17. oups/stateful_ops/asof_merger/asof_merger.py +750 -0
  18. oups/stateful_ops/asof_merger/get_config.py +401 -0
  19. oups/stateful_ops/asof_merger/validate_params.py +285 -0
  20. oups/store/__init__.py +15 -0
  21. oups/store/filepath_utils.py +68 -0
  22. oups/store/indexer.py +457 -0
  23. oups/store/ordered_parquet_dataset/__init__.py +19 -0
  24. oups/store/ordered_parquet_dataset/metadata_filename.py +50 -0
  25. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/__init__.py +15 -0
  26. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/base.py +863 -0
  27. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/read_only.py +252 -0
  28. oups/store/ordered_parquet_dataset/parquet_adapter.py +157 -0
  29. oups/store/ordered_parquet_dataset/write/__init__.py +19 -0
  30. oups/store/ordered_parquet_dataset/write/iter_merge_split_data.py +131 -0
  31. oups/store/ordered_parquet_dataset/write/merge_split_strategies/__init__.py +22 -0
  32. oups/store/ordered_parquet_dataset/write/merge_split_strategies/base.py +784 -0
  33. oups/store/ordered_parquet_dataset/write/merge_split_strategies/n_rows_strategy.py +297 -0
  34. oups/store/ordered_parquet_dataset/write/merge_split_strategies/time_period_strategy.py +319 -0
  35. oups/store/ordered_parquet_dataset/write/write.py +270 -0
  36. oups/store/store/__init__.py +11 -0
  37. oups/store/store/dataset_cache.py +50 -0
  38. oups/store/store/iter_intersections.py +397 -0
  39. oups/store/store/store.py +345 -0
  40. oups-2025.9.5.dist-info/LICENSE +201 -0
  41. oups-2025.9.5.dist-info/METADATA +44 -0
  42. oups-2025.9.5.dist-info/RECORD +43 -0
  43. oups-2025.9.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,654 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Created on Wed Jun 1 18:35:00 2025.
4
+
5
+ @author: pierrot
6
+
7
+ """
8
+ from collections import defaultdict
9
+ from collections.abc import Callable
10
+ from collections.abc import Hashable
11
+ from collections.abc import Iterable
12
+ from collections.abc import Iterator
13
+ from functools import partial
14
+ from inspect import signature
15
+ from pathlib import Path
16
+ from types import TracebackType
17
+ from typing import Any
18
+ from typing import TypeVar
19
+
20
+ from pandas import DataFrame
21
+ from pandas import concat
22
+
23
+ from oups.stateful_loop.loop_persistence_io import LoopPersistenceIO
24
+ from oups.stateful_loop.validate_loop_usage import validate_loop_usage
25
+
26
+
27
+ T = TypeVar("T")
28
+
29
+
30
class Skip(Exception):
    """
    Control-flow signal: abandon downstream processing of the current item.

    Within a stateful loop, raising ``Skip`` means "nothing more to do for this
    data item, move on to the next iteration". It is not an error condition.
    Accumulating operations typically raise it while their memory limit has
    not been reached yet, so that code placed after them only runs once
    enough data has been gathered.

    """
40
+
41
+
42
+ class IterationContext:
43
+ """
44
+ Per-item context manager that swallows ``Skip`` and yields the item.
45
+
46
+ The ``__enter__`` method returns the current item. If a ``Skip``
47
+ exception is raised inside the ``with`` block, it is swallowed to
48
+ proceed to the next iteration without running downstream code.
49
+
50
+ """
51
+
52
+ def __init__(self, current: Any):
53
+ """
54
+ Initialize the IterationContext.
55
+
56
+ Parameters
57
+ ----------
58
+ current : Any
59
+ The current item to be yielded.
60
+
61
+ """
62
+ self._current = current
63
+
64
+ def __enter__(self):
65
+ """
66
+ Return the current item.
67
+ """
68
+ return self._current
69
+
70
+ def __exit__(
71
+ self,
72
+ exc_type: type[BaseException] | None,
73
+ _exc: BaseException | None,
74
+ _tb: TracebackType | None,
75
+ ) -> bool:
76
+ """
77
+ Exit the IterationContext.
78
+
79
+ Swallow ``Skip`` exceptions to proceed to the next iteration without
80
+ running downstream code.
81
+
82
+ """
83
+ if exc_type is Skip:
84
+ return True
85
+ return False
86
+
87
+
88
+ def _raise_invalid_state_keys(
89
+ invalid_keys: Iterable[str],
90
+ ) -> None:
91
+ """
92
+ Raise a ValueError if the invalid keys are not empty.
93
+
94
+ Parameters
95
+ ----------
96
+ invalid_keys : Iterable[str]
97
+ Keys that are not present in the stateful function/object.
98
+
99
+ Raises
100
+ ------
101
+ ValueError
102
+ If the invalid keys are not empty.
103
+
104
+ """
105
+ if invalid_keys:
106
+ raise ValueError(
107
+ "state contains keys not present in stateful function/object: " + ", ".join(sorted(invalid_keys)),
108
+ )
109
+
110
+
111
class StatefulLoop:
    """
    Main orchestrator for stateful loop execution.

    The StatefulLoop class provides the core functionality wrapping a
    lightweight data-processing ``for`` loop, including iteration control, state
    management, and DataFrame buffering with memory-triggered concatenation.

    If the provided ``filepath`` already exists at instantiation time,
    its content is loaded and used to initialize the internal state store. In
    that case, any initial values given later via ``bind_function_state`` or
    ``bind_object_state`` are ignored for the corresponding bindings, because
    previously recorded state takes precedence. This enables resuming a
    stateful loop by re-running the same function that declares the loop,
    bindings, and iteration.

    Targeted usage is:

    - define a function that receives a data iterable/generator as a parameter.
    - instantiate ``StatefulLoop`` inside the function, providing a stable
      loop persistence ``filepath``.
    - bind stateful functions/objects with ``bind_function_state`` and
      ``bind_object_state``.
    - iterate using ``for item_ctx in loop.iterate(source):``.

    On subsequent calls of the same function, the stored state is loaded from
    the loop persistence ``filepath`` at construction time, so stateful
    functions resume from their last recorded state and ignore newly provided
    initial values. State is persisted when the loop completes (after the last
    item).

    Attributes
    ----------
    default_memory_limit_mb : float
        Default memory limit in megabytes used by accumulation when
        no per-call override is provided.
    default_memory_limit_bytes : int
        Default memory limit in bytes used by accumulation when
        no per-call override is provided.
    is_last_iteration : bool
        Flag indicating if this is the last iteration of the stateful loop.
    iteration_count : int
        Current iteration count (0-based).
        Value is '-1' till the loop starts.
    filepath : Path
        Path of the loop persistence file: serialized states for stateful
        functions/objects and a run-flag used by loop validation and buffering
        behavior.
    _persistence_loaded : bool
        Whether a persistence file existed and was loaded at construction time.
        Used to decide default behavior of the buffer placement validation.
    _data_buffer : dict[int, defaultdict[Hashable, list[DataFrame]]]
        Nested buffer for buffering DataFrames.
        First level keys are buffer IDs (call position within iteration),
        second level keys are user-provided keys, values are lists of
        DataFrames.
    _iteration_buffer_current : int
        Tracks the iteration index for which the current buffer position
        counter is valid. Used to reset the counter at each new iteration.
    _iteration_buffer_count : int
        Tracks the 0-based call position of ``buffer()`` within the current
        iteration.
    _memory_usage_bytes : dict[int, int]
        Memory usage tracking per buffer ID in bytes.
    _state_key_counts : dict[str, int]
        Counter per base state reference used to generate stable unique keys
        for stateful functions and objects (e.g., ``func:name#1``,
        ``obj:name#1``).
    _state_store : dict[str, dict[str, Any]]
        In-memory state storage (persisted on disk when stateful loop finishes).
        For stateful functions: stores parameter name -> value mappings.
        For stateful objects: stores last persisted attribute values.
    _object_bindings : dict[str, tuple[Any, list[str]]]
        Registry of object bindings keyed by namespaced state reference
        (e.g., ``obj:Counter#1``) to a tuple of the bound object and the list
        of attribute names to snapshot on save.

    Methods
    -------
    iterate(iterable: Iterable[Any], *, check_loop_usage: Optional[bool] = None)
    -> Iterator[IterationContext]
        Wrap an iterable to control loop flow in stateful loop context. Optionally
        runs a strict AST validation that enforces legal buffer placement.
    buffer(
        data: dict[Hashable, DataFrame],
        memory_limit_mb: Optional[float] = None,
        concat_func: Callable[[list[DataFrame]], DataFrame] = pandas.concat,
    ) -> Optional[dict[Hashable, DataFrame]]
        Buffer DataFrames in memory and track memory usage.
    bind_function_state(func: Callable[..., Any], *, state: dict[str, Any],
    name: Optional[str] = None) -> Callable[..., Any]
        Wrap a function to bind specified parameters as state across iterations.
    bind_object_state(obj: T, *, state: list[str], name: Optional[str] = None) -> T
        Register a stateful object for state binding.

    """

    def __init__(
        self,
        filepath: Path,
        *,
        default_memory_limit_mb: float = 300.0,
    ) -> None:
        """
        Initialize the StatefulLoop.

        Parameters
        ----------
        filepath : Path
            File path for storing the loop persistence. If this file
            already exists when the instance is created, its content is loaded
            to initialize the internal state store, allowing stateful loops to
            resume from a prior run. This also flags that a successful prior run
            occurred, allowing the loop validation to be skipped on subsequent
            runs if 'check_loop_usage' is ``None``.
        default_memory_limit_mb : float, default 300.0
            Default memory limit in megabytes used by delayed concatenation when
            no per-instance/per-site override is provided.

        """
        self._filepath = filepath
        # The limit is kept in bytes; the megabyte view is derived on demand.
        self._default_memory_limit_bytes = int(default_memory_limit_mb * 1024 * 1024)
        # Simple iteration context attributes
        self.is_last_iteration: bool = False
        # 'iteration_count' will be set to 0 at first iteration.
        self.iteration_count: int = -1
        # In-memory state storage (persisted when the stateful loop finishes).
        # If a loop persistence file exists, load it to resume previous states;
        # otherwise start empty.
        self._state_store: dict[str, dict[str, Any]] = {}
        self._persistence_loaded = Path(self._filepath).exists()
        if self._persistence_loaded:
            self._state_store = LoopPersistenceIO.load(self._filepath)
        # Track counts for state references to ensure stable, unique keys
        self._state_key_counts: dict[str, int] = defaultdict(int)
        # Registry for object bindings (strong refs during loop lifetime)
        self._object_bindings: dict[str, tuple[Any, list[str]]] = {}
        # Track buffer call order within each iteration
        self._iteration_buffer_current = -1
        self._iteration_buffer_count = 0
        # Data buffer for buffering: buffer_id -> user_key -> list[DataFrame]
        self._data_buffer: dict[int, defaultdict[Hashable, list[DataFrame]]] = {}
        # Track memory usage per buffer_id in bytes
        self._memory_usage_bytes: dict[int, int] = {}

    def __repr__(self) -> str:
        """
        Return string representation of the StatefulLoop.
        """
        return (
            f"StatefulLoop(filepath={self.filepath}, "
            f"default_memory_limit_mb={self.default_memory_limit_mb})"
        )

    @property
    def filepath(self) -> Path:
        """
        Return loop persistence file path.
        """
        return self._filepath

    @property
    def default_memory_limit_mb(self) -> float:
        """
        Return default memory limit in megabytes.
        """
        return self._default_memory_limit_bytes / (1024 * 1024)

    @property
    def default_memory_limit_bytes(self) -> int:
        """
        Return default memory limit in bytes.
        """
        return self._default_memory_limit_bytes

    def bind_function_state(
        self,
        func: Callable[..., Any],
        *,
        state: dict[str, Any],
        name: str | None = None,
    ) -> Callable[..., Any]:
        """
        Create a partial callable that binds specified parameters as state.

        The binding is by reference. For it to work, the parameters bound as
        state must be mutable (e.g., ``dict`` or ``list``) and updated in place
        by the stateful function.

        Parameters
        ----------
        func : callable
            Function to wrap. The partial callable publishes a reduced
            signature that hides state-managed parameters. There is no runtime
            guard: if callers pass those parameters, they will override the
            bound values for that call.
        state : dict[str, Any]
            Mapping of state parameter names to initial values used only if no
            stored state exists yet for this binding. Values should be mutable
            (e.g., ``dict`` or ``list``) and updated in place by the stateful
            function.
        name : Optional[str]
            Optional base name used to generate a stable, unique state key
            (e.g., ``name#1``). Declare stateful functions in a consistent
            order to keep keys stable across runs. Defaults to
            the function's ``__name__``.

        Returns
        -------
        callable
            A partial callable compatible with ``func`` with state references
            pre-bound and a reduced public signature.

        Raises
        ------
        ValueError
            If no ``name`` is given and ``func`` has no ``__name__``, or if
            ``state`` contains keys that are not parameters of ``func``.

        """
        # Initialize or reuse stored state references.
        base_ref = name or getattr(func, "__name__", None)
        if base_ref is None:
            raise ValueError("function has no name.")
        sig = signature(func)
        _raise_invalid_state_keys(set(state) - set(sig.parameters))
        _, stored_state = self._get_or_init_state(
            "func",
            base_ref,
            initial_state=state,
        )
        # Values are bound by reference: in-place updates made by 'func' are
        # visible through the state store.
        partial_func = partial(func, **stored_state)
        # Publish reduced signature so callers see only non-state parameters.
        public_params = [p for p in sig.parameters.values() if p.name not in state]
        partial_func.__signature__ = sig.replace(parameters=public_params)
        return partial_func

    # --- Stateful object support ---
    def bind_object_state(
        self,
        obj: T,
        *,
        state: list[str],
        name: str | None = None,
    ) -> T:
        """
        Register a stateful object for state binding.

        At bind time, if a stored persisted state exists for this object binding,
        the listed attributes are restored on ``obj``. Otherwise, the current
        values of those attributes are stored into the internal state store.
        The loop keeps a registry of bound objects and attribute names, and on
        persistence it records the latest attribute values via
        ``getattr``. Both in-place mutation and reassignment are supported.

        Parameters
        ----------
        obj : T
            The object to bind.
        state : list[str]
            List of attribute names to bind as state. Attributes must exist on
            ``obj`` at bind time and their values must be serializable by the
            configured loop persistence I/O.
        name : Optional[str], default None
            Base name used to build a stable state reference for this object.
            If None, the object's name or its class name is used.

        Returns
        -------
        T
            The same object instance provided in ``obj``.

        Raises
        ------
        ValueError
            If ``state`` lists attributes that do not exist on ``obj``.

        Examples
        --------
        Pre-initialized attribute (mutation):

        >>> class PreInitCounter:
        ...     def __init__(self):
        ...         self.state1 = {"count": 0}
        ...     def process(self, x):
        ...         self.state1["count"] += 1
        ...         return x
        >>> obj = PreInitCounter()
        >>> loop.bind_object_state(obj, state=["state1"])

        Lazy initialization with reassignment:

        >>> class ReassigningCounter:
        ...     def __init__(self, start=0):
        ...         self.state1 = None
        ...         self._start = start
        ...     def process(self, x):
        ...         if self.state1 is None:
        ...             self.state1 = {"count": self._start}
        ...         self.state1["count"] += 1
        ...         return x
        >>> obj2 = ReassigningCounter()
        >>> loop.bind_object_state(obj2, state=["state1"])

        """
        # The class name is always a string, so the fallback chain cannot
        # produce ``None``; no "object has no name" guard is needed.
        base_ref = name or getattr(obj, "__name__", None) or obj.__class__.__name__
        _raise_invalid_state_keys({attr for attr in state if not hasattr(obj, attr)})
        state_ref, stored_state = self._get_or_init_state(
            "obj",
            base_ref,
            initial_state={attr: getattr(obj, attr) for attr in state},
        )
        # Restore attributes from the stored state (a no-op on a first run,
        # where the stored state was just built from the object itself).
        for attr in state:
            setattr(obj, attr, stored_state[attr])
        # Register binding for persistence-on-save using strong reference.
        self._object_bindings[state_ref] = (obj, list(state))
        return obj

    def buffer(
        self,
        data: dict[Hashable, DataFrame],
        memory_limit_mb: float | None = None,
        concat_func: Callable[[list[DataFrame]], DataFrame] = concat,
    ) -> dict[Hashable, DataFrame] | None:
        """
        Buffer DataFrames in memory and track memory usage.

        This method automatically creates unique buffer spaces for each
        ``buffer()`` call within an iteration, preventing data from different
        ``buffer()`` calls from interfering with each other, even when using the
        same user-provided keys.

        ``buffer()`` cannot be placed within a nested loop. The unique
        identifier is based on call order within each iteration.

        Placement rules
        ---------------
        Calls to ``buffer()`` are intended to be used directly as top-level
        statements inside the first ``with item_ctx as ...:`` block inside the
        body of ``for item_ctx in loop.iterate(...):``. A strict AST validation
        can enforce these rules when ``iterate(..., check_loop_usage=...)``
        enables it (see ``iterate`` docstring for details).

        Parameters
        ----------
        data : dict[Hashable, DataFrame]
            Dictionary mapping keys to DataFrames to be buffered.
            Keys can be reused across different ``buffer()`` calls without
            conflict.
        memory_limit_mb : Optional[float], default None
            Memory limit in megabytes. If None, uses default memory limit.
            When exceeded, triggers concatenation and returns results.
        concat_func : Callable[[list[DataFrame]], DataFrame], default ``pandas.concat``
            Function to concatenate a non-empty list of DataFrames when the
            memory limit is reached.

        Returns
        -------
        Optional[dict[Hashable, DataFrame]]
            Returns concatenated DataFrames when memory limit is exceeded or on
            last iteration.

        Raises
        ------
        Skip
            Raised when memory limit is not exceeded and not on last iteration
            to signal the caller to skip downstream processing and continue to
            the next iteration.

        """
        # Generate unique iteration-based identifier.
        buffer_id = self._get_buffer_id()
        # Ensure buffer_id exists in buffer and memory tracker.
        if buffer_id not in self._data_buffer:
            self._data_buffer[buffer_id] = defaultdict(list)
            self._memory_usage_bytes[buffer_id] = 0
        pending = self._data_buffer[buffer_id]
        # Append data to buffer and track memory usage.
        for user_key, df in data.items():
            pending[user_key].append(df)
            # 'memory_usage(...).sum()' returns a numpy integer; cast it so the
            # tracker holds plain Python ints, as its annotation states.
            self._memory_usage_bytes[buffer_id] += int(df.memory_usage(deep=True).sum())
        # Check if we need to concat current buffer.
        memory_limit_bytes = int(
            (
                memory_limit_mb * 1024 * 1024
                if memory_limit_mb is not None
                else self._default_memory_limit_bytes
            ),
        )
        if not self.is_last_iteration and self._memory_usage_bytes[buffer_id] < memory_limit_bytes:
            # Still accumulating: skip downstream code for this item.
            raise Skip
        concat_res = {}
        for user_key, df_list in pending.items():
            if df_list:
                # Concatenate all DataFrames for this user_key.
                concat_res[user_key] = concat_func(df_list)
                # Free memory on the way to prevent buffering data
                # twice along the concatenation chain.
                df_list.clear()
        self._memory_usage_bytes[buffer_id] = 0
        return concat_res

    def iterate(
        self,
        iterable: Iterable[Any],
        *,
        check_loop_usage: bool | None = None,
    ) -> Iterator[IterationContext]:
        """
        Wrap an iterable to control loop flow using context-managed steps.

        This method provides the fundamental pattern for stateful loops: a
        for-loop that processes data iteratively while managing iteration
        context flags. It uses lookahead to detect the last element without
        emitting a sentinel value.

        ``Skip`` exceptions raised inside the ``with item_ctx`` block (e.g., by
        ``buffer()`` while still under the memory limit) are swallowed by the
        ``IterationContext``, skipping downstream code and continuing to the
        next iteration.

        Parameters
        ----------
        iterable : Iterable[Any]
            The iterable to wrap and process
        check_loop_usage : Optional[bool], default None
            If True, always run the strict validation that enforces:
              - the first statement in the loop body is ``with item_ctx as ...:``
              - any ``loop.buffer(...)`` calls are direct statements at the top
                level inside that ``with`` body (not in conditionals/loops/nested
                blocks).
            File read failures will raise immediately.
            If False, never run the validation.
            If None, run the validation only if the loop persistence file was
            not loaded at construction time (i.e., first run for this stateful
            loop). If a loop persistence file existed and was loaded, skip the
            validation.

        Yields
        ------
        IterationContext
            A context manager that yields the current item and swallows
            ``Skip``.

        Examples
        --------
        >>> from pathlib import Path
        >>> loop = StatefulLoop(Path("state.pkl"))
        >>> out = []
        >>> for item_ctx in loop.iterate([10, 20]):
        ...     with item_ctx as item:
        ...         out.append(item)
        >>> out
        [10, 20]

        """
        # Optional strict validation to fail fast on illegal buffer placement.
        if check_loop_usage is True or (check_loop_usage is None and not self._persistence_loaded):
            validate_loop_usage(self)
        it = iter(iterable)
        try:
            try:
                upcoming = next(it)
            except StopIteration:
                # Empty iterable, exit.
                # No state to persist since no iterations occurred.
                return

            exhausted = False
            while not exhausted:
                self.iteration_count += 1
                current = upcoming
                # Look one item ahead: if nothing follows, 'current' is the
                # last item and the flag must be set before yielding it.
                try:
                    upcoming = next(it)
                except StopIteration:
                    exhausted = True
                    self.is_last_iteration = True
                # Yield outside the 'except' handler so the generator is never
                # suspended while a StopIteration is being handled.
                yield IterationContext(current)

        finally:
            # NOTE(review): being in ``finally``, this also runs when the
            # generator is closed while suspended on the last item (e.g. the
            # consumer raised or broke out during the last iteration) --
            # confirm persisting state in that case is intended.
            if self.is_last_iteration:
                # Persist object-bound attributes just before saving state.
                if self._object_bindings:
                    for state_ref, (obj, attrs) in self._object_bindings.items():
                        self._state_store[state_ref] = {attr: getattr(obj, attr) for attr in attrs}
                # After yielding last element, persist state and stop.
                # The file is created even if there is no state to persist
                # (no bindings). The file is still used as a flag to indicate
                # the stateful loop has been run once (see 'check_loop_usage').
                LoopPersistenceIO.save(self.filepath, self._state_store)
                # Clear strong references so objects can be GC'ed after save.
                self._object_bindings.clear()

    # --- Iteration-based buffer ID generation ---
    def _get_buffer_id(self) -> int:
        """
        Generate buffer id depending on its call order in the iteration.

        This approach is valid if all ``buffer()`` calls are at same level
        in the code, within the stateful loop.

        Returns
        -------
        int
            The call position (0, 1, 2, ...) of this buffer within the
            iteration. Same position across different iterations gets the same
            ID.

        """
        # Reset counter when we enter a new iteration
        if self.iteration_count != self._iteration_buffer_current:
            self._iteration_buffer_current = self.iteration_count
            self._iteration_buffer_count = 0
        else:
            self._iteration_buffer_count += 1
        return self._iteration_buffer_count

    def _get_or_init_state(
        self,
        namespace: str,
        base_ref: str,
        *,
        initial_state: dict[str, Any],
    ) -> tuple[str, dict[str, Any]]:
        """
        Validate states, initialize if needed, and return state ref and values.

        Parameters
        ----------
        namespace : str
            Either ``"func"`` or ``"obj"``. Used to namespace state keys.
        base_ref : str
            Base name for the state reference; a unique counter is appended and
            prefixed by the namespace (e.g., ``func:base#1``, ``obj:base#1``).
        initial_state : dict[str, Any]
            Initial mapping to use when creating a new state entry. It is
            stored by reference, not copied.

        Returns
        -------
        tuple[str, dict[str, Any]]
            The namespaced state reference and the stored state mapping.

        """
        # Generate unique state reference and initialize if needed
        namespaced_ref = f"{namespace}:{base_ref}"
        self._state_key_counts[namespaced_ref] += 1
        state_ref = f"{namespaced_ref}#{self._state_key_counts[namespaced_ref]}"
        if state_ref not in self._state_store:
            # Initialize new state entry with initial state.
            self._state_store[state_ref] = initial_state
        return state_ref, self._state_store[state_ref]