oups 2025.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oups might be problematic. Click here for more details.

Files changed (43) hide show
  1. oups/__init__.py +40 -0
  2. oups/date_utils.py +62 -0
  3. oups/defines.py +26 -0
  4. oups/numpy_utils.py +114 -0
  5. oups/stateful_loop/__init__.py +14 -0
  6. oups/stateful_loop/loop_persistence_io.py +55 -0
  7. oups/stateful_loop/stateful_loop.py +654 -0
  8. oups/stateful_loop/validate_loop_usage.py +338 -0
  9. oups/stateful_ops/__init__.py +22 -0
  10. oups/stateful_ops/aggstream/__init__.py +12 -0
  11. oups/stateful_ops/aggstream/aggstream.py +1524 -0
  12. oups/stateful_ops/aggstream/cumsegagg.py +580 -0
  13. oups/stateful_ops/aggstream/jcumsegagg.py +416 -0
  14. oups/stateful_ops/aggstream/segmentby.py +1018 -0
  15. oups/stateful_ops/aggstream/utils.py +71 -0
  16. oups/stateful_ops/asof_merger/__init__.py +11 -0
  17. oups/stateful_ops/asof_merger/asof_merger.py +750 -0
  18. oups/stateful_ops/asof_merger/get_config.py +401 -0
  19. oups/stateful_ops/asof_merger/validate_params.py +285 -0
  20. oups/store/__init__.py +15 -0
  21. oups/store/filepath_utils.py +68 -0
  22. oups/store/indexer.py +457 -0
  23. oups/store/ordered_parquet_dataset/__init__.py +19 -0
  24. oups/store/ordered_parquet_dataset/metadata_filename.py +50 -0
  25. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/__init__.py +15 -0
  26. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/base.py +863 -0
  27. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/read_only.py +252 -0
  28. oups/store/ordered_parquet_dataset/parquet_adapter.py +157 -0
  29. oups/store/ordered_parquet_dataset/write/__init__.py +19 -0
  30. oups/store/ordered_parquet_dataset/write/iter_merge_split_data.py +131 -0
  31. oups/store/ordered_parquet_dataset/write/merge_split_strategies/__init__.py +22 -0
  32. oups/store/ordered_parquet_dataset/write/merge_split_strategies/base.py +784 -0
  33. oups/store/ordered_parquet_dataset/write/merge_split_strategies/n_rows_strategy.py +297 -0
  34. oups/store/ordered_parquet_dataset/write/merge_split_strategies/time_period_strategy.py +319 -0
  35. oups/store/ordered_parquet_dataset/write/write.py +270 -0
  36. oups/store/store/__init__.py +11 -0
  37. oups/store/store/dataset_cache.py +50 -0
  38. oups/store/store/iter_intersections.py +397 -0
  39. oups/store/store/store.py +345 -0
  40. oups-2025.9.5.dist-info/LICENSE +201 -0
  41. oups-2025.9.5.dist-info/METADATA +44 -0
  42. oups-2025.9.5.dist-info/RECORD +43 -0
  43. oups-2025.9.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,338 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ AST-based validation for ``StatefulLoop`` usage.
4
+
5
+ This module enforces the positive placement rules for ``StatefulLoop``:
6
+
7
+ - The ``for`` statement iterating over ``loop.iterate(...)`` must be
8
+ immediately followed by a ``with <item_ctx> as ...:`` statement, where
9
+ ``<item_ctx>`` is the loop target variable.
10
+ - Any ``loop.buffer(...)`` calls within that loop must appear as direct
11
+ statements inside the body of that single ``with`` block (i.e., at the same
12
+ indentation level as other top-level statements inside the ``with``), not
13
+ inside conditionals, nested loops, nested ``with`` blocks, try/except, match
14
+ blocks, comprehensions, or nested function/class bodies.
15
+
16
+ This check is intended to run before the loop begins and is strict: if the
17
+ source file cannot be read, the calling code should surface that failure.
18
+
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import ast
23
+ import inspect
24
+ from pathlib import Path
25
+ from typing import TYPE_CHECKING
26
+
27
+
28
+ if TYPE_CHECKING: # pragma: no cover
29
+ # Imported only for typing to avoid circular imports at runtime
30
+ from .stateful_loop import StatefulLoop
31
+
32
+
33
+ ILLEGAL_ANCESTOR_TYPES = (
34
+ ast.If,
35
+ ast.For,
36
+ ast.While,
37
+ ast.AsyncFor,
38
+ ast.Try,
39
+ ast.Match,
40
+ ast.FunctionDef,
41
+ ast.AsyncFunctionDef,
42
+ ast.ClassDef,
43
+ ast.Lambda,
44
+ ast.ListComp,
45
+ ast.SetComp,
46
+ ast.DictComp,
47
+ ast.GeneratorExp,
48
+ )
49
+
50
+
51
+ def _get_for_target_name(for_node: ast.For) -> str | None:
52
+ """
53
+ Return the name bound by the iteration context in the ``for`` target.
54
+
55
+ Parameters
56
+ ----------
57
+ for_node : ast.For
58
+ The ``for`` node to inspect.
59
+
60
+ Returns
61
+ -------
62
+ Optional[str]
63
+ The name bound by the loop target, or None if unsupported.
64
+
65
+ """
66
+ tgt = for_node.target
67
+ if isinstance(tgt, ast.Name):
68
+ return tgt.id
69
+ # Allow tuple-unpacking pattern produced by enumerate(...), e.g.::
70
+ # for i, item_ctx in enumerate(loop.iterate(...)):
71
+ # In this case, the iteration context variable is conventionally the
72
+ # second element.
73
+ if isinstance(tgt, ast.Tuple) and len(getattr(tgt, "elts", ())) == 2:
74
+ second = tgt.elts[1]
75
+ if isinstance(second, ast.Name):
76
+ return second.id
77
+ return None
78
+
79
+
80
+ def _find_for_node(tree: ast.AST, lineno: int) -> ast.For | None:
81
+ """
82
+ Find the ``ast.For`` node that starts at the given line number.
83
+
84
+ Parameters
85
+ ----------
86
+ tree : ast.AST
87
+ Parsed module AST.
88
+ lineno : int
89
+ Expected starting line number of the ``for`` statement.
90
+
91
+ Returns
92
+ -------
93
+ Optional[ast.For]
94
+ The matching ``For`` node, if any.
95
+
96
+ """
97
+ for node in ast.walk(tree):
98
+ if isinstance(node, ast.For) and getattr(node, "lineno", None) == lineno:
99
+ return node
100
+ return None
101
+
102
+
103
+ def _extract_iterate_target_name(for_node: ast.For) -> str | None:
104
+ """
105
+ Return the variable name used to call ``iterate`` in ``for ... in ...``.
106
+
107
+ Supports the simple dotted form ``<name>.iterate(...)`` and the common
108
+ ``enumerate(<name>.iterate(...))`` wrapper. If the iterate call target is
109
+ more complex (e.g., attribute chains), return None to skip strict
110
+ enforcement.
111
+
112
+ Parameters
113
+ ----------
114
+ for_node : ast.For
115
+ The ``for`` node to inspect.
116
+
117
+ Returns
118
+ -------
119
+ Optional[str]
120
+ The base name used for the ``iterate`` call, or None if unsupported.
121
+
122
+ """
123
+ it = for_node.iter
124
+ call_candidate: ast.AST | None = it
125
+ # Unwrap enumerate(...) if present
126
+ if isinstance(call_candidate, ast.Call) and isinstance(call_candidate.func, ast.Name):
127
+ if call_candidate.func.id == "enumerate" and call_candidate.args:
128
+ call_candidate = call_candidate.args[0]
129
+ if (
130
+ isinstance(call_candidate, ast.Call)
131
+ and isinstance(call_candidate.func, ast.Attribute)
132
+ and call_candidate.func.attr == "iterate"
133
+ ):
134
+ base = call_candidate.func.value
135
+ if isinstance(base, ast.Name):
136
+ return base.id
137
+ return None
138
+
139
+
140
+ def _is_illegal_placement(
141
+ ancestors: tuple[ast.AST, ...],
142
+ *,
143
+ allowed_with: ast.With | None,
144
+ ) -> bool:
145
+ """
146
+ Return True if the call is not directly under the required ``with`` block.
147
+
148
+ Rules:
149
+ - If ``allowed_with`` is None, any placement is illegal (used to flag calls
150
+ outside the first required ``with``).
151
+ - Otherwise, since traversal starts at a single top-level statement inside
152
+ the required ``with`` body, we accept when no control-flow nodes appear
153
+ among ancestors. Control-flow nodes are ``If``, ``For``, ``While``,
154
+ ``AsyncFor``, ``Try``, ``Match``, function/class/lambda defs, or
155
+ comprehensions. This ensures the call is not nested within such blocks.
156
+
157
+ """
158
+ if allowed_with is None:
159
+ return True
160
+ # Since we traverse from a single top-level statement inside the 'with'
161
+ # body, simply ensure there is no control-flow node among ancestors.
162
+ for a in ancestors:
163
+ if isinstance(a, ILLEGAL_ANCESTOR_TYPES):
164
+ return True
165
+ return False
166
+
167
+
168
+ def _first_offending_buffer_call(
169
+ node: ast.AST,
170
+ loop_var_name: str,
171
+ *,
172
+ allowed_with: ast.With | None,
173
+ ) -> ast.Call | None:
174
+ """
175
+ Find and return the first illegal ``loop.buffer(...)`` call under ``node``.
176
+
177
+ Parameters
178
+ ----------
179
+ node : ast.AST
180
+ Root node to inspect (typically a ``For`` body statement).
181
+ loop_var_name : str
182
+ The variable name on which ``iterate`` was called (e.g., ``loop``).
183
+
184
+ Returns
185
+ -------
186
+ Optional[ast.Call]
187
+ The offending call node, if found.
188
+
189
+ """
190
+ stack: list[tuple[ast.AST, tuple[ast.AST, ...]]] = [(node, ())]
191
+ while stack:
192
+ current, ancestors = stack.pop()
193
+ if isinstance(current, ast.Call) and isinstance(
194
+ getattr(current, "func", None),
195
+ ast.Attribute,
196
+ ):
197
+ attr = current.func
198
+ if attr.attr == "buffer" and isinstance(attr.value, ast.Name) and attr.value.id == loop_var_name:
199
+ if _is_illegal_placement(ancestors, allowed_with=allowed_with):
200
+ return current
201
+ for child in ast.iter_child_nodes(current):
202
+ stack.append((child, ancestors + (current,)))
203
+ return None
204
+
205
+
206
def _parse_and_find_for(filename: str, for_lineno: int) -> ast.For:
    """
    Parse source and locate the for-node at the call site.

    Parameters
    ----------
    filename : str
        Path of the source file containing the ``for`` statement.
    for_lineno : int
        Line number reported for the ``for`` statement at the call site.

    Returns
    -------
    ast.For
        The ``For`` node found at ``for_lineno``.

    Raises
    ------
    OSError
        If the source file cannot be read.
    SyntaxError
        If the source file cannot be parsed.
    ValueError
        If no ``for`` statement is found at ``for_lineno``.

    """
    # Hand raw bytes to the parser so that it applies Python's own source
    # encoding rules (UTF-8 by default, BOM and PEP 263 coding cookies) instead
    # of the platform locale encoding used by ``Path.read_text()``.
    source = Path(filename).read_bytes()
    tree = ast.parse(source, filename)
    for_node = _find_for_node(tree, for_lineno)
    if for_node is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: could not locate the 'for' "
            "statement at the call site. Ensure you call iterate() directly in a for "
            "header, e.g., 'for item_ctx in loop.iterate(...)'.",
        )
    return for_node
220
+
221
+
222
def _validate_header_requirements(for_node: ast.For) -> tuple[str, ast.With]:
    """
    Check the ``for`` header and the ``with`` statement opening its body.

    The checks run in this order, and the first failing one raises:

    1. the iterable is ``<name>.iterate(...)``, optionally inside
       ``enumerate(...)``;
    2. the iteration context is bound to a simple name;
    3. the first statement of the loop body is a ``with`` statement;
    4. that ``with`` statement has exactly one context manager;
    5. that context manager is the iteration context name.

    Parameters
    ----------
    for_node : ast.For
        The ``for`` node found at the call site.

    Returns
    -------
    tuple[str, ast.With]
        The loop variable name and the first ``with`` block node.

    Raises
    ------
    ValueError
        If any of the checks above fails.

    """
    iterate_owner = _extract_iterate_target_name(for_node)
    if iterate_owner is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: expected '<name>.iterate(...)' "
            "in the for header. Complex expressions are not supported for this validation.",
        )

    ctx_name = _get_for_target_name(for_node)
    if ctx_name is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: requires binding the iteration "
            "context to a simple name, e.g., 'for item_ctx in loop.iterate(...):'. ",
        )

    leading_stmt = for_node.body[0] if for_node.body else None
    if not isinstance(leading_stmt, ast.With):
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: using a with statement is "
            "compulsory immediately inside the iterate-for body, e.g., "
            "'with item_ctx as item:'.",
        )

    if len(leading_stmt.items) != 1:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: the first with statement must "
            "have a single context manager using the loop target name, e.g., "
            "'with item_ctx as item:'.",
        )

    (sole_item,) = leading_stmt.items
    manager = sole_item.context_expr
    uses_ctx_name = isinstance(manager, ast.Name) and manager.id == ctx_name
    if not uses_ctx_name:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: the first with statement in the "
            "iterate-for body must use the iteration context variable, e.g., "
            "'with item_ctx as item:'.",
        )
    return iterate_owner, leading_stmt
265
+
266
+
267
+ def _enforce_no_buffer_outside_with(for_node: ast.For, loop_var_name: str, filename: str) -> None:
268
+ """
269
+ Disallow buffer calls outside the first-with block.
270
+ """
271
+ for outer_stmt in for_node.body[1:]:
272
+ offender = _first_offending_buffer_call(outer_stmt, loop_var_name, allowed_with=None)
273
+ if offender is not None:
274
+ raise ValueError(
275
+ "StatefulLoop.buffer() placement validation failed: must be called directly inside "
276
+ "the first 'with item_ctx as ...:' block (offending call at "
277
+ f"{filename}:{offender.lineno}).",
278
+ )
279
+
280
+
281
+ def _enforce_top_level_inside_with(with_node: ast.With, loop_var_name: str, filename: str) -> None:
282
+ """
283
+ Allow buffer only as top-level statements inside the with block.
284
+ """
285
+ for inner_stmt in with_node.body:
286
+ offender = _first_offending_buffer_call(inner_stmt, loop_var_name, allowed_with=with_node)
287
+ if offender is not None:
288
+ raise ValueError(
289
+ "StatefulLoop.buffer() placement validation failed: must be at the same "
290
+ "indentation level as other top-level statements inside the "
291
+ "'with item_ctx as ...:' "
292
+ f"block (offending call at {filename}:{offender.lineno}). "
293
+ "Avoid conditionals/loops/nested blocks.",
294
+ )
295
+
296
+
297
def validate_loop_usage(loop: StatefulLoop) -> None:
    """
    Validate placement rules for ``StatefulLoop.buffer`` at iterate callsite.

    The validation enforces the following constraints for the loop where
    ``for item_ctx in loop.iterate(...):`` appears:

    - The first statement in the loop body must be a single ``with`` statement
      using the iteration context variable, e.g., ``with item_ctx as item:``.
    - Any calls to ``loop.buffer(...)`` must be top-level statements inside
      that ``with`` block (i.e., not nested inside conditionals/loops/try/with
      blocks, functions/classes/lambdas, or comprehensions).
    - No ``loop.buffer(...)`` call can appear outside that first ``with`` block
      within the same loop.

    Parameters
    ----------
    loop : StatefulLoop
        The loop instance for which placement is being validated. It is not
        read by the validation itself, which relies on the call stack only.

    Raises
    ------
    ValueError
        If the structure of the loop body or the placement of ``buffer`` calls
        violates the rules above, or if the frame holding the ``for`` statement
        cannot be retrieved from the call stack.

    Notes
    -----
    - This check analyzes the caller's source file using the AST and is intended
      to run before the first iteration as a fail-fast safeguard.
    - The function assumes Python 3.10+ AST (e.g., presence of ``ast.Match``).
    - The ``for`` statement is looked up two frames above this function.
      NOTE(review): this presumably means it must be called directly from
      ``StatefulLoop.iterate()`` - confirm against the caller.

    """
    # Walk two frames up without assuming that each hop exists:
    # ``inspect.currentframe()`` may return None on interpreters without stack
    # frame support, and ``f_back`` is None at the bottom of the stack.
    current = inspect.currentframe()
    hop = current.f_back if current is not None else None
    frame = hop.f_back if hop is not None else None
    del current, hop
    if frame is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: could not inspect the call "
            "stack to locate the 'for' statement at the call site.",
        )
    try:
        filename = frame.f_code.co_filename
        for_lineno = frame.f_lineno
    finally:
        # Drop the frame reference as soon as possible to avoid keeping the
        # caller's frame (and its locals) alive through a reference cycle.
        del frame
    # Validate StatefulLoop.iterate() usage.
    for_node = _parse_and_find_for(filename, for_lineno)
    loop_var_name, with_node = _validate_header_requirements(for_node)
    # Validate StatefulLoop.buffer() usage.
    _enforce_no_buffer_outside_with(for_node, loop_var_name, filename)
    _enforce_top_level_inside_with(with_node, loop_var_name, filename)
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Created on Sat Jun 28 18:35:00 2025.
4
+
5
+ @author: pierrot
6
+
7
+ """
8
+ import sys
9
+
10
+
11
# The aggstream subpackage is only imported outside of Sphinx autodoc builds
# (numba dependency); under Sphinx its public names are bound to None.
if "sphinx" not in sys.modules:
    from .aggstream import AggStream  # type: ignore
    from .aggstream import by_x_rows  # type: ignore
else:
    AggStream = None
    by_x_rows = None

from .asof_merger import AsofMerger


# Names re-exported as the public API of this package.
__all__ = ["AggStream", "AsofMerger", "by_x_rows"]
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Created on Wed Dec 1 18:35:00 2021.
4
+
5
+ @author: pierrot
6
+
7
+ """
8
# Re-export the public names of the aggstream subpackage.
from .aggstream import AggStream
from .segmentby import by_x_rows


__all__ = [
    "AggStream",
    "by_x_rows",
]