oups 2025.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of oups might be problematic. Click here for more details.
- oups/__init__.py +40 -0
- oups/date_utils.py +62 -0
- oups/defines.py +26 -0
- oups/numpy_utils.py +114 -0
- oups/stateful_loop/__init__.py +14 -0
- oups/stateful_loop/loop_persistence_io.py +55 -0
- oups/stateful_loop/stateful_loop.py +654 -0
- oups/stateful_loop/validate_loop_usage.py +338 -0
- oups/stateful_ops/__init__.py +22 -0
- oups/stateful_ops/aggstream/__init__.py +12 -0
- oups/stateful_ops/aggstream/aggstream.py +1524 -0
- oups/stateful_ops/aggstream/cumsegagg.py +580 -0
- oups/stateful_ops/aggstream/jcumsegagg.py +416 -0
- oups/stateful_ops/aggstream/segmentby.py +1018 -0
- oups/stateful_ops/aggstream/utils.py +71 -0
- oups/stateful_ops/asof_merger/__init__.py +11 -0
- oups/stateful_ops/asof_merger/asof_merger.py +750 -0
- oups/stateful_ops/asof_merger/get_config.py +401 -0
- oups/stateful_ops/asof_merger/validate_params.py +285 -0
- oups/store/__init__.py +15 -0
- oups/store/filepath_utils.py +68 -0
- oups/store/indexer.py +457 -0
- oups/store/ordered_parquet_dataset/__init__.py +19 -0
- oups/store/ordered_parquet_dataset/metadata_filename.py +50 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/__init__.py +15 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/base.py +863 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/read_only.py +252 -0
- oups/store/ordered_parquet_dataset/parquet_adapter.py +157 -0
- oups/store/ordered_parquet_dataset/write/__init__.py +19 -0
- oups/store/ordered_parquet_dataset/write/iter_merge_split_data.py +131 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/__init__.py +22 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/base.py +784 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/n_rows_strategy.py +297 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/time_period_strategy.py +319 -0
- oups/store/ordered_parquet_dataset/write/write.py +270 -0
- oups/store/store/__init__.py +11 -0
- oups/store/store/dataset_cache.py +50 -0
- oups/store/store/iter_intersections.py +397 -0
- oups/store/store/store.py +345 -0
- oups-2025.9.5.dist-info/LICENSE +201 -0
- oups-2025.9.5.dist-info/METADATA +44 -0
- oups-2025.9.5.dist-info/RECORD +43 -0
- oups-2025.9.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
AST-based validation for ``StatefulLoop`` usage.
|
|
4
|
+
|
|
5
|
+
This module enforces the positive placement rules for ``StatefulLoop``:
|
|
6
|
+
|
|
7
|
+
- The ``for`` statement iterating over ``loop.iterate(...)`` must be
|
|
8
|
+
immediately followed by a ``with <item_ctx> as ...:`` statement, where
|
|
9
|
+
``<item_ctx>`` is the loop target variable.
|
|
10
|
+
- Any ``loop.buffer(...)`` calls within that loop must appear as direct
|
|
11
|
+
statements inside the body of that single ``with`` block (i.e., at the same
|
|
12
|
+
indentation level as other top-level statements inside the ``with``), not
|
|
13
|
+
inside conditionals, nested loops, nested ``with`` blocks, try/except, match
|
|
14
|
+
blocks, comprehensions, or nested function/class bodies.
|
|
15
|
+
|
|
16
|
+
This check is intended to run before the loop begins and is strict: if the
|
|
17
|
+
source file cannot be read, the calling code should surface that failure.
|
|
18
|
+
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import ast
|
|
23
|
+
import inspect
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import TYPE_CHECKING
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
29
|
+
# Imported only for typing to avoid circular imports at runtime
|
|
30
|
+
from .stateful_loop import StatefulLoop
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
ILLEGAL_ANCESTOR_TYPES = (
|
|
34
|
+
ast.If,
|
|
35
|
+
ast.For,
|
|
36
|
+
ast.While,
|
|
37
|
+
ast.AsyncFor,
|
|
38
|
+
ast.Try,
|
|
39
|
+
ast.Match,
|
|
40
|
+
ast.FunctionDef,
|
|
41
|
+
ast.AsyncFunctionDef,
|
|
42
|
+
ast.ClassDef,
|
|
43
|
+
ast.Lambda,
|
|
44
|
+
ast.ListComp,
|
|
45
|
+
ast.SetComp,
|
|
46
|
+
ast.DictComp,
|
|
47
|
+
ast.GeneratorExp,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_for_target_name(for_node: ast.For) -> str | None:
|
|
52
|
+
"""
|
|
53
|
+
Return the name bound by the iteration context in the ``for`` target.
|
|
54
|
+
|
|
55
|
+
Parameters
|
|
56
|
+
----------
|
|
57
|
+
for_node : ast.For
|
|
58
|
+
The ``for`` node to inspect.
|
|
59
|
+
|
|
60
|
+
Returns
|
|
61
|
+
-------
|
|
62
|
+
Optional[str]
|
|
63
|
+
The name bound by the loop target, or None if unsupported.
|
|
64
|
+
|
|
65
|
+
"""
|
|
66
|
+
tgt = for_node.target
|
|
67
|
+
if isinstance(tgt, ast.Name):
|
|
68
|
+
return tgt.id
|
|
69
|
+
# Allow tuple-unpacking pattern produced by enumerate(...), e.g.::
|
|
70
|
+
# for i, item_ctx in enumerate(loop.iterate(...)):
|
|
71
|
+
# In this case, the iteration context variable is conventionally the
|
|
72
|
+
# second element.
|
|
73
|
+
if isinstance(tgt, ast.Tuple) and len(getattr(tgt, "elts", ())) == 2:
|
|
74
|
+
second = tgt.elts[1]
|
|
75
|
+
if isinstance(second, ast.Name):
|
|
76
|
+
return second.id
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _find_for_node(tree: ast.AST, lineno: int) -> ast.For | None:
|
|
81
|
+
"""
|
|
82
|
+
Find the ``ast.For`` node that starts at the given line number.
|
|
83
|
+
|
|
84
|
+
Parameters
|
|
85
|
+
----------
|
|
86
|
+
tree : ast.AST
|
|
87
|
+
Parsed module AST.
|
|
88
|
+
lineno : int
|
|
89
|
+
Expected starting line number of the ``for`` statement.
|
|
90
|
+
|
|
91
|
+
Returns
|
|
92
|
+
-------
|
|
93
|
+
Optional[ast.For]
|
|
94
|
+
The matching ``For`` node, if any.
|
|
95
|
+
|
|
96
|
+
"""
|
|
97
|
+
for node in ast.walk(tree):
|
|
98
|
+
if isinstance(node, ast.For) and getattr(node, "lineno", None) == lineno:
|
|
99
|
+
return node
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _extract_iterate_target_name(for_node: ast.For) -> str | None:
|
|
104
|
+
"""
|
|
105
|
+
Return the variable name used to call ``iterate`` in ``for ... in ...``.
|
|
106
|
+
|
|
107
|
+
Supports the simple dotted form ``<name>.iterate(...)`` and the common
|
|
108
|
+
``enumerate(<name>.iterate(...))`` wrapper. If the iterate call target is
|
|
109
|
+
more complex (e.g., attribute chains), return None to skip strict
|
|
110
|
+
enforcement.
|
|
111
|
+
|
|
112
|
+
Parameters
|
|
113
|
+
----------
|
|
114
|
+
for_node : ast.For
|
|
115
|
+
The ``for`` node to inspect.
|
|
116
|
+
|
|
117
|
+
Returns
|
|
118
|
+
-------
|
|
119
|
+
Optional[str]
|
|
120
|
+
The base name used for the ``iterate`` call, or None if unsupported.
|
|
121
|
+
|
|
122
|
+
"""
|
|
123
|
+
it = for_node.iter
|
|
124
|
+
call_candidate: ast.AST | None = it
|
|
125
|
+
# Unwrap enumerate(...) if present
|
|
126
|
+
if isinstance(call_candidate, ast.Call) and isinstance(call_candidate.func, ast.Name):
|
|
127
|
+
if call_candidate.func.id == "enumerate" and call_candidate.args:
|
|
128
|
+
call_candidate = call_candidate.args[0]
|
|
129
|
+
if (
|
|
130
|
+
isinstance(call_candidate, ast.Call)
|
|
131
|
+
and isinstance(call_candidate.func, ast.Attribute)
|
|
132
|
+
and call_candidate.func.attr == "iterate"
|
|
133
|
+
):
|
|
134
|
+
base = call_candidate.func.value
|
|
135
|
+
if isinstance(base, ast.Name):
|
|
136
|
+
return base.id
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _is_illegal_placement(
|
|
141
|
+
ancestors: tuple[ast.AST, ...],
|
|
142
|
+
*,
|
|
143
|
+
allowed_with: ast.With | None,
|
|
144
|
+
) -> bool:
|
|
145
|
+
"""
|
|
146
|
+
Return True if the call is not directly under the required ``with`` block.
|
|
147
|
+
|
|
148
|
+
Rules:
|
|
149
|
+
- If ``allowed_with`` is None, any placement is illegal (used to flag calls
|
|
150
|
+
outside the first required ``with``).
|
|
151
|
+
- Otherwise, since traversal starts at a single top-level statement inside
|
|
152
|
+
the required ``with`` body, we accept when no control-flow nodes appear
|
|
153
|
+
among ancestors. Control-flow nodes are ``If``, ``For``, ``While``,
|
|
154
|
+
``AsyncFor``, ``Try``, ``Match``, function/class/lambda defs, or
|
|
155
|
+
comprehensions. This ensures the call is not nested within such blocks.
|
|
156
|
+
|
|
157
|
+
"""
|
|
158
|
+
if allowed_with is None:
|
|
159
|
+
return True
|
|
160
|
+
# Since we traverse from a single top-level statement inside the 'with'
|
|
161
|
+
# body, simply ensure there is no control-flow node among ancestors.
|
|
162
|
+
for a in ancestors:
|
|
163
|
+
if isinstance(a, ILLEGAL_ANCESTOR_TYPES):
|
|
164
|
+
return True
|
|
165
|
+
return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _first_offending_buffer_call(
    node: ast.AST,
    loop_var_name: str,
    *,
    allowed_with: ast.With | None,
) -> ast.Call | None:
    """
    Find and return the first illegal ``loop.buffer(...)`` call under ``node``.

    The subtree is explored depth-first in the order in which ``ast`` yields
    child nodes, so that the reported call is the earliest offender
    encountered rather than the last one.

    Parameters
    ----------
    node : ast.AST
        Root node to inspect (a statement of the ``for`` body or of the
        required ``with`` body).
    loop_var_name : str
        The variable name on which ``iterate`` was called (e.g., ``loop``).
        Only ``<loop_var_name>.buffer(...)`` calls are considered.
    allowed_with : Optional[ast.With]
        The required ``with`` block when inspecting its body, or None when
        inspecting statements outside of it (every matching call is then an
        offender).

    Returns
    -------
    Optional[ast.Call]
        The offending call node, if found.

    """
    pending: list[tuple[ast.AST, tuple[ast.AST, ...]]] = [(node, ())]
    while pending:
        current, ancestors = pending.pop()
        func = getattr(current, "func", None)
        if (
            isinstance(current, ast.Call)
            and isinstance(func, ast.Attribute)
            and func.attr == "buffer"
            and isinstance(func.value, ast.Name)
            and func.value.id == loop_var_name
            and _is_illegal_placement(ancestors, allowed_with=allowed_with)
        ):
            return current
        lineage = ancestors + (current,)
        # The stack is LIFO: pushing children reversed makes the first child
        # the next one popped, which preserves child order during traversal.
        children = list(ast.iter_child_nodes(current))
        pending.extend((child, lineage) for child in reversed(children))
    return None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _parse_and_find_for(filename: str, for_lineno: int) -> ast.For:
    """
    Parse source and locate the for-node at the call site.

    Parameters
    ----------
    filename : str
        Path of the Python source file containing the ``for`` statement.
    for_lineno : int
        Line number at which the ``for`` statement is expected.

    Returns
    -------
    ast.For
        The ``for`` node found at ``for_lineno``.

    Raises
    ------
    ValueError
        If no ``for`` statement is found at ``for_lineno``.
    OSError
        If the source file cannot be read (propagated from the file read).

    """
    # Hand raw bytes to the parser instead of decoding with the locale's
    # default encoding: the compiler then applies Python's own source
    # encoding rules (BOM / coding declaration, UTF-8 otherwise).
    source = Path(filename).read_bytes()
    tree = ast.parse(source, filename)
    for_node = _find_for_node(tree, for_lineno)
    if for_node is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: could not locate the 'for' "
            "statement at the call site. Ensure you call iterate() directly in a for "
            "header, e.g., 'for item_ctx in loop.iterate(...)'.",
        )
    return for_node
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _validate_header_requirements(for_node: ast.For) -> tuple[str, ast.With]:
    """
    Check the ``for`` header and the statement opening the loop body.

    The checks run in this order, each raising on failure: the iterable must
    be a supported ``<name>.iterate(...)`` form, the target must bind the
    iteration context to a simple name, the first body statement must be a
    ``with``, that ``with`` must hold exactly one context manager, and this
    context manager must be the iteration context variable.

    Parameters
    ----------
    for_node : ast.For
        The ``for`` statement to validate.

    Returns
    -------
    tuple[str, ast.With]
        The loop variable name and the first ``with`` block node.

    Raises
    ------
    ValueError
        If any of the checks above fails.

    """
    iterable_owner = _extract_iterate_target_name(for_node)
    if iterable_owner is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: expected '<name>.iterate(...)' "
            "in the for header. Complex expressions are not supported for this validation.",
        )

    ctx_name = _get_for_target_name(for_node)
    if ctx_name is None:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: requires binding the iteration "
            "context to a simple name, e.g., 'for item_ctx in loop.iterate(...):'. ",
        )

    # An empty body is treated like a body not starting with ``with``.
    first_stmt = for_node.body[0] if for_node.body else None
    if not isinstance(first_stmt, ast.With):
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: using a with statement is "
            "compulsory immediately inside the iterate-for body, e.g., "
            "'with item_ctx as item:'.",
        )

    if len(first_stmt.items) != 1:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: the first with statement must "
            "have a single context manager using the loop target name, e.g., "
            "'with item_ctx as item:'.",
        )

    manager = first_stmt.items[0].context_expr
    uses_iteration_ctx = isinstance(manager, ast.Name) and manager.id == ctx_name
    if not uses_iteration_ctx:
        raise ValueError(
            "StatefulLoop.iterate() usage validation failed: the first with statement in the "
            "iterate-for body must use the iteration context variable, e.g., "
            "'with item_ctx as item:'.",
        )
    return iterable_owner, first_stmt
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _enforce_no_buffer_outside_with(for_node: ast.For, loop_var_name: str, filename: str) -> None:
    """
    Disallow buffer calls outside the first-with block.

    Every statement of the ``for`` body after the first one is inspected, as
    well as the statements of the loop's ``else:`` clause, which also belong
    to the loop statement while lying outside the first ``with`` block.

    Parameters
    ----------
    for_node : ast.For
        The ``for`` statement whose first body statement is the required
        ``with`` block.
    loop_var_name : str
        The variable name on which ``iterate`` was called (e.g., ``loop``).
    filename : str
        Path of the inspected source file, used in the error message.

    Raises
    ------
    ValueError
        If a ``<loop_var_name>.buffer(...)`` call is found in any inspected
        statement.

    """
    # body[0] is the required ``with`` block, validated separately.
    outside_stmts = [*for_node.body[1:], *for_node.orelse]
    for outer_stmt in outside_stmts:
        offender = _first_offending_buffer_call(outer_stmt, loop_var_name, allowed_with=None)
        if offender is not None:
            raise ValueError(
                "StatefulLoop.buffer() placement validation failed: must be called directly inside "
                "the first 'with item_ctx as ...:' block (offending call at "
                f"{filename}:{offender.lineno}).",
            )
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _enforce_top_level_inside_with(with_node: ast.With, loop_var_name: str, filename: str) -> None:
    """
    Reject buffer calls nested below the top level of the with block.

    Parameters
    ----------
    with_node : ast.With
        The required ``with`` block whose body statements are inspected.
    loop_var_name : str
        The variable name on which ``iterate`` was called (e.g., ``loop``).
    filename : str
        Path of the inspected source file, used in the error message.

    Raises
    ------
    ValueError
        If a ``<loop_var_name>.buffer(...)`` call in the ``with`` body is
        reported as illegally placed.

    """
    # Lazy scan: statements are inspected one at a time and the scan stops at
    # the first statement yielding an offender.
    findings = (
        _first_offending_buffer_call(stmt, loop_var_name, allowed_with=with_node)
        for stmt in with_node.body
    )
    offender = next((call for call in findings if call is not None), None)
    if offender is None:
        return
    raise ValueError(
        "StatefulLoop.buffer() placement validation failed: must be at the same "
        "indentation level as other top-level statements inside the "
        "'with item_ctx as ...:' "
        f"block (offending call at {filename}:{offender.lineno}). "
        "Avoid conditionals/loops/nested blocks.",
    )
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def validate_loop_usage(loop: StatefulLoop) -> None:
    """
    Validate placement rules for ``StatefulLoop.buffer`` at iterate callsite.

    The validation enforces the following constraints for the loop where
    ``for item_ctx in loop.iterate(...):`` appears:

    - The first statement in the loop body must be a single ``with`` statement
      using the iteration context variable, e.g., ``with item_ctx as item:``.
    - Any calls to ``loop.buffer(...)`` must be top-level statements inside
      that ``with`` block (i.e., not nested inside conditionals/loops/try/with
      blocks, functions/classes/lambdas, or comprehensions).
    - No ``loop.buffer(...)`` call can appear outside that first ``with`` block
      within the same loop.

    Parameters
    ----------
    loop : StatefulLoop
        The loop instance for which placement is being validated. It is not
        read by this function: the analysis relies on the call stack only.

    Raises
    ------
    ValueError
        If the structure of the loop body or the placement of ``buffer`` calls
        violates the rules above.
    RuntimeError
        If the frame of the ``for`` statement cannot be obtained from the
        call stack.

    Notes
    -----
    - This check analyzes the caller's source file using the AST and is intended
      to run before the first iteration as a fail-fast safeguard.
    - The function assumes Python 3.10+ AST (e.g., presence of ``ast.Match``).
    - The inspected frame is two levels above this function: the caller of
      this function's caller.

    """
    current = inspect.currentframe()
    caller = current.f_back if current is not None else None
    frame = caller.f_back if caller is not None else None
    try:
        if frame is None:
            # ``inspect.currentframe()`` may return None on interpreters
            # without frame support, and ``f_back`` is None at the bottom of
            # the stack; fail with an explicit message in both cases.
            raise RuntimeError(
                "StatefulLoop.iterate() usage validation failed: could not access the "
                "caller's stack frame to locate the 'for' statement.",
            )
        filename = frame.f_code.co_filename
        for_lineno = frame.f_lineno
    finally:
        # Drop frame references promptly to avoid keeping reference cycles
        # alive, as recommended by the ``inspect`` documentation.
        del frame, caller, current
    # Validate StatefulLoop.iterate() usage.
    for_node = _parse_and_find_for(filename, for_lineno)
    loop_var_name, with_node = _validate_header_requirements(for_node)
    # Validate StatefulLoop.buffer() usage.
    _enforce_no_buffer_outside_with(for_node, loop_var_name, filename)
    _enforce_top_level_inside_with(with_node, loop_var_name, filename)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Created on Sat Jun 28 18:35:00 2025.
|
|
4
|
+
|
|
5
|
+
@author: pierrot
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# The aggstream module is only imported outside of Sphinx autodoc builds
# (numba dependency); while Sphinx is loaded, its public names are bound to
# None so that they still exist at package level.
if "sphinx" not in sys.modules:
    from .aggstream import AggStream  # type: ignore
    from .aggstream import by_x_rows  # type: ignore
else:
    AggStream = None
    by_x_rows = None

from .asof_merger import AsofMerger


__all__ = ["AggStream", "AsofMerger", "by_x_rows"]
|