oups 2025.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of oups might be problematic. Click here for more details.
- oups/__init__.py +40 -0
- oups/date_utils.py +62 -0
- oups/defines.py +26 -0
- oups/numpy_utils.py +114 -0
- oups/stateful_loop/__init__.py +14 -0
- oups/stateful_loop/loop_persistence_io.py +55 -0
- oups/stateful_loop/stateful_loop.py +654 -0
- oups/stateful_loop/validate_loop_usage.py +338 -0
- oups/stateful_ops/__init__.py +22 -0
- oups/stateful_ops/aggstream/__init__.py +12 -0
- oups/stateful_ops/aggstream/aggstream.py +1524 -0
- oups/stateful_ops/aggstream/cumsegagg.py +580 -0
- oups/stateful_ops/aggstream/jcumsegagg.py +416 -0
- oups/stateful_ops/aggstream/segmentby.py +1018 -0
- oups/stateful_ops/aggstream/utils.py +71 -0
- oups/stateful_ops/asof_merger/__init__.py +11 -0
- oups/stateful_ops/asof_merger/asof_merger.py +750 -0
- oups/stateful_ops/asof_merger/get_config.py +401 -0
- oups/stateful_ops/asof_merger/validate_params.py +285 -0
- oups/store/__init__.py +15 -0
- oups/store/filepath_utils.py +68 -0
- oups/store/indexer.py +457 -0
- oups/store/ordered_parquet_dataset/__init__.py +19 -0
- oups/store/ordered_parquet_dataset/metadata_filename.py +50 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/__init__.py +15 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/base.py +863 -0
- oups/store/ordered_parquet_dataset/ordered_parquet_dataset/read_only.py +252 -0
- oups/store/ordered_parquet_dataset/parquet_adapter.py +157 -0
- oups/store/ordered_parquet_dataset/write/__init__.py +19 -0
- oups/store/ordered_parquet_dataset/write/iter_merge_split_data.py +131 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/__init__.py +22 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/base.py +784 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/n_rows_strategy.py +297 -0
- oups/store/ordered_parquet_dataset/write/merge_split_strategies/time_period_strategy.py +319 -0
- oups/store/ordered_parquet_dataset/write/write.py +270 -0
- oups/store/store/__init__.py +11 -0
- oups/store/store/dataset_cache.py +50 -0
- oups/store/store/iter_intersections.py +397 -0
- oups/store/store/store.py +345 -0
- oups-2025.9.5.dist-info/LICENSE +201 -0
- oups-2025.9.5.dist-info/METADATA +44 -0
- oups-2025.9.5.dist-info/RECORD +43 -0
- oups-2025.9.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Created on Wed Jan 24 21:30:00 2024.
|
|
4
|
+
|
|
5
|
+
@author: pierrot
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
import operator
|
|
9
|
+
|
|
10
|
+
from numpy import ones
|
|
11
|
+
from numpy import zeros
|
|
12
|
+
from numpy.typing import NDArray
|
|
13
|
+
from pandas import DataFrame
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Binary comparison operators accepted in a filter tuple ``(column, op, val)``,
# mapped to their implementation in the standard `operator` module.
# "=" is an alias of "==". The operators "in", "not in" and "~" are not listed
# here: `dataframe_filter` handles them in dedicated branches.
ops = {
    "==": operator.eq,
    "=": operator.eq,
    "!=": operator.ne,
    ">": operator.gt,
    ">=": operator.ge,
    "<": operator.lt,
    "<=": operator.le,
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def dataframe_filter(df: DataFrame, filters) -> NDArray:
    """
    Produce a boolean row mask of the input dataframe.

    Parameters
    ----------
    df : DataFrame
        DataFrame to filter.
    filters : list[list[tuple]]
        To filter out data from seed.
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in, ~]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR` operation.
        The outer list is mandatory: a bare list of tuples is rejected, so a
        single set of filters has to be written ``[[(column, op, val), ...]]``.
        With the `~` operator, ``val`` is ignored, but the tuple still needs
        its 3 items.

    Returns
    -------
    numpy 1D-array
        Boolean mask with one item per row of ``df``, ``True`` for rows to
        keep.

    Raises
    ------
    ValueError
        If ``filters`` is empty, if its first item is a tuple (missing inner
        list), or if an operator is not supported.

    """
    if not filters:
        # Fail explicitly instead of letting 'filters[0]' raise a bare error.
        raise ValueError("'filters' parameter cannot be empty.")
    if isinstance(filters[0], tuple):
        raise ValueError(
            "not possible to have a 'filters' parameter without at least an inner list.",
        )
    n_rows = len(df)
    # 'OR' accumulator: starts all False, a row is kept as soon as one set of
    # filters accepts it.
    out = zeros(n_rows, dtype=bool)
    for or_part in filters:
        # 'AND' accumulator: starts all True, a row is dropped as soon as one
        # condition of the set rejects it.
        and_part = ones(n_rows, dtype=bool)
        for name, op, val in or_part:
            if op == "in":
                and_part &= df[name].isin(val).values
            elif op == "not in":
                and_part &= ~df[name].isin(val).values
            elif op in ops:
                and_part &= ops[op](df[name].values, val)
            elif op == "~":
                # Negation of the column values themselves, 'val' is unused.
                and_part &= ~df[name].values
            else:
                # Unknown operator.
                raise ValueError(f"operator '{op}' is not supported.")
        out |= and_part
    return out
|