oups 2025.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oups might be problematic. Click here for more details.

Files changed (43)
  1. oups/__init__.py +40 -0
  2. oups/date_utils.py +62 -0
  3. oups/defines.py +26 -0
  4. oups/numpy_utils.py +114 -0
  5. oups/stateful_loop/__init__.py +14 -0
  6. oups/stateful_loop/loop_persistence_io.py +55 -0
  7. oups/stateful_loop/stateful_loop.py +654 -0
  8. oups/stateful_loop/validate_loop_usage.py +338 -0
  9. oups/stateful_ops/__init__.py +22 -0
  10. oups/stateful_ops/aggstream/__init__.py +12 -0
  11. oups/stateful_ops/aggstream/aggstream.py +1524 -0
  12. oups/stateful_ops/aggstream/cumsegagg.py +580 -0
  13. oups/stateful_ops/aggstream/jcumsegagg.py +416 -0
  14. oups/stateful_ops/aggstream/segmentby.py +1018 -0
  15. oups/stateful_ops/aggstream/utils.py +71 -0
  16. oups/stateful_ops/asof_merger/__init__.py +11 -0
  17. oups/stateful_ops/asof_merger/asof_merger.py +750 -0
  18. oups/stateful_ops/asof_merger/get_config.py +401 -0
  19. oups/stateful_ops/asof_merger/validate_params.py +285 -0
  20. oups/store/__init__.py +15 -0
  21. oups/store/filepath_utils.py +68 -0
  22. oups/store/indexer.py +457 -0
  23. oups/store/ordered_parquet_dataset/__init__.py +19 -0
  24. oups/store/ordered_parquet_dataset/metadata_filename.py +50 -0
  25. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/__init__.py +15 -0
  26. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/base.py +863 -0
  27. oups/store/ordered_parquet_dataset/ordered_parquet_dataset/read_only.py +252 -0
  28. oups/store/ordered_parquet_dataset/parquet_adapter.py +157 -0
  29. oups/store/ordered_parquet_dataset/write/__init__.py +19 -0
  30. oups/store/ordered_parquet_dataset/write/iter_merge_split_data.py +131 -0
  31. oups/store/ordered_parquet_dataset/write/merge_split_strategies/__init__.py +22 -0
  32. oups/store/ordered_parquet_dataset/write/merge_split_strategies/base.py +784 -0
  33. oups/store/ordered_parquet_dataset/write/merge_split_strategies/n_rows_strategy.py +297 -0
  34. oups/store/ordered_parquet_dataset/write/merge_split_strategies/time_period_strategy.py +319 -0
  35. oups/store/ordered_parquet_dataset/write/write.py +270 -0
  36. oups/store/store/__init__.py +11 -0
  37. oups/store/store/dataset_cache.py +50 -0
  38. oups/store/store/iter_intersections.py +397 -0
  39. oups/store/store/store.py +345 -0
  40. oups-2025.9.5.dist-info/LICENSE +201 -0
  41. oups-2025.9.5.dist-info/METADATA +44 -0
  42. oups-2025.9.5.dist-info/RECORD +43 -0
  43. oups-2025.9.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Created on Wed Jan 24 21:30:00 2024.
4
+
5
+ @author: pierrot
6
+
7
+ """
8
+ import operator
9
+
10
+ from numpy import ones
11
+ from numpy import zeros
12
+ from numpy.typing import NDArray
13
+ from pandas import DataFrame
14
+
15
+
16
# Dispatch table mapping the textual comparison operators accepted in filter
# tuples to their implementation in the standard `operator` module.
# "=" is accepted as an alias of "==".
ops = {
    "==": operator.eq,
    "=": operator.eq,
    "!=": operator.ne,
    ">": operator.gt,
    ">=": operator.ge,
    "<": operator.lt,
    "<=": operator.le,
}
25
+
26
+
27
def dataframe_filter(df: DataFrame, filters) -> NDArray:
    """
    Produce a boolean row mask of the input dataframe.

    Parameters
    ----------
    df : DataFrame
        DataFrame to filter.
    filters : list[list[tuple]]
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in, ~]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR` operation.
        The outer list is mandatory: a flat list of tuples is rejected. To
        apply a single set of filters, wrap it as ``[[(column, op, val), ...]]``.
        With the `~` operator, the values of the column itself are negated
        (presumably a boolean column) and ``val`` is ignored, but a 3-item
        tuple is still required.

    Returns
    -------
    numpy 1D-array
        Boolean mask with one item per row of ``df``, ``True`` for rows to
        keep.

    Raises
    ------
    ValueError
        If ``filters`` is empty, if any item of the outer list is a tuple
        instead of an inner list, or if an operator is not supported.

    """
    if not filters:
        # Fail with an explicit message instead of an 'IndexError' further
        # down.
        raise ValueError("'filters' parameter has to contain at least one inner list.")
    # Check every item, not only the first one: a tuple located after a valid
    # inner list would otherwise fail later with an obscure unpacking error.
    if any(isinstance(or_part, tuple) for or_part in filters):
        raise ValueError(
            "not possible to have a 'filters' parameter without at least an inner list.",
        )
    n_rows = len(df)
    # 'OR' accumulator: starts all 'False', each set of filters adds rows.
    out = zeros(n_rows, dtype=bool)
    for or_part in filters:
        # 'AND' accumulator: starts all 'True', each filter removes rows.
        and_part = ones(n_rows, dtype=bool)
        for name, op, val in or_part:
            if op == "in":
                and_part &= df[name].isin(val).values
            elif op == "not in":
                and_part &= ~df[name].isin(val).values
            elif op in ops:
                and_part &= ops[op](df[name].values, val)
            elif op == "~":
                and_part &= ~df[name].values
            else:
                # Unknown operator.
                raise ValueError(f"operator '{op}' is not supported.")
        out |= and_part
    return out
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Created on Sat Jun 28 18:35:00 2025.
4
+
5
+ @author: pierrot
6
+
7
+ """
8
from .asof_merger import AsofMerger


# Public API of this package: 'AsofMerger' is re-exported from the sibling
# 'asof_merger' module.
__all__ = ["AsofMerger"]