ocf-data-sampler 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/PKG-INFO +1 -1
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/config/model.py +33 -4
- ocf_data_sampler-0.3.1/ocf_data_sampler/select/dropout.py +61 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
- ocf_data_sampler-0.3.0/ocf_data_sampler/select/dropout.py +0 -47
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/LICENSE +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/README.md +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/config/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/config/load.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/config/save.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/load_dataset.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/open_tensorstore_zarrs.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/site.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/collate.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/site.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/geospatial.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/location.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/utils.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/pyproject.toml +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/scripts/download_gsp_location_data.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/scripts/refactor_site.py +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/setup.cfg +0 -0
- {ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/utils/compute_icon_mean_stddev.py +0 -0
|
@@ -90,11 +90,10 @@ class DropoutMixin(Base):
|
|
|
90
90
|
"negative or zero.",
|
|
91
91
|
)
|
|
92
92
|
|
|
93
|
-
dropout_fraction: float = Field(
|
|
93
|
+
dropout_fraction: float|list[float] = Field(
|
|
94
94
|
default=0,
|
|
95
|
-
description="Chance of dropout being applied to each sample"
|
|
96
|
-
|
|
97
|
-
le=1,
|
|
95
|
+
description="Either a float(Chance of dropout being applied to each sample) or a list of "
|
|
96
|
+
"floats (probability that dropout of the corresponding timedelta is applied)",
|
|
98
97
|
)
|
|
99
98
|
|
|
100
99
|
@field_validator("dropout_timedeltas_minutes")
|
|
@@ -105,6 +104,36 @@ class DropoutMixin(Base):
|
|
|
105
104
|
raise ValueError("Dropout timedeltas must be negative")
|
|
106
105
|
return v
|
|
107
106
|
|
|
107
|
+
|
|
108
|
+
@field_validator("dropout_fraction")
def dropout_fractions(cls, dropout_frac: float|list[float]) -> float|list[float]:
    """Check that 'dropout_frac' is a valid probability or list of probabilities.

    A single float must lie in the closed interval [0, 1]. A list must be
    non-empty, contain only floats in [0, 1], and sum to 1.0 (compared with
    a relative tolerance of 1e-9).

    Args:
        dropout_frac: The value supplied for the `dropout_fraction` field

    Returns:
        The input value, unchanged, when every check passes

    Raises:
        ValueError: If a float is out of range, or a list is empty, holds a
            non-float, holds an out-of-range value, or does not sum to 1.0
        TypeError: If the value is neither a float nor a list
    """
    from math import isclose

    # Scalar case: a single probability
    if isinstance(dropout_frac, float):
        # Written as negated comparisons so that NaN is rejected as well
        if not (dropout_frac <= 1):
            raise ValueError("Input should be less than or equal to 1")
        if not (dropout_frac >= 0):
            raise ValueError("Input should be greater than or equal to 0")
        return dropout_frac

    if not isinstance(dropout_frac, list):
        raise TypeError("Must be either a float or a list of floats")

    # List case: one probability per entry
    if not dropout_frac:
        raise ValueError("List cannot be empty")

    # The element-type check covers the whole list before any range check runs
    for frac in dropout_frac:
        if not isinstance(frac, float):
            raise ValueError("All elements in the list must be floats")

    for frac in dropout_frac:
        if not (0 <= frac <= 1):
            raise ValueError("Each float in the list must be between 0 and 1")

    total = sum(dropout_frac)
    if not isclose(total, 1.0, rel_tol=1e-9):
        raise ValueError("Sum of all floats in the list must be 1.0")

    return dropout_frac
|
|
135
|
+
|
|
136
|
+
|
|
108
137
|
@model_validator(mode="after")
|
|
109
138
|
def dropout_instructions_consistent(self) -> "DropoutMixin":
|
|
110
139
|
"""Validator for dropout instructions."""
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Functions for simulating dropout in time series data.
|
|
2
|
+
|
|
3
|
+
This is used for the following types of data: GSP, Satellite and Site
|
|
4
|
+
This is not used for NWP
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import xarray as xr
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def apply_sampled_dropout_time(
    t0: pd.Timestamp,
    dropout_timedeltas: list[pd.Timedelta],
    dropout_frac: float | list[float],
    da: xr.DataArray,
) -> xr.DataArray:
    """Randomly pick a dropout time from a list of timedeltas and apply it to the data.

    Two modes are supported, selected by the type of `dropout_frac`:

    * Scalar: with probability `dropout_frac` a timedelta is drawn uniformly
      from `dropout_timedeltas`; otherwise `da` is returned untouched.
    * Sequence (list, tuple or NumPy array): a timedelta is always drawn,
      using `dropout_frac` as the per-timedelta weights handed to
      `np.random.choice(..., p=...)`. There is no separate "no dropout"
      draw in this mode.

    Args:
        t0: The forecast init-time
        dropout_timedeltas: List of timedeltas relative to t0 to pick from
        dropout_frac: Either a single probability that dropout will be applied
            (between 0 and 1 inclusive), or a sequence of probabilities, one for
            each of the corresponding timedeltas
        da: Xarray DataArray with 'time_utc' coordinate

    Returns:
        `da` itself when no dropout is applied, otherwise `da` masked with
        `da.where` so that only times at or before the sampled dropout time
        are kept (later times are replaced with NaNs).

    Raises:
        ValueError: If the sequence of probabilities and the timedeltas differ
            in length, if dropout is requested but no timedeltas are given, or
            if a scalar `dropout_frac` is outside [0, 1]
    """
    # Any sequence of probabilities selects the per-timedelta mode. Tuples and
    # arrays are accepted as well as lists so they do not fall through to the
    # scalar comparisons below and fail with an unrelated error.
    if isinstance(dropout_frac, (list, tuple, np.ndarray)):
        if len(dropout_frac) != len(dropout_timedeltas):
            raise ValueError("Lengths of dropout_frac and dropout_timedeltas should match")

        # Fail with a clear message instead of NumPy's "'a' cannot be empty"
        if len(dropout_timedeltas) == 0:
            raise ValueError("To apply dropout, dropout_timedeltas must be provided")

        dropout_time = t0 + np.random.choice(dropout_timedeltas, p=dropout_frac)
        # This replaces the times after the dropout with NaNs
        return da.where(da.time_utc <= dropout_time)

    # Scalar mode
    if dropout_frac > 0 and len(dropout_timedeltas) == 0:
        raise ValueError("To apply dropout, dropout_timedeltas must be provided")

    if not (0 <= dropout_frac <= 1):
        raise ValueError("dropout_frac must be between 0 and 1 inclusive")

    # The `or` short-circuits, so no random number is drawn when there is
    # nothing to pick from
    if len(dropout_timedeltas) == 0 or np.random.uniform() >= dropout_frac:
        return da

    dropout_time = t0 + np.random.choice(dropout_timedeltas)
    # This replaces the times after the dropout with NaNs
    return da.where(da.time_utc <= dropout_time)
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"""Functions for simulating dropout in time series data.
|
|
2
|
-
|
|
3
|
-
This is used for the following types of data: GSP, Satellite and Site
|
|
4
|
-
This is not used for NWP
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import xarray as xr
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def apply_sampled_dropout_time(
    t0: pd.Timestamp,
    dropout_timedeltas: list[pd.Timedelta],
    dropout_frac: float,
    da: xr.DataArray,
) -> xr.DataArray:
    """Sample a dropout time relative to t0 and mask the data after it.

    With probability `dropout_frac`, one entry of `dropout_timedeltas` is
    drawn and added to `t0` to give the dropout time; otherwise the data is
    returned as-is.

    Args:
        t0: The forecast init-time
        dropout_timedeltas: List of timedeltas relative to t0 to pick from
        dropout_frac: Probability that dropout will be applied.
            This should be between 0 and 1 inclusive
        da: Xarray DataArray with 'time_utc' coordinate

    Returns:
        `da` unchanged when no dropout is sampled, otherwise the result of
        `da.where` keeping only times at or before the dropout time

    Raises:
        ValueError: If dropout is requested without any timedeltas, if any
            timedelta is greater than zero, or if `dropout_frac` is outside
            [0, 1]
    """
    # Input checks, in the same order as they have always been applied
    if dropout_frac > 0 and len(dropout_timedeltas) == 0:
        raise ValueError("To apply dropout, dropout_timedeltas must be provided")

    zero = pd.Timedelta("0min")
    if any(delta > zero for delta in dropout_timedeltas):
        raise ValueError("Dropout timedeltas must be negative")

    if not (0 <= dropout_frac <= 1):
        raise ValueError("dropout_frac must be between 0 and 1 inclusive")

    # Decide whether dropout happens; the uniform draw is skipped entirely
    # when there are no timedeltas to choose from
    skip_dropout = len(dropout_timedeltas) == 0 or np.random.uniform() >= dropout_frac
    if skip_dropout:
        return da

    cutoff = t0 + np.random.choice(dropout_timedeltas)
    # Times after the cutoff are replaced with NaNs
    return da.where(da.time_utc <= cutoff)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/gfs.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/icon.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/load/open_tensorstore_zarrs.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/common_types.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/datetime_features.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/satellite.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/numpy_sample/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/datasets/site.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/base.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/sample/site.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler/torch_datasets/utils/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.3.0 → ocf_data_sampler-0.3.1}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|