ocf-data-sampler 0.5.34__tar.gz → 0.5.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/PKG-INFO +1 -1
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/model.py +5 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +1 -1
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/site.py +7 -2
- ocf_data_sampler-0.5.36/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +50 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
- ocf_data_sampler-0.5.34/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -29
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/LICENSE +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/README.md +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/load.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/save.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/load_dataset.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/open_xarray_tensorstore.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/site.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/collate.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/site.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/diff_channels.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/geospatial.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/location.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/picklecache.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/diff_nwp_data.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/utils.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/pyproject.toml +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/scripts/download_gsp_location_data.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/scripts/refactor_site.py +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/setup.cfg +0 -0
- {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/tests/test_utils.py +0 -0
|
@@ -93,6 +93,11 @@ class DropoutMixin(Base):
|
|
|
93
93
|
"floats (probability that dropout of the corresponding timedelta is applied)",
|
|
94
94
|
)
|
|
95
95
|
|
|
96
|
+
dropout_value: float = Field(
|
|
97
|
+
default=0.0,
|
|
98
|
+
description="The value to use for dropped out values. "
|
|
99
|
+
"Idea is to use -1, but to be backwards comptaible we've put the default as 0")
|
|
100
|
+
|
|
96
101
|
@field_validator("dropout_timedeltas_minutes")
|
|
97
102
|
def dropout_timedeltas_minutes_negative(cls, v: list[int]) -> list[int]:
|
|
98
103
|
"""Validate 'dropout_timedeltas_minutes'."""
|
|
@@ -206,7 +206,7 @@ class AbstractPVNetUKDataset(PickleCacheMixin, Dataset):
|
|
|
206
206
|
|
|
207
207
|
# Combine all the modalities and fill NaNs
|
|
208
208
|
combined_sample = merge_dicts(numpy_modalities)
|
|
209
|
-
combined_sample = fill_nans_in_arrays(combined_sample)
|
|
209
|
+
combined_sample = fill_nans_in_arrays(combined_sample, config=self.config)
|
|
210
210
|
|
|
211
211
|
return combined_sample
|
|
212
212
|
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/site.py
RENAMED
|
@@ -139,7 +139,7 @@ def process_and_combine_datasets(
|
|
|
139
139
|
|
|
140
140
|
# Combine all the modalities and fill NaNs
|
|
141
141
|
combined_sample = merge_dicts(numpy_modalities)
|
|
142
|
-
combined_sample = fill_nans_in_arrays(combined_sample)
|
|
142
|
+
combined_sample = fill_nans_in_arrays(combined_sample, config=config)
|
|
143
143
|
|
|
144
144
|
return combined_sample
|
|
145
145
|
|
|
@@ -330,7 +330,12 @@ class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
|
|
|
330
330
|
self.config = config
|
|
331
331
|
|
|
332
332
|
# get all locations
|
|
333
|
-
|
|
333
|
+
locations = get_locations(datasets_dict["site"])
|
|
334
|
+
self.locations = add_alterate_coordinate_projections(
|
|
335
|
+
locations,
|
|
336
|
+
datasets_dict,
|
|
337
|
+
primary_coords="lon_lat",
|
|
338
|
+
)
|
|
334
339
|
|
|
335
340
|
# Get t0 times where all input data is available
|
|
336
341
|
valid_t0s = self.find_valid_t0s(datasets_dict)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Utility functions for merging dictionaries and filling NaNs in arrays."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from ocf_data_sampler.config.model import Configuration
|
|
6
|
+
from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
|
|
7
|
+
from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
|
|
8
|
+
from ocf_data_sampler.numpy_sample.satellite import SatelliteSampleKey
|
|
9
|
+
from ocf_data_sampler.numpy_sample.site import SiteSampleKey
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def merge_dicts(list_of_dicts: list[dict]) -> dict:
|
|
13
|
+
"""Merge a list of dictionaries into a single dictionary."""
|
|
14
|
+
# TODO: This doesn't account for duplicate keys, which will be overwritten
|
|
15
|
+
combined_dict = {}
|
|
16
|
+
for d in list_of_dicts:
|
|
17
|
+
combined_dict.update(d)
|
|
18
|
+
return combined_dict
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def fill_nans_in_arrays(
|
|
22
|
+
sample: dict, config: Configuration | None = None, nwp_provider: str | None = None,
|
|
23
|
+
) -> dict:
|
|
24
|
+
"""Fills all NaN values in each np.ndarray in the sample dictionary.
|
|
25
|
+
|
|
26
|
+
Operation is performed in-place on the sample.
|
|
27
|
+
By default a fill value of 0.0 are used, but if a config is provided,
|
|
28
|
+
it can use the configured dropout values.
|
|
29
|
+
"""
|
|
30
|
+
for k, v in sample.items():
|
|
31
|
+
if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
|
|
32
|
+
if np.isnan(v).any():
|
|
33
|
+
fill_value = 0.0
|
|
34
|
+
if config is not None:
|
|
35
|
+
if k == GSPSampleKey.gsp:
|
|
36
|
+
fill_value = config.input_data.gsp.dropout_value
|
|
37
|
+
elif k == SiteSampleKey.generation:
|
|
38
|
+
fill_value = config.input_data.site.dropout_value
|
|
39
|
+
elif k == SatelliteSampleKey.satellite_actual:
|
|
40
|
+
fill_value = config.input_data.satellite.dropout_value
|
|
41
|
+
elif k == NWPSampleKey.nwp and nwp_provider in config.input_data.nwp:
|
|
42
|
+
fill_value = config.input_data.nwp[nwp_provider].dropout_value
|
|
43
|
+
|
|
44
|
+
sample[k] = np.nan_to_num(v, copy=False, nan=fill_value)
|
|
45
|
+
|
|
46
|
+
# Recursion is included to reach NWP arrays in subdict
|
|
47
|
+
elif isinstance(v, dict):
|
|
48
|
+
fill_nans_in_arrays(v, config=config, nwp_provider=k)
|
|
49
|
+
|
|
50
|
+
return sample
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
"""Utility functions for merging dictionaries and filling NaNs in arrays."""
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def merge_dicts(list_of_dicts: list[dict]) -> dict:
|
|
7
|
-
"""Merge a list of dictionaries into a single dictionary."""
|
|
8
|
-
# TODO: This doesn't account for duplicate keys, which will be overwritten
|
|
9
|
-
combined_dict = {}
|
|
10
|
-
for d in list_of_dicts:
|
|
11
|
-
combined_dict.update(d)
|
|
12
|
-
return combined_dict
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def fill_nans_in_arrays(sample: dict) -> dict:
|
|
16
|
-
"""Fills all NaN values in each np.ndarray in the sample dictionary with zeros.
|
|
17
|
-
|
|
18
|
-
Operation is performed in-place on the sample.
|
|
19
|
-
"""
|
|
20
|
-
for k, v in sample.items():
|
|
21
|
-
if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
|
|
22
|
-
if np.isnan(v).any():
|
|
23
|
-
sample[k] = np.nan_to_num(v, copy=False, nan=0.0)
|
|
24
|
-
|
|
25
|
-
# Recursion is included to reach NWP arrays in subdict
|
|
26
|
-
elif isinstance(v, dict):
|
|
27
|
-
fill_nans_in_arrays(v)
|
|
28
|
-
|
|
29
|
-
return sample
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/gfs.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/icon.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/open_xarray_tensorstore.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/collate.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/common_types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/satellite.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/diff_channels.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/base.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/site.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|