ocf-data-sampler 0.5.18__tar.gz → 0.5.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/PKG-INFO +1 -1
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/gsp.py +4 -2
- ocf_data_sampler-0.5.20/ocf_data_sampler/torch_datasets/datasets/picklecache.py +33 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +4 -2
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/datasets/site.py +7 -2
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/SOURCES.txt +1 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/LICENSE +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/README.md +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/config/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/config/load.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/config/model.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/config/save.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/load_dataset.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/open_xarray_tensorstore.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/site.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/collate.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/site.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/diff_channels.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/geospatial.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/location.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/diff_nwp_data.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/utils.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/pyproject.toml +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/scripts/download_gsp_location_data.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/scripts/refactor_site.py +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/setup.cfg +0 -0
- {ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/tests/test_utils.py +0 -0
|
@@ -32,7 +32,7 @@ def open_gsp(
|
|
|
32
32
|
boundaries_version: str = "20220314",
|
|
33
33
|
public: bool = False,
|
|
34
34
|
) -> xr.DataArray:
|
|
35
|
-
"""Open the GSP data and validates its data types.
|
|
35
|
+
"""Open and eagerly load the GSP data and validates its data types.
|
|
36
36
|
|
|
37
37
|
Args:
|
|
38
38
|
zarr_path: Path to the GSP zarr data
|
|
@@ -93,4 +93,6 @@ def open_gsp(
|
|
|
93
93
|
dtype = gsp_da.coords[coord].dtype
|
|
94
94
|
raise TypeError(f"{coord} should be {expected_dtype.__name__}, not {dtype}")
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
# Below we load the data eagerly into memory - this makes the dataset faster to sample from, but
|
|
97
|
+
# at the cost of a little extra memory usage
|
|
98
|
+
return gsp_da.compute()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""A mixin to handle pickling and caching a dataset's state."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import pickle
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PickleCacheMixin:
    """A mixin for classes that need to cache their state using pickle.

    After `presave_pickle` has been called, pickling the object only serialises
    the path of the presaved file rather than the full state. On unpickling,
    the full state is read back from that file. The object's `__dict__` must
    therefore be picklable at the time `presave_pickle` is called.
    """

    def __init__(self, *args: object, **kwargs: object) -> None:
        """Initialize the pickle path and call the parent constructor.

        Args:
            *args: Positional arguments forwarded to the next class in the MRO
            **kwargs: Keyword arguments forwarded to the next class in the MRO
        """
        self._pickle_path = None
        super().__init__(*args, **kwargs)  # cooperative multiple inheritance

    def presave_pickle(self, pickle_path: str) -> None:
        """Save the full object state to a pickle file and store the pickle path.

        Args:
            pickle_path: Path of the file to write the pickled state to

        Raises:
            OSError: If the file cannot be opened or written
            pickle.PicklingError: If part of the object state cannot be pickled
        """
        previous_path = getattr(self, "_pickle_path", None)

        # The path is set *before* dumping on purpose: it has to be part of the saved state so
        # that an object restored from the file keeps pointing at the file
        self._pickle_path = pickle_path
        try:
            with open(pickle_path, "wb") as f:
                pickle.dump(self.__dict__, f)
        except Exception:
            # Do not leave the object referencing a missing or truncated file, otherwise the
            # next pickling would only ship a dangling path
            self._pickle_path = previous_path
            raise

    def __getstate__(self) -> dict:
        """If presaved, only pickle reference. Otherwise pickle everything."""
        # getattr guards against instances for which the mixin's __init__ was never run
        pickle_path = getattr(self, "_pickle_path", None)
        if pickle_path:
            return {"_pickle_path": pickle_path}
        return self.__dict__

    def __setstate__(self, state: dict) -> None:
        """Restore object from pickle, reloading from presaved file if possible.

        Args:
            state: The state produced by `__getstate__`
        """
        self.__dict__.update(state)

        # The state may come from an object which never had a pickle path set
        pickle_path = self.__dict__.get("_pickle_path")

        # NOTE: if the presaved file is no longer available the object is left holding only the
        # state it was pickled with - which is just the path when it was presaved
        if pickle_path and os.path.exists(pickle_path):
            # SECURITY: unpickling can execute arbitrary code. The presaved file must only ever
            # come from a trusted location
            with open(pickle_path, "rb") as f:
                saved_state = pickle.load(f)  # noqa: S301
            self.__dict__.update(saved_state)
|
|
@@ -19,6 +19,7 @@ from ocf_data_sampler.numpy_sample.common_types import NumpyBatch, NumpySample
|
|
|
19
19
|
from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
|
|
20
20
|
from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
|
|
21
21
|
from ocf_data_sampler.select import Location, fill_time_periods
|
|
22
|
+
from ocf_data_sampler.torch_datasets.datasets.picklecache import PickleCacheMixin
|
|
22
23
|
from ocf_data_sampler.torch_datasets.utils import (
|
|
23
24
|
add_alterate_coordinate_projections,
|
|
24
25
|
config_normalization_values_to_dicts,
|
|
@@ -67,7 +68,7 @@ def get_gsp_locations(
|
|
|
67
68
|
return locations
|
|
68
69
|
|
|
69
70
|
|
|
70
|
-
class AbstractPVNetUKDataset(Dataset):
|
|
71
|
+
class AbstractPVNetUKDataset(PickleCacheMixin, Dataset):
|
|
71
72
|
"""Abstract class for PVNet UK datasets."""
|
|
72
73
|
|
|
73
74
|
def __init__(
|
|
@@ -85,6 +86,8 @@ class AbstractPVNetUKDataset(Dataset):
|
|
|
85
86
|
end_time: Limit the init-times to be before this
|
|
86
87
|
gsp_ids: List of GSP IDs to create samples for. Defaults to all
|
|
87
88
|
"""
|
|
89
|
+
super().__init__()
|
|
90
|
+
|
|
88
91
|
config = load_yaml_configuration(config_filename)
|
|
89
92
|
datasets_dict = get_dataset_dict(config.input_data, gsp_ids=gsp_ids)
|
|
90
93
|
|
|
@@ -225,7 +228,6 @@ class AbstractPVNetUKDataset(Dataset):
|
|
|
225
228
|
return valid_t0_times
|
|
226
229
|
|
|
227
230
|
|
|
228
|
-
|
|
229
231
|
class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
|
|
230
232
|
"""A torch Dataset for creating PVNet UK regional samples."""
|
|
231
233
|
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/datasets/site.py
RENAMED
|
@@ -24,6 +24,7 @@ from ocf_data_sampler.select import (
|
|
|
24
24
|
find_contiguous_t0_periods,
|
|
25
25
|
intersection_of_multiple_dataframes_of_periods,
|
|
26
26
|
)
|
|
27
|
+
from ocf_data_sampler.torch_datasets.datasets.picklecache import PickleCacheMixin
|
|
27
28
|
from ocf_data_sampler.torch_datasets.utils import (
|
|
28
29
|
add_alterate_coordinate_projections,
|
|
29
30
|
config_normalization_values_to_dicts,
|
|
@@ -144,7 +145,7 @@ def process_and_combine_datasets(
|
|
|
144
145
|
return combined_sample
|
|
145
146
|
|
|
146
147
|
|
|
147
|
-
class SitesDataset(Dataset):
|
|
148
|
+
class SitesDataset(PickleCacheMixin, Dataset):
|
|
148
149
|
"""A torch Dataset for creating PVNet Site samples."""
|
|
149
150
|
|
|
150
151
|
def __init__(
|
|
@@ -160,6 +161,8 @@ class SitesDataset(Dataset):
|
|
|
160
161
|
start_time: Limit the init-times to be after this
|
|
161
162
|
end_time: Limit the init-times to be before this
|
|
162
163
|
"""
|
|
164
|
+
super().__init__()
|
|
165
|
+
|
|
163
166
|
config = load_yaml_configuration(config_filename)
|
|
164
167
|
datasets_dict = get_dataset_dict(config.input_data)
|
|
165
168
|
|
|
@@ -301,7 +304,7 @@ class SitesDataset(Dataset):
|
|
|
301
304
|
return self._get_sample(t0, location)
|
|
302
305
|
|
|
303
306
|
|
|
304
|
-
class SitesDatasetConcurrent(Dataset):
|
|
307
|
+
class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
|
|
305
308
|
"""A torch Dataset for creating PVNet Site batches with samples for all sites."""
|
|
306
309
|
|
|
307
310
|
def __init__(
|
|
@@ -317,6 +320,8 @@ class SitesDatasetConcurrent(Dataset):
|
|
|
317
320
|
start_time: Limit the init-times to be after this
|
|
318
321
|
end_time: Limit the init-times to be before this
|
|
319
322
|
"""
|
|
323
|
+
super().__init__()
|
|
324
|
+
|
|
320
325
|
config = load_yaml_configuration(config_filename)
|
|
321
326
|
datasets_dict = get_dataset_dict(config.input_data)
|
|
322
327
|
|
|
@@ -49,6 +49,7 @@ ocf_data_sampler/select/location.py
|
|
|
49
49
|
ocf_data_sampler/select/select_spatial_slice.py
|
|
50
50
|
ocf_data_sampler/select/select_time_slice.py
|
|
51
51
|
ocf_data_sampler/torch_datasets/datasets/__init__.py
|
|
52
|
+
ocf_data_sampler/torch_datasets/datasets/picklecache.py
|
|
52
53
|
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
|
|
53
54
|
ocf_data_sampler/torch_datasets/datasets/site.py
|
|
54
55
|
ocf_data_sampler/torch_datasets/sample/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/gfs.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/icon.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/load/open_xarray_tensorstore.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/collate.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/common_types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/satellite.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/numpy_sample/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/diff_channels.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/base.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler/torch_datasets/sample/site.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.5.18 → ocf_data_sampler-0.5.20}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|