ocf-data-sampler 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -0,0 +1,33 @@
1
+ """A mixin to handle pickling and caching a dataset's state."""
2
+
3
+ import os
4
+ import pickle
5
+
6
+
7
class PickleCacheMixin:
    """A mixin for classes that need to cache their state using pickle.

    Once ``presave_pickle`` has succeeded, pickling the object emits only the
    path of the presaved file instead of the full instance state; unpickling
    then reloads the full state from that file. Before ``presave_pickle`` is
    called the object pickles its whole ``__dict__`` as usual.
    """

    def __init__(self, *args: object, **kwargs: object) -> None:
        """Initialize the pickle path and call the parent constructor.

        Args:
            *args: Positional arguments forwarded to the next class in the MRO
            **kwargs: Keyword arguments forwarded to the next class in the MRO
        """
        # None means "not presaved": __getstate__ will pickle the full state
        self._pickle_path = None
        super().__init__(*args, **kwargs)  # cooperative multiple inheritance

    def presave_pickle(self, pickle_path: str) -> None:
        """Save the full object state to a pickle file and store the pickle path.

        Args:
            pickle_path: Location of the file the instance ``__dict__`` is written to

        Raises:
            OSError: If the file cannot be opened or written
            pickle.PicklingError: If part of the state cannot be pickled

        If saving fails, the previously stored pickle path is restored so the
        object does not point at a missing or partially written file.
        """
        previous_path = getattr(self, "_pickle_path", None)

        # The path is set before dumping so that it is part of the saved state:
        # an object restored from the file keeps pickling by reference.
        self._pickle_path = pickle_path
        try:
            with open(pickle_path, "wb") as f:
                pickle.dump(self.__dict__, f)
        except BaseException:
            # Roll back, otherwise __getstate__ would emit a reference to a
            # file that does not hold a complete copy of the state.
            self._pickle_path = previous_path
            raise

    def __getstate__(self) -> dict:
        """If presaved, only pickle reference. Otherwise pickle everything.

        Returns:
            ``{"_pickle_path": path}`` when a presaved file has been registered,
            otherwise the instance ``__dict__``.
        """
        # getattr: tolerate instances whose __init__ was bypassed
        pickle_path = getattr(self, "_pickle_path", None)
        if pickle_path:
            return {"_pickle_path": pickle_path}
        return self.__dict__

    def __setstate__(self, state: dict) -> None:
        """Restore object from pickle, reloading from presaved file if possible.

        Args:
            state: The dictionary produced by ``__getstate__``

        If ``state`` references a presaved file that no longer exists, only the
        contents of ``state`` are restored and no error is raised.
        """
        self.__dict__.update(state)

        # getattr: tolerate states that carry no "_pickle_path" entry
        pickle_path = getattr(self, "_pickle_path", None)
        if pickle_path and os.path.exists(pickle_path):
            # SECURITY: pickle.load can execute arbitrary code. The path must
            # only ever refer to a trusted file written by presave_pickle.
            with open(pickle_path, "rb") as f:
                saved_state = pickle.load(f)  # noqa: S301
            self.__dict__.update(saved_state)
@@ -19,6 +19,7 @@ from ocf_data_sampler.numpy_sample.common_types import NumpyBatch, NumpySample
19
19
  from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
20
20
  from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
21
21
  from ocf_data_sampler.select import Location, fill_time_periods
22
+ from ocf_data_sampler.torch_datasets.datasets.picklecache import PickleCacheMixin
22
23
  from ocf_data_sampler.torch_datasets.utils import (
23
24
  add_alterate_coordinate_projections,
24
25
  config_normalization_values_to_dicts,
@@ -67,7 +68,7 @@ def get_gsp_locations(
67
68
  return locations
68
69
 
69
70
 
70
- class AbstractPVNetUKDataset(Dataset):
71
+ class AbstractPVNetUKDataset(PickleCacheMixin, Dataset):
71
72
  """Abstract class for PVNet UK datasets."""
72
73
 
73
74
  def __init__(
@@ -85,6 +86,8 @@ class AbstractPVNetUKDataset(Dataset):
85
86
  end_time: Limit the init-times to be before this
86
87
  gsp_ids: List of GSP IDs to create samples for. Defaults to all
87
88
  """
89
+ super().__init__()
90
+
88
91
  config = load_yaml_configuration(config_filename)
89
92
  datasets_dict = get_dataset_dict(config.input_data, gsp_ids=gsp_ids)
90
93
 
@@ -225,7 +228,6 @@ class AbstractPVNetUKDataset(Dataset):
225
228
  return valid_t0_times
226
229
 
227
230
 
228
-
229
231
  class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
230
232
  """A torch Dataset for creating PVNet UK regional samples."""
231
233
 
@@ -24,6 +24,7 @@ from ocf_data_sampler.select import (
24
24
  find_contiguous_t0_periods,
25
25
  intersection_of_multiple_dataframes_of_periods,
26
26
  )
27
+ from ocf_data_sampler.torch_datasets.datasets.picklecache import PickleCacheMixin
27
28
  from ocf_data_sampler.torch_datasets.utils import (
28
29
  add_alterate_coordinate_projections,
29
30
  config_normalization_values_to_dicts,
@@ -144,7 +145,7 @@ def process_and_combine_datasets(
144
145
  return combined_sample
145
146
 
146
147
 
147
- class SitesDataset(Dataset):
148
+ class SitesDataset(PickleCacheMixin, Dataset):
148
149
  """A torch Dataset for creating PVNet Site samples."""
149
150
 
150
151
  def __init__(
@@ -160,6 +161,8 @@ class SitesDataset(Dataset):
160
161
  start_time: Limit the init-times to be after this
161
162
  end_time: Limit the init-times to be before this
162
163
  """
164
+ super().__init__()
165
+
163
166
  config = load_yaml_configuration(config_filename)
164
167
  datasets_dict = get_dataset_dict(config.input_data)
165
168
 
@@ -301,7 +304,7 @@ class SitesDataset(Dataset):
301
304
  return self._get_sample(t0, location)
302
305
 
303
306
 
304
- class SitesDatasetConcurrent(Dataset):
307
+ class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
305
308
  """A torch Dataset for creating PVNet Site batches with samples for all sites."""
306
309
 
307
310
  def __init__(
@@ -317,6 +320,8 @@ class SitesDatasetConcurrent(Dataset):
317
320
  start_time: Limit the init-times to be after this
318
321
  end_time: Limit the init-times to be before this
319
322
  """
323
+ super().__init__()
324
+
320
325
  config = load_yaml_configuration(config_filename)
321
326
  datasets_dict = get_dataset_dict(config.input_data)
322
327
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.19
3
+ Version: 0.5.20
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -41,8 +41,9 @@ ocf_data_sampler/select/location.py,sha256=Qp0di-Pgq8WLjN9IBcTVTaRM3lckhr4ZVzaDR
41
41
  ocf_data_sampler/select/select_spatial_slice.py,sha256=NB6NtZBc_Mb5zPCItzBIEa_Nroj2kEsjUIsa_kdWoj0,7081
42
42
  ocf_data_sampler/select/select_time_slice.py,sha256=cpkdovJMvcjxSGfq9G0OJK5aDAeCXg7exWYrJnR4N2w,4116
43
43
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=o0SsEXXZ6k9iL__5_RN1Sf60lw_eqK91P3UFEHAD2k0,102
44
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=wUsIZ0Fhq5bbE8v02C0UPcFWIhWI7kfSka9UrWP0_m4,12240
45
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=OXrYSRrWUdQbEjsEPPJjam10zJKU6S3r5kA07RbpzFU,15680
44
+ ocf_data_sampler/torch_datasets/datasets/picklecache.py,sha256=b8T5lgKfiPXLwuVQuFpCQBlU-HNBrA-Z-eSwYICKvsQ,1350
45
+ ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=n0NYbhrMR_LtGz4EiryljeQG-tI4fWfJLifKclMThcU,12367
46
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=fZyGwUmcFMYuEJd05fZc96mLEya8ehYnVRVRssX52qM,15854
46
47
  ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
47
48
  ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
48
49
  ocf_data_sampler/torch_datasets/sample/site.py,sha256=40NwNTqjL1WVhPdwe02zDHHfDLG2u_bvCfRCtGAtFc0,1466
@@ -58,7 +59,7 @@ ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0Wrg
58
59
  ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
59
60
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
60
61
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
61
- ocf_data_sampler-0.5.19.dist-info/METADATA,sha256=CyMcURh5QvAzzxVvu_DzbK4krMLtGtSlVjr4KURpFiw,12817
62
- ocf_data_sampler-0.5.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
- ocf_data_sampler-0.5.19.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
64
- ocf_data_sampler-0.5.19.dist-info/RECORD,,
62
+ ocf_data_sampler-0.5.20.dist-info/METADATA,sha256=AKZ_mCJGD1NgN_ZG9YF9pa0CZ9fEZOwWamZPmM8jBXc,12817
63
+ ocf_data_sampler-0.5.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ ocf_data_sampler-0.5.20.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
+ ocf_data_sampler-0.5.20.dist-info/RECORD,,