ocf-data-sampler 0.5.34__tar.gz → 0.5.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (73)
  1. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/model.py +5 -0
  3. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +1 -1
  4. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/site.py +7 -2
  5. ocf_data_sampler-0.5.36/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +50 -0
  6. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
  7. ocf_data_sampler-0.5.34/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -29
  8. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/LICENSE +0 -0
  9. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/README.md +0 -0
  10. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/__init__.py +0 -0
  11. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/__init__.py +0 -0
  12. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/load.py +0 -0
  13. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/config/save.py +0 -0
  14. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  15. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  16. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/__init__.py +0 -0
  17. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/gsp.py +0 -0
  18. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/load_dataset.py +0 -0
  19. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  20. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  21. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  22. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
  23. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  24. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  25. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
  26. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  27. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  28. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/open_xarray_tensorstore.py +0 -0
  29. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/satellite.py +0 -0
  30. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/site.py +0 -0
  31. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/load/utils.py +0 -0
  32. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  33. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  34. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  35. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  36. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  37. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  38. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  39. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/site.py +0 -0
  40. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  41. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/__init__.py +0 -0
  42. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/diff_channels.py +0 -0
  43. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/dropout.py +0 -0
  44. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  45. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  46. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/geospatial.py +0 -0
  47. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/location.py +0 -0
  48. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  49. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/select/select_time_slice.py +0 -0
  50. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  51. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/datasets/picklecache.py +0 -0
  52. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  53. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  54. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  55. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  56. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  57. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py +0 -0
  58. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +0 -0
  59. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/diff_nwp_data.py +0 -0
  60. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  61. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  62. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  63. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  64. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler/utils.py +0 -0
  65. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  66. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  67. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/requires.txt +0 -0
  68. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  69. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/pyproject.toml +0 -0
  70. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/scripts/download_gsp_location_data.py +0 -0
  71. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/scripts/refactor_site.py +0 -0
  72. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/setup.cfg +0 -0
  73. {ocf_data_sampler-0.5.34 → ocf_data_sampler-0.5.36}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.34
3
+ Version: 0.5.36
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -93,6 +93,11 @@ class DropoutMixin(Base):
93
93
  "floats (probability that dropout of the corresponding timedelta is applied)",
94
94
  )
95
95
 
96
+ dropout_value: float = Field(
97
+ default=0.0,
98
+ description="The value to use for dropped out values. "
99
+ "Idea is to use -1, but to be backwards comptaible we've put the default as 0")
100
+
96
101
  @field_validator("dropout_timedeltas_minutes")
97
102
  def dropout_timedeltas_minutes_negative(cls, v: list[int]) -> list[int]:
98
103
  """Validate 'dropout_timedeltas_minutes'."""
@@ -206,7 +206,7 @@ class AbstractPVNetUKDataset(PickleCacheMixin, Dataset):
206
206
 
207
207
  # Combine all the modalities and fill NaNs
208
208
  combined_sample = merge_dicts(numpy_modalities)
209
- combined_sample = fill_nans_in_arrays(combined_sample)
209
+ combined_sample = fill_nans_in_arrays(combined_sample, config=self.config)
210
210
 
211
211
  return combined_sample
212
212
 
@@ -139,7 +139,7 @@ def process_and_combine_datasets(
139
139
 
140
140
  # Combine all the modalities and fill NaNs
141
141
  combined_sample = merge_dicts(numpy_modalities)
142
- combined_sample = fill_nans_in_arrays(combined_sample)
142
+ combined_sample = fill_nans_in_arrays(combined_sample, config=config)
143
143
 
144
144
  return combined_sample
145
145
 
@@ -330,7 +330,12 @@ class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
330
330
  self.config = config
331
331
 
332
332
  # get all locations
333
- self.locations = get_locations(datasets_dict["site"])
333
+ locations = get_locations(datasets_dict["site"])
334
+ self.locations = add_alterate_coordinate_projections(
335
+ locations,
336
+ datasets_dict,
337
+ primary_coords="lon_lat",
338
+ )
334
339
 
335
340
  # Get t0 times where all input data is available
336
341
  valid_t0s = self.find_valid_t0s(datasets_dict)
@@ -0,0 +1,50 @@
1
+ """Utility functions for merging dictionaries and filling NaNs in arrays."""
2
+
3
+ import numpy as np
4
+
5
+ from ocf_data_sampler.config.model import Configuration
6
+ from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
7
+ from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
8
+ from ocf_data_sampler.numpy_sample.satellite import SatelliteSampleKey
9
+ from ocf_data_sampler.numpy_sample.site import SiteSampleKey
10
+
11
+
12
+ def merge_dicts(list_of_dicts: list[dict]) -> dict:
13
+ """Merge a list of dictionaries into a single dictionary."""
14
+ # TODO: This doesn't account for duplicate keys, which will be overwritten
15
+ combined_dict = {}
16
+ for d in list_of_dicts:
17
+ combined_dict.update(d)
18
+ return combined_dict
19
+
20
+
21
+ def fill_nans_in_arrays(
22
+ sample: dict, config: Configuration | None = None, nwp_provider: str | None = None,
23
+ ) -> dict:
24
+ """Fills all NaN values in each np.ndarray in the sample dictionary.
25
+
26
+ Operation is performed in-place on the sample.
27
+ By default a fill value of 0.0 is used, but if a config is provided,
28
+ it can use the configured dropout values.
29
+ """
30
+ for k, v in sample.items():
31
+ if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
32
+ if np.isnan(v).any():
33
+ fill_value = 0.0
34
+ if config is not None:
35
+ if k == GSPSampleKey.gsp:
36
+ fill_value = config.input_data.gsp.dropout_value
37
+ elif k == SiteSampleKey.generation:
38
+ fill_value = config.input_data.site.dropout_value
39
+ elif k == SatelliteSampleKey.satellite_actual:
40
+ fill_value = config.input_data.satellite.dropout_value
41
+ elif k == NWPSampleKey.nwp and nwp_provider in config.input_data.nwp:
42
+ fill_value = config.input_data.nwp[nwp_provider].dropout_value
43
+
44
+ sample[k] = np.nan_to_num(v, copy=False, nan=fill_value)
45
+
46
+ # Recursion is included to reach NWP arrays in subdict
47
+ elif isinstance(v, dict):
48
+ fill_nans_in_arrays(v, config=config, nwp_provider=k)
49
+
50
+ return sample
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.34
3
+ Version: 0.5.36
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -1,29 +0,0 @@
1
- """Utility functions for merging dictionaries and filling NaNs in arrays."""
2
-
3
- import numpy as np
4
-
5
-
6
- def merge_dicts(list_of_dicts: list[dict]) -> dict:
7
- """Merge a list of dictionaries into a single dictionary."""
8
- # TODO: This doesn't account for duplicate keys, which will be overwritten
9
- combined_dict = {}
10
- for d in list_of_dicts:
11
- combined_dict.update(d)
12
- return combined_dict
13
-
14
-
15
- def fill_nans_in_arrays(sample: dict) -> dict:
16
- """Fills all NaN values in each np.ndarray in the sample dictionary with zeros.
17
-
18
- Operation is performed in-place on the sample.
19
- """
20
- for k, v in sample.items():
21
- if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
22
- if np.isnan(v).any():
23
- sample[k] = np.nan_to_num(v, copy=False, nan=0.0)
24
-
25
- # Recursion is included to reach NWP arrays in subdict
26
- elif isinstance(v, dict):
27
- fill_nans_in_arrays(v)
28
-
29
- return sample