ocf-data-sampler 0.5.13__tar.gz → 0.5.14__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ocf-data-sampler might be problematic.

Files changed (70):
  1. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/config/model.py +0 -17
  3. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +7 -7
  4. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/datasets/site.py +7 -7
  5. ocf_data_sampler-0.5.14/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +59 -0
  6. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
  7. ocf_data_sampler-0.5.13/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +0 -57
  8. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/LICENSE +0 -0
  9. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/README.md +0 -0
  10. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/__init__.py +0 -0
  11. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/config/__init__.py +0 -0
  12. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/config/load.py +0 -0
  13. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/config/save.py +0 -0
  14. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  15. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  16. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/__init__.py +0 -0
  17. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/gsp.py +0 -0
  18. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/load_dataset.py +0 -0
  19. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  20. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  21. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  22. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
  23. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  24. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  25. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
  26. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  27. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  28. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/open_xarray_tensorstore.py +0 -0
  29. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/satellite.py +0 -0
  30. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/site.py +0 -0
  31. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/load/utils.py +0 -0
  32. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  33. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  34. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  35. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  36. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  37. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  38. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  39. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/site.py +0 -0
  40. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  41. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/__init__.py +0 -0
  42. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/dropout.py +0 -0
  43. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  44. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  45. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/geospatial.py +0 -0
  46. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/location.py +0 -0
  47. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  48. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/select/select_time_slice.py +0 -0
  49. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  50. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  51. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  52. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  53. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  54. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  55. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py +0 -0
  56. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  57. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  58. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  59. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  60. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  61. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler/utils.py +0 -0
  62. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  63. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  64. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler.egg-info/requires.txt +0 -0
  65. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  66. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/pyproject.toml +0 -0
  67. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/scripts/download_gsp_location_data.py +0 -0
  68. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/scripts/refactor_site.py +0 -0
  69. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/setup.cfg +0 -0
  70. {ocf_data_sampler-0.5.13 → ocf_data_sampler-0.5.14}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.13
3
+ Version: 0.5.14
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -172,23 +172,6 @@ class NormalisationConstantsMixin(Base):
172
172
  """Normalisation constants for multiple channels."""
173
173
  normalisation_constants: dict[str, NormalisationValues]
174
174
 
175
- @property
176
- def channel_means(self) -> dict[str, float]:
177
- """Return the channel means."""
178
- return {
179
- channel: norm_values.mean
180
- for channel, norm_values in self.normalisation_constants.items()
181
- }
182
-
183
-
184
- @property
185
- def channel_stds(self) -> dict[str, float]:
186
- """Return the channel standard deviations."""
187
- return {
188
- channel: norm_values.std
189
- for channel, norm_values in self.normalisation_constants.items()
190
- }
191
-
192
175
 
193
176
  class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConstantsMixin):
194
177
  """Satellite configuration model."""
@@ -137,10 +137,10 @@ class AbstractPVNetUKDataset(Dataset):
137
137
  for nwp_key, da_nwp in dataset_dict["nwp"].items():
138
138
 
139
139
  # Standardise and convert to NumpyBatch
140
- da_channel_means = self.means_dict["nwp"][nwp_key]
141
- da_channel_stds = self.stds_dict["nwp"][nwp_key]
140
+ channel_means = self.means_dict["nwp"][nwp_key]
141
+ channel_stds = self.stds_dict["nwp"][nwp_key]
142
142
 
143
- da_nwp = (da_nwp - da_channel_means) / da_channel_stds
143
+ da_nwp = (da_nwp - channel_means) / channel_stds
144
144
 
145
145
  nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_sample(da_nwp)
146
146
 
@@ -151,17 +151,17 @@ class AbstractPVNetUKDataset(Dataset):
151
151
  da_sat = dataset_dict["sat"]
152
152
 
153
153
  # Standardise and convert to NumpyBatch
154
- da_channel_means = self.means_dict["sat"]
155
- da_channel_stds = self.stds_dict["sat"]
154
+ channel_means = self.means_dict["sat"]
155
+ channel_stds = self.stds_dict["sat"]
156
156
 
157
- da_sat = (da_sat - da_channel_means) / da_channel_stds
157
+ da_sat = (da_sat - channel_means) / channel_stds
158
158
 
159
159
  numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))
160
160
 
161
161
  if "gsp" in dataset_dict:
162
162
  gsp_config = self.config.input_data.gsp
163
163
  da_gsp = dataset_dict["gsp"]
164
- da_gsp = da_gsp / da_gsp.effective_capacity_mwp
164
+ da_gsp = da_gsp / da_gsp.effective_capacity_mwp.values
165
165
 
166
166
  # Convert to NumpyBatch
167
167
  numpy_modalities.append(
@@ -82,10 +82,10 @@ def process_and_combine_datasets(
82
82
 
83
83
  # Standardise and convert to NumpyBatch
84
84
 
85
- da_channel_means = means_dict["nwp"][nwp_key]
86
- da_channel_stds = stds_dict["nwp"][nwp_key]
85
+ channel_means = means_dict["nwp"][nwp_key]
86
+ channel_stds = stds_dict["nwp"][nwp_key]
87
87
 
88
- da_nwp = (da_nwp - da_channel_means) / da_channel_stds
88
+ da_nwp = (da_nwp - channel_means) / channel_stds
89
89
 
90
90
  nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_sample(da_nwp)
91
91
 
@@ -96,16 +96,16 @@ def process_and_combine_datasets(
96
96
  da_sat = dataset_dict["sat"]
97
97
 
98
98
  # Standardise and convert to NumpyBatch
99
- da_channel_means = means_dict["sat"]
100
- da_channel_stds = stds_dict["sat"]
99
+ channel_means = means_dict["sat"]
100
+ channel_stds = stds_dict["sat"]
101
101
 
102
- da_sat = (da_sat - da_channel_means) / da_channel_stds
102
+ da_sat = (da_sat - channel_means) / channel_stds
103
103
 
104
104
  numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))
105
105
 
106
106
  if "site" in dataset_dict:
107
107
  da_sites = dataset_dict["site"]
108
- da_sites = da_sites / da_sites.capacity_kwp
108
+ da_sites = da_sites / da_sites.capacity_kwp.values
109
109
 
110
110
  # Convert to NumpyBatch
111
111
  numpy_modalities.append(convert_site_to_numpy_sample(da_sites))
@@ -0,0 +1,59 @@
1
+ """Utility function for converting normalisation constants in the config to arrays."""
2
+
3
+ import numpy as np
4
+
5
+ from ocf_data_sampler.config import Configuration
6
+
7
+
8
+ def config_normalization_values_to_dicts(
9
+ config: Configuration,
10
+ ) -> tuple[dict[str, np.ndarray | dict[str, np.ndarray]]]:
11
+ """Construct numpy arrays of mean and std values from the config normalisation constants.
12
+
13
+ Args:
14
+ config: Data configuration.
15
+
16
+ Returns:
17
+ Means dict
18
+ Stds dict
19
+ """
20
+ means_dict = {}
21
+ stds_dict = {}
22
+
23
+ if config.input_data.nwp is not None:
24
+
25
+ means_dict["nwp"] = {}
26
+ stds_dict["nwp"] = {}
27
+
28
+ for nwp_key in config.input_data.nwp:
29
+ nwp_config = config.input_data.nwp[nwp_key]
30
+
31
+ means_list = []
32
+ stds_list = []
33
+
34
+ for channel in list(nwp_config.channels):
35
+ # These accumulated channels are diffed and renamed
36
+ if channel in nwp_config.accum_channels:
37
+ channel =f"diff_{channel}"
38
+
39
+ means_list.append(nwp_config.normalisation_constants[channel].mean)
40
+ stds_list.append(nwp_config.normalisation_constants[channel].std)
41
+
42
+ means_dict["nwp"][nwp_key] = np.array(means_list)[None, :, None, None]
43
+ stds_dict["nwp"][nwp_key] = np.array(stds_list)[None, :, None, None]
44
+
45
+ if config.input_data.satellite is not None:
46
+ sat_config = config.input_data.satellite
47
+
48
+ means_list = []
49
+ stds_list = []
50
+
51
+ for channel in list(config.input_data.satellite.channels):
52
+ means_list.append(sat_config.normalisation_constants[channel].mean)
53
+ stds_list.append(sat_config.normalisation_constants[channel].std)
54
+
55
+ # Convert to array and expand dimensions so we can normalise the 4D sat and NWP sources
56
+ means_dict["sat"] = np.array(means_list)[None, :, None, None]
57
+ stds_dict["sat"] = np.array(stds_list)[None, :, None, None]
58
+
59
+ return means_dict, stds_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.13
3
+ Version: 0.5.14
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -1,57 +0,0 @@
1
- """Utility function for converting channel dictionaries to xarray DataArrays."""
2
-
3
- import xarray as xr
4
-
5
- from ocf_data_sampler.config import Configuration
6
-
7
-
8
- def channel_dict_to_dataarray(channel_dict: dict[str, float]) -> xr.DataArray:
9
- """Converts a dictionary of channel values to a DataArray.
10
-
11
- Args:
12
- channel_dict: Dictionary mapping channel names (str) to their values (float).
13
-
14
- Returns:
15
- xr.DataArray: A 1D DataArray with channels as coordinates.
16
- """
17
- return xr.DataArray(
18
- list(channel_dict.values()),
19
- coords={"channel": list(channel_dict.keys())},
20
- )
21
-
22
- def config_normalization_values_to_dicts(
23
- config: Configuration,
24
- ) -> tuple[dict[str, xr.DataArray | dict[str, xr.DataArray]]]:
25
- """Construct DataArrays of mean and std values from the config normalisation constants.
26
-
27
- Args:
28
- config: Data configuration.
29
-
30
- Returns:
31
- Means dict
32
- Stds dict
33
- """
34
- means_dict = {}
35
- stds_dict = {}
36
-
37
- if config.input_data.nwp is not None:
38
-
39
- means_dict["nwp"] = {}
40
- stds_dict["nwp"] = {}
41
-
42
- for nwp_key in config.input_data.nwp:
43
- # Standardise and convert to NumpyBatch
44
-
45
- means_dict["nwp"][nwp_key] = channel_dict_to_dataarray(
46
- config.input_data.nwp[nwp_key].channel_means,
47
- )
48
- stds_dict["nwp"][nwp_key] = channel_dict_to_dataarray(
49
- config.input_data.nwp[nwp_key].channel_stds,
50
- )
51
-
52
- if config.input_data.satellite is not None:
53
-
54
- means_dict["sat"] = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
55
- stds_dict["sat"] = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
56
-
57
- return means_dict, stds_dict