ocf-data-sampler 0.0.36__tar.gz → 0.0.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (75) hide show
  1. {ocf_data_sampler-0.0.36/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.38}/PKG-INFO +3 -2
  2. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/README.md +2 -1
  3. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/constants.py +38 -0
  4. ocf_data_sampler-0.0.38/ocf_data_sampler/numpy_batch/collate.py +79 -0
  5. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/nwp.py +0 -1
  6. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/satellite.py +2 -1
  7. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/process_and_combine.py +11 -4
  8. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38/ocf_data_sampler.egg-info}/PKG-INFO +3 -2
  9. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler.egg-info/SOURCES.txt +3 -0
  10. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/pyproject.toml +1 -1
  11. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/conftest.py +16 -0
  12. ocf_data_sampler-0.0.38/tests/numpy_batch/test_collate.py +26 -0
  13. ocf_data_sampler-0.0.38/tests/torch_datasets/test_process_and_combine.py +165 -0
  14. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/torch_datasets/test_pvnet_uk_regional.py +0 -13
  15. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/LICENSE +0 -0
  16. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/MANIFEST.in +0 -0
  17. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/__init__.py +0 -0
  18. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/config/__init__.py +0 -0
  19. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/config/load.py +0 -0
  20. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/config/model.py +0 -0
  21. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/config/save.py +0 -0
  22. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  23. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/__init__.py +0 -0
  24. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/gsp.py +0 -0
  25. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/load_dataset.py +0 -0
  26. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  27. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  28. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  29. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  30. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  31. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  32. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/satellite.py +0 -0
  33. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/site.py +0 -0
  34. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/load/utils.py +0 -0
  35. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  36. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/gsp.py +0 -0
  37. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/site.py +0 -0
  38. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
  39. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/__init__.py +0 -0
  40. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/dropout.py +0 -0
  41. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  42. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  43. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/geospatial.py +0 -0
  44. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/location.py +0 -0
  45. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  46. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/select_time_slice.py +0 -0
  47. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  48. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  49. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  50. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +0 -0
  51. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/site.py +0 -0
  52. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/valid_time_periods.py +0 -0
  53. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/utils.py +0 -0
  54. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  55. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler.egg-info/requires.txt +0 -0
  56. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  57. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/scripts/refactor_site.py +0 -0
  58. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/setup.cfg +0 -0
  59. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/__init__.py +0 -0
  60. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/config/test_config.py +0 -0
  61. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/load/test_load_gsp.py +0 -0
  62. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/load/test_load_nwp.py +0 -0
  63. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/load/test_load_satellite.py +0 -0
  64. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/load/test_load_sites.py +0 -0
  65. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/numpy_batch/test_gsp.py +0 -0
  66. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/numpy_batch/test_nwp.py +0 -0
  67. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/numpy_batch/test_satellite.py +0 -0
  68. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/numpy_batch/test_sun_position.py +0 -0
  69. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_dropout.py +0 -0
  70. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_fill_time_periods.py +0 -0
  71. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_find_contiguous_time_periods.py +0 -0
  72. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_location.py +0 -0
  73. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_select_spatial_slice.py +0 -0
  74. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/select/test_select_time_slice.py +0 -0
  75. {ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/torch_datasets/test_site.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.36
3
+ Version: 0.0.38
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
56
56
  # ocf-data-sampler
57
57
 
58
58
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
59
- [![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
59
+ [![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
60
60
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
61
61
 
62
62
  [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -129,6 +129,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
129
129
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
130
130
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
131
131
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
132
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
132
133
  </tr>
133
134
  </tbody>
134
135
  </table>
@@ -1,7 +1,7 @@
1
1
  # ocf-data-sampler
2
2
 
3
3
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
4
- [![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
4
+ [![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
5
5
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
6
6
 
7
7
  [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -74,6 +74,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
74
74
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
75
75
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
76
76
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
77
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
77
78
  </tr>
78
79
  </tbody>
79
80
  </table>
@@ -28,6 +28,7 @@ class NWPStatDict(dict):
28
28
  f"Values for {key} not yet available in ocf-data-sampler {list(self.keys())}"
29
29
  )
30
30
 
31
+
31
32
  # ------ UKV
32
33
  # Means and std computed WITH version_7 and higher, MetOffice values
33
34
  UKV_STD = {
@@ -49,6 +50,7 @@ UKV_STD = {
49
50
  "prmsl": 1252.71790539,
50
51
  "prate": 0.00021497,
51
52
  }
53
+
52
54
  UKV_MEAN = {
53
55
  "cdcb": 1412.26599062,
54
56
  "lcc": 50.08362643,
@@ -97,6 +99,7 @@ ECMWF_STD = {
97
99
  "diff_duvrs": 81605.25,
98
100
  "diff_sr": 818950.6875,
99
101
  }
102
+
100
103
  ECMWF_MEAN = {
101
104
  "dlwrf": 27187026.0,
102
105
  "dswrf": 11458988.0,
@@ -133,3 +136,38 @@ NWP_MEANS = NWPStatDict(
133
136
  ecmwf=ECMWF_MEAN,
134
137
  )
135
138
 
139
# ------ Satellite
# RSS mean and std values from a randomised 20% of 2020 imagery.
# Each per-channel table is handed directly to `_to_data_array`, so the public
# names are only ever bound to the converted objects, never to the raw dicts.

RSS_STD = _to_data_array(
    {
        "HRV": 0.11405209,
        "IR_016": 0.21462157,
        "IR_039": 0.04618041,
        "IR_087": 0.06687243,
        "IR_097": 0.0468558,
        "IR_108": 0.17482725,
        "IR_120": 0.06115861,
        "IR_134": 0.04492306,
        "VIS006": 0.12184761,
        "VIS008": 0.13090034,
        "WV_062": 0.16111417,
        "WV_073": 0.12924142,
    }
)

RSS_MEAN = _to_data_array(
    {
        "HRV": 0.09298719,
        "IR_016": 0.17594202,
        "IR_039": 0.86167645,
        "IR_087": 0.7719318,
        "IR_097": 0.8014212,
        "IR_108": 0.71254843,
        "IR_120": 0.89058584,
        "IR_134": 0.944365,
        "VIS006": 0.09633306,
        "VIS008": 0.11426069,
        "WV_062": 0.7359355,
        "WV_073": 0.62479186,
    }
)
@@ -0,0 +1,79 @@
1
+ from ocf_data_sampler.numpy_batch import NWPBatchKey
2
+
3
+ import numpy as np
4
+ import logging
5
+ from typing import Union
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def stack_np_examples_into_batch(dict_list):
    """
    Collate a list of per-example dicts into one batch dict

    The set of keys (including the nested NWP source and per-source keys) is
    read from the first example only; every example is then indexed with
    those keys.

    Args:
        dict_list: A list of dict-like Numpy examples to stack

    Returns:
        A dict with the same keys as the first example. Each value is the
        result of `stack_data_list` over that key's entries; the "nwp" entry
        is a dict of source name -> dict of stacked entries.
    """
    first_example = dict_list[0]
    stacked = {}

    for key in list(first_example.keys()):
        if key != "nwp":
            stacked[key] = stack_data_list([example[key] for example in dict_list], key)
            continue

        # NWP is nested one level deeper (source -> key -> data), and the keys
        # can differ between sources, so each source is collated separately.
        nwp_stacked = {}
        for source in list(first_example["nwp"].keys()):
            nwp_stacked[source] = {
                inner_key: stack_data_list(
                    [example["nwp"][source][inner_key] for example in dict_list],
                    inner_key,
                )
                for inner_key in list(first_example["nwp"][source].keys())
            }

        stacked["nwp"] = nwp_stacked

    return stacked
56
+
57
+
58
+ def _key_is_constant(batch_key):
59
+ is_constant = batch_key.endswith("t0_idx") or batch_key == NWPBatchKey.channel_names
60
+ return is_constant
61
+
62
+
63
def stack_data_list(
    data_list: list,
    batch_key: Union[str, NWPBatchKey],
):
    """Combine the per-example entries of one key into a single batch entry

    Args:
        data_list: The values stored under `batch_key`, one per example
        batch_key: The key the values belong to

    Returns:
        The first entry unchanged when `_key_is_constant(batch_key)` is true,
        otherwise `np.stack(data_list)`.

    Raises:
        Exception: Whatever `np.stack` raised, re-raised after being logged.
    """
    if _key_is_constant(batch_key):
        # Constant keys are treated as identical across examples, so the first
        # entry stands in for all of them.
        return data_list[0]
    try:
        return np.stack(data_list)
    except Exception as e:
        logger.debug(f"Could not stack the following shapes together, ({batch_key})")
        # Entries are not guaranteed to be arrays (that may be exactly why the
        # stack failed), so read `.shape` defensively: an AttributeError raised
        # here would replace the real error.
        shapes = [getattr(example, "shape", None) for example in data_list]
        logger.debug(shapes)
        logger.error(e)
        # Bare raise re-raises the active exception with its original traceback.
        raise
@@ -1,5 +1,4 @@
1
1
  """Convert NWP to NumpyBatch"""
2
-
3
2
  import pandas as pd
4
3
  import xarray as xr
5
4
 
@@ -13,6 +13,7 @@ class SatelliteBatchKey:
13
13
 
14
14
  def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
15
15
  """Convert from Xarray to NumpyBatch"""
16
+
16
17
  example = {
17
18
  SatelliteBatchKey.satellite_actual: da.values,
18
19
  SatelliteBatchKey.time_utc: da.time_utc.values.astype(float),
@@ -27,4 +28,4 @@ def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None
27
28
  if t0_idx is not None:
28
29
  example[SatelliteBatchKey.t0_idx] = t0_idx
29
30
 
30
- return example
31
+ return example
@@ -4,7 +4,7 @@ import xarray as xr
4
4
  from typing import Tuple
5
5
 
6
6
  from ocf_data_sampler.config import Configuration
7
- from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS
7
+ from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
8
8
  from ocf_data_sampler.numpy_batch import (
9
9
  convert_nwp_to_numpy_batch,
10
10
  convert_satellite_to_numpy_batch,
@@ -25,8 +25,8 @@ def process_and_combine_datasets(
25
25
  location: Location,
26
26
  target_key: str = 'gsp'
27
27
  ) -> dict:
28
- """Normalize and convert data to numpy arrays"""
29
28
 
29
+ """Normalise and convert data to numpy arrays"""
30
30
  numpy_modalities = []
31
31
 
32
32
  if "nwp" in dataset_dict:
@@ -37,19 +37,23 @@ def process_and_combine_datasets(
37
37
  # Standardise
38
38
  provider = config.input_data.nwp[nwp_key].provider
39
39
  da_nwp = (da_nwp - NWP_MEANS[provider]) / NWP_STDS[provider]
40
+
40
41
  # Convert to NumpyBatch
41
42
  nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_batch(da_nwp)
42
43
 
43
44
  # Combine the NWPs into NumpyBatch
44
45
  numpy_modalities.append({NWPBatchKey.nwp: nwp_numpy_modalities})
45
46
 
47
+
46
48
  if "sat" in dataset_dict:
47
- # Satellite is already in the range [0-1] so no need to standardise
49
+ # Standardise
48
50
  da_sat = dataset_dict["sat"]
51
+ da_sat = (da_sat - RSS_MEAN) / RSS_STD
49
52
 
50
53
  # Convert to NumpyBatch
51
54
  numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
52
55
 
56
+
53
57
  gsp_config = config.input_data.gsp
54
58
 
55
59
  if "gsp" in dataset_dict:
@@ -93,6 +97,7 @@ def process_and_combine_datasets(
93
97
 
94
98
  return combined_sample
95
99
 
100
+
96
101
  def process_and_combine_site_sample_dict(
97
102
  dataset_dict: dict,
98
103
  config: Configuration,
@@ -119,8 +124,9 @@ def process_and_combine_site_sample_dict(
119
124
  data_arrays.append((f"nwp-{provider}", da_nwp))
120
125
 
121
126
  if "sat" in dataset_dict:
122
- # TODO add some satellite normalisation
127
+ # Standardise
123
128
  da_sat = dataset_dict["sat"]
129
+ da_sat = (da_sat - RSS_MEAN) / RSS_STD
124
130
  data_arrays.append(("satellite", da_sat))
125
131
 
126
132
  if "site" in dataset_dict:
@@ -143,6 +149,7 @@ def merge_dicts(list_of_dicts: list[dict]) -> dict:
143
149
  combined_dict.update(d)
144
150
  return combined_dict
145
151
 
152
+
146
153
  def merge_arrays(normalised_data_arrays: list[Tuple[str, xr.DataArray]]) -> xr.Dataset:
147
154
  """
148
155
  Combine a list of DataArrays into a single Dataset with unique naming conventions.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.36
3
+ Version: 0.0.38
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
56
56
  # ocf-data-sampler
57
57
 
58
58
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
59
- [![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
59
+ [![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
60
60
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
61
61
 
62
62
  [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -129,6 +129,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
129
129
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
130
130
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
131
131
  <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
132
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
132
133
  </tr>
133
134
  </tbody>
134
135
  </table>
@@ -28,6 +28,7 @@ ocf_data_sampler/load/nwp/providers/ecmwf.py
28
28
  ocf_data_sampler/load/nwp/providers/ukv.py
29
29
  ocf_data_sampler/load/nwp/providers/utils.py
30
30
  ocf_data_sampler/numpy_batch/__init__.py
31
+ ocf_data_sampler/numpy_batch/collate.py
31
32
  ocf_data_sampler/numpy_batch/gsp.py
32
33
  ocf_data_sampler/numpy_batch/nwp.py
33
34
  ocf_data_sampler/numpy_batch/satellite.py
@@ -56,6 +57,7 @@ tests/load/test_load_gsp.py
56
57
  tests/load/test_load_nwp.py
57
58
  tests/load/test_load_satellite.py
58
59
  tests/load/test_load_sites.py
60
+ tests/numpy_batch/test_collate.py
59
61
  tests/numpy_batch/test_gsp.py
60
62
  tests/numpy_batch/test_nwp.py
61
63
  tests/numpy_batch/test_satellite.py
@@ -66,5 +68,6 @@ tests/select/test_find_contiguous_time_periods.py
66
68
  tests/select/test_location.py
67
69
  tests/select/test_select_spatial_slice.py
68
70
  tests/select/test_select_time_slice.py
71
+ tests/torch_datasets/test_process_and_combine.py
69
72
  tests/torch_datasets/test_pvnet_uk_regional.py
70
73
  tests/torch_datasets/test_site.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.0.36"
7
+ version = "0.0.38"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -7,6 +7,7 @@ import xarray as xr
7
7
  import tempfile
8
8
 
9
9
  from ocf_data_sampler.config.model import Site
10
+ from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
10
11
 
11
12
  _top_test_directory = os.path.dirname(os.path.realpath(__file__))
12
13
 
@@ -269,3 +270,18 @@ def uk_gsp_zarr_path(ds_uk_gsp):
269
270
  ds_uk_gsp.to_zarr(filename)
270
271
  yield filename
271
272
 
273
+
274
@pytest.fixture()
def pvnet_config_filename(
    tmp_path, config_filename, nwp_ukv_zarr_path, uk_gsp_zarr_path, sat_zarr_path
):
    """Save a copy of the test configuration whose zarr paths point at fixture data

    Returns:
        The filename (a string under `tmp_path`) of the saved YAML configuration
    """
    config = load_yaml_configuration(config_filename)

    # Point each input source at the zarr store supplied by its fixture
    config.input_data.nwp["ukv"].zarr_path = nwp_ukv_zarr_path
    config.input_data.satellite.zarr_path = sat_zarr_path
    config.input_data.gsp.zarr_path = uk_gsp_zarr_path

    saved_filename = f"{tmp_path}/configuration.yaml"
    save_yaml_configuration(config, saved_filename)
    return saved_filename
@@ -0,0 +1,26 @@
1
+ from ocf_data_sampler.numpy_batch import GSPBatchKey, SatelliteBatchKey
2
+ from ocf_data_sampler.numpy_batch.collate import stack_np_examples_into_batch
3
+ from ocf_data_sampler.torch_datasets import PVNetUKRegionalDataset
4
+
5
+
6
def test_pvnet(pvnet_config_filename):
    """Two dataset samples collate into a dict batch with NWP, GSP and satellite entries"""
    dataset = PVNetUKRegionalDataset(pvnet_config_filename)

    # Dataset length is locations x valid t0 times
    assert len(dataset.locations) == 317
    assert len(dataset.valid_t0_times) == 39
    assert len(dataset) == 317 * 39

    # Draw the first two samples and collate them
    samples = [dataset[index] for index in range(2)]
    batch = stack_np_examples_into_batch(samples)

    assert isinstance(batch, dict)

    # NWP keeps its nested per-source layout
    assert "nwp" in batch
    assert isinstance(batch["nwp"], dict)
    assert "ukv" in batch["nwp"]

    # Flat modalities sit at the top level
    assert GSPBatchKey.gsp in batch
    assert SatelliteBatchKey.satellite_actual in batch
@@ -0,0 +1,165 @@
1
+ import pytest
2
+ import tempfile
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import xarray as xr
7
+ import dask.array as da
8
+
9
+ from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
10
+ from ocf_data_sampler.config import Configuration
11
+ from ocf_data_sampler.select.location import Location
12
+ from ocf_data_sampler.numpy_batch import NWPBatchKey, GSPBatchKey, SatelliteBatchKey
13
+ from ocf_data_sampler.torch_datasets import PVNetUKRegionalDataset
14
+
15
+ from ocf_data_sampler.torch_datasets.process_and_combine import (
16
+ process_and_combine_datasets,
17
+ process_and_combine_site_sample_dict,
18
+ merge_dicts,
19
+ fill_nans_in_arrays,
20
+ compute,
21
+ )
22
+
23
+
24
def test_process_and_combine_datasets(pvnet_config_filename):
    """NWP and satellite inputs are combined into one dict with the expected array shapes"""
    config = load_yaml_configuration(pvnet_config_filename)
    t0 = pd.Timestamp("2024-01-01 00:00")
    location = Location(coordinate_system="osgb", x=1234, y=5678, id=1)

    # Four hourly NWP steps over two channels on a 2x2 grid
    nwp_values = np.random.rand(4, 2, 2, 2)
    nwp_coords = {
        "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
        "channel": ["t2m", "dswrf"],
        "step": ("time_utc", pd.timedelta_range(start='0h', periods=4, freq='h')),
        "init_time_utc": pd.Timestamp("2024-01-01 00:00"),
    }
    nwp_data = xr.DataArray(
        nwp_values,
        dims=["time_utc", "channel", "y", "x"],
        coords=nwp_coords,
    )

    # Seven 5-minutely satellite frames of a single channel on a 2x2 grid
    sat_values = np.random.rand(7, 1, 2, 2)
    sat_coords = {
        "time_utc": pd.date_range("2024-01-01 00:00", periods=7, freq="5min"),
        "channel": ["HRV"],
        "x_geostationary": (["y", "x"], np.array([[1, 2], [1, 2]])),
        "y_geostationary": (["y", "x"], np.array([[1, 1], [2, 2]])),
    }
    sat_data = xr.DataArray(
        sat_values,
        dims=["time_utc", "channel", "y", "x"],
        coords=sat_coords,
    )

    dataset_dict = {"nwp": {"ukv": nwp_data}, "sat": sat_data}

    result = process_and_combine_datasets(dataset_dict, config, t0, location)

    assert isinstance(result, dict)
    assert NWPBatchKey.nwp in result
    assert result[SatelliteBatchKey.satellite_actual].shape == (7, 1, 2, 2)
    # NOTE(review): two NWP channels go in but one is expected out - presumably
    # only channels that have normalisation statistics survive; confirm.
    assert result[NWPBatchKey.nwp]["ukv"][NWPBatchKey.nwp].shape == (4, 1, 2, 2)
67
+
68
+
69
def test_merge_dicts():
    """merge_dicts unions disjoint dicts and lets later dicts win on key clashes"""
    base = {"a": 1, "b": 2}

    # Disjoint keys: every pair is kept
    merged = merge_dicts([base, {"c": 3, "d": 4}, {"e": 5}])
    assert merged == {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}

    # Clashing key "a": the value from the later dict is the one returned
    merged = merge_dicts([base, {"a": 10, "f": 6}])
    assert merged["a"] == 10
82
+
83
+
84
def test_fill_nans_in_arrays():
    """fill_nans_in_arrays zeroes NaNs at every nesting level and leaves non-arrays alone"""
    top_level_array = np.array([1.0, np.nan, 3.0, np.nan])
    inner_array = np.array([np.nan, 2.0, np.nan, 4.0])
    nested_dict = {
        "array1": top_level_array,
        "nested": {"array2": inner_array},
        "string_key": "not_an_array",
    }

    result = fill_nans_in_arrays(nested_dict)

    # Top-level array: NaNs replaced by 0.0, other values untouched
    filled_top = result["array1"]
    assert not np.isnan(filled_top).any()
    assert np.array_equal(filled_top, np.array([1.0, 0.0, 3.0, 0.0]))

    # Array inside the nested dict gets the same treatment
    filled_inner = result["nested"]["array2"]
    assert not np.isnan(filled_inner).any()
    assert np.array_equal(filled_inner, np.array([0.0, 2.0, 0.0, 4.0]))

    # Non-array values pass through unchanged
    assert result["string_key"] == "not_an_array"
102
+
103
+
104
def test_compute():
    """compute turns dask-backed DataArrays into numpy-backed ones at every nesting level"""
    da_dask = xr.DataArray(da.random.random((5, 5)))

    # The same lazy array is placed at the top level and inside a nested dict
    nested_dict = {"array1": da_dask, "nested": {"array2": da_dask}}

    # Before computing, neither entry is backed by a numpy array
    assert not isinstance(nested_dict["array1"].data, np.ndarray)
    assert not isinstance(nested_dict["nested"]["array2"].data, np.ndarray)

    result = compute(nested_dict)
    top_level = result["array1"]
    nested = result["nested"]["array2"]

    # Still DataArrays, but now backed by numpy arrays
    assert isinstance(top_level, xr.DataArray)
    assert isinstance(nested, xr.DataArray)
    assert isinstance(top_level.data, np.ndarray)
    assert isinstance(nested.data, np.ndarray)

    # Ensure there are no NaN values in the computed data
    assert not np.isnan(top_level.data).any()
    assert not np.isnan(nested.data).any()
132
+
133
+
134
def test_process_and_combine_site_sample_dict(pvnet_config_filename):
    """A minimal NWP-only site sample is combined into a Dataset with an "nwp-ukv" variable"""
    config = load_yaml_configuration(pvnet_config_filename)

    # Minimal input: four hourly steps of a single NWP channel on a 2x2 grid
    raw_nwp_values = np.random.rand(4, 1, 2, 2)
    ukv_coords = {
        "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
        "channel": ["dswrf"],
    }
    ukv_array = xr.DataArray(
        raw_nwp_values,
        dims=["time_utc", "channel", "y", "x"],
        coords=ukv_coords,
    )
    site_dict = {"nwp": {"ukv": ukv_array}}
    print(f"Input site_dict: {site_dict}")

    result = process_and_combine_site_sample_dict(site_dict, config)

    # Output structure
    assert isinstance(result, xr.Dataset), "Result should be an xarray.Dataset"
    assert len(result.data_vars) > 0, "Dataset should contain data variables"

    # The NWP variable is present and keeps the input shape
    expected_variable = "nwp-ukv"
    assert expected_variable in result.data_vars, f"Expected variable '{expected_variable}' not found"
    nwp_result = result[expected_variable]
    assert nwp_result.shape == (4, 1, 2, 2), f"Unexpected shape for '{expected_variable}': {nwp_result.shape}"
@@ -6,19 +6,6 @@ from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configura
6
6
  from ocf_data_sampler.numpy_batch import NWPBatchKey, GSPBatchKey, SatelliteBatchKey
7
7
 
8
8
 
9
- @pytest.fixture()
10
- def pvnet_config_filename(tmp_path, config_filename, nwp_ukv_zarr_path, uk_gsp_zarr_path, sat_zarr_path):
11
-
12
- # adjust config to point to the zarr file
13
- config = load_yaml_configuration(config_filename)
14
- config.input_data.nwp['ukv'].zarr_path = nwp_ukv_zarr_path
15
- config.input_data.satellite.zarr_path = sat_zarr_path
16
- config.input_data.gsp.zarr_path = uk_gsp_zarr_path
17
-
18
- filename = f"{tmp_path}/configuration.yaml"
19
- save_yaml_configuration(config, filename)
20
- return filename
21
-
22
9
 
23
10
  def test_pvnet(pvnet_config_filename):
24
11