ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (76) hide show
  1. ocf_data_sampler/config/load.py +3 -3
  2. ocf_data_sampler/config/model.py +73 -61
  3. ocf_data_sampler/config/save.py +5 -4
  4. ocf_data_sampler/constants.py +140 -12
  5. ocf_data_sampler/load/gsp.py +6 -5
  6. ocf_data_sampler/load/load_dataset.py +5 -6
  7. ocf_data_sampler/load/nwp/nwp.py +17 -5
  8. ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
  9. ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
  10. ocf_data_sampler/load/nwp/providers/icon.py +46 -0
  11. ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
  12. ocf_data_sampler/load/nwp/providers/utils.py +3 -1
  13. ocf_data_sampler/load/satellite.py +9 -10
  14. ocf_data_sampler/load/site.py +10 -6
  15. ocf_data_sampler/load/utils.py +21 -16
  16. ocf_data_sampler/numpy_sample/collate.py +10 -9
  17. ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
  18. ocf_data_sampler/numpy_sample/gsp.py +12 -14
  19. ocf_data_sampler/numpy_sample/nwp.py +12 -12
  20. ocf_data_sampler/numpy_sample/satellite.py +9 -9
  21. ocf_data_sampler/numpy_sample/site.py +5 -8
  22. ocf_data_sampler/numpy_sample/sun_position.py +16 -21
  23. ocf_data_sampler/sample/base.py +15 -17
  24. ocf_data_sampler/sample/site.py +13 -20
  25. ocf_data_sampler/sample/uk_regional.py +29 -35
  26. ocf_data_sampler/select/dropout.py +16 -14
  27. ocf_data_sampler/select/fill_time_periods.py +15 -5
  28. ocf_data_sampler/select/find_contiguous_time_periods.py +88 -75
  29. ocf_data_sampler/select/geospatial.py +63 -54
  30. ocf_data_sampler/select/location.py +16 -51
  31. ocf_data_sampler/select/select_spatial_slice.py +105 -89
  32. ocf_data_sampler/select/select_time_slice.py +71 -58
  33. ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
  34. ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
  35. ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
  36. ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
  37. ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
  38. ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
  39. ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
  40. ocf_data_sampler/utils.py +3 -1
  41. {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
  42. ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
  43. {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
  44. {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
  45. scripts/refactor_site.py +62 -33
  46. utils/compute_icon_mean_stddev.py +72 -0
  47. ocf_data_sampler-0.1.11.dist-info/LICENSE +0 -21
  48. ocf_data_sampler-0.1.11.dist-info/RECORD +0 -82
  49. tests/__init__.py +0 -0
  50. tests/config/test_config.py +0 -113
  51. tests/config/test_load.py +0 -7
  52. tests/config/test_save.py +0 -28
  53. tests/conftest.py +0 -319
  54. tests/load/test_load_gsp.py +0 -15
  55. tests/load/test_load_nwp.py +0 -21
  56. tests/load/test_load_satellite.py +0 -17
  57. tests/load/test_load_sites.py +0 -14
  58. tests/numpy_sample/test_collate.py +0 -21
  59. tests/numpy_sample/test_datetime_features.py +0 -37
  60. tests/numpy_sample/test_gsp.py +0 -38
  61. tests/numpy_sample/test_nwp.py +0 -13
  62. tests/numpy_sample/test_satellite.py +0 -40
  63. tests/numpy_sample/test_sun_position.py +0 -81
  64. tests/select/test_dropout.py +0 -69
  65. tests/select/test_fill_time_periods.py +0 -28
  66. tests/select/test_find_contiguous_time_periods.py +0 -202
  67. tests/select/test_location.py +0 -67
  68. tests/select/test_select_spatial_slice.py +0 -154
  69. tests/select/test_select_time_slice.py +0 -275
  70. tests/test_sample/test_base.py +0 -164
  71. tests/test_sample/test_site_sample.py +0 -165
  72. tests/test_sample/test_uk_regional_sample.py +0 -136
  73. tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
  74. tests/torch_datasets/test_pvnet_uk.py +0 -154
  75. tests/torch_datasets/test_site.py +0 -226
  76. tests/torch_datasets/test_validate_channels_utils.py +0 -78
@@ -1,4 +1,4 @@
1
- import xarray as xr
1
+ """Functions for checking that normalisation statistics exist for the data channels requested."""
2
2
 
3
3
  from ocf_data_sampler.config import Configuration
4
4
  from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
@@ -8,10 +8,9 @@ def validate_channels(
8
8
  data_channels: list,
9
9
  means_channels: list,
10
10
  stds_channels: list,
11
- source_name: str | None = None
11
+ source_name: str | None = None,
12
12
  ) -> None:
13
- """
14
- Validates that all channels in data have corresponding normalisation constants.
13
+ """Validates that all channels in data have corresponding normalisation constants.
15
14
 
16
15
  Args:
17
16
  data_channels: Set of channels from the data
@@ -22,7 +21,6 @@ def validate_channels(
22
21
  Raises:
23
22
  ValueError: If there's a mismatch between data channels and normalisation constants
24
23
  """
25
-
26
24
  data_set = set(data_channels)
27
25
  means_set = set(means_channels)
28
26
  stds_set = set(stds_channels)
@@ -32,51 +30,57 @@ def validate_channels(
32
30
  if missing_in_means:
33
31
  raise ValueError(
34
32
  f"The following channels for {source_name} are missing in normalisation means: "
35
- f"{missing_in_means}"
33
+ f"{missing_in_means}",
36
34
  )
37
-
35
+
38
36
  # Find missing channels in stds
39
37
  missing_in_stds = data_set - stds_set
40
38
  if missing_in_stds:
41
39
  raise ValueError(
42
40
  f"The following channels for {source_name} are missing in normalisation stds: "
43
- f"{missing_in_stds}"
41
+ f"{missing_in_stds}",
44
42
  )
45
43
 
46
44
 
47
45
  def validate_nwp_channels(config: Configuration) -> None:
48
46
  """Validate that NWP channels in config have corresponding normalisation constants.
49
-
47
+
50
48
  Args:
51
49
  config: Configuration object containing NWP channel information
52
-
50
+
53
51
  Raises:
54
- ValueError: If there's a mismatch between configured NWP channels and normalisation constants
52
+ ValueError: If there's a mismatch between configured NWP channels
53
+ and normalisation constants
55
54
  """
56
- if hasattr(config.input_data, "nwp"):
57
- for nwp_key, nwp_config in config.input_data.nwp.items():
55
+ if hasattr(config.input_data, "nwp") and (
56
+ config.input_data.nwp is not None
57
+ ):
58
+ for _, nwp_config in config.input_data.nwp.items():
58
59
  provider = nwp_config.provider
59
60
  validate_channels(
60
61
  data_channels=nwp_config.channels,
61
62
  means_channels=NWP_MEANS[provider].channel.values,
62
63
  stds_channels=NWP_STDS[provider].channel.values,
63
- source_name=provider
64
+ source_name=provider,
64
65
  )
65
66
 
66
67
 
67
68
  def validate_satellite_channels(config: Configuration) -> None:
68
69
  """Validate that satellite channels in config have corresponding normalisation constants.
69
-
70
+
70
71
  Args:
71
72
  config: Configuration object containing satellite channel information
72
-
73
+
73
74
  Raises:
74
- ValueError: If there's a mismatch between configured satellite channels and normalisation constants
75
+ ValueError: If there's a mismatch between configured satellite channels
76
+ and normalisation constants
75
77
  """
76
- if hasattr(config.input_data, "satellite"):
78
+ if hasattr(config.input_data, "satellite") and (
79
+ config.input_data.satellite is not None
80
+ ):
77
81
  validate_channels(
78
82
  data_channels=config.input_data.satellite.channels,
79
83
  means_channels=RSS_MEAN.channel.values,
80
84
  stds_channels=RSS_STD.channel.values,
81
- source_name="satellite"
85
+ source_name="satellite",
82
86
  )
ocf_data_sampler/utils.py CHANGED
@@ -1,8 +1,10 @@
1
+ """Miscellaneous helper functions."""
2
+
1
3
  import pandas as pd
2
4
 
3
5
 
4
6
  def minutes(minutes: int | list[float]) -> pd.Timedelta | pd.TimedeltaIndex:
5
- """Timedelta minutes
7
+ """Timedelta minutes.
6
8
 
7
9
  Args:
8
10
  minutes: the number of minutes, single value or list
@@ -1,10 +1,8 @@
1
1
  Metadata-Version: 2.2
2
- Name: ocf_data_sampler
3
- Version: 0.1.11
4
- Summary: Sample from weather data for renewable energy prediction
5
- Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
- Author-email: info@openclimatefix.org
7
- Maintainer: Open Climate Fix Ltd
2
+ Name: ocf-data-sampler
3
+ Version: 0.1.16
4
+ Author: James Fulton, Peter Dudfield
5
+ Author-email: Open Climate Fix team <info@openclimatefix.org>
8
6
  License: MIT License
9
7
 
10
8
  Copyright (c) 2023 Open Climate Fix
@@ -27,21 +25,18 @@ License: MIT License
27
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
26
  SOFTWARE.
29
27
 
30
- Project-URL: homepage, https://github.com/openclimatefix
31
28
  Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
32
- Keywords: weather data,renewable energy prediction,sample weather data
29
+ Classifier: Programming Language :: Python :: 3
33
30
  Classifier: License :: OSI Approved :: MIT License
34
- Classifier: Programming Language :: Python :: 3.8
35
- Classifier: Operating System :: POSIX :: Linux
36
- Requires-Python: >=3.8
31
+ Requires-Python: >=3.10
37
32
  Description-Content-Type: text/markdown
38
- License-File: LICENSE
39
33
  Requires-Dist: torch
40
34
  Requires-Dist: numpy
41
35
  Requires-Dist: pandas
42
36
  Requires-Dist: xarray
43
37
  Requires-Dist: zarr==2.18.3
44
38
  Requires-Dist: dask
39
+ Requires-Dist: matplotlib
45
40
  Requires-Dist: ocf_blosc2
46
41
  Requires-Dist: pvlib
47
42
  Requires-Dist: pydantic
@@ -50,11 +45,6 @@ Requires-Dist: pathy
50
45
  Requires-Dist: pyaml_env
51
46
  Requires-Dist: pyresample
52
47
  Requires-Dist: h5netcdf
53
- Provides-Extra: docs
54
- Requires-Dist: mkdocs>=1.2; extra == "docs"
55
- Requires-Dist: mkdocs-material>=8.0; extra == "docs"
56
- Provides-Extra: plot
57
- Requires-Dist: matplotlib; extra == "plot"
58
48
 
59
49
  # ocf-data-sampler
60
50
 
@@ -77,7 +67,6 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
77
67
 
78
68
  **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
79
69
 
80
-
81
70
  ## FAQ
82
71
 
83
72
  If you have any questions about this or any other of our repos, don't hesitate to hop to our [Discussions Page](https://github.com/orgs/openclimatefix/discussions)!
@@ -0,0 +1,56 @@
1
+ ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
2
+ ocf_data_sampler/constants.py,sha256=scyqNXgmbMmZNs9TyIJ-omOOvE0SaPf-UvPxUG7SaSo,8074
3
+ ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
4
+ ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
5
+ ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
6
+ ocf_data_sampler/config/model.py,sha256=LSdBe89nGTzYceA7-Pxc2wHj7HkpghiaM4fUsHUqeT8,7381
7
+ ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
8
+ ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
9
+ ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
10
+ ocf_data_sampler/load/gsp.py,sha256=keB3Nv_CNK1P6pS9Kdfc8PoZXTI1_YFN-spsvEv_Ewc,899
11
+ ocf_data_sampler/load/load_dataset.py,sha256=0NyDxCDfgE_esKVW3s-rZEe16WB30FQ74ClWlrIo72M,1602
12
+ ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
13
+ ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
14
+ ocf_data_sampler/load/utils.py,sha256=Jwbr1rpEa3cefjw-OTVRaxnIHyGixYB3TlTlta0BOdU,1727
15
+ ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
16
+ ocf_data_sampler/load/nwp/nwp.py,sha256=0AIHQTJLUtwP2Toz_PskOTYFJXfMvGhk8faAcNvI9jk,922
17
+ ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=5AzktPJgertCx6oij6aePRosPuZHGFznMxTgtkk_mgc,994
19
+ ocf_data_sampler/load/nwp/providers/gfs.py,sha256=JSDeh4YG1wibV8--P3X-zTO8LP0dsJcpFvIyglBbhi0,979
20
+ ocf_data_sampler/load/nwp/providers/icon.py,sha256=yYUrs5HgjU0C5pMHBB6FGn3tLjswi990IY6QCXS1Zmw,1569
21
+ ocf_data_sampler/load/nwp/providers/ukv.py,sha256=-0v8JCLH8ypz8GMXZ6Rrx-I0LoHuHO8sXFupbC1RpM0,1013
22
+ ocf_data_sampler/load/nwp/providers/utils.py,sha256=cJZ9JA4W_ZeTcLQ5z71w46_DJaPcW_2JMmBdjP9r3qs,835
23
+ ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
24
+ ocf_data_sampler/numpy_sample/collate.py,sha256=I9YPcbxOwHYaDGKbzxqdV-3DFEHkzqdhAwnW7_tZH2w,1966
25
+ ocf_data_sampler/numpy_sample/datetime_features.py,sha256=INudxHcoB_c-GvYXe08S4Up_8TU5zOJ39PWRrTKfLp8,1203
26
+ ocf_data_sampler/numpy_sample/gsp.py,sha256=EDaQdOVEDBJGrXsq54UNBfpXTzi0ky_WpgBbmlyxOXM,1074
27
+ ocf_data_sampler/numpy_sample/nwp.py,sha256=iBGOdLMn-F5yR3juX3l4G2oXDpvGNuUdcR6ZCZkCqZk,1037
28
+ ocf_data_sampler/numpy_sample/satellite.py,sha256=oBlyNpO-syoyK4SSghoHqIDNyhcBqyd1L6eXSSw0k3w,1036
29
+ ocf_data_sampler/numpy_sample/site.py,sha256=tpX7j6dTOz2YmOFIzVYqTfWvIduKlOnBcLITsuPMgxU,1250
30
+ ocf_data_sampler/numpy_sample/sun_position.py,sha256=nkfgN6NmiLGoLSuJZrDsM-6nsIzJN75tWfN20Z7n8xo,1480
31
+ ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
32
+ ocf_data_sampler/sample/base.py,sha256=lnr-MNRpAxjVFJHCEvCZL86NrYy9LWnNOsLWBGDL8kc,2359
33
+ ocf_data_sampler/sample/site.py,sha256=4aJys40CQ-2CRKo_dgvm3rINTdfyTGWQGEaXGbh58qQ,1236
34
+ ocf_data_sampler/sample/uk_regional.py,sha256=uMtLdqZCsKttjFmhIC6JITzu2JDZh-VQdYUfbpyhgFM,2409
35
+ ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
36
+ ocf_data_sampler/select/dropout.py,sha256=_rzXl8_4VHTY_JMjbaoWopaFCJmLdaBpqfYF4vr24tk,1638
37
+ ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00dtimjWdLzr4JoTGA,865
38
+ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=cEXrQDzk8pXknxB0q3v5DakosagHMoLDAj302B8Xpw0,11537
39
+ ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
40
+ ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O5Deu0c,1037
41
+ ocf_data_sampler/select/select_spatial_slice.py,sha256=qY2Ll00EPA80oBtzwMoR5nk0UIpoWZF9oXl22YwWr0Q,12341
42
+ ocf_data_sampler/select/select_time_slice.py,sha256=q5QdgHPIXQb49uT5NwbOguY1GhjWc_o3c-2cDb5kLAo,5455
43
+ ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
44
+ ocf_data_sampler/select/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
45
+ ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
46
+ ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=PW46uq53o84ihrR3vCg0KiqyihV_VKTC_zS67oH1M8Y,12892
47
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=Pr9DQszBP6GyS2uTT3unB50FfYsscu4qTiu9kgcQUys,17798
48
+ ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
49
+ ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=LdHgLPAYUVoCRMk2nnFdsMpygGS2kbps3h-7_bZnETw,4718
50
+ ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=tFBZqo7hYNkNb5Du8e5JSCKC21XcEuF_mbxZ6kdj0Og,3057
51
+ scripts/refactor_site.py,sha256=pu50bqNH9PCmFnWDcIUsYkrDr6zASpkpBUzbZ48NjnU,3084
52
+ utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
53
+ ocf_data_sampler-0.1.16.dist-info/METADATA,sha256=NhVC5ZO3PEI4_8HEnrwKl3Jr7GwlUp1dQnSnn4beDTk,11713
54
+ ocf_data_sampler-0.1.16.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
55
+ ocf_data_sampler-0.1.16.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
56
+ ocf_data_sampler-0.1.16.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,3 @@
1
1
  ocf_data_sampler
2
2
  scripts
3
- tests
3
+ utils
scripts/refactor_site.py CHANGED
@@ -1,50 +1,79 @@
1
- """ Helper functions for refactoring legacy site data """
2
1
  import xarray as xr
2
+ import pandas as pd
3
3
 
4
- def legacy_format(data_ds, metadata_df):
5
- """This formats old legacy data to the new format.
4
+ def legacy_format(data_ds: xr.Dataset, metadata_df: pd.DataFrame) -> xr.Dataset:
5
+ """
6
+ Converts old legacy site data into a more structured format.
7
+
8
+ This function does three main things:
9
+ 1. Renames some columns in the metadata to keep things consistent.
10
+ 2. Reshapes site data so that instead of having separate variables for each site,
11
+ we use a `site_id` dimension—makes life easier for analysis.
12
+ 3. Adds `capacity_kwp` as a time series so that each site has its capacity info.
13
+
14
+ Parameters:
15
+ data_ds (xr.Dataset): The dataset containing legacy site data.
16
+ metadata_df (pd.DataFrame): A DataFrame with metadata about the sites.
6
17
 
7
- 1. This renames the columns in the metadata
8
- 2. Re-formats the site data from data variables named by the site_id to
9
- a data array with a site_id dimension. Also adds capacity_kwp to the dataset as a time series for each site_id
18
+ Returns:
19
+ xr.Dataset: Reformatted dataset with `generation_kw` and `capacity_kwp`.
10
20
  """
11
21
 
22
+ # Step 1: Rename metadata columns to match the new expected format
12
23
  if "system_id" in metadata_df.columns:
13
- metadata_df["site_id"] = metadata_df["system_id"]
14
-
24
+ metadata_df = metadata_df.rename(columns={"system_id": "site_id"})
25
+
26
+ # Convert capacity from megawatts to kilowatts if needed
15
27
  if "capacity_megawatts" in metadata_df.columns:
16
28
  metadata_df["capacity_kwp"] = metadata_df["capacity_megawatts"] * 1000
17
29
 
18
- # only site data has the site_id as data variables.
19
- # We want to join them all together and create another coordinate called site_id
30
+ # Quick sanity check to ensure we have what we need
31
+ if "site_id" not in metadata_df.columns or "capacity_kwp" not in metadata_df.columns:
32
+ raise ValueError("Metadata is missing required columns: 'site_id' and 'capacity_kwp'.")
33
+
34
+ # Step 2: Transform the dataset
35
+ # Check if we actually have site data in the expected format
20
36
  if "0" in data_ds:
21
- gen_df = data_ds.to_dataframe()
22
- gen_da = xr.DataArray(
23
- data=gen_df.values,
24
- coords=(
25
- ("time_utc", gen_df.index.values),
26
- ("site_id", metadata_df["site_id"]),
27
- ),
37
+ # Convert the dataset into a DataFrame so we can manipulate it more easily
38
+ site_data_df = data_ds.to_dataframe()
39
+
40
+ # Create a DataArray for generation data
41
+ generation_da = xr.DataArray(
42
+ data=site_data_df.values,
43
+ coords={
44
+ "time_utc": site_data_df.index.values,
45
+ "site_id": metadata_df["site_id"].values,
46
+ },
47
+ dims=["time_utc", "site_id"],
28
48
  name="generation_kw",
29
49
  )
30
50
 
31
- capacity_df = gen_df
32
- for col in capacity_df.columns:
33
- capacity_df[col] = metadata_df[metadata_df["site_id"].astype(str) == col][
34
- "capacity_kwp"
35
- ].iloc[0]
51
+ # Step 3: Attach capacity information
52
+ # Map site_ids to their respective capacities
53
+ site_ids = site_data_df.columns
54
+ capacities = metadata_df.set_index("site_id").loc[site_ids, "capacity_kwp"]
55
+
56
+ # Broadcast capacities across all timestamps
57
+ capacity_df = pd.DataFrame(
58
+ {site_id: [capacities[site_id]] * len(site_data_df) for site_id in site_ids},
59
+ index=site_data_df.index,
60
+ )
61
+
62
+ # Create a DataArray for capacity data
36
63
  capacity_da = xr.DataArray(
37
64
  data=capacity_df.values,
38
- coords=(
39
- ("time_utc", gen_df.index.values),
40
- ("site_id", metadata_df["site_id"]),
41
- ),
65
+ coords={
66
+ "time_utc": site_data_df.index.values,
67
+ "site_id": metadata_df["site_id"].values,
68
+ },
69
+ dims=["time_utc", "site_id"],
42
70
  name="capacity_kwp",
43
71
  )
44
- data_ds = xr.Dataset(
45
- {
46
- "generation_kw": gen_da,
47
- "capacity_kwp": capacity_da,
48
- }
49
- )
50
- return data_ds
72
+
73
+ # Finally, bundle everything into a single Dataset
74
+ data_ds = xr.Dataset({
75
+ "generation_kw": generation_da,
76
+ "capacity_kwp": capacity_da,
77
+ })
78
+
79
+ return data_ds
@@ -0,0 +1,72 @@
1
+ """Script to compute normalisation constants from NWP data."""
2
+
3
+ import argparse
4
+ import glob
5
+ import logging
6
+
7
+ import numpy as np
8
+ import xarray as xr
9
+
10
+ from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
11
+
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Add argument parser
17
+ parser = argparse.ArgumentParser(description="Compute normalization constants from NWP data")
18
+ parser.add_argument("--data-path", type=str, required=True,
19
+ help='Path pattern to zarr files (e.g., "/path/to/data/*.zarr.zip")')
20
+ parser.add_argument("--n-samples", type=int, default=2000,
21
+ help="Number of random samples to use (default: 2000)")
22
+
23
+ args = parser.parse_args()
24
+
25
+ zarr_files = glob.glob(args.data_path)
26
+ n_samples = args.n_samples
27
+
28
+ ds = open_icon_eu(zarr_files)
29
+
30
+ n_init_times = ds.sizes["init_time_utc"]
31
+ n_lats = ds.sizes["latitude"]
32
+ n_longs = ds.sizes["longitude"]
33
+ n_steps = ds.sizes["step"]
34
+
35
+ random_init_times = np.random.choice(n_init_times, size=n_samples, replace=True)
36
+ random_lats = np.random.choice(n_lats, size=n_samples, replace=True)
37
+ random_longs = np.random.choice(n_longs, size=n_samples, replace=True)
38
+ random_steps = np.random.choice(n_steps, size=n_samples, replace=True)
39
+
40
+ samples = []
41
+ for i in range(n_samples):
42
+ sample = ds.isel(init_time_utc=random_init_times[i],
43
+ latitude=random_lats[i],
44
+ longitude=random_longs[i],
45
+ step=random_steps[i])
46
+ samples.append(sample)
47
+
48
+ samples_stack = xr.concat(samples, dim="samples")
49
+
50
+
51
+ available_channels = samples_stack.channel.values.tolist()
52
+ logger.info("Available channels: %s", available_channels)
53
+
54
+ ICON_EU_MEAN = {}
55
+ ICON_EU_STD = {}
56
+
57
+ for var in available_channels:
58
+ if var not in available_channels:
59
+ logger.warning("Variable '%s' not found in the channel coordinate; skipping.", var)
60
+ continue
61
+ var_data = samples_stack.sel(channel=var)
62
+ var_mean = float(var_data.mean().compute())
63
+ var_std = float(var_data.std().compute())
64
+
65
+ ICON_EU_MEAN[var] = var_mean
66
+ ICON_EU_STD[var] = var_std
67
+
68
+ logger.info("Processed %s: mean=%.4f, std=%.4f", var, var_mean, var_std)
69
+
70
+ logger.info("\nMean values:\n%s", ICON_EU_MEAN)
71
+ logger.info("\nStandard deviations:\n%s", ICON_EU_STD)
72
+
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2023 Open Climate Fix
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
@@ -1,82 +0,0 @@
1
- ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
2
- ocf_data_sampler/constants.py,sha256=0HYNmqwBaHVTAEEx9qzk6WD9YInh0gSKLeI3pyq7aNs,5077
3
- ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
4
- ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
5
- ocf_data_sampler/config/load.py,sha256=sKCKmhkkeFvvkNL5xmnFvdAulaCtV4-rigPsFvVDPDc,634
6
- ocf_data_sampler/config/model.py,sha256=8PO-23uVy_JjWOJKgaZWdNMehQsAI-Jn8t0lcmBycwg,6992
7
- ocf_data_sampler/config/save.py,sha256=OqCPT3e0d7vMI2g2iRzmifPD7GscDkFQztU_qE5I0JY,1066
8
- ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
9
- ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
10
- ocf_data_sampler/load/gsp.py,sha256=uRxEORH7J99JAJ-D38nm0iJFOQh7dkm_NCXcpbYkyvo,857
11
- ocf_data_sampler/load/load_dataset.py,sha256=PHUGSm4hFHfS9nfIP2KjHHCp325O4br7uGBdQH_DP7g,1603
12
- ocf_data_sampler/load/satellite.py,sha256=SEQZ9oPe-asEeZeEMDkB1xWK5hErhWMagxohFcBl6KI,2294
13
- ocf_data_sampler/load/site.py,sha256=hMdoF6sn2PcSBfF2soj7nuQoK9SItaxDXco5nk2n-44,1232
14
- ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
15
- ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
16
- ocf_data_sampler/load/nwp/nwp.py,sha256=Jyq1dE7DN0iSe6iSEGA76uu9LoeJz9FzfEUkq6ZZExQ,565
17
- ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=8rYZKdV62AdczVNSOJ2G0BM4-fRFRV0_y5zkHgNYkQs,1004
19
- ocf_data_sampler/load/nwp/providers/ukv.py,sha256=dM_kvUI0xk9xEdslXqZGjOPP96PEw3qAci5mPUgUvxA,1014
20
- ocf_data_sampler/load/nwp/providers/utils.py,sha256=MFOZ5ZXLu3-SxYVJExdlo30b3y3s5ebRx3_6DO-33FQ,780
21
- ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
22
- ocf_data_sampler/numpy_sample/collate.py,sha256=oX5axq30sCsSquhNbmWAVMjM54HT1v3MCMopYHcO5Q0,1950
23
- ocf_data_sampler/numpy_sample/datetime_features.py,sha256=D0RajbnBjg15qjYk16h2H0XO4wH3fw-x0--4VC2nq0s,1204
24
- ocf_data_sampler/numpy_sample/gsp.py,sha256=uBquCFCoWuhJKY8sXpgsTCUDWUuLuv1XeixtFnFw6KU,1115
25
- ocf_data_sampler/numpy_sample/nwp.py,sha256=Tiba-es23XeyMoEPgZUpLT6EnJCGU9A_1MdY6qkE7bM,1015
26
- ocf_data_sampler/numpy_sample/satellite.py,sha256=RdXMdGGXysUx-AdL9T33yFOlxprtIdPNBKKX99-mhpY,991
27
- ocf_data_sampler/numpy_sample/site.py,sha256=TvoEU85fmjYW8pD9UZOyUUACjimdQYxEzulQXunRO6Q,1425
28
- ocf_data_sampler/numpy_sample/sun_position.py,sha256=ithM--eztAhiIQ1g52tlxgj-tMKbsJzx8mk6CgV2tzk,1613
29
- ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
30
- ocf_data_sampler/sample/base.py,sha256=IH3HbfqEUwjHmq-h2eJYLd8Jk-0ZcOylnehMyCPMV38,2223
31
- ocf_data_sampler/sample/site.py,sha256=ONf2Yz5zi8Ombd_znA4T7NXbO01F76kQsBZv6rfnC74,1343
32
- ocf_data_sampler/sample/uk_regional.py,sha256=KhJ5Ik1pZRp7PgIJjGIrE4i7SQnIdVjUbBHnfn-7ghg,2649
33
- ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
34
- ocf_data_sampler/select/dropout.py,sha256=Pgov9P7rQMkSdqluG_hwm8loGyYNFOg-3PJUBLN_kjU,1526
35
- ocf_data_sampler/select/fill_time_periods.py,sha256=EIcXG-77aQVOAYNwbDBEv6SGf6DO2p1WMEf96iW4MEM,596
36
- ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=IwPQwvgu4cOiAZ5Gbjflv3fnQCcs0EVK0g4V6yqqSgw,11129
37
- ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
38
- ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
39
- ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
40
- ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
41
- ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
42
- ocf_data_sampler/select/time_slice_for_dataset.py,sha256=Z7pOiilSHScxmBKZNG18K5J-S4ifdXXAYGZoHRHD3AY,4324
43
- ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
44
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZgfvVCcEU3dj3RoY0zdBdKGppC7Wm81qecqB17gYTmE,12286
45
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=_uHmqg-VJu-MHgXc5JFDX1noPfH6E8nY4XhQmsrOav4,16325
46
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
47
- ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
48
- ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=u2EpiFAKAOHpmvINhOUJCT8Vbc-cle6qJ3YNVse4yLs,2884
49
- scripts/refactor_site.py,sha256=xaJGxt2_WObIPrPAnRiOMMB68r-5Q51jWRx409AcscM,1747
50
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- tests/conftest.py,sha256=k7nM3u2YJmkMupN4SIbJP3BRoxNR1dpIoo2fPFf0abg,8588
52
- tests/config/test_config.py,sha256=CzYVhAUpgT4lvQdIddtVxtJeMqYL_TJolfeIwaaohq4,3969
53
- tests/config/test_load.py,sha256=8nui2UsgK_eufWGD74yXvf-6eY_SxBFKhDmGYUtRQxw,260
54
- tests/config/test_save.py,sha256=BxSd2S50-bRPIXP_4iX0B6Wt7pRFJnUbLYtzfLaqlAs,915
55
- tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
56
- tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
57
- tests/load/test_load_satellite.py,sha256=IQ8ISRZKCEoi8IsJoPpXZJTolD0mwjnl2E7762RM_PM,524
58
- tests/load/test_load_sites.py,sha256=6V-U3_EtBklkV7w-hOoR4nba3dSaZ_cnjuRWFs8kYVU,405
59
- tests/numpy_sample/test_collate.py,sha256=RqHCD5_LTRpe4r6kqC_2TKhmhM_IHYM0ZtFUvSjDqcM,654
60
- tests/numpy_sample/test_datetime_features.py,sha256=iR9WdBLj1nIBNqoaTFE9rkUaH1eKFJSNb96nwiEaQH0,1449
61
- tests/numpy_sample/test_gsp.py,sha256=FLlq4SlJ-9cSRAepf4_ksA6PsUVKegnKEAc5pUojCJ0,1458
62
- tests/numpy_sample/test_nwp.py,sha256=Lnd-PMa6gI-fSIJkSZ554QiHFfnwxeXZxLg-rpuBv1U,442
63
- tests/numpy_sample/test_satellite.py,sha256=cCqtn5See-uSNfh89COGTUQNuFm6sIZ8QmBVHsuUeRI,1189
64
- tests/numpy_sample/test_sun_position.py,sha256=_ENYzsNBVPdNXf--FI-UUFqw2u5w7_zqw6LcENU2uZM,2504
65
- tests/select/test_dropout.py,sha256=aQuSSqZF9RxBjN9-ogkQ8O-_zktAM30CrT1Lz7j1hMg,2222
66
- tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
67
- tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
68
- tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
69
- tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
70
- tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
71
- tests/test_sample/test_base.py,sha256=sD9NZghYQWbkAcQP9YXypWZowqYkO3xeNMH-_mEoD5I,4833
72
- tests/test_sample/test_site_sample.py,sha256=8HNenhIWYouCQu4y389PDQGokSPI5jQ4lS4CG-eA1Y8,5382
73
- tests/test_sample/test_uk_regional_sample.py,sha256=MFibX9-M8mFK7vwMPu58gAG2VoY6y7w7chW5BlZclwk,3962
74
- tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
75
- tests/torch_datasets/test_pvnet_uk.py,sha256=hgD_IDa4D8cgc4cgK1UqKYkT6sFlrTMAvgVn_iwD5_4,5086
76
- tests/torch_datasets/test_site.py,sha256=t57vAR_RRWcbG_kEFk6VrFCYzVxwFG6qJKBnRHF02fM,7000
77
- tests/torch_datasets/test_validate_channels_utils.py,sha256=Rzdweu98j1of45jCOUrSiBtyPlf-dDaCceulf0H7ml8,2921
78
- ocf_data_sampler-0.1.11.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
79
- ocf_data_sampler-0.1.11.dist-info/METADATA,sha256=d8wctSlRyDbP1_yYHFvIGQgEC8DmOkM8h-ITI4XFuPw,12174
80
- ocf_data_sampler-0.1.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
81
- ocf_data_sampler-0.1.11.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
82
- ocf_data_sampler-0.1.11.dist-info/RECORD,,
tests/__init__.py DELETED
File without changes
@@ -1,113 +0,0 @@
1
- import pytest
2
- from pydantic import ValidationError
3
- from ocf_data_sampler.config import load_yaml_configuration, Configuration
4
-
5
-
6
- def test_default_configuration():
7
- """Test default pydantic class"""
8
- _ = Configuration()
9
-
10
-
11
- def test_extra_field_error():
12
- """
13
- Check an extra parameters in config causes error
14
- """
15
-
16
- configuration = Configuration()
17
- configuration_dict = configuration.model_dump()
18
- configuration_dict["extra_field"] = "extra_value"
19
- with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
20
- _ = Configuration(**configuration_dict)
21
-
22
-
23
- def test_incorrect_interval_start_minutes(test_config_filename):
24
- """
25
- Check a history length not divisible by time resolution causes error
26
- """
27
-
28
- configuration = load_yaml_configuration(test_config_filename)
29
-
30
- configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
31
- with pytest.raises(
32
- ValueError,
33
- match="interval_start_minutes.*must be divisible.*time_resolution_minutes.*"
34
- ):
35
- _ = Configuration(**configuration.model_dump())
36
-
37
-
38
- def test_incorrect_interval_end_minutes(test_config_filename):
39
- """
40
- Check a forecast length not divisible by time resolution causes error
41
- """
42
-
43
- configuration = load_yaml_configuration(test_config_filename)
44
-
45
- configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
46
- with pytest.raises(
47
- ValueError,
48
- match="interval_end_minutes.*must be divisible.*time_resolution_minutes.*"
49
- ):
50
- _ = Configuration(**configuration.model_dump())
51
-
52
-
53
- def test_incorrect_nwp_provider(test_config_filename):
54
- """
55
- Check an unexpected nwp provider causes error
56
- """
57
-
58
- configuration = load_yaml_configuration(test_config_filename)
59
-
60
- configuration.input_data.nwp['ukv'].provider = "unexpected_provider"
61
- with pytest.raises(Exception, match="NWP provider"):
62
- _ = Configuration(**configuration.model_dump())
63
-
64
-
65
- def test_incorrect_dropout(test_config_filename):
66
- """
67
- Check a dropout timedelta over 0 causes error and 0 doesn't
68
- """
69
-
70
- configuration = load_yaml_configuration(test_config_filename)
71
-
72
- # check a positive number is not allowed
73
- configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [120]
74
- with pytest.raises(Exception, match="Dropout timedeltas must be negative"):
75
- _ = Configuration(**configuration.model_dump())
76
-
77
- # check 0 is allowed
78
- configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [0]
79
- _ = Configuration(**configuration.model_dump())
80
-
81
-
82
- def test_incorrect_dropout_fraction(test_config_filename):
83
- """
84
- Check dropout fraction outside of range causes error
85
- """
86
-
87
- configuration = load_yaml_configuration(test_config_filename)
88
-
89
- configuration.input_data.nwp['ukv'].dropout_fraction= 1.1
90
-
91
- with pytest.raises(ValidationError, match="Input should be less than or equal to 1"):
92
- _ = Configuration(**configuration.model_dump())
93
-
94
- configuration.input_data.nwp['ukv'].dropout_fraction= -0.1
95
- with pytest.raises(ValidationError, match="Input should be greater than or equal to 0"):
96
- _ = Configuration(**configuration.model_dump())
97
-
98
-
99
- def test_inconsistent_dropout_use(test_config_filename):
100
- """
101
- Check dropout fraction outside of range causes error
102
- """
103
-
104
- configuration = load_yaml_configuration(test_config_filename)
105
- configuration.input_data.satellite.dropout_fraction= 1.0
106
- configuration.input_data.satellite.dropout_timedeltas_minutes = []
107
-
108
- with pytest.raises(ValueError, match="To dropout fraction > 0 requires a list of dropout timedeltas"):
109
- _ = Configuration(**configuration.model_dump())
110
- configuration.input_data.satellite.dropout_fraction= 0.0
111
- configuration.input_data.satellite.dropout_timedeltas_minutes = [-120, -60]
112
- with pytest.raises(ValueError, match="To use dropout timedeltas dropout fraction should be > 0"):
113
- _ = Configuration(**configuration.model_dump())