ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/config/load.py +3 -3
- ocf_data_sampler/config/model.py +73 -61
- ocf_data_sampler/config/save.py +5 -4
- ocf_data_sampler/constants.py +140 -12
- ocf_data_sampler/load/gsp.py +6 -5
- ocf_data_sampler/load/load_dataset.py +5 -6
- ocf_data_sampler/load/nwp/nwp.py +17 -5
- ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
- ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
- ocf_data_sampler/load/nwp/providers/icon.py +46 -0
- ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
- ocf_data_sampler/load/nwp/providers/utils.py +3 -1
- ocf_data_sampler/load/satellite.py +9 -10
- ocf_data_sampler/load/site.py +10 -6
- ocf_data_sampler/load/utils.py +21 -16
- ocf_data_sampler/numpy_sample/collate.py +10 -9
- ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
- ocf_data_sampler/numpy_sample/gsp.py +12 -14
- ocf_data_sampler/numpy_sample/nwp.py +12 -12
- ocf_data_sampler/numpy_sample/satellite.py +9 -9
- ocf_data_sampler/numpy_sample/site.py +5 -8
- ocf_data_sampler/numpy_sample/sun_position.py +16 -21
- ocf_data_sampler/sample/base.py +15 -17
- ocf_data_sampler/sample/site.py +13 -20
- ocf_data_sampler/sample/uk_regional.py +29 -35
- ocf_data_sampler/select/dropout.py +16 -14
- ocf_data_sampler/select/fill_time_periods.py +15 -5
- ocf_data_sampler/select/find_contiguous_time_periods.py +88 -75
- ocf_data_sampler/select/geospatial.py +63 -54
- ocf_data_sampler/select/location.py +16 -51
- ocf_data_sampler/select/select_spatial_slice.py +105 -89
- ocf_data_sampler/select/select_time_slice.py +71 -58
- ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
- ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
- ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
- ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
- ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
- ocf_data_sampler/utils.py +3 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
- ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
- scripts/refactor_site.py +62 -33
- utils/compute_icon_mean_stddev.py +72 -0
- ocf_data_sampler-0.1.11.dist-info/LICENSE +0 -21
- ocf_data_sampler-0.1.11.dist-info/RECORD +0 -82
- tests/__init__.py +0 -0
- tests/config/test_config.py +0 -113
- tests/config/test_load.py +0 -7
- tests/config/test_save.py +0 -28
- tests/conftest.py +0 -319
- tests/load/test_load_gsp.py +0 -15
- tests/load/test_load_nwp.py +0 -21
- tests/load/test_load_satellite.py +0 -17
- tests/load/test_load_sites.py +0 -14
- tests/numpy_sample/test_collate.py +0 -21
- tests/numpy_sample/test_datetime_features.py +0 -37
- tests/numpy_sample/test_gsp.py +0 -38
- tests/numpy_sample/test_nwp.py +0 -13
- tests/numpy_sample/test_satellite.py +0 -40
- tests/numpy_sample/test_sun_position.py +0 -81
- tests/select/test_dropout.py +0 -69
- tests/select/test_fill_time_periods.py +0 -28
- tests/select/test_find_contiguous_time_periods.py +0 -202
- tests/select/test_location.py +0 -67
- tests/select/test_select_spatial_slice.py +0 -154
- tests/select/test_select_time_slice.py +0 -275
- tests/test_sample/test_base.py +0 -164
- tests/test_sample/test_site_sample.py +0 -165
- tests/test_sample/test_uk_regional_sample.py +0 -136
- tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
- tests/torch_datasets/test_pvnet_uk.py +0 -154
- tests/torch_datasets/test_site.py +0 -226
- tests/torch_datasets/test_validate_channels_utils.py +0 -78
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
"""Functions for checking that normalisation statistics exist for the data channels requested."""
|
|
2
2
|
|
|
3
3
|
from ocf_data_sampler.config import Configuration
|
|
4
4
|
from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
|
|
@@ -8,10 +8,9 @@ def validate_channels(
|
|
|
8
8
|
data_channels: list,
|
|
9
9
|
means_channels: list,
|
|
10
10
|
stds_channels: list,
|
|
11
|
-
source_name: str | None = None
|
|
11
|
+
source_name: str | None = None,
|
|
12
12
|
) -> None:
|
|
13
|
-
"""
|
|
14
|
-
Validates that all channels in data have corresponding normalisation constants.
|
|
13
|
+
"""Validates that all channels in data have corresponding normalisation constants.
|
|
15
14
|
|
|
16
15
|
Args:
|
|
17
16
|
data_channels: Set of channels from the data
|
|
@@ -22,7 +21,6 @@ def validate_channels(
|
|
|
22
21
|
Raises:
|
|
23
22
|
ValueError: If there's a mismatch between data channels and normalisation constants
|
|
24
23
|
"""
|
|
25
|
-
|
|
26
24
|
data_set = set(data_channels)
|
|
27
25
|
means_set = set(means_channels)
|
|
28
26
|
stds_set = set(stds_channels)
|
|
@@ -32,51 +30,57 @@ def validate_channels(
|
|
|
32
30
|
if missing_in_means:
|
|
33
31
|
raise ValueError(
|
|
34
32
|
f"The following channels for {source_name} are missing in normalisation means: "
|
|
35
|
-
f"{missing_in_means}"
|
|
33
|
+
f"{missing_in_means}",
|
|
36
34
|
)
|
|
37
|
-
|
|
35
|
+
|
|
38
36
|
# Find missing channels in stds
|
|
39
37
|
missing_in_stds = data_set - stds_set
|
|
40
38
|
if missing_in_stds:
|
|
41
39
|
raise ValueError(
|
|
42
40
|
f"The following channels for {source_name} are missing in normalisation stds: "
|
|
43
|
-
f"{missing_in_stds}"
|
|
41
|
+
f"{missing_in_stds}",
|
|
44
42
|
)
|
|
45
43
|
|
|
46
44
|
|
|
47
45
|
def validate_nwp_channels(config: Configuration) -> None:
|
|
48
46
|
"""Validate that NWP channels in config have corresponding normalisation constants.
|
|
49
|
-
|
|
47
|
+
|
|
50
48
|
Args:
|
|
51
49
|
config: Configuration object containing NWP channel information
|
|
52
|
-
|
|
50
|
+
|
|
53
51
|
Raises:
|
|
54
|
-
ValueError: If there's a mismatch between configured NWP channels
|
|
52
|
+
ValueError: If there's a mismatch between configured NWP channels
|
|
53
|
+
and normalisation constants
|
|
55
54
|
"""
|
|
56
|
-
if hasattr(config.input_data, "nwp")
|
|
57
|
-
|
|
55
|
+
if hasattr(config.input_data, "nwp") and (
|
|
56
|
+
config.input_data.nwp is not None
|
|
57
|
+
):
|
|
58
|
+
for _, nwp_config in config.input_data.nwp.items():
|
|
58
59
|
provider = nwp_config.provider
|
|
59
60
|
validate_channels(
|
|
60
61
|
data_channels=nwp_config.channels,
|
|
61
62
|
means_channels=NWP_MEANS[provider].channel.values,
|
|
62
63
|
stds_channels=NWP_STDS[provider].channel.values,
|
|
63
|
-
source_name=provider
|
|
64
|
+
source_name=provider,
|
|
64
65
|
)
|
|
65
66
|
|
|
66
67
|
|
|
67
68
|
def validate_satellite_channels(config: Configuration) -> None:
|
|
68
69
|
"""Validate that satellite channels in config have corresponding normalisation constants.
|
|
69
|
-
|
|
70
|
+
|
|
70
71
|
Args:
|
|
71
72
|
config: Configuration object containing satellite channel information
|
|
72
|
-
|
|
73
|
+
|
|
73
74
|
Raises:
|
|
74
|
-
ValueError: If there's a mismatch between configured satellite channels
|
|
75
|
+
ValueError: If there's a mismatch between configured satellite channels
|
|
76
|
+
and normalisation constants
|
|
75
77
|
"""
|
|
76
|
-
if hasattr(config.input_data, "satellite")
|
|
78
|
+
if hasattr(config.input_data, "satellite") and (
|
|
79
|
+
config.input_data.satellite is not None
|
|
80
|
+
):
|
|
77
81
|
validate_channels(
|
|
78
82
|
data_channels=config.input_data.satellite.channels,
|
|
79
83
|
means_channels=RSS_MEAN.channel.values,
|
|
80
84
|
stds_channels=RSS_STD.channel.values,
|
|
81
|
-
source_name="satellite"
|
|
85
|
+
source_name="satellite",
|
|
82
86
|
)
|
ocf_data_sampler/utils.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
|
-
Name:
|
|
3
|
-
Version: 0.1.
|
|
4
|
-
|
|
5
|
-
Author:
|
|
6
|
-
Author-email: info@openclimatefix.org
|
|
7
|
-
Maintainer: Open Climate Fix Ltd
|
|
2
|
+
Name: ocf-data-sampler
|
|
3
|
+
Version: 0.1.16
|
|
4
|
+
Author: James Fulton, Peter Dudfield
|
|
5
|
+
Author-email: Open Climate Fix team <info@openclimatefix.org>
|
|
8
6
|
License: MIT License
|
|
9
7
|
|
|
10
8
|
Copyright (c) 2023 Open Climate Fix
|
|
@@ -27,21 +25,18 @@ License: MIT License
|
|
|
27
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
26
|
SOFTWARE.
|
|
29
27
|
|
|
30
|
-
Project-URL: homepage, https://github.com/openclimatefix
|
|
31
28
|
Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
|
|
32
|
-
|
|
29
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
30
|
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
-
|
|
35
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
36
|
-
Requires-Python: >=3.8
|
|
31
|
+
Requires-Python: >=3.10
|
|
37
32
|
Description-Content-Type: text/markdown
|
|
38
|
-
License-File: LICENSE
|
|
39
33
|
Requires-Dist: torch
|
|
40
34
|
Requires-Dist: numpy
|
|
41
35
|
Requires-Dist: pandas
|
|
42
36
|
Requires-Dist: xarray
|
|
43
37
|
Requires-Dist: zarr==2.18.3
|
|
44
38
|
Requires-Dist: dask
|
|
39
|
+
Requires-Dist: matplotlib
|
|
45
40
|
Requires-Dist: ocf_blosc2
|
|
46
41
|
Requires-Dist: pvlib
|
|
47
42
|
Requires-Dist: pydantic
|
|
@@ -50,11 +45,6 @@ Requires-Dist: pathy
|
|
|
50
45
|
Requires-Dist: pyaml_env
|
|
51
46
|
Requires-Dist: pyresample
|
|
52
47
|
Requires-Dist: h5netcdf
|
|
53
|
-
Provides-Extra: docs
|
|
54
|
-
Requires-Dist: mkdocs>=1.2; extra == "docs"
|
|
55
|
-
Requires-Dist: mkdocs-material>=8.0; extra == "docs"
|
|
56
|
-
Provides-Extra: plot
|
|
57
|
-
Requires-Dist: matplotlib; extra == "plot"
|
|
58
48
|
|
|
59
49
|
# ocf-data-sampler
|
|
60
50
|
|
|
@@ -77,7 +67,6 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
|
|
|
77
67
|
|
|
78
68
|
**ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
|
|
79
69
|
|
|
80
|
-
|
|
81
70
|
## FAQ
|
|
82
71
|
|
|
83
72
|
If you have any questions about this or any other of our repos, don't hesitate to hop to our [Discussions Page](https://github.com/orgs/openclimatefix/discussions)!
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
2
|
+
ocf_data_sampler/constants.py,sha256=scyqNXgmbMmZNs9TyIJ-omOOvE0SaPf-UvPxUG7SaSo,8074
|
|
3
|
+
ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
|
|
4
|
+
ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
|
|
5
|
+
ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
|
|
6
|
+
ocf_data_sampler/config/model.py,sha256=LSdBe89nGTzYceA7-Pxc2wHj7HkpghiaM4fUsHUqeT8,7381
|
|
7
|
+
ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
|
|
8
|
+
ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
|
|
9
|
+
ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
|
|
10
|
+
ocf_data_sampler/load/gsp.py,sha256=keB3Nv_CNK1P6pS9Kdfc8PoZXTI1_YFN-spsvEv_Ewc,899
|
|
11
|
+
ocf_data_sampler/load/load_dataset.py,sha256=0NyDxCDfgE_esKVW3s-rZEe16WB30FQ74ClWlrIo72M,1602
|
|
12
|
+
ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
|
|
13
|
+
ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
|
|
14
|
+
ocf_data_sampler/load/utils.py,sha256=Jwbr1rpEa3cefjw-OTVRaxnIHyGixYB3TlTlta0BOdU,1727
|
|
15
|
+
ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
|
|
16
|
+
ocf_data_sampler/load/nwp/nwp.py,sha256=0AIHQTJLUtwP2Toz_PskOTYFJXfMvGhk8faAcNvI9jk,922
|
|
17
|
+
ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=5AzktPJgertCx6oij6aePRosPuZHGFznMxTgtkk_mgc,994
|
|
19
|
+
ocf_data_sampler/load/nwp/providers/gfs.py,sha256=JSDeh4YG1wibV8--P3X-zTO8LP0dsJcpFvIyglBbhi0,979
|
|
20
|
+
ocf_data_sampler/load/nwp/providers/icon.py,sha256=yYUrs5HgjU0C5pMHBB6FGn3tLjswi990IY6QCXS1Zmw,1569
|
|
21
|
+
ocf_data_sampler/load/nwp/providers/ukv.py,sha256=-0v8JCLH8ypz8GMXZ6Rrx-I0LoHuHO8sXFupbC1RpM0,1013
|
|
22
|
+
ocf_data_sampler/load/nwp/providers/utils.py,sha256=cJZ9JA4W_ZeTcLQ5z71w46_DJaPcW_2JMmBdjP9r3qs,835
|
|
23
|
+
ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
|
|
24
|
+
ocf_data_sampler/numpy_sample/collate.py,sha256=I9YPcbxOwHYaDGKbzxqdV-3DFEHkzqdhAwnW7_tZH2w,1966
|
|
25
|
+
ocf_data_sampler/numpy_sample/datetime_features.py,sha256=INudxHcoB_c-GvYXe08S4Up_8TU5zOJ39PWRrTKfLp8,1203
|
|
26
|
+
ocf_data_sampler/numpy_sample/gsp.py,sha256=EDaQdOVEDBJGrXsq54UNBfpXTzi0ky_WpgBbmlyxOXM,1074
|
|
27
|
+
ocf_data_sampler/numpy_sample/nwp.py,sha256=iBGOdLMn-F5yR3juX3l4G2oXDpvGNuUdcR6ZCZkCqZk,1037
|
|
28
|
+
ocf_data_sampler/numpy_sample/satellite.py,sha256=oBlyNpO-syoyK4SSghoHqIDNyhcBqyd1L6eXSSw0k3w,1036
|
|
29
|
+
ocf_data_sampler/numpy_sample/site.py,sha256=tpX7j6dTOz2YmOFIzVYqTfWvIduKlOnBcLITsuPMgxU,1250
|
|
30
|
+
ocf_data_sampler/numpy_sample/sun_position.py,sha256=nkfgN6NmiLGoLSuJZrDsM-6nsIzJN75tWfN20Z7n8xo,1480
|
|
31
|
+
ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
|
|
32
|
+
ocf_data_sampler/sample/base.py,sha256=lnr-MNRpAxjVFJHCEvCZL86NrYy9LWnNOsLWBGDL8kc,2359
|
|
33
|
+
ocf_data_sampler/sample/site.py,sha256=4aJys40CQ-2CRKo_dgvm3rINTdfyTGWQGEaXGbh58qQ,1236
|
|
34
|
+
ocf_data_sampler/sample/uk_regional.py,sha256=uMtLdqZCsKttjFmhIC6JITzu2JDZh-VQdYUfbpyhgFM,2409
|
|
35
|
+
ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
|
|
36
|
+
ocf_data_sampler/select/dropout.py,sha256=_rzXl8_4VHTY_JMjbaoWopaFCJmLdaBpqfYF4vr24tk,1638
|
|
37
|
+
ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00dtimjWdLzr4JoTGA,865
|
|
38
|
+
ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=cEXrQDzk8pXknxB0q3v5DakosagHMoLDAj302B8Xpw0,11537
|
|
39
|
+
ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
|
|
40
|
+
ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O5Deu0c,1037
|
|
41
|
+
ocf_data_sampler/select/select_spatial_slice.py,sha256=qY2Ll00EPA80oBtzwMoR5nk0UIpoWZF9oXl22YwWr0Q,12341
|
|
42
|
+
ocf_data_sampler/select/select_time_slice.py,sha256=q5QdgHPIXQb49uT5NwbOguY1GhjWc_o3c-2cDb5kLAo,5455
|
|
43
|
+
ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
|
|
44
|
+
ocf_data_sampler/select/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
|
|
45
|
+
ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
|
|
46
|
+
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=PW46uq53o84ihrR3vCg0KiqyihV_VKTC_zS67oH1M8Y,12892
|
|
47
|
+
ocf_data_sampler/torch_datasets/datasets/site.py,sha256=Pr9DQszBP6GyS2uTT3unB50FfYsscu4qTiu9kgcQUys,17798
|
|
48
|
+
ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
|
|
49
|
+
ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=LdHgLPAYUVoCRMk2nnFdsMpygGS2kbps3h-7_bZnETw,4718
|
|
50
|
+
ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=tFBZqo7hYNkNb5Du8e5JSCKC21XcEuF_mbxZ6kdj0Og,3057
|
|
51
|
+
scripts/refactor_site.py,sha256=pu50bqNH9PCmFnWDcIUsYkrDr6zASpkpBUzbZ48NjnU,3084
|
|
52
|
+
utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
|
|
53
|
+
ocf_data_sampler-0.1.16.dist-info/METADATA,sha256=NhVC5ZO3PEI4_8HEnrwKl3Jr7GwlUp1dQnSnn4beDTk,11713
|
|
54
|
+
ocf_data_sampler-0.1.16.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
55
|
+
ocf_data_sampler-0.1.16.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
|
|
56
|
+
ocf_data_sampler-0.1.16.dist-info/RECORD,,
|
scripts/refactor_site.py
CHANGED
|
@@ -1,50 +1,79 @@
|
|
|
1
|
-
""" Helper functions for refactoring legacy site data """
|
|
2
1
|
import xarray as xr
|
|
2
|
+
import pandas as pd
|
|
3
3
|
|
|
4
|
-
def legacy_format(data_ds, metadata_df):
|
|
5
|
-
"""
|
|
4
|
+
def legacy_format(data_ds: xr.Dataset, metadata_df: pd.DataFrame) -> xr.Dataset:
|
|
5
|
+
"""
|
|
6
|
+
Converts old legacy site data into a more structured format.
|
|
7
|
+
|
|
8
|
+
This function does three main things:
|
|
9
|
+
1. Renames some columns in the metadata to keep things consistent.
|
|
10
|
+
2. Reshapes site data so that instead of having separate variables for each site,
|
|
11
|
+
we use a `site_id` dimension—makes life easier for analysis.
|
|
12
|
+
3. Adds `capacity_kwp` as a time series so that each site has its capacity info.
|
|
13
|
+
|
|
14
|
+
Parameters:
|
|
15
|
+
data_ds (xr.Dataset): The dataset containing legacy site data.
|
|
16
|
+
metadata_df (pd.DataFrame): A DataFrame with metadata about the sites.
|
|
6
17
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
a data array with a site_id dimension. Also adds capacity_kwp to the dataset as a time series for each site_id
|
|
18
|
+
Returns:
|
|
19
|
+
xr.Dataset: Reformatted dataset with `generation_kw` and `capacity_kwp`.
|
|
10
20
|
"""
|
|
11
21
|
|
|
22
|
+
# Step 1: Rename metadata columns to match the new expected format
|
|
12
23
|
if "system_id" in metadata_df.columns:
|
|
13
|
-
metadata_df
|
|
14
|
-
|
|
24
|
+
metadata_df = metadata_df.rename(columns={"system_id": "site_id"})
|
|
25
|
+
|
|
26
|
+
# Convert capacity from megawatts to kilowatts if needed
|
|
15
27
|
if "capacity_megawatts" in metadata_df.columns:
|
|
16
28
|
metadata_df["capacity_kwp"] = metadata_df["capacity_megawatts"] * 1000
|
|
17
29
|
|
|
18
|
-
#
|
|
19
|
-
|
|
30
|
+
# Quick sanity check to ensure we have what we need
|
|
31
|
+
if "site_id" not in metadata_df.columns or "capacity_kwp" not in metadata_df.columns:
|
|
32
|
+
raise ValueError("Metadata is missing required columns: 'site_id' and 'capacity_kwp'.")
|
|
33
|
+
|
|
34
|
+
# Step 2: Transform the dataset
|
|
35
|
+
# Check if we actually have site data in the expected format
|
|
20
36
|
if "0" in data_ds:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
37
|
+
# Convert the dataset into a DataFrame so we can manipulate it more easily
|
|
38
|
+
site_data_df = data_ds.to_dataframe()
|
|
39
|
+
|
|
40
|
+
# Create a DataArray for generation data
|
|
41
|
+
generation_da = xr.DataArray(
|
|
42
|
+
data=site_data_df.values,
|
|
43
|
+
coords={
|
|
44
|
+
"time_utc": site_data_df.index.values,
|
|
45
|
+
"site_id": metadata_df["site_id"].values,
|
|
46
|
+
},
|
|
47
|
+
dims=["time_utc", "site_id"],
|
|
28
48
|
name="generation_kw",
|
|
29
49
|
)
|
|
30
50
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
51
|
+
# Step 3: Attach capacity information
|
|
52
|
+
# Map site_ids to their respective capacities
|
|
53
|
+
site_ids = site_data_df.columns
|
|
54
|
+
capacities = metadata_df.set_index("site_id").loc[site_ids, "capacity_kwp"]
|
|
55
|
+
|
|
56
|
+
# Broadcast capacities across all timestamps
|
|
57
|
+
capacity_df = pd.DataFrame(
|
|
58
|
+
{site_id: [capacities[site_id]] * len(site_data_df) for site_id in site_ids},
|
|
59
|
+
index=site_data_df.index,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Create a DataArray for capacity data
|
|
36
63
|
capacity_da = xr.DataArray(
|
|
37
64
|
data=capacity_df.values,
|
|
38
|
-
coords=
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
65
|
+
coords={
|
|
66
|
+
"time_utc": site_data_df.index.values,
|
|
67
|
+
"site_id": metadata_df["site_id"].values,
|
|
68
|
+
},
|
|
69
|
+
dims=["time_utc", "site_id"],
|
|
42
70
|
name="capacity_kwp",
|
|
43
71
|
)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
)
|
|
50
|
-
|
|
72
|
+
|
|
73
|
+
# Finally, bundle everything into a single Dataset
|
|
74
|
+
data_ds = xr.Dataset({
|
|
75
|
+
"generation_kw": generation_da,
|
|
76
|
+
"capacity_kwp": capacity_da,
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
return data_ds
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Script to compute normalisation constants from NWP data."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import glob
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import xarray as xr
|
|
9
|
+
|
|
10
|
+
from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
|
|
11
|
+
|
|
12
|
+
# Configure logging
|
|
13
|
+
logging.basicConfig(level=logging.INFO)
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Add argument parser
|
|
17
|
+
parser = argparse.ArgumentParser(description="Compute normalization constants from NWP data")
|
|
18
|
+
parser.add_argument("--data-path", type=str, required=True,
|
|
19
|
+
help='Path pattern to zarr files (e.g., "/path/to/data/*.zarr.zip")')
|
|
20
|
+
parser.add_argument("--n-samples", type=int, default=2000,
|
|
21
|
+
help="Number of random samples to use (default: 2000)")
|
|
22
|
+
|
|
23
|
+
args = parser.parse_args()
|
|
24
|
+
|
|
25
|
+
zarr_files = glob.glob(args.data_path)
|
|
26
|
+
n_samples = args.n_samples
|
|
27
|
+
|
|
28
|
+
ds = open_icon_eu(zarr_files)
|
|
29
|
+
|
|
30
|
+
n_init_times = ds.sizes["init_time_utc"]
|
|
31
|
+
n_lats = ds.sizes["latitude"]
|
|
32
|
+
n_longs = ds.sizes["longitude"]
|
|
33
|
+
n_steps = ds.sizes["step"]
|
|
34
|
+
|
|
35
|
+
random_init_times = np.random.choice(n_init_times, size=n_samples, replace=True)
|
|
36
|
+
random_lats = np.random.choice(n_lats, size=n_samples, replace=True)
|
|
37
|
+
random_longs = np.random.choice(n_longs, size=n_samples, replace=True)
|
|
38
|
+
random_steps = np.random.choice(n_steps, size=n_samples, replace=True)
|
|
39
|
+
|
|
40
|
+
samples = []
|
|
41
|
+
for i in range(n_samples):
|
|
42
|
+
sample = ds.isel(init_time_utc=random_init_times[i],
|
|
43
|
+
latitude=random_lats[i],
|
|
44
|
+
longitude=random_longs[i],
|
|
45
|
+
step=random_steps[i])
|
|
46
|
+
samples.append(sample)
|
|
47
|
+
|
|
48
|
+
samples_stack = xr.concat(samples, dim="samples")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
available_channels = samples_stack.channel.values.tolist()
|
|
52
|
+
logger.info("Available channels: %s", available_channels)
|
|
53
|
+
|
|
54
|
+
ICON_EU_MEAN = {}
|
|
55
|
+
ICON_EU_STD = {}
|
|
56
|
+
|
|
57
|
+
for var in available_channels:
|
|
58
|
+
if var not in available_channels:
|
|
59
|
+
logger.warning("Variable '%s' not found in the channel coordinate; skipping.", var)
|
|
60
|
+
continue
|
|
61
|
+
var_data = samples_stack.sel(channel=var)
|
|
62
|
+
var_mean = float(var_data.mean().compute())
|
|
63
|
+
var_std = float(var_data.std().compute())
|
|
64
|
+
|
|
65
|
+
ICON_EU_MEAN[var] = var_mean
|
|
66
|
+
ICON_EU_STD[var] = var_std
|
|
67
|
+
|
|
68
|
+
logger.info("Processed %s: mean=%.4f, std=%.4f", var, var_mean, var_std)
|
|
69
|
+
|
|
70
|
+
logger.info("\nMean values:\n%s", ICON_EU_MEAN)
|
|
71
|
+
logger.info("\nStandard deviations:\n%s", ICON_EU_STD)
|
|
72
|
+
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2023 Open Climate Fix
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
2
|
-
ocf_data_sampler/constants.py,sha256=0HYNmqwBaHVTAEEx9qzk6WD9YInh0gSKLeI3pyq7aNs,5077
|
|
3
|
-
ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
|
|
4
|
-
ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
|
|
5
|
-
ocf_data_sampler/config/load.py,sha256=sKCKmhkkeFvvkNL5xmnFvdAulaCtV4-rigPsFvVDPDc,634
|
|
6
|
-
ocf_data_sampler/config/model.py,sha256=8PO-23uVy_JjWOJKgaZWdNMehQsAI-Jn8t0lcmBycwg,6992
|
|
7
|
-
ocf_data_sampler/config/save.py,sha256=OqCPT3e0d7vMI2g2iRzmifPD7GscDkFQztU_qE5I0JY,1066
|
|
8
|
-
ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
|
|
9
|
-
ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
|
|
10
|
-
ocf_data_sampler/load/gsp.py,sha256=uRxEORH7J99JAJ-D38nm0iJFOQh7dkm_NCXcpbYkyvo,857
|
|
11
|
-
ocf_data_sampler/load/load_dataset.py,sha256=PHUGSm4hFHfS9nfIP2KjHHCp325O4br7uGBdQH_DP7g,1603
|
|
12
|
-
ocf_data_sampler/load/satellite.py,sha256=SEQZ9oPe-asEeZeEMDkB1xWK5hErhWMagxohFcBl6KI,2294
|
|
13
|
-
ocf_data_sampler/load/site.py,sha256=hMdoF6sn2PcSBfF2soj7nuQoK9SItaxDXco5nk2n-44,1232
|
|
14
|
-
ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
|
|
15
|
-
ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
|
|
16
|
-
ocf_data_sampler/load/nwp/nwp.py,sha256=Jyq1dE7DN0iSe6iSEGA76uu9LoeJz9FzfEUkq6ZZExQ,565
|
|
17
|
-
ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=8rYZKdV62AdczVNSOJ2G0BM4-fRFRV0_y5zkHgNYkQs,1004
|
|
19
|
-
ocf_data_sampler/load/nwp/providers/ukv.py,sha256=dM_kvUI0xk9xEdslXqZGjOPP96PEw3qAci5mPUgUvxA,1014
|
|
20
|
-
ocf_data_sampler/load/nwp/providers/utils.py,sha256=MFOZ5ZXLu3-SxYVJExdlo30b3y3s5ebRx3_6DO-33FQ,780
|
|
21
|
-
ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
|
|
22
|
-
ocf_data_sampler/numpy_sample/collate.py,sha256=oX5axq30sCsSquhNbmWAVMjM54HT1v3MCMopYHcO5Q0,1950
|
|
23
|
-
ocf_data_sampler/numpy_sample/datetime_features.py,sha256=D0RajbnBjg15qjYk16h2H0XO4wH3fw-x0--4VC2nq0s,1204
|
|
24
|
-
ocf_data_sampler/numpy_sample/gsp.py,sha256=uBquCFCoWuhJKY8sXpgsTCUDWUuLuv1XeixtFnFw6KU,1115
|
|
25
|
-
ocf_data_sampler/numpy_sample/nwp.py,sha256=Tiba-es23XeyMoEPgZUpLT6EnJCGU9A_1MdY6qkE7bM,1015
|
|
26
|
-
ocf_data_sampler/numpy_sample/satellite.py,sha256=RdXMdGGXysUx-AdL9T33yFOlxprtIdPNBKKX99-mhpY,991
|
|
27
|
-
ocf_data_sampler/numpy_sample/site.py,sha256=TvoEU85fmjYW8pD9UZOyUUACjimdQYxEzulQXunRO6Q,1425
|
|
28
|
-
ocf_data_sampler/numpy_sample/sun_position.py,sha256=ithM--eztAhiIQ1g52tlxgj-tMKbsJzx8mk6CgV2tzk,1613
|
|
29
|
-
ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
|
|
30
|
-
ocf_data_sampler/sample/base.py,sha256=IH3HbfqEUwjHmq-h2eJYLd8Jk-0ZcOylnehMyCPMV38,2223
|
|
31
|
-
ocf_data_sampler/sample/site.py,sha256=ONf2Yz5zi8Ombd_znA4T7NXbO01F76kQsBZv6rfnC74,1343
|
|
32
|
-
ocf_data_sampler/sample/uk_regional.py,sha256=KhJ5Ik1pZRp7PgIJjGIrE4i7SQnIdVjUbBHnfn-7ghg,2649
|
|
33
|
-
ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
|
|
34
|
-
ocf_data_sampler/select/dropout.py,sha256=Pgov9P7rQMkSdqluG_hwm8loGyYNFOg-3PJUBLN_kjU,1526
|
|
35
|
-
ocf_data_sampler/select/fill_time_periods.py,sha256=EIcXG-77aQVOAYNwbDBEv6SGf6DO2p1WMEf96iW4MEM,596
|
|
36
|
-
ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=IwPQwvgu4cOiAZ5Gbjflv3fnQCcs0EVK0g4V6yqqSgw,11129
|
|
37
|
-
ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
|
|
38
|
-
ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
|
|
39
|
-
ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
|
|
40
|
-
ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
|
|
41
|
-
ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
|
|
42
|
-
ocf_data_sampler/select/time_slice_for_dataset.py,sha256=Z7pOiilSHScxmBKZNG18K5J-S4ifdXXAYGZoHRHD3AY,4324
|
|
43
|
-
ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
|
|
44
|
-
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZgfvVCcEU3dj3RoY0zdBdKGppC7Wm81qecqB17gYTmE,12286
|
|
45
|
-
ocf_data_sampler/torch_datasets/datasets/site.py,sha256=_uHmqg-VJu-MHgXc5JFDX1noPfH6E8nY4XhQmsrOav4,16325
|
|
46
|
-
ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
|
|
47
|
-
ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
|
|
48
|
-
ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=u2EpiFAKAOHpmvINhOUJCT8Vbc-cle6qJ3YNVse4yLs,2884
|
|
49
|
-
scripts/refactor_site.py,sha256=xaJGxt2_WObIPrPAnRiOMMB68r-5Q51jWRx409AcscM,1747
|
|
50
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
tests/conftest.py,sha256=k7nM3u2YJmkMupN4SIbJP3BRoxNR1dpIoo2fPFf0abg,8588
|
|
52
|
-
tests/config/test_config.py,sha256=CzYVhAUpgT4lvQdIddtVxtJeMqYL_TJolfeIwaaohq4,3969
|
|
53
|
-
tests/config/test_load.py,sha256=8nui2UsgK_eufWGD74yXvf-6eY_SxBFKhDmGYUtRQxw,260
|
|
54
|
-
tests/config/test_save.py,sha256=BxSd2S50-bRPIXP_4iX0B6Wt7pRFJnUbLYtzfLaqlAs,915
|
|
55
|
-
tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
|
|
56
|
-
tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
|
|
57
|
-
tests/load/test_load_satellite.py,sha256=IQ8ISRZKCEoi8IsJoPpXZJTolD0mwjnl2E7762RM_PM,524
|
|
58
|
-
tests/load/test_load_sites.py,sha256=6V-U3_EtBklkV7w-hOoR4nba3dSaZ_cnjuRWFs8kYVU,405
|
|
59
|
-
tests/numpy_sample/test_collate.py,sha256=RqHCD5_LTRpe4r6kqC_2TKhmhM_IHYM0ZtFUvSjDqcM,654
|
|
60
|
-
tests/numpy_sample/test_datetime_features.py,sha256=iR9WdBLj1nIBNqoaTFE9rkUaH1eKFJSNb96nwiEaQH0,1449
|
|
61
|
-
tests/numpy_sample/test_gsp.py,sha256=FLlq4SlJ-9cSRAepf4_ksA6PsUVKegnKEAc5pUojCJ0,1458
|
|
62
|
-
tests/numpy_sample/test_nwp.py,sha256=Lnd-PMa6gI-fSIJkSZ554QiHFfnwxeXZxLg-rpuBv1U,442
|
|
63
|
-
tests/numpy_sample/test_satellite.py,sha256=cCqtn5See-uSNfh89COGTUQNuFm6sIZ8QmBVHsuUeRI,1189
|
|
64
|
-
tests/numpy_sample/test_sun_position.py,sha256=_ENYzsNBVPdNXf--FI-UUFqw2u5w7_zqw6LcENU2uZM,2504
|
|
65
|
-
tests/select/test_dropout.py,sha256=aQuSSqZF9RxBjN9-ogkQ8O-_zktAM30CrT1Lz7j1hMg,2222
|
|
66
|
-
tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
|
|
67
|
-
tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
|
|
68
|
-
tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
|
|
69
|
-
tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
|
|
70
|
-
tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
|
|
71
|
-
tests/test_sample/test_base.py,sha256=sD9NZghYQWbkAcQP9YXypWZowqYkO3xeNMH-_mEoD5I,4833
|
|
72
|
-
tests/test_sample/test_site_sample.py,sha256=8HNenhIWYouCQu4y389PDQGokSPI5jQ4lS4CG-eA1Y8,5382
|
|
73
|
-
tests/test_sample/test_uk_regional_sample.py,sha256=MFibX9-M8mFK7vwMPu58gAG2VoY6y7w7chW5BlZclwk,3962
|
|
74
|
-
tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
|
|
75
|
-
tests/torch_datasets/test_pvnet_uk.py,sha256=hgD_IDa4D8cgc4cgK1UqKYkT6sFlrTMAvgVn_iwD5_4,5086
|
|
76
|
-
tests/torch_datasets/test_site.py,sha256=t57vAR_RRWcbG_kEFk6VrFCYzVxwFG6qJKBnRHF02fM,7000
|
|
77
|
-
tests/torch_datasets/test_validate_channels_utils.py,sha256=Rzdweu98j1of45jCOUrSiBtyPlf-dDaCceulf0H7ml8,2921
|
|
78
|
-
ocf_data_sampler-0.1.11.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
|
|
79
|
-
ocf_data_sampler-0.1.11.dist-info/METADATA,sha256=d8wctSlRyDbP1_yYHFvIGQgEC8DmOkM8h-ITI4XFuPw,12174
|
|
80
|
-
ocf_data_sampler-0.1.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
81
|
-
ocf_data_sampler-0.1.11.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
|
|
82
|
-
ocf_data_sampler-0.1.11.dist-info/RECORD,,
|
tests/__init__.py
DELETED
|
File without changes
|
tests/config/test_config.py
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
from pydantic import ValidationError
|
|
3
|
-
from ocf_data_sampler.config import load_yaml_configuration, Configuration
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_default_configuration():
|
|
7
|
-
"""Test default pydantic class"""
|
|
8
|
-
_ = Configuration()
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def test_extra_field_error():
|
|
12
|
-
"""
|
|
13
|
-
Check an extra parameters in config causes error
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
configuration = Configuration()
|
|
17
|
-
configuration_dict = configuration.model_dump()
|
|
18
|
-
configuration_dict["extra_field"] = "extra_value"
|
|
19
|
-
with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
|
|
20
|
-
_ = Configuration(**configuration_dict)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_incorrect_interval_start_minutes(test_config_filename):
|
|
24
|
-
"""
|
|
25
|
-
Check a history length not divisible by time resolution causes error
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
29
|
-
|
|
30
|
-
configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
|
|
31
|
-
with pytest.raises(
|
|
32
|
-
ValueError,
|
|
33
|
-
match="interval_start_minutes.*must be divisible.*time_resolution_minutes.*"
|
|
34
|
-
):
|
|
35
|
-
_ = Configuration(**configuration.model_dump())
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def test_incorrect_interval_end_minutes(test_config_filename):
|
|
39
|
-
"""
|
|
40
|
-
Check a forecast length not divisible by time resolution causes error
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
44
|
-
|
|
45
|
-
configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
|
|
46
|
-
with pytest.raises(
|
|
47
|
-
ValueError,
|
|
48
|
-
match="interval_end_minutes.*must be divisible.*time_resolution_minutes.*"
|
|
49
|
-
):
|
|
50
|
-
_ = Configuration(**configuration.model_dump())
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def test_incorrect_nwp_provider(test_config_filename):
|
|
54
|
-
"""
|
|
55
|
-
Check an unexpected nwp provider causes error
|
|
56
|
-
"""
|
|
57
|
-
|
|
58
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
59
|
-
|
|
60
|
-
configuration.input_data.nwp['ukv'].provider = "unexpected_provider"
|
|
61
|
-
with pytest.raises(Exception, match="NWP provider"):
|
|
62
|
-
_ = Configuration(**configuration.model_dump())
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def test_incorrect_dropout(test_config_filename):
|
|
66
|
-
"""
|
|
67
|
-
Check a dropout timedelta over 0 causes error and 0 doesn't
|
|
68
|
-
"""
|
|
69
|
-
|
|
70
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
71
|
-
|
|
72
|
-
# check a positive number is not allowed
|
|
73
|
-
configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [120]
|
|
74
|
-
with pytest.raises(Exception, match="Dropout timedeltas must be negative"):
|
|
75
|
-
_ = Configuration(**configuration.model_dump())
|
|
76
|
-
|
|
77
|
-
# check 0 is allowed
|
|
78
|
-
configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [0]
|
|
79
|
-
_ = Configuration(**configuration.model_dump())
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def test_incorrect_dropout_fraction(test_config_filename):
|
|
83
|
-
"""
|
|
84
|
-
Check dropout fraction outside of range causes error
|
|
85
|
-
"""
|
|
86
|
-
|
|
87
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
88
|
-
|
|
89
|
-
configuration.input_data.nwp['ukv'].dropout_fraction= 1.1
|
|
90
|
-
|
|
91
|
-
with pytest.raises(ValidationError, match="Input should be less than or equal to 1"):
|
|
92
|
-
_ = Configuration(**configuration.model_dump())
|
|
93
|
-
|
|
94
|
-
configuration.input_data.nwp['ukv'].dropout_fraction= -0.1
|
|
95
|
-
with pytest.raises(ValidationError, match="Input should be greater than or equal to 0"):
|
|
96
|
-
_ = Configuration(**configuration.model_dump())
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def test_inconsistent_dropout_use(test_config_filename):
|
|
100
|
-
"""
|
|
101
|
-
Check dropout fraction outside of range causes error
|
|
102
|
-
"""
|
|
103
|
-
|
|
104
|
-
configuration = load_yaml_configuration(test_config_filename)
|
|
105
|
-
configuration.input_data.satellite.dropout_fraction= 1.0
|
|
106
|
-
configuration.input_data.satellite.dropout_timedeltas_minutes = []
|
|
107
|
-
|
|
108
|
-
with pytest.raises(ValueError, match="To dropout fraction > 0 requires a list of dropout timedeltas"):
|
|
109
|
-
_ = Configuration(**configuration.model_dump())
|
|
110
|
-
configuration.input_data.satellite.dropout_fraction= 0.0
|
|
111
|
-
configuration.input_data.satellite.dropout_timedeltas_minutes = [-120, -60]
|
|
112
|
-
with pytest.raises(ValueError, match="To use dropout timedeltas dropout fraction should be > 0"):
|
|
113
|
-
_ = Configuration(**configuration.model_dump())
|