ocf-data-sampler 0.2.33__tar.gz → 0.2.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. More details were linked from the original registry page.

Files changed (69)
  1. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/gsp.py +32 -12
  3. ocf_data_sampler-0.2.35/ocf_data_sampler/load/nwp/nwp.py +115 -0
  4. ocf_data_sampler-0.2.35/ocf_data_sampler/load/nwp/providers/icon.py +37 -0
  5. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/satellite.py +21 -4
  6. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/site.py +20 -2
  7. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
  8. ocf_data_sampler-0.2.33/ocf_data_sampler/load/nwp/nwp.py +0 -47
  9. ocf_data_sampler-0.2.33/ocf_data_sampler/load/nwp/providers/icon.py +0 -46
  10. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/LICENSE +0 -0
  11. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/README.md +0 -0
  12. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/__init__.py +0 -0
  13. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/config/__init__.py +0 -0
  14. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/config/load.py +0 -0
  15. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/config/model.py +0 -0
  16. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/config/save.py +0 -0
  17. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  18. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  19. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/__init__.py +0 -0
  20. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/load_dataset.py +0 -0
  21. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  22. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  23. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
  24. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  25. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  26. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  27. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  28. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/load/utils.py +0 -0
  29. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  30. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  31. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  32. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  33. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  34. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  35. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  36. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/site.py +0 -0
  37. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  38. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/__init__.py +0 -0
  39. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/dropout.py +0 -0
  40. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  41. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  42. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/geospatial.py +0 -0
  43. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/location.py +0 -0
  44. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  45. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/select/select_time_slice.py +0 -0
  46. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  47. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +0 -0
  48. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  49. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  50. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  51. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  52. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  53. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  54. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +0 -0
  55. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  56. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  57. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  58. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  59. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  60. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler/utils.py +0 -0
  61. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  62. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  63. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler.egg-info/requires.txt +0 -0
  64. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  65. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/pyproject.toml +0 -0
  66. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/scripts/download_gsp_location_data.py +0 -0
  67. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/scripts/refactor_site.py +0 -0
  68. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/setup.cfg +0 -0
  69. {ocf_data_sampler-0.2.33 → ocf_data_sampler-0.2.35}/utils/compute_icon_mean_stddev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.33
3
+ Version: 0.2.35
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -2,6 +2,7 @@
2
2
 
3
3
  from importlib.resources import files
4
4
 
5
+ import numpy as np
5
6
  import pandas as pd
6
7
  import xarray as xr
7
8
 
@@ -26,11 +27,12 @@ def get_gsp_boundaries(version: str) -> pd.DataFrame:
26
27
  )
27
28
 
28
29
 
29
- def open_gsp(zarr_path: str,
30
- boundaries_version: str = "20220314",
31
- public: bool = False,
32
- ) -> xr.DataArray:
33
- """Open the GSP data.
30
+ def open_gsp(
31
+ zarr_path: str,
32
+ boundaries_version: str = "20220314",
33
+ public: bool = False,
34
+ ) -> xr.DataArray:
35
+ """Open the GSP data and validates its data types.
34
36
 
35
37
  Args:
36
38
  zarr_path: Path to the GSP zarr data
@@ -44,18 +46,16 @@ def open_gsp(zarr_path: str,
44
46
  # Load UK GSP locations
45
47
  df_gsp_loc = get_gsp_boundaries(boundaries_version)
46
48
 
47
- backend_kwargs ={}
49
+ backend_kwargs = {}
48
50
  # Open the GSP generation data
49
51
  if public:
50
- backend_kwargs ={"storage_options":{"anon": True}}
52
+ backend_kwargs = {"storage_options": {"anon": True}}
51
53
  # Currently only compatible with S3 bucket.
52
54
 
53
- ds = (
54
- xr.open_dataset(zarr_path,engine="zarr",backend_kwargs=backend_kwargs)
55
- .rename({"datetime_gmt": "time_utc"})
55
+ ds = xr.open_dataset(zarr_path, engine="zarr", backend_kwargs=backend_kwargs).rename(
56
+ {"datetime_gmt": "time_utc"},
56
57
  )
57
58
 
58
-
59
59
  if not (ds.gsp_id.isin(df_gsp_loc.index)).all():
60
60
  raise ValueError(
61
61
  "Some GSP IDs in the GSP generation data are not available in the locations file.",
@@ -72,4 +72,24 @@ def open_gsp(zarr_path: str,
72
72
  effective_capacity_mwp=ds.capacity_mwp,
73
73
  )
74
74
 
75
- return ds.generation_mw
75
+ gsp_da = ds.generation_mw
76
+
77
+ # Validate data types directly in loading function
78
+ if not np.issubdtype(gsp_da.dtype, np.floating):
79
+ raise TypeError(f"generation_mw should be floating, not {gsp_da.dtype}")
80
+
81
+ coord_dtypes = {
82
+ "time_utc": np.datetime64,
83
+ "gsp_id": np.integer,
84
+ "nominal_capacity_mwp": np.floating,
85
+ "effective_capacity_mwp": np.floating,
86
+ "x_osgb": np.floating,
87
+ "y_osgb": np.floating,
88
+ }
89
+
90
+ for coord, expected_dtype in coord_dtypes.items():
91
+ if not np.issubdtype(gsp_da.coords[coord].dtype, expected_dtype):
92
+ dtype = gsp_da.coords[coord].dtype
93
+ raise TypeError(f"{coord} should be {expected_dtype.__name__}, not {dtype}")
94
+
95
+ return gsp_da
@@ -0,0 +1,115 @@
1
+ """Module for opening NWP data."""
2
+
3
+ import numpy as np
4
+ import xarray as xr
5
+
6
+ from ocf_data_sampler.load.nwp.providers.cloudcasting import open_cloudcasting
7
+ from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
8
+ from ocf_data_sampler.load.nwp.providers.gfs import open_gfs
9
+ from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
10
+ from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
11
+
12
+
13
+ def _validate_nwp_data(data_array: xr.DataArray, provider: str) -> None:
14
+ """Validates the structure and data types of a loaded NWP DataArray.
15
+
16
+ This helper function is extracted to keep the main `open_nwp` function clean.
17
+
18
+ Args:
19
+ data_array: The xarray.DataArray to validate.
20
+ provider: The NWP provider name.
21
+
22
+ Raises:
23
+ TypeError: If the data or any coordinate has an unexpected dtype.
24
+ ValueError: If a required coordinate is missing.
25
+ """
26
+ if not np.issubdtype(data_array.dtype, np.number):
27
+ raise TypeError(f"NWP data for {provider} should be numeric, not {data_array.dtype}")
28
+
29
+ common_expected_dtypes = {
30
+ "init_time_utc": np.datetime64,
31
+ "step": np.timedelta64,
32
+ "channel": (np.str_, np.object_),
33
+ }
34
+
35
+ geographic_spatial_dtypes = {
36
+ "latitude": np.floating,
37
+ "longitude": np.floating,
38
+ }
39
+
40
+ provider_specific_spatial_dtypes = {
41
+ "ecmwf": geographic_spatial_dtypes,
42
+ "icon-eu": geographic_spatial_dtypes,
43
+ "gfs": geographic_spatial_dtypes,
44
+ "mo_global": geographic_spatial_dtypes,
45
+ "ukv": {
46
+ "x_osgb": np.floating,
47
+ "y_osgb": np.floating,
48
+ },
49
+ "cloudcasting": {
50
+ "x_geostationary": np.floating,
51
+ "y_geostationary": np.floating,
52
+ },
53
+ }
54
+
55
+ expected_dtypes = {
56
+ **common_expected_dtypes,
57
+ **provider_specific_spatial_dtypes.get(provider, {}),
58
+ }
59
+
60
+ for coord, expected_dtype in expected_dtypes.items():
61
+ if coord not in data_array.coords:
62
+ raise ValueError(f"Coordinate '{coord}' missing for provider '{provider}'")
63
+
64
+ actual_dtype = data_array.coords[coord].dtype
65
+
66
+ if not np.issubdtype(actual_dtype, expected_dtype):
67
+ if isinstance(expected_dtype, tuple):
68
+ expected_name_str = " or ".join([t.__name__ for t in expected_dtype])
69
+ else:
70
+ expected_name_str = expected_dtype.__name__
71
+
72
+ err_msg = (
73
+ f"'{coord}' for {provider} should be {expected_name_str}, "
74
+ f"not {actual_dtype.name}"
75
+ )
76
+ raise TypeError(err_msg)
77
+
78
+
79
+ def open_nwp(
80
+ zarr_path: str | list[str],
81
+ provider: str,
82
+ public: bool = False,
83
+ ) -> xr.DataArray:
84
+ """Opens NWP zarr and validates its structure and data types.
85
+
86
+ Args:
87
+ zarr_path: path to the zarr file
88
+ provider: NWP provider
89
+ public: Whether the data is public or private (only for GFS)
90
+ """
91
+ provider = provider.lower()
92
+
93
+ kwargs = {
94
+ "zarr_path": zarr_path,
95
+ }
96
+ if provider == "ukv":
97
+ _open_nwp = open_ukv
98
+ elif provider in ["ecmwf", "mo_global"]:
99
+ _open_nwp = open_ifs
100
+ elif provider == "icon-eu":
101
+ _open_nwp = open_icon_eu
102
+ elif provider == "gfs":
103
+ _open_nwp = open_gfs
104
+ # GFS has a public/private flag
105
+ if public:
106
+ kwargs["public"] = True
107
+ elif provider == "cloudcasting":
108
+ _open_nwp = open_cloudcasting
109
+ else:
110
+ raise ValueError(f"Unknown provider: {provider}")
111
+
112
+ data_array = _open_nwp(**kwargs)
113
+ _validate_nwp_data(data_array, provider)
114
+
115
+ return data_array
@@ -0,0 +1,37 @@
1
+ """DWD ICON Loading."""
2
+
3
+ import xarray as xr
4
+
5
+ from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
6
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
7
+
8
+
9
+ def open_icon_eu(zarr_path: str | list[str]) -> xr.DataArray:
10
+ """Opens the ICON data.
11
+
12
+ ICON EU Data is now expected to be on a regular lat/lon grid,
13
+ with a 'channel' dimension directly available (as per the updated fixture).
14
+ The 'isobaricInhPa' dimension is expected to be already handled.
15
+
16
+ Args:
17
+ zarr_path: Path to the zarr(s) to open
18
+
19
+ Returns:
20
+ Xarray DataArray of the NWP data
21
+ """
22
+ # Open and check initially
23
+ ds = open_zarr_paths(zarr_path, time_dim="init_time_utc")
24
+
25
+ if "icon_eu_data" in ds.data_vars:
26
+ nwp = ds["icon_eu_data"]
27
+ else:
28
+ raise ValueError("Could not find 'icon_eu_data' DataArray in the ICON-EU Zarr file.")
29
+
30
+ check_time_unique_increasing(nwp.init_time_utc)
31
+
32
+ # 0-78 one hour steps, rest 3 hour steps
33
+ nwp = nwp.isel(step=slice(0, 78))
34
+ nwp = nwp.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
35
+ nwp = make_spatial_coords_increasing(nwp, x_coord="longitude", y_coord="latitude")
36
+
37
+ return nwp
@@ -1,5 +1,5 @@
1
1
  """Satellite loader."""
2
-
2
+ import numpy as np
3
3
  import xarray as xr
4
4
 
5
5
  from ocf_data_sampler.load.utils import (
@@ -44,7 +44,7 @@ def get_single_sat_data(zarr_path: str) -> xr.Dataset:
44
44
 
45
45
 
46
46
  def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
47
- """Lazily opens the zarr store.
47
+ """Lazily opens the zarr store and validates data types.
48
48
 
49
49
  Args:
50
50
  zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL,
@@ -72,5 +72,22 @@ def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
72
72
  ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
73
73
  ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
74
74
 
75
- # TODO: should we control the dtype of the DataArray?
76
- return get_xr_data_array_from_xr_dataset(ds)
75
+ data_array = get_xr_data_array_from_xr_dataset(ds)
76
+
77
+ # Validate data types directly loading function
78
+ if not np.issubdtype(data_array.dtype, np.number):
79
+ raise TypeError(f"Satellite data should be numeric, not {data_array.dtype}")
80
+
81
+ coord_dtypes = {
82
+ "time_utc": np.datetime64,
83
+ "channel": np.str_,
84
+ "x_geostationary": np.floating,
85
+ "y_geostationary": np.floating,
86
+ }
87
+
88
+ for coord, expected_dtype in coord_dtypes.items():
89
+ if not np.issubdtype(data_array.coords[coord].dtype, expected_dtype):
90
+ dtype = data_array.coords[coord].dtype
91
+ raise TypeError(f"{coord} should be {expected_dtype.__name__}, not {dtype}")
92
+
93
+ return data_array
@@ -16,7 +16,6 @@ def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArra
16
16
  xr.DataArray: The opened site generation data
17
17
  """
18
18
  generation_ds = xr.open_dataset(generation_file_path)
19
-
20
19
  metadata_df = pd.read_csv(metadata_file_path, index_col="site_id")
21
20
 
22
21
  if not metadata_df.index.is_unique:
@@ -38,4 +37,23 @@ def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArra
38
37
  if not (generation_ds.capacity_kwp.values > 0).all():
39
38
  raise ValueError("capacity_kwp contains non-positive values")
40
39
 
41
- return generation_ds.generation_kw
40
+ site_da = generation_ds.generation_kw
41
+
42
+ # Validate data types directly in loading function
43
+ if not np.issubdtype(site_da.dtype, np.floating):
44
+ raise TypeError(f"Generation data should be float, not {site_da.dtype}")
45
+
46
+ coord_dtypes = {
47
+ "time_utc": np.datetime64,
48
+ "site_id": np.integer,
49
+ "capacity_kwp": np.floating,
50
+ "latitude": np.floating,
51
+ "longitude": np.floating,
52
+ }
53
+
54
+ for coord, expected_dtype in coord_dtypes.items():
55
+ if not np.issubdtype(site_da.coords[coord].dtype, expected_dtype):
56
+ dtype = site_da.coords[coord].dtype
57
+ raise TypeError(f"{coord} should be {expected_dtype.__name__}, not {dtype}")
58
+
59
+ return site_da
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.33
3
+ Version: 0.2.35
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -1,47 +0,0 @@
1
- """Module for opening NWP data."""
2
-
3
- import xarray as xr
4
-
5
- from ocf_data_sampler.load.nwp.providers.cloudcasting import open_cloudcasting
6
- from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
7
- from ocf_data_sampler.load.nwp.providers.gfs import open_gfs
8
- from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
9
- from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
10
-
11
-
12
- def open_nwp(zarr_path: str | list[str], provider: str, public: bool = False) -> xr.DataArray:
13
- """Opens NWP zarr.
14
-
15
- Args:
16
- zarr_path: path to the zarr file
17
- provider: NWP provider
18
- public: Whether the data is public or private (only for GFS)
19
-
20
- Returns:
21
- Xarray DataArray of the NWP data
22
- """
23
- provider = provider.lower()
24
-
25
- kwargs = {
26
- "zarr_path": zarr_path,
27
- }
28
-
29
- if provider == "ukv":
30
- _open_nwp = open_ukv
31
- elif provider in ["ecmwf", "mo_global"]:
32
- _open_nwp = open_ifs
33
- elif provider == "icon-eu":
34
- _open_nwp = open_icon_eu
35
- elif provider == "gfs":
36
- _open_nwp = open_gfs
37
-
38
- # GFS has a public/private flag
39
- if public:
40
- kwargs["public"] = True
41
-
42
- elif provider == "cloudcasting":
43
- _open_nwp = open_cloudcasting
44
- else:
45
- raise ValueError(f"Unknown provider: {provider}")
46
-
47
- return _open_nwp(**kwargs)
@@ -1,46 +0,0 @@
1
- """DWD ICON Loading."""
2
-
3
- import xarray as xr
4
-
5
- from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
6
- from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
7
-
8
-
9
- def remove_isobaric_lelvels_from_coords(nwp: xr.Dataset) -> xr.Dataset:
10
- """Removes the isobaric levels from the coordinates of the NWP data.
11
-
12
- Args:
13
- nwp: NWP data
14
-
15
- Returns:
16
- NWP data without isobaric levels in the coordinates
17
- """
18
- variables_to_drop = [var for var in nwp.data_vars if "isobaricInhPa" in nwp[var].dims]
19
- return nwp.drop_vars(["isobaricInhPa", *variables_to_drop])
20
-
21
-
22
- def open_icon_eu(zarr_path: str | list[str]) -> xr.Dataset:
23
- """Opens the ICON data.
24
-
25
- ICON EU Data is on a regular lat/lon grid
26
- It has data on multiple pressure levels, as well as the surface
27
- Each of the variables is its own data variable
28
-
29
- Args:
30
- zarr_path: Path to the zarr(s) to open
31
-
32
- Returns:
33
- Xarray DataArray of the NWP data
34
- """
35
- # Open the data
36
- nwp = open_zarr_paths(zarr_path, time_dim="time")
37
- nwp = nwp.rename({"time": "init_time_utc"})
38
- # Sanity checks.
39
- check_time_unique_increasing(nwp.init_time_utc)
40
- # 0-78 one hour steps, rest 3 hour steps
41
- nwp = nwp.isel(step=slice(0, 78))
42
- nwp = remove_isobaric_lelvels_from_coords(nwp)
43
- nwp = nwp.to_array().rename({"variable": "channel"})
44
- nwp = nwp.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
45
- nwp = make_spatial_coords_increasing(nwp, x_coord="longitude", y_coord="latitude")
46
- return nwp