ocf-data-sampler 0.2.18__tar.gz → 0.2.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (67)
  1. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/config/model.py +1 -0
  3. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/load_dataset.py +5 -1
  4. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/nwp.py +12 -2
  5. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +2 -4
  6. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/ecmwf.py +1 -1
  7. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/gfs.py +5 -4
  8. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/icon.py +2 -2
  9. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/ukv.py +1 -1
  10. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/utils.py +16 -7
  11. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
  12. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/LICENSE +0 -0
  13. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/README.md +0 -0
  14. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/__init__.py +0 -0
  15. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/config/__init__.py +0 -0
  16. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/config/load.py +0 -0
  17. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/config/save.py +0 -0
  18. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  19. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  20. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/__init__.py +0 -0
  21. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/gsp.py +0 -0
  22. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  23. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  24. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/satellite.py +0 -0
  25. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/site.py +0 -0
  26. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/load/utils.py +0 -0
  27. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  28. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  29. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  30. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  31. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  32. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  33. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  34. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/site.py +0 -0
  35. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  36. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/__init__.py +0 -0
  37. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/dropout.py +0 -0
  38. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  39. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  40. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/geospatial.py +0 -0
  41. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/location.py +0 -0
  42. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  43. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/select/select_time_slice.py +0 -0
  44. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  45. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +0 -0
  46. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  47. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  48. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  49. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  50. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  51. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  52. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +0 -0
  53. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  54. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  55. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  56. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  57. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  58. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler/utils.py +0 -0
  59. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  60. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  61. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler.egg-info/requires.txt +0 -0
  62. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  63. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/pyproject.toml +0 -0
  64. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/scripts/download_gsp_location_data.py +0 -0
  65. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/scripts/refactor_site.py +0 -0
  66. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/setup.cfg +0 -0
  67. {ocf_data_sampler-0.2.18 → ocf_data_sampler-0.2.20}/utils/compute_icon_mean_stddev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.18
3
+ Version: 0.2.20
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -211,6 +211,7 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConsta
211
211
  " used to construct an example. If set to None, then the max staleness is set according to"
212
212
  " the maximum forecast horizon of the NWP and the requested forecast length.",
213
213
  )
214
+ public: bool = Field(False, description="Whether the NWP data is public or private")
214
215
 
215
216
  @field_validator("provider")
216
217
  def validate_provider(cls, v: str) -> str:
@@ -38,7 +38,11 @@ def get_dataset_dict(
38
38
  if input_config.nwp:
39
39
  datasets_dict["nwp"] = {}
40
40
  for nwp_source, nwp_config in input_config.nwp.items():
41
- da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
41
+ da_nwp = open_nwp(
42
+ zarr_path=nwp_config.zarr_path,
43
+ provider=nwp_config.provider,
44
+ public=nwp_config.public,
45
+ )
42
46
 
43
47
  da_nwp = da_nwp.sel(channel=list(nwp_config.channels))
44
48
 
@@ -9,18 +9,23 @@ from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
9
9
  from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
10
10
 
11
11
 
12
- def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
12
+ def open_nwp(zarr_path: str | list[str], provider: str, public: bool = False) -> xr.DataArray:
13
13
  """Opens NWP zarr.
14
14
 
15
15
  Args:
16
16
  zarr_path: path to the zarr file
17
17
  provider: NWP provider
18
+ public: Whether the data is public or private (only for GFS)
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
21
22
  """
22
23
  provider = provider.lower()
23
24
 
25
+ kwargs = {
26
+ "zarr_path": zarr_path,
27
+ }
28
+
24
29
  if provider == "ukv":
25
30
  _open_nwp = open_ukv
26
31
  elif provider == "ecmwf":
@@ -29,9 +34,14 @@ def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
29
34
  _open_nwp = open_icon_eu
30
35
  elif provider == "gfs":
31
36
  _open_nwp = open_gfs
37
+
38
+ # GFS has a public/private flag
39
+ if public:
40
+ kwargs["public"] = True
41
+
32
42
  elif provider == "cloudcasting":
33
43
  _open_nwp = open_cloudcasting
34
44
  else:
35
45
  raise ValueError(f"Unknown provider: {provider}")
36
46
 
37
- return _open_nwp(zarr_path)
47
+ return _open_nwp(**kwargs)
@@ -1,7 +1,5 @@
1
1
  """Cloudcasting provider loader."""
2
2
 
3
- from pathlib import Path
4
-
5
3
  import xarray as xr
6
4
 
7
5
  from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
@@ -12,14 +10,14 @@ from ocf_data_sampler.load.utils import (
12
10
  )
13
11
 
14
12
 
15
- def open_cloudcasting(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
13
+ def open_cloudcasting(zarr_path: str | list[str]) -> xr.DataArray:
16
14
  """Opens the satellite predictions from cloudcasting.
17
15
 
18
16
  Cloudcasting is a OCF forecast product. We forecast future satellite images from recent
19
17
  satellite images. More information can be found in the references below.
20
18
 
21
19
  Args:
22
- zarr_path: Path to the zarr to open
20
+ zarr_path: Path to the zarr(s) to open
23
21
 
24
22
  Returns:
25
23
  Xarray DataArray of the cloudcasting data
@@ -14,7 +14,7 @@ def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the ECMWF IFS NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -10,11 +10,12 @@ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spati
10
10
  _log = logging.getLogger(__name__)
11
11
 
12
12
 
13
- def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
13
+ def open_gfs(zarr_path: str | list[str], public: bool = False) -> xr.DataArray:
14
14
  """Opens the GFS data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
+ public: Whether the data is public or private
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
@@ -22,12 +23,12 @@ def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
22
23
  _log.info("Loading NWP GFS data")
23
24
 
24
25
  # Open data
25
- gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc")
26
+ gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc", public=public)
26
27
  nwp: xr.DataArray = gfs.to_array()
28
+ nwp = nwp.rename({"variable": "channel"}) # `variable` appears when using `to_array`
27
29
 
28
30
  del gfs
29
31
 
30
- nwp = nwp.rename({"variable": "channel","init_time": "init_time_utc"})
31
32
  check_time_unique_increasing(nwp.init_time_utc)
32
33
  nwp = make_spatial_coords_increasing(nwp, x_coord="longitude", y_coord="latitude")
33
34
 
@@ -19,7 +19,7 @@ def remove_isobaric_lelvels_from_coords(nwp: xr.Dataset) -> xr.Dataset:
19
19
  return nwp.drop_vars(["isobaricInhPa", *variables_to_drop])
20
20
 
21
21
 
22
- def open_icon_eu(zarr_path: str) -> xr.Dataset:
22
+ def open_icon_eu(zarr_path: str | list[str]) -> xr.Dataset:
23
23
  """Opens the ICON data.
24
24
 
25
25
  ICON EU Data is on a regular lat/lon grid
@@ -27,7 +27,7 @@ def open_icon_eu(zarr_path: str) -> xr.Dataset:
27
27
  Each of the variables is its own data variable
28
28
 
29
29
  Args:
30
- zarr_path: Path to the zarr to open
30
+ zarr_path: Path to the zarr(s) to open
31
31
 
32
32
  Returns:
33
33
  Xarray DataArray of the NWP data
@@ -14,7 +14,7 @@ def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -3,32 +3,41 @@
3
3
  import xarray as xr
4
4
 
5
5
 
6
- def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
6
+ def open_zarr_paths(
7
+ zarr_path: str | list[str], time_dim: str = "init_time", public: bool = False,
8
+ ) -> xr.Dataset:
7
9
  """Opens the NWP data.
8
10
 
9
11
  Args:
10
12
  zarr_path: Path to the zarr(s) to open
11
13
  time_dim: Name of the time dimension
14
+ public: Whether the data is public or private
12
15
 
13
16
  Returns:
14
17
  The opened Xarray Dataset
15
18
  """
19
+ general_kwargs = {
20
+ "engine": "zarr",
21
+ "chunks": "auto",
22
+ "decode_timedelta": True,
23
+ }
24
+
25
+ if public:
26
+ # note this only works for s3 zarr paths at the moment
27
+ general_kwargs["storage_options"] = {"anon": True}
28
+
16
29
  if type(zarr_path) in [list, tuple] or "*" in str(zarr_path): # Multi-file dataset
17
30
  ds = xr.open_mfdataset(
18
31
  zarr_path,
19
- engine="zarr",
20
32
  concat_dim=time_dim,
21
33
  combine="nested",
22
- chunks="auto",
23
- decode_timedelta=True,
34
+ **general_kwargs,
24
35
  ).sortby(time_dim)
25
36
  else:
26
37
  ds = xr.open_dataset(
27
38
  zarr_path,
28
- engine="zarr",
29
39
  consolidated=True,
30
40
  mode="r",
31
- chunks="auto",
32
- decode_timedelta=True,
41
+ **general_kwargs,
33
42
  )
34
43
  return ds
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.18
3
+ Version: 0.2.20
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License