ocf-data-sampler 0.2.19__tar.gz → 0.2.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (67)
  1. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/config/model.py +1 -0
  3. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/load_dataset.py +5 -1
  4. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/nwp.py +12 -2
  5. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +2 -4
  6. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/ecmwf.py +1 -1
  7. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/gfs.py +4 -3
  8. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/icon.py +2 -2
  9. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/ukv.py +1 -1
  10. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/utils.py +16 -7
  11. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +11 -13
  12. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler.egg-info/PKG-INFO +1 -1
  13. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/LICENSE +0 -0
  14. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/README.md +0 -0
  15. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/__init__.py +0 -0
  16. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/config/__init__.py +0 -0
  17. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/config/load.py +0 -0
  18. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/config/save.py +0 -0
  19. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  20. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  21. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/__init__.py +0 -0
  22. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/gsp.py +0 -0
  23. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  24. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  25. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/satellite.py +0 -0
  26. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/site.py +0 -0
  27. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/load/utils.py +0 -0
  28. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  29. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  30. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  31. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  32. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  33. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  34. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  35. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/site.py +0 -0
  36. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  37. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/__init__.py +0 -0
  38. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/dropout.py +0 -0
  39. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  40. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  41. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/geospatial.py +0 -0
  42. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/location.py +0 -0
  43. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  44. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/select/select_time_slice.py +0 -0
  45. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  46. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  47. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  48. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  49. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  50. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  51. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  52. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +0 -0
  53. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  54. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  55. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  56. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  57. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  58. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler/utils.py +0 -0
  59. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  60. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  61. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler.egg-info/requires.txt +0 -0
  62. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  63. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/pyproject.toml +0 -0
  64. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/scripts/download_gsp_location_data.py +0 -0
  65. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/scripts/refactor_site.py +0 -0
  66. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/setup.cfg +0 -0
  67. {ocf_data_sampler-0.2.19 → ocf_data_sampler-0.2.21}/utils/compute_icon_mean_stddev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.19
3
+ Version: 0.2.21
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -211,6 +211,7 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConsta
211
211
  " used to construct an example. If set to None, then the max staleness is set according to"
212
212
  " the maximum forecast horizon of the NWP and the requested forecast length.",
213
213
  )
214
+ public: bool = Field(False, description="Whether the NWP data is public or private")
214
215
 
215
216
  @field_validator("provider")
216
217
  def validate_provider(cls, v: str) -> str:
@@ -38,7 +38,11 @@ def get_dataset_dict(
38
38
  if input_config.nwp:
39
39
  datasets_dict["nwp"] = {}
40
40
  for nwp_source, nwp_config in input_config.nwp.items():
41
- da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
41
+ da_nwp = open_nwp(
42
+ zarr_path=nwp_config.zarr_path,
43
+ provider=nwp_config.provider,
44
+ public=nwp_config.public,
45
+ )
42
46
 
43
47
  da_nwp = da_nwp.sel(channel=list(nwp_config.channels))
44
48
 
@@ -9,18 +9,23 @@ from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
9
9
  from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
10
10
 
11
11
 
12
- def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
12
+ def open_nwp(zarr_path: str | list[str], provider: str, public: bool = False) -> xr.DataArray:
13
13
  """Opens NWP zarr.
14
14
 
15
15
  Args:
16
16
  zarr_path: path to the zarr file
17
17
  provider: NWP provider
18
+ public: Whether the data is public or private (only for GFS)
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
21
22
  """
22
23
  provider = provider.lower()
23
24
 
25
+ kwargs = {
26
+ "zarr_path": zarr_path,
27
+ }
28
+
24
29
  if provider == "ukv":
25
30
  _open_nwp = open_ukv
26
31
  elif provider == "ecmwf":
@@ -29,9 +34,14 @@ def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
29
34
  _open_nwp = open_icon_eu
30
35
  elif provider == "gfs":
31
36
  _open_nwp = open_gfs
37
+
38
+ # GFS has a public/private flag
39
+ if public:
40
+ kwargs["public"] = True
41
+
32
42
  elif provider == "cloudcasting":
33
43
  _open_nwp = open_cloudcasting
34
44
  else:
35
45
  raise ValueError(f"Unknown provider: {provider}")
36
46
 
37
- return _open_nwp(zarr_path)
47
+ return _open_nwp(**kwargs)
@@ -1,7 +1,5 @@
1
1
  """Cloudcasting provider loader."""
2
2
 
3
- from pathlib import Path
4
-
5
3
  import xarray as xr
6
4
 
7
5
  from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
@@ -12,14 +10,14 @@ from ocf_data_sampler.load.utils import (
12
10
  )
13
11
 
14
12
 
15
- def open_cloudcasting(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
13
+ def open_cloudcasting(zarr_path: str | list[str]) -> xr.DataArray:
16
14
  """Opens the satellite predictions from cloudcasting.
17
15
 
18
16
  Cloudcasting is a OCF forecast product. We forecast future satellite images from recent
19
17
  satellite images. More information can be found in the references below.
20
18
 
21
19
  Args:
22
- zarr_path: Path to the zarr to open
20
+ zarr_path: Path to the zarr(s) to open
23
21
 
24
22
  Returns:
25
23
  Xarray DataArray of the cloudcasting data
@@ -14,7 +14,7 @@ def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the ECMWF IFS NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -10,11 +10,12 @@ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spati
10
10
  _log = logging.getLogger(__name__)
11
11
 
12
12
 
13
- def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
13
+ def open_gfs(zarr_path: str | list[str], public: bool = False) -> xr.DataArray:
14
14
  """Opens the GFS data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
+ public: Whether the data is public or private
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
@@ -22,7 +23,7 @@ def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
22
23
  _log.info("Loading NWP GFS data")
23
24
 
24
25
  # Open data
25
- gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc")
26
+ gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc", public=public)
26
27
  nwp: xr.DataArray = gfs.to_array()
27
28
  nwp = nwp.rename({"variable": "channel"}) # `variable` appears when using `to_array`
28
29
 
@@ -19,7 +19,7 @@ def remove_isobaric_lelvels_from_coords(nwp: xr.Dataset) -> xr.Dataset:
19
19
  return nwp.drop_vars(["isobaricInhPa", *variables_to_drop])
20
20
 
21
21
 
22
- def open_icon_eu(zarr_path: str) -> xr.Dataset:
22
+ def open_icon_eu(zarr_path: str | list[str]) -> xr.Dataset:
23
23
  """Opens the ICON data.
24
24
 
25
25
  ICON EU Data is on a regular lat/lon grid
@@ -27,7 +27,7 @@ def open_icon_eu(zarr_path: str) -> xr.Dataset:
27
27
  Each of the variables is its own data variable
28
28
 
29
29
  Args:
30
- zarr_path: Path to the zarr to open
30
+ zarr_path: Path to the zarr(s) to open
31
31
 
32
32
  Returns:
33
33
  Xarray DataArray of the NWP data
@@ -14,7 +14,7 @@ def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -3,32 +3,41 @@
3
3
  import xarray as xr
4
4
 
5
5
 
6
- def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
6
+ def open_zarr_paths(
7
+ zarr_path: str | list[str], time_dim: str = "init_time", public: bool = False,
8
+ ) -> xr.Dataset:
7
9
  """Opens the NWP data.
8
10
 
9
11
  Args:
10
12
  zarr_path: Path to the zarr(s) to open
11
13
  time_dim: Name of the time dimension
14
+ public: Whether the data is public or private
12
15
 
13
16
  Returns:
14
17
  The opened Xarray Dataset
15
18
  """
19
+ general_kwargs = {
20
+ "engine": "zarr",
21
+ "chunks": "auto",
22
+ "decode_timedelta": True,
23
+ }
24
+
25
+ if public:
26
+ # note this only works for s3 zarr paths at the moment
27
+ general_kwargs["storage_options"] = {"anon": True}
28
+
16
29
  if type(zarr_path) in [list, tuple] or "*" in str(zarr_path): # Multi-file dataset
17
30
  ds = xr.open_mfdataset(
18
31
  zarr_path,
19
- engine="zarr",
20
32
  concat_dim=time_dim,
21
33
  combine="nested",
22
- chunks="auto",
23
- decode_timedelta=True,
34
+ **general_kwargs,
24
35
  ).sortby(time_dim)
25
36
  else:
26
37
  ds = xr.open_dataset(
27
38
  zarr_path,
28
- engine="zarr",
29
39
  consolidated=True,
30
40
  mode="r",
31
- chunks="auto",
32
- decode_timedelta=True,
41
+ **general_kwargs,
33
42
  )
34
43
  return ds
@@ -1,6 +1,5 @@
1
1
  """Torch dataset for UK PVNet."""
2
2
 
3
- import numpy as np
4
3
  import pandas as pd
5
4
  import xarray as xr
6
5
  from torch.utils.data import Dataset
@@ -257,22 +256,12 @@ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
257
256
  # Construct a lookup for locations - useful for users to construct sample by GSP ID
258
257
  location_lookup = {loc.id: loc for loc in self.locations}
259
258
 
260
- # Construct indices for sampling
261
- t_index, loc_index = np.meshgrid(
262
- np.arange(len(self.valid_t0_times)),
263
- np.arange(len(self.locations)),
264
- )
265
-
266
- # Make array of all possible (t0, location) coordinates. Each row is a single coordinate
267
- index_pairs = np.stack((t_index.ravel(), loc_index.ravel())).T
268
-
269
259
  # Assign coords and indices to self
270
260
  self.location_lookup = location_lookup
271
- self.index_pairs = index_pairs
272
261
 
273
262
  @override
274
263
  def __len__(self) -> int:
275
- return len(self.index_pairs)
264
+ return len(self.locations)*len(self.valid_t0_times)
276
265
 
277
266
  def _get_sample(self, t0: pd.Timestamp, location: Location) -> NumpySample:
278
267
  """Generate the PVNet sample for given coordinates.
@@ -290,7 +279,16 @@ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
290
279
  @override
291
280
  def __getitem__(self, idx: int) -> NumpySample:
292
281
  # Get the coordinates of the sample
293
- t_index, loc_index = self.index_pairs[idx]
282
+
283
+ if idx >= len(self):
284
+ raise ValueError(f"Index {idx} out of range for dataset of length {len(self)}")
285
+
286
+ # t_index will be between 0 and len(self.valid_t0_times)-1
287
+ t_index = idx % len(self.valid_t0_times)
288
+
289
+ # For each location, there are len(self.valid_t0_times) possible samples
290
+ loc_index = idx // len(self.valid_t0_times)
291
+
294
292
  location = self.locations[loc_index]
295
293
  t0 = self.valid_t0_times[t_index]
296
294
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.19
3
+ Version: 0.2.21
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License