ocf-data-sampler 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -211,6 +211,7 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConsta
211
211
  " used to construct an example. If set to None, then the max staleness is set according to"
212
212
  " the maximum forecast horizon of the NWP and the requested forecast length.",
213
213
  )
214
+ public: bool = Field(False, description="Whether the NWP data is public or private")
214
215
 
215
216
  @field_validator("provider")
216
217
  def validate_provider(cls, v: str) -> str:
@@ -38,7 +38,11 @@ def get_dataset_dict(
38
38
  if input_config.nwp:
39
39
  datasets_dict["nwp"] = {}
40
40
  for nwp_source, nwp_config in input_config.nwp.items():
41
- da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
41
+ da_nwp = open_nwp(
42
+ zarr_path=nwp_config.zarr_path,
43
+ provider=nwp_config.provider,
44
+ public=nwp_config.public,
45
+ )
42
46
 
43
47
  da_nwp = da_nwp.sel(channel=list(nwp_config.channels))
44
48
 
@@ -9,18 +9,23 @@ from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
9
9
  from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
10
10
 
11
11
 
12
- def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
12
+ def open_nwp(zarr_path: str | list[str], provider: str, public: bool = False) -> xr.DataArray:
13
13
  """Opens NWP zarr.
14
14
 
15
15
  Args:
16
16
  zarr_path: path to the zarr file
17
17
  provider: NWP provider
18
+ public: Whether the data is public or private (only for GFS)
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
21
22
  """
22
23
  provider = provider.lower()
23
24
 
25
+ kwargs = {
26
+ "zarr_path": zarr_path,
27
+ }
28
+
24
29
  if provider == "ukv":
25
30
  _open_nwp = open_ukv
26
31
  elif provider == "ecmwf":
@@ -29,9 +34,14 @@ def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
29
34
  _open_nwp = open_icon_eu
30
35
  elif provider == "gfs":
31
36
  _open_nwp = open_gfs
37
+
38
+ # GFS has a public/private flag
39
+ if public:
40
+ kwargs["public"] = True
41
+
32
42
  elif provider == "cloudcasting":
33
43
  _open_nwp = open_cloudcasting
34
44
  else:
35
45
  raise ValueError(f"Unknown provider: {provider}")
36
46
 
37
- return _open_nwp(zarr_path)
47
+ return _open_nwp(**kwargs)
@@ -1,7 +1,5 @@
1
1
  """Cloudcasting provider loader."""
2
2
 
3
- from pathlib import Path
4
-
5
3
  import xarray as xr
6
4
 
7
5
  from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
@@ -12,14 +10,14 @@ from ocf_data_sampler.load.utils import (
12
10
  )
13
11
 
14
12
 
15
- def open_cloudcasting(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
13
+ def open_cloudcasting(zarr_path: str | list[str]) -> xr.DataArray:
16
14
  """Opens the satellite predictions from cloudcasting.
17
15
 
18
16
  Cloudcasting is a OCF forecast product. We forecast future satellite images from recent
19
17
  satellite images. More information can be found in the references below.
20
18
 
21
19
  Args:
22
- zarr_path: Path to the zarr to open
20
+ zarr_path: Path to the zarr(s) to open
23
21
 
24
22
  Returns:
25
23
  Xarray DataArray of the cloudcasting data
@@ -14,7 +14,7 @@ def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the ECMWF IFS NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -10,11 +10,12 @@ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spati
10
10
  _log = logging.getLogger(__name__)
11
11
 
12
12
 
13
- def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
13
+ def open_gfs(zarr_path: str | list[str], public: bool = False) -> xr.DataArray:
14
14
  """Opens the GFS data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
+ public: Whether the data is public or private
18
19
 
19
20
  Returns:
20
21
  Xarray DataArray of the NWP data
@@ -22,12 +23,12 @@ def open_gfs(zarr_path: str | list[str]) -> xr.DataArray:
22
23
  _log.info("Loading NWP GFS data")
23
24
 
24
25
  # Open data
25
- gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc")
26
+ gfs: xr.Dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc", public=public)
26
27
  nwp: xr.DataArray = gfs.to_array()
28
+ nwp = nwp.rename({"variable": "channel"}) # `variable` appears when using `to_array`
27
29
 
28
30
  del gfs
29
31
 
30
- nwp = nwp.rename({"variable": "channel","init_time": "init_time_utc"})
31
32
  check_time_unique_increasing(nwp.init_time_utc)
32
33
  nwp = make_spatial_coords_increasing(nwp, x_coord="longitude", y_coord="latitude")
33
34
 
@@ -19,7 +19,7 @@ def remove_isobaric_lelvels_from_coords(nwp: xr.Dataset) -> xr.Dataset:
19
19
  return nwp.drop_vars(["isobaricInhPa", *variables_to_drop])
20
20
 
21
21
 
22
- def open_icon_eu(zarr_path: str) -> xr.Dataset:
22
+ def open_icon_eu(zarr_path: str | list[str]) -> xr.Dataset:
23
23
  """Opens the ICON data.
24
24
 
25
25
  ICON EU Data is on a regular lat/lon grid
@@ -27,7 +27,7 @@ def open_icon_eu(zarr_path: str) -> xr.Dataset:
27
27
  Each of the variables is its own data variable
28
28
 
29
29
  Args:
30
- zarr_path: Path to the zarr to open
30
+ zarr_path: Path to the zarr(s) to open
31
31
 
32
32
  Returns:
33
33
  Xarray DataArray of the NWP data
@@ -14,7 +14,7 @@ def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
14
14
  """Opens the NWP data.
15
15
 
16
16
  Args:
17
- zarr_path: Path to the zarr to open
17
+ zarr_path: Path to the zarr(s) to open
18
18
 
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
@@ -3,32 +3,41 @@
3
3
  import xarray as xr
4
4
 
5
5
 
6
- def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
6
+ def open_zarr_paths(
7
+ zarr_path: str | list[str], time_dim: str = "init_time", public: bool = False,
8
+ ) -> xr.Dataset:
7
9
  """Opens the NWP data.
8
10
 
9
11
  Args:
10
12
  zarr_path: Path to the zarr(s) to open
11
13
  time_dim: Name of the time dimension
14
+ public: Whether the data is public or private
12
15
 
13
16
  Returns:
14
17
  The opened Xarray Dataset
15
18
  """
19
+ general_kwargs = {
20
+ "engine": "zarr",
21
+ "chunks": "auto",
22
+ "decode_timedelta": True,
23
+ }
24
+
25
+ if public:
26
+ # note this only works for s3 zarr paths at the moment
27
+ general_kwargs["storage_options"] = {"anon": True}
28
+
16
29
  if type(zarr_path) in [list, tuple] or "*" in str(zarr_path): # Multi-file dataset
17
30
  ds = xr.open_mfdataset(
18
31
  zarr_path,
19
- engine="zarr",
20
32
  concat_dim=time_dim,
21
33
  combine="nested",
22
- chunks="auto",
23
- decode_timedelta=True,
34
+ **general_kwargs,
24
35
  ).sortby(time_dim)
25
36
  else:
26
37
  ds = xr.open_dataset(
27
38
  zarr_path,
28
- engine="zarr",
29
39
  consolidated=True,
30
40
  mode="r",
31
- chunks="auto",
32
- decode_timedelta=True,
41
+ **general_kwargs,
33
42
  )
34
43
  return ds
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.18
3
+ Version: 0.2.20
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -2,25 +2,25 @@ ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,
2
2
  ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
3
3
  ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
4
4
  ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
5
- ocf_data_sampler/config/model.py,sha256=SyjtlSK6gzQHWUfgX3VNKYLODyiKuD0Mu4hlm9GoHeg,10427
5
+ ocf_data_sampler/config/model.py,sha256=iqffLs_VDqw9jOTLWchVFK4c6FWxHAzCngSfkjLUyCY,10516
6
6
  ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
7
7
  ocf_data_sampler/data/uk_gsp_locations_20220314.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
8
8
  ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKFzQSjs6hcHFsV8a9uDDpy2E,9055334
9
9
  ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
10
10
  ocf_data_sampler/load/gsp.py,sha256=UfPxwHw2Dw2xYSO5Al28oTamgnEM_n_4bYXsqGwY5Tc,1884
11
- ocf_data_sampler/load/load_dataset.py,sha256=sIi0nkijR_-1fRfW5JcXNTR0ccGbpkhxb7JX_zjJ-W4,1956
11
+ ocf_data_sampler/load/load_dataset.py,sha256=wSXPUQKgGRM6HC-yBXQ2IcDBQDckOSllmbGnhqikFMQ,2055
12
12
  ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
13
13
  ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
14
14
  ocf_data_sampler/load/utils.py,sha256=sZ0-zzconcLkVQwAkCYrqKDo98Hrh5ChdiQJv5Bh91g,2040
15
15
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
16
- ocf_data_sampler/load/nwp/nwp.py,sha256=OUK6thsKfEcqGUj4WoxDiyqGcVwwoujePdmQRis8H8I,1076
16
+ ocf_data_sampler/load/nwp/nwp.py,sha256=PNNYYREEGQT4sxGilNzuthKKOmVJdQL8R2r8bvzyEr0,1317
17
17
  ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- ocf_data_sampler/load/nwp/providers/cloudcasting.py,sha256=mGsEqo5nVGQhfIK1KC-ywLsaFKU18-UFKqQVtB8gpnw,1564
19
- ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=5AzktPJgertCx6oij6aePRosPuZHGFznMxTgtkk_mgc,994
20
- ocf_data_sampler/load/nwp/providers/gfs.py,sha256=pPqVqZqC9eK1gKfRsvR_hB-eiQ1V9LeYB15DVJElUX0,979
21
- ocf_data_sampler/load/nwp/providers/icon.py,sha256=q2ZJIKYptXFvoHzb9y3iNKSJZ0PNtnH54h2gwYbgIJ8,1569
22
- ocf_data_sampler/load/nwp/providers/ukv.py,sha256=-0v8JCLH8ypz8GMXZ6Rrx-I0LoHuHO8sXFupbC1RpM0,1013
23
- ocf_data_sampler/load/nwp/providers/utils.py,sha256=Rau6j6it2D4YLavoey-bdOlixgn5gDQioToAmRPAvtA,905
18
+ ocf_data_sampler/load/nwp/providers/cloudcasting.py,sha256=fozXpB3a2rNqQgnpRDC7xunxffh7Wwmc0kkCiYmDVJ4,1521
19
+ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=an-gXsZwkPQvRXeza1U_4MNU5yEnVm0_8tn03rxTudI,997
20
+ ocf_data_sampler/load/nwp/providers/gfs.py,sha256=glBbo2kXtcTjQv_VNqA32lsdCCGB114Ovm-cibRWxTA,1088
21
+ ocf_data_sampler/load/nwp/providers/icon.py,sha256=6MkOfUk5dmv0XJZLrKMy1e8xipj2fHCTkYXuff7MgUY,1584
22
+ ocf_data_sampler/load/nwp/providers/ukv.py,sha256=Ka1KFZcJYPwr5vuxo-xWGVQC0pudheqGBonUnbyJCMg,1016
23
+ ocf_data_sampler/load/nwp/providers/utils.py,sha256=NrzE3JAtoc6oEywJHxPUdi_I4UJgJ_l5dxLZ4DLKvcg,1124
24
24
  ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
25
25
  ocf_data_sampler/numpy_sample/collate.py,sha256=hoxIc5SoHoIs3Nx37aRZzWChpswjy9lHUgaKgHIoo80,2039
26
26
  ocf_data_sampler/numpy_sample/common_types.py,sha256=9CjYHkUTx0ObduWh43fhsybZCTXvexql7qC2ptMDoek,377
@@ -55,7 +55,7 @@ ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul
55
55
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
56
56
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
57
57
  utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
58
- ocf_data_sampler-0.2.18.dist-info/METADATA,sha256=tW_PCGhhXGaFnPp3ChT8cmX640KcWeaAnCXRy4xsccw,11581
59
- ocf_data_sampler-0.2.18.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
60
- ocf_data_sampler-0.2.18.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
61
- ocf_data_sampler-0.2.18.dist-info/RECORD,,
58
+ ocf_data_sampler-0.2.20.dist-info/METADATA,sha256=r0mN9CoHXY_Vj44DxMbrCe2fd00RWYE2vxID5SQ-6RM,11581
59
+ ocf_data_sampler-0.2.20.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
60
+ ocf_data_sampler-0.2.20.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
61
+ ocf_data_sampler-0.2.20.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5