ocf-data-sampler 0.0.9__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (39) hide show
  1. {ocf_data_sampler-0.0.9/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.11}/PKG-INFO +1 -1
  2. ocf_data_sampler-0.0.11/ocf_data_sampler/load/__init__.py +1 -0
  3. ocf_data_sampler-0.0.11/ocf_data_sampler/load/gsp.py +33 -0
  4. ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/__init__.py +1 -0
  5. ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/nwp.py +23 -0
  6. ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/ecmwf.py +37 -0
  7. ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/ukv.py +45 -0
  8. ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/utils.py +34 -0
  9. ocf_data_sampler-0.0.11/ocf_data_sampler/load/satellite.py +101 -0
  10. ocf_data_sampler-0.0.11/ocf_data_sampler/load/utils.py +29 -0
  11. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  12. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler.egg-info/SOURCES.txt +10 -0
  13. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/setup.py +1 -1
  14. ocf_data_sampler-0.0.11/tests/__init__.py +0 -0
  15. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/LICENSE +0 -0
  16. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/MANIFEST.in +0 -0
  17. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/README.md +0 -0
  18. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/__init__.py +0 -0
  19. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  20. {ocf_data_sampler-0.0.9/tests → ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers}/__init__.py +0 -0
  21. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  22. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/numpy_batch/gsp.py +0 -0
  23. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
  24. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
  25. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
  26. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/__init__.py +0 -0
  27. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/dropout.py +0 -0
  28. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  29. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  30. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  31. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/select/select_time_slice.py +0 -0
  32. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  33. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +0 -0
  34. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  35. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler.egg-info/requires.txt +0 -0
  36. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  37. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/requirements.txt +0 -0
  38. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/setup.cfg +0 -0
  39. {ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/tests/conftest.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -0,0 +1 @@
1
+ from ocf_blosc2 import Blosc2 # noqa: F401
@@ -0,0 +1,33 @@
1
+ from pathlib import Path
2
+ import pkg_resources
3
+
4
+ import pandas as pd
5
+ import xarray as xr
6
+
7
+
8
+ def open_gsp(zarr_path: str | Path) -> xr.DataArray:
9
+
10
+ # Load GSP generation xr.Dataset
11
+ ds = xr.open_zarr(zarr_path)
12
+
13
+ # Rename to standard time name
14
+ ds = ds.rename({"datetime_gmt": "time_utc"})
15
+
16
+ # Load UK GSP locations
17
+ df_gsp_loc = pd.read_csv(
18
+ pkg_resources.resource_filename(__name__, "../data/uk_gsp_locations.csv"),
19
+ index_col="gsp_id",
20
+ )
21
+
22
+ # Add coordinates
23
+ ds = ds.assign_coords(
24
+ x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
25
+ y_osgb=(df_gsp_loc.y_osgb.to_xarray()),
26
+ nominal_capacity_mwp=ds.installedcapacity_mwp,
27
+ effective_capacity_mwp=ds.capacity_mwp,
28
+
29
+ )
30
+
31
+ return ds.generation_mw
32
+
33
+
@@ -0,0 +1 @@
1
+ from .nwp import open_nwp
@@ -0,0 +1,23 @@
1
+ from pathlib import Path
2
+ import xarray as xr
3
+
4
+ from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
5
+ from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
6
+
7
+
8
+ def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
9
+ """Opens NWP Zarr
10
+
11
+ Args:
12
+ zarr_path: Path to the Zarr file
13
+ provider: NWP provider
14
+ """
15
+
16
+ if provider.lower() == "ukv":
17
+ _open_nwp = open_ukv
18
+ elif provider.lower() == "ecmwf":
19
+ _open_nwp = open_ifs
20
+ else:
21
+ raise ValueError(f"Unknown provider: {provider}")
22
+ return _open_nwp(zarr_path)
23
+
@@ -0,0 +1,37 @@
1
+ """ECMWF provider loaders"""
2
+ from pathlib import Path
3
+ import xarray as xr
4
+ from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
5
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
6
+
7
+ def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
8
+ """
9
+ Opens the ECMWF IFS NWP data
10
+
11
+ Args:
12
+ zarr_path: Path to the zarr to open
13
+
14
+ Returns:
15
+ Xarray DataArray of the NWP data
16
+ """
17
+ # Open the data
18
+ ds = open_zarr_paths(zarr_path)
19
+
20
+ # Rename
21
+ ds = ds.rename(
22
+ {
23
+ "init_time": "init_time_utc",
24
+ "variable": "channel",
25
+ }
26
+ )
27
+
28
+ # Check the timestamps are unique and increasing
29
+ check_time_unique_increasing(ds.init_time_utc)
30
+
31
+ # Make sure the spatial coords are in increasing order
32
+ ds = make_spatial_coords_increasing(ds, x_coord="longitude", y_coord="latitude")
33
+
34
+ ds = ds.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
35
+
36
+ # TODO: should we control the dtype of the DataArray?
37
+ return ds.ECMWF_UK
@@ -0,0 +1,45 @@
1
+ """UKV provider loaders"""
2
+
3
+ import xarray as xr
4
+
5
+ from pathlib import Path
6
+
7
+ from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
8
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
9
+
10
+
11
+ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
12
+ """
13
+ Opens the NWP data
14
+
15
+ Args:
16
+ zarr_path: Path to the zarr to open
17
+
18
+ Returns:
19
+ Xarray DataArray of the NWP data
20
+ """
21
+ # Open the data
22
+ ds = open_zarr_paths(zarr_path)
23
+
24
+ # Rename
25
+ ds = ds.rename(
26
+ {
27
+ "init_time": "init_time_utc",
28
+ "variable": "channel",
29
+ "x": "x_osgb",
30
+ "y": "y_osgb",
31
+ }
32
+ )
33
+
34
+ # Check the timestamps are unique and increasing
35
+ check_time_unique_increasing(ds.init_time_utc)
36
+
37
+ # Make sure the spatial coords are in increasing order
38
+ ds = make_spatial_coords_increasing(ds, x_coord="x_osgb", y_coord="y_osgb")
39
+
40
+ ds = ds.transpose("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
41
+
42
+ # TODO: should we control the dtype of the DataArray?
43
+ return ds.UKV
44
+
45
+
@@ -0,0 +1,34 @@
1
+ from pathlib import Path
2
+ import xarray as xr
3
+
4
+
5
+ def open_zarr_paths(
6
+ zarr_path: Path | str | list[Path] | list[str],
7
+ time_dim: str = "init_time"
8
+ ) -> xr.Dataset:
9
+ """Opens the NWP data
10
+
11
+ Args:
12
+ zarr_path: Path to the zarr(s) to open
13
+ time_dim: Name of the time dimension
14
+
15
+ Returns:
16
+ The opened Xarray Dataset
17
+ """
18
+ if type(zarr_path) in [list, tuple] or "*" in str(zarr_path): # Multi-file dataset
19
+ ds = xr.open_mfdataset(
20
+ zarr_path,
21
+ engine="zarr",
22
+ concat_dim=time_dim,
23
+ combine="nested",
24
+ chunks="auto",
25
+ ).sortby(time_dim)
26
+ else:
27
+ ds = xr.open_dataset(
28
+ zarr_path,
29
+ engine="zarr",
30
+ consolidated=True,
31
+ mode="r",
32
+ chunks="auto",
33
+ )
34
+ return ds
@@ -0,0 +1,101 @@
1
+ """Satellite loader"""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ import xarray as xr
8
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
9
+
10
+
11
+ def _get_single_sat_data(zarr_path: Path | str) -> xr.DataArray:
12
+ """Helper function to open a zarr from either local or GCP path.
13
+
14
+ The local or GCP path may contain wildcard matching (*)
15
+
16
+ Args:
17
+ zarr_path: Path to zarr file
18
+ """
19
+
20
+ # These kwargs are used if zarr path contains "*"
21
+ openmf_kwargs = dict(
22
+ engine="zarr",
23
+ concat_dim="time",
24
+ combine="nested",
25
+ chunks="auto",
26
+ join="override",
27
+ )
28
+
29
+ # Need to generate list of files if using GCP bucket storage
30
+ if "gs://" in str(zarr_path) and "*" in str(zarr_path):
31
+ result_string = subprocess.run(
32
+ f"gsutil ls -d {zarr_path}".split(" "), stdout=subprocess.PIPE
33
+ ).stdout.decode("utf-8")
34
+ files = result_string.splitlines()
35
+
36
+ ds = xr.open_mfdataset(files, **openmf_kwargs)
37
+
38
+ elif "*" in str(zarr_path): # Multi-file dataset
39
+ ds = xr.open_mfdataset(zarr_path, **openmf_kwargs)
40
+ else:
41
+ ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")
42
+ ds = ds.drop_duplicates("time").sortby("time")
43
+
44
+ return ds
45
+
46
+
47
+ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
48
+ """Lazily opens the Zarr store.
49
+
50
+ Args:
51
+ zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
52
+ 'gs://'.
53
+
54
+ Example:
55
+ With wild cards and GCS path:
56
+ ```
57
+ zarr_paths = [
58
+ "gs://bucket/2020_nonhrv_split_*.zarr",
59
+ "gs://bucket/2019_nonhrv_split_*.zarr",
60
+ ]
61
+ ds = open_sat_data(zarr_paths)
62
+ ```
63
+ Without wild cards and with local path:
64
+ ```
65
+ zarr_paths = [
66
+ "/data/2020_nonhrv.zarr",
67
+ "/data/2019_nonhrv.zarr",
68
+ ]
69
+ ds = open_sat_data(zarr_paths)
70
+ ```
71
+ """
72
+
73
+ # Open the data
74
+ if isinstance(zarr_path, (list, tuple)):
75
+ ds = xr.combine_nested(
76
+ [_get_single_sat_data(path) for path in zarr_path],
77
+ concat_dim="time",
78
+ combine_attrs="override",
79
+ join="override",
80
+ )
81
+ else:
82
+ ds = _get_single_sat_data(zarr_path)
83
+
84
+ # Rename
85
+ ds = ds.rename(
86
+ {
87
+ "variable": "channel",
88
+ "time": "time_utc",
89
+ }
90
+ )
91
+
92
+ # Check the timestamps are unique and increasing
93
+ check_time_unique_increasing(ds.time_utc)
94
+
95
+ # Make sure the spatial coords are in increasing order
96
+ ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
97
+
98
+ ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
99
+
100
+ # TODO: should we control the dtype of the DataArray?
101
+ return ds.data
@@ -0,0 +1,29 @@
1
+ import xarray as xr
2
+ import pandas as pd
3
+
4
+ def check_time_unique_increasing(datetimes) -> None:
5
+ """Check that the time dimension is unique and increasing"""
6
+ time = pd.DatetimeIndex(datetimes)
7
+ assert time.is_unique
8
+ assert time.is_monotonic_increasing
9
+
10
+ def make_spatial_coords_increasing(ds: xr.Dataset, x_coord: str, y_coord: str) -> xr.Dataset:
11
+ """Make sure the spatial coordinates are in increasing order
12
+
13
+ Args:
14
+ ds: Xarray Dataset
15
+ x_coord: Name of the x coordinate
16
+ y_coord: Name of the y coordinate
17
+ """
18
+
19
+ # Make sure the coords are in increasing order
20
+ if ds[x_coord][0] > ds[x_coord][-1]:
21
+ ds = ds.isel({x_coord:slice(None, None, -1)})
22
+ if ds[y_coord][0] > ds[y_coord][-1]:
23
+ ds = ds.isel({y_coord:slice(None, None, -1)})
24
+
25
+ # Check the coords are all increasing now
26
+ assert (ds[x_coord].diff(dim=x_coord) > 0).all()
27
+ assert (ds[y_coord].diff(dim=y_coord) > 0).all()
28
+
29
+ return ds
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -10,6 +10,16 @@ ocf_data_sampler.egg-info/dependency_links.txt
10
10
  ocf_data_sampler.egg-info/requires.txt
11
11
  ocf_data_sampler.egg-info/top_level.txt
12
12
  ocf_data_sampler/data/uk_gsp_locations.csv
13
+ ocf_data_sampler/load/__init__.py
14
+ ocf_data_sampler/load/gsp.py
15
+ ocf_data_sampler/load/satellite.py
16
+ ocf_data_sampler/load/utils.py
17
+ ocf_data_sampler/load/nwp/__init__.py
18
+ ocf_data_sampler/load/nwp/nwp.py
19
+ ocf_data_sampler/load/nwp/providers/__init__.py
20
+ ocf_data_sampler/load/nwp/providers/ecmwf.py
21
+ ocf_data_sampler/load/nwp/providers/ukv.py
22
+ ocf_data_sampler/load/nwp/providers/utils.py
13
23
  ocf_data_sampler/numpy_batch/__init__.py
14
24
  ocf_data_sampler/numpy_batch/gsp.py
15
25
  ocf_data_sampler/numpy_batch/nwp.py
@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
10
10
 
11
11
  setup(
12
12
  name="ocf_data_sampler",
13
- version="0.0.9",
13
+ version="0.0.11",
14
14
  license="MIT",
15
15
  description="Sample from weather data for renewable energy prediction",
16
16
  author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
File without changes