PyPI - ocf-data-sampler - Versions diffs - 0.0.9__tar.gz → 0.0.11__tar.gz - Mend

ocf-data-sampler 0.0.9.tar.gz → 0.0.11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (39) hide show

{ocf_data_sampler-0.0.9/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.9
+Version: 0.0.11
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

ocf_data_sampler-0.0.11/ocf_data_sampler/load/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from ocf_blosc2 import Blosc2 # noqa: F401

ocf_data_sampler-0.0.11/ocf_data_sampler/load/gsp.py ADDED Viewed

@@ -0,0 +1,33 @@
+from pathlib import Path
+import pkg_resources
+import pandas as pd
+import xarray as xr
+def open_gsp(zarr_path: str | Path) -> xr.DataArray:
+    """Open the GSP generation Zarr and return its `generation_mw` variable
+    Args:
+        zarr_path: Path to the GSP Zarr store
+    Returns:
+        The `generation_mw` variable, with the GSP locations and both capacity variables
+        attached as coordinates
+    """
+    # Open the store and switch to the standard time name in one step
+    gsp_ds = xr.open_zarr(zarr_path).rename({"datetime_gmt": "time_utc"})
+    # The UK GSP locations are read from the packaged CSV, indexed by gsp_id
+    locations_csv = pkg_resources.resource_filename(__name__, "../data/uk_gsp_locations.csv")
+    gsp_locations = pd.read_csv(locations_csv, index_col="gsp_id")
+    # Locations come from the CSV, capacities from the dataset itself
+    extra_coords = {
+        "x_osgb": gsp_locations.x_osgb.to_xarray(),
+        "y_osgb": gsp_locations.y_osgb.to_xarray(),
+        "nominal_capacity_mwp": gsp_ds.installedcapacity_mwp,
+        "effective_capacity_mwp": gsp_ds.capacity_mwp,
+    }
+    gsp_ds = gsp_ds.assign_coords(**extra_coords)
+    return gsp_ds.generation_mw

ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .nwp import open_nwp

ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/nwp.py ADDED Viewed

@@ -0,0 +1,23 @@
+from pathlib import Path
+import xarray as xr
+from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
+from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
+def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
+    """Open an NWP Zarr with the loader that belongs to the given provider
+    Args:
+        zarr_path: Path to the Zarr file, or a list of such paths
+        provider: NWP provider name, matched case-insensitively ("ukv" or "ecmwf")
+    Returns:
+        Whatever the provider-specific loader returns for `zarr_path`
+    Raises:
+        ValueError: If `provider` is not one of the known providers
+    """
+    # Lower-cased provider name -> loader function
+    loaders = {"ukv": open_ukv, "ecmwf": open_ifs}
+    loader = loaders.get(provider.lower())
+    if loader is None:
+        raise ValueError(f"Unknown provider: {provider}")
+    return loader(zarr_path)

ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/ecmwf.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""ECMWF provider loaders"""
+from pathlib import Path
+import xarray as xr
+from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
+from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
+def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+    """
+    Load the ECMWF IFS NWP data
+    Args:
+        zarr_path: Path to the zarr to open, or a list of such paths
+    Returns:
+        The `ECMWF_UK` variable, taken after the dataset has been transposed to
+        (init_time_utc, step, channel, longitude, latitude) order
+    """
+    # Open the data and move to the standard dimension names
+    new_names = {"init_time": "init_time_utc", "variable": "channel"}
+    nwp = open_zarr_paths(zarr_path).rename(new_names)
+    # Check the timestamps are unique and increasing
+    check_time_unique_increasing(nwp.init_time_utc)
+    # Flip longitude / latitude where needed so both run in increasing order
+    nwp = make_spatial_coords_increasing(nwp, x_coord="longitude", y_coord="latitude")
+    dim_order = ("init_time_utc", "step", "channel", "longitude", "latitude")
+    # TODO: should we control the dtype of the DataArray?
+    return nwp.transpose(*dim_order).ECMWF_UK

ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/ukv.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""UKV provider loaders"""
+import xarray as xr
+from pathlib import Path
+from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
+from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
+def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+    """
+    Load the UKV NWP data
+    Args:
+        zarr_path: Path to the zarr to open, or a list of such paths
+    Returns:
+        The `UKV` variable, taken after the dataset has been transposed to
+        (init_time_utc, step, channel, x_osgb, y_osgb) order
+    """
+    # Open the data and move to the standard dimension names
+    new_names = {
+        "init_time": "init_time_utc",
+        "variable": "channel",
+        "x": "x_osgb",
+        "y": "y_osgb",
+    }
+    nwp = open_zarr_paths(zarr_path).rename(new_names)
+    # Check the timestamps are unique and increasing
+    check_time_unique_increasing(nwp.init_time_utc)
+    # Flip x / y where needed so both run in increasing order
+    nwp = make_spatial_coords_increasing(nwp, x_coord="x_osgb", y_coord="y_osgb")
+    dim_order = ("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
+    # TODO: should we control the dtype of the DataArray?
+    return nwp.transpose(*dim_order).UKV

ocf_data_sampler-0.0.11/ocf_data_sampler/load/nwp/providers/utils.py ADDED Viewed

@@ -0,0 +1,34 @@
+from pathlib import Path
+import xarray as xr
+def open_zarr_paths(
+        zarr_path: Path | str | list[Path] | list[str],
+        time_dim: str = "init_time"
+    ) -> xr.Dataset:
+    """Opens one or several Zarr stores as a single Dataset
+    Args:
+        zarr_path: Path to the zarr(s) to open. A list/tuple of paths, or a single path
+            containing a "*" wildcard, is opened as a multi-file dataset
+        time_dim: Name of the time dimension. Only used for multi-file datasets, where the
+            stores are concatenated along it and the result is sorted by it
+    Returns:
+        The opened Xarray Dataset
+    """
+    # isinstance rather than an exact type match, so list/tuple subclasses are also
+    # treated as multi-file input
+    is_sequence = isinstance(zarr_path, (list, tuple))
+    if is_sequence or "*" in str(zarr_path):  # Multi-file dataset
+        # open_mfdataset documents glob patterns as strings, so a single wildcard path is
+        # handed over as str even when the caller passed a Path
+        paths = zarr_path if is_sequence else str(zarr_path)
+        ds = xr.open_mfdataset(
+            paths,
+            engine="zarr",
+            concat_dim=time_dim,
+            combine="nested",
+            chunks="auto",
+        ).sortby(time_dim)
+    else:
+        ds = xr.open_dataset(
+            zarr_path,
+            engine="zarr",
+            consolidated=True,
+            mode="r",
+            chunks="auto",
+        )
+    return ds

ocf_data_sampler-0.0.11/ocf_data_sampler/load/satellite.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""Satellite loader"""
+import subprocess
+from pathlib import Path
+import pandas as pd
+import xarray as xr
+from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
+def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
+    """Helper function to open a zarr from either local or GCP path.
+    The local or GCP path may contain wildcard matching (*)
+    Args:
+        zarr_path: Path to zarr file
+    Returns:
+        The opened Dataset with duplicate "time" values dropped, sorted by "time"
+    """
+    path_str = str(zarr_path)
+    # These kwargs are used if zarr path contains "*"
+    openmf_kwargs = dict(
+        engine="zarr",
+        concat_dim="time",
+        combine="nested",
+        chunks="auto",
+        join="override",
+    )
+    if "*" not in path_str:
+        ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")
+    elif "gs://" in path_str:
+        # Need to generate list of files if using GCP bucket storage.
+        # The command is passed as an argument list so that a path containing spaces
+        # stays a single argument
+        result = subprocess.run(["gsutil", "ls", "-d", path_str], stdout=subprocess.PIPE)
+        files = result.stdout.decode("utf-8").splitlines()
+        ds = xr.open_mfdataset(files, **openmf_kwargs)
+    else:  # Local multi-file dataset
+        # open_mfdataset documents glob patterns as strings, so the pattern is handed
+        # over as str even when the caller passed a Path
+        ds = xr.open_mfdataset(path_str, **openmf_kwargs)
+    return ds.drop_duplicates("time").sortby("time")
+def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+    """Lazily opens one or more satellite Zarr stores.
+    Args:
+      zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
+          'gs://'. A list is opened path by path and combined along the time dimension.
+    Returns:
+        The `data` variable, taken after the dataset has been transposed to
+        (time_utc, channel, x_geostationary, y_geostationary) order
+    Example:
+        With wild cards and GCS path:
+        ```
+        zarr_paths = [
+            "gs://bucket/2020_nonhrv_split_*.zarr",
+            "gs://bucket/2019_nonhrv_split_*.zarr",
+        ]
+        ds = open_sat_data(zarr_paths)
+        ```
+        Without wild cards and with local path:
+        ```
+        zarr_paths = [
+            "/data/2020_nonhrv.zarr",
+            "/data/2019_nonhrv.zarr",
+        ]
+        ds = open_sat_data(zarr_paths)
+        ```
+    """
+    # Open the data: a single path directly, several paths one by one and then combined
+    if not isinstance(zarr_path, (list, tuple)):
+        sat = _get_single_sat_data(zarr_path)
+    else:
+        per_path = [_get_single_sat_data(single_path) for single_path in zarr_path]
+        sat = xr.combine_nested(
+            per_path,
+            concat_dim="time",
+            combine_attrs="override",
+            join="override",
+        )
+    # Move to the standard dimension names
+    sat = sat.rename({"variable": "channel", "time": "time_utc"})
+    # Check the timestamps are unique and increasing
+    check_time_unique_increasing(sat.time_utc)
+    # Flip the geostationary x / y where needed so both run in increasing order
+    sat = make_spatial_coords_increasing(
+        sat, x_coord="x_geostationary", y_coord="y_geostationary"
+    )
+    dim_order = ("time_utc", "channel", "x_geostationary", "y_geostationary")
+    # TODO: should we control the dtype of the DataArray?
+    return sat.transpose(*dim_order).data

ocf_data_sampler-0.0.11/ocf_data_sampler/load/utils.py ADDED Viewed

@@ -0,0 +1,29 @@
+import xarray as xr
+import pandas as pd
+def check_time_unique_increasing(datetimes) -> None:
+    """Check that the time dimension is unique and increasing
+    Args:
+        datetimes: The time values to check; converted with `pd.DatetimeIndex`
+    Raises:
+        AssertionError: If the times contain duplicates or are not in increasing order
+    """
+    time = pd.DatetimeIndex(datetimes)
+    # Raised explicitly rather than via `assert`, so the checks still run under `python -O`.
+    # AssertionError is kept so existing callers see the same exception type
+    if not time.is_unique:
+        raise AssertionError("Time values are not unique")
+    if not time.is_monotonic_increasing:
+        raise AssertionError("Time values are not monotonically increasing")
+def make_spatial_coords_increasing(ds: xr.Dataset, x_coord: str, y_coord: str) -> xr.Dataset:
+    """Make sure the spatial coordinates are in increasing order
+    Args:
+        ds: Xarray Dataset
+        x_coord: Name of the x coordinate
+        y_coord: Name of the y coordinate
+    Returns:
+        The Dataset, with an axis reversed wherever its first coordinate value was larger
+        than its last
+    Raises:
+        AssertionError: If a coordinate is still not strictly increasing afterwards
+    """
+    spatial_coords = (x_coord, y_coord)
+    # Reverse an axis when its first value is larger than its last
+    for coord in spatial_coords:
+        if ds[coord][0] > ds[coord][-1]:
+            ds = ds.isel({coord: slice(None, None, -1)})
+    # Check the coords are all increasing now.
+    # Raised explicitly rather than via `assert`, so the checks still run under `python -O`.
+    # AssertionError is kept so existing callers see the same exception type
+    for coord in spatial_coords:
+        if not (ds[coord].diff(dim=coord) > 0).all():
+            raise AssertionError(f"Coordinate {coord} is not strictly increasing")
+    return ds

{ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11/ocf_data_sampler.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.9
+Version: 0.0.11
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

{ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/ocf_data_sampler.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,6 +10,16 @@ ocf_data_sampler.egg-info/dependency_links.txt
 ocf_data_sampler.egg-info/requires.txt
 ocf_data_sampler.egg-info/top_level.txt
 ocf_data_sampler/data/uk_gsp_locations.csv
+ocf_data_sampler/load/__init__.py
+ocf_data_sampler/load/gsp.py
+ocf_data_sampler/load/satellite.py
+ocf_data_sampler/load/utils.py
+ocf_data_sampler/load/nwp/__init__.py
+ocf_data_sampler/load/nwp/nwp.py
+ocf_data_sampler/load/nwp/providers/__init__.py
+ocf_data_sampler/load/nwp/providers/ecmwf.py
+ocf_data_sampler/load/nwp/providers/ukv.py
+ocf_data_sampler/load/nwp/providers/utils.py
 ocf_data_sampler/numpy_batch/__init__.py
 ocf_data_sampler/numpy_batch/gsp.py
 ocf_data_sampler/numpy_batch/nwp.py

{ocf_data_sampler-0.0.9 → ocf_data_sampler-0.0.11}/setup.py RENAMED Viewed

@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
 setup(
     name="ocf_data_sampler",
-    version="0.0.9",
+    version="0.0.11",
     license="MIT",
     description="Sample from weather data for renewable energy prediction",
     author="James Fulton, Peter Dudfield, and the Open Climate Fix team",