ocf-data-sampler 0.1.10__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

This version of ocf-data-sampler was flagged as potentially problematic.

Files changed (77):
  1. ocf_data_sampler/config/load.py +3 -3
  2. ocf_data_sampler/config/model.py +86 -72
  3. ocf_data_sampler/config/save.py +5 -4
  4. ocf_data_sampler/constants.py +140 -12
  5. ocf_data_sampler/load/gsp.py +6 -5
  6. ocf_data_sampler/load/load_dataset.py +5 -6
  7. ocf_data_sampler/load/nwp/nwp.py +17 -5
  8. ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
  9. ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
  10. ocf_data_sampler/load/nwp/providers/icon.py +46 -0
  11. ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
  12. ocf_data_sampler/load/nwp/providers/utils.py +3 -1
  13. ocf_data_sampler/load/satellite.py +27 -36
  14. ocf_data_sampler/load/site.py +11 -7
  15. ocf_data_sampler/load/utils.py +21 -16
  16. ocf_data_sampler/numpy_sample/collate.py +10 -9
  17. ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
  18. ocf_data_sampler/numpy_sample/gsp.py +15 -13
  19. ocf_data_sampler/numpy_sample/nwp.py +17 -23
  20. ocf_data_sampler/numpy_sample/satellite.py +17 -14
  21. ocf_data_sampler/numpy_sample/site.py +8 -7
  22. ocf_data_sampler/numpy_sample/sun_position.py +19 -25
  23. ocf_data_sampler/sample/__init__.py +0 -7
  24. ocf_data_sampler/sample/base.py +23 -44
  25. ocf_data_sampler/sample/site.py +25 -69
  26. ocf_data_sampler/sample/uk_regional.py +52 -103
  27. ocf_data_sampler/select/dropout.py +42 -27
  28. ocf_data_sampler/select/fill_time_periods.py +15 -3
  29. ocf_data_sampler/select/find_contiguous_time_periods.py +87 -75
  30. ocf_data_sampler/select/geospatial.py +63 -54
  31. ocf_data_sampler/select/location.py +16 -51
  32. ocf_data_sampler/select/select_spatial_slice.py +105 -89
  33. ocf_data_sampler/select/select_time_slice.py +71 -58
  34. ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
  35. ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
  36. ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
  37. ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
  38. ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
  39. ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
  40. ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
  41. ocf_data_sampler/utils.py +3 -1
  42. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
  43. ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
  44. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
  45. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
  46. scripts/refactor_site.py +62 -33
  47. utils/compute_icon_mean_stddev.py +72 -0
  48. ocf_data_sampler-0.1.10.dist-info/LICENSE +0 -21
  49. ocf_data_sampler-0.1.10.dist-info/RECORD +0 -82
  50. tests/__init__.py +0 -0
  51. tests/config/test_config.py +0 -113
  52. tests/config/test_load.py +0 -7
  53. tests/config/test_save.py +0 -28
  54. tests/conftest.py +0 -286
  55. tests/load/test_load_gsp.py +0 -15
  56. tests/load/test_load_nwp.py +0 -21
  57. tests/load/test_load_satellite.py +0 -17
  58. tests/load/test_load_sites.py +0 -14
  59. tests/numpy_sample/test_collate.py +0 -21
  60. tests/numpy_sample/test_datetime_features.py +0 -37
  61. tests/numpy_sample/test_gsp.py +0 -38
  62. tests/numpy_sample/test_nwp.py +0 -52
  63. tests/numpy_sample/test_satellite.py +0 -40
  64. tests/numpy_sample/test_sun_position.py +0 -81
  65. tests/select/test_dropout.py +0 -75
  66. tests/select/test_fill_time_periods.py +0 -28
  67. tests/select/test_find_contiguous_time_periods.py +0 -202
  68. tests/select/test_location.py +0 -67
  69. tests/select/test_select_spatial_slice.py +0 -154
  70. tests/select/test_select_time_slice.py +0 -275
  71. tests/test_sample/test_base.py +0 -164
  72. tests/test_sample/test_site_sample.py +0 -195
  73. tests/test_sample/test_uk_regional_sample.py +0 -163
  74. tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
  75. tests/torch_datasets/test_pvnet_uk.py +0 -167
  76. tests/torch_datasets/test_site.py +0 -226
  77. tests/torch_datasets/test_validate_channels_utils.py +0 -78
ocf_data_sampler/numpy_sample/site.py

```diff
@@ -1,33 +1,34 @@
-"""Convert site to Numpy Sample"""
+"""Convert site to Numpy Sample."""
 
 import xarray as xr
 
 
 class SiteSampleKey:
+    """Keys for the site sample dictionary."""
 
     generation = "site"
     capacity_kwp = "site_capacity_kwp"
     time_utc = "site_time_utc"
     t0_idx = "site_t0_idx"
     id = "site_id"
-    solar_azimuth = "site_solar_azimuth"
-    solar_elevation = "site_solar_elevation"
     date_sin = "site_date_sin"
     date_cos = "site_date_cos"
     time_sin = "site_time_sin"
     time_cos = "site_time_cos"
 
+
 def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
-    """Convert from Xarray to NumpySample"""
+    """Convert from Xarray to NumpySample.
 
-    # Extract values from the DataArray
+    Args:
+        da: xarray DataArray containing site data
+        t0_idx: Index of the t0 timestamp in the time dimension of the site data
+    """
     sample = {
         SiteSampleKey.generation: da.values,
         SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
         SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
         SiteSampleKey.id: da["site_id"].values,
-        SiteSampleKey.solar_azimuth: da["solar_azimuth"].values,
-        SiteSampleKey.solar_elevation: da["solar_elevation"].values,
         SiteSampleKey.date_sin: da["date_sin"].values,
         SiteSampleKey.date_cos: da["date_cos"].values,
         SiteSampleKey.time_sin: da["time_sin"].values,
```
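With the solar keys gone from both `SiteSampleKey` and the returned dict (solar position now comes from the reworked `sun_position` module below), a caller only needs the datetime-feature coordinates. A minimal sketch of the converter after this change; the toy DataArray's coordinate layout is an assumption based on the keys read above:

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.numpy_sample.site import SiteSampleKey, convert_site_to_numpy_sample

times = pd.date_range("2024-06-01", periods=4, freq="30min")
trig = ("time_utc", np.zeros(4))  # stand-in values for the sin/cos datetime features
da = xr.DataArray(
    np.random.rand(4),
    dims=["time_utc"],
    coords={
        "time_utc": times,
        "capacity_kwp": ("time_utc", np.full(4, 5.0)),
        "site_id": 1,
        "date_sin": trig, "date_cos": trig, "time_sin": trig, "time_cos": trig,
    },
)

sample = convert_site_to_numpy_sample(da, t0_idx=1)
print(sample[SiteSampleKey.generation].shape)  # (4,)
```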
ocf_data_sampler/numpy_sample/sun_position.py

```diff
@@ -1,16 +1,17 @@
+"""Module for calculating solar position."""
 
-import pvlib
 import numpy as np
 import pandas as pd
+import pvlib
 
 
 def calculate_azimuth_and_elevation(
-        datetimes: pd.DatetimeIndex,
-        lon: float,
-        lat: float
+    datetimes: pd.DatetimeIndex,
+    lon: float,
+    lat: float,
 ) -> tuple[np.ndarray, np.ndarray]:
-    """Calculate the solar coordinates for multiple datetimes at a single location
-
+    """Calculate the solar coordinates for multiple datetimes at a single location.
+
     Args:
         datetimes: The datetimes to calculate for
         lon: The longitude
@@ -20,46 +21,39 @@ def calculate_azimuth_and_elevation(
         np.ndarray: The azimuth of the datetimes in degrees
         np.ndarray: The elevation of the datetimes in degrees
     """
-
     solpos = pvlib.solarposition.get_solarposition(
         time=datetimes,
         longitude=lon,
         latitude=lat,
-        method='nrel_numpy'
+        method="nrel_numpy",
     )
-    azimuth = solpos["azimuth"].values
-    elevation = solpos["elevation"].values
-    return azimuth, elevation
+
+    return solpos["azimuth"].values, solpos["elevation"].values
 
 
 def make_sun_position_numpy_sample(
-        datetimes: pd.DatetimeIndex,
-        lon: float,
-        lat: float,
-        key_prefix: str = "gsp"
+    datetimes: pd.DatetimeIndex,
+    lon: float,
+    lat: float,
 ) -> dict:
-    """Creates NumpySample with standardized solar coordinates
+    """Creates NumpySample with standardized solar coordinates.
 
     Args:
         datetimes: The datetimes to calculate solar angles for
         lon: The longitude
         lat: The latitude
     """
-
     azimuth, elevation = calculate_azimuth_and_elevation(datetimes, lon, lat)
 
     # Normalise
-
     # Azimuth is in range [0, 360] degrees
     azimuth = azimuth / 360
 
-    # Elevation is in range [-90, 90] degrees
+    # Elevation is in range [-90, 90] degrees
     elevation = elevation / 180 + 0.5
-
+
     # Make NumpySample
-    sun_numpy_sample = {
-        key_prefix + "_solar_azimuth": azimuth,
-        key_prefix + "_solar_elevation": elevation,
+    return {
+        "solar_azimuth": azimuth,
+        "solar_elevation": elevation,
     }
-
-    return sun_numpy_sample
```
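The `key_prefix` argument is gone, so solar angles are produced once per sample under unprefixed keys, and both outputs are normalised into [0, 1]: azimuth divided by 360, elevation mapped from [-90, 90] via e/180 + 0.5. A short usage sketch; the coordinates are illustrative (roughly London):

```python
import pandas as pd

from ocf_data_sampler.numpy_sample.sun_position import make_sun_position_numpy_sample

datetimes = pd.date_range("2024-06-21 00:00", periods=48, freq="30min")
sample = make_sun_position_numpy_sample(datetimes, lon=-0.13, lat=51.5)

# Keys are now unprefixed, and values are normalised to [0, 1]
for key in ("solar_azimuth", "solar_elevation"):
    assert ((sample[key] >= 0) & (sample[key] <= 1)).all()
```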
ocf_data_sampler/sample/__init__.py

```diff
@@ -1,10 +1,3 @@
 from ocf_data_sampler.sample.base import SampleBase
 from ocf_data_sampler.sample.uk_regional import UKRegionalSample
 from ocf_data_sampler.sample.site import SiteSample
-
-
-__all__ = [
-    'SampleBase',
-    'UKRegionalSample',
-    'SiteSample'
-]
```
ocf_data_sampler/sample/base.py

```diff
@@ -1,69 +1,49 @@
-"""
-Base class definition - abstract
-Handling of both flat and nested structures - consideration for NWP
-"""
+"""Base class for handling flat/nested data structures with NWP consideration."""
 
-import logging
-import numpy as np
-import torch
-import xarray as xr
-
-from pathlib import Path
-from typing import Any, Dict, Optional, Union, TypeAlias
 from abc import ABC, abstractmethod
+from typing import TypeAlias
 
+import numpy as np
+import torch
 
-logger = logging.getLogger(__name__)
-
-NumpySample: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
-NumpyBatch: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
-TensorBatch: TypeAlias = Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]
+NumpySample: TypeAlias = dict[str, np.ndarray | dict[str, np.ndarray]]
+NumpyBatch: TypeAlias = dict[str, np.ndarray | dict[str, np.ndarray]]
+TensorBatch: TypeAlias = dict[str, torch.Tensor | dict[str, torch.Tensor]]
 
 
 class SampleBase(ABC):
-    """
-    Abstract base class for all sample types
-    Provides core data storage functionality
-    """
-
-    def __init__(self, data: Optional[Union[NumpySample, xr.Dataset]] = None):
-        """ Initialise data container """
-        logger.debug("Initialising SampleBase instance")
-        self._data = data
+    """Abstract base class for all sample types."""
 
     @abstractmethod
     def to_numpy(self) -> NumpySample:
-        """ Convert data to a numpy array representation """
+        """Convert sample data to numpy format."""
         raise NotImplementedError
 
     @abstractmethod
-    def plot(self, **kwargs) -> None:
-        """ Abstract method for plotting """
+    def plot(self) -> None:
+        """Create a visualisation of the data."""
         raise NotImplementedError
 
     @abstractmethod
-    def save(self, path: Union[str, Path]) -> None:
-        """ Abstract method for saving sample data """
+    def save(self, path: str) -> None:
+        """Saves the sample to disk in the implementations' required format."""
         raise NotImplementedError
 
     @classmethod
     @abstractmethod
-    def load(cls, path: Union[str, Path]) -> 'SampleBase':
-        """ Abstract class method for loading sample data """
+    def load(cls, path: str) -> "SampleBase":
+        """Load a sample from disk from the implementations' format."""
        raise NotImplementedError
 
 
 def batch_to_tensor(batch: NumpyBatch) -> TensorBatch:
-    """
-    Moves ndarrays in a nested dict to torch tensors
+    """Recursively converts numpy arrays in nested dict to torch tensors.
+
     Args:
         batch: NumpyBatch with data in numpy arrays
     Returns:
         TensorBatch with data in torch tensors
     """
-    if not batch:
-        raise ValueError("Cannot convert empty batch to tensors")
-
     for k, v in batch.items():
         if isinstance(v, dict):
             batch[k] = batch_to_tensor(v)
@@ -75,22 +55,21 @@ def batch_to_tensor(batch: NumpyBatch) -> TensorBatch:
     return batch
 
 
-def copy_batch_to_device(batch: dict, device: torch.device) -> dict:
-    """
-    Moves tensor leaves in a nested dict to a new device.
+def copy_batch_to_device(batch: TensorBatch, device: torch.device) -> TensorBatch:
+    """Recursively copies tensors in nested dict to specified device.
 
     Args:
-        batch: Nested dict with tensors to move.
-        device: Device to move tensors to.
+        batch: Nested dict with tensors to move
+        device: Device to move tensors to
 
     Returns:
-        A dict with tensors moved to the new device.
+        A dict with tensors moved to the new device
     """
     batch_copy = {}
 
     for k, v in batch.items():
         if isinstance(v, dict):
-            batch_copy[k] = copy_batch_to_device(v, device)
+            batch_copy[k] = copy_batch_to_device(v, device)
         elif isinstance(v, torch.Tensor):
             batch_copy[k] = v.to(device)
         else:
```
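The empty-batch `ValueError` guard has been dropped, and both helpers keep their recursive nested-dict behaviour (the leaf ndarray-to-tensor conversion sits in lines elided between the two hunks). A minimal sketch of the intended call pattern, with a hypothetical two-source batch whose shapes are illustrative only:

```python
import numpy as np
import torch

from ocf_data_sampler.sample.base import batch_to_tensor, copy_batch_to_device

# Hypothetical nested NumpyBatch: a top-level array plus a per-provider NWP dict
batch = {
    "gsp": np.random.rand(4, 7),
    "nwp": {"ukv": np.random.rand(4, 2, 8, 8)},
}

tensor_batch = batch_to_tensor(batch)  # converts arrays in place and returns the dict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor_batch = copy_batch_to_device(tensor_batch, device)  # returns a new nested dict

print(type(tensor_batch["nwp"]["ukv"]))  # <class 'torch.Tensor'>
```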
ocf_data_sampler/sample/site.py

```diff
@@ -1,81 +1,37 @@
-"""
-PVNet - Site sample / dataset implementation
-"""
+"""PVNet Site sample implementation for netCDF data handling and conversion."""
 
-import logging
 import xarray as xr
-import numpy as np
+from typing_extensions import override
 
-from pathlib import Path
-from typing import Dict, Any, Union
-
-from ocf_data_sampler.sample.base import SampleBase
+from ocf_data_sampler.sample.base import NumpySample, SampleBase
 from ocf_data_sampler.torch_datasets.datasets.site import convert_netcdf_to_numpy_sample
 
 
-logger = logging.getLogger(__name__)
-
-
 class SiteSample(SampleBase):
-    """ Sample class specific to Site PVNet """
+    """Handles PVNet site specific netCDF operations."""
 
-    def __init__(self):
-        logger.debug("Initialise SiteSample instance")
-        super().__init__()
-        self._data = {}
+    def __init__(self, data: xr.Dataset) -> None:
+        """Initializes the SiteSample object with the given xarray Dataset."""
+        if not isinstance(data, xr.Dataset):
+            raise TypeError(f"Data must be xarray Dataset - Found type {type(data)}")
+        self._data = data
 
-    def to_numpy(self) -> Dict[str, Any]:
-        """ Convert sample numpy arrays - netCDF conversion """
-        logger.debug("Converting site sample to numpy format")
-
-        try:
-            if not isinstance(self._data, xr.Dataset):
-                raise TypeError("Data must be xarray Dataset")
-
-            numpy_data = convert_netcdf_to_numpy_sample(self._data)
+    @override
+    def to_numpy(self) -> NumpySample:
+        return convert_netcdf_to_numpy_sample(self._data)
 
-            logger.debug("Successfully converted to numpy format")
-            return numpy_data
-
-        except Exception as e:
-            logger.error(f"Error converting to numpy: {str(e)}")
-            raise
-
-    def save(self, path: Union[str, Path]) -> None:
-        """ Save site sample as netCDF - h5netcdf engine """
-        logger.debug(f"Saving SiteSample to {path}")
-        path = Path(path)
-
-        if path.suffix != '.nc':
-            logger.error(f"Invalid file format - {path.suffix}")
-            raise ValueError("Only .nc format is supported")
-
-        if not isinstance(self._data, xr.Dataset):
-            raise TypeError("Data must be xarray Dataset for saving")
-
-        self._data.to_netcdf(
-            path,
-            mode="w",
-            engine="h5netcdf"
-        )
-        logger.debug(f"Successfully saved SiteSample - {path}")
+    @override
+    def save(self, path: str) -> None:
+        # Saves as NetCDF
+        self._data.to_netcdf(path, mode="w", engine="h5netcdf")
 
     @classmethod
-    def load(cls, path: str) -> None:
-        """ Load site sample from netCDF """
-        logger.debug(f"Loading SiteSample from {path}")
-        path = Path(path)
-
-        if path.suffix != '.nc':
-            logger.error(f"Invalid file format - {path.suffix}")
-            raise ValueError("Only .nc format is supported")
-
-        instance = cls()
-        instance._data = xr.open_dataset(path)
-        logger.debug(f"Loaded SiteSample from {path}")
-        return instance
-
-    # TO DO - placeholder for now
-    def plot(self, **kwargs) -> None:
-        """ Plot sample data - placeholder """
-        pass
+    @override
+    def load(cls, path: str) -> "SiteSample":
+        # Loads from NetCDF
+        return cls(xr.open_dataset(path))
+
+    @override
+    def plot(self) -> None:
+        # TODO - placeholder for now
+        raise NotImplementedError("Plotting not yet implemented for SiteSample")
```
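The constructor now validates its input up front, and the old `.nc` extension check is gone, so any writable path is accepted. A hypothetical round trip under those assumptions; it requires the h5netcdf engine to be installed, and the toy Dataset is only a stand-in for a real sample produced by the site dataset pipeline:

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.sample.site import SiteSample

# Stand-in Dataset; real samples come from the site torch dataset pipeline
ds = xr.Dataset(
    {"generation": ("time_utc", np.random.rand(4))},
    coords={"time_utc": pd.date_range("2024-06-01", periods=4, freq="30min")},
)

sample = SiteSample(ds)               # raises TypeError for non-Dataset input
sample.save("sample.nc")              # written via the h5netcdf engine
loaded = SiteSample.load("sample.nc")
```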
ocf_data_sampler/sample/uk_regional.py

```diff
@@ -1,120 +1,69 @@
-"""
-PVNet - UK Regional sample / dataset implementation
-"""
+"""PVNet UK Regional sample implementation for dataset handling and visualisation."""
 
-import numpy as np
-import pandas as pd
 import torch
-import logging
-
-from typing import Dict, Any, Union, List, Optional
-from pathlib import Path
+from typing_extensions import override
 
 from ocf_data_sampler.numpy_sample import (
-    NWPSampleKey,
     GSPSampleKey,
-    SatelliteSampleKey
+    NWPSampleKey,
+    SatelliteSampleKey,
 )
-
-from ocf_data_sampler.sample.base import SampleBase
-
-try:
-    import matplotlib.pyplot as plt
-    MATPLOTLIB_AVAILABLE = True
-except ImportError:
-    MATPLOTLIB_AVAILABLE = False
-    plt = None
-
-
-logger = logging.getLogger(__name__)
+from ocf_data_sampler.sample.base import NumpySample, SampleBase
 
 
 class UKRegionalSample(SampleBase):
-    """ Sample class specific to UK Regional PVNet """
+    """Handles UK Regional PVNet data operations."""
 
-    def __init__(self):
-        logger.debug("Initialise UKRegionalSample instance")
-        super().__init__()
-        self._data = {}
+    def __init__(self, data: NumpySample) -> None:
+        """Initialises UK Regional sample with data."""
+        self._data = data
 
-    def to_numpy(self) -> Dict[str, Any]:
-        """ Convert sample data to numpy format """
-        logger.debug("Converting sample data to numpy format")
+    @override
+    def to_numpy(self) -> NumpySample:
         return self._data
 
-    def save(self, path: Union[str, Path]) -> None:
-        """ Save PVNet sample as .pt """
-        logger.debug(f"Saving UKRegionalSample to {path}")
-        path = Path(path)
-
-        if path.suffix != '.pt':
-            logger.error(f"Invalid file format: {path.suffix}")
-            raise ValueError(f"Only .pt format is supported: {path.suffix}")
-
+    @override
+    def save(self, path: str) -> None:
+        # Saves to pickle format
         torch.save(self._data, path)
-        logger.debug(f"Successfully saved UKRegionalSample to {path}")
 
     @classmethod
-    def load(cls, path: Union[str, Path]) -> 'UKRegionalSample':
-        """ Load PVNet sample data from .pt """
-        logger.debug(f"Attempting to load UKRegionalSample from {path}")
-        path = Path(path)
-
-        if path.suffix != '.pt':
-            logger.error(f"Invalid file format: {path.suffix}")
-            raise ValueError(f"Only .pt format is supported: {path.suffix}")
-
-        instance = cls()
+    @override
+    def load(cls, path: str) -> "UKRegionalSample":
+        # Loads from .pt format
         # TODO: We should move away from using torch.load(..., weights_only=False)
-        # This is not recommended
-        instance._data = torch.load(path, weights_only=False)
-        logger.debug(f"Successfully loaded UKRegionalSample from {path}")
-        return instance
-
-    def plot(self, **kwargs) -> None:
-        """ Sample visualisation definition """
-        logger.debug("Creating UKRegionalSample visualisation")
-
-        if not MATPLOTLIB_AVAILABLE:
-            raise ImportError(
-                "Matplotlib required for plotting"
-                "Install via 'ocf_data_sampler[plot]'"
-            )
-
-        try:
-            fig, axes = plt.subplots(2, 2, figsize=(12, 8))
-
-            if NWPSampleKey.nwp in self._data:
-                logger.debug("Plotting NWP data")
-                first_nwp = list(self._data[NWPSampleKey.nwp].values())[0]
-                if 'nwp' in first_nwp:
-                    axes[0, 1].imshow(first_nwp['nwp'][0])
-                    axes[0, 1].set_title('NWP (First Channel)')
-                    if NWPSampleKey.channel_names in first_nwp:
-                        channel_names = first_nwp[NWPSampleKey.channel_names]
-                        if len(channel_names) > 0:
-                            axes[0, 1].set_title(f'NWP: {channel_names[0]}')
-
-            if GSPSampleKey.gsp in self._data:
-                logger.debug("Plotting GSP generation data")
-                axes[0, 0].plot(self._data[GSPSampleKey.gsp])
-                axes[0, 0].set_title('GSP Generation')
-
-            if GSPSampleKey.solar_azimuth in self._data and GSPSampleKey.solar_elevation in self._data:
-                logger.debug("Plotting solar position data")
-                axes[1, 1].plot(self._data[GSPSampleKey.solar_azimuth], label='Azimuth')
-                axes[1, 1].plot(self._data[GSPSampleKey.solar_elevation], label='Elevation')
-                axes[1, 1].set_title('Solar Position')
-                axes[1, 1].legend()
-
-            if SatelliteSampleKey.satellite_actual in self._data:
-                logger.debug("Plotting satellite data")
-                axes[1, 0].imshow(self._data[SatelliteSampleKey.satellite_actual])
-                axes[1, 0].set_title('Satellite Data')
-
-            plt.tight_layout()
-            plt.show()
-            logger.debug("Successfully created visualisation")
-        except Exception as e:
-            logger.error(f"Error creating visualisation: {str(e)}")
-            raise
+        return cls(torch.load(path, weights_only=False))
+
+    @override
+    def plot(self) -> None:
+        from matplotlib import pyplot as plt
+
+        fig, axes = plt.subplots(2, 2, figsize=(12, 8))
+
+        if NWPSampleKey.nwp in self._data:
+            first_nwp = next(iter(self._data[NWPSampleKey.nwp].values()))
+            if "nwp" in first_nwp:
+                axes[0, 1].imshow(first_nwp["nwp"][0])
+                title = "NWP (First Channel)"
+                if NWPSampleKey.channel_names in first_nwp:
+                    channel_names = first_nwp[NWPSampleKey.channel_names]
+                    if channel_names:
+                        title = f"NWP: {channel_names[0]}"
+                axes[0, 1].set_title(title)
+
+        if GSPSampleKey.gsp in self._data:
+            axes[0, 0].plot(self._data[GSPSampleKey.gsp])
+            axes[0, 0].set_title("GSP Generation")
+
+        if "solar_azimuth" in self._data and "solar_elevation" in self._data:
+            axes[1, 1].plot(self._data["solar_azimuth"], label="Azimuth")
+            axes[1, 1].plot(self._data["solar_elevation"], label="Elevation")
+            axes[1, 1].set_title("Solar Position")
+            axes[1, 1].legend()
+
+        if SatelliteSampleKey.satellite_actual in self._data:
+            axes[1, 0].imshow(self._data[SatelliteSampleKey.satellite_actual])
+            axes[1, 0].set_title("Satellite Data")
+
+        plt.tight_layout()
+        plt.show()
```
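`save` and `load` no longer enforce a `.pt` suffix, and matplotlib is now imported lazily inside `plot`, so it is only needed when plotting. A minimal sketch with a hypothetical sample dict; the array shapes are illustrative only:

```python
import numpy as np

from ocf_data_sampler.numpy_sample import GSPSampleKey
from ocf_data_sampler.sample.uk_regional import UKRegionalSample

data = {
    GSPSampleKey.gsp: np.random.rand(21),
    "solar_azimuth": np.random.rand(21),
    "solar_elevation": np.random.rand(21),
}

sample = UKRegionalSample(data)
sample.save("sample.pt")                     # torch.save under the hood
loaded = UKRegionalSample.load("sample.pt")  # torch.load(..., weights_only=False)
loaded.plot()                                # only the GSP and solar panels are populated here
```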
ocf_data_sampler/select/dropout.py

```diff
@@ -1,39 +1,54 @@
-""" Functions for simulating dropout in time series data """
+"""Functions for simulating dropout in time series data.
+
+This is used for the following types of data: GSP, Satellite and Site
+This is not used for NWP
+"""
+
 import numpy as np
 import pandas as pd
 import xarray as xr
 
 
 def draw_dropout_time(
-        t0: pd.Timestamp,
-        dropout_timedeltas: list[pd.Timedelta] | pd.Timedelta | None,
-        dropout_frac: float = 0,
-):
-
-    if dropout_timedeltas is not None:
-        assert len(dropout_timedeltas) >= 1, "Must include list of relative dropout timedeltas"
-        assert all(
-            [t <= pd.Timedelta("0min") for t in dropout_timedeltas]
-        ), "dropout timedeltas must be negative"
-    assert 0 <= dropout_frac <= 1
-
-    if (dropout_timedeltas is None) or (np.random.uniform() >= dropout_frac):
-        dropout_time = None
+    t0: pd.Timestamp,
+    dropout_timedeltas: list[pd.Timedelta],
+    dropout_frac: float,
+) -> pd.Timestamp:
+    """Randomly pick a dropout time from a list of timedeltas.
+
+    Args:
+        t0: The forecast init-time
+        dropout_timedeltas: List of timedeltas relative to t0 to pick from
+        dropout_frac: Probability that dropout will be applied.
+            This should be between 0 and 1 inclusive
+    """
+    if dropout_frac > 0 and len(dropout_timedeltas) == 0:
+        raise ValueError("To apply dropout, dropout_timedeltas must be provided")
+
+    for t in dropout_timedeltas:
+        if t > pd.Timedelta("0min"):
+            raise ValueError("Dropout timedeltas must be negative")
+
+    if not (0 <= dropout_frac <= 1):
+        raise ValueError("dropout_frac must be between 0 and 1 inclusive")
+
+    if (len(dropout_timedeltas) == 0) or (np.random.uniform() >= dropout_frac):
+        dropout_time = t0
     else:
-        t0_datetime_utc = pd.Timestamp(t0)
-        dt = np.random.choice(dropout_timedeltas)
-        dropout_time = t0_datetime_utc + dt
+        dropout_time = t0 + np.random.choice(dropout_timedeltas)
 
     return dropout_time
 
 
 def apply_dropout_time(
-        ds: xr.DataArray,
-        dropout_time: pd.Timestamp | None,
-):
-
-    if dropout_time is None:
-        return ds
-    else:
-        # This replaces the times after the dropout with NaNs
-        return ds.where(ds.time_utc <= dropout_time)
+    ds: xr.DataArray,
+    dropout_time: pd.Timestamp,
+) -> xr.DataArray:
+    """Apply dropout time to the data.
+
+    Args:
+        ds: Xarray DataArray with 'time_utc' coordinate
+        dropout_time: Time after which data is set to NaN
+    """
+    # This replaces the times after the dropout with NaNs
+    return ds.where(ds.time_utc <= dropout_time)
```
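A behaviour change worth noting: when no dropout is drawn, the function now returns `t0` itself rather than `None`, so `apply_dropout_time` always masks; this is harmless because timestamps up to and including `t0` are kept. A small sketch with toy data:

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.select.dropout import apply_dropout_time, draw_dropout_time

times = pd.date_range("2024-06-01 08:00", periods=7, freq="30min")
da = xr.DataArray(np.random.rand(7), dims=["time_utc"], coords={"time_utc": times})

t0 = times[4]
# Simulate data arriving 30 or 60 minutes late, half of the time
dropout_time = draw_dropout_time(
    t0,
    dropout_timedeltas=[pd.Timedelta("-30min"), pd.Timedelta("-60min")],
    dropout_frac=0.5,
)

# Values after dropout_time become NaN; if no dropout was drawn, dropout_time == t0
da_dropped = apply_dropout_time(da, dropout_time)
```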
ocf_data_sampler/select/fill_time_periods.py

```diff
@@ -1,11 +1,23 @@
-"""fill time periods"""
+"""Fill time periods between specified start and end dates."""
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 
 def fill_time_periods(time_periods: pd.DataFrame, freq: pd.Timedelta) -> pd.DatetimeIndex:
+    """Create range of timestamps between given start and end times.
+
+    Each of the continuous periods (i.e. each row of the input DataFrame) is filled with the
+    specified frequency.
+
+    Args:
+        time_periods: DataFrame with columns 'start_dt' and 'end_dt'
+        freq: Frequency to fill time periods with
+    """
     start_dts = pd.to_datetime(time_periods["start_dt"].values).ceil(freq)
     end_dts = pd.to_datetime(time_periods["end_dt"].values)
-    date_ranges = [pd.date_range(start_dt, end_dt, freq=freq) for start_dt, end_dt in zip(start_dts, end_dts)]
+    date_ranges = [
+        pd.date_range(start_dt, end_dt, freq=freq)
+        for start_dt, end_dt in zip(start_dts, end_dts, strict=False)
+    ]
     return pd.DatetimeIndex(np.concatenate(date_ranges))
```
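The behaviour is unchanged: each start time is ceiled to the frequency, then each row is expanded into its own date range and the results are concatenated. A quick worked example with made-up periods:

```python
import pandas as pd

from ocf_data_sampler.select.fill_time_periods import fill_time_periods

# Two continuous periods, filled independently at 30-minute frequency
time_periods = pd.DataFrame(
    {
        "start_dt": ["2024-06-01 00:10", "2024-06-02 12:00"],
        "end_dt": ["2024-06-01 02:00", "2024-06-02 13:00"],
    }
)

timestamps = fill_time_periods(time_periods, freq=pd.Timedelta("30min"))
print(timestamps[0])    # 2024-06-01 00:30:00 (00:10 ceiled to the 30-min grid)
print(len(timestamps))  # 4 + 3 = 7 timestamps
```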