ocf-data-sampler 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of ocf-data-sampler might be problematic.

Files changed (29)
  1. ocf_data_sampler/config/model.py +25 -23
  2. ocf_data_sampler/load/satellite.py +21 -29
  3. ocf_data_sampler/load/site.py +1 -1
  4. ocf_data_sampler/numpy_sample/gsp.py +6 -2
  5. ocf_data_sampler/numpy_sample/nwp.py +7 -13
  6. ocf_data_sampler/numpy_sample/satellite.py +11 -8
  7. ocf_data_sampler/numpy_sample/site.py +6 -2
  8. ocf_data_sampler/numpy_sample/sun_position.py +9 -10
  9. ocf_data_sampler/sample/__init__.py +0 -7
  10. ocf_data_sampler/sample/base.py +16 -35
  11. ocf_data_sampler/sample/site.py +28 -65
  12. ocf_data_sampler/sample/uk_regional.py +52 -97
  13. ocf_data_sampler/select/dropout.py +38 -25
  14. ocf_data_sampler/select/fill_time_periods.py +3 -1
  15. ocf_data_sampler/select/find_contiguous_time_periods.py +0 -1
  16. ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +2 -3
  17. ocf_data_sampler/torch_datasets/datasets/site.py +9 -5
  18. {ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/METADATA +1 -1
  19. {ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/RECORD +29 -29
  20. tests/config/test_config.py +3 -3
  21. tests/conftest.py +33 -0
  22. tests/numpy_sample/test_nwp.py +3 -42
  23. tests/select/test_dropout.py +7 -13
  24. tests/test_sample/test_site_sample.py +5 -35
  25. tests/test_sample/test_uk_regional_sample.py +8 -35
  26. tests/torch_datasets/test_pvnet_uk.py +6 -19
  27. {ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/LICENSE +0 -0
  28. {ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/WHEEL +0 -0
  29. {ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/top_level.txt +0 -0
ocf_data_sampler/sample/uk_regional.py CHANGED
@@ -1,120 +1,75 @@
- """
- PVNet - UK Regional sample / dataset implementation
- """
+ """PVNet UK Regional sample implementation for dataset handling and visualisation"""
  
- import numpy as np
- import pandas as pd
- import torch
- import logging
+ from typing_extensions import override
  
- from typing import Dict, Any, Union, List, Optional
- from pathlib import Path
+ import torch
  
+ from ocf_data_sampler.sample.base import SampleBase, NumpySample
  from ocf_data_sampler.numpy_sample import (
      NWPSampleKey,
      GSPSampleKey,
      SatelliteSampleKey
  )
  
- from ocf_data_sampler.sample.base import SampleBase
- 
- try:
-     import matplotlib.pyplot as plt
-     MATPLOTLIB_AVAILABLE = True
- except ImportError:
-     MATPLOTLIB_AVAILABLE = False
-     plt = None
- 
- 
- logger = logging.getLogger(__name__)
- 
  
  class UKRegionalSample(SampleBase):
-     """ Sample class specific to UK Regional PVNet """
+     """Handles UK Regional PVNet data operations"""
  
-     def __init__(self):
-         logger.debug("Initialise UKRegionalSample instance")
-         super().__init__()
-         self._data = {}
+     def __init__(self, data: NumpySample):
+         self._data = data
  
-     def to_numpy(self) -> Dict[str, Any]:
-         """ Convert sample data to numpy format """
-         logger.debug("Converting sample data to numpy format")
+     @override
+     def to_numpy(self) -> NumpySample:
          return self._data
  
-     def save(self, path: Union[str, Path]) -> None:
-         """ Save PVNet sample as .pt """
-         logger.debug(f"Saving UKRegionalSample to {path}")
-         path = Path(path)
- 
-         if path.suffix != '.pt':
-             logger.error(f"Invalid file format: {path.suffix}")
-             raise ValueError(f"Only .pt format is supported: {path.suffix}")
+     def save(self, path: str) -> None:
+         """Save PVNet sample as pickle format using torch.save
  
+         Args:
+             path: Path to save the sample data to
+         """
          torch.save(self._data, path)
-         logger.debug(f"Successfully saved UKRegionalSample to {path}")
  
      @classmethod
-     def load(cls, path: Union[str, Path]) -> 'UKRegionalSample':
-         """ Load PVNet sample data from .pt """
-         logger.debug(f"Attempting to load UKRegionalSample from {path}")
-         path = Path(path)
+     def load(cls, path: str) -> 'UKRegionalSample':
+         """Load PVNet sample data from .pt format
  
-         if path.suffix != '.pt':
-             logger.error(f"Invalid file format: {path.suffix}")
-             raise ValueError(f"Only .pt format is supported: {path.suffix}")
- 
-         instance = cls()
+         Args:
+             path: Path to load the sample data from
+         """
          # TODO: We should move away from using torch.load(..., weights_only=False)
-         # This is not recommended
-         instance._data = torch.load(path, weights_only=False)
-         logger.debug(f"Successfully loaded UKRegionalSample from {path}")
-         return instance
- 
-     def plot(self, **kwargs) -> None:
-         """ Sample visualisation definition """
-         logger.debug("Creating UKRegionalSample visualisation")
- 
-         if not MATPLOTLIB_AVAILABLE:
-             raise ImportError(
-                 "Matplotlib required for plotting"
-                 "Install via 'ocf_data_sampler[plot]'"
-             )
- 
-         try:
-             fig, axes = plt.subplots(2, 2, figsize=(12, 8))
- 
-             if NWPSampleKey.nwp in self._data:
-                 logger.debug("Plotting NWP data")
-                 first_nwp = list(self._data[NWPSampleKey.nwp].values())[0]
-                 if 'nwp' in first_nwp:
-                     axes[0, 1].imshow(first_nwp['nwp'][0])
-                     axes[0, 1].set_title('NWP (First Channel)')
-                     if NWPSampleKey.channel_names in first_nwp:
-                         channel_names = first_nwp[NWPSampleKey.channel_names]
-                         if len(channel_names) > 0:
-                             axes[0, 1].set_title(f'NWP: {channel_names[0]}')
+         return cls(torch.load(path, weights_only=False))
  
-             if GSPSampleKey.gsp in self._data:
-                 logger.debug("Plotting GSP generation data")
-                 axes[0, 0].plot(self._data[GSPSampleKey.gsp])
-                 axes[0, 0].set_title('GSP Generation')
- 
-             if GSPSampleKey.solar_azimuth in self._data and GSPSampleKey.solar_elevation in self._data:
-                 logger.debug("Plotting solar position data")
-                 axes[1, 1].plot(self._data[GSPSampleKey.solar_azimuth], label='Azimuth')
-                 axes[1, 1].plot(self._data[GSPSampleKey.solar_elevation], label='Elevation')
-                 axes[1, 1].set_title('Solar Position')
-                 axes[1, 1].legend()
+     def plot(self) -> None:
+         """Creates visualisations for NWP, GSP, solar position, and satellite data"""
+         from matplotlib import pyplot as plt
  
-             if SatelliteSampleKey.satellite_actual in self._data:
-                 logger.debug("Plotting satellite data")
-                 axes[1, 0].imshow(self._data[SatelliteSampleKey.satellite_actual])
-                 axes[1, 0].set_title('Satellite Data')
- 
-             plt.tight_layout()
-             plt.show()
-             logger.debug("Successfully created visualisation")
-         except Exception as e:
-             logger.error(f"Error creating visualisation: {str(e)}")
-             raise
+         fig, axes = plt.subplots(2, 2, figsize=(12, 8))
+ 
+         if NWPSampleKey.nwp in self._data:
+             first_nwp = list(self._data[NWPSampleKey.nwp].values())[0]
+             if 'nwp' in first_nwp:
+                 axes[0, 1].imshow(first_nwp['nwp'][0])
+                 title = 'NWP (First Channel)'
+                 if NWPSampleKey.channel_names in first_nwp:
+                     channel_names = first_nwp[NWPSampleKey.channel_names]
+                     if channel_names:
+                         title = f'NWP: {channel_names[0]}'
+                 axes[0, 1].set_title(title)
+ 
+         if GSPSampleKey.gsp in self._data:
+             axes[0, 0].plot(self._data[GSPSampleKey.gsp])
+             axes[0, 0].set_title('GSP Generation')
+ 
+         if GSPSampleKey.solar_azimuth in self._data and GSPSampleKey.solar_elevation in self._data:
+             axes[1, 1].plot(self._data[GSPSampleKey.solar_azimuth], label='Azimuth')
+             axes[1, 1].plot(self._data[GSPSampleKey.solar_elevation], label='Elevation')
+             axes[1, 1].set_title('Solar Position')
+             axes[1, 1].legend()
+ 
+         if SatelliteSampleKey.satellite_actual in self._data:
+             axes[1, 0].imshow(self._data[SatelliteSampleKey.satellite_actual])
+             axes[1, 0].set_title('Satellite Data')
+ 
+         plt.tight_layout()
+         plt.show()
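
Note: with this change the sample data is passed to the constructor and the pathlib/suffix checks are gone, so a save/load round trip looks roughly like the sketch below. This is a minimal illustration based only on the signatures in the hunk above; the toy sample dict is a placeholder, not a real PVNet sample.

    import numpy as np
    from ocf_data_sampler.numpy_sample import GSPSampleKey
    from ocf_data_sampler.sample.uk_regional import UKRegionalSample

    # Placeholder sample - real samples also carry NWP, satellite and solar-position keys
    sample_data = {GSPSampleKey.gsp: np.random.rand(7)}

    sample = UKRegionalSample(sample_data)       # data now goes straight into the constructor
    sample.save("sample.pt")                     # plain torch.save, no .pt suffix validation any more

    loaded = UKRegionalSample.load("sample.pt")  # wraps torch.load(..., weights_only=False)
    assert (loaded.to_numpy()[GSPSampleKey.gsp] == sample_data[GSPSampleKey.gsp]).all()
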
ocf_data_sampler/select/dropout.py CHANGED
@@ -1,39 +1,52 @@
- """ Functions for simulating dropout in time series data """
+ """Functions for simulating dropout in time series data
+ 
+ This is used for the following types of data: GSP, Satellite and Site
+ This is not used for NWP
+ """
  import numpy as np
  import pandas as pd
  import xarray as xr
  
  
  def draw_dropout_time(
-     t0: pd.Timestamp,
-     dropout_timedeltas: list[pd.Timedelta] | pd.Timedelta | None,
-     dropout_frac: float = 0,
- ):
- 
-     if dropout_timedeltas is not None:
-         assert len(dropout_timedeltas) >= 1, "Must include list of relative dropout timedeltas"
-         assert all(
-             [t <= pd.Timedelta("0min") for t in dropout_timedeltas]
-         ), "dropout timedeltas must be negative"
+     t0: pd.Timestamp,
+     dropout_timedeltas: list[pd.Timedelta],
+     dropout_frac: float,
+ ) -> pd.Timestamp:
+     """Randomly pick a dropout time from a list of timedeltas
+ 
+     Args:
+         t0: The forecast init-time
+         dropout_timedeltas: List of timedeltas relative to t0 to pick from
+         dropout_frac: Probability that dropout will be applied. This should be between 0 and 1
+             inclusive
+     """
+ 
+     if dropout_frac>0:
+         assert len(dropout_timedeltas) > 0, "To apply dropout dropout_timedeltas must be provided"
+ 
+         for t in dropout_timedeltas:
+             assert t <= pd.Timedelta("0min"), "Dropout timedeltas must be negative"
+ 
      assert 0 <= dropout_frac <= 1
  
-     if (dropout_timedeltas is None) or (np.random.uniform() >= dropout_frac):
-         dropout_time = None
+     if (len(dropout_timedeltas) == 0) or (np.random.uniform() >= dropout_frac):
+         dropout_time = t0
      else:
-         t0_datetime_utc = pd.Timestamp(t0)
-         dt = np.random.choice(dropout_timedeltas)
-         dropout_time = t0_datetime_utc + dt
+         dropout_time = t0 + np.random.choice(dropout_timedeltas)
  
      return dropout_time
  
  
  def apply_dropout_time(
-     ds: xr.DataArray,
-     dropout_time: pd.Timestamp | None,
- ):
- 
-     if dropout_time is None:
-         return ds
-     else:
-         # This replaces the times after the dropout with NaNs
-         return ds.where(ds.time_utc <= dropout_time)
+     ds: xr.DataArray,
+     dropout_time: pd.Timestamp,
+ ) -> xr.DataArray:
+     """Apply dropout time to the data
+ 
+     Args:
+         ds: Xarray DataArray with 'time_utc' coordiante
+         dropout_time: Time after which data is set to NaN
+     """
+     # This replaces the times after the dropout with NaNs
+     return ds.where(ds.time_utc <= dropout_time)
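
Note: under the new signatures draw_dropout_time always returns a timestamp (t0 itself when no dropout is drawn), so apply_dropout_time can be called unconditionally. A rough usage sketch with dummy data (illustrative only, not taken from the package's tests):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from ocf_data_sampler.select.dropout import apply_dropout_time, draw_dropout_time

    t0 = pd.Timestamp("2021-01-01 04:00")
    times = pd.date_range(t0 - pd.Timedelta("1h"), t0, freq="5min")
    da = xr.DataArray(np.random.rand(len(times)), coords={"time_utc": times})

    # With dropout_frac=1 a (negative) timedelta is always drawn and added to t0
    dropout_time = draw_dropout_time(
        t0,
        dropout_timedeltas=[pd.Timedelta("-30min"), pd.Timedelta("-60min")],
        dropout_frac=1,
    )

    # Values observed after the drawn dropout time become NaN;
    # if dropout_time == t0, this history-only array is left unchanged
    da_dropped = apply_dropout_time(da, dropout_time)
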
ocf_data_sampler/select/fill_time_periods.py CHANGED
@@ -1,10 +1,12 @@
- """fill time periods"""
+ """Fill time periods between start and end dates at specified frequency"""
  
  import pandas as pd
  import numpy as np
  
  
  def fill_time_periods(time_periods: pd.DataFrame, freq: pd.Timedelta) -> pd.DatetimeIndex:
+     """Generate DatetimeIndex for all timestamps between start and end dates"""
+ 
      start_dts = pd.to_datetime(time_periods["start_dt"].values).ceil(freq)
      end_dts = pd.to_datetime(time_periods["end_dt"].values)
      date_ranges = [pd.date_range(start_dt, end_dt, freq=freq) for start_dt, end_dt in zip(start_dts, end_dts)]
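
Note: only the first half of the function body is visible in this hunk. Assuming the remainder concatenates the per-row date ranges into one DatetimeIndex (not shown here), a call looks roughly like:

    import pandas as pd
    from ocf_data_sampler.select.fill_time_periods import fill_time_periods

    # One row per contiguous period; start times are ceiled to the frequency
    time_periods = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(["2024-01-01 00:05", "2024-01-02 12:00"]),
            "end_dt": pd.to_datetime(["2024-01-01 01:00", "2024-01-02 13:00"]),
        }
    )

    timestamps = fill_time_periods(time_periods, freq=pd.Timedelta("30min"))
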
ocf_data_sampler/select/find_contiguous_time_periods.py CHANGED
@@ -5,7 +5,6 @@ import pandas as pd
  from ocf_data_sampler.load.utils import check_time_unique_increasing
  
  
- 
  def find_contiguous_time_periods(
      datetimes: pd.DatetimeIndex,
      min_seq_length: int,
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py CHANGED
@@ -186,9 +186,8 @@ class PVNetUKRegionalDataset(Dataset):
              gsp_ids: List of GSP IDs to create samples for. Defaults to all
          """
  
-         config = load_yaml_configuration(config_filename)
- 
-         # Validate channels for NWP and satellite data
+         # config = load_yaml_configuration(config_filename)
+         config: Configuration = load_yaml_configuration(config_filename)
          validate_nwp_channels(config)
          validate_satellite_channels(config)
  
ocf_data_sampler/torch_datasets/datasets/site.py CHANGED
@@ -20,7 +20,6 @@ from ocf_data_sampler.select import (
  from ocf_data_sampler.utils import minutes
  from ocf_data_sampler.torch_datasets.utils.valid_time_periods import find_valid_time_periods
  from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import merge_dicts, fill_nans_in_arrays
- from ocf_data_sampler.torch_datasets.utils.validate_channels import validate_nwp_channels
  
  from ocf_data_sampler.numpy_sample import (
      convert_site_to_numpy_sample,
@@ -30,8 +29,12 @@ from ocf_data_sampler.numpy_sample import (
      make_sun_position_numpy_sample,
  )
  from ocf_data_sampler.numpy_sample import NWPSampleKey
- from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS
+ from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
  
+ from ocf_data_sampler.torch_datasets.utils.validate_channels import (
+     validate_nwp_channels,
+     validate_satellite_channels,
+ )
  
  xr.set_options(keep_attrs=True)
  
@@ -52,9 +55,8 @@ class SitesDataset(Dataset):
          """
  
          config: Configuration = load_yaml_configuration(config_filename)
- 
-         # Validate NWP channels
          validate_nwp_channels(config)
+         validate_satellite_channels(config)
  
          datasets_dict = get_dataset_dict(config.input_data)
  
@@ -237,8 +239,10 @@ class SitesDataset(Dataset):
              data_arrays.append((f"nwp-{provider}", da_nwp))
  
          if "sat" in dataset_dict:
-             # TODO add some satellite normalisation
             da_sat = dataset_dict["sat"]
+ 
+             # Standardise
+             da_sat = (da_sat - RSS_MEAN) / RSS_STD
              data_arrays.append(("satellite", da_sat))
  
          if "site" in dataset_dict:
{ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ocf_data_sampler
- Version: 0.1.9
+ Version: 0.1.11
  Summary: Sample from weather data for renewable energy prediction
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
  Author-email: info@openclimatefix.org
{ocf_data_sampler-0.1.9.dist-info → ocf_data_sampler-0.1.11.dist-info}/RECORD CHANGED
@@ -3,14 +3,14 @@ ocf_data_sampler/constants.py,sha256=0HYNmqwBaHVTAEEx9qzk6WD9YInh0gSKLeI3pyq7aNs
  ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
  ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
  ocf_data_sampler/config/load.py,sha256=sKCKmhkkeFvvkNL5xmnFvdAulaCtV4-rigPsFvVDPDc,634
- ocf_data_sampler/config/model.py,sha256=IMJhsjL_oGh2c50q8pBnCnArY4qHQcBc_M8jqlEeD0c,7129
+ ocf_data_sampler/config/model.py,sha256=8PO-23uVy_JjWOJKgaZWdNMehQsAI-Jn8t0lcmBycwg,6992
  ocf_data_sampler/config/save.py,sha256=OqCPT3e0d7vMI2g2iRzmifPD7GscDkFQztU_qE5I0JY,1066
  ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
  ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
  ocf_data_sampler/load/gsp.py,sha256=uRxEORH7J99JAJ-D38nm0iJFOQh7dkm_NCXcpbYkyvo,857
  ocf_data_sampler/load/load_dataset.py,sha256=PHUGSm4hFHfS9nfIP2KjHHCp325O4br7uGBdQH_DP7g,1603
- ocf_data_sampler/load/satellite.py,sha256=4MRJBFDHxx5WXu_6X71wEBznJTIuldEVnu9d6DVoLPI,2436
- ocf_data_sampler/load/site.py,sha256=74M_7RYwEc1bU4idjs3ZmQrx9I8mJXm6H4lwEL-h9n0,1226
+ ocf_data_sampler/load/satellite.py,sha256=SEQZ9oPe-asEeZeEMDkB1xWK5hErhWMagxohFcBl6KI,2294
+ ocf_data_sampler/load/site.py,sha256=hMdoF6sn2PcSBfF2soj7nuQoK9SItaxDXco5nk2n-44,1232
  ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
  ocf_data_sampler/load/nwp/nwp.py,sha256=Jyq1dE7DN0iSe6iSEGA76uu9LoeJz9FzfEUkq6ZZExQ,565
@@ -21,19 +21,19 @@ ocf_data_sampler/load/nwp/providers/utils.py,sha256=MFOZ5ZXLu3-SxYVJExdlo30b3y3s
  ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
  ocf_data_sampler/numpy_sample/collate.py,sha256=oX5axq30sCsSquhNbmWAVMjM54HT1v3MCMopYHcO5Q0,1950
  ocf_data_sampler/numpy_sample/datetime_features.py,sha256=D0RajbnBjg15qjYk16h2H0XO4wH3fw-x0--4VC2nq0s,1204
- ocf_data_sampler/numpy_sample/gsp.py,sha256=5UaWO_aGRRVQo82wnDaT4zBKHihOnIsXiwgPjM8vGFM,1005
- ocf_data_sampler/numpy_sample/nwp.py,sha256=_seQNWsut3IzPsrpipqImjnaM3XNHZCy5_5be6syivk,1297
- ocf_data_sampler/numpy_sample/satellite.py,sha256=8OaTvkPjzSjotcdKsa6BKmmlBKDBunbhDN4Pjo0Grxs,910
- ocf_data_sampler/numpy_sample/site.py,sha256=I-cAXCOF0SDdm5Hx43lFqYZ3jh61kltLQK-fc4_nNu0,1314
- ocf_data_sampler/numpy_sample/sun_position.py,sha256=UklhucCxCT6GMlAhCWL6c4cfWrdc1cWgegrYaqUoHOY,1611
- ocf_data_sampler/sample/__init__.py,sha256=02CM7E5nKkGiYbVW-kvzjNd4RaqGuHCkDChtmDBDUoA,248
- ocf_data_sampler/sample/base.py,sha256=q3wpqoW4JXRmzfar6ed7UMn1nxBxSJXNvMLJmHXy1dw,2856
- ocf_data_sampler/sample/site.py,sha256=0BvDXs0kxTjUq7kWpeoITK_uN4uE0w1IvEFXZUoKOb0,2507
- ocf_data_sampler/sample/uk_regional.py,sha256=D1A6nQB1PYCmxb3FzU9gqbNufQfx__wcprcDm50jCJw,4381
+ ocf_data_sampler/numpy_sample/gsp.py,sha256=uBquCFCoWuhJKY8sXpgsTCUDWUuLuv1XeixtFnFw6KU,1115
+ ocf_data_sampler/numpy_sample/nwp.py,sha256=Tiba-es23XeyMoEPgZUpLT6EnJCGU9A_1MdY6qkE7bM,1015
+ ocf_data_sampler/numpy_sample/satellite.py,sha256=RdXMdGGXysUx-AdL9T33yFOlxprtIdPNBKKX99-mhpY,991
+ ocf_data_sampler/numpy_sample/site.py,sha256=TvoEU85fmjYW8pD9UZOyUUACjimdQYxEzulQXunRO6Q,1425
+ ocf_data_sampler/numpy_sample/sun_position.py,sha256=ithM--eztAhiIQ1g52tlxgj-tMKbsJzx8mk6CgV2tzk,1613
+ ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
+ ocf_data_sampler/sample/base.py,sha256=IH3HbfqEUwjHmq-h2eJYLd8Jk-0ZcOylnehMyCPMV38,2223
+ ocf_data_sampler/sample/site.py,sha256=ONf2Yz5zi8Ombd_znA4T7NXbO01F76kQsBZv6rfnC74,1343
+ ocf_data_sampler/sample/uk_regional.py,sha256=KhJ5Ik1pZRp7PgIJjGIrE4i7SQnIdVjUbBHnfn-7ghg,2649
  ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
- ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
- ocf_data_sampler/select/fill_time_periods.py,sha256=h0XD1Ds_wUUoy-7bILxmN8AIbjlQ6YdXRKuCk_Is5jo,460
- ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=Nvz4gLCbbKzAe3sQXfxgExL9NtZVk1WNORvHs94DQ_k,11130
+ ocf_data_sampler/select/dropout.py,sha256=Pgov9P7rQMkSdqluG_hwm8loGyYNFOg-3PJUBLN_kjU,1526
+ ocf_data_sampler/select/fill_time_periods.py,sha256=EIcXG-77aQVOAYNwbDBEv6SGf6DO2p1WMEf96iW4MEM,596
+ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=IwPQwvgu4cOiAZ5Gbjflv3fnQCcs0EVK0g4V6yqqSgw,11129
  ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
  ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
  ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
@@ -41,15 +41,15 @@ ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56
  ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
  ocf_data_sampler/select/time_slice_for_dataset.py,sha256=Z7pOiilSHScxmBKZNG18K5J-S4ifdXXAYGZoHRHD3AY,4324
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=N85duDyEm6LIYgYIpLhrpxHddMIcvFosuZg8rzIztwE,12267
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=L_4w967ZxPjd7vHRkPtj7ZSmamEShKRT28j9_f-enJY,16228
+ ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZgfvVCcEU3dj3RoY0zdBdKGppC7Wm81qecqB17gYTmE,12286
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=_uHmqg-VJu-MHgXc5JFDX1noPfH6E8nY4XhQmsrOav4,16325
  ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
  ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=u2EpiFAKAOHpmvINhOUJCT8Vbc-cle6qJ3YNVse4yLs,2884
  scripts/refactor_site.py,sha256=xaJGxt2_WObIPrPAnRiOMMB68r-5Q51jWRx409AcscM,1747
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/conftest.py,sha256=RlC7YYtBLipUzFS1tQxela1SgHCxSpReUKEJ4429PwQ,7689
- tests/config/test_config.py,sha256=VQjNiucIk5VnPQdGA6Mr-RNd9CwGI06AiikChTHrcnY,3969
+ tests/conftest.py,sha256=k7nM3u2YJmkMupN4SIbJP3BRoxNR1dpIoo2fPFf0abg,8588
+ tests/config/test_config.py,sha256=CzYVhAUpgT4lvQdIddtVxtJeMqYL_TJolfeIwaaohq4,3969
  tests/config/test_load.py,sha256=8nui2UsgK_eufWGD74yXvf-6eY_SxBFKhDmGYUtRQxw,260
  tests/config/test_save.py,sha256=BxSd2S50-bRPIXP_4iX0B6Wt7pRFJnUbLYtzfLaqlAs,915
  tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
@@ -59,24 +59,24 @@ tests/load/test_load_sites.py,sha256=6V-U3_EtBklkV7w-hOoR4nba3dSaZ_cnjuRWFs8kYVU
  tests/numpy_sample/test_collate.py,sha256=RqHCD5_LTRpe4r6kqC_2TKhmhM_IHYM0ZtFUvSjDqcM,654
  tests/numpy_sample/test_datetime_features.py,sha256=iR9WdBLj1nIBNqoaTFE9rkUaH1eKFJSNb96nwiEaQH0,1449
  tests/numpy_sample/test_gsp.py,sha256=FLlq4SlJ-9cSRAepf4_ksA6PsUVKegnKEAc5pUojCJ0,1458
- tests/numpy_sample/test_nwp.py,sha256=yf4u7mAU0E3FQ4xAH6YjuHuHBzzFoXjHSFNkOVJUdSM,1455
+ tests/numpy_sample/test_nwp.py,sha256=Lnd-PMa6gI-fSIJkSZ554QiHFfnwxeXZxLg-rpuBv1U,442
  tests/numpy_sample/test_satellite.py,sha256=cCqtn5See-uSNfh89COGTUQNuFm6sIZ8QmBVHsuUeRI,1189
  tests/numpy_sample/test_sun_position.py,sha256=_ENYzsNBVPdNXf--FI-UUFqw2u5w7_zqw6LcENU2uZM,2504
- tests/select/test_dropout.py,sha256=kiycl7RxAQYMCZJlokmx6Da5h_oBpSs8Is8pmSW4gOU,2413
+ tests/select/test_dropout.py,sha256=aQuSSqZF9RxBjN9-ogkQ8O-_zktAM30CrT1Lz7j1hMg,2222
  tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
  tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
  tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
  tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
  tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
  tests/test_sample/test_base.py,sha256=sD9NZghYQWbkAcQP9YXypWZowqYkO3xeNMH-_mEoD5I,4833
- tests/test_sample/test_site_sample.py,sha256=Gln-Or060cUWvA7Q7c1vsthgCttOAM2z9yBI9zUIrDw,6238
- tests/test_sample/test_uk_regional_sample.py,sha256=gkeQWC2wC757jKJz_QBmDMFQjn3R54q_tEo948yyxCY,4840
+ tests/test_sample/test_site_sample.py,sha256=8HNenhIWYouCQu4y389PDQGokSPI5jQ4lS4CG-eA1Y8,5382
+ tests/test_sample/test_uk_regional_sample.py,sha256=MFibX9-M8mFK7vwMPu58gAG2VoY6y7w7chW5BlZclwk,3962
  tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
- tests/torch_datasets/test_pvnet_uk.py,sha256=F0D-DugFgVtt8G1q7lylmPLrOZj6H6YPNd9s_6Wn_yM,5594
+ tests/torch_datasets/test_pvnet_uk.py,sha256=hgD_IDa4D8cgc4cgK1UqKYkT6sFlrTMAvgVn_iwD5_4,5086
  tests/torch_datasets/test_site.py,sha256=t57vAR_RRWcbG_kEFk6VrFCYzVxwFG6qJKBnRHF02fM,7000
  tests/torch_datasets/test_validate_channels_utils.py,sha256=Rzdweu98j1of45jCOUrSiBtyPlf-dDaCceulf0H7ml8,2921
- ocf_data_sampler-0.1.9.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
- ocf_data_sampler-0.1.9.dist-info/METADATA,sha256=Lfu8Yrj4CSlqPzGhk0iDy5r5zCLd5REnGAlVcFuKuow,12173
- ocf_data_sampler-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- ocf_data_sampler-0.1.9.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
- ocf_data_sampler-0.1.9.dist-info/RECORD,,
+ ocf_data_sampler-0.1.11.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ ocf_data_sampler-0.1.11.dist-info/METADATA,sha256=d8wctSlRyDbP1_yYHFvIGQgEC8DmOkM8h-ITI4XFuPw,12174
+ ocf_data_sampler-0.1.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ ocf_data_sampler-0.1.11.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ ocf_data_sampler-0.1.11.dist-info/RECORD,,
tests/config/test_config.py CHANGED
@@ -30,7 +30,7 @@ def test_incorrect_interval_start_minutes(test_config_filename):
      configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
      with pytest.raises(
          ValueError,
-         match="interval_start_minutes must be divisible by time_resolution_minutes"
+         match="interval_start_minutes.*must be divisible.*time_resolution_minutes.*"
      ):
          _ = Configuration(**configuration.model_dump())
  
@@ -45,7 +45,7 @@ def test_incorrect_interval_end_minutes(test_config_filename):
      configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
      with pytest.raises(
          ValueError,
-         match="interval_end_minutes must be divisible by time_resolution_minutes"
+         match="interval_end_minutes.*must be divisible.*time_resolution_minutes.*"
      ):
          _ = Configuration(**configuration.model_dump())
  
@@ -103,7 +103,7 @@ def test_inconsistent_dropout_use(test_config_filename):
  
      configuration = load_yaml_configuration(test_config_filename)
      configuration.input_data.satellite.dropout_fraction= 1.0
-     configuration.input_data.satellite.dropout_timedeltas_minutes = None
+     configuration.input_data.satellite.dropout_timedeltas_minutes = []
  
      with pytest.raises(ValueError, match="To dropout fraction > 0 requires a list of dropout timedeltas"):
          _ = Configuration(**configuration.model_dump())
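
Note: the updated test expects an empty list of dropout timedeltas (rather than None) combined with a non-zero dropout fraction to be rejected when the Configuration is built. Since the tests call configuration.model_dump(), the config model appears to be pydantic v2; a hypothetical sketch of such a check (the class and field layout here are illustrative, not the package's actual model) could look like:

    from pydantic import BaseModel, model_validator

    class SatelliteConfigSketch(BaseModel):
        """Hypothetical stand-in for the satellite input-data config."""
        dropout_timedeltas_minutes: list[int] = []
        dropout_fraction: float = 0.0

        @model_validator(mode="after")
        def check_dropout(self):
            if self.dropout_fraction > 0 and len(self.dropout_timedeltas_minutes) == 0:
                raise ValueError("To dropout fraction > 0 requires a list of dropout timedeltas")
            return self
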
tests/conftest.py CHANGED
@@ -130,6 +130,39 @@ def nwp_ukv_zarr_path(session_tmp_path, ds_nwp_ukv):
      yield zarr_path
  
  
+ @pytest.fixture()
+ def ds_nwp_ukv_time_sliced():
+ 
+     t0 = pd.to_datetime("2024-01-02 00:00")
+ 
+     x = np.arange(-100, 100, 10)
+     y = np.arange(-100, 100, 10)
+     steps = pd.timedelta_range("0h", "8h", freq="1h")
+     target_times = t0 + steps
+ 
+     channels = ["t", "dswrf"]
+     init_times = pd.to_datetime([t0]*len(steps))
+ 
+     # Create dummy time-sliced NWP data
+     da_nwp = xr.DataArray(
+         np.random.normal(size=(len(target_times), len(channels), len(x), len(y))),
+         coords=dict(
+             target_time_utc=(["target_time_utc"], target_times),
+             channel=(["channel"], channels),
+             x_osgb=(["x_osgb"], x),
+             y_osgb=(["y_osgb"], y),
+         )
+     )
+ 
+     # Add extra non-coordinate dimensions
+     da_nwp = da_nwp.assign_coords(
+         init_time_utc=("target_time_utc", init_times),
+         step=("target_time_utc", steps),
+     )
+ 
+     return da_nwp
+ 
+ 
  @pytest.fixture(scope="session")
  def ds_nwp_ecmwf():
      init_times = pd.date_range(start="2023-01-01 00:00", freq="6h", periods=24 * 7)
tests/numpy_sample/test_nwp.py CHANGED
@@ -1,52 +1,13 @@
- import numpy as np
- import pandas as pd
- import xarray as xr
- 
- import pytest
- 
  from ocf_data_sampler.numpy_sample import convert_nwp_to_numpy_sample, NWPSampleKey
  
- @pytest.fixture(scope="module")
- def da_nwp_like():
-     """Create dummy data which looks like time-sliced NWP data"""
- 
-     t0 = pd.to_datetime("2024-01-02 00:00")
- 
-     x = np.arange(-100, 100, 10)
-     y = np.arange(-100, 100, 10)
-     steps = pd.timedelta_range("0h", "8h", freq="1h")
-     target_times = t0 + steps
- 
-     channels = ["t", "dswrf"]
-     init_times = pd.to_datetime([t0]*len(steps))
- 
-     # Create dummy time-sliced NWP data
-     da_nwp = xr.DataArray(
-         np.random.normal(size=(len(target_times), len(channels), len(x), len(y))),
-         coords=dict(
-             target_times_utc=(["target_times_utc"], target_times),
-             channel=(["channel"], channels),
-             x_osgb=(["x_osgb"], x),
-             y_osgb=(["y_osgb"], y),
-         )
-     )
- 
-     # Add extra non-coordinate dimensions
-     da_nwp = da_nwp.assign_coords(
-         init_time_utc=("target_times_utc", init_times),
-         step=("target_times_utc", steps),
-     )
- 
-     return da_nwp
- 
  
- def test_convert_nwp_to_numpy_sample(da_nwp_like):
+ def test_convert_nwp_to_numpy_sample(ds_nwp_ukv_time_sliced):
  
      # Call the function
-     numpy_sample = convert_nwp_to_numpy_sample(da_nwp_like)
+     numpy_sample = convert_nwp_to_numpy_sample(ds_nwp_ukv_time_sliced)
  
      # Assert the output type
      assert isinstance(numpy_sample, dict)
  
      # Assert the shape of the numpy sample
-     assert (numpy_sample[NWPSampleKey.nwp] == da_nwp_like.values).all()
+     assert (numpy_sample[NWPSampleKey.nwp] == ds_nwp_ukv_time_sliced.values).all()
tests/select/test_dropout.py CHANGED
@@ -14,10 +14,8 @@ def da_sample():
      datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 13:00", freq="5min")
  
      da_sat = xr.DataArray(
-         np.random.normal(size=(len(datetimes),)),
-         coords=dict(
-             time_utc=(["time_utc"], datetimes),
-         )
+         np.random.normal(size=(len(datetimes))),
+         coords=dict(time_utc=datetimes)
      )
      return da_sat
  
@@ -29,7 +27,7 @@ def test_draw_dropout_time():
      dropout_time = draw_dropout_time(t0, dropout_timedeltas, dropout_frac=1)
  
      assert isinstance(dropout_time, pd.Timestamp)
-     assert dropout_time-t0 in dropout_timedeltas
+     assert (dropout_time-t0) in dropout_timedeltas
  
  
  def test_draw_dropout_time_partial():
@@ -48,21 +46,17 @@ def test_draw_dropout_time_partial():
      dropouts == {None} | set(t0 + dt for dt in dropout_timedeltas)
  
  
- def test_draw_dropout_time_none():
+ def test_draw_dropout_time_null():
      t0 = pd.Timestamp("2021-01-01 04:00:00")
  
-     # No dropout timedeltas
-     dropout_time = draw_dropout_time(t0, dropout_timedeltas=None, dropout_frac=1)
-     assert dropout_time is None
- 
      # Dropout fraction is 0
      dropout_timedeltas = [pd.Timedelta(-30, "min")]
      dropout_time = draw_dropout_time(t0, dropout_timedeltas=dropout_timedeltas, dropout_frac=0)
-     assert dropout_time is None
+     assert dropout_time==t0
  
      # No dropout timedeltas and dropout fraction is 0
-     dropout_time = draw_dropout_time(t0, dropout_timedeltas=None, dropout_frac=0)
-     assert dropout_time is None
+     dropout_time = draw_dropout_time(t0, dropout_timedeltas=[], dropout_frac=0)
+     assert dropout_time==t0
  
  
  @pytest.mark.parametrize("t0_str", ["12:00", "12:30", "13:00"])