ocf-data-sampler 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ocf-data-sampler might be problematic.

Files changed (31)
  1. ocf_data_sampler/config/__init__.py +1 -1
  2. ocf_data_sampler/config/load.py +6 -17
  3. ocf_data_sampler/config/model.py +10 -20
  4. ocf_data_sampler/config/save.py +9 -62
  5. ocf_data_sampler/load/__init__.py +5 -1
  6. ocf_data_sampler/load/gsp.py +10 -6
  7. ocf_data_sampler/load/load_dataset.py +15 -17
  8. ocf_data_sampler/load/nwp/nwp.py +3 -4
  9. ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -17
  10. ocf_data_sampler/load/nwp/providers/ukv.py +1 -9
  11. ocf_data_sampler/load/nwp/providers/utils.py +1 -5
  12. ocf_data_sampler/load/satellite.py +4 -8
  13. ocf_data_sampler/load/site.py +20 -13
  14. ocf_data_sampler/numpy_sample/collate.py +3 -4
  15. ocf_data_sampler/numpy_sample/datetime_features.py +14 -22
  16. ocf_data_sampler/sample/base.py +34 -3
  17. ocf_data_sampler/select/find_contiguous_time_periods.py +2 -2
  18. ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +2 -2
  19. ocf_data_sampler/torch_datasets/datasets/site.py +1 -1
  20. {ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/METADATA +1 -1
  21. {ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/RECORD +31 -30
  22. tests/config/test_config.py +1 -47
  23. tests/config/test_load.py +7 -0
  24. tests/config/test_save.py +21 -30
  25. tests/load/test_load_sites.py +1 -1
  26. tests/numpy_sample/test_datetime_features.py +0 -10
  27. tests/test_sample/test_base.py +63 -2
  28. tests/torch_datasets/test_site.py +3 -3
  29. {ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/LICENSE +0 -0
  30. {ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/WHEEL +0 -0
  31. {ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/top_level.txt +0 -0
ocf_data_sampler/config/__init__.py CHANGED

@@ -1,5 +1,5 @@
 """Configuration model"""
 
-from ocf_data_sampler.config.model import Configuration
+from ocf_data_sampler.config.model import Configuration, InputData
 from ocf_data_sampler.config.save import save_yaml_configuration
 from ocf_data_sampler.config.load import load_yaml_configuration
ocf_data_sampler/config/load.py CHANGED

@@ -1,33 +1,22 @@
-"""Loading configuration functions.
-
-Example:
-
-    from ocf_data_sampler.config import load_yaml_configuration
-    configuration = load_yaml_configuration(filename)
-"""
+"""Load configuration from a yaml file"""
 
 import fsspec
-from pathy import Pathy
 from pyaml_env import parse_config
-
 from ocf_data_sampler.config import Configuration
 
 
-def load_yaml_configuration(filename: str | Pathy) -> Configuration:
+def load_yaml_configuration(filename: str) -> Configuration:
    """
    Load a yaml file which has a configuration in it

    Args:
-        filename: the file name that you want to load. Will load from local, AWS, or GCP
+        filename: the yaml file name that you want to load. Will load from local, AWS, or GCP
            depending on the protocol suffix (e.g. 's3://bucket/config.yaml').

-    Returns:pydantic class
+    Returns: pydantic class

    """
-    # load the file to a dictionary
    with fsspec.open(filename, mode="r") as stream:
        configuration = parse_config(data=stream)
-    # this means we can load ENVs in the yaml file
-    # turn into pydantic class
-    configuration = Configuration(**configuration)
-    return configuration
+
+    return Configuration(**configuration)
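
As a usage sketch of the simplified loader (the config path below is hypothetical; any protocol fsspec understands, such as s3:// or gs://, should work the same way):

```python
from ocf_data_sampler.config import load_yaml_configuration

# Hypothetical path - local files and fsspec-style URLs are both accepted
config = load_yaml_configuration("/data/config.yaml")
print(type(config).__name__)  # Configuration
```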
ocf_data_sampler/config/model.py CHANGED

@@ -1,16 +1,10 @@
 """Configuration model for the dataset.
 
-All paths must include the protocol prefix. For local files,
-it's sufficient to just start with a '/'. For aws, start with 's3://',
-for gcp start with 'gs://'.
 
-Example:
+Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// to read from alternative filesystems.
 
-    from ocf_data_sampler.config import Configuration
-    config = Configuration(**config_dict)
 """
 
-import logging
 from typing import Dict, List, Optional
 from typing_extensions import Self
 
@@ -18,10 +12,6 @@ from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo
 
 from ocf_data_sampler.constants import NWP_PROVIDERS
 
-logger = logging.getLogger(__name__)
-
-providers = ["pvoutput.org", "solar_sheffield_passiv"]
-
 
 class Base(BaseModel):
    """Pydantic Base model where no extras can be added"""
@@ -79,8 +69,6 @@ class TimeWindowMixin(Base):
        return v
 
 
-
-# noinspection PyMethodParameters
 class DropoutMixin(Base):
    """Mixin class, to add dropout minutes"""
 
@@ -137,7 +125,8 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
 
    zarr_path: str | tuple[str] | list[str] = Field(
        ...,
-        description="The path or list of paths which hold the data zarr",
+        description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
    )
 
    channels: list[str] = Field(
@@ -145,13 +134,13 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
    )
 
 
-# noinspection PyMethodParameters
 class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
    """NWP configuration model"""
 
    zarr_path: str | tuple[str] | list[str] = Field(
        ...,
-        description="The path or list of paths which hold the data zarr",
+        description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
    )
 
    channels: list[str] = Field(
@@ -175,7 +164,6 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
        """Validate 'provider'"""
        if v.lower() not in NWP_PROVIDERS:
            message = f"NWP provider {v} is not in {NWP_PROVIDERS}"
-            logger.warning(message)
            raise Exception(message)
        return v
 
@@ -209,7 +197,11 @@ class MultiNWP(RootModel):
 class GSP(TimeWindowMixin, DropoutMixin):
    """GSP configuration model"""
 
-    zarr_path: str = Field(..., description="The path which holds the GSP zarr")
+    zarr_path: str = Field(
+        ...,
+        description="Absolute or relative zarr filepath. Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
+    )
 
 
 class Site(TimeWindowMixin, DropoutMixin):
@@ -228,8 +220,6 @@ class Site(TimeWindowMixin, DropoutMixin):
    # TODO validate the csv for metadata
 
 
-
-# noinspection PyPep8Naming
 class InputData(Base):
    """Input data model"""
 
ocf_data_sampler/config/save.py CHANGED

@@ -2,25 +2,16 @@
 
 This module provides functionality to save configuration objects to YAML files,
 supporting local and cloud storage locations.
-
-Example:
-    from ocf_data_sampler.config import save_yaml_configuration
-    saved_path = save_yaml_configuration(config, "config.yaml")
 """
 
 import json
-from pathlib import Path
-from typing import Union
-
 import fsspec
 import yaml
+import os
 
 from ocf_data_sampler.config import Configuration
 
-def save_yaml_configuration(
-    configuration: Configuration,
-    filename: Union[str, Path],
-) -> Path:
+def save_yaml_configuration(configuration: Configuration, filename: str) -> None:
    """Save a configuration object to a YAML file.
 
    Args:
@@ -28,57 +19,13 @@ def save_yaml_configuration(
        filename: Destination path for the YAML file. Can be a local path or
            cloud storage URL (e.g., 'gs://', 's3://'). For local paths,
            absolute paths are recommended.
-
-    Returns:
-        Path: The path where the configuration was saved
-
-    Raises:
-        ValueError: If filename is None, directory doesn't exist, or if writing to the specified path fails
-        TypeError: If the configuration cannot be serialized
    """
-    if filename is None:
-        raise ValueError("filename cannot be None")
-
-    try:
-        # Convert to absolute path if it's a relative path
-        if isinstance(filename, (str, Path)) and not any(
-            str(filename).startswith(prefix) for prefix in ('gs://', 's3://', '/')
-        ):
-            filename = Path.cwd() / filename
-
-        filepath = Path(filename)
-
-        # For local paths, check if parent directory exists before attempting to create
-        if filepath.is_absolute():
-            if not filepath.parent.exists():
-                raise ValueError("Directory does not exist")
-
-            # Only try to create directory if it's in a writable location
-            try:
-                filepath.parent.mkdir(parents=True, exist_ok=True)
-            except PermissionError:
-                raise ValueError(f"Permission denied when accessing directory {filepath.parent}")
-
-        # Serialize configuration to JSON-compatible dictionary
-        config_dict = json.loads(configuration.model_dump_json())
-
-        # Write to file directly for local paths
-        if filepath.is_absolute():
-            try:
-                with open(filepath, 'w') as f:
-                    yaml.safe_dump(config_dict, f, default_flow_style=False)
-            except PermissionError:
-                raise ValueError(f"Permission denied when writing to {filename}")
-        else:
-            # Use fsspec for cloud storage
-            with fsspec.open(str(filepath), mode='w') as yaml_file:
-                yaml.safe_dump(config_dict, yaml_file, default_flow_style=False)
+
+    if os.path.exists(filename):
+        raise FileExistsError(f"File already exists: {filename}")
 
-        return filepath
+    # Serialize configuration to JSON-compatible dictionary
+    config_dict = json.loads(configuration.model_dump_json())
 
-    except json.JSONDecodeError as e:
-        raise TypeError(f"Failed to serialize configuration: {str(e)}") from e
-    except (IOError, OSError) as e:
-        if "Permission denied" in str(e):
-            raise ValueError(f"Permission denied when writing to {filename}") from e
-        raise ValueError(f"Failed to write configuration to {filename}: {str(e)}") from e
+    with fsspec.open(filename, mode='w') as yaml_file:
+        yaml.safe_dump(config_dict, yaml_file, default_flow_style=False)
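
A sketch of the new save behaviour, assuming a writable local path (hypothetical below). Note the `os.path.exists` guard means a second save to the same path now raises rather than overwriting, and that this guard only applies to local filesystem paths, since cloud URLs are handled by fsspec:

```python
from ocf_data_sampler.config import (
    Configuration,
    load_yaml_configuration,
    save_yaml_configuration,
)

config = Configuration()
save_yaml_configuration(config, "/tmp/config.yaml")  # hypothetical path
assert load_yaml_configuration("/tmp/config.yaml") == config

try:
    save_yaml_configuration(config, "/tmp/config.yaml")
except FileExistsError as e:
    print(e)  # File already exists: /tmp/config.yaml
```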
ocf_data_sampler/load/__init__.py CHANGED

@@ -1 +1,5 @@
-from ocf_blosc2 import Blosc2  # noqa: F401
+import ocf_blosc2
+from ocf_data_sampler.load.gsp import open_gsp
+from ocf_data_sampler.load.nwp import open_nwp
+from ocf_data_sampler.load.satellite import open_sat_data
+from ocf_data_sampler.load.site import open_site
ocf_data_sampler/load/gsp.py CHANGED

@@ -1,16 +1,21 @@
-from pathlib import Path
 import pkg_resources
 
 import pandas as pd
 import xarray as xr
 
 
-def open_gsp(zarr_path: str | Path) -> xr.DataArray:
+def open_gsp(zarr_path: str) -> xr.DataArray:
+    """Open the GSP data
+
+    Args:
+        zarr_path: Path to the GSP zarr data
+
+    Returns:
+        xr.DataArray: The opened GSP data
+    """
 
-    # Load GSP generation xr.Dataset
    ds = xr.open_zarr(zarr_path)
 
-    # Rename to standard time name
    ds = ds.rename({"datetime_gmt": "time_utc"})
 
    # Load UK GSP locations
@@ -19,13 +24,12 @@ def open_gsp(zarr_path: str | Path) -> xr.DataArray:
        index_col="gsp_id",
    )
 
-    # Add coordinates
+    # Add locations and capacities as coordinates for each GSP and datetime
    ds = ds.assign_coords(
        x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
        y_osgb=(df_gsp_loc.y_osgb.to_xarray()),
        nominal_capacity_mwp=ds.installedcapacity_mwp,
        effective_capacity_mwp=ds.capacity_mwp,
-
    )
 
    return ds.generation_mw
ocf_data_sampler/load/load_dataset.py CHANGED

@@ -1,36 +1,31 @@
 """ Loads all data sources """
 import xarray as xr
 
-from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.load.gsp import open_gsp
-from ocf_data_sampler.load.nwp import open_nwp
-from ocf_data_sampler.load.satellite import open_sat_data
-from ocf_data_sampler.load.site import open_site
+from ocf_data_sampler.config import InputData
+from ocf_data_sampler.load import open_nwp, open_gsp, open_sat_data, open_site
 
 
-def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
+def get_dataset_dict(input_config: InputData) -> dict[str, dict[xr.DataArray] | xr.DataArray]:
    """Construct dictionary of all of the input data sources
 
    Args:
-        config: Configuration file
+        input_config: InputData configuration object
    """
 
-    in_config = config.input_data
-
    datasets_dict = {}
 
    # Load GSP data unless the path is None
-    if in_config.gsp and in_config.gsp.zarr_path:
-        da_gsp = open_gsp(zarr_path=in_config.gsp.zarr_path).compute()
+    if input_config.gsp and input_config.gsp.zarr_path:
+        da_gsp = open_gsp(zarr_path=input_config.gsp.zarr_path).compute()
 
        # Remove national GSP
        datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
 
    # Load NWP data if in config
-    if in_config.nwp:
+    if input_config.nwp:
 
        datasets_dict["nwp"] = {}
-        for nwp_source, nwp_config in in_config.nwp.items():
+        for nwp_source, nwp_config in input_config.nwp.items():
 
            da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
 
@@ -39,8 +34,8 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
            datasets_dict["nwp"][nwp_source] = da_nwp
 
    # Load satellite data if in config
-    if in_config.satellite:
-        sat_config = config.input_data.satellite
+    if input_config.satellite:
+        sat_config = input_config.satellite
 
        da_sat = open_sat_data(sat_config.zarr_path)
 
@@ -48,8 +43,11 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
 
        datasets_dict["sat"] = da_sat
 
-    if in_config.site:
-        da_sites = open_site(in_config.site)
+    if input_config.site:
+        da_sites = open_site(
+            generation_file_path=input_config.site.file_path,
+            metadata_file_path=input_config.site.metadata_file_path,
+        )
        datasets_dict["site"] = da_sites
 
    return datasets_dict
ocf_data_sampler/load/nwp/nwp.py CHANGED

@@ -1,15 +1,14 @@
-from pathlib import Path
 import xarray as xr
 
 from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
 from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
 
 
-def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
-    """Opens NWP Zarr
+def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
+    """Opens NWP zarr
 
    Args:
-        zarr_path: Path to the Zarr file
+        zarr_path: path to the zarr file
        provider: NWP provider
    """
 
ocf_data_sampler/load/nwp/providers/ecmwf.py CHANGED

@@ -1,5 +1,5 @@
 """ECMWF provider loaders"""
-from pathlib import Path
+
 import xarray as xr
 from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
 from ocf_data_sampler.load.utils import (
@@ -9,7 +9,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
    """
    Opens the ECMWF IFS NWP data
 
@@ -19,25 +19,14 @@ def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    Returns:
        Xarray DataArray of the NWP data
    """
-    # Open the data
-    ds = open_zarr_paths(zarr_path)
-
-    # Rename
-    ds = ds.rename(
-        {
-            "init_time": "init_time_utc",
-        }
-    )
 
-    # LEGACY SUPPORT
-    # rename variable to channel if it exists
-    if "variable" in ds:
-        ds = ds.rename({"variable": "channel"})
+    ds = open_zarr_paths(zarr_path)
+
+    # LEGACY SUPPORT - rename variable to channel if it exists
+    ds = ds.rename({"init_time": "init_time_utc", "variable": "channel"})
 
-    # Check the timestamps are unique and increasing
    check_time_unique_increasing(ds.init_time_utc)
 
-    # Make sure the spatial coords are in increasing order
    ds = make_spatial_coords_increasing(ds, x_coord="longitude", y_coord="latitude")
 
    ds = ds.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
ocf_data_sampler/load/nwp/providers/ukv.py CHANGED

@@ -2,8 +2,6 @@
 
 import xarray as xr
 
-from pathlib import Path
-
 from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
 from ocf_data_sampler.load.utils import (
    check_time_unique_increasing,
@@ -12,7 +10,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
    """
    Opens the NWP data
 
@@ -22,10 +20,8 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    Returns:
        Xarray DataArray of the NWP data
    """
-    # Open the data
    ds = open_zarr_paths(zarr_path)
 
-    # Rename
    ds = ds.rename(
        {
            "init_time": "init_time_utc",
@@ -35,15 +31,11 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
        }
    )
 
-    # Check the timestamps are unique and increasing
    check_time_unique_increasing(ds.init_time_utc)
 
-    # Make sure the spatial coords are in increasing order
    ds = make_spatial_coords_increasing(ds, x_coord="x_osgb", y_coord="y_osgb")
 
    ds = ds.transpose("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
 
    # TODO: should we control the dtype of the DataArray?
    return get_xr_data_array_from_xr_dataset(ds)
-
-
ocf_data_sampler/load/nwp/providers/utils.py CHANGED

@@ -1,11 +1,7 @@
-from pathlib import Path
 import xarray as xr
 
 
-def open_zarr_paths(
-    zarr_path: Path | str | list[Path] | list[str],
-    time_dim: str = "init_time"
-) -> xr.Dataset:
+def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
    """Opens the NWP data
 
    Args:
ocf_data_sampler/load/satellite.py CHANGED

@@ -1,7 +1,6 @@
 """Satellite loader"""
 
 import subprocess
-from pathlib import Path
 
 import xarray as xr
 from ocf_data_sampler.load.utils import (
@@ -11,7 +10,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
+def _get_single_sat_data(zarr_path: str) -> xr.Dataset:
    """Helper function to open a Zarr from either a local or GCP path.
 
    Args:
@@ -50,7 +49,7 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
    return ds
 
 
-def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
    """Lazily opens the Zarr store.
 
    Args:
@@ -69,7 +68,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    else:
        ds = _get_single_sat_data(zarr_path)
 
-    # Rename dimensions
    ds = ds.rename(
        {
            "variable": "channel",
@@ -77,13 +75,11 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
        }
    )
 
-    # Check timestamps
    check_time_unique_increasing(ds.time_utc)
 
-    # Ensure spatial coordinates are sorted
    ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
-
+
    ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
+
    # TODO: should we control the dtype of the DataArray?
-
    return get_xr_data_array_from_xr_dataset(ds)
ocf_data_sampler/load/site.py CHANGED

@@ -1,30 +1,37 @@
+import numpy as np
 import pandas as pd
 import xarray as xr
-import numpy as np
 
-from ocf_data_sampler.config.model import Site
 
+def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArray:
+    """Open a site's generation data and metadata.
+
+    Args:
+        generation_file_path: Path to the site generation netcdf data
+        metadata_file_path: Path to the site csv metadata
 
-def open_site(sites_config: Site) -> xr.DataArray:
+    Returns:
+        xr.DataArray: The opened site generation data
+    """
 
-    # Load site generation xr.Dataset
-    site_generation_ds = xr.open_dataset(sites_config.file_path)
+    generation_ds = xr.open_dataset(generation_file_path)
 
-    # Load site generation data
-    metadata_df = pd.read_csv(sites_config.metadata_file_path, index_col="site_id")
+    metadata_df = pd.read_csv(metadata_file_path, index_col="site_id")
+
+    assert metadata_df.index.is_unique
 
    # Ensure metadata aligns with the site_id dimension in data_ds
-    metadata_df = metadata_df.reindex(site_generation_ds.site_id.values)
+    metadata_df = metadata_df.reindex(generation_ds.site_id.values)
 
    # Assign coordinates to the Dataset using the aligned metadata
-    site_generation_ds = site_generation_ds.assign_coords(
+    generation_ds = generation_ds.assign_coords(
        latitude=("site_id", metadata_df["latitude"].values),
        longitude=("site_id", metadata_df["longitude"].values),
        capacity_kwp=("site_id", metadata_df["capacity_kwp"].values),
    )
 
    # Sanity checks
-    assert np.isfinite(site_generation_ds.capacity_kwp.values).all()
-    assert (site_generation_ds.capacity_kwp.values > 0).all()
-    assert metadata_df.index.is_unique
-    return site_generation_ds.generation_kw
+    assert np.isfinite(generation_ds.capacity_kwp.values).all()
+    assert (generation_ds.capacity_kwp.values > 0).all()
+
+    return generation_ds.generation_kw
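
A sketch of the new explicit-paths signature (file paths hypothetical):

```python
from ocf_data_sampler.load import open_site

da = open_site(
    generation_file_path="/data/site_generation.nc",
    metadata_file_path="/data/site_metadata.csv",
)
# latitude, longitude and capacity_kwp are attached as site_id coordinates
print(list(da.coords))
```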
ocf_data_sampler/numpy_sample/collate.py CHANGED

@@ -45,11 +45,12 @@ def stack_np_samples_into_batch(dict_list: list[dict]) -> dict:
    return batch
 
 
-def _key_is_constant(key: str):
+def _key_is_constant(key: str) -> bool:
+    """Check if a key is for value which is constant for all samples"""
    return key.endswith("t0_idx") or key.endswith("channel_names")
 
 
-def stack_data_list(data_list: list, key: str):
+def stack_data_list(data_list: list, key: str) -> np.ndarray:
    """Stack a sequence of data elements along a new axis
 
    Args:
@@ -57,8 +58,6 @@ def stack_data_list(data_list: list, key: str):
        key: string identifying the data type
    """
    if _key_is_constant(key):
-        # These are always the same for all examples.
        return data_list[0]
    else:
        return np.stack(data_list)
-
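
For context, a sketch of how these helpers behave when collating, assuming `stack_np_samples_into_batch` applies `stack_data_list` per key as the helpers above suggest (sample keys illustrative):

```python
import numpy as np
from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch

samples = [
    {"sat": np.zeros((2, 2)), "sat_t0_idx": 3},
    {"sat": np.ones((2, 2)), "sat_t0_idx": 3},
]
batch = stack_np_samples_into_batch(samples)
print(batch["sat"].shape)   # (2, 2, 2) - stacked along a new leading axis
print(batch["sat_t0_idx"])  # 3 - constant keys are taken from the first sample
```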
ocf_data_sampler/numpy_sample/datetime_features.py CHANGED

@@ -2,20 +2,21 @@
 
 import numpy as np
 import pandas as pd
-from numpy.typing import NDArray
 
 
-def _get_date_time_in_pi(
-    dt: pd.DatetimeIndex,
-) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
-    """
-    Change the datetimes, into time and date scaled in radians
+def _get_date_time_in_pi(dt: pd.DatetimeIndex) -> tuple[np.ndarray, np.ndarray]:
+    """Create positional embeddings for the datetimes in radians
+
+    Args:
+        dt: DatetimeIndex to create radian embeddings for
+
+    Returns:
+        Tuple of numpy arrays containing radian coordinates for date and time
    """
 
    day_of_year = dt.dayofyear
    minute_of_day = dt.minute + dt.hour * 60
 
-    # converting into positions on sin-cos circle
    time_in_pi = (2 * np.pi) * (minute_of_day / (24 * 60))
    date_in_pi = (2 * np.pi) * (day_of_year / 365)
 
@@ -23,24 +24,15 @@ def _get_date_time_in_pi(
 
 
 def make_datetime_numpy_dict(datetimes: pd.DatetimeIndex, key_prefix: str = "wind") -> dict:
-    """ Make dictionary of datetime features"""
-
-    if datetimes.empty:
-        raise ValueError("Input datetimes is empty for 'make_datetime_numpy_dict' function")
-
-    time_numpy_sample = {}
+    """ Creates dictionary of cyclical datetime features - encoded """
 
    date_in_pi, time_in_pi = _get_date_time_in_pi(datetimes)
 
-    # Store
-    date_sin_batch_key = key_prefix + "_date_sin"
-    date_cos_batch_key = key_prefix + "_date_cos"
-    time_sin_batch_key = key_prefix + "_time_sin"
-    time_cos_batch_key = key_prefix + "_time_cos"
+    time_numpy_sample = {}
 
-    time_numpy_sample[date_sin_batch_key] = np.sin(date_in_pi)
-    time_numpy_sample[date_cos_batch_key] = np.cos(date_in_pi)
-    time_numpy_sample[time_sin_batch_key] = np.sin(time_in_pi)
-    time_numpy_sample[time_cos_batch_key] = np.cos(time_in_pi)
+    time_numpy_sample[key_prefix + "_date_sin"] = np.sin(date_in_pi)
+    time_numpy_sample[key_prefix + "_date_cos"] = np.cos(date_in_pi)
+    time_numpy_sample[key_prefix + "_time_sin"] = np.sin(time_in_pi)
+    time_numpy_sample[key_prefix + "_time_cos"] = np.cos(time_in_pi)
 
    return time_numpy_sample
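
A worked example of the cyclical encoding: noon gives time_in_pi = 2π · (720 / 1440) = π, so the time features are sin(π) ≈ 0 and cos(π) = −1. Note that the empty-input ValueError was removed, so an empty DatetimeIndex now yields empty feature arrays instead of raising:

```python
import pandas as pd
from ocf_data_sampler.numpy_sample.datetime_features import make_datetime_numpy_dict

features = make_datetime_numpy_dict(
    pd.DatetimeIndex(["2024-06-01 12:00"]), key_prefix="gsp"
)
print(features["gsp_time_sin"])  # ~0.0 (sin of pi, up to float error)
print(features["gsp_time_cos"])  # -1.0
```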
ocf_data_sampler/sample/base.py CHANGED

@@ -5,25 +5,34 @@ Handling of both flat and nested structures - consideration for NWP
 
 import logging
 import numpy as np
+import torch
+import xarray as xr
 
 from pathlib import Path
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, TypeAlias
 from abc import ABC, abstractmethod
 
+
 logger = logging.getLogger(__name__)
 
+NumpySample: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+NumpyBatch: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+TensorBatch: TypeAlias = Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]
+
+
 class SampleBase(ABC):
    """
    Abstract base class for all sample types
    Provides core data storage functionality
    """
 
-    def __init__(self):
+    def __init__(self, data: Optional[Union[NumpySample, xr.Dataset]] = None):
        """ Initialise data container """
        logger.debug("Initialising SampleBase instance")
+        self._data = data
 
    @abstractmethod
-    def to_numpy(self) -> Dict[str, Any]:
+    def to_numpy(self) -> NumpySample:
        """ Convert data to a numpy array representation """
        raise NotImplementedError
 
@@ -42,3 +51,25 @@
    def load(cls, path: Union[str, Path]) -> 'SampleBase':
        """ Abstract class method for loading sample data """
        raise NotImplementedError
+
+
+def batch_to_tensor(batch: NumpyBatch) -> TensorBatch:
+    """
+    Moves ndarrays in a nested dict to torch tensors
+    Args:
+        batch: NumpyBatch with data in numpy arrays
+    Returns:
+        TensorBatch with data in torch tensors
+    """
+    if not batch:
+        raise ValueError("Cannot convert empty batch to tensors")
+
+    for k, v in batch.items():
+        if isinstance(v, dict):
+            batch[k] = batch_to_tensor(v)
+        elif isinstance(v, np.ndarray):
+            if v.dtype == np.bool_:
+                batch[k] = torch.tensor(v, dtype=torch.bool)
+            elif np.issubdtype(v.dtype, np.number):
+                batch[k] = torch.as_tensor(v)
+    return batch
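
A usage sketch of the new helper: it recurses into nested dicts (e.g. the per-provider NWP entries), converts boolean and numeric ndarrays in place, and leaves anything else untouched (keys and shapes below are illustrative):

```python
import numpy as np
from ocf_data_sampler.sample.base import batch_to_tensor

batch = {
    "gsp": np.random.rand(4, 7),
    "nwp": {"ukv": np.random.rand(4, 2, 8, 8)},  # nested dicts are handled recursively
}
tensor_batch = batch_to_tensor(batch)
print(tensor_batch["nwp"]["ukv"].shape)  # torch.Size([4, 2, 8, 8])
```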
ocf_data_sampler/select/find_contiguous_time_periods.py CHANGED

@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+from ocf_data_sampler.load.utils import check_time_unique_increasing
 
 
 
@@ -28,8 +29,7 @@ def find_contiguous_time_periods(
    # Sanity checks.
    assert len(datetimes) > 0
    assert min_seq_length > 1
-    assert datetimes.is_monotonic_increasing
-    assert datetimes.is_unique
+    check_time_unique_increasing(datetimes)
 
    # Find indices of gaps larger than max_gap:
    gap_mask = pd.TimedeltaIndex(np.diff(datetimes)) > max_gap_duration
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py CHANGED

@@ -187,7 +187,7 @@ class PVNetUKRegionalDataset(Dataset):
 
        config = load_yaml_configuration(config_filename)
 
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
        # Get t0 times where all input data is available
        valid_t0_times = find_valid_t0_times(datasets_dict, config)
@@ -295,7 +295,7 @@ class PVNetUKConcurrentDataset(Dataset):
 
        config = load_yaml_configuration(config_filename)
 
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
        # Get t0 times where all input data is available
        valid_t0_times = find_valid_t0_times(datasets_dict, config)
ocf_data_sampler/torch_datasets/datasets/site.py CHANGED

@@ -47,7 +47,7 @@ class SitesDataset(Dataset):
        """
 
        config: Configuration = load_yaml_configuration(config_filename)
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
        # Assign config and input data to self
        self.datasets_dict = datasets_dict
{ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.1.5
+Version: 0.1.7
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
{ocf_data_sampler-0.1.5.dist-info → ocf_data_sampler-0.1.7.dist-info}/RECORD CHANGED

@@ -1,39 +1,39 @@
 ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 ocf_data_sampler/constants.py,sha256=ClteRIgp7EPlUPqIbkel83BfIaD7_VIDjUeHzUfyhnM,5079
 ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
-ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
-ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
-ocf_data_sampler/config/model.py,sha256=sXmh7IadwXDT-7lxEl5_b3vjovZgZYR77EXy4GHaf4w,7276
-ocf_data_sampler/config/save.py,sha256=gB44isAZWUlCe3L6VBkLkngWC9GFpcCfAM57gy-0dkg,3156
+ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
+ocf_data_sampler/config/load.py,sha256=sKCKmhkkeFvvkNL5xmnFvdAulaCtV4-rigPsFvVDPDc,634
+ocf_data_sampler/config/model.py,sha256=IMJhsjL_oGh2c50q8pBnCnArY4qHQcBc_M8jqlEeD0c,7129
+ocf_data_sampler/config/save.py,sha256=OqCPT3e0d7vMI2g2iRzmifPD7GscDkFQztU_qE5I0JY,1066
 ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
-ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
-ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
-ocf_data_sampler/load/load_dataset.py,sha256=Ua3RaUg4PIYJkD9BKqTfN8IWUbezbhThJGgEkd9PcaE,1587
-ocf_data_sampler/load/satellite.py,sha256=f2Q7FSyySOf7DeHxcigHd-vk-J-U4S2pXg_CnhnhuwU,2571
-ocf_data_sampler/load/site.py,sha256=P83uz01WBDzoZajdOH0m8FQt4-buKDlUk19N548KqhA,1086
+ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
+ocf_data_sampler/load/gsp.py,sha256=uRxEORH7J99JAJ-D38nm0iJFOQh7dkm_NCXcpbYkyvo,857
+ocf_data_sampler/load/load_dataset.py,sha256=PHUGSm4hFHfS9nfIP2KjHHCp325O4br7uGBdQH_DP7g,1603
+ocf_data_sampler/load/satellite.py,sha256=4MRJBFDHxx5WXu_6X71wEBznJTIuldEVnu9d6DVoLPI,2436
+ocf_data_sampler/load/site.py,sha256=74M_7RYwEc1bU4idjs3ZmQrx9I8mJXm6H4lwEL-h9n0,1226
 ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
 ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
-ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
+ocf_data_sampler/load/nwp/nwp.py,sha256=Jyq1dE7DN0iSe6iSEGA76uu9LoeJz9FzfEUkq6ZZExQ,565
 ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=2iR1Iy542lo51rC6XFLV-3pbUE68dWjlHa6TVJzx3ac,1280
-ocf_data_sampler/load/nwp/providers/ukv.py,sha256=79Bm7q-K_GJPYMy62SUIZbRWRF4-tIaB1dYPEgLD9vo,1207
-ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
+ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=8rYZKdV62AdczVNSOJ2G0BM4-fRFRV0_y5zkHgNYkQs,1004
+ocf_data_sampler/load/nwp/providers/ukv.py,sha256=dM_kvUI0xk9xEdslXqZGjOPP96PEw3qAci5mPUgUvxA,1014
+ocf_data_sampler/load/nwp/providers/utils.py,sha256=MFOZ5ZXLu3-SxYVJExdlo30b3y3s5ebRx3_6DO-33FQ,780
 ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
-ocf_data_sampler/numpy_sample/collate.py,sha256=Onl_aKhsZ4pbFJsh70orjsHk523GHxrpRirH2vJq_GA,1911
-ocf_data_sampler/numpy_sample/datetime_features.py,sha256=U-9uRplfZ7VYFA4qBduI8OkG2x_65RYIP8wrLG4i-Nw,1441
+ocf_data_sampler/numpy_sample/collate.py,sha256=oX5axq30sCsSquhNbmWAVMjM54HT1v3MCMopYHcO5Q0,1950
+ocf_data_sampler/numpy_sample/datetime_features.py,sha256=D0RajbnBjg15qjYk16h2H0XO4wH3fw-x0--4VC2nq0s,1204
 ocf_data_sampler/numpy_sample/gsp.py,sha256=5UaWO_aGRRVQo82wnDaT4zBKHihOnIsXiwgPjM8vGFM,1005
 ocf_data_sampler/numpy_sample/nwp.py,sha256=_seQNWsut3IzPsrpipqImjnaM3XNHZCy5_5be6syivk,1297
 ocf_data_sampler/numpy_sample/satellite.py,sha256=8OaTvkPjzSjotcdKsa6BKmmlBKDBunbhDN4Pjo0Grxs,910
 ocf_data_sampler/numpy_sample/site.py,sha256=I-cAXCOF0SDdm5Hx43lFqYZ3jh61kltLQK-fc4_nNu0,1314
 ocf_data_sampler/numpy_sample/sun_position.py,sha256=UklhucCxCT6GMlAhCWL6c4cfWrdc1cWgegrYaqUoHOY,1611
 ocf_data_sampler/sample/__init__.py,sha256=02CM7E5nKkGiYbVW-kvzjNd4RaqGuHCkDChtmDBDUoA,248
-ocf_data_sampler/sample/base.py,sha256=4U78tczCRsKMDwU4HkD20nyGyYjIBSZV5neF2mT--2M,1197
+ocf_data_sampler/sample/base.py,sha256=qeKuWyyO8M4QX6QDbItioeCiss0fG05NXRtf0TCMQSc,2246
 ocf_data_sampler/sample/site.py,sha256=0BvDXs0kxTjUq7kWpeoITK_uN4uE0w1IvEFXZUoKOb0,2507
 ocf_data_sampler/sample/uk_regional.py,sha256=D1A6nQB1PYCmxb3FzU9gqbNufQfx__wcprcDm50jCJw,4381
 ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
 ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
 ocf_data_sampler/select/fill_time_periods.py,sha256=h0XD1Ds_wUUoy-7bILxmN8AIbjlQ6YdXRKuCk_Is5jo,460
-ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqbhgtkZ4Js1gn5K9Qyp6DVLbsL-Q,11093
+ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=Nvz4gLCbbKzAe3sQXfxgExL9NtZVk1WNORvHs94DQ_k,11130
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
@@ -41,21 +41,22 @@ ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56
 ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
 ocf_data_sampler/select/time_slice_for_dataset.py,sha256=Z7pOiilSHScxmBKZNG18K5J-S4ifdXXAYGZoHRHD3AY,4324
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=4lqniFbUNt1qWSct4ISavXg9C7FM5cdVu48JHd7A9Pk,11873
-ocf_data_sampler/torch_datasets/datasets/site.py,sha256=5T8nkTMUHHFidZRuFOunYeKAqNuyZ8V7sikBoBOBwwA,16033
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=xuNJyCXZ4dZ9UldX1lqOoRSRNP39Vcy0DR77Vr7dxlk,11895
+ocf_data_sampler/torch_datasets/datasets/site.py,sha256=ZjvJS0mWUyQE7ZcrhS1TdMHaPrEZXVbBAv2vDwBvQwA,16044
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
 scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=RlC7YYtBLipUzFS1tQxela1SgHCxSpReUKEJ4429PwQ,7689
-tests/config/test_config.py,sha256=Vq_kTL5tJcwEP-hXD_Nah5O6cgafo99iX6Fw1AN5NDY,5288
-tests/config/test_save.py,sha256=rA_XVxP1pOxB--5Ebujz4T5o-VbcrCbg2VSlSq2iI0o,1318
+tests/config/test_config.py,sha256=VQjNiucIk5VnPQdGA6Mr-RNd9CwGI06AiikChTHrcnY,3969
+tests/config/test_load.py,sha256=8nui2UsgK_eufWGD74yXvf-6eY_SxBFKhDmGYUtRQxw,260
+tests/config/test_save.py,sha256=BxSd2S50-bRPIXP_4iX0B6Wt7pRFJnUbLYtzfLaqlAs,915
 tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
 tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
 tests/load/test_load_satellite.py,sha256=IQ8ISRZKCEoi8IsJoPpXZJTolD0mwjnl2E7762RM_PM,524
-tests/load/test_load_sites.py,sha256=T9lSEnGPI8FQISudVYHHNTHeplNS62Vrx48jaZ6J_Jo,364
+tests/load/test_load_sites.py,sha256=6V-U3_EtBklkV7w-hOoR4nba3dSaZ_cnjuRWFs8kYVU,405
 tests/numpy_sample/test_collate.py,sha256=RqHCD5_LTRpe4r6kqC_2TKhmhM_IHYM0ZtFUvSjDqcM,654
-tests/numpy_sample/test_datetime_features.py,sha256=o4t3KeKFdGrOBQ77rNFcDuDMQSD23ileCS5T5AP3wG4,1769
+tests/numpy_sample/test_datetime_features.py,sha256=iR9WdBLj1nIBNqoaTFE9rkUaH1eKFJSNb96nwiEaQH0,1449
 tests/numpy_sample/test_gsp.py,sha256=FLlq4SlJ-9cSRAepf4_ksA6PsUVKegnKEAc5pUojCJ0,1458
 tests/numpy_sample/test_nwp.py,sha256=yf4u7mAU0E3FQ4xAH6YjuHuHBzzFoXjHSFNkOVJUdSM,1455
 tests/numpy_sample/test_satellite.py,sha256=cCqtn5See-uSNfh89COGTUQNuFm6sIZ8QmBVHsuUeRI,1189
@@ -66,14 +67,14 @@ tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM
 tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
 tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
 tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
-tests/test_sample/test_base.py,sha256=ljtB38MmscTGN6OvUgclBceNnfx6m7AN8iHYDml9XW4,2189
+tests/test_sample/test_base.py,sha256=CkqKCZbrq3Vb4T7bOwPh3_0p8OTl0LfSLNBctYC_jag,4199
 tests/test_sample/test_site_sample.py,sha256=Gln-Or060cUWvA7Q7c1vsthgCttOAM2z9yBI9zUIrDw,6238
 tests/test_sample/test_uk_regional_sample.py,sha256=gkeQWC2wC757jKJz_QBmDMFQjn3R54q_tEo948yyxCY,4840
 tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
 tests/torch_datasets/test_pvnet_uk.py,sha256=loueo7PUUYJVda3-vBn3bQIC_zgrTAThfx-GTDcBOZg,5596
-tests/torch_datasets/test_site.py,sha256=5MH5zkHFJXekwpnV6nHuSxt_sRNu9_mxiUjfWqmEhr0,6966
-ocf_data_sampler-0.1.5.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.1.5.dist-info/METADATA,sha256=PetECVCNM6jys05FuPsOVmntGurbxTuW3n1_j7CYCLE,12173
-ocf_data_sampler-0.1.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ocf_data_sampler-0.1.5.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
-ocf_data_sampler-0.1.5.dist-info/RECORD,,
+tests/torch_datasets/test_site.py,sha256=t57vAR_RRWcbG_kEFk6VrFCYzVxwFG6qJKBnRHF02fM,7000
+ocf_data_sampler-0.1.7.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.1.7.dist-info/METADATA,sha256=8SbL1qjkmeFDYdv1_hHBL9jxbSpt4aFCpx70rEEPeb0,12173
+ocf_data_sampler-0.1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ocf_data_sampler-0.1.7.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.1.7.dist-info/RECORD,,
tests/config/test_config.py CHANGED

@@ -1,59 +1,13 @@
-import tempfile
-
 import pytest
 from pydantic import ValidationError
-from pathlib import Path
-from ocf_data_sampler.config import (
-    load_yaml_configuration,
-    Configuration,
-    save_yaml_configuration
-)
+from ocf_data_sampler.config import load_yaml_configuration, Configuration
 
 
 def test_default_configuration():
    """Test default pydantic class"""
-
    _ = Configuration()
 
 
-def test_load_yaml_configuration(test_config_filename):
-    """
-    Test that yaml loading works for 'test_config.yaml'
-    and fails for an empty .yaml file
-    """
-    # Create temporary directory instead of file
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Create path for empty file
-        empty_file = Path(temp_dir) / "empty.yaml"
-
-        # Create an empty file
-        empty_file.touch()
-
-        # Test loading empty file
-        with pytest.raises(TypeError):
-            _ = load_yaml_configuration(str(empty_file))
-
-def test_yaml_save(test_config_filename):
-    """
-    Check configuration can be saved to a .yaml file
-    """
-    test_config = load_yaml_configuration(test_config_filename)
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Create path for config file
-        config_path = Path(temp_dir) / "test_config.yaml"
-
-        # Save configuration
-        saved_path = save_yaml_configuration(test_config, config_path)
-
-        # Verify file exists
-        assert saved_path.exists()
-
-        # Test loading saved configuration
-        loaded_config = load_yaml_configuration(str(saved_path))
-        assert loaded_config == test_config
-
-
 def test_extra_field_error():
    """
    Check an extra parameters in config causes error
tests/config/test_load.py ADDED

@@ -0,0 +1,7 @@
+from ocf_data_sampler.config import Configuration, load_yaml_configuration
+
+
+def test_load_yaml_configuration(test_config_filename):
+    loaded_config = load_yaml_configuration(test_config_filename)
+    assert isinstance(loaded_config, Configuration)
+
tests/config/test_save.py CHANGED
@@ -1,37 +1,28 @@
 """Tests for configuration saving functionality."""
-import pytest
-from pathlib import Path
-import tempfile
-import yaml
+import os
+from ocf_data_sampler.config import Configuration, save_yaml_configuration, load_yaml_configuration
 
-from ocf_data_sampler.config import Configuration, save_yaml_configuration
 
-@pytest.fixture
-def temp_dir():
-    """Create a temporary directory."""
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        yield Path(tmpdirname)
-
-def test_save_yaml_configuration_basic(temp_dir):
-    """Test basic configuration saving functionality."""
+def test_save_yaml_configuration_basic(tmp_path):
+    """Save an empty configuration object"""
    config = Configuration()
-    filepath = temp_dir / "config.yaml"
-    result = save_yaml_configuration(config, filepath)
+
+    filepath = f"{tmp_path}/config.yaml"
+    save_yaml_configuration(config, filepath)
 
-    assert filepath.exists()
-    with open(filepath) as f:
-        loaded_yaml = yaml.safe_load(f)
-    assert isinstance(loaded_yaml, dict)
+    assert os.path.exists(filepath)
 
-def test_save_yaml_configuration_none_filename():
-    """Test that None filename raises ValueError."""
-    config = Configuration()
-    with pytest.raises(ValueError, match="filename cannot be None"):
-        save_yaml_configuration(config, None)
 
-def test_save_yaml_configuration_invalid_directory(temp_dir):
-    """Test handling of invalid directory paths."""
-    config = Configuration()
-    invalid_path = (temp_dir / "nonexistent" / "config.yaml").resolve()
-    with pytest.raises(ValueError, match="Directory does not exist"):
-        save_yaml_configuration(config, invalid_path)
+def test_save_load_yaml_configuration(tmp_path, test_config_filename):
+    """Make sure a saved configuration is the same after loading"""
+
+    # Start with this config
+    initial_config = load_yaml_configuration(test_config_filename)
+
+    # Save it
+    filepath = f"{tmp_path}/config.yaml"
+    save_yaml_configuration(initial_config, filepath)
+
+    # Load it and check it is still the same
+    loaded_config = load_yaml_configuration(filepath)
+    assert loaded_config == initial_config
tests/load/test_load_sites.py CHANGED

@@ -3,7 +3,7 @@ import xarray as xr
 
 
 def test_open_site(data_sites):
-    da = open_site(data_sites)
+    da = open_site(data_sites.file_path, data_sites.metadata_file_path)
 
    assert isinstance(da, xr.DataArray)
    assert da.dims == ("time_utc", "site_id")
tests/numpy_sample/test_datetime_features.py CHANGED

@@ -35,13 +35,3 @@ def test_make_datetime_numpy_batch_custom_key_prefix():
    # Assert dict contains expected quantity of keys and verify starting with custom prefix
    assert len(datetime_features) == 4
    assert all(key.startswith(key_prefix) for key in datetime_features.keys())
-
-
-def test_make_datetime_numpy_batch_empty_input():
-    # Verification that function raises error for empty input
-    datetimes = pd.DatetimeIndex([])
-
-    with pytest.raises(
-        ValueError, match="Input datetimes is empty for 'make_datetime_numpy_dict' function"
-    ):
-        make_datetime_numpy_dict(datetimes)
tests/test_sample/test_base.py CHANGED

@@ -3,11 +3,14 @@ Base class testing - SampleBase
 """
 
 import pytest
+import torch
 import numpy as np
 
 from pathlib import Path
-from ocf_data_sampler.sample.base import SampleBase
-
+from ocf_data_sampler.sample.base import (
+    SampleBase,
+    batch_to_tensor
+)
 
 class TestSample(SampleBase):
    """
@@ -84,3 +87,61 @@ def test_sample_base_to_numpy():
    assert isinstance(numpy_data, dict)
    assert all(isinstance(value, np.ndarray) for value in numpy_data.values())
    assert np.array_equal(numpy_data['list_data'], np.array([1, 2, 3]))
+
+
+def test_batch_to_tensor_nested():
+    """ Test nested dictionary conversion """
+    batch = {
+        'outer': {
+            'inner': np.array([1, 2, 3])
+        }
+    }
+    tensor_batch = batch_to_tensor(batch)
+
+    assert torch.equal(tensor_batch['outer']['inner'], torch.tensor([1, 2, 3]))
+
+
+def test_batch_to_tensor_mixed_types():
+    """ Test handling of mixed data types """
+    batch = {
+        'tensor_data': np.array([1, 2, 3]),
+        'string_data': 'not_a_tensor',
+        'nested': {
+            'numbers': np.array([4, 5, 6]),
+            'text': 'still_not_a_tensor'
+        }
+    }
+    tensor_batch = batch_to_tensor(batch)
+
+    assert isinstance(tensor_batch['tensor_data'], torch.Tensor)
+    assert isinstance(tensor_batch['string_data'], str)
+    assert isinstance(tensor_batch['nested']['numbers'], torch.Tensor)
+    assert isinstance(tensor_batch['nested']['text'], str)
+
+
+def test_batch_to_tensor_different_dtypes():
+    """ Test conversion of arrays with different dtypes """
+    batch = {
+        'float_data': np.array([1.0, 2.0, 3.0], dtype=np.float32),
+        'int_data': np.array([1, 2, 3], dtype=np.int64),
+        'bool_data': np.array([True, False, True], dtype=np.bool_)
+    }
+    tensor_batch = batch_to_tensor(batch)
+
+    assert isinstance(tensor_batch['bool_data'], torch.Tensor)
+    assert tensor_batch['float_data'].dtype == torch.float32
+    assert tensor_batch['int_data'].dtype == torch.int64
+    assert tensor_batch['bool_data'].dtype == torch.bool
+
+
+def test_batch_to_tensor_multidimensional():
+    """ Test conversion of multidimensional arrays """
+    batch = {
+        'matrix': np.array([[1, 2], [3, 4]]),
+        'tensor': np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+    }
+    tensor_batch = batch_to_tensor(batch)
+
+    assert tensor_batch['matrix'].shape == (2, 2)
+    assert tensor_batch['tensor'].shape == (2, 2, 2)
+    assert torch.equal(tensor_batch['matrix'], torch.tensor([[1, 2], [3, 4]]))
tests/torch_datasets/test_site.py CHANGED

@@ -33,7 +33,7 @@ def sites_dataset(site_config_filename):
    return SitesDataset(site_config_filename)
 
 
-def test_site(site_config_filename):
+def test_site(tmp_path, site_config_filename):
 
    # Create dataset object
    dataset = SitesDataset(site_config_filename)
@@ -71,8 +71,8 @@ def test_site(site_config_filename):
    expected_data_vars = {"nwp-ukv", "satellite", "site"}
 
 
-    sample.to_netcdf("sample.nc")
-    sample = xr.open_dataset("sample.nc")
+    sample.to_netcdf(f"{tmp_path}/sample.nc")
+    sample = xr.open_dataset(f"{tmp_path}/sample.nc")
 
    # Check dimensions
    assert (