ocf-data-sampler 0.1.16__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ocf-data-sampler might be problematic.

Files changed (64)
  1. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/PKG-INFO +2 -3
  2. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/README.md +1 -2
  3. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/config/model.py +73 -3
  4. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +16 -15
  5. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/datasets/site.py +19 -13
  6. ocf_data_sampler-0.2.0/ocf_data_sampler/torch_datasets/utils/__init__.py +3 -0
  7. ocf_data_sampler-0.2.0/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +11 -0
  8. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/PKG-INFO +2 -3
  9. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/SOURCES.txt +2 -2
  10. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/scripts/refactor_site.py +7 -6
  11. ocf_data_sampler-0.1.16/ocf_data_sampler/constants.py +0 -350
  12. ocf_data_sampler-0.1.16/ocf_data_sampler/torch_datasets/utils/validate_channels.py +0 -86
  13. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/LICENSE +0 -0
  14. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/__init__.py +0 -0
  15. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/config/__init__.py +0 -0
  16. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/config/load.py +0 -0
  17. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/config/save.py +0 -0
  18. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  19. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/__init__.py +0 -0
  20. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/gsp.py +0 -0
  21. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/load_dataset.py +0 -0
  22. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  23. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  24. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  25. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  26. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  27. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
  28. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  29. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  30. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/satellite.py +0 -0
  31. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/site.py +0 -0
  32. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/load/utils.py +0 -0
  33. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  34. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  35. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  36. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  37. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  38. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  39. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/site.py +0 -0
  40. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  41. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/sample/__init__.py +0 -0
  42. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/sample/base.py +0 -0
  43. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/sample/site.py +0 -0
  44. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/sample/uk_regional.py +0 -0
  45. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/__init__.py +0 -0
  46. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/dropout.py +0 -0
  47. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  48. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  49. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/geospatial.py +0 -0
  50. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/location.py +0 -0
  51. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  52. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/select_time_slice.py +0 -0
  53. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  54. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  55. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  56. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  57. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  58. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/utils.py +0 -0
  59. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  60. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/requires.txt +0 -0
  61. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  62. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/pyproject.toml +0 -0
  63. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/setup.cfg +0 -0
  64. {ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/utils/compute_icon_mean_stddev.py +0 -0
{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ocf-data-sampler
- Version: 0.1.16
+ Version: 0.2.0
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -60,8 +60,7 @@ Requires-Dist: h5netcdf
  We are currently migrating to this repo from [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/), which performs the same functions but is built around `PyTorch DataPipes`, which are quite cumbersome to work with and are no longer maintained by PyTorch. **ocf-data-sampler** uses `PyTorch Datasets`, and we've taken the opportunity to make the code much cleaner and more manageable.

  > [!Note]
- > This repository is in development and is replacing [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/).
- > It might not be ready for use out of the box! We would really appreciate any help to let us make the transition faster.
+ > This repository is still in early development development and large changes to the user facing functions may still occur.

  ## Documentation

{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/README.md
@@ -12,8 +12,7 @@
  We are currently migrating to this repo from [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/), which performs the same functions but is built around `PyTorch DataPipes`, which are quite cumbersome to work with and are no longer maintained by PyTorch. **ocf-data-sampler** uses `PyTorch Datasets`, and we've taken the opportunity to make the code much cleaner and more manageable.

  > [!Note]
- > This repository is in development and is replacing [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/).
- > It might not be ready for use out of the box! We would really appreciate any help to let us make the transition faster.
+ > This repository is still in early development development and large changes to the user facing functions may still occur.

  ## Documentation

{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/config/model.py
@@ -9,7 +9,12 @@ from collections.abc import Iterator
  from pydantic import BaseModel, Field, RootModel, field_validator, model_validator
  from typing_extensions import override

- from ocf_data_sampler.constants import NWP_PROVIDERS
+ NWP_PROVIDERS = [
+ "ukv",
+ "ecmwf",
+ "gfs",
+ "icon_eu",
+ ]


  class Base(BaseModel):
@@ -125,7 +130,35 @@ class SpatialWindowMixin(Base):
  )


- class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
+ class NormalisationValues(Base):
+ """Normalisation mean and standard deviation."""
+ mean: float = Field(..., description="Mean value for normalization")
+ std: float = Field(..., gt=0, description="Standard deviation (must be positive)")
+
+
+ class NormalisationConstantsMixin(Base):
+ """Normalisation constants for multiple channels."""
+ normalisation_constants: dict[str, NormalisationValues]
+
+ @property
+ def channel_means(self) -> dict[str, float]:
+ """Return the channel means."""
+ return {
+ channel: norm_values.mean
+ for channel, norm_values in self.normalisation_constants.items()
+ }
+
+
+ @property
+ def channel_stds(self) -> dict[str, float]:
+ """Return the channel standard deviations."""
+ return {
+ channel: norm_values.std
+ for channel, norm_values in self.normalisation_constants.items()
+ }
+
+
+ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConstantsMixin):
  """Satellite configuration model."""

  zarr_path: str | tuple[str] | list[str] = Field(
@@ -139,8 +172,20 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
  description="the satellite channels that are used",
  )

+ @model_validator(mode="after")
+ def check_all_channel_have_normalisation_constants(self) -> "Satellite":
+ """Check that all the channels have normalisation constants."""
+ normalisation_channels = set(self.normalisation_constants.keys())
+ missing_norm_values = set(self.channels) - set(normalisation_channels)
+ if len(missing_norm_values)>0:
+ raise ValueError(
+ "Normalsation constants must be provided for all channels. Missing values for "
+ f"channels: {missing_norm_values}",
+ )
+ return self

- class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
+
+ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin, NormalisationConstantsMixin):
  """NWP configuration model."""

  zarr_path: str | tuple[str] | list[str] = Field(
@@ -173,6 +218,31 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
  return v


+ @model_validator(mode="after")
+ def check_all_channel_have_normalisation_constants(self) -> "NWP":
+ """Check that all the channels have normalisation constants."""
+ normalisation_channels = set(self.normalisation_constants.keys())
+ non_accum_channels = [c for c in self.channels if c not in self.accum_channels]
+ accum_channel_names = [f"diff_{c}" for c in self.accum_channels]
+
+ missing_norm_values = set(non_accum_channels) - set(normalisation_channels)
+ if len(missing_norm_values)>0:
+ raise ValueError(
+ "Normalsation constants must be provided for all channels. Missing values for "
+ f"channels: {missing_norm_values}",
+ )
+
+ missing_norm_values = set(accum_channel_names) - set(normalisation_channels)
+ if len(missing_norm_values)>0:
+ raise ValueError(
+ "Normalsation constants must be provided for all channels. Accumulated "
+ "channels which will be diffed require normalisation constant names which "
+ "start with the prefix 'diff_'. The following channels were missing: "
+ f"{missing_norm_values}.",
+ )
+ return self
+
+
  class MultiNWP(RootModel):
  """Configuration for multiple NWPs."""

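The upshot of the config changes above is that normalisation statistics no longer ship as package-level constants but are supplied per channel in the configuration. Below is a minimal sketch of the new models, assuming they can be imported from ocf_data_sampler.config.model; the channel names and values are illustrative only, borrowed from the old ECMWF constants further down this diff.

    # Sketch only: class and field names come from the 0.2.0 diff above;
    # the import path and the example values are assumptions.
    from ocf_data_sampler.config.model import NormalisationConstantsMixin, NormalisationValues

    norm = NormalisationConstantsMixin(
        normalisation_constants={
            "t2m": NormalisationValues(mean=283.48333740234375, std=3.692270040512085),
            # Accumulated NWP channels need their constants under a "diff_"-prefixed name
            "diff_sr": NormalisationValues(mean=469169.5, std=818950.6875),
        },
    )

    print(norm.channel_means)  # {'t2m': 283.48333740234375, 'diff_sr': 469169.5}
    print(norm.channel_stds)   # {'t2m': 3.692270040512085, 'diff_sr': 818950.6875}

The Satellite and NWP validators shown above then raise a ValueError at config-load time if any configured channel (or "diff_"-prefixed accumulated channel) is missing from normalisation_constants.
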
{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
@@ -9,7 +9,6 @@ from torch.utils.data import Dataset
  from typing_extensions import override

  from ocf_data_sampler.config import Configuration, load_yaml_configuration
- from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
  from ocf_data_sampler.load.load_dataset import get_dataset_dict
  from ocf_data_sampler.numpy_sample import (
  convert_gsp_to_numpy_sample,
@@ -27,15 +26,11 @@ from ocf_data_sampler.select (
  slice_datasets_by_time,
  )
  from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
+ from ocf_data_sampler.torch_datasets.utils import channel_dict_to_dataarray, find_valid_time_periods
  from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
  fill_nans_in_arrays,
  merge_dicts,
  )
- from ocf_data_sampler.torch_datasets.utils.valid_time_periods import find_valid_time_periods
- from ocf_data_sampler.torch_datasets.utils.validate_channels import (
- validate_nwp_channels,
- validate_satellite_channels,
- )
  from ocf_data_sampler.utils import minutes

  xr.set_options(keep_attrs=True)
@@ -54,10 +49,18 @@ def process_and_combine_datasets(
  nwp_numpy_modalities = {}

  for nwp_key, da_nwp in dataset_dict["nwp"].items():
- provider = config.input_data.nwp[nwp_key].provider

  # Standardise and convert to NumpyBatch
- da_nwp = (da_nwp - NWP_MEANS[provider]) / NWP_STDS[provider]
+
+ da_channel_means = channel_dict_to_dataarray(
+ config.input_data.nwp[nwp_key].channel_means,
+ )
+ da_channel_stds = channel_dict_to_dataarray(
+ config.input_data.nwp[nwp_key].channel_stds,
+ )
+
+ da_nwp = (da_nwp - da_channel_means) / da_channel_stds
+
  nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_sample(da_nwp)

  # Combine the NWPs into NumpyBatch
@@ -67,7 +70,11 @@
  da_sat = dataset_dict["sat"]

  # Standardise and convert to NumpyBatch
- da_sat = (da_sat - RSS_MEAN) / RSS_STD
+ da_channel_means = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
+ da_channel_stds = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
+
+ da_sat = (da_sat - da_channel_means) / da_channel_stds
+
  numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))

  if "gsp" in dataset_dict:
@@ -194,8 +201,6 @@ class PVNetUKRegionalDataset(Dataset):
  """
  # config = load_yaml_configuration(config_filename)
  config: Configuration = load_yaml_configuration(config_filename)
- validate_nwp_channels(config)
- validate_satellite_channels(config)

  datasets_dict = get_dataset_dict(config.input_data)

@@ -305,10 +310,6 @@ class PVNetUKConcurrentDataset(Dataset):
  """
  config = load_yaml_configuration(config_filename)

- # Validate channels for NWP and satellite data
- validate_nwp_channels(config)
- validate_satellite_channels(config)
-
  datasets_dict = get_dataset_dict(config.input_data)

  # Get t0 times where all input data is available
{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler/torch_datasets/datasets/site.py
@@ -9,7 +9,6 @@ from torch.utils.data import Dataset
  from typing_extensions import override

  from ocf_data_sampler.config import Configuration, load_yaml_configuration
- from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
  from ocf_data_sampler.load.load_dataset import get_dataset_dict
  from ocf_data_sampler.numpy_sample import (
  NWPSampleKey,
@@ -27,15 +26,11 @@ from ocf_data_sampler.select (
  slice_datasets_by_space,
  slice_datasets_by_time,
  )
+ from ocf_data_sampler.torch_datasets.utils import channel_dict_to_dataarray, find_valid_time_periods
  from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
  fill_nans_in_arrays,
  merge_dicts,
  )
- from ocf_data_sampler.torch_datasets.utils.valid_time_periods import find_valid_time_periods
- from ocf_data_sampler.torch_datasets.utils.validate_channels import (
- validate_nwp_channels,
- validate_satellite_channels,
- )
  from ocf_data_sampler.utils import minutes

  xr.set_options(keep_attrs=True)
@@ -58,9 +53,6 @@ class SitesDataset(Dataset):
  end_time: Limit the init-times to be before this
  """
  config: Configuration = load_yaml_configuration(config_filename)
- validate_nwp_channels(config)
- validate_satellite_channels(config)
-
  datasets_dict = get_dataset_dict(config.input_data)

  # Assign config and input data to self
@@ -224,7 +216,6 @@

  Args:
  dataset_dict: dict containing sliced xr DataArrays
- config: Configuration for the model
  t0: The initial timestamp of the sample

  Returns:
@@ -238,14 +229,29 @@
  provider = self.config.input_data.nwp[nwp_key].provider

  # Standardise
- da_nwp = (da_nwp - NWP_MEANS[provider]) / NWP_STDS[provider]
+ da_channel_means = channel_dict_to_dataarray(
+ self.config.input_data.nwp[nwp_key].channel_means,
+ )
+ da_channel_stds = channel_dict_to_dataarray(
+ self.config.input_data.nwp[nwp_key].channel_stds,
+ )
+
+ da_nwp = (da_nwp - da_channel_means) / da_channel_stds
+
  data_arrays.append((f"nwp-{provider}", da_nwp))

  if "sat" in dataset_dict:
  da_sat = dataset_dict["sat"]

- # Standardise
- da_sat = (da_sat - RSS_MEAN) / RSS_STD
+ da_channel_means = channel_dict_to_dataarray(
+ self.config.input_data.satellite.channel_means,
+ )
+ da_channel_stds = channel_dict_to_dataarray(
+ self.config.input_data.satellite.channel_stds,
+ )
+
+ da_sat = (da_sat - da_channel_means) / da_channel_stds
+
  data_arrays.append(("satellite", da_sat))

  if "site" in dataset_dict:
ocf_data_sampler-0.2.0/ocf_data_sampler/torch_datasets/utils/__init__.py
@@ -0,0 +1,3 @@
+ from .channel_dict_to_dataarray import channel_dict_to_dataarray
+ from .merge_and_fill_utils import fill_nans_in_arrays, merge_dicts
+ from .valid_time_periods import find_valid_time_periods
ocf_data_sampler-0.2.0/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py
@@ -0,0 +1,11 @@
+ """Converts a dictionary of channel values to a DataArray."""
+
+ import xarray as xr
+
+
+ def channel_dict_to_dataarray(channel_dict: dict[str, float]) -> xr.DataArray:
+ """Converts a dictionary of channel values to a DataArray."""
+ return xr.DataArray(
+ list(channel_dict.values()),
+ coords={"channel": list(channel_dict.keys())},
+ )
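This new helper replaces the DataArray-building logic that previously lived in ocf_data_sampler/constants.py; the datasets above call it on the per-channel means and stds coming from the config. A small usage sketch follows; the channel names and values are illustrative (taken from the old RSS satellite constants removed below), and the import path follows the new utils __init__.py above.

    # Sketch of the standardisation pattern used by the 0.2.0 datasets.
    import numpy as np
    import xarray as xr

    from ocf_data_sampler.torch_datasets.utils import channel_dict_to_dataarray

    means = {"VIS006": 0.09633306, "VIS008": 0.11426069}
    stds = {"VIS006": 0.12184761, "VIS008": 0.13090034}

    da_means = channel_dict_to_dataarray(means)  # 1-D DataArray with a "channel" coordinate
    da_stds = channel_dict_to_dataarray(stds)

    # Dummy satellite-like data with a matching "channel" dimension
    da_sat = xr.DataArray(
        np.random.rand(4, 2),
        dims=("time", "channel"),
        coords={"channel": list(means.keys())},
    )

    # Broadcasts over the shared "channel" coordinate, as in process_and_combine_datasets
    da_sat_norm = (da_sat - da_means) / da_stds
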
{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ocf-data-sampler
- Version: 0.1.16
+ Version: 0.2.0
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -60,8 +60,7 @@ Requires-Dist: h5netcdf
  We are currently migrating to this repo from [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/), which performs the same functions but is built around `PyTorch DataPipes`, which are quite cumbersome to work with and are no longer maintained by PyTorch. **ocf-data-sampler** uses `PyTorch Datasets`, and we've taken the opportunity to make the code much cleaner and more manageable.

  > [!Note]
- > This repository is in development and is replacing [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/).
- > It might not be ready for use out of the box! We would really appreciate any help to let us make the transition faster.
+ > This repository is still in early development development and large changes to the user facing functions may still occur.

  ## Documentation

{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/ocf_data_sampler.egg-info/SOURCES.txt
@@ -2,7 +2,6 @@ LICENSE
  README.md
  pyproject.toml
  ocf_data_sampler/__init__.py
- ocf_data_sampler/constants.py
  ocf_data_sampler/utils.py
  ocf_data_sampler.egg-info/PKG-INFO
  ocf_data_sampler.egg-info/SOURCES.txt
@@ -53,8 +52,9 @@ ocf_data_sampler/select/time_slice_for_dataset.py
  ocf_data_sampler/torch_datasets/datasets/__init__.py
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
  ocf_data_sampler/torch_datasets/datasets/site.py
+ ocf_data_sampler/torch_datasets/utils/__init__.py
+ ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py
  ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py
- ocf_data_sampler/torch_datasets/utils/validate_channels.py
  scripts/refactor_site.py
  utils/compute_icon_mean_stddev.py
{ocf_data_sampler-0.1.16 → ocf_data_sampler-0.2.0}/scripts/refactor_site.py
@@ -1,16 +1,18 @@
- import xarray as xr
+ """Refactor legacy site data into a more structured format."""
+
  import pandas as pd
+ import xarray as xr
+

  def legacy_format(data_ds: xr.Dataset, metadata_df: pd.DataFrame) -> xr.Dataset:
- """
- Converts old legacy site data into a more structured format.
+ """Converts old legacy site data into a more structured format.

  This function does three main things:
  1. Renames some columns in the metadata to keep things consistent.
  2. Reshapes site data so that instead of having separate variables for each site,
  we use a `site_id` dimension—makes life easier for analysis.
  3. Adds `capacity_kwp` as a time series so that each site has its capacity info.
-
+
  Parameters:
  data_ds (xr.Dataset): The dataset containing legacy site data.
  metadata_df (pd.DataFrame): A DataFrame with metadata about the sites.
@@ -18,11 +20,10 @@ def legacy_format(data_ds: xr.Dataset, metadata_df: pd.DataFrame) -> xr.Dataset:
  Returns:
  xr.Dataset: Reformatted dataset with `generation_kw` and `capacity_kwp`.
  """
-
  # Step 1: Rename metadata columns to match the new expected format
  if "system_id" in metadata_df.columns:
  metadata_df = metadata_df.rename(columns={"system_id": "site_id"})
-
+
  # Convert capacity from megawatts to kilowatts if needed
  if "capacity_megawatts" in metadata_df.columns:
  metadata_df["capacity_kwp"] = metadata_df["capacity_megawatts"] * 1000
ocf_data_sampler-0.1.16/ocf_data_sampler/constants.py
@@ -1,350 +0,0 @@
- """Constants for the package."""
-
- import numpy as np
- import xarray as xr
- from typing_extensions import override
-
- NWP_PROVIDERS = [
- "ukv",
- "ecmwf",
- "gfs",
- "icon_eu",
- ]
-
-
- def _to_data_array(d: dict) -> xr.DataArray:
- """Convert a dictionary to a DataArray."""
- return xr.DataArray(
- [d[k] for k in d],
- coords={"channel": list(d.keys())},
- ).astype(np.float32)
-
-
- class NWPStatDict(dict):
- """Custom dictionary class to hold NWP normalization stats."""
-
- @override
- def __getitem__(self, key: str) -> xr.DataArray:
- if key not in NWP_PROVIDERS:
- raise KeyError(f"{key} is not a supported NWP provider - {NWP_PROVIDERS}")
- elif key in self.keys():
- return super().__getitem__(key)
- else:
- raise KeyError(
- f"Values for {key} not yet available in ocf-data-sampler {list(self.keys())}",
- )
-
-
- # ------ UKV
- # Means and std computed WITH version_7 and higher, MetOffice values
- UKV_STD = {
- "cdcb": 2126.99350113,
- "lcc": 39.33210726,
- "mcc": 41.91144559,
- "hcc": 38.07184418,
- "sde": 0.1029753,
- "hcct": 18382.63958991,
- "dswrf": 190.47216887,
- "dlwrf": 39.45988077,
- "h": 1075.77812282,
- "t": 4.38818501,
- "r": 11.45012499,
- "dpt": 4.57250482,
- "vis": 21578.97975625,
- "si10": 3.94718813,
- "wdir10": 94.08407495,
- "prmsl": 1252.71790539,
- "prate": 0.00021497,
- }
-
- UKV_MEAN = {
- "cdcb": 1412.26599062,
- "lcc": 50.08362643,
- "mcc": 40.88984494,
- "hcc": 29.11949682,
- "sde": 0.00289545,
- "hcct": -18345.97478167,
- "dswrf": 111.28265039,
- "dlwrf": 325.03130139,
- "h": 2096.51991356,
- "t": 283.64913206,
- "r": 81.79229501,
- "dpt": 280.54379901,
- "vis": 32262.03285118,
- "si10": 6.88348448,
- "wdir10": 199.41891636,
- "prmsl": 101321.61574029,
- "prate": 3.45793433e-05,
- }
-
- UKV_STD = _to_data_array(UKV_STD)
- UKV_MEAN = _to_data_array(UKV_MEAN)
-
- # ------ ECMWF
- # These were calculated from 100 random init times of UK data from 2020-2023
- ECMWF_STD = {
- "dlwrf": 15855867.0,
- "dswrf": 13025427.0,
- "duvrs": 1445635.25,
- "hcc": 0.42244860529899597,
- "lcc": 0.3791404366493225,
- "mcc": 0.38039860129356384,
- "prate": 9.81039775069803e-05,
- "sd": 0.000913831521756947,
- "sr": 16294988.0,
- "t2m": 3.692270040512085,
- "tcc": 0.37487083673477173,
- "u10": 5.531515598297119,
- "u100": 7.2320556640625,
- "u200": 8.049470901489258,
- "v10": 5.411230564117432,
- "v100": 6.944501876831055,
- "v200": 7.561611652374268,
- "diff_dlwrf": 131942.03125,
- "diff_dswrf": 715366.3125,
- "diff_duvrs": 81605.25,
- "diff_sr": 818950.6875,
- }
-
- ECMWF_MEAN = {
- "dlwrf": 27187026.0,
- "dswrf": 11458988.0,
- "duvrs": 1305651.25,
- "hcc": 0.3961029052734375,
- "lcc": 0.44901806116104126,
- "mcc": 0.3288780450820923,
- "prate": 3.108070450252853e-05,
- "sd": 8.107526082312688e-05,
- "sr": 12905302.0,
- "t2m": 283.48333740234375,
- "tcc": 0.7049227356910706,
- "u10": 1.7677178382873535,
- "u100": 2.393547296524048,
- "u200": 2.7963004112243652,
- "v10": 0.985887885093689,
- "v100": 1.4244288206100464,
- "v200": 1.6010299921035767,
- "diff_dlwrf": 1136464.0,
- "diff_dswrf": 420584.6875,
- "diff_duvrs": 48265.4765625,
- "diff_sr": 469169.5,
- }
-
- ECMWF_STD = _to_data_array(ECMWF_STD)
- ECMWF_MEAN = _to_data_array(ECMWF_MEAN)
-
- # ------ GFS
- GFS_STD = {
- "dlwrf": 96.305916,
- "dswrf": 246.18533,
- "hcc": 42.525383,
- "lcc": 44.3732,
- "mcc": 43.150745,
- "prate": 0.00010159573,
- "r": 25.440672,
- "sde": 0.43345627,
- "t": 22.825893,
- "tcc": 41.030598,
- "u10": 5.470838,
- "u100": 6.8899174,
- "v10": 4.7401133,
- "v100": 6.076132,
- "vis": 8294.022,
- "u": 10.614556,
- "v": 7.176398,
- }
-
- GFS_MEAN = {
- "dlwrf": 298.342,
- "dswrf": 168.12321,
- "hcc": 35.272,
- "lcc": 43.578342,
- "mcc": 33.738823,
- "prate": 2.8190969e-05,
- "r": 18.359747,
- "sde": 0.36937004,
- "t": 278.5223,
- "tcc": 66.841606,
- "u10": -0.0022310058,
- "u100": 0.0823025,
- "v10": 0.06219831,
- "v100": 0.0797807,
- "vis": 19628.32,
- "u": 11.645444,
- "v": 0.12330122,
- }
-
- GFS_STD = _to_data_array(GFS_STD)
- GFS_MEAN = _to_data_array(GFS_MEAN)
-
- # ------ ICON-EU
- # Statistics for ICON-EU variables
- ICON_EU_STD = {
- "alb_rad": 13.7881,
- "alhfl_s": 73.7198,
- "ashfl_s": 54.8027,
- "asob_s": 55.8319,
- "asob_t": 74.9360,
- "aswdifd_s": 21.4940,
- "aswdifu_s": 18.7688,
- "aswdir_s": 54.4683,
- "athb_s": 34.8575,
- "athb_t": 42.9108,
- "aumfl_s": 0.1460,
- "avmfl_s": 0.1892,
- "cape_con": 32.2570,
- "cape_ml": 106.3998,
- "clch": 39.9324,
- "clcl": 36.3961,
- "clcm": 41.1690,
- "clct": 34.7696,
- "clct_mod": 0.4227,
- "cldepth": 0.1739,
- "h_snow": 0.9012,
- "hbas_con": 1306.6632,
- "htop_con": 1810.5665,
- "htop_dc": 459.0422,
- "hzerocl": 1144.6469,
- "pmsl": 1103.3301,
- "ps": 4761.3184,
- "qv_2m": 0.0024,
- "qv_s": 0.0038,
- "rain_con": 1.7097,
- "rain_gsp": 4.2654,
- "relhum_2m": 15.3779,
- "rho_snow": 120.2461,
- "runoff_g": 0.7410,
- "runoff_s": 2.1930,
- "snow_con": 1.1432,
- "snow_gsp": 1.8154,
- "snowlmt": 656.0699,
- "synmsg_bt_cl_ir10.8": 17.9438,
- "t_2m": 7.7973,
- "t_g": 8.7053,
- "t_snow": 134.6874,
- "tch": 0.0052,
- "tcm": 0.0133,
- "td_2m": 7.1460,
- "tmax_2m": 7.8218,
- "tmin_2m": 7.8346,
- "tot_prec": 5.6312,
- "tqc": 0.0976,
- "tqi": 0.0247,
- "u_10m": 3.8351,
- "v_10m": 5.0083,
- "vmax_10m": 5.5037,
- "w_snow": 286.1510,
- "ww": 27.2974,
- "z0": 0.3901,
- }
-
- ICON_EU_MEAN = {
- "alb_rad": 15.4437,
- "alhfl_s": -54.9398,
- "ashfl_s": -19.4684,
- "asob_s": 40.9305,
- "asob_t": 61.9244,
- "aswdifd_s": 19.7813,
- "aswdifu_s": 8.8328,
- "aswdir_s": 29.9820,
- "athb_s": -53.9873,
- "athb_t": -212.8088,
- "aumfl_s": 0.0558,
- "avmfl_s": 0.0078,
- "cape_con": 16.7397,
- "cape_ml": 21.2189,
- "clch": 26.4262,
- "clcl": 57.1591,
- "clcm": 36.1702,
- "clct": 72.9254,
- "clct_mod": 0.5561,
- "cldepth": 0.1356,
- "h_snow": 0.0494,
- "hbas_con": 108.4975,
- "htop_con": 433.0623,
- "htop_dc": 454.0859,
- "hzerocl": 1696.6272,
- "pmsl": 101778.8281,
- "ps": 99114.4766,
- "qv_2m": 0.0049,
- "qv_s": 0.0065,
- "rain_con": 0.4869,
- "rain_gsp": 0.9783,
- "relhum_2m": 78.2258,
- "rho_snow": 62.5032,
- "runoff_g": 0.1301,
- "runoff_s": 0.4119,
- "snow_con": 0.2188,
- "snow_gsp": 0.4317,
- "snowlmt": 1450.3241,
- "synmsg_bt_cl_ir10.8": 265.0639,
- "t_2m": 278.8212,
- "t_g": 279.9216,
- "t_snow": 162.5582,
- "tch": 0.0047,
- "tcm": 0.0091,
- "td_2m": 274.9544,
- "tmax_2m": 279.3550,
- "tmin_2m": 278.2519,
- "tot_prec": 2.1158,
- "tqc": 0.0424,
- "tqi": 0.0108,
- "u_10m": 1.1902,
- "v_10m": -0.4733,
- "vmax_10m": 8.4152,
- "w_snow": 14.5936,
- "ww": 15.3570,
- "z0": 0.2386,
- }
-
- ICON_EU_STD = _to_data_array(ICON_EU_STD)
- ICON_EU_MEAN = _to_data_array(ICON_EU_MEAN)
-
- NWP_STDS = NWPStatDict(
- ukv=UKV_STD,
- ecmwf=ECMWF_STD,
- gfs=GFS_STD,
- icon_eu=ICON_EU_STD,
- )
- NWP_MEANS = NWPStatDict(
- ukv=UKV_MEAN,
- ecmwf=ECMWF_MEAN,
- gfs=GFS_MEAN,
- icon_eu=ICON_EU_MEAN,
- )
-
- # ------ Satellite
- # RSS Mean and std values from randomised 20% of 2020 imagery
-
- RSS_STD = {
- "HRV": 0.11405209,
- "IR_016": 0.21462157,
- "IR_039": 0.04618041,
- "IR_087": 0.06687243,
- "IR_097": 0.0468558,
- "IR_108": 0.17482725,
- "IR_120": 0.06115861,
- "IR_134": 0.04492306,
- "VIS006": 0.12184761,
- "VIS008": 0.13090034,
- "WV_062": 0.16111417,
- "WV_073": 0.12924142,
- }
-
- RSS_MEAN = {
- "HRV": 0.09298719,
- "IR_016": 0.17594202,
- "IR_039": 0.86167645,
- "IR_087": 0.7719318,
- "IR_097": 0.8014212,
- "IR_108": 0.71254843,
- "IR_120": 0.89058584,
- "IR_134": 0.944365,
- "VIS006": 0.09633306,
- "VIS008": 0.11426069,
- "WV_062": 0.7359355,
- "WV_073": 0.62479186,
- }
-
- RSS_STD = _to_data_array(RSS_STD)
- RSS_MEAN = _to_data_array(RSS_MEAN)
ocf_data_sampler-0.1.16/ocf_data_sampler/torch_datasets/utils/validate_channels.py
@@ -1,86 +0,0 @@
- """Functions for checking that normalisation statistics exist for the data channels requested."""
-
- from ocf_data_sampler.config import Configuration
- from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
-
-
- def validate_channels(
- data_channels: list,
- means_channels: list,
- stds_channels: list,
- source_name: str | None = None,
- ) -> None:
- """Validates that all channels in data have corresponding normalisation constants.
-
- Args:
- data_channels: Set of channels from the data
- means_channels: Set of channels from means constants
- stds_channels: Set of channels from stds constants
- source_name: Name of data source (e.g., 'ecmwf', 'satellite') for error messages
-
- Raises:
- ValueError: If there's a mismatch between data channels and normalisation constants
- """
- data_set = set(data_channels)
- means_set = set(means_channels)
- stds_set = set(stds_channels)
-
- # Find missing channels in means
- missing_in_means = data_set - means_set
- if missing_in_means:
- raise ValueError(
- f"The following channels for {source_name} are missing in normalisation means: "
- f"{missing_in_means}",
- )
-
- # Find missing channels in stds
- missing_in_stds = data_set - stds_set
- if missing_in_stds:
- raise ValueError(
- f"The following channels for {source_name} are missing in normalisation stds: "
- f"{missing_in_stds}",
- )
-
-
- def validate_nwp_channels(config: Configuration) -> None:
- """Validate that NWP channels in config have corresponding normalisation constants.
-
- Args:
- config: Configuration object containing NWP channel information
-
- Raises:
- ValueError: If there's a mismatch between configured NWP channels
- and normalisation constants
- """
- if hasattr(config.input_data, "nwp") and (
- config.input_data.nwp is not None
- ):
- for _, nwp_config in config.input_data.nwp.items():
- provider = nwp_config.provider
- validate_channels(
- data_channels=nwp_config.channels,
- means_channels=NWP_MEANS[provider].channel.values,
- stds_channels=NWP_STDS[provider].channel.values,
- source_name=provider,
- )
-
-
- def validate_satellite_channels(config: Configuration) -> None:
- """Validate that satellite channels in config have corresponding normalisation constants.
-
- Args:
- config: Configuration object containing satellite channel information
-
- Raises:
- ValueError: If there's a mismatch between configured satellite channels
- and normalisation constants
- """
- if hasattr(config.input_data, "satellite") and (
- config.input_data.satellite is not None
- ):
- validate_channels(
- data_channels=config.input_data.satellite.channels,
- means_channels=RSS_MEAN.channel.values,
- stds_channels=RSS_STD.channel.values,
- source_name="satellite",
- )