PyPI - ocf-data-sampler - Versions diffs - 0.0.36__tar.gz → 0.0.38__tar.gz - Mend

ocf-data-sampler 0.0.36 (tar.gz) → 0.0.38 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (75)

{ocf_data_sampler-0.0.36/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.38}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.36
+Version: 0.0.38
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
 # ocf-data-sampler
 <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
-[![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
+[![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
 <!-- ALL-CONTRIBUTORS-BADGE:END -->
 [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -129,6 +129,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
     </tr>
   </tbody>
 </table>

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/README.md RENAMED Viewed

@@ -1,7 +1,7 @@
 # ocf-data-sampler
 <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
-[![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
+[![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
 <!-- ALL-CONTRIBUTORS-BADGE:END -->
 [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -74,6 +74,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
     </tr>
   </tbody>
 </table>

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/constants.py RENAMED Viewed

@@ -28,6 +28,7 @@ class NWPStatDict(dict):
                 f"Values for {key} not yet available in ocf-data-sampler {list(self.keys())}"
             )
 # ------ UKV
 # Means and std computed WITH version_7 and higher, MetOffice values
 UKV_STD = {
@@ -49,6 +50,7 @@ UKV_STD = {
     "prmsl": 1252.71790539,
     "prate": 0.00021497,
 }
 UKV_MEAN = {
     "cdcb": 1412.26599062,
     "lcc": 50.08362643,
@@ -97,6 +99,7 @@ ECMWF_STD = {
     "diff_duvrs": 81605.25,
     "diff_sr": 818950.6875,
 }
 ECMWF_MEAN = {
     "dlwrf": 27187026.0,
     "dswrf": 11458988.0,
@@ -133,3 +136,38 @@ NWP_MEANS = NWPStatDict(
     ecmwf=ECMWF_MEAN,
 )
+# ------ Satellite
+# RSS Mean and std values from randomised 20% of 2020 imagery
+RSS_STD = {
+    "HRV": 0.11405209,
+    "IR_016": 0.21462157,
+    "IR_039": 0.04618041,
+    "IR_087": 0.06687243,
+    "IR_097": 0.0468558,
+    "IR_108": 0.17482725,
+    "IR_120": 0.06115861,
+    "IR_134": 0.04492306,
+    "VIS006": 0.12184761,
+    "VIS008": 0.13090034,
+    "WV_062": 0.16111417,
+    "WV_073": 0.12924142,
+}
+RSS_MEAN = {
+    "HRV": 0.09298719,
+    "IR_016": 0.17594202,
+    "IR_039": 0.86167645,
+    "IR_087": 0.7719318,
+    "IR_097": 0.8014212,
+    "IR_108": 0.71254843,
+    "IR_120": 0.89058584,
+    "IR_134": 0.944365,
+    "VIS006": 0.09633306,
+    "VIS008": 0.11426069,
+    "WV_062": 0.7359355,
+    "WV_073": 0.62479186,
+}
+RSS_STD = _to_data_array(RSS_STD)
+RSS_MEAN = _to_data_array(RSS_MEAN)

ocf_data_sampler-0.0.38/ocf_data_sampler/numpy_batch/collate.py ADDED Viewed

@@ -0,0 +1,79 @@
+from ocf_data_sampler.numpy_batch import NWPBatchKey
+import numpy as np
+import logging
+from typing import Union
+logger = logging.getLogger(__name__)
+def stack_np_examples_into_batch(dict_list):
+    """
+    Stacks Numpy examples into a batch
+    See also: `unstack_np_batch_into_examples()` for opposite
+    Args:
+        dict_list: A list of dict-like Numpy examples to stack
+    Returns:
+        The stacked NumpyBatch object
+    """
+    batch = {}
+    batch_keys = list(dict_list[0].keys())
+    for batch_key in batch_keys:
+        # NWP is nested so treat separately
+        if batch_key == "nwp":
+            nwp_batch: dict[str, NWPBatchKey] = {}
+            # Unpack source keys
+            nwp_sources = list(dict_list[0]["nwp"].keys())
+            for nwp_source in nwp_sources:
+                # Keys can be different for different NWPs
+                nwp_batch_keys = list(dict_list[0]["nwp"][nwp_source].keys())
+                nwp_source_batch = {}
+                for nwp_batch_key in nwp_batch_keys:
+                    nwp_source_batch[nwp_batch_key] = stack_data_list(
+                        [d["nwp"][nwp_source][nwp_batch_key] for d in dict_list],
+                        nwp_batch_key,
+                    )
+                nwp_batch[nwp_source] = nwp_source_batch
+            batch["nwp"] = nwp_batch
+        else:
+            batch[batch_key] = stack_data_list(
+                [d[batch_key] for d in dict_list],
+                batch_key,
+            )
+    return batch
+def _key_is_constant(batch_key):
+    is_constant = batch_key.endswith("t0_idx") or batch_key == NWPBatchKey.channel_names
+    return is_constant
+def stack_data_list(
+    data_list: list,
+    batch_key: Union[str, NWPBatchKey],
+):
+    """How to combine data entries for each key
+    """
+    if _key_is_constant(batch_key):
+        # These are always the same for all examples.
+        return data_list[0]
+    try:
+        return np.stack(data_list)
+    except Exception as e:
+        logger.debug(f"Could not stack the following shapes together, ({batch_key})")
+        shapes = [example.shape for example in data_list]
+        logger.debug(shapes)
+        logger.error(e)
+        raise e

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/nwp.py RENAMED Viewed

@@ -1,5 +1,4 @@
 """Convert NWP to NumpyBatch"""
 import pandas as pd
 import xarray as xr

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/numpy_batch/satellite.py RENAMED Viewed

@@ -13,6 +13,7 @@ class SatelliteBatchKey:
 def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
     """Convert from Xarray to NumpyBatch"""
     example = {
         SatelliteBatchKey.satellite_actual: da.values,
         SatelliteBatchKey.time_utc: da.time_utc.values.astype(float),
@@ -27,4 +28,4 @@ def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None
     if t0_idx is not None:
         example[SatelliteBatchKey.t0_idx] = t0_idx
-    return example
+    return example

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler/torch_datasets/process_and_combine.py RENAMED Viewed

@@ -4,7 +4,7 @@ import xarray as xr
 from typing import Tuple
 from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS
+from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
 from ocf_data_sampler.numpy_batch import (
     convert_nwp_to_numpy_batch,
     convert_satellite_to_numpy_batch,
@@ -25,8 +25,8 @@ def process_and_combine_datasets(
     location: Location,
     target_key: str = 'gsp'
 ) -> dict:
-    """Normalize and convert data to numpy arrays"""
+    """Normalise and convert data to numpy arrays"""
     numpy_modalities = []
     if "nwp" in dataset_dict:
@@ -37,19 +37,23 @@ def process_and_combine_datasets(
             # Standardise
             provider = config.input_data.nwp[nwp_key].provider
             da_nwp = (da_nwp - NWP_MEANS[provider]) / NWP_STDS[provider]
             # Convert to NumpyBatch
             nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_batch(da_nwp)
         # Combine the NWPs into NumpyBatch
         numpy_modalities.append({NWPBatchKey.nwp: nwp_numpy_modalities})
     if "sat" in dataset_dict:
-        # Satellite is already in the range [0-1] so no need to standardise
+        # Standardise
         da_sat = dataset_dict["sat"]
+        da_sat = (da_sat - RSS_MEAN) / RSS_STD
         # Convert to NumpyBatch
         numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
     gsp_config = config.input_data.gsp
     if "gsp" in dataset_dict:
@@ -93,6 +97,7 @@ def process_and_combine_datasets(
     return combined_sample
 def process_and_combine_site_sample_dict(
     dataset_dict: dict,
     config: Configuration,
@@ -119,8 +124,9 @@ def process_and_combine_site_sample_dict(
             data_arrays.append((f"nwp-{provider}", da_nwp))
     if "sat" in dataset_dict:
-        # TODO add some satellite normalisation
+        # Standardise
         da_sat = dataset_dict["sat"]
+        da_sat = (da_sat - RSS_MEAN) / RSS_STD
         data_arrays.append(("satellite", da_sat))
     if "site" in dataset_dict:
@@ -143,6 +149,7 @@ def merge_dicts(list_of_dicts: list[dict]) -> dict:
         combined_dict.update(d)
     return combined_dict
 def merge_arrays(normalised_data_arrays: list[Tuple[str, xr.DataArray]]) -> xr.Dataset:
     """
     Combine a list of DataArrays into a single Dataset with unique naming conventions.

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38/ocf_data_sampler.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.36
+Version: 0.0.38
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
 # ocf-data-sampler
 <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
-[![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-)
+[![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-)
 <!-- ALL-CONTRIBUTORS-BADGE:END -->
 [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -129,6 +129,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
     </tr>
   </tbody>
 </table>

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/ocf_data_sampler.egg-info/SOURCES.txt RENAMED Viewed

@@ -28,6 +28,7 @@ ocf_data_sampler/load/nwp/providers/ecmwf.py
 ocf_data_sampler/load/nwp/providers/ukv.py
 ocf_data_sampler/load/nwp/providers/utils.py
 ocf_data_sampler/numpy_batch/__init__.py
+ocf_data_sampler/numpy_batch/collate.py
 ocf_data_sampler/numpy_batch/gsp.py
 ocf_data_sampler/numpy_batch/nwp.py
 ocf_data_sampler/numpy_batch/satellite.py
@@ -56,6 +57,7 @@ tests/load/test_load_gsp.py
 tests/load/test_load_nwp.py
 tests/load/test_load_satellite.py
 tests/load/test_load_sites.py
+tests/numpy_batch/test_collate.py
 tests/numpy_batch/test_gsp.py
 tests/numpy_batch/test_nwp.py
 tests/numpy_batch/test_satellite.py
@@ -66,5 +68,6 @@ tests/select/test_find_contiguous_time_periods.py
 tests/select/test_location.py
 tests/select/test_select_spatial_slice.py
 tests/select/test_select_time_slice.py
+tests/torch_datasets/test_process_and_combine.py
 tests/torch_datasets/test_pvnet_uk_regional.py
 tests/torch_datasets/test_site.py

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ocf_data_sampler"
-version = "0.0.36"
+version = "0.0.38"
 license = { file = "LICENSE" }
 readme = "README.md"
 description = "Sample from weather data for renewable energy prediction"

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/conftest.py RENAMED Viewed

@@ -7,6 +7,7 @@ import xarray as xr
 import tempfile
 from ocf_data_sampler.config.model import Site
+from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
 _top_test_directory = os.path.dirname(os.path.realpath(__file__))
@@ -269,3 +270,18 @@ def uk_gsp_zarr_path(ds_uk_gsp):
         ds_uk_gsp.to_zarr(filename)
         yield filename
+@pytest.fixture()
+def pvnet_config_filename(
+    tmp_path, config_filename, nwp_ukv_zarr_path, uk_gsp_zarr_path, sat_zarr_path
+):
+    # adjust config to point to the zarr file
+    config = load_yaml_configuration(config_filename)
+    config.input_data.nwp["ukv"].zarr_path = nwp_ukv_zarr_path
+    config.input_data.satellite.zarr_path = sat_zarr_path
+    config.input_data.gsp.zarr_path = uk_gsp_zarr_path
+    filename = f"{tmp_path}/configuration.yaml"
+    save_yaml_configuration(config, filename)
+    return filename

ocf_data_sampler-0.0.38/tests/numpy_batch/test_collate.py ADDED Viewed

@@ -0,0 +1,26 @@
+from ocf_data_sampler.numpy_batch import GSPBatchKey, SatelliteBatchKey
+from ocf_data_sampler.numpy_batch.collate import stack_np_examples_into_batch
+from ocf_data_sampler.torch_datasets import PVNetUKRegionalDataset
+def test_pvnet(pvnet_config_filename):
+    # Create dataset object
+    dataset = PVNetUKRegionalDataset(pvnet_config_filename)
+    assert len(dataset.locations) == 317
+    assert len(dataset.valid_t0_times) == 39
+    assert len(dataset) == 317 * 39
+    # Generate 2 samples
+    sample1 = dataset[0]
+    sample2 = dataset[1]
+    batch = stack_np_examples_into_batch([sample1, sample2])
+    assert isinstance(batch, dict)
+    assert "nwp" in batch
+    assert isinstance(batch["nwp"], dict)
+    assert "ukv" in batch["nwp"]
+    assert GSPBatchKey.gsp in batch
+    assert SatelliteBatchKey.satellite_actual in batch

ocf_data_sampler-0.0.38/tests/torch_datasets/test_process_and_combine.py ADDED Viewed

@@ -0,0 +1,165 @@
+import pytest
+import tempfile
+import numpy as np
+import pandas as pd
+import xarray as xr
+import dask.array as da
+from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
+from ocf_data_sampler.config import Configuration
+from ocf_data_sampler.select.location import Location
+from ocf_data_sampler.numpy_batch import NWPBatchKey, GSPBatchKey, SatelliteBatchKey
+from ocf_data_sampler.torch_datasets import PVNetUKRegionalDataset
+from ocf_data_sampler.torch_datasets.process_and_combine import (
+    process_and_combine_datasets,
+    process_and_combine_site_sample_dict,
+    merge_dicts,
+    fill_nans_in_arrays,
+    compute,
+)
+def test_process_and_combine_datasets(pvnet_config_filename):
+    # Load in config for function and define location
+    config = load_yaml_configuration(pvnet_config_filename)
+    t0 = pd.Timestamp("2024-01-01 00:00")
+    location = Location(coordinate_system="osgb", x=1234, y=5678, id=1)
+    nwp_data = xr.DataArray(
+        np.random.rand(4, 2, 2, 2),
+        dims=["time_utc", "channel", "y", "x"],
+        coords={
+            "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
+            "channel": ["t2m", "dswrf"],
+            "step": ("time_utc", pd.timedelta_range(start='0h', periods=4, freq='h')),
+            "init_time_utc": pd.Timestamp("2024-01-01 00:00")
+        }
+    )
+    sat_data = xr.DataArray(
+        np.random.rand(7, 1, 2, 2),
+        dims=["time_utc", "channel", "y", "x"],
+        coords={
+            "time_utc": pd.date_range("2024-01-01 00:00", periods=7, freq="5min"),
+            "channel": ["HRV"],
+            "x_geostationary": (["y", "x"], np.array([[1, 2], [1, 2]])),
+            "y_geostationary": (["y", "x"], np.array([[1, 1], [2, 2]]))
+        }
+    )
+    # Combine as dict
+    dataset_dict = {
+        "nwp": {"ukv": nwp_data},
+        "sat": sat_data
+    }
+    # Call relevant function
+    result = process_and_combine_datasets(dataset_dict, config, t0, location)
+    # Assert result is dict - check and validate
+    assert isinstance(result, dict)
+    assert NWPBatchKey.nwp in result
+    assert result[SatelliteBatchKey.satellite_actual].shape == (7, 1, 2, 2)
+    assert result[NWPBatchKey.nwp]["ukv"][NWPBatchKey.nwp].shape == (4, 1, 2, 2)
+def test_merge_dicts():
+    """Test merge_dicts function"""
+    dict1 = {"a": 1, "b": 2}
+    dict2 = {"c": 3, "d": 4}
+    dict3 = {"e": 5}
+    result = merge_dicts([dict1, dict2, dict3])
+    assert result == {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
+    # Test key overwriting
+    dict4 = {"a": 10, "f": 6}
+    result = merge_dicts([dict1, dict4])
+    assert result["a"] == 10
+def test_fill_nans_in_arrays():
+    """Test the fill_nans_in_arrays function"""
+    array_with_nans = np.array([1.0, np.nan, 3.0, np.nan])
+    nested_dict = {
+        "array1": array_with_nans,
+        "nested": {
+            "array2": np.array([np.nan, 2.0, np.nan, 4.0])
+        },
+        "string_key": "not_an_array"
+    }
+    result = fill_nans_in_arrays(nested_dict)
+    assert not np.isnan(result["array1"]).any()
+    assert np.array_equal(result["array1"], np.array([1.0, 0.0, 3.0, 0.0]))
+    assert not np.isnan(result["nested"]["array2"]).any()
+    assert np.array_equal(result["nested"]["array2"], np.array([0.0, 2.0, 0.0, 4.0]))
+    assert result["string_key"] == "not_an_array"
+def test_compute():
+    """Test compute function with dask array"""
+    da_dask = xr.DataArray(da.random.random((5, 5)))
+    # Create a nested dictionary with dask array
+    nested_dict = {
+        "array1": da_dask,
+        "nested": {
+            "array2": da_dask
+        }
+    }
+    # Ensure initial data is lazy - i.e. not yet computed
+    assert not isinstance(nested_dict["array1"].data, np.ndarray)
+    assert not isinstance(nested_dict["nested"]["array2"].data, np.ndarray)
+    # Call the compute function
+    result = compute(nested_dict)
+    # Assert that the result is an xarray DataArray and no longer lazy
+    assert isinstance(result["array1"], xr.DataArray)
+    assert isinstance(result["nested"]["array2"], xr.DataArray)
+    assert isinstance(result["array1"].data, np.ndarray)
+    assert isinstance(result["nested"]["array2"].data, np.ndarray)
+    # Ensure there are no NaN values in the computed data
+    assert not np.isnan(result["array1"].data).any()
+    assert not np.isnan(result["nested"]["array2"].data).any()
+def test_process_and_combine_site_sample_dict(pvnet_config_filename):
+    # Load config
+    config = load_yaml_configuration(pvnet_config_filename)
+    # Specify minimal structure for testing
+    raw_nwp_values = np.random.rand(4, 1, 2, 2)  # Single channel
+    site_dict = {
+        "nwp": {
+            "ukv": xr.DataArray(
+                raw_nwp_values,
+                dims=["time_utc", "channel", "y", "x"],
+                coords={
+                    "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
+                    "channel": ["dswrf"],  # Single channel
+                },
+            )
+        }
+    }
+    print(f"Input site_dict: {site_dict}")
+    # Call function
+    result = process_and_combine_site_sample_dict(site_dict, config)
+    # Assert to validate output structure
+    assert isinstance(result, xr.Dataset), "Result should be an xarray.Dataset"
+    assert len(result.data_vars) > 0, "Dataset should contain data variables"
+    # Check that the expected variable is present and has the expected shape
+    expected_variable = "nwp-ukv"
+    assert expected_variable in result.data_vars, f"Expected variable '{expected_variable}' not found"
+    nwp_result = result[expected_variable]
+    assert nwp_result.shape == (4, 1, 2, 2), f"Unexpected shape for '{expected_variable}': {nwp_result.shape}"

{ocf_data_sampler-0.0.36 → ocf_data_sampler-0.0.38}/tests/torch_datasets/test_pvnet_uk_regional.py RENAMED Viewed

@@ -6,19 +6,6 @@ from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configura
 from ocf_data_sampler.numpy_batch import NWPBatchKey, GSPBatchKey, SatelliteBatchKey
-@pytest.fixture()
-def pvnet_config_filename(tmp_path, config_filename, nwp_ukv_zarr_path, uk_gsp_zarr_path, sat_zarr_path):
-    # adjust config to point to the zarr file
-    config = load_yaml_configuration(config_filename)
-    config.input_data.nwp['ukv'].zarr_path = nwp_ukv_zarr_path
-    config.input_data.satellite.zarr_path = sat_zarr_path
-    config.input_data.gsp.zarr_path = uk_gsp_zarr_path
-    filename = f"{tmp_path}/configuration.yaml"
-    save_yaml_configuration(config, filename)
-    return filename
 def test_pvnet(pvnet_config_filename):